Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%


1177 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from pathlib import Path 

36from typing import ( 

37 TYPE_CHECKING, 

38 Callable, 

39 Optional, 

40 Protocol, 

41 Union, 

42) 

43 

44from .errors import NotTreeError 

45from .file import GitFile, _GitFile 

46from .objects import ( 

47 S_ISGITLINK, 

48 ZERO_SHA, 

49 Blob, 

50 Commit, 

51 ObjectID, 

52 ShaFile, 

53 Tag, 

54 Tree, 

55 TreeEntry, 

56 hex_to_filename, 

57 hex_to_sha, 

58 object_class, 

59 sha_to_hex, 

60 valid_hexsha, 

61) 

62from .pack import ( 

63 PACK_SPOOL_FILE_MAX_SIZE, 

64 ObjectContainer, 

65 Pack, 

66 PackData, 

67 PackedObjectContainer, 

68 PackFileDisappeared, 

69 PackHint, 

70 PackIndexer, 

71 PackInflater, 

72 PackStreamCopier, 

73 UnpackedObject, 

74 extend_pack, 

75 full_unpacked_object, 

76 generate_unpacked_objects, 

77 iter_sha1, 

78 load_pack_index_file, 

79 pack_objects_to_data, 

80 write_pack_data, 

81 write_pack_index, 

82) 

83from .protocol import DEPTH_INFINITE 

84from .refs import PEELED_TAG_SUFFIX, Ref 

85 

86if TYPE_CHECKING: 

87 from .commit_graph import CommitGraph 

88 from .diff_tree import RenameDetector 

89 

90 

91class GraphWalker(Protocol): 

92 """Protocol for graph walker objects.""" 

93 

94 def __next__(self) -> Optional[bytes]: 

95 """Return the next object SHA to visit.""" 

96 ... 

97 

98 def ack(self, sha: bytes) -> None: 

99 """Acknowledge that an object has been received.""" 

100 ... 

101 

102 
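# A minimal in-memory implementation of the GraphWalker protocol above; this
# helper is an illustrative assumption, not part of dulwich. It hands out
# candidate SHAs one at a time and records which ones were acknowledged.
class _ListGraphWalker:
    """Sketch of a list-backed graph walker for examples and tests."""

    def __init__(self, candidates: Iterable[bytes]) -> None:
        self._candidates = list(candidates)
        self.acked: list[bytes] = []

    def __next__(self) -> Optional[bytes]:
        if self._candidates:
            return self._candidates.pop(0)
        return None

    def ack(self, sha: bytes) -> None:
        self.acked.append(sha)
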

103INFODIR = "info" 

104PACKDIR = "pack" 

105 

106# use permissions consistent with Git; just readable by everyone 

107# TODO: should packs also be non-writable on Windows? if so, that 

108# would require some rather significant adjustments to the test suite

109PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

110 

111# Grace period for cleaning up temporary pack files (in seconds) 

112# Matches git's default of 2 weeks 

113DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

114 

115 

116def find_shallow( 

117 store: ObjectContainer, heads: Iterable[bytes], depth: int 

118) -> tuple[set[bytes], set[bytes]]: 

119 """Find shallow commits according to a given depth. 

120 

121 Args: 

122 store: An ObjectStore for looking up objects. 

123 heads: Iterable of head SHAs to start walking from. 

124 depth: The depth of ancestors to include. A depth of one includes 

125 only the heads themselves. 

126 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

127 considered shallow and unshallow according to the arguments. Note that 

128 these sets may overlap if a commit is reachable along multiple paths. 

129 """ 

130 parents: dict[bytes, list[bytes]] = {} 

131 commit_graph = store.get_commit_graph() 

132 

133 def get_parents(sha: bytes) -> list[bytes]: 

134 result = parents.get(sha, None) 

135 if not result: 

136 # Try to use commit graph first if available 

137 if commit_graph: 

138 graph_parents = commit_graph.get_parents(sha) 

139 if graph_parents is not None: 

140 result = graph_parents 

141 parents[sha] = result 

142 return result 

143 # Fall back to loading the object 

144 commit = store[sha] 

145 assert isinstance(commit, Commit) 

146 result = commit.parents 

147 parents[sha] = result 

148 return result 

149 

150 todo = [] # stack of (sha, depth) 

151 for head_sha in heads: 

152 obj = store[head_sha] 

153 # Peel tags if necessary 

154 while isinstance(obj, Tag): 

155 _, sha = obj.object 

156 obj = store[sha] 

157 if isinstance(obj, Commit): 

158 todo.append((obj.id, 1)) 

159 

160 not_shallow = set() 

161 shallow = set() 

162 while todo: 

163 sha, cur_depth = todo.pop() 

164 if cur_depth < depth: 

165 not_shallow.add(sha) 

166 new_depth = cur_depth + 1 

167 todo.extend((p, new_depth) for p in get_parents(sha)) 

168 else: 

169 shallow.add(sha) 

170 

171 return shallow, not_shallow 

172 

173 
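# Illustrative sketch (not part of dulwich): exercise find_shallow() on a
# three-commit linear history held in a MemoryObjectStore (defined later in
# this module). With depth=2 the tip is "not shallow", its parent becomes the
# shallow boundary, and the root commit is never visited.
def _example_find_shallow() -> tuple[set[bytes], set[bytes]]:
    store = MemoryObjectStore()
    tree = Tree()
    store.add_object(tree)
    chain: list[bytes] = []
    for i in range(3):
        commit = Commit()
        commit.tree = tree.id
        commit.parents = [chain[-1]] if chain else []
        commit.author = commit.committer = b"Example <example@example.com>"
        commit.author_time = commit.commit_time = i
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"commit %d" % i
        store.add_object(commit)
        chain.append(commit.id)
    shallow, not_shallow = find_shallow(store, [chain[-1]], 2)
    assert shallow == {chain[1]} and not_shallow == {chain[2]}
    return shallow, not_shallow
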

174def get_depth( 

175 store: ObjectContainer, 

176 head: bytes, 

177 get_parents: Callable = lambda commit: commit.parents, 

178 max_depth: Optional[int] = None, 

179) -> int: 

180 """Return the current available depth for the given head. 

181 

182 For commits with multiple parents, the largest possible depth will be 

183 returned. 

184 

185 Args: 

186 store: Object store to search in 

187 head: commit to start from 

188 get_parents: optional function for getting the parents of a commit 

189 max_depth: maximum depth to search 

190 """ 

191 if head not in store: 

192 return 0 

193 current_depth = 1 

194 queue = [(head, current_depth)] 

195 commit_graph = store.get_commit_graph() 

196 

197 while queue and (max_depth is None or current_depth < max_depth): 

198 e, depth = queue.pop(0) 

199 current_depth = max(current_depth, depth) 

200 

201 # Try to use commit graph for parent lookup if available 

202 parents = None 

203 if commit_graph: 

204 parents = commit_graph.get_parents(e) 

205 

206 if parents is None: 

207 # Fall back to loading the object 

208 cmt = store[e] 

209 if isinstance(cmt, Tag): 

210 _cls, sha = cmt.object 

211 cmt = store[sha] 

212 parents = get_parents(cmt) 

213 

214 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

215 return current_depth 

216 

217 
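# Small helper sketch (the name and semantics are assumptions): because
# get_depth() honours max_depth, it can answer "is the history at least this
# deep?" without walking the full ancestry.
def _history_at_least(store: ObjectContainer, head: bytes, depth: int) -> bool:
    return get_depth(store, head, max_depth=depth) >= depth
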

218class PackContainer(Protocol): 

219 """Protocol for containers that can accept pack files.""" 

220 

221 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

222 """Add a new pack.""" 

223 

224 

225class BaseObjectStore: 

226 """Object store interface.""" 

227 

228 def determine_wants_all( 

229 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

230 ) -> list[ObjectID]: 

231 """Determine which objects are wanted based on refs.""" 

232 

233 def _want_deepen(sha: bytes) -> bool: 

234 if not depth: 

235 return False 

236 if depth == DEPTH_INFINITE: 

237 return True 

238 return depth > self._get_depth(sha) 

239 

240 return [ 

241 sha 

242 for (ref, sha) in refs.items() 

243 if (sha not in self or _want_deepen(sha)) 

244 and not ref.endswith(PEELED_TAG_SUFFIX) 

245 and not sha == ZERO_SHA 

246 ] 

247 
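    # Example (ref names and SHAs are made up): peeled tag refs and SHAs that
    # are already present in the store are filtered out of the result.
    #
    #     wants = store.determine_wants_all({
    #         b"refs/heads/main": b"aa" * 20,
    #         b"refs/tags/v1.0" + PEELED_TAG_SUFFIX: b"bb" * 20,
    #     })
    #     # -> [b"aa" * 20] if neither SHA is present locally
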

248 def contains_loose(self, sha: bytes) -> bool: 

249 """Check if a particular object is present by SHA1 and is loose.""" 

250 raise NotImplementedError(self.contains_loose) 

251 

252 def __contains__(self, sha1: bytes) -> bool: 

253 """Check if a particular object is present by SHA1. 

254 

255 This method makes no distinction between loose and packed objects. 

256 """ 

257 return self.contains_loose(sha1) 

258 

259 @property 

260 def packs(self) -> list[Pack]: 

261 """Iterable of pack objects.""" 

262 raise NotImplementedError 

263 

264 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

265 """Obtain the raw text for an object. 

266 

267 Args: 

268 name: sha for the object. 

269 Returns: tuple with numeric type and object contents. 

270 """ 

271 raise NotImplementedError(self.get_raw) 

272 

273 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

274 """Obtain an object by SHA1.""" 

275 type_num, uncomp = self.get_raw(sha1) 

276 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

277 

278 def __iter__(self) -> Iterator[bytes]: 

279 """Iterate over the SHAs that are present in this store.""" 

280 raise NotImplementedError(self.__iter__) 

281 

282 def add_object(self, obj: ShaFile) -> None: 

283 """Add a single object to this object store.""" 

284 raise NotImplementedError(self.add_object) 

285 

286 def add_objects( 

287 self, 

288 objects: Sequence[tuple[ShaFile, Optional[str]]], 

289 progress: Optional[Callable] = None, 

290 ) -> Optional["Pack"]: 

291 """Add a set of objects to this object store. 

292 

293 Args: 

294 objects: Iterable over a list of (object, path) tuples 

295 progress: Optional progress callback 

296 """ 

297 raise NotImplementedError(self.add_objects) 

298 

299 def tree_changes( 

300 self, 

301 source: Optional[bytes], 

302 target: Optional[bytes], 

303 want_unchanged: bool = False, 

304 include_trees: bool = False, 

305 change_type_same: bool = False, 

306 rename_detector: Optional["RenameDetector"] = None, 

307 paths: Optional[list[bytes]] = None, 

308 ) -> Iterator[ 

309 tuple[ 

310 tuple[Optional[bytes], Optional[bytes]], 

311 tuple[Optional[int], Optional[int]], 

312 tuple[Optional[bytes], Optional[bytes]], 

313 ] 

314 ]: 

315 """Find the differences between the contents of two trees. 

316 

317 Args: 

318 source: SHA1 of the source tree 

319 target: SHA1 of the target tree 

320 want_unchanged: Whether unchanged files should be reported 

321 include_trees: Whether to include trees 

322 change_type_same: Whether to report files changing 

323 type in the same entry. 

324 rename_detector: RenameDetector object for detecting renames. 

325 paths: Optional list of paths to filter to (as bytes). 

326 Returns: Iterator over tuples with 

327 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

328 """ 

329 from .diff_tree import tree_changes 

330 

331 for change in tree_changes( 

332 self, 

333 source, 

334 target, 

335 want_unchanged=want_unchanged, 

336 include_trees=include_trees, 

337 change_type_same=change_type_same, 

338 rename_detector=rename_detector, 

339 paths=paths, 

340 ): 

341 old_path = change.old.path if change.old is not None else None 

342 new_path = change.new.path if change.new is not None else None 

343 old_mode = change.old.mode if change.old is not None else None 

344 new_mode = change.new.mode if change.new is not None else None 

345 old_sha = change.old.sha if change.old is not None else None 

346 new_sha = change.new.sha if change.new is not None else None 

347 yield ( 

348 (old_path, new_path), 

349 (old_mode, new_mode), 

350 (old_sha, new_sha), 

351 ) 

352 

353 def iter_tree_contents( 

354 self, tree_id: bytes, include_trees: bool = False 

355 ) -> Iterator[tuple[bytes, int, bytes]]: 

356 """Iterate the contents of a tree and all subtrees. 

357 

358 Iteration is depth-first pre-order, as in e.g. os.walk. 

359 

360 Args: 

361 tree_id: SHA1 of the tree. 

362 include_trees: If True, include tree objects in the iteration. 

363 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

364 tree. 

365 """ 

366 warnings.warn( 

367 "Please use dulwich.object_store.iter_tree_contents", 

368 DeprecationWarning, 

369 stacklevel=2, 

370 ) 

371 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

372 

373 def iterobjects_subset( 

374 self, shas: Iterable[bytes], *, allow_missing: bool = False 

375 ) -> Iterator[ShaFile]: 

376 """Iterate over a subset of objects in the store. 

377 

378 Args: 

379 shas: Iterable of object SHAs to retrieve 

380 allow_missing: If True, skip missing objects; if False, raise KeyError 

381 

382 Returns: 

383 Iterator of ShaFile objects 

384 

385 Raises: 

386 KeyError: If an object is missing and allow_missing is False 

387 """ 

388 for sha in shas: 

389 try: 

390 yield self[sha] 

391 except KeyError: 

392 if not allow_missing: 

393 raise 

394 

395 def find_missing_objects( 

396 self, 

397 haves: Iterable[bytes], 

398 wants: Iterable[bytes], 

399 shallow: Optional[set[bytes]] = None, 

400 progress: Optional[Callable] = None, 

401 get_tagged: Optional[Callable] = None, 

402 get_parents: Callable = lambda commit: commit.parents, 

403 ) -> Iterator[tuple[bytes, Optional[bytes]]]: 

404 """Find the missing objects required for a set of revisions. 

405 

406 Args: 

407 haves: Iterable over SHAs already in common. 

408 wants: Iterable over SHAs of objects to fetch. 

409 shallow: Set of shallow commit SHA1s to skip 

410 progress: Simple progress function that will be called with 

411 updated progress strings. 

412 get_tagged: Function that returns a dict of pointed-to sha -> 

413 tag sha for including tags. 

414 get_parents: Optional function for getting the parents of a 

415 commit. 

416 Returns: Iterator over (sha, path) pairs. 

417 """ 

418 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

419 finder = MissingObjectFinder( 

420 self, 

421 haves=haves, 

422 wants=wants, 

423 shallow=shallow, 

424 progress=progress, 

425 get_tagged=get_tagged, 

426 get_parents=get_parents, 

427 ) 

428 return iter(finder) 

429 

430 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

431 """Find which revisions this store has in common using graphwalker. 

432 

433 Args: 

434 graphwalker: A graphwalker object. 

435 Returns: List of SHAs that are in common 

436 """ 

437 haves = [] 

438 sha = next(graphwalker) 

439 while sha: 

440 if sha in self: 

441 haves.append(sha) 

442 graphwalker.ack(sha) 

443 sha = next(graphwalker) 

444 return haves 

445 
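    # Example pairing with the _ListGraphWalker sketch near the top of this
    # module (an illustrative helper, not dulwich API):
    #
    #     walker = _ListGraphWalker(candidate_shas)
    #     common = store.find_common_revisions(walker)
    #     # walker.acked now lists the SHAs the store reported as known
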

446 def generate_pack_data( 

447 self, 

448 have: Iterable[bytes], 

449 want: Iterable[bytes], 

450 shallow: Optional[set[bytes]] = None, 

451 progress: Optional[Callable] = None, 

452 ofs_delta: bool = True, 

453 ) -> tuple[int, Iterator[UnpackedObject]]: 

454 """Generate pack data objects for a set of wants/haves. 

455 

456 Args: 

457 have: List of SHA1s of objects that should not be sent 

458 want: List of SHA1s of objects that should be sent 

459 shallow: Set of shallow commit SHA1s to skip 

460 ofs_delta: Whether OFS deltas can be included 

461 progress: Optional progress reporting method 

462 """ 

463 # Note that the pack-specific implementation below is more efficient, 

464 # as it reuses deltas 

465 missing_objects = MissingObjectFinder( 

466 self, haves=have, wants=want, shallow=shallow, progress=progress 

467 ) 

468 object_ids = list(missing_objects) 

469 return pack_objects_to_data( 

470 [(self[oid], path) for oid, path in object_ids], 

471 ofs_delta=ofs_delta, 

472 progress=progress, 

473 ) 

474 

475 def peel_sha(self, sha: bytes) -> bytes: 

476 """Peel all tags from a SHA. 

477 

478 Args: 

479 sha: The object SHA to peel. 

480 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

481 intermediate tags; if the original ref does not point to a tag, 

482 this will equal the original SHA1. 

483 """ 

484 warnings.warn( 

485 "Please use dulwich.object_store.peel_sha()", 

486 DeprecationWarning, 

487 stacklevel=2, 

488 ) 

489 return peel_sha(self, sha)[1].id 

490 

491 def _get_depth( 

492 self, 

493 head: bytes, 

494 get_parents: Callable = lambda commit: commit.parents, 

495 max_depth: Optional[int] = None, 

496 ) -> int: 

497 """Return the current available depth for the given head. 

498 

499 For commits with multiple parents, the largest possible depth will be 

500 returned. 

501 

502 Args: 

503 head: commit to start from 

504 get_parents: optional function for getting the parents of a commit 

505 max_depth: maximum depth to search 

506 """ 

507 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

508 

509 def close(self) -> None: 

510 """Close any files opened by this object store.""" 

511 # Default implementation is a NO-OP 

512 

513 def prune(self, grace_period: Optional[int] = None) -> None: 

514 """Prune/clean up this object store. 

515 

516 This includes removing orphaned temporary files and other 

517 housekeeping tasks. Default implementation is a NO-OP. 

518 

519 Args: 

520 grace_period: Grace period in seconds for removing temporary files. 

521 If None, uses the default grace period. 

522 """ 

523 # Default implementation is a NO-OP 

524 

525 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

526 """Iterate over all SHA1s that start with a given prefix. 

527 

528 The default implementation is a naive iteration over all objects. 

529 However, subclasses may override this method with more efficient 

530 implementations. 

531 """ 

532 for sha in self: 

533 if sha.startswith(prefix): 

534 yield sha 

535 
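    # Example (hypothetical abbreviated id): resolving a short prefix means
    # requiring exactly one match from iter_prefix().
    #
    #     matches = list(store.iter_prefix(b"a94a8"))
    #     if len(matches) != 1:
    #         raise KeyError(b"a94a8")
    #     full_sha = matches[0]
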

536 def get_commit_graph(self) -> Optional["CommitGraph"]: 

537 """Get the commit graph for this object store. 

538 

539 Returns: 

540 CommitGraph object if available, None otherwise 

541 """ 

542 return None 

543 

544 def write_commit_graph( 

545 self, refs: Optional[list[bytes]] = None, reachable: bool = True 

546 ) -> None: 

547 """Write a commit graph file for this object store. 

548 

549 Args: 

550 refs: List of refs to include. If None, includes all refs from object store. 

551 reachable: If True, includes all commits reachable from refs. 

552 If False, only includes the direct ref targets. 

553 

554 Note: 

555 The base implementation raises NotImplementedError; subclasses should

556 override this method to provide commit graph writing functionality.

557 """ 

558 raise NotImplementedError(self.write_commit_graph) 

559 

560 def get_object_mtime(self, sha: bytes) -> float: 

561 """Get the modification time of an object. 

562 

563 Args: 

564 sha: SHA1 of the object 

565 

566 Returns: 

567 Modification time as seconds since epoch 

568 

569 Raises: 

570 KeyError: if the object is not found 

571 """ 

572 # Default implementation raises KeyError 

573 # Subclasses should override to provide actual mtime 

574 raise KeyError(sha) 

575 

576 
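# Illustrative sketch (not part of dulwich): drive BaseObjectStore.tree_changes()
# with a MemoryObjectStore holding two one-file trees, printing the
# (paths, modes, shas) triples the iterator yields.
def _example_tree_changes() -> None:
    store = MemoryObjectStore()
    old_blob = Blob.from_string(b"one\n")
    new_blob = Blob.from_string(b"two\n")
    old_tree = Tree()
    old_tree.add(b"a.txt", 0o100644, old_blob.id)
    new_tree = Tree()
    new_tree.add(b"a.txt", 0o100644, new_blob.id)
    for obj in (old_blob, new_blob, old_tree, new_tree):
        store.add_object(obj)
    for paths, modes, shas in store.tree_changes(old_tree.id, new_tree.id):
        print(paths, modes, shas)
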

577class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

578 """Object store that uses pack files for storage. 

579 

580 This class provides a base implementation for object stores that use 

581 Git pack files as their primary storage mechanism. It handles caching 

582 of open pack files and provides configuration for pack file operations. 

583 """ 

584 

585 def __init__( 

586 self, 

587 pack_compression_level: int = -1, 

588 pack_index_version: Optional[int] = None, 

589 pack_delta_window_size: Optional[int] = None, 

590 pack_window_memory: Optional[int] = None, 

591 pack_delta_cache_size: Optional[int] = None, 

592 pack_depth: Optional[int] = None, 

593 pack_threads: Optional[int] = None, 

594 pack_big_file_threshold: Optional[int] = None, 

595 ) -> None: 

596 """Initialize a PackBasedObjectStore. 

597 

598 Args: 

599 pack_compression_level: Compression level for pack files (-1 to 9) 

600 pack_index_version: Pack index version to use 

601 pack_delta_window_size: Window size for delta compression 

602 pack_window_memory: Maximum memory to use for delta window 

603 pack_delta_cache_size: Cache size for delta operations 

604 pack_depth: Maximum depth for pack deltas 

605 pack_threads: Number of threads to use for packing 

606 pack_big_file_threshold: Threshold for treating files as "big" 

607 """ 

608 self._pack_cache: dict[str, Pack] = {} 

609 self.pack_compression_level = pack_compression_level 

610 self.pack_index_version = pack_index_version 

611 self.pack_delta_window_size = pack_delta_window_size 

612 self.pack_window_memory = pack_window_memory 

613 self.pack_delta_cache_size = pack_delta_cache_size 

614 self.pack_depth = pack_depth 

615 self.pack_threads = pack_threads 

616 self.pack_big_file_threshold = pack_big_file_threshold 

617 

618 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

619 """Add a new pack to this object store.""" 

620 raise NotImplementedError(self.add_pack) 

621 

622 def add_pack_data( 

623 self, 

624 count: int, 

625 unpacked_objects: Iterator[UnpackedObject], 

626 progress: Optional[Callable] = None, 

627 ) -> Optional["Pack"]: 

628 """Add pack data to this object store. 

629 

630 Args: 

631 count: Number of items to add 

632 unpacked_objects: Iterator of UnpackedObject instances 

633 progress: Optional progress callback 

634 """ 

635 if count == 0: 

636 # Don't bother writing an empty pack file 

637 return None 

638 f, commit, abort = self.add_pack() 

639 try: 

640 write_pack_data( 

641 f.write, 

642 unpacked_objects, 

643 num_records=count, 

644 progress=progress, 

645 compression_level=self.pack_compression_level, 

646 ) 

647 except BaseException: 

648 abort() 

649 raise 

650 else: 

651 return commit() 

652 

653 @property 

654 def alternates(self) -> list: 

655 """Return list of alternate object stores.""" 

656 return [] 

657 

658 def contains_packed(self, sha: bytes) -> bool: 

659 """Check if a particular object is present by SHA1 and is packed. 

660 

661 This does not check alternates. 

662 """ 

663 for pack in self.packs: 

664 try: 

665 if sha in pack: 

666 return True 

667 except PackFileDisappeared: 

668 pass 

669 return False 

670 

671 def __contains__(self, sha) -> bool: 

672 """Check if a particular object is present by SHA1. 

673 

674 This method makes no distinction between loose and packed objects. 

675 """ 

676 if self.contains_packed(sha) or self.contains_loose(sha): 

677 return True 

678 for alternate in self.alternates: 

679 if sha in alternate: 

680 return True 

681 return False 

682 

683 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

684 """Add a newly appeared pack to the cache by path.""" 

685 prev_pack = self._pack_cache.get(base_name) 

686 if prev_pack is not pack: 

687 self._pack_cache[base_name] = pack 

688 if prev_pack: 

689 prev_pack.close() 

690 

691 def generate_pack_data( 

692 self, have, want, shallow=None, progress=None, ofs_delta=True 

693 ) -> tuple[int, Iterator[UnpackedObject]]: 

694 """Generate pack data objects for a set of wants/haves. 

695 

696 Args: 

697 have: List of SHA1s of objects that should not be sent 

698 want: List of SHA1s of objects that should be sent 

699 shallow: Set of shallow commit SHA1s to skip 

700 ofs_delta: Whether OFS deltas can be included 

701 progress: Optional progress reporting method 

702 """ 

703 missing_objects = MissingObjectFinder( 

704 self, haves=have, wants=want, shallow=shallow, progress=progress 

705 ) 

706 remote_has = missing_objects.get_remote_has() 

707 object_ids = list(missing_objects) 

708 return len(object_ids), generate_unpacked_objects( 

709 self, 

710 object_ids, 

711 progress=progress, 

712 ofs_delta=ofs_delta, 

713 other_haves=remote_has, 

714 ) 

715 

716 def _clear_cached_packs(self) -> None: 

717 pack_cache = self._pack_cache 

718 self._pack_cache = {} 

719 while pack_cache: 

720 (name, pack) = pack_cache.popitem() 

721 pack.close() 

722 

723 def _iter_cached_packs(self) -> Iterator[Pack]: 

724 return iter(self._pack_cache.values()) 

725 

726 def _update_pack_cache(self) -> list[Pack]: 

727 raise NotImplementedError(self._update_pack_cache) 

728 

729 def close(self) -> None: 

730 """Close the object store and release resources. 

731 

732 This method closes all cached pack files and frees associated resources. 

733 """ 

734 self._clear_cached_packs() 

735 

736 @property 

737 def packs(self) -> list[Pack]: 

738 """List with pack objects.""" 

739 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

740 

741 def count_pack_files(self) -> int: 

742 """Count the number of pack files. 

743 

744 Returns: 

745 Number of pack files (excluding those with .keep files) 

746 """ 

747 count = 0 

748 for pack in self.packs: 

749 # Check if there's a .keep file for this pack 

750 keep_path = pack._basename + ".keep" 

751 if not os.path.exists(keep_path): 

752 count += 1 

753 return count 

754 

755 def _iter_alternate_objects(self) -> Iterator[bytes]: 

756 """Iterate over the SHAs of all the objects in alternate stores.""" 

757 for alternate in self.alternates: 

758 yield from alternate 

759 

760 def _iter_loose_objects(self) -> Iterator[bytes]: 

761 """Iterate over the SHAs of all loose objects.""" 

762 raise NotImplementedError(self._iter_loose_objects) 

763 

764 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

765 raise NotImplementedError(self._get_loose_object) 

766 

767 def delete_loose_object(self, sha: bytes) -> None: 

768 """Delete a loose object. 

769 

770 This method only handles loose objects. For packed objects, 

771 use repack(exclude=...) to exclude them during repacking. 

772 """ 

773 raise NotImplementedError(self.delete_loose_object) 

774 

775 def _remove_pack(self, pack: "Pack") -> None: 

776 raise NotImplementedError(self._remove_pack) 

777 

778 def pack_loose_objects(self, progress: Optional[Callable] = None) -> int: 

779 """Pack loose objects. 

780 

781 Args: 

782 progress: Optional progress reporting callback 

783 

784 Returns: Number of objects packed 

785 """ 

786 objects: list[tuple[ShaFile, None]] = [] 

787 for sha in self._iter_loose_objects(): 

788 obj = self._get_loose_object(sha) 

789 if obj is not None: 

790 objects.append((obj, None)) 

791 self.add_objects(objects, progress=progress) 

792 for obj, path in objects: 

793 self.delete_loose_object(obj.id) 

794 return len(objects) 

795 

796 def repack( 

797 self, exclude: Optional[set] = None, progress: Optional[Callable] = None 

798 ) -> int: 

799 """Repack the packs in this repository. 

800 

801 Note that this implementation is fairly naive and currently keeps all 

802 objects in memory while it repacks. 

803 

804 Args: 

805 exclude: Optional set of object SHAs to exclude from repacking 

806 progress: Optional progress reporting callback 

807 """ 

808 if exclude is None: 

809 exclude = set() 

810 

811 loose_objects = set() 

812 excluded_loose_objects = set() 

813 for sha in self._iter_loose_objects(): 

814 if sha not in exclude: 

815 obj = self._get_loose_object(sha) 

816 if obj is not None: 

817 loose_objects.add(obj) 

818 else: 

819 excluded_loose_objects.add(sha) 

820 

821 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

822 old_packs = {p.name(): p for p in self.packs} 

823 for name, pack in old_packs.items(): 

824 objects.update( 

825 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

826 ) 

827 

828 # Only create a new pack if there are objects to pack 

829 if objects: 

830 # The name of the consolidated pack might match the name of a 

831 # pre-existing pack. Take care not to remove the newly created 

832 # consolidated pack. 

833 consolidated = self.add_objects(list(objects), progress=progress) 

834 if consolidated is not None: 

835 old_packs.pop(consolidated.name(), None) 

836 

837 # Delete loose objects that were packed 

838 for obj in loose_objects: 

839 if obj is not None: 

840 self.delete_loose_object(obj.id) 

841 # Delete excluded loose objects 

842 for sha in excluded_loose_objects: 

843 self.delete_loose_object(sha) 

844 for name, pack in old_packs.items(): 

845 self._remove_pack(pack) 

846 self._update_pack_cache() 

847 return len(objects) 

848 
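    # Rough maintenance sketch for a DiskObjectStore (the thresholds are
    # arbitrary assumptions, not git's gc heuristics):
    #
    #     if store.count_loose_objects() > 1000:
    #         store.pack_loose_objects()
    #     if store.count_pack_files() > 10:
    #         store.repack()
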

849 def __iter__(self): 

850 """Iterate over the SHAs that are present in this store.""" 

851 self._update_pack_cache() 

852 for pack in self._iter_cached_packs(): 

853 try: 

854 yield from pack 

855 except PackFileDisappeared: 

856 pass 

857 yield from self._iter_loose_objects() 

858 yield from self._iter_alternate_objects() 

859 

860 def contains_loose(self, sha): 

861 """Check if a particular object is present by SHA1 and is loose. 

862 

863 This does not check alternates. 

864 """ 

865 return self._get_loose_object(sha) is not None 

866 

867 def get_raw(self, name): 

868 """Obtain the raw fulltext for an object. 

869 

870 Args: 

871 name: sha for the object. 

872 Returns: tuple with numeric type and object contents. 

873 """ 

874 if name == ZERO_SHA: 

875 raise KeyError(name) 

876 if len(name) == 40: 

877 sha = hex_to_sha(name) 

878 hexsha = name 

879 elif len(name) == 20: 

880 sha = name 

881 hexsha = None 

882 else: 

883 raise AssertionError(f"Invalid object name {name!r}") 

884 for pack in self._iter_cached_packs(): 

885 try: 

886 return pack.get_raw(sha) 

887 except (KeyError, PackFileDisappeared): 

888 pass 

889 if hexsha is None: 

890 hexsha = sha_to_hex(name) 

891 ret = self._get_loose_object(hexsha) 

892 if ret is not None: 

893 return ret.type_num, ret.as_raw_string() 

894 # Maybe something else has added a pack with the object 

895 # in the meantime?

896 for pack in self._update_pack_cache(): 

897 try: 

898 return pack.get_raw(sha) 

899 except KeyError: 

900 pass 

901 for alternate in self.alternates: 

902 try: 

903 return alternate.get_raw(hexsha) 

904 except KeyError: 

905 pass 

906 raise KeyError(hexsha) 

907 

908 def iter_unpacked_subset( 

909 self, 

910 shas: set[bytes], 

911 include_comp: bool = False, 

912 allow_missing: bool = False, 

913 convert_ofs_delta: bool = True, 

914 ) -> Iterator[UnpackedObject]: 

915 """Iterate over a subset of objects, yielding UnpackedObject instances. 

916 

917 Args: 

918 shas: Set of object SHAs to retrieve 

919 include_comp: Whether to include compressed data 

920 allow_missing: If True, skip missing objects; if False, raise KeyError 

921 convert_ofs_delta: Whether to convert OFS_DELTA objects 

922 

923 Returns: 

924 Iterator of UnpackedObject instances 

925 

926 Raises: 

927 KeyError: If an object is missing and allow_missing is False 

928 """ 

929 todo: set[bytes] = set(shas) 

930 for p in self._iter_cached_packs(): 

931 for unpacked in p.iter_unpacked_subset( 

932 todo, 

933 include_comp=include_comp, 

934 allow_missing=True, 

935 convert_ofs_delta=convert_ofs_delta, 

936 ): 

937 yield unpacked 

938 hexsha = sha_to_hex(unpacked.sha()) 

939 todo.remove(hexsha) 

940 # Maybe something else has added a pack with the object 

941 # in the meantime?

942 for p in self._update_pack_cache(): 

943 for unpacked in p.iter_unpacked_subset( 

944 todo, 

945 include_comp=include_comp, 

946 allow_missing=True, 

947 convert_ofs_delta=convert_ofs_delta, 

948 ): 

949 yield unpacked 

950 hexsha = sha_to_hex(unpacked.sha()) 

951 todo.remove(hexsha) 

952 for alternate in self.alternates: 

953 for unpacked in alternate.iter_unpacked_subset( 

954 todo, 

955 include_comp=include_comp, 

956 allow_missing=True, 

957 convert_ofs_delta=convert_ofs_delta, 

958 ): 

959 yield unpacked 

960 hexsha = sha_to_hex(unpacked.sha()) 

961 todo.remove(hexsha) 

962 

963 def iterobjects_subset( 

964 self, shas: Iterable[bytes], *, allow_missing: bool = False 

965 ) -> Iterator[ShaFile]: 

966 """Iterate over a subset of objects in the store. 

967 

968 This method searches for objects in pack files, alternates, and loose storage. 

969 

970 Args: 

971 shas: Iterable of object SHAs to retrieve 

972 allow_missing: If True, skip missing objects; if False, raise KeyError 

973 

974 Returns: 

975 Iterator of ShaFile objects 

976 

977 Raises: 

978 KeyError: If an object is missing and allow_missing is False 

979 """ 

980 todo: set[bytes] = set(shas) 

981 for p in self._iter_cached_packs(): 

982 for o in p.iterobjects_subset(todo, allow_missing=True): 

983 yield o 

984 todo.remove(o.id) 

985 # Maybe something else has added a pack with the object 

986 # in the meantime?

987 for p in self._update_pack_cache(): 

988 for o in p.iterobjects_subset(todo, allow_missing=True): 

989 yield o 

990 todo.remove(o.id) 

991 for alternate in self.alternates: 

992 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

993 yield o 

994 todo.remove(o.id) 

995 for oid in todo: 

996 loose_obj: Optional[ShaFile] = self._get_loose_object(oid) 

997 if loose_obj is not None: 

998 yield loose_obj 

999 elif not allow_missing: 

1000 raise KeyError(oid) 

1001 

1002 def get_unpacked_object( 

1003 self, sha1: bytes, *, include_comp: bool = False 

1004 ) -> UnpackedObject: 

1005 """Obtain the unpacked object. 

1006 

1007 Args: 

1008 sha1: sha for the object. 

1009 include_comp: Whether to include compression metadata. 

1010 """ 

1011 if sha1 == ZERO_SHA: 

1012 raise KeyError(sha1) 

1013 if len(sha1) == 40: 

1014 sha = hex_to_sha(sha1) 

1015 hexsha = sha1 

1016 elif len(sha1) == 20: 

1017 sha = sha1 

1018 hexsha = None 

1019 else: 

1020 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1021 for pack in self._iter_cached_packs(): 

1022 try: 

1023 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1024 except (KeyError, PackFileDisappeared): 

1025 pass 

1026 if hexsha is None: 

1027 hexsha = sha_to_hex(sha1) 

1028 # Maybe something else has added a pack with the object 

1029 # in the meantime?

1030 for pack in self._update_pack_cache(): 

1031 try: 

1032 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1033 except KeyError: 

1034 pass 

1035 for alternate in self.alternates: 

1036 try: 

1037 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1038 except KeyError: 

1039 pass 

1040 raise KeyError(hexsha) 

1041 

1042 def add_objects( 

1043 self, 

1044 objects: Sequence[tuple[ShaFile, Optional[str]]], 

1045 progress: Optional[Callable[[str], None]] = None, 

1046 ) -> Optional["Pack"]: 

1047 """Add a set of objects to this object store. 

1048 

1049 Args: 

1050 objects: Iterable over (object, path) tuples, should support 

1051 __len__. 

1052 progress: Optional progress reporting function. 

1053 Returns: Pack object of the objects written. 

1054 """ 

1055 count = len(objects) 

1056 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1057 return self.add_pack_data(count, record_iter, progress=progress) 

1058 

1059 
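# Sketch of how the two halves above fit together (an assumption, not a
# dulwich recipe): generate_pack_data() yields a count plus an iterator of
# UnpackedObject records that add_pack_data() on another store can consume,
# copying every object reachable from ``want`` that ``have`` lacks.
def _copy_objects(
    src: PackBasedObjectStore, dst: PackBasedObjectStore, want: list[bytes]
) -> None:
    count, records = src.generate_pack_data(have=[], want=want)
    dst.add_pack_data(count, records)
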

1060class DiskObjectStore(PackBasedObjectStore): 

1061 """Git-style object store that exists on disk.""" 

1062 

1063 path: Union[str, os.PathLike] 

1064 pack_dir: Union[str, os.PathLike] 

1065 _alternates: Optional[list["DiskObjectStore"]] 

1066 _commit_graph: Optional["CommitGraph"] 

1067 

1068 def __init__( 

1069 self, 

1070 path: Union[str, os.PathLike], 

1071 loose_compression_level=-1, 

1072 pack_compression_level=-1, 

1073 pack_index_version=None, 

1074 pack_delta_window_size=None, 

1075 pack_window_memory=None, 

1076 pack_delta_cache_size=None, 

1077 pack_depth=None, 

1078 pack_threads=None, 

1079 pack_big_file_threshold=None, 

1080 ) -> None: 

1081 """Open an object store. 

1082 

1083 Args: 

1084 path: Path of the object store. 

1085 loose_compression_level: zlib compression level for loose objects 

1086 pack_compression_level: zlib compression level for pack objects 

1087 pack_index_version: pack index version to use (1, 2, or 3) 

1088 pack_delta_window_size: sliding window size for delta compression 

1089 pack_window_memory: memory limit for delta window operations 

1090 pack_delta_cache_size: size of cache for delta operations 

1091 pack_depth: maximum delta chain depth 

1092 pack_threads: number of threads for pack operations 

1093 pack_big_file_threshold: threshold for treating files as big 

1094 """ 

1095 super().__init__( 

1096 pack_compression_level=pack_compression_level, 

1097 pack_index_version=pack_index_version, 

1098 pack_delta_window_size=pack_delta_window_size, 

1099 pack_window_memory=pack_window_memory, 

1100 pack_delta_cache_size=pack_delta_cache_size, 

1101 pack_depth=pack_depth, 

1102 pack_threads=pack_threads, 

1103 pack_big_file_threshold=pack_big_file_threshold, 

1104 ) 

1105 self.path = path 

1106 self.pack_dir = os.path.join(self.path, PACKDIR) 

1107 self._alternates = None 

1108 self.loose_compression_level = loose_compression_level 

1109 self.pack_compression_level = pack_compression_level 

1110 self.pack_index_version = pack_index_version 

1111 

1112 # Commit graph support - lazy loaded 

1113 self._commit_graph = None 

1114 self._use_commit_graph = True # Default to true 

1115 

1116 def __repr__(self) -> str: 

1117 """Return string representation of DiskObjectStore. 

1118 

1119 Returns: 

1120 String representation including the store path 

1121 """ 

1122 return f"<{self.__class__.__name__}({self.path!r})>" 

1123 

1124 @classmethod 

1125 def from_config(cls, path: Union[str, os.PathLike], config): 

1126 """Create a DiskObjectStore from a configuration object. 

1127 

1128 Args: 

1129 path: Path to the object store directory 

1130 config: Configuration object to read settings from 

1131 

1132 Returns: 

1133 New DiskObjectStore instance configured according to config 

1134 """ 

1135 try: 

1136 default_compression_level = int( 

1137 config.get((b"core",), b"compression").decode() 

1138 ) 

1139 except KeyError: 

1140 default_compression_level = -1 

1141 try: 

1142 loose_compression_level = int( 

1143 config.get((b"core",), b"looseCompression").decode() 

1144 ) 

1145 except KeyError: 

1146 loose_compression_level = default_compression_level 

1147 try: 

1148 pack_compression_level = int( 

1149 config.get((b"core",), "packCompression").decode() 

1150 ) 

1151 except KeyError: 

1152 pack_compression_level = default_compression_level 

1153 try: 

1154 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1155 except KeyError: 

1156 pack_index_version = None 

1157 

1158 # Read pack configuration options 

1159 try: 

1160 pack_delta_window_size = int( 

1161 config.get((b"pack",), b"deltaWindowSize").decode() 

1162 ) 

1163 except KeyError: 

1164 pack_delta_window_size = None 

1165 try: 

1166 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1167 except KeyError: 

1168 pack_window_memory = None 

1169 try: 

1170 pack_delta_cache_size = int( 

1171 config.get((b"pack",), b"deltaCacheSize").decode() 

1172 ) 

1173 except KeyError: 

1174 pack_delta_cache_size = None 

1175 try: 

1176 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1177 except KeyError: 

1178 pack_depth = None 

1179 try: 

1180 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1181 except KeyError: 

1182 pack_threads = None 

1183 try: 

1184 pack_big_file_threshold = int( 

1185 config.get((b"pack",), b"bigFileThreshold").decode() 

1186 ) 

1187 except KeyError: 

1188 pack_big_file_threshold = None 

1189 

1190 # Read core.commitGraph setting 

1191 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1192 

1193 instance = cls( 

1194 path, 

1195 loose_compression_level, 

1196 pack_compression_level, 

1197 pack_index_version, 

1198 pack_delta_window_size, 

1199 pack_window_memory, 

1200 pack_delta_cache_size, 

1201 pack_depth, 

1202 pack_threads, 

1203 pack_big_file_threshold, 

1204 ) 

1205 instance._use_commit_graph = use_commit_graph 

1206 return instance 

1207 
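    # Hypothetical wiring (paths invented for the example): read a repository
    # config with dulwich.config.ConfigFile so pack.* tuning options and
    # core.commitGraph are honoured.
    #
    #     from dulwich.config import ConfigFile
    #     config = ConfigFile.from_path(os.path.join(repo_path, "config"))
    #     store = DiskObjectStore.from_config(
    #         os.path.join(repo_path, "objects"), config
    #     )
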

1208 @property 

1209 def alternates(self): 

1210 """Get the list of alternate object stores. 

1211 

1212 Reads from .git/objects/info/alternates if not already cached. 

1213 

1214 Returns: 

1215 List of DiskObjectStore instances for alternate object directories 

1216 """ 

1217 if self._alternates is not None: 

1218 return self._alternates 

1219 self._alternates = [] 

1220 for path in self._read_alternate_paths(): 

1221 self._alternates.append(DiskObjectStore(path)) 

1222 return self._alternates 

1223 

1224 def _read_alternate_paths(self): 

1225 try: 

1226 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1227 except FileNotFoundError: 

1228 return 

1229 with f: 

1230 for line in f.readlines(): 

1231 line = line.rstrip(b"\n") 

1232 if line.startswith(b"#"): 

1233 continue 

1234 if os.path.isabs(line): 

1235 yield os.fsdecode(line) 

1236 else: 

1237 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1238 

1239 def add_alternate_path(self, path) -> None: 

1240 """Add an alternate path to this object store.""" 

1241 try: 

1242 os.mkdir(os.path.join(self.path, INFODIR)) 

1243 except FileExistsError: 

1244 pass 

1245 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1246 with GitFile(alternates_path, "wb") as f: 

1247 try: 

1248 orig_f = open(alternates_path, "rb") 

1249 except FileNotFoundError: 

1250 pass 

1251 else: 

1252 with orig_f: 

1253 f.write(orig_f.read()) 

1254 f.write(os.fsencode(path) + b"\n") 

1255 

1256 if not os.path.isabs(path): 

1257 path = os.path.join(self.path, path) 

1258 self.alternates.append(DiskObjectStore(path)) 

1259 

1260 def _update_pack_cache(self): 

1261 """Read and iterate over new pack files and cache them.""" 

1262 try: 

1263 pack_dir_contents = os.listdir(self.pack_dir) 

1264 except FileNotFoundError: 

1265 self.close() 

1266 return [] 

1267 pack_files = set() 

1268 for name in pack_dir_contents: 

1269 if name.startswith("pack-") and name.endswith(".pack"): 

1270 # verify that idx exists first (otherwise the pack was not yet 

1271 # fully written) 

1272 idx_name = os.path.splitext(name)[0] + ".idx" 

1273 if idx_name in pack_dir_contents: 

1274 pack_name = name[: -len(".pack")] 

1275 pack_files.add(pack_name) 

1276 

1277 # Open newly appeared pack files 

1278 new_packs = [] 

1279 for f in pack_files: 

1280 if f not in self._pack_cache: 

1281 pack = Pack( 

1282 os.path.join(self.pack_dir, f), 

1283 delta_window_size=self.pack_delta_window_size, 

1284 window_memory=self.pack_window_memory, 

1285 delta_cache_size=self.pack_delta_cache_size, 

1286 depth=self.pack_depth, 

1287 threads=self.pack_threads, 

1288 big_file_threshold=self.pack_big_file_threshold, 

1289 ) 

1290 new_packs.append(pack) 

1291 self._pack_cache[f] = pack 

1292 # Remove disappeared pack files 

1293 for f in set(self._pack_cache) - pack_files: 

1294 self._pack_cache.pop(f).close() 

1295 return new_packs 

1296 

1297 def _get_shafile_path(self, sha): 

1298 # Check from object dir 

1299 return hex_to_filename(os.fspath(self.path), sha) 

1300 

1301 def _iter_loose_objects(self): 

1302 for base in os.listdir(self.path): 

1303 if len(base) != 2: 

1304 continue 

1305 for rest in os.listdir(os.path.join(self.path, base)): 

1306 sha = os.fsencode(base + rest) 

1307 if not valid_hexsha(sha): 

1308 continue 

1309 yield sha 

1310 

1311 def count_loose_objects(self) -> int: 

1312 """Count the number of loose objects in the object store. 

1313 

1314 Returns: 

1315 Number of loose objects 

1316 """ 

1317 count = 0 

1318 if not os.path.exists(self.path): 

1319 return 0 

1320 

1321 for i in range(256): 

1322 subdir = os.path.join(self.path, f"{i:02x}") 

1323 try: 

1324 count += len( 

1325 [ 

1326 name 

1327 for name in os.listdir(subdir) 

1328 if len(name) == 38 # 40 - 2 for the prefix 

1329 ] 

1330 ) 

1331 except FileNotFoundError: 

1332 # Directory may have been removed or is inaccessible 

1333 continue 

1334 

1335 return count 

1336 

1337 def _get_loose_object(self, sha): 

1338 path = self._get_shafile_path(sha) 

1339 try: 

1340 return ShaFile.from_path(path) 

1341 except FileNotFoundError: 

1342 return None 

1343 

1344 def delete_loose_object(self, sha) -> None: 

1345 """Delete a loose object from disk. 

1346 

1347 Args: 

1348 sha: SHA1 of the object to delete 

1349 

1350 Raises: 

1351 FileNotFoundError: If the object file doesn't exist 

1352 """ 

1353 os.remove(self._get_shafile_path(sha)) 

1354 

1355 def get_object_mtime(self, sha): 

1356 """Get the modification time of an object. 

1357 

1358 Args: 

1359 sha: SHA1 of the object 

1360 

1361 Returns: 

1362 Modification time as seconds since epoch 

1363 

1364 Raises: 

1365 KeyError: if the object is not found 

1366 """ 

1367 # First check if it's a loose object 

1368 if self.contains_loose(sha): 

1369 path = self._get_shafile_path(sha) 

1370 try: 

1371 return os.path.getmtime(path) 

1372 except FileNotFoundError: 

1373 pass 

1374 

1375 # Check if it's in a pack file 

1376 for pack in self.packs: 

1377 try: 

1378 if sha in pack: 

1379 # Use the pack file's mtime for packed objects 

1380 pack_path = pack._data_path 

1381 try: 

1382 return os.path.getmtime(pack_path) 

1383 except (FileNotFoundError, AttributeError): 

1384 pass 

1385 except PackFileDisappeared: 

1386 pass 

1387 

1388 raise KeyError(sha) 

1389 

1390 def _remove_pack(self, pack) -> None: 

1391 try: 

1392 del self._pack_cache[os.path.basename(pack._basename)] 

1393 except KeyError: 

1394 pass 

1395 pack.close() 

1396 os.remove(pack.data.path) 

1397 os.remove(pack.index.path) 

1398 

1399 def _get_pack_basepath(self, entries): 

1400 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1401 # TODO: Handle self.pack_dir being bytes 

1402 suffix = suffix_bytes.decode("ascii") 

1403 return os.path.join(self.pack_dir, "pack-" + suffix) 

1404 

1405 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1406 """Move a specific file containing a pack into the pack directory. 

1407 

1408 Note: The file should be on the same file system as the 

1409 packs directory. 

1410 

1411 Args: 

1412 f: Open file object for the pack. 

1413 path: Path to the pack file. 

1414 num_objects: Number of objects in the pack. 

1415 indexer: A PackIndexer for indexing the pack. 

1416 progress: Optional progress reporting function. 

1417 """ 

1418 entries = [] 

1419 for i, entry in enumerate(indexer): 

1420 if progress is not None: 

1421 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1422 entries.append(entry) 

1423 

1424 pack_sha, extra_entries = extend_pack( 

1425 f, 

1426 indexer.ext_refs(), 

1427 get_raw=self.get_raw, 

1428 compression_level=self.pack_compression_level, 

1429 progress=progress, 

1430 ) 

1431 f.flush() 

1432 try: 

1433 fileno = f.fileno() 

1434 except AttributeError: 

1435 pass 

1436 else: 

1437 os.fsync(fileno) 

1438 f.close() 

1439 

1440 entries.extend(extra_entries) 

1441 

1442 # Move the pack in. 

1443 entries.sort() 

1444 pack_base_name = self._get_pack_basepath(entries) 

1445 

1446 for pack in self.packs: 

1447 if pack._basename == pack_base_name: 

1448 return pack 

1449 

1450 target_pack_path = pack_base_name + ".pack" 

1451 target_index_path = pack_base_name + ".idx" 

1452 if sys.platform == "win32": 

1453 # Windows might have the target pack file lingering. Attempt 

1454 # removal, silently passing if the target does not exist. 

1455 with suppress(FileNotFoundError): 

1456 os.remove(target_pack_path) 

1457 os.rename(path, target_pack_path) 

1458 

1459 # Write the index. 

1460 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1461 write_pack_index( 

1462 index_file, entries, pack_sha, version=self.pack_index_version 

1463 ) 

1464 

1465 # Add the pack to the store and return it. 

1466 final_pack = Pack( 

1467 pack_base_name, 

1468 delta_window_size=self.pack_delta_window_size, 

1469 window_memory=self.pack_window_memory, 

1470 delta_cache_size=self.pack_delta_cache_size, 

1471 depth=self.pack_depth, 

1472 threads=self.pack_threads, 

1473 big_file_threshold=self.pack_big_file_threshold, 

1474 ) 

1475 final_pack.check_length_and_checksum() 

1476 self._add_cached_pack(pack_base_name, final_pack) 

1477 return final_pack 

1478 

1479 def add_thin_pack(self, read_all, read_some, progress=None): 

1480 """Add a new thin pack to this object store. 

1481 

1482 Thin packs are packs that contain deltas with parents that exist 

1483 outside the pack. They should never be placed in the object store 

1484 directly, and should always be indexed and completed as they are copied.

1485 

1486 Args: 

1487 read_all: Read function that blocks until the number of 

1488 requested bytes are read. 

1489 read_some: Read function that returns at least one byte, but may 

1490 not return the number of bytes requested. 

1491 progress: Optional progress reporting function. 

1492 Returns: A Pack object pointing at the now-completed thin pack in the 

1493 objects/pack directory. 

1494 """ 

1495 import tempfile 

1496 

1497 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1498 with os.fdopen(fd, "w+b") as f: 

1499 os.chmod(path, PACK_MODE) 

1500 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1501 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1502 copier.verify(progress=progress) 

1503 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1504 

1505 def add_pack(self): 

1506 """Add a new pack to this object store. 

1507 

1508 Returns: File object to write to, a commit function to

1509 call when the pack is finished, and an abort

1510 function.

1511 """ 

1512 import tempfile 

1513 

1514 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1515 f = os.fdopen(fd, "w+b") 

1516 os.chmod(path, PACK_MODE) 

1517 

1518 def commit(): 

1519 if f.tell() > 0: 

1520 f.seek(0) 

1521 

1522 with PackData(path, f) as pd: 

1523 indexer = PackIndexer.for_pack_data( 

1524 pd, resolve_ext_ref=self.get_raw 

1525 ) 

1526 return self._complete_pack(f, path, len(pd), indexer) 

1527 else: 

1528 f.close() 

1529 os.remove(path) 

1530 return None 

1531 

1532 def abort() -> None: 

1533 f.close() 

1534 os.remove(path) 

1535 

1536 return f, commit, abort 

1537 
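    # Typical use of the (file, commit, abort) triple returned above (a
    # sketch only):
    #
    #     f, commit, abort = store.add_pack()
    #     try:
    #         ...  # write pack bytes to f
    #         pack = commit()
    #     except BaseException:
    #         abort()
    #         raise
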

1538 def add_object(self, obj) -> None: 

1539 """Add a single object to this object store. 

1540 

1541 Args: 

1542 obj: Object to add 

1543 """ 

1544 path = self._get_shafile_path(obj.id) 

1545 dir = os.path.dirname(path) 

1546 try: 

1547 os.mkdir(dir) 

1548 except FileExistsError: 

1549 pass 

1550 if os.path.exists(path): 

1551 return # Already there, no need to write again 

1552 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1553 f.write( 

1554 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1555 ) 

1556 

1557 @classmethod 

1558 def init(cls, path: Union[str, os.PathLike]): 

1559 """Initialize a new disk object store. 

1560 

1561 Creates the necessary directory structure for a Git object store. 

1562 

1563 Args: 

1564 path: Path where the object store should be created 

1565 

1566 Returns: 

1567 New DiskObjectStore instance 

1568 """ 

1569 try: 

1570 os.mkdir(path) 

1571 except FileExistsError: 

1572 pass 

1573 os.mkdir(os.path.join(path, "info")) 

1574 os.mkdir(os.path.join(path, PACKDIR)) 

1575 return cls(path) 

1576 

1577 def iter_prefix(self, prefix): 

1578 """Iterate over all object SHAs with the given prefix. 

1579 

1580 Args: 

1581 prefix: Hex prefix to search for (as bytes) 

1582 

1583 Returns: 

1584 Iterator of object SHAs (as bytes) matching the prefix 

1585 """ 

1586 if len(prefix) < 2: 

1587 yield from super().iter_prefix(prefix) 

1588 return 

1589 seen = set() 

1590 dir = prefix[:2].decode() 

1591 rest = prefix[2:].decode() 

1592 try: 

1593 for name in os.listdir(os.path.join(self.path, dir)): 

1594 if name.startswith(rest): 

1595 sha = os.fsencode(dir + name) 

1596 if sha not in seen: 

1597 seen.add(sha) 

1598 yield sha 

1599 except FileNotFoundError: 

1600 pass 

1601 

1602 for p in self.packs: 

1603 bin_prefix = ( 

1604 binascii.unhexlify(prefix) 

1605 if len(prefix) % 2 == 0 

1606 else binascii.unhexlify(prefix[:-1]) 

1607 ) 

1608 for sha in p.index.iter_prefix(bin_prefix): 

1609 sha = sha_to_hex(sha) 

1610 if sha.startswith(prefix) and sha not in seen: 

1611 seen.add(sha) 

1612 yield sha 

1613 for alternate in self.alternates: 

1614 for sha in alternate.iter_prefix(prefix): 

1615 if sha not in seen: 

1616 seen.add(sha) 

1617 yield sha 

1618 

1619 def get_commit_graph(self): 

1620 """Get the commit graph for this object store. 

1621 

1622 Returns: 

1623 CommitGraph object if available, None otherwise 

1624 """ 

1625 if not self._use_commit_graph: 

1626 return None 

1627 

1628 if self._commit_graph is None: 

1629 from .commit_graph import read_commit_graph 

1630 

1631 # Look for commit graph in our objects directory 

1632 graph_file = os.path.join(self.path, "info", "commit-graph") 

1633 if os.path.exists(graph_file): 

1634 self._commit_graph = read_commit_graph(graph_file) 

1635 return self._commit_graph 

1636 

1637 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1638 """Write a commit graph file for this object store. 

1639 

1640 Args: 

1641 refs: List of refs to include. If None, includes all refs from object store. 

1642 reachable: If True, includes all commits reachable from refs. 

1643 If False, only includes the direct ref targets. 

1644 """ 

1645 from .commit_graph import get_reachable_commits 

1646 

1647 if refs is None: 

1648 # Get all commit objects from the object store 

1649 all_refs = [] 

1650 # Iterate through all objects to find commits 

1651 for sha in self: 

1652 try: 

1653 obj = self[sha] 

1654 if obj.type_name == b"commit": 

1655 all_refs.append(sha) 

1656 except KeyError: 

1657 continue 

1658 else: 

1659 # Use provided refs 

1660 all_refs = refs 

1661 

1662 if not all_refs: 

1663 return # No commits to include 

1664 

1665 if reachable: 

1666 # Get all reachable commits 

1667 commit_ids = get_reachable_commits(self, all_refs) 

1668 else: 

1669 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1670 commit_ids = [] 

1671 for ref in all_refs: 

1672 if isinstance(ref, bytes) and len(ref) == 40: 

1673 # Already hex ObjectID 

1674 commit_ids.append(ref) 

1675 elif isinstance(ref, bytes) and len(ref) == 20: 

1676 # Binary SHA, convert to hex ObjectID 

1677 from .objects import sha_to_hex 

1678 

1679 commit_ids.append(sha_to_hex(ref)) 

1680 else: 

1681 # Assume it's already correct format 

1682 commit_ids.append(ref) 

1683 

1684 if commit_ids: 

1685 # Write commit graph directly to our object store path 

1686 # Generate the commit graph 

1687 from .commit_graph import generate_commit_graph 

1688 

1689 graph = generate_commit_graph(self, commit_ids) 

1690 

1691 if graph.entries: 

1692 # Ensure the info directory exists 

1693 info_dir = os.path.join(self.path, "info") 

1694 os.makedirs(info_dir, exist_ok=True) 

1695 

1696 # Write using GitFile for atomic operation 

1697 graph_path = os.path.join(info_dir, "commit-graph") 

1698 with GitFile(graph_path, "wb") as f: 

1699 assert isinstance( 

1700 f, _GitFile 

1701 ) # GitFile in write mode always returns _GitFile 

1702 graph.write_to_file(f) 

1703 

1704 # Clear cached commit graph so it gets reloaded 

1705 self._commit_graph = None 

1706 
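    # Typical sequence (a sketch; the commit graph is an optional
    # optimisation):
    #
    #     store.write_commit_graph()        # writes objects/info/commit-graph
    #     graph = store.get_commit_graph()  # parsed graph, or None if disabled
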

1707 def prune(self, grace_period: Optional[int] = None) -> None: 

1708 """Prune/clean up this object store. 

1709 

1710 This removes temporary files that were left behind by interrupted 

1711 pack operations. These are files that start with ``tmp_pack_`` in the 

1712 repository directory or files with .pack extension but no corresponding 

1713 .idx file in the pack directory. 

1714 

1715 Args: 

1716 grace_period: Grace period in seconds for removing temporary files. 

1717 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1718 """ 

1719 import glob 

1720 

1721 if grace_period is None: 

1722 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1723 

1724 # Clean up tmp_pack_* files in the repository directory 

1725 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1726 # Check if file is old enough (more than grace period) 

1727 mtime = os.path.getmtime(tmp_file) 

1728 if time.time() - mtime > grace_period: 

1729 os.remove(tmp_file) 

1730 

1731 # Clean up orphaned .pack files without corresponding .idx files 

1732 try: 

1733 pack_dir_contents = os.listdir(self.pack_dir) 

1734 except FileNotFoundError: 

1735 return 

1736 

1737 pack_files = {} 

1738 idx_files = set() 

1739 

1740 for name in pack_dir_contents: 

1741 if name.endswith(".pack"): 

1742 base_name = name[:-5] # Remove .pack extension 

1743 pack_files[base_name] = name 

1744 elif name.endswith(".idx"): 

1745 base_name = name[:-4] # Remove .idx extension 

1746 idx_files.add(base_name) 

1747 

1748 # Remove .pack files without corresponding .idx files 

1749 for base_name, pack_name in pack_files.items(): 

1750 if base_name not in idx_files: 

1751 pack_path = os.path.join(self.pack_dir, pack_name) 

1752 # Check if file is old enough (more than grace period) 

1753 mtime = os.path.getmtime(pack_path) 

1754 if time.time() - mtime > grace_period: 

1755 os.remove(pack_path) 

1756 
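# --- Example (hedged sketch, not part of object_store.py) -----------------
# Pruning leftover temporary pack files with a custom grace period.
# The repository path is illustrative.
from dulwich.repo import Repo

repo = Repo("/tmp/example-repo")
# Remove tmp_pack_* files and orphaned .pack files older than one hour.
repo.object_store.prune(grace_period=3600)
# ---------------------------------------------------------------------------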

1757 

1758class MemoryObjectStore(BaseObjectStore): 

1759 """Object store that keeps all objects in memory.""" 

1760 

1761 def __init__(self) -> None: 

1762 """Initialize a MemoryObjectStore. 

1763 

1764 Creates an empty in-memory object store. 

1765 """ 

1766 super().__init__() 

1767 self._data: dict[str, ShaFile] = {} 

1768 self.pack_compression_level = -1 

1769 

1770 def _to_hexsha(self, sha): 

1771 if len(sha) == 40: 

1772 return sha 

1773 elif len(sha) == 20: 

1774 return sha_to_hex(sha) 

1775 else: 

1776 raise ValueError(f"Invalid sha {sha!r}") 

1777 

1778 def contains_loose(self, sha): 

1779 """Check if a particular object is present by SHA1 and is loose.""" 

1780 return self._to_hexsha(sha) in self._data 

1781 

1782 def contains_packed(self, sha) -> bool: 

1783 """Check if a particular object is present by SHA1 and is packed.""" 

1784 return False 

1785 

1786 def __iter__(self): 

1787 """Iterate over the SHAs that are present in this store.""" 

1788 return iter(self._data.keys()) 

1789 

1790 @property 

1791 def packs(self): 

1792 """List with pack objects.""" 

1793 return [] 

1794 

1795 def get_raw(self, name: ObjectID): 

1796 """Obtain the raw text for an object. 

1797 

1798 Args: 

1799 name: sha for the object. 

1800 Returns: tuple with numeric type and object contents. 

1801 """ 

1802 obj = self[self._to_hexsha(name)] 

1803 return obj.type_num, obj.as_raw_string() 

1804 

1805 def __getitem__(self, name: ObjectID): 

1806 """Retrieve an object by SHA. 

1807 

1808 Args: 

1809 name: SHA of the object (as hex string or bytes) 

1810 

1811 Returns: 

1812 Copy of the ShaFile object 

1813 

1814 Raises: 

1815 KeyError: If the object is not found 

1816 """ 

1817 return self._data[self._to_hexsha(name)].copy() 

1818 

1819 def __delitem__(self, name: ObjectID) -> None: 

1820 """Delete an object from this store, for testing only.""" 

1821 del self._data[self._to_hexsha(name)] 

1822 

1823 def add_object(self, obj) -> None: 

1824 """Add a single object to this object store.""" 

1825 self._data[obj.id] = obj.copy() 

1826 

1827 def add_objects(self, objects, progress=None) -> None: 

1828 """Add a set of objects to this object store. 

1829 

1830 Args: 

1831 objects: Iterable over a list of (object, path) tuples 

1832 progress: Optional progress reporting function. 

1833 """ 

1834 for obj, path in objects: 

1835 self.add_object(obj) 

1836 

1837 def add_pack(self): 

1838 """Add a new pack to this object store. 

1839 

1840 Because this object store doesn't support packs, we extract and add the 

1841 individual objects. 

1842 

1843 Returns: Fileobject to write to, a commit function to 

1844 call when the pack is finished, and an abort function. 

1845 """ 

1846 from tempfile import SpooledTemporaryFile 

1847 

1848 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1849 

1850 def commit() -> None: 

1851 size = f.tell() 

1852 if size > 0: 

1853 f.seek(0) 

1854 

1855 p = PackData.from_file(f, size) 

1856 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1857 self.add_object(obj) 

1858 p.close() 

1859 f.close() 

1860 else: 

1861 f.close() 

1862 

1863 def abort() -> None: 

1864 f.close() 

1865 

1866 return f, commit, abort 

1867 

1868 def add_pack_data( 

1869 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1870 ) -> None: 

1871 """Add pack data to this object store. 

1872 

1873 Args: 

1874 count: Number of items to add 

1875 unpacked_objects: Iterator of UnpackedObject instances 

1876 progress: Optional progress reporting function. 

1877 """ 

1878 if count == 0: 

1879 return 

1880 

1881 # Since MemoryObjectStore doesn't support pack files, we need to 

1882 # extract individual objects. To handle deltas properly, we write 

1883 # to a temporary pack and then use PackInflater to resolve them. 

1884 f, commit, abort = self.add_pack() 

1885 try: 

1886 write_pack_data( 

1887 f.write, 

1888 unpacked_objects, 

1889 num_records=count, 

1890 progress=progress, 

1891 ) 

1892 except BaseException: 

1893 abort() 

1894 raise 

1895 else: 

1896 commit() 

1897 

1898 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1899 """Add a new thin pack to this object store. 

1900 

1901 Thin packs are packs that contain deltas whose bases exist 

1902 outside the pack. Because this object store doesn't support packs, we 

1903 extract and add the individual objects. 

1904 

1905 Args: 

1906 read_all: Read function that blocks until the number of 

1907 requested bytes are read. 

1908 read_some: Read function that returns at least one byte, but may 

1909 not return the number of bytes requested. 

1910 progress: Optional progress reporting function. 

1911 """ 

1912 f, commit, abort = self.add_pack() 

1913 try: 

1914 copier = PackStreamCopier(read_all, read_some, f) 

1915 copier.verify() 

1916 except BaseException: 

1917 abort() 

1918 raise 

1919 else: 

1920 commit() 

1921 
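# --- Example (hedged sketch, not part of object_store.py) -----------------
# Round-tripping a blob through MemoryObjectStore.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"hello, dulwich\n")
store.add_object(blob)
print(store.contains_loose(blob.id))   # True -- everything counts as loose here
print(store.get_raw(blob.id))          # (3, b'hello, dulwich\n')
print(store[blob.id].as_raw_string())  # lookups return a copy of the object
# ---------------------------------------------------------------------------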

1922 

1923class ObjectIterator(Protocol): 

1924 """Interface for iterating over objects.""" 

1925 

1926 def iterobjects(self) -> Iterator[ShaFile]: 

1927 """Iterate over all objects. 

1928 

1929 Returns: 

1930 Iterator of ShaFile objects 

1931 """ 

1932 raise NotImplementedError(self.iterobjects) 

1933 

1934 

1935def tree_lookup_path(lookup_obj, root_sha, path): 

1936 """Look up an object in a Git tree. 

1937 

1938 Args: 

1939 lookup_obj: Callback for retrieving object by SHA1 

1940 root_sha: SHA1 of the root tree 

1941 path: Path to lookup 

1942 Returns: A tuple of (mode, SHA) of the resulting path. 

1943 """ 

1944 tree = lookup_obj(root_sha) 

1945 if not isinstance(tree, Tree): 

1946 raise NotTreeError(root_sha) 

1947 return tree.lookup_path(lookup_obj, path) 

1948 
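# --- Example (hedged sketch, not part of object_store.py) -----------------
# Resolving a nested path inside a tree with tree_lookup_path.
from dulwich.object_store import MemoryObjectStore, tree_lookup_path
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"print('hi')\n")
subtree = Tree()
subtree.add(b"main.py", 0o100644, blob.id)
root = Tree()
root.add(b"src", 0o040000, subtree.id)
for obj in (blob, subtree, root):
    store.add_object(obj)

mode, sha = tree_lookup_path(store.__getitem__, root.id, b"src/main.py")
print(oct(mode), sha == blob.id)       # 0o100644 True
# ---------------------------------------------------------------------------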

1949 

1950def _collect_filetree_revs( 

1951 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1952) -> None: 

1953 """Collect SHA1s of files and directories for specified tree. 

1954 

1955 Args: 

1956 obj_store: Object store to get objects by SHA from 

1957 tree_sha: tree reference to walk 

1958 kset: set to fill with references to files and directories 

1959 """ 

1960 filetree = obj_store[tree_sha] 

1961 assert isinstance(filetree, Tree) 

1962 for name, mode, sha in filetree.iteritems(): 

1963 if not S_ISGITLINK(mode) and sha not in kset: 

1964 kset.add(sha) 

1965 if stat.S_ISDIR(mode): 

1966 _collect_filetree_revs(obj_store, sha, kset) 

1967 

1968 

1969def _split_commits_and_tags( 

1970 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1971) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1972 """Split object id list into three lists with commit, tag, and other SHAs. 

1973 

1974 Commits referenced by tags are included in the commits 

1975 list as well. Only SHA1s known in this repository will get 

1976 through; unless the ignore_unknown argument is True, a KeyError 

1977 is raised for any SHA1 missing from the repository. 

1978 

1979 Args: 

1980 obj_store: Object store to get objects by SHA1 from 

1981 lst: Collection of commit and tag SHAs 

1982 ignore_unknown: True to silently skip SHA1s missing from 

1983 the repository. 

1984 Returns: A tuple of (commits, tags, others) SHA1s 

1985 """ 

1986 commits: set[bytes] = set() 

1987 tags: set[bytes] = set() 

1988 others: set[bytes] = set() 

1989 for e in lst: 

1990 try: 

1991 o = obj_store[e] 

1992 except KeyError: 

1993 if not ignore_unknown: 

1994 raise 

1995 else: 

1996 if isinstance(o, Commit): 

1997 commits.add(e) 

1998 elif isinstance(o, Tag): 

1999 tags.add(e) 

2000 tagged = o.object[1] 

2001 c, t, os = _split_commits_and_tags( 

2002 obj_store, [tagged], ignore_unknown=ignore_unknown 

2003 ) 

2004 commits |= c 

2005 tags |= t 

2006 others |= os 

2007 else: 

2008 others.add(e) 

2009 return (commits, tags, others) 

2010 

2011 

2012class MissingObjectFinder: 

2013 """Find the objects missing from another object store. 

2014 

2015 Args: 

2016 object_store: Object store containing at least all objects to be 

2017 sent 

2018 haves: SHA1s of commits not to send (already present in target) 

2019 wants: SHA1s of commits to send 

2020 progress: Optional function to report progress to. 

2021 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2022 sha for including tags. 

2023 get_parents: Optional function for getting the parents of a commit. 

2024 """ 

2025 

2026 def __init__( 

2027 self, 

2028 object_store, 

2029 haves, 

2030 wants, 

2031 *, 

2032 shallow=None, 

2033 progress=None, 

2034 get_tagged=None, 

2035 get_parents=lambda commit: commit.parents, 

2036 ) -> None: 

2037 """Initialize a MissingObjectFinder. 

2038 

2039 Args: 

2040 object_store: Object store containing objects 

2041 haves: SHA1s of objects already present in target 

2042 wants: SHA1s of objects to send 

2043 shallow: Set of shallow commit SHA1s 

2044 progress: Optional progress reporting callback 

2045 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2046 get_parents: Function for getting commit parents 

2047 """ 

2048 self.object_store = object_store 

2049 if shallow is None: 

2050 shallow = set() 

2051 self._get_parents = get_parents 

2052 # process Commits and Tags differently 

2053 # Note: while haves may list commits/tags not available locally 

2054 # (such SHAs are filtered out by _split_commits_and_tags), 

2055 # wants must list only known SHAs, otherwise 

2056 # _split_commits_and_tags raises KeyError 

2057 have_commits, have_tags, have_others = _split_commits_and_tags( 

2058 object_store, haves, ignore_unknown=True 

2059 ) 

2060 want_commits, want_tags, want_others = _split_commits_and_tags( 

2061 object_store, wants, ignore_unknown=False 

2062 ) 

2063 # all_ancestors is a set of commits that shall not be sent 

2064 # (complete repository up to 'haves') 

2065 all_ancestors = _collect_ancestors( 

2066 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

2067 )[0] 

2068 # all_missing - complete set of commits between haves and wants 

2069 # common - commits from all_ancestors we encounter while 

2070 # traversing the parent hierarchy of wants 

2071 missing_commits, common_commits = _collect_ancestors( 

2072 object_store, 

2073 want_commits, 

2074 all_ancestors, 

2075 shallow=shallow, 

2076 get_parents=self._get_parents, 

2077 ) 

2078 self.remote_has: set[bytes] = set() 

2079 # Now, fill sha_done with commits and revisions of 

2080 # files and directories known to be both locally 

2081 # and on target. Thus these commits and files 

2082 # won't get selected for fetch 

2083 for h in common_commits: 

2084 self.remote_has.add(h) 

2085 cmt = object_store[h] 

2086 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2087 # record tags we have as visited, too 

2088 for t in have_tags: 

2089 self.remote_has.add(t) 

2090 self.sha_done = set(self.remote_has) 

2091 

2092 # in fact, what we 'want' is commits, tags, and others 

2093 # we've found missing 

2094 self.objects_to_send: set[ 

2095 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

2096 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

2097 missing_tags = want_tags.difference(have_tags) 

2098 self.objects_to_send.update( 

2099 {(w, None, Tag.type_num, False) for w in missing_tags} 

2100 ) 

2101 missing_others = want_others.difference(have_others) 

2102 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2103 

2104 if progress is None: 

2105 self.progress = lambda x: None 

2106 else: 

2107 self.progress = progress 

2108 self._tagged = (get_tagged and get_tagged()) or {} 

2109 

2110 def get_remote_has(self): 

2111 """Get the set of SHAs the remote has. 

2112 

2113 Returns: 

2114 Set of SHA1s that the remote side already has 

2115 """ 

2116 return self.remote_has 

2117 

2118 def add_todo( 

2119 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

2120 ) -> None: 

2121 """Add objects to the todo list. 

2122 

2123 Args: 

2124 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2125 """ 

2126 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2127 

2128 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

2129 """Get the next object to send. 

2130 

2131 Returns: 

2132 Tuple of (sha, pack_hint) 

2133 

2134 Raises: 

2135 StopIteration: When no more objects to send 

2136 """ 

2137 while True: 

2138 if not self.objects_to_send: 

2139 self.progress( 

2140 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2141 ) 

2142 raise StopIteration 

2143 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2144 if sha not in self.sha_done: 

2145 break 

2146 if not leaf: 

2147 o = self.object_store[sha] 

2148 if isinstance(o, Commit): 

2149 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2150 elif isinstance(o, Tree): 

2151 self.add_todo( 

2152 [ 

2153 ( 

2154 s, 

2155 n, 

2156 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2157 not stat.S_ISDIR(m), 

2158 ) 

2159 for n, m, s in o.iteritems() 

2160 if not S_ISGITLINK(m) 

2161 ] 

2162 ) 

2163 elif isinstance(o, Tag): 

2164 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2165 if sha in self._tagged: 

2166 self.add_todo([(self._tagged[sha], None, None, True)]) 

2167 self.sha_done.add(sha) 

2168 if len(self.sha_done) % 1000 == 0: 

2169 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2170 if type_num is None: 

2171 pack_hint = None 

2172 else: 

2173 pack_hint = (type_num, name) 

2174 return (sha, pack_hint) 

2175 

2176 def __iter__(self): 

2177 """Return iterator over objects to send. 

2178 

2179 Returns: 

2180 Self (this class implements the iterator protocol) 

2181 """ 

2182 return self 

2183 
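# --- Example (hedged sketch, not part of object_store.py) -----------------
# Enumerating the objects a receiver without any "haves" would need.
# The single-commit history below is constructed purely for illustration.
from dulwich.object_store import MemoryObjectStore, MissingObjectFinder
from dulwich.objects import Blob, Commit, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"hello\n")
tree = Tree()
tree.add(b"hello.txt", 0o100644, blob.id)
commit = Commit()
commit.tree = tree.id
commit.author = commit.committer = b"Example <example@example.com>"
commit.author_time = commit.commit_time = 1700000000
commit.author_timezone = commit.commit_timezone = 0
commit.message = b"initial commit"
for obj in (blob, tree, commit):
    store.add_object(obj)

finder = MissingObjectFinder(store, haves=[], wants=[commit.id])
for sha, pack_hint in finder:
    print(sha, pack_hint)              # commit, tree and blob, with pack hints
# ---------------------------------------------------------------------------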

2184 

2185class ObjectStoreGraphWalker: 

2186 """Graph walker that finds what commits are missing from an object store.""" 

2187 

2188 heads: set[ObjectID] 

2189 """Revisions without descendants in the local repo.""" 

2190 

2191 get_parents: Callable[[ObjectID], list[ObjectID]] 

2192 """Function to retrieve parents in the local repo.""" 

2193 

2194 shallow: set[ObjectID] 

2195 

2196 def __init__( 

2197 self, 

2198 local_heads: Iterable[ObjectID], 

2199 get_parents, 

2200 shallow: Optional[set[ObjectID]] = None, 

2201 update_shallow=None, 

2202 ) -> None: 

2203 """Create a new instance. 

2204 

2205 Args: 

2206 local_heads: Heads to start search with 

2207 get_parents: Function for finding the parents of a SHA1. 

2208 shallow: Set of shallow commits. 

2209 update_shallow: Function to update shallow commits. 

2210 """ 

2211 self.heads = set(local_heads) 

2212 self.get_parents = get_parents 

2213 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

2214 if shallow is None: 

2215 shallow = set() 

2216 self.shallow = shallow 

2217 self.update_shallow = update_shallow 

2218 

2219 def nak(self) -> None: 

2220 """Nothing in common was found.""" 

2221 

2222 def ack(self, sha: ObjectID) -> None: 

2223 """Ack that a revision and its ancestors are present in the source.""" 

2224 if len(sha) != 40: 

2225 raise ValueError(f"unexpected sha {sha!r} received") 

2226 ancestors = {sha} 

2227 

2228 # stop if we run out of heads to remove 

2229 while self.heads: 

2230 for a in ancestors: 

2231 if a in self.heads: 

2232 self.heads.remove(a) 

2233 

2234 # collect all ancestors 

2235 new_ancestors = set() 

2236 for a in ancestors: 

2237 ps = self.parents.get(a) 

2238 if ps is not None: 

2239 new_ancestors.update(ps) 

2240 self.parents[a] = None 

2241 

2242 # no more ancestors; stop 

2243 if not new_ancestors: 

2244 break 

2245 

2246 ancestors = new_ancestors 

2247 

2248 def next(self): 

2249 """Iterate over ancestors of heads in the target.""" 

2250 if self.heads: 

2251 ret = self.heads.pop() 

2252 try: 

2253 ps = self.get_parents(ret) 

2254 except KeyError: 

2255 return None 

2256 self.parents[ret] = ps 

2257 self.heads.update([p for p in ps if p not in self.parents]) 

2258 return ret 

2259 return None 

2260 

2261 __next__ = next 

2262 
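# --- Example (hedged sketch, not part of object_store.py) -----------------
# Driving ObjectStoreGraphWalker by hand. The three fake 40-byte SHAs and the
# parent map are illustrative; get_parents would normally be something like
# ``lambda sha: store[sha].parents``.
from dulwich.object_store import ObjectStoreGraphWalker

c1, c2, c3 = b"1" * 40, b"2" * 40, b"3" * 40
parents = {c3: [c2], c2: [c1], c1: []}
walker = ObjectStoreGraphWalker([c3], parents.__getitem__)
print(next(walker))    # c3 -- the local head is offered first
walker.ack(c2)         # the other side already has c2 and its ancestors
print(next(walker))    # None -- nothing left to negotiate
# ---------------------------------------------------------------------------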

2263 

2264def commit_tree_changes(object_store, tree, changes): 

2265 """Commit a specified set of changes to a tree structure. 

2266 

2267 This will apply a set of changes on top of an existing tree, storing new 

2268 objects in object_store. 

2269 

2270 changes are a list of tuples with (path, mode, object_sha). 

2271 Paths can be both blobs and trees. Setting the mode and 

2272 object sha to None deletes the path. 

2273 

2274 This method works especially well if there are only a small 

2275 number of changes to a big tree. For a large number of changes 

2276 to a large tree, use e.g. commit_tree. 

2277 

2278 Args: 

2279 object_store: Object store to store new objects in 

2280 and retrieve old ones from. 

2281 tree: Original tree root 

2282 changes: changes to apply 

2283 Returns: New tree root object 

2284 """ 

2285 # TODO(jelmer): Save up the objects and add them using .add_objects 

2286 # rather than with individual calls to .add_object. 

2287 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {} 

2288 for path, new_mode, new_sha in changes: 

2289 try: 

2290 (dirname, subpath) = path.split(b"/", 1) 

2291 except ValueError: 

2292 if new_sha is None: 

2293 del tree[path] 

2294 else: 

2295 tree[path] = (new_mode, new_sha) 

2296 else: 

2297 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2298 for name, subchanges in nested_changes.items(): 

2299 try: 

2300 orig_subtree = object_store[tree[name][1]] 

2301 except KeyError: 

2302 orig_subtree = Tree() 

2303 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

2304 if len(subtree) == 0: 

2305 del tree[name] 

2306 else: 

2307 tree[name] = (stat.S_IFDIR, subtree.id) 

2308 object_store.add_object(tree) 

2309 return tree 

2310 
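# --- Example (hedged sketch, not part of object_store.py) -----------------
# Applying a small set of changes on top of an existing tree.
from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b"old\n")
new_blob = Blob.from_string(b"new\n")
root = Tree()
root.add(b"README", 0o100644, old_blob.id)
for obj in (old_blob, new_blob, root):
    store.add_object(obj)

# Replace README and add a nested file; (path, None, None) would delete a path.
new_root = commit_tree_changes(
    store,
    store[root.id],
    [
        (b"README", 0o100644, new_blob.id),
        (b"docs/guide.txt", 0o100644, new_blob.id),
    ],
)
print(new_root.id != root.id)          # True -- a new root tree was stored
# ---------------------------------------------------------------------------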

2311 

2312class OverlayObjectStore(BaseObjectStore): 

2313 """Object store that can overlay multiple object stores.""" 

2314 

2315 def __init__(self, bases, add_store=None) -> None: 

2316 """Initialize an OverlayObjectStore. 

2317 

2318 Args: 

2319 bases: List of base object stores to overlay 

2320 add_store: Optional store to write new objects to 

2321 """ 

2322 self.bases = bases 

2323 self.add_store = add_store 

2324 

2325 def add_object(self, object): 

2326 """Add a single object to the store. 

2327 

2328 Args: 

2329 object: Object to add 

2330 

2331 Raises: 

2332 NotImplementedError: If no add_store was provided 

2333 """ 

2334 if self.add_store is None: 

2335 raise NotImplementedError(self.add_object) 

2336 return self.add_store.add_object(object) 

2337 

2338 def add_objects(self, objects, progress=None): 

2339 """Add multiple objects to the store. 

2340 

2341 Args: 

2342 objects: Iterator of objects to add 

2343 progress: Optional progress reporting callback 

2344 

2345 Raises: 

2346 NotImplementedError: If no add_store was provided 

2347 """ 

2348 if self.add_store is None: 

2349 raise NotImplementedError(self.add_object) 

2350 return self.add_store.add_objects(objects, progress) 

2351 

2352 @property 

2353 def packs(self): 

2354 """Get the list of packs from all overlaid stores. 

2355 

2356 Returns: 

2357 Combined list of packs from all base stores 

2358 """ 

2359 ret = [] 

2360 for b in self.bases: 

2361 ret.extend(b.packs) 

2362 return ret 

2363 

2364 def __iter__(self): 

2365 """Iterate over all object SHAs in the overlaid stores. 

2366 

2367 Returns: 

2368 Iterator of object SHAs (deduped across stores) 

2369 """ 

2370 done = set() 

2371 for b in self.bases: 

2372 for o_id in b: 

2373 if o_id not in done: 

2374 yield o_id 

2375 done.add(o_id) 

2376 

2377 def iterobjects_subset( 

2378 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2379 ) -> Iterator[ShaFile]: 

2380 """Iterate over a subset of objects from the overlaid stores. 

2381 

2382 Args: 

2383 shas: Iterable of object SHAs to retrieve 

2384 allow_missing: If True, skip missing objects; if False, raise KeyError 

2385 

2386 Returns: 

2387 Iterator of ShaFile objects 

2388 

2389 Raises: 

2390 KeyError: If an object is missing and allow_missing is False 

2391 """ 

2392 todo = set(shas) 

2393 found: set[bytes] = set() 

2394 

2395 for b in self.bases: 

2396 # Create a copy of todo for each base to avoid modifying 

2397 # the set while iterating through it 

2398 current_todo = todo - found 

2399 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2400 yield o 

2401 found.add(o.id) 

2402 

2403 # Check for any remaining objects not found 

2404 missing = todo - found 

2405 if missing and not allow_missing: 

2406 raise KeyError(next(iter(missing))) 

2407 

2408 def iter_unpacked_subset( 

2409 self, 

2410 shas: Iterable[bytes], 

2411 *, 

2412 include_comp=False, 

2413 allow_missing: bool = False, 

2414 convert_ofs_delta=True, 

2415 ) -> Iterator[ShaFile]: 

2416 """Iterate over unpacked objects from the overlaid stores. 

2417 

2418 Args: 

2419 shas: Iterable of object SHAs to retrieve 

2420 include_comp: Whether to include compressed data 

2421 allow_missing: If True, skip missing objects; if False, raise KeyError 

2422 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2423 

2424 Returns: 

2425 Iterator of unpacked objects 

2426 

2427 Raises: 

2428 KeyError: If an object is missing and allow_missing is False 

2429 """ 

2430 todo = set(shas) 

2431 for b in self.bases: 

2432 for o in b.iter_unpacked_subset( 

2433 todo, 

2434 include_comp=include_comp, 

2435 allow_missing=True, 

2436 convert_ofs_delta=convert_ofs_delta, 

2437 ): 

2438 yield o 

2439 todo.remove(o.id) 

2440 if todo and not allow_missing: 

2441 raise KeyError(next(iter(todo))) 

2442 

2443 def get_raw(self, sha_id): 

2444 """Get the raw object data from the overlaid stores. 

2445 

2446 Args: 

2447 sha_id: SHA of the object 

2448 

2449 Returns: 

2450 Tuple of (type_num, raw_data) 

2451 

2452 Raises: 

2453 KeyError: If object not found in any base store 

2454 """ 

2455 for b in self.bases: 

2456 try: 

2457 return b.get_raw(sha_id) 

2458 except KeyError: 

2459 pass 

2460 raise KeyError(sha_id) 

2461 

2462 def contains_packed(self, sha) -> bool: 

2463 """Check if an object is packed in any base store. 

2464 

2465 Args: 

2466 sha: SHA of the object 

2467 

2468 Returns: 

2469 True if object is packed in any base store 

2470 """ 

2471 for b in self.bases: 

2472 if b.contains_packed(sha): 

2473 return True 

2474 return False 

2475 

2476 def contains_loose(self, sha) -> bool: 

2477 """Check if an object is loose in any base store. 

2478 

2479 Args: 

2480 sha: SHA of the object 

2481 

2482 Returns: 

2483 True if object is loose in any base store 

2484 """ 

2485 for b in self.bases: 

2486 if b.contains_loose(sha): 

2487 return True 

2488 return False 

2489 
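# --- Example (hedged sketch, not part of object_store.py) -----------------
# Overlaying two stores: reads fall through the bases, writes go to add_store.
from dulwich.object_store import MemoryObjectStore, OverlayObjectStore
from dulwich.objects import Blob

base = MemoryObjectStore()
extra = MemoryObjectStore()
a = Blob.from_string(b"in base\n")
base.add_object(a)

overlay = OverlayObjectStore([extra, base], add_store=extra)
b = Blob.from_string(b"added later\n")
overlay.add_object(b)                  # stored in `extra`
print(overlay.get_raw(a.id))           # found via `base`
print(len(list(overlay)))              # 2 -- SHAs are deduplicated across bases
# ---------------------------------------------------------------------------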

2490 

2491def read_packs_file(f): 

2492 """Yield the packs listed in a packs file.""" 

2493 for line in f.read().splitlines(): 

2494 if not line: 

2495 continue 

2496 (kind, name) = line.split(b" ", 1) 

2497 if kind != b"P": 

2498 continue 

2499 yield os.fsdecode(name) 

2500 
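# --- Example (hedged sketch, not part of object_store.py) -----------------
# Parsing an objects/info/packs listing from an in-memory file object.
from io import BytesIO
from dulwich.object_store import read_packs_file

packs_file = BytesIO(b"P pack-1234abcd.pack\n\nP pack-5678ef01.pack\n")
print(list(read_packs_file(packs_file)))
# ['pack-1234abcd.pack', 'pack-5678ef01.pack']
# ---------------------------------------------------------------------------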

2501 

2502class BucketBasedObjectStore(PackBasedObjectStore): 

2503 """Object store implementation that uses a bucket store like S3 as backend.""" 

2504 

2505 def _iter_loose_objects(self): 

2506 """Iterate over the SHAs of all loose objects.""" 

2507 return iter([]) 

2508 

2509 def _get_loose_object(self, sha) -> None: 

2510 return None 

2511 

2512 def delete_loose_object(self, sha) -> None: 

2513 """Delete a loose object (no-op for bucket stores). 

2514 

2515 Bucket-based stores don't have loose objects, so this is a no-op. 

2516 

2517 Args: 

2518 sha: SHA of the object to delete 

2519 """ 

2520 # Doesn't exist.. 

2521 

2522 def pack_loose_objects(self, progress: Optional[Callable] = None) -> int: 

2523 """Pack loose objects. Returns number of objects packed. 

2524 

2525 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2526 

2527 Args: 

2528 progress: Optional progress reporting callback (ignored) 

2529 """ 

2530 return 0 

2531 

2532 def _remove_pack_by_name(self, name: str) -> None: 

2533 """Remove a pack by name. Subclasses should implement this.""" 

2534 raise NotImplementedError(self._remove_pack_by_name) 

2535 

2536 def _iter_pack_names(self) -> Iterator[str]: 

2537 raise NotImplementedError(self._iter_pack_names) 

2538 

2539 def _get_pack(self, name) -> Pack: 

2540 raise NotImplementedError(self._get_pack) 

2541 

2542 def _update_pack_cache(self): 

2543 pack_files = set(self._iter_pack_names()) 

2544 

2545 # Open newly appeared pack files 

2546 new_packs = [] 

2547 for f in pack_files: 

2548 if f not in self._pack_cache: 

2549 pack = self._get_pack(f) 

2550 new_packs.append(pack) 

2551 self._pack_cache[f] = pack 

2552 # Remove disappeared pack files 

2553 for f in set(self._pack_cache) - pack_files: 

2554 self._pack_cache.pop(f).close() 

2555 return new_packs 

2556 

2557 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2558 raise NotImplementedError 

2559 

2560 def add_pack(self): 

2561 """Add a new pack to this object store. 

2562 

2563 Returns: Fileobject to write to, a commit function to 

2564 call when the pack is finished and an abort 

2565 function. 

2566 """ 

2567 import tempfile 

2568 

2569 pf = tempfile.SpooledTemporaryFile( 

2570 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2571 ) 

2572 

2573 def commit(): 

2574 if pf.tell() == 0: 

2575 pf.close() 

2576 return None 

2577 

2578 pf.seek(0) 

2579 

2580 p = PackData(pf.name, pf) 

2581 entries = p.sorted_entries() 

2582 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2583 idxf = tempfile.SpooledTemporaryFile( 

2584 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2585 ) 

2586 checksum = p.get_stored_checksum() 

2587 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2588 idxf.seek(0) 

2589 idx = load_pack_index_file(basename + ".idx", idxf) 

2590 for pack in self.packs: 

2591 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2592 p.close() 

2593 idx.close() 

2594 pf.close() 

2595 idxf.close() 

2596 return pack 

2597 pf.seek(0) 

2598 idxf.seek(0) 

2599 self._upload_pack(basename, pf, idxf) 

2600 final_pack = Pack.from_objects(p, idx) 

2601 self._add_cached_pack(basename, final_pack) 

2602 pf.close() 

2603 idxf.close() 

2604 return final_pack 

2605 

2606 return pf, commit, pf.close 

2607 
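# --- Example (hedged sketch, not part of object_store.py) -----------------
# Skeleton of a concrete bucket-backed store: these are the hooks a subclass
# is expected to provide. The class name and comments are illustrative
# placeholders, not a working backend.
from collections.abc import Iterator
from dulwich.object_store import BucketBasedObjectStore
from dulwich.pack import Pack

class ExampleBucketStore(BucketBasedObjectStore):
    def _iter_pack_names(self) -> Iterator[str]:
        # e.g. list "pack/*.pack" keys in the bucket, minus the extension
        raise NotImplementedError

    def _get_pack(self, name) -> Pack:
        # e.g. fetch <name>.pack and <name>.idx and open them as a Pack
        raise NotImplementedError

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        # e.g. upload both file objects under "pack/<basename>.*"
        raise NotImplementedError
# ---------------------------------------------------------------------------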

2608 

2609def _collect_ancestors( 

2610 store: ObjectContainer, 

2611 heads, 

2612 common: frozenset[ObjectID] = frozenset(), 

2613 shallow: frozenset[ObjectID] = frozenset(), 

2614 get_parents=lambda commit: commit.parents, 

2615): 

2616 """Collect all ancestors of heads up to (excluding) those in common. 

2617 

2618 Args: 

2619 store: Object store to get commits from 

2620 heads: commits to start from 

2621 common: commits to end at, or empty set to walk repository 

2622 completely 

2623 shallow: Set of shallow commits 

2624 get_parents: Optional function for getting the parents of a 

2625 commit. 

2626 Returns: a tuple (A, B) where A is the set of all commits reachable 

2627 from heads but not present in common, and B is the set of common 

2628 (shared) elements that are directly reachable from heads. 

2629 """ 

2630 bases = set() 

2631 commits = set() 

2632 queue = [] 

2633 queue.extend(heads) 

2634 

2635 # Try to use commit graph if available 

2636 commit_graph = store.get_commit_graph() 

2637 

2638 while queue: 

2639 e = queue.pop(0) 

2640 if e in common: 

2641 bases.add(e) 

2642 elif e not in commits: 

2643 commits.add(e) 

2644 if e in shallow: 

2645 continue 

2646 

2647 # Try to use commit graph for parent lookup 

2648 parents = None 

2649 if commit_graph: 

2650 parents = commit_graph.get_parents(e) 

2651 

2652 if parents is None: 

2653 # Fall back to loading the object 

2654 cmt = store[e] 

2655 parents = get_parents(cmt) 

2656 

2657 queue.extend(parents) 

2658 return (commits, bases) 

2659 

2660 

2661def iter_tree_contents( 

2662 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2663): 

2664 """Iterate the contents of a tree and all subtrees. 

2665 

2666 Iteration is depth-first pre-order, as in e.g. os.walk. 

2667 

2668 Args: 

2669 store: Object store to get trees from 

2670 tree_id: SHA1 of the tree. 

2671 include_trees: If True, include tree objects in the iteration. 

2672 

2673 Yields: TreeEntry namedtuples for all the objects in a tree. 

2674 """ 

2675 if tree_id is None: 

2676 return 

2677 # This could be fairly easily generalized to >2 trees if we find a use 

2678 # case. 

2679 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2680 while todo: 

2681 entry = todo.pop() 

2682 if stat.S_ISDIR(entry.mode): 

2683 extra = [] 

2684 tree = store[entry.sha] 

2685 assert isinstance(tree, Tree) 

2686 for subentry in tree.iteritems(name_order=True): 

2687 extra.append(subentry.in_path(entry.path)) 

2688 todo.extend(reversed(extra)) 

2689 if not stat.S_ISDIR(entry.mode) or include_trees: 

2690 yield entry 

2691 
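# --- Example (hedged sketch, not part of object_store.py) -----------------
# Depth-first listing of a tree, optionally including the tree entries.
from dulwich.object_store import MemoryObjectStore, iter_tree_contents
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"content\n")
subtree = Tree()
subtree.add(b"inner.txt", 0o100644, blob.id)
root = Tree()
root.add(b"outer.txt", 0o100644, blob.id)
root.add(b"sub", 0o040000, subtree.id)
for obj in (blob, subtree, root):
    store.add_object(obj)

for entry in iter_tree_contents(store, root.id, include_trees=True):
    print(entry.path, oct(entry.mode), entry.sha)
# ---------------------------------------------------------------------------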

2692 

2693def iter_commit_contents( 

2694 store: ObjectContainer, 

2695 commit: Union[Commit, bytes], 

2696 *, 

2697 include: Optional[Sequence[Union[str, bytes, Path]]] = None, 

2698): 

2699 """Iterate the contents of the repository at the specified commit. 

2700 

2701 This is a wrapper around iter_tree_contents() and 

2702 tree_lookup_path() to simplify the common task of getting the 

2703 contents of a repo at a particular commit. See also 

2704 dulwich.index.build_file_from_blob() for writing individual files 

2705 to disk. 

2706 

2707 Args: 

2708 store: Object store to get trees from 

2709 commit: Commit object, or SHA1 of a commit 

2710 include: if provided, only the entries whose paths are in the 

2711 list, or whose parent tree is in the list, will be 

2712 included. Note that duplicate or overlapping paths 

2713 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

2714 

2715 Yields: TreeEntry namedtuples for all matching files in a commit. 

2716 """ 

2717 sha = commit.id if isinstance(commit, Commit) else commit 

2718 if not isinstance(obj := store[sha], Commit): 

2719 raise TypeError( 

2720 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

2721 ) 

2722 commit = obj 

2723 encoding = commit.encoding or "utf-8" 

2724 include_bytes: list[bytes] = ( 

2725 [ 

2726 path if isinstance(path, bytes) else str(path).encode(encoding) 

2727 for path in include 

2728 ] 

2729 if include is not None 

2730 else [b""] 

2731 ) 

2732 

2733 for path in include_bytes: 

2734 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

2735 # Iterate all contained files if path points to a dir, otherwise just get that 

2736 # single file 

2737 if isinstance(store[obj_id], Tree): 

2738 for entry in iter_tree_contents(store, obj_id): 

2739 yield entry.in_path(path) 

2740 else: 

2741 yield TreeEntry(path, mode, obj_id) 

2742 
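# --- Example (hedged sketch, not part of object_store.py) -----------------
# Listing the files of a commit, restricted to a subset of paths.
from dulwich.object_store import MemoryObjectStore, iter_commit_contents
from dulwich.objects import Blob, Commit, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"hello\n")
tree = Tree()
tree.add(b"hello.txt", 0o100644, blob.id)
commit = Commit()
commit.tree = tree.id
commit.author = commit.committer = b"Example <example@example.com>"
commit.author_time = commit.commit_time = 1700000000
commit.author_timezone = commit.commit_timezone = 0
commit.message = b"initial commit"
for obj in (blob, tree, commit):
    store.add_object(obj)

for entry in iter_commit_contents(store, commit.id, include=[b"hello.txt"]):
    print(entry.path, oct(entry.mode), entry.sha)
# ---------------------------------------------------------------------------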

2743 

2744def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2745 """Peel all tags from a SHA. 

2746 

2747 Args: 

2748 store: Object store to get objects from 

2749 sha: The object SHA to peel. 

2750 Returns: Tuple of (unpeeled, peeled) ShaFile objects, where peeled is 

2751 the object reached after following all intermediate tags; if the 

2752 original SHA does not point to a tag, both are the same object. 

2753 """ 

2754 unpeeled = obj = store[sha] 

2755 obj_class = object_class(obj.type_name) 

2756 while obj_class is Tag: 

2757 assert isinstance(obj, Tag) 

2758 obj_class, sha = obj.object 

2759 obj = store[sha] 

2760 return unpeeled, obj
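# --- Example (hedged sketch, not part of object_store.py) -----------------
# Peeling an annotated tag down to the object it ultimately points at.
from dulwich.object_store import MemoryObjectStore, peel_sha
from dulwich.objects import Blob, Tag

store = MemoryObjectStore()
target = Blob.from_string(b"release notes\n")
store.add_object(target)

tag = Tag()
tag.name = b"v1.0"
tag.tagger = b"Example <example@example.com>"
tag.tag_time = 1700000000
tag.tag_timezone = 0
tag.message = b"first release"
tag.object = (Blob, target.id)
store.add_object(tag)

unpeeled, peeled = peel_sha(store, tag.id)
print(unpeeled.id == tag.id, peeled.id == target.id)   # True True
# ---------------------------------------------------------------------------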