
# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
# and others
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#


"""Git object store interfaces and implementation."""

import binascii
import os
import stat
import sys
import time
import warnings
from collections.abc import Iterable, Iterator, Sequence
from contextlib import suppress
from io import BytesIO
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    Callable,
    Optional,
    Protocol,
    Union,
)

from .errors import NotTreeError
from .file import GitFile, _GitFile
from .objects import (
    S_ISGITLINK,
    ZERO_SHA,
    Blob,
    Commit,
    ObjectID,
    ShaFile,
    Tag,
    Tree,
    TreeEntry,
    hex_to_filename,
    hex_to_sha,
    object_class,
    sha_to_hex,
    valid_hexsha,
)
from .pack import (
    PACK_SPOOL_FILE_MAX_SIZE,
    ObjectContainer,
    Pack,
    PackData,
    PackedObjectContainer,
    PackFileDisappeared,
    PackHint,
    PackIndexer,
    PackInflater,
    PackStreamCopier,
    UnpackedObject,
    extend_pack,
    full_unpacked_object,
    generate_unpacked_objects,
    iter_sha1,
    load_pack_index_file,
    pack_objects_to_data,
    write_pack_data,
    write_pack_index,
)
from .protocol import DEPTH_INFINITE
from .refs import PEELED_TAG_SUFFIX, Ref

if TYPE_CHECKING:
    from .commit_graph import CommitGraph
    from .diff_tree import RenameDetector


class GraphWalker(Protocol):
    """Protocol for graph walker objects."""

    def __next__(self) -> Optional[bytes]:
        """Return the next object SHA to visit."""
        ...

    def ack(self, sha: bytes) -> None:
        """Acknowledge that an object has been received."""
        ...


INFODIR = "info"
PACKDIR = "pack"


# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644


# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks


def find_shallow(
    store: ObjectContainer, heads: Iterable[bytes], depth: int
) -> tuple[set[bytes], set[bytes]]:
    """Find shallow commits according to a given depth.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
        considered shallow and unshallow according to the arguments. Note that
        these sets may overlap if a commit is reachable along multiple paths.
    """
    parents: dict[bytes, list[bytes]] = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha: bytes) -> list[bytes]:
        result = parents.get(sha, None)
        if not result:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            commit = store[sha]
            assert isinstance(commit, Commit)
            result = commit.parents
            parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel tags if necessary
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            shallow.add(sha)

    return shallow, not_shallow


def get_depth(
    store: ObjectContainer,
    head: bytes,
    get_parents: Callable = lambda commit: commit.parents,
    max_depth: Optional[int] = None,
) -> int:
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to search in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    """
    if head not in store:
        return 0
    current_depth = 1
    queue = [(head, current_depth)]
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.pop(0)
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            if isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth
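

# Illustrative sketch (not part of the upstream module): exercising find_shallow()
# and get_depth() against an in-memory store. MemoryObjectStore is defined later
# in this module and is resolved at call time; the commit metadata below is made
# up purely for the example.
def _example_shallow_and_depth() -> None:
    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello\n")
    tree = Tree()
    tree.add(b"hello.txt", 0o100644, blob.id)
    store.add_object(blob)
    store.add_object(tree)
    parent = None
    head = None
    # Build a linear history of three commits.
    for i in range(3):
        commit = Commit()
        commit.tree = tree.id
        commit.author = commit.committer = b"Example Author <example@example.com>"
        commit.author_time = commit.commit_time = 1700000000 + i
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"commit %d" % i
        if parent is not None:
            commit.parents = [parent]
        store.add_object(commit)
        parent = head = commit.id
    # With depth=2 the head is "not shallow" and its grandparent's child (the
    # middle commit) becomes the shallow boundary.
    shallow, not_shallow = find_shallow(store, [head], 2)
    assert head in not_shallow and len(shallow) == 1
    # The full history is three commits deep as seen from the head.
    assert get_depth(store, head) == 3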



class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack."""


class BaseObjectStore:
    """Object store interface."""

    def determine_wants_all(
        self, refs: dict[Ref, ObjectID], depth: Optional[int] = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs."""

        def _want_deepen(sha: bytes) -> bool:
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
            and not sha == ZERO_SHA
        ]
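
    # Illustrative usage sketch (comment only, not upstream code):
    #   refs = {b"refs/heads/main": head_sha,
    #           b"refs/tags/v1" + PEELED_TAG_SUFFIX: peeled_sha}
    #   wants = store.determine_wants_all(refs)
    # Only SHAs missing from the store (or needing deepening when a depth is
    # given) are returned; peeled-tag refs and ZERO_SHA entries are skipped.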


    def contains_loose(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def __contains__(self, sha1: bytes) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: bytes) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable] = None,
    ) -> Optional["Pack"]:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(
        self,
        source: Optional[bytes],
        target: Optional[bytes],
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: Optional["RenameDetector"] = None,
        paths: Optional[list[bytes]] = None,
    ) -> Iterator[
        tuple[
            tuple[Optional[bytes], Optional[bytes]],
            tuple[Optional[int], Optional[int]],
            tuple[Optional[bytes], Optional[bytes]],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            yield (
                (change.old.path, change.new.path),
                (change.old.mode, change.new.mode),
                (change.old.sha, change.new.sha),
            )

    def iter_tree_contents(
        self, tree_id: bytes, include_trees: bool = False
    ) -> Iterator[tuple[bytes, int, bytes]]:
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise
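
    # Illustrative usage sketch (comment only, not upstream code):
    #   objs = list(store.iterobjects_subset([present_sha, absent_sha],
    #                                        allow_missing=True))
    # skips the absent SHA, whereas allow_missing=False (the default) raises
    # KeyError as soon as the missing object is requested.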


    def find_missing_objects(
        self,
        haves: Iterable[bytes],
        wants: Iterable[bytes],
        shallow: Optional[set[bytes]] = None,
        progress: Optional[Callable] = None,
        get_tagged: Optional[Callable] = None,
        get_parents: Callable = lambda commit: commit.parents,
    ) -> Iterator[tuple[bytes, Optional[bytes]]]:
        """Find the missing objects required for a set of revisions.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]:
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self,
        have: Iterable[bytes],
        want: Iterable[bytes],
        shallow: Optional[set[bytes]] = None,
        progress: Optional[Callable] = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha: bytes) -> bytes:
        """Peel all tags from a SHA.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: bytes,
        get_parents: Callable = lambda commit: commit.parents,
        max_depth: Optional[int] = None,
    ) -> int:
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> Optional["CommitGraph"]:
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(
        self, refs: Optional[list[bytes]] = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.


        Note:
          Default implementation raises NotImplementedError. Subclasses should
          override this method to provide commit graph writing functionality.

        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: bytes) -> float:
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)
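

# Illustrative sketch (not part of the upstream module): a minimal, list-backed
# implementation of the GraphWalker protocol, enough to exercise
# BaseObjectStore.find_common_revisions(). The class and function names are
# illustrative assumptions, not upstream API.
class _ListGraphWalker:
    """Walk a fixed list of candidate SHAs and record acknowledgements."""

    def __init__(self, candidates: list[bytes]) -> None:
        self._candidates = iter(candidates)
        self.acked: list[bytes] = []

    def __next__(self) -> Optional[bytes]:
        # Returning None signals that there is nothing left to propose.
        return next(self._candidates, None)

    def ack(self, sha: bytes) -> None:
        self.acked.append(sha)


def _example_find_common_revisions(
    store: BaseObjectStore, candidates: list[bytes]
) -> list[bytes]:
    # Every candidate SHA already present in ``store`` is acknowledged and
    # returned as a common revision.
    return store.find_common_revisions(_ListGraphWalker(candidates))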



class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level: int = -1,
        pack_index_version: Optional[int] = None,
        pack_delta_window_size: Optional[int] = None,
        pack_window_memory: Optional[int] = None,
        pack_delta_cache_size: Optional[int] = None,
        pack_depth: Optional[int] = None,
        pack_threads: Optional[int] = None,
        pack_big_file_threshold: Optional[int] = None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
        """
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store."""
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Optional[Callable] = None,
    ) -> Optional["Pack"]:
        """Add pack data to this object store.

        Args:
          count: Number of items to add
          unpacked_objects: Iterator of UnpackedObject instances
          progress: Optional progress callback
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return None
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                compression_level=self.pack_compression_level,
            )
        except BaseException:
            abort()
            raise
        else:
            return commit()

    @property
    def alternates(self) -> list:
        """Return list of alternate object stores."""
        return []

    def contains_packed(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                pass
        return False

    def __contains__(self, sha) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            if prev_pack:
                prev_pack.close()

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            self,
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

    def _clear_cached_packs(self) -> None:
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self) -> Iterator[Pack]:
        return iter(self._pack_cache.values())

    def _update_pack_cache(self) -> list[Pack]:
        raise NotImplementedError(self._update_pack_cache)

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        """
        self._clear_cached_packs()

    @property
    def packs(self) -> list[Pack]:
        """List with pack objects."""
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())

    def count_pack_files(self) -> int:
        """Count the number of pack files.

        Returns:
          Number of pack files (excluding those with .keep files)
        """
        count = 0
        for pack in self.packs:
            # Check if there's a .keep file for this pack
            keep_path = pack._basename + ".keep"
            if not os.path.exists(keep_path):
                count += 1
        return count

    def _iter_alternate_objects(self) -> Iterator[bytes]:
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate

    def _iter_loose_objects(self) -> Iterator[bytes]:
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]:
        raise NotImplementedError(self._get_loose_object)

    def delete_loose_object(self, sha: bytes) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.
        """
        raise NotImplementedError(self.delete_loose_object)

    def _remove_pack(self, pack: "Pack") -> None:
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self, progress: Optional[Callable] = None) -> int:
        """Pack loose objects.

        Args:
          progress: Optional progress reporting callback

        Returns: Number of objects packed
        """
        objects: list[tuple[ShaFile, None]] = []
        for sha in self._iter_loose_objects():
            obj = self._get_loose_object(sha)
            if obj is not None:
                objects.append((obj, None))
        self.add_objects(objects, progress=progress)
        for obj, path in objects:
            self.delete_loose_object(obj.id)
        return len(objects)

    def repack(
        self, exclude: Optional[set] = None, progress: Optional[Callable] = None
    ) -> int:
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.

        Args:
          exclude: Optional set of object SHAs to exclude from repacking
          progress: Optional progress reporting callback
        """
        if exclude is None:
            exclude = set()

        loose_objects = set()
        excluded_loose_objects = set()
        for sha in self._iter_loose_objects():
            if sha not in exclude:
                obj = self._get_loose_object(sha)
                if obj is not None:
                    loose_objects.add(obj)
            else:
                excluded_loose_objects.add(sha)

        objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update(
                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
            )

        # Only create a new pack if there are objects to pack
        if objects:
            # The name of the consolidated pack might match the name of a
            # pre-existing pack. Take care not to remove the newly created
            # consolidated pack.
            consolidated = self.add_objects(list(objects), progress=progress)
            if consolidated is not None:
                old_packs.pop(consolidated.name(), None)

        # Delete loose objects that were packed
        for obj in loose_objects:
            if obj is not None:
                self.delete_loose_object(obj.id)
        # Delete excluded loose objects
        for sha in excluded_loose_objects:
            self.delete_loose_object(sha)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                yield from pack
            except PackFileDisappeared:
                pass
        yield from self._iter_loose_objects()
        yield from self._iter_alternate_objects()

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def iter_unpacked_subset(
        self,
        shas: set[bytes],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
          shas: Set of object SHAs to retrieve
          include_comp: Whether to include compressed data
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        This method searches for objects in pack files, alternates, and loose storage.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for alternate in self.alternates:
            for o in alternate.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for oid in todo:
            loose_obj: Optional[ShaFile] = self._get_loose_object(oid)
            if loose_obj is not None:
                yield loose_obj
            elif not allow_missing:
                raise KeyError(oid)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
          sha1: sha for the object.
          include_comp: Whether to include compression metadata.
        """
        if sha1 == ZERO_SHA:
            raise KeyError(sha1)
        if len(sha1) == 40:
            sha = hex_to_sha(sha1)
            hexsha = sha1
        elif len(sha1) == 20:
            sha = sha1
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(sha1)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable[[str], None]] = None,
    ) -> Optional["Pack"]:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over (object, path) tuples, should support
            __len__.
          progress: Optional progress reporting function.
        Returns: Pack object of the objects written.
        """
        count = len(objects)
        record_iter = (full_unpacked_object(o) for (o, p) in objects)
        return self.add_pack_data(count, record_iter, progress=progress)
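

# Illustrative sketch (not part of the upstream module): serialising the result
# of generate_pack_data() to an already-open binary file object, mirroring the
# write_pack_data() call used by add_pack_data() above. The function name and
# parameters are illustrative assumptions.
def _example_write_pack_for_wants(
    store: PackBasedObjectStore, wants, haves, outfile
) -> int:
    count, records = store.generate_pack_data(haves, wants, ofs_delta=True)
    # Stream the unpacked records into ``outfile`` as pack data.
    write_pack_data(
        outfile.write,
        records,
        num_records=count,
        compression_level=store.pack_compression_level,
    )
    return count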



class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    path: Union[str, os.PathLike]
    pack_dir: Union[str, os.PathLike]
    _alternates: Optional[list["DiskObjectStore"]]
    _commit_graph: Optional["CommitGraph"]

    def __init__(
        self,
        path: Union[str, os.PathLike],
        loose_compression_level=-1,
        pack_compression_level=-1,
        pack_index_version=None,
        pack_delta_window_size=None,
        pack_window_memory=None,
        pack_delta_cache_size=None,
        pack_depth=None,
        pack_threads=None,
        pack_big_file_threshold=None,
    ) -> None:
        """Open an object store.

        Args:
          path: Path of the object store.
          loose_compression_level: zlib compression level for loose objects
          pack_compression_level: zlib compression level for pack objects
          pack_index_version: pack index version to use (1, 2, or 3)
          pack_delta_window_size: sliding window size for delta compression
          pack_window_memory: memory limit for delta window operations
          pack_delta_cache_size: size of cache for delta operations
          pack_depth: maximum delta chain depth
          pack_threads: number of threads for pack operations
          pack_big_file_threshold: threshold for treating files as big
        """
        super().__init__(
            pack_compression_level=pack_compression_level,
            pack_index_version=pack_index_version,
            pack_delta_window_size=pack_delta_window_size,
            pack_window_memory=pack_window_memory,
            pack_delta_cache_size=pack_delta_cache_size,
            pack_depth=pack_depth,
            pack_threads=pack_threads,
            pack_big_file_threshold=pack_big_file_threshold,
        )
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._alternates = None
        self.loose_compression_level = loose_compression_level
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version

        # Commit graph support - lazy loaded
        self._commit_graph = None
        self._use_commit_graph = True  # Default to true

    def __repr__(self) -> str:
        """Return string representation of DiskObjectStore.

        Returns:
          String representation including the store path
        """
        return f"<{self.__class__.__name__}({self.path!r})>"

    @classmethod
    def from_config(cls, path: Union[str, os.PathLike], config):
        """Create a DiskObjectStore from a configuration object.

        Args:
          path: Path to the object store directory
          config: Configuration object to read settings from

        Returns:
          New DiskObjectStore instance configured according to config
        """
        try:
            default_compression_level = int(
                config.get((b"core",), b"compression").decode()
            )
        except KeyError:
            default_compression_level = -1
        try:
            loose_compression_level = int(
                config.get((b"core",), b"looseCompression").decode()
            )
        except KeyError:
            loose_compression_level = default_compression_level
        try:
            pack_compression_level = int(
                config.get((b"core",), "packCompression").decode()
            )
        except KeyError:
            pack_compression_level = default_compression_level
        try:
            pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
        except KeyError:
            pack_index_version = None

        # Read pack configuration options
        try:
            pack_delta_window_size = int(
                config.get((b"pack",), b"deltaWindowSize").decode()
            )
        except KeyError:
            pack_delta_window_size = None
        try:
            pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
        except KeyError:
            pack_window_memory = None
        try:
            pack_delta_cache_size = int(
                config.get((b"pack",), b"deltaCacheSize").decode()
            )
        except KeyError:
            pack_delta_cache_size = None
        try:
            pack_depth = int(config.get((b"pack",), b"depth").decode())
        except KeyError:
            pack_depth = None
        try:
            pack_threads = int(config.get((b"pack",), b"threads").decode())
        except KeyError:
            pack_threads = None
        try:
            pack_big_file_threshold = int(
                config.get((b"pack",), b"bigFileThreshold").decode()
            )
        except KeyError:
            pack_big_file_threshold = None

        # Read core.commitGraph setting
        use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

        instance = cls(
            path,
            loose_compression_level,
            pack_compression_level,
            pack_index_version,
            pack_delta_window_size,
            pack_window_memory,
            pack_delta_cache_size,
            pack_depth,
            pack_threads,
            pack_big_file_threshold,
        )
        instance._use_commit_graph = use_commit_graph
        return instance

    @property
    def alternates(self):
        """Get the list of alternate object stores.

        Reads from .git/objects/info/alternates if not already cached.

        Returns:
          List of DiskObjectStore instances for alternate object directories
        """
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
        except FileNotFoundError:
            return
        with f:
            for line in f.readlines():
                line = line.rstrip(b"\n")
                if line.startswith(b"#"):
                    continue
                if os.path.isabs(line):
                    yield os.fsdecode(line)
                else:
                    yield os.fsdecode(os.path.join(os.fsencode(self.path), line))

    def add_alternate_path(self, path) -> None:
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except FileExistsError:
            pass
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, "wb") as f:
            try:
                orig_f = open(alternates_path, "rb")
            except FileNotFoundError:
                pass
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(os.fsencode(path) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self):
        """Read and iterate over new pack files and cache them."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except FileNotFoundError:
            self.close()
            return []
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[: -len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = Pack(
                    os.path.join(self.pack_dir, f),
                    delta_window_size=self.pack_delta_window_size,
                    window_memory=self.pack_window_memory,
                    delta_cache_size=self.pack_delta_cache_size,
                    depth=self.pack_depth,
                    threads=self.pack_threads,
                    big_file_threshold=self.pack_big_file_threshold,
                )
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _get_shafile_path(self, sha):
        # Check from object dir
        return hex_to_filename(os.fspath(self.path), sha)

    def _iter_loose_objects(self):
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                sha = os.fsencode(base + rest)
                if not valid_hexsha(sha):
                    continue
                yield sha

    def count_loose_objects(self) -> int:
        """Count the number of loose objects in the object store.

        Returns:
          Number of loose objects
        """
        count = 0
        if not os.path.exists(self.path):
            return 0

        for i in range(256):
            subdir = os.path.join(self.path, f"{i:02x}")
            try:
                count += len(
                    [
                        name
                        for name in os.listdir(subdir)
                        if len(name) == 38  # 40 - 2 for the prefix
                    ]
                )
            except FileNotFoundError:
                # Directory may have been removed or is inaccessible
                continue

        return count

    def _get_loose_object(self, sha):
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except FileNotFoundError:
            return None

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object from disk.

        Args:
          sha: SHA1 of the object to delete

        Raises:
          FileNotFoundError: If the object file doesn't exist
        """
        os.remove(self._get_shafile_path(sha))

    def get_object_mtime(self, sha):
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # First check if it's a loose object
        if self.contains_loose(sha):
            path = self._get_shafile_path(sha)
            try:
                return os.path.getmtime(path)
            except FileNotFoundError:
                pass

        # Check if it's in a pack file
        for pack in self.packs:
            try:
                if sha in pack:
                    # Use the pack file's mtime for packed objects
                    pack_path = pack._data_path
                    try:
                        return os.path.getmtime(pack_path)
                    except (FileNotFoundError, AttributeError):
                        pass
            except PackFileDisappeared:
                pass

        raise KeyError(sha)

    def _remove_pack(self, pack) -> None:
        try:
            del self._pack_cache[os.path.basename(pack._basename)]
        except KeyError:
            pass
        pack.close()
        os.remove(pack.data.path)
        os.remove(pack.index.path)

    def _get_pack_basepath(self, entries):
        suffix_bytes = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix_bytes.decode("ascii")
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_pack(self, f, path, num_objects, indexer, progress=None):
        """Move a specific file containing a pack into the pack directory.

        Note: The file should be on the same file system as the
            packs directory.

        Args:
          f: Open file object for the pack.
          path: Path to the pack file.
          num_objects: Number of objects in the pack.
          indexer: A PackIndexer for indexing the pack.
          progress: Optional progress reporting function.
        """
        entries = []
        for i, entry in enumerate(indexer):
            if progress is not None:
                progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
            entries.append(entry)

        pack_sha, extra_entries = extend_pack(
            f,
            indexer.ext_refs(),
            get_raw=self.get_raw,
            compression_level=self.pack_compression_level,
            progress=progress,
        )
        f.flush()
        try:
            fileno = f.fileno()
        except AttributeError:
            pass
        else:
            os.fsync(fileno)
        f.close()

        entries.extend(extra_entries)

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)

        for pack in self.packs:
            if pack._basename == pack_base_name:
                return pack

        target_pack_path = pack_base_name + ".pack"
        target_index_path = pack_base_name + ".idx"
        if sys.platform == "win32":
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            with suppress(FileNotFoundError):
                os.remove(target_pack_path)
        os.rename(path, target_pack_path)

        # Write the index.
        with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
            write_pack_index(
                index_file, entries, pack_sha, version=self.pack_index_version
            )

        # Add the pack to the store and return it.
        final_pack = Pack(
            pack_base_name,
            delta_window_size=self.pack_delta_window_size,
            window_memory=self.pack_window_memory,
            delta_cache_size=self.pack_delta_cache_size,
            depth=self.pack_depth,
            threads=self.pack_threads,
            big_file_threshold=self.pack_big_file_threshold,
        )
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some, progress=None):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        Args:
          read_all: Read function that blocks until the number of
            requested bytes are read.
          read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
          progress: Optional progress reporting function.
        Returns: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        import tempfile

        fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
        with os.fdopen(fd, "w+b") as f:
            os.chmod(path, PACK_MODE)
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
            copier.verify(progress=progress)
            return self._complete_pack(f, path, len(copier), indexer, progress=progress)

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, "w+b")
        os.chmod(path, PACK_MODE)

        def commit():
            if f.tell() > 0:
                f.seek(0)

                with PackData(path, f) as pd:
                    indexer = PackIndexer.for_pack_data(
                        pd, resolve_ext_ref=self.get_raw
                    )
                    return self._complete_pack(f, path, len(pd), indexer)
            else:
                f.close()
                os.remove(path)
                return None

        def abort() -> None:
            f.close()
            os.remove(path)

        return f, commit, abort

    def add_object(self, obj) -> None:
        """Add a single object to this object store.

        Args:
          obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except FileExistsError:
            pass
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, "wb", mask=PACK_MODE) as f:
            f.write(
                obj.as_legacy_object(compression_level=self.loose_compression_level)
            )

    @classmethod
    def init(cls, path: Union[str, os.PathLike]):
        """Initialize a new disk object store.

        Creates the necessary directory structure for a Git object store.

        Args:
          path: Path where the object store should be created

        Returns:
          New DiskObjectStore instance
        """
        try:
            os.mkdir(path)
        except FileExistsError:
            pass
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)
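
    # Illustrative usage sketch (comment only, not upstream code; the path is a
    # hypothetical placeholder):
    #   store = DiskObjectStore.init("/tmp/example-objects")
    #   store.add_object(Blob.from_string(b"hello\n"))
    # creates the info/ and pack/ subdirectories and stores the blob as a loose
    # object under a two-character fan-out directory.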


    def iter_prefix(self, prefix):
        """Iterate over all object SHAs with the given prefix.

        Args:
          prefix: Hex prefix to search for (as bytes)

        Returns:
          Iterator of object SHAs (as bytes) matching the prefix
        """
        if len(prefix) < 2:
            yield from super().iter_prefix(prefix)
            return
        seen = set()
        dir = prefix[:2].decode()
        rest = prefix[2:].decode()
        try:
            for name in os.listdir(os.path.join(self.path, dir)):
                if name.startswith(rest):
                    sha = os.fsencode(dir + name)
                    if sha not in seen:
                        seen.add(sha)
                        yield sha
        except FileNotFoundError:
            pass

        for p in self.packs:
            bin_prefix = (
                binascii.unhexlify(prefix)
                if len(prefix) % 2 == 0
                else binascii.unhexlify(prefix[:-1])
            )
            for sha in p.index.iter_prefix(bin_prefix):
                sha = sha_to_hex(sha)
                if sha.startswith(prefix) and sha not in seen:
                    seen.add(sha)
                    yield sha
        for alternate in self.alternates:
            for sha in alternate.iter_prefix(prefix):
                if sha not in seen:
                    seen.add(sha)
                    yield sha
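
    # Illustrative usage sketch (comment only, not upstream code): resolving an
    # abbreviated object id such as b"deadbe" to full hex SHAs,
    #   matches = list(store.iter_prefix(b"deadbe"))
    # searches loose objects, pack indexes and alternates, deduplicating results.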


    def get_commit_graph(self):
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        if not self._use_commit_graph:
            return None

        if self._commit_graph is None:
            from .commit_graph import read_commit_graph

            # Look for commit graph in our objects directory
            graph_file = os.path.join(self.path, "info", "commit-graph")
            if os.path.exists(graph_file):
                self._commit_graph = read_commit_graph(graph_file)
        return self._commit_graph

    def write_commit_graph(self, refs=None, reachable=True) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.
        """
        from .commit_graph import get_reachable_commits

        if refs is None:
            # Get all commit objects from the object store
            all_refs = []
            # Iterate through all objects to find commits
            for sha in self:
                try:
                    obj = self[sha]
                    if obj.type_name == b"commit":
                        all_refs.append(sha)
                except KeyError:
                    continue
        else:
            # Use provided refs
            all_refs = refs

        if not all_refs:
            return  # No commits to include

        if reachable:
            # Get all reachable commits
            commit_ids = get_reachable_commits(self, all_refs)
        else:
            # Just use the direct ref targets - ensure they're hex ObjectIDs
            commit_ids = []
            for ref in all_refs:
                if isinstance(ref, bytes) and len(ref) == 40:
                    # Already hex ObjectID
                    commit_ids.append(ref)
                elif isinstance(ref, bytes) and len(ref) == 20:
                    # Binary SHA, convert to hex ObjectID
                    from .objects import sha_to_hex

                    commit_ids.append(sha_to_hex(ref))
                else:
                    # Assume it's already correct format
                    commit_ids.append(ref)

        if commit_ids:
            # Write commit graph directly to our object store path
            # Generate the commit graph
            from .commit_graph import generate_commit_graph

            graph = generate_commit_graph(self, commit_ids)

            if graph.entries:
                # Ensure the info directory exists
                info_dir = os.path.join(self.path, "info")
                os.makedirs(info_dir, exist_ok=True)

                # Write using GitFile for atomic operation
                graph_path = os.path.join(info_dir, "commit-graph")
                with GitFile(graph_path, "wb") as f:
                    assert isinstance(
                        f, _GitFile
                    )  # GitFile in write mode always returns _GitFile
                    graph.write_to_file(f)

        # Clear cached commit graph so it gets reloaded
        self._commit_graph = None

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This removes temporary files that were left behind by interrupted
        pack operations. These are files that start with ``tmp_pack_`` in the
        repository directory or files with .pack extension but no corresponding
        .idx file in the pack directory.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
        """
        import glob

        if grace_period is None:
            grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD

        # Clean up tmp_pack_* files in the repository directory
        for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
            # Check if file is old enough (more than grace period)
            mtime = os.path.getmtime(tmp_file)
            if time.time() - mtime > grace_period:
                os.remove(tmp_file)

        # Clean up orphaned .pack files without corresponding .idx files
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except FileNotFoundError:
            return

        pack_files = {}
        idx_files = set()

        for name in pack_dir_contents:
            if name.endswith(".pack"):
                base_name = name[:-5]  # Remove .pack extension
                pack_files[base_name] = name
            elif name.endswith(".idx"):
                base_name = name[:-4]  # Remove .idx extension
                idx_files.add(base_name)

        # Remove .pack files without corresponding .idx files
        for base_name, pack_name in pack_files.items():
            if base_name not in idx_files:
                pack_path = os.path.join(self.pack_dir, pack_name)
                # Check if file is old enough (more than grace period)
                mtime = os.path.getmtime(pack_path)
                if time.time() - mtime > grace_period:
                    os.remove(pack_path)

1750 

1751 
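# Usage sketch (illustrative): prune() above removes stale ``tmp_pack_*``
# files and orphaned ``.pack`` files older than the grace period. Given a
# disk-backed object store instance ``store``:
#
#     store.prune()                 # use DEFAULT_TEMPFILE_GRACE_PERIOD
#     store.prune(grace_period=0)   # remove leftovers regardless of age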

1752class MemoryObjectStore(BaseObjectStore): 

1753 """Object store that keeps all objects in memory.""" 

1754 

1755 def __init__(self) -> None: 

1756 """Initialize a MemoryObjectStore. 

1757 

1758 Creates an empty in-memory object store. 

1759 """ 

1760 super().__init__() 

1761 self._data: dict[str, ShaFile] = {} 

1762 self.pack_compression_level = -1 

1763 

1764 def _to_hexsha(self, sha): 

1765 if len(sha) == 40: 

1766 return sha 

1767 elif len(sha) == 20: 

1768 return sha_to_hex(sha) 

1769 else: 

1770 raise ValueError(f"Invalid sha {sha!r}") 

1771 

1772 def contains_loose(self, sha): 

1773 """Check if a particular object is present by SHA1 and is loose.""" 

1774 return self._to_hexsha(sha) in self._data 

1775 

1776 def contains_packed(self, sha) -> bool: 

1777 """Check if a particular object is present by SHA1 and is packed.""" 

1778 return False 

1779 

1780 def __iter__(self): 

1781 """Iterate over the SHAs that are present in this store.""" 

1782 return iter(self._data.keys()) 

1783 

1784 @property 

1785 def packs(self): 

1786 """List with pack objects.""" 

1787 return [] 

1788 

1789 def get_raw(self, name: ObjectID): 

1790 """Obtain the raw text for an object. 

1791 

1792 Args: 

1793 name: sha for the object. 

1794 Returns: tuple with numeric type and object contents. 

1795 """ 

1796 obj = self[self._to_hexsha(name)] 

1797 return obj.type_num, obj.as_raw_string() 

1798 

1799 def __getitem__(self, name: ObjectID): 

1800 """Retrieve an object by SHA. 

1801 

1802 Args: 

1803 name: SHA of the object (as hex string or bytes) 

1804 

1805 Returns: 

1806 Copy of the ShaFile object 

1807 

1808 Raises: 

1809 KeyError: If the object is not found 

1810 """ 

1811 return self._data[self._to_hexsha(name)].copy() 

1812 

1813 def __delitem__(self, name: ObjectID) -> None: 

1814 """Delete an object from this store, for testing only.""" 

1815 del self._data[self._to_hexsha(name)] 

1816 

1817 def add_object(self, obj) -> None: 

1818 """Add a single object to this object store.""" 

1819 self._data[obj.id] = obj.copy() 

1820 

1821 def add_objects(self, objects, progress=None) -> None: 

1822 """Add a set of objects to this object store. 

1823 

1824 Args: 

1825 objects: Iterable over a list of (object, path) tuples 

1826 progress: Optional progress reporting function. 

1827 """ 

1828 for obj, path in objects: 

1829 self.add_object(obj) 

1830 

1831 def add_pack(self): 

1832 """Add a new pack to this object store. 

1833 

1834 Because this object store doesn't support packs, we extract and add the 

1835 individual objects. 

1836 

1837 Returns: Fileobject to write to, a commit function to call when the 

1838 pack is finished, and an abort function. 

1839 """ 

1840 from tempfile import SpooledTemporaryFile 

1841 

1842 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1843 

1844 def commit() -> None: 

1845 size = f.tell() 

1846 if size > 0: 

1847 f.seek(0) 

1848 

1849 p = PackData.from_file(f, size) 

1850 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1851 self.add_object(obj) 

1852 p.close() 

1853 f.close() 

1854 else: 

1855 f.close() 

1856 

1857 def abort() -> None: 

1858 f.close() 

1859 

1860 return f, commit, abort 

1861 

1862 def add_pack_data( 

1863 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1864 ) -> None: 

1865 """Add pack data to this object store. 

1866 

1867 Args: 

1868 count: Number of items to add 

1869 unpacked_objects: Iterator of UnpackedObject instances 

1870 progress: Optional progress reporting function. 

1871 """ 

1872 if count == 0: 

1873 return 

1874 

1875 # Since MemoryObjectStore doesn't support pack files, we need to 

1876 # extract individual objects. To handle deltas properly, we write 

1877 # to a temporary pack and then use PackInflater to resolve them. 

1878 f, commit, abort = self.add_pack() 

1879 try: 

1880 write_pack_data( 

1881 f.write, 

1882 unpacked_objects, 

1883 num_records=count, 

1884 progress=progress, 

1885 ) 

1886 except BaseException: 

1887 abort() 

1888 raise 

1889 else: 

1890 commit() 

1891 

1892 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1893 """Add a new thin pack to this object store. 

1894 

1895 Thin packs are packs that contain deltas with parents that exist 

1896 outside the pack. Because this object store doesn't support packs, we 

1897 extract and add the individual objects. 

1898 

1899 Args: 

1900 read_all: Read function that blocks until the number of 

1901 requested bytes are read. 

1902 read_some: Read function that returns at least one byte, but may 

1903 not return the number of bytes requested. 

1904 progress: Optional progress reporting function. 

1905 """ 

1906 f, commit, abort = self.add_pack() 

1907 try: 

1908 copier = PackStreamCopier(read_all, read_some, f) 

1909 copier.verify() 

1910 except BaseException: 

1911 abort() 

1912 raise 

1913 else: 

1914 commit() 

1915 

1916 
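# Usage sketch for MemoryObjectStore (illustrative):
#
#     from dulwich.objects import Blob
#
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b"hello world")
#     store.add_object(blob)
#     assert blob.id in store                  # membership check by hex SHA
#     type_num, raw = store.get_raw(blob.id)   # (3, b"hello world")
#     same = store[blob.id]                    # returns a copy of the object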

1917class ObjectIterator(Protocol): 

1918 """Interface for iterating over objects.""" 

1919 

1920 def iterobjects(self) -> Iterator[ShaFile]: 

1921 """Iterate over all objects. 

1922 

1923 Returns: 

1924 Iterator of ShaFile objects 

1925 """ 

1926 raise NotImplementedError(self.iterobjects) 

1927 

1928 

1929def tree_lookup_path(lookup_obj, root_sha, path): 

1930 """Look up an object in a Git tree. 

1931 

1932 Args: 

1933 lookup_obj: Callback for retrieving object by SHA1 

1934 root_sha: SHA1 of the root tree 

1935 path: Path to lookup 

1936 Returns: A tuple of (mode, SHA) of the resulting path. 

1937 """ 

1938 tree = lookup_obj(root_sha) 

1939 if not isinstance(tree, Tree): 

1940 raise NotTreeError(root_sha) 

1941 return tree.lookup_path(lookup_obj, path) 

1942 

1943 
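# Usage sketch for tree_lookup_path (illustrative; assumes ``store`` contains
# the commit's tree and the path b"docs/README" exists in it):
#
#     mode, sha = tree_lookup_path(store.__getitem__, commit.tree, b"docs/README")
#     blob = store[sha]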

1944def _collect_filetree_revs( 

1945 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1946) -> None: 

1947 """Collect SHA1s of files and directories for specified tree. 

1948 

1949 Args: 

1950 obj_store: Object store to get objects by SHA from 

1951 tree_sha: tree reference to walk 

1952 kset: set to fill with references to files and directories 

1953 """ 

1954 filetree = obj_store[tree_sha] 

1955 assert isinstance(filetree, Tree) 

1956 for name, mode, sha in filetree.iteritems(): 

1957 if not S_ISGITLINK(mode) and sha not in kset: 

1958 kset.add(sha) 

1959 if stat.S_ISDIR(mode): 

1960 _collect_filetree_revs(obj_store, sha, kset) 

1961 

1962 

1963def _split_commits_and_tags( 

1964 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1965) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1966 """Split object id list into three lists with commit, tag, and other SHAs. 

1967 

1968 Commits referenced by tags are included in the commits 

1969 list as well. Only SHA1s known in this repository will get 

1970 through; unless the ignore_unknown argument is True, a KeyError 

1971 is raised for any SHA1 missing from the repository. 

1972 

1973 Args: 

1974 obj_store: Object store to get objects by SHA1 from 

1975 lst: Collection of commit and tag SHAs 

1976 ignore_unknown: True to skip SHA1 missing in the repository 

1977 silently. 

1978 Returns: A tuple of (commits, tags, others) SHA1s 

1979 """ 

1980 commits: set[bytes] = set() 

1981 tags: set[bytes] = set() 

1982 others: set[bytes] = set() 

1983 for e in lst: 

1984 try: 

1985 o = obj_store[e] 

1986 except KeyError: 

1987 if not ignore_unknown: 

1988 raise 

1989 else: 

1990 if isinstance(o, Commit): 

1991 commits.add(e) 

1992 elif isinstance(o, Tag): 

1993 tags.add(e) 

1994 tagged = o.object[1] 

1995 c, t, os = _split_commits_and_tags( 

1996 obj_store, [tagged], ignore_unknown=ignore_unknown 

1997 ) 

1998 commits |= c 

1999 tags |= t 

2000 others |= os 

2001 else: 

2002 others.add(e) 

2003 return (commits, tags, others) 

2004 

2005 
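# Usage sketch for _split_commits_and_tags (internal helper, illustrative;
# ``head_sha`` and ``tag_sha`` are hypothetical object IDs known to ``store``):
#
#     commits, tags, others = _split_commits_and_tags(
#         store, [head_sha, tag_sha], ignore_unknown=True)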

2006class MissingObjectFinder: 

2007 """Find the objects missing from another object store. 

2008 

2009 Args: 

2010 object_store: Object store containing at least all objects to be 

2011 sent 

2012 haves: SHA1s of commits not to send (already present in target) 

2013 wants: SHA1s of commits to send 

2014 progress: Optional function to report progress to. 

2015 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2016 sha for including tags. 

2017 get_parents: Optional function for getting the parents of a commit. 

2018 """ 

2019 

2020 def __init__( 

2021 self, 

2022 object_store, 

2023 haves, 

2024 wants, 

2025 *, 

2026 shallow=None, 

2027 progress=None, 

2028 get_tagged=None, 

2029 get_parents=lambda commit: commit.parents, 

2030 ) -> None: 

2031 """Initialize a MissingObjectFinder. 

2032 

2033 Args: 

2034 object_store: Object store containing objects 

2035 haves: SHA1s of objects already present in target 

2036 wants: SHA1s of objects to send 

2037 shallow: Set of shallow commit SHA1s 

2038 progress: Optional progress reporting callback 

2039 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2040 get_parents: Function for getting commit parents 

2041 """ 

2042 self.object_store = object_store 

2043 if shallow is None: 

2044 shallow = set() 

2045 self._get_parents = get_parents 

2046 # process Commits and Tags differently 

2047 # Note: while haves may list commits/tags not available locally 

2048 # (such SHAs get filtered out by _split_commits_and_tags), 

2049 # wants must list only known SHAs, otherwise 

2050 # _split_commits_and_tags raises KeyError 

2051 have_commits, have_tags, have_others = _split_commits_and_tags( 

2052 object_store, haves, ignore_unknown=True 

2053 ) 

2054 want_commits, want_tags, want_others = _split_commits_and_tags( 

2055 object_store, wants, ignore_unknown=False 

2056 ) 

2057 # all_ancestors is a set of commits that shall not be sent 

2058 # (complete repository up to 'haves') 

2059 all_ancestors = _collect_ancestors( 

2060 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

2061 )[0] 

2062 # all_missing - complete set of commits between haves and wants 

2063 # common - commits from all_ancestors we encounter while 

2064 # traversing the parent hierarchy of wants 

2065 missing_commits, common_commits = _collect_ancestors( 

2066 object_store, 

2067 want_commits, 

2068 all_ancestors, 

2069 shallow=shallow, 

2070 get_parents=self._get_parents, 

2071 ) 

2072 self.remote_has: set[bytes] = set() 

2073 # Now, fill sha_done with commits and revisions of 

2074 # files and directories known to exist both locally 

2075 # and on the target. Thus these commits and files 

2076 # won't get selected for fetch 

2077 for h in common_commits: 

2078 self.remote_has.add(h) 

2079 cmt = object_store[h] 

2080 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2081 # record tags we have as visited, too 

2082 for t in have_tags: 

2083 self.remote_has.add(t) 

2084 self.sha_done = set(self.remote_has) 

2085 

2086 # in fact, what we 'want' is commits, tags, and others 

2087 # we've found missing 

2088 self.objects_to_send: set[ 

2089 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

2090 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

2091 missing_tags = want_tags.difference(have_tags) 

2092 self.objects_to_send.update( 

2093 {(w, None, Tag.type_num, False) for w in missing_tags} 

2094 ) 

2095 missing_others = want_others.difference(have_others) 

2096 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2097 

2098 if progress is None: 

2099 self.progress = lambda x: None 

2100 else: 

2101 self.progress = progress 

2102 self._tagged = (get_tagged and get_tagged()) or {} 

2103 

2104 def get_remote_has(self): 

2105 """Get the set of SHAs the remote has. 

2106 

2107 Returns: 

2108 Set of SHA1s that the remote side already has 

2109 """ 

2110 return self.remote_has 

2111 

2112 def add_todo( 

2113 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

2114 ) -> None: 

2115 """Add objects to the todo list. 

2116 

2117 Args: 

2118 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2119 """ 

2120 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2121 

2122 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

2123 """Get the next object to send. 

2124 

2125 Returns: 

2126 Tuple of (sha, pack_hint) 

2127 

2128 Raises: 

2129 StopIteration: When no more objects to send 

2130 """ 

2131 while True: 

2132 if not self.objects_to_send: 

2133 self.progress( 

2134 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2135 ) 

2136 raise StopIteration 

2137 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2138 if sha not in self.sha_done: 

2139 break 

2140 if not leaf: 

2141 o = self.object_store[sha] 

2142 if isinstance(o, Commit): 

2143 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2144 elif isinstance(o, Tree): 

2145 self.add_todo( 

2146 [ 

2147 ( 

2148 s, 

2149 n, 

2150 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2151 not stat.S_ISDIR(m), 

2152 ) 

2153 for n, m, s in o.iteritems() 

2154 if not S_ISGITLINK(m) 

2155 ] 

2156 ) 

2157 elif isinstance(o, Tag): 

2158 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2159 if sha in self._tagged: 

2160 self.add_todo([(self._tagged[sha], None, None, True)]) 

2161 self.sha_done.add(sha) 

2162 if len(self.sha_done) % 1000 == 0: 

2163 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2164 if type_num is None: 

2165 pack_hint = None 

2166 else: 

2167 pack_hint = (type_num, name) 

2168 return (sha, pack_hint) 

2169 

2170 def __iter__(self): 

2171 """Return iterator over objects to send. 

2172 

2173 Returns: 

2174 Self (this class implements the iterator protocol) 

2175 """ 

2176 return self 

2177 

2178 
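# Usage sketch for MissingObjectFinder (illustrative): determine which objects
# would need to be sent to a peer that already has ``have_sha``.
#
#     finder = MissingObjectFinder(store, haves=[have_sha], wants=[want_sha])
#     for sha, pack_hint in finder:
#         print(sha, pack_hint)     # pack_hint is (type_num, name) or None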

2179class ObjectStoreGraphWalker: 

2180 """Graph walker that finds what commits are missing from an object store.""" 

2181 

2182 heads: set[ObjectID] 

2183 """Revisions without descendants in the local repo.""" 

2184 

2185 get_parents: Callable[[ObjectID], list[ObjectID]] 

2186 """Function to retrieve parents in the local repo.""" 

2187 

2188 shallow: set[ObjectID] 

2189 

2190 def __init__( 

2191 self, 

2192 local_heads: Iterable[ObjectID], 

2193 get_parents, 

2194 shallow: Optional[set[ObjectID]] = None, 

2195 update_shallow=None, 

2196 ) -> None: 

2197 """Create a new instance. 

2198 

2199 Args: 

2200 local_heads: Heads to start search with 

2201 get_parents: Function for finding the parents of a SHA1. 

2202 shallow: Set of shallow commits. 

2203 update_shallow: Function to update shallow commits. 

2204 """ 

2205 self.heads = set(local_heads) 

2206 self.get_parents = get_parents 

2207 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

2208 if shallow is None: 

2209 shallow = set() 

2210 self.shallow = shallow 

2211 self.update_shallow = update_shallow 

2212 

2213 def nak(self) -> None: 

2214 """Nothing in common was found.""" 

2215 

2216 def ack(self, sha: ObjectID) -> None: 

2217 """Ack that a revision and its ancestors are present in the source.""" 

2218 if len(sha) != 40: 

2219 raise ValueError(f"unexpected sha {sha!r} received") 

2220 ancestors = {sha} 

2221 

2222 # stop if we run out of heads to remove 

2223 while self.heads: 

2224 for a in ancestors: 

2225 if a in self.heads: 

2226 self.heads.remove(a) 

2227 

2228 # collect all ancestors 

2229 new_ancestors = set() 

2230 for a in ancestors: 

2231 ps = self.parents.get(a) 

2232 if ps is not None: 

2233 new_ancestors.update(ps) 

2234 self.parents[a] = None 

2235 

2236 # no more ancestors; stop 

2237 if not new_ancestors: 

2238 break 

2239 

2240 ancestors = new_ancestors 

2241 

2242 def next(self): 

2243 """Iterate over ancestors of heads in the target.""" 

2244 if self.heads: 

2245 ret = self.heads.pop() 

2246 try: 

2247 ps = self.get_parents(ret) 

2248 except KeyError: 

2249 return None 

2250 self.parents[ret] = ps 

2251 self.heads.update([p for p in ps if p not in self.parents]) 

2252 return ret 

2253 return None 

2254 

2255 __next__ = next 

2256 

2257 
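# Usage sketch for ObjectStoreGraphWalker (illustrative): offer local heads to
# a remote and drop ancestors of anything the remote acknowledges.
#
#     walker = ObjectStoreGraphWalker(
#         local_heads=[head_sha],
#         get_parents=lambda sha: store[sha].parents)
#     sha = next(walker)       # a local commit to offer; None when exhausted
#     walker.ack(sha)          # remote has it, so its ancestors are dropped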

2258def commit_tree_changes(object_store, tree, changes): 

2259 """Commit a specified set of changes to a tree structure. 

2260 

2261 This will apply a set of changes on top of an existing tree, storing new 

2262 objects in object_store. 

2263 

2264 changes are a list of tuples with (path, mode, object_sha). 

2265 Paths can be both blobs and trees. Setting the mode and 

2266 object sha to None deletes the path. 

2267 

2268 This method works especially well if there are only a small 

2269 number of changes to a big tree. For a large number of changes 

2270 to a large tree, use e.g. commit_tree. 

2271 

2272 Args: 

2273 object_store: Object store to store new objects in 

2274 and retrieve old ones from. 

2275 tree: Original tree root 

2276 changes: changes to apply 

2277 Returns: New tree root object 

2278 """ 

2279 # TODO(jelmer): Save up the objects and add them using .add_objects 

2280 # rather than with individual calls to .add_object. 

2281 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {} 

2282 for path, new_mode, new_sha in changes: 

2283 try: 

2284 (dirname, subpath) = path.split(b"/", 1) 

2285 except ValueError: 

2286 if new_sha is None: 

2287 del tree[path] 

2288 else: 

2289 tree[path] = (new_mode, new_sha) 

2290 else: 

2291 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2292 for name, subchanges in nested_changes.items(): 

2293 try: 

2294 orig_subtree = object_store[tree[name][1]] 

2295 except KeyError: 

2296 orig_subtree = Tree() 

2297 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

2298 if len(subtree) == 0: 

2299 del tree[name] 

2300 else: 

2301 tree[name] = (stat.S_IFDIR, subtree.id) 

2302 object_store.add_object(tree) 

2303 return tree 

2304 

2305 
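# Usage sketch for commit_tree_changes (illustrative): add one blob and delete
# another path on top of an existing tree (assumes b"old/file.txt" exists in
# the original tree; deleting a missing path raises KeyError).
#
#     new_blob = Blob.from_string(b"new content")
#     store.add_object(new_blob)
#     new_tree = commit_tree_changes(store, store[old_tree_id], [
#         (b"docs/new.txt", 0o100644, new_blob.id),
#         (b"old/file.txt", None, None),        # deletes the path
#     ])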

2306class OverlayObjectStore(BaseObjectStore): 

2307 """Object store that can overlay multiple object stores.""" 

2308 

2309 def __init__(self, bases, add_store=None) -> None: 

2310 """Initialize an OverlayObjectStore. 

2311 

2312 Args: 

2313 bases: List of base object stores to overlay 

2314 add_store: Optional store to write new objects to 

2315 """ 

2316 self.bases = bases 

2317 self.add_store = add_store 

2318 

2319 def add_object(self, object): 

2320 """Add a single object to the store. 

2321 

2322 Args: 

2323 object: Object to add 

2324 

2325 Raises: 

2326 NotImplementedError: If no add_store was provided 

2327 """ 

2328 if self.add_store is None: 

2329 raise NotImplementedError(self.add_object) 

2330 return self.add_store.add_object(object) 

2331 

2332 def add_objects(self, objects, progress=None): 

2333 """Add multiple objects to the store. 

2334 

2335 Args: 

2336 objects: Iterator of objects to add 

2337 progress: Optional progress reporting callback 

2338 

2339 Raises: 

2340 NotImplementedError: If no add_store was provided 

2341 """ 

2342 if self.add_store is None: 

2343 raise NotImplementedError(self.add_objects) 

2344 return self.add_store.add_objects(objects, progress) 

2345 

2346 @property 

2347 def packs(self): 

2348 """Get the list of packs from all overlaid stores. 

2349 

2350 Returns: 

2351 Combined list of packs from all base stores 

2352 """ 

2353 ret = [] 

2354 for b in self.bases: 

2355 ret.extend(b.packs) 

2356 return ret 

2357 

2358 def __iter__(self): 

2359 """Iterate over all object SHAs in the overlaid stores. 

2360 

2361 Returns: 

2362 Iterator of object SHAs (deduped across stores) 

2363 """ 

2364 done = set() 

2365 for b in self.bases: 

2366 for o_id in b: 

2367 if o_id not in done: 

2368 yield o_id 

2369 done.add(o_id) 

2370 

2371 def iterobjects_subset( 

2372 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2373 ) -> Iterator[ShaFile]: 

2374 """Iterate over a subset of objects from the overlaid stores. 

2375 

2376 Args: 

2377 shas: Iterable of object SHAs to retrieve 

2378 allow_missing: If True, skip missing objects; if False, raise KeyError 

2379 

2380 Returns: 

2381 Iterator of ShaFile objects 

2382 

2383 Raises: 

2384 KeyError: If an object is missing and allow_missing is False 

2385 """ 

2386 todo = set(shas) 

2387 found: set[bytes] = set() 

2388 

2389 for b in self.bases: 

2390 # Create a copy of todo for each base to avoid modifying 

2391 # the set while iterating through it 

2392 current_todo = todo - found 

2393 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2394 yield o 

2395 found.add(o.id) 

2396 

2397 # Check for any remaining objects not found 

2398 missing = todo - found 

2399 if missing and not allow_missing: 

2400 raise KeyError(next(iter(missing))) 

2401 

2402 def iter_unpacked_subset( 

2403 self, 

2404 shas: Iterable[bytes], 

2405 *, 

2406 include_comp=False, 

2407 allow_missing: bool = False, 

2408 convert_ofs_delta=True, 

2409 ) -> Iterator[ShaFile]: 

2410 """Iterate over unpacked objects from the overlaid stores. 

2411 

2412 Args: 

2413 shas: Iterable of object SHAs to retrieve 

2414 include_comp: Whether to include compressed data 

2415 allow_missing: If True, skip missing objects; if False, raise KeyError 

2416 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2417 

2418 Returns: 

2419 Iterator of unpacked objects 

2420 

2421 Raises: 

2422 KeyError: If an object is missing and allow_missing is False 

2423 """ 

2424 todo = set(shas) 

2425 for b in self.bases: 

2426 for o in b.iter_unpacked_subset( 

2427 todo, 

2428 include_comp=include_comp, 

2429 allow_missing=True, 

2430 convert_ofs_delta=convert_ofs_delta, 

2431 ): 

2432 yield o 

2433 todo.remove(o.id) 

2434 if todo and not allow_missing: 

2435 raise KeyError(next(iter(todo))) 

2436 

2437 def get_raw(self, sha_id): 

2438 """Get the raw object data from the overlaid stores. 

2439 

2440 Args: 

2441 sha_id: SHA of the object 

2442 

2443 Returns: 

2444 Tuple of (type_num, raw_data) 

2445 

2446 Raises: 

2447 KeyError: If object not found in any base store 

2448 """ 

2449 for b in self.bases: 

2450 try: 

2451 return b.get_raw(sha_id) 

2452 except KeyError: 

2453 pass 

2454 raise KeyError(sha_id) 

2455 

2456 def contains_packed(self, sha) -> bool: 

2457 """Check if an object is packed in any base store. 

2458 

2459 Args: 

2460 sha: SHA of the object 

2461 

2462 Returns: 

2463 True if object is packed in any base store 

2464 """ 

2465 for b in self.bases: 

2466 if b.contains_packed(sha): 

2467 return True 

2468 return False 

2469 

2470 def contains_loose(self, sha) -> bool: 

2471 """Check if an object is loose in any base store. 

2472 

2473 Args: 

2474 sha: SHA of the object 

2475 

2476 Returns: 

2477 True if object is loose in any base store 

2478 """ 

2479 for b in self.bases: 

2480 if b.contains_loose(sha): 

2481 return True 

2482 return False 

2483 

2484 
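# Usage sketch for OverlayObjectStore (illustrative): read from two stores and
# write new objects to the first one.
#
#     overlay = OverlayObjectStore([store_a, store_b], add_store=store_a)
#     type_num, raw = overlay.get_raw(some_sha)   # tried in store_a, then store_b
#     overlay.add_object(new_obj)                 # stored in store_a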

2485def read_packs_file(f): 

2486 """Yield the packs listed in a packs file.""" 

2487 for line in f.read().splitlines(): 

2488 if not line: 

2489 continue 

2490 (kind, name) = line.split(b" ", 1) 

2491 if kind != b"P": 

2492 continue 

2493 yield os.fsdecode(name) 

2494 

2495 
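# Usage sketch for read_packs_file (illustrative): parse an
# objects/info/packs-style listing (the pack name below is made up).
#
#     from io import BytesIO
#
#     f = BytesIO(b"P pack-5c147bbc286d1e5d7e9e6a6c56c4a9a3f4d9e1b2.pack\n")
#     list(read_packs_file(f))
#     # ['pack-5c147bbc286d1e5d7e9e6a6c56c4a9a3f4d9e1b2.pack']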

2496class BucketBasedObjectStore(PackBasedObjectStore): 

2497 """Object store implementation that uses a bucket store like S3 as backend.""" 

2498 

2499 def _iter_loose_objects(self): 

2500 """Iterate over the SHAs of all loose objects.""" 

2501 return iter([]) 

2502 

2503 def _get_loose_object(self, sha) -> None: 

2504 return None 

2505 

2506 def delete_loose_object(self, sha) -> None: 

2507 """Delete a loose object (no-op for bucket stores). 

2508 

2509 Bucket-based stores don't have loose objects, so this is a no-op. 

2510 

2511 Args: 

2512 sha: SHA of the object to delete 

2513 """ 

2514 # Doesn't exist.. 

2515 

2516 def pack_loose_objects(self, progress: Optional[Callable] = None) -> int: 

2517 """Pack loose objects. Returns number of objects packed. 

2518 

2519 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2520 

2521 Args: 

2522 progress: Optional progress reporting callback (ignored) 

2523 """ 

2524 return 0 

2525 

2526 def _remove_pack_by_name(self, name: str) -> None: 

2527 """Remove a pack by name. Subclasses should implement this.""" 

2528 raise NotImplementedError(self._remove_pack_by_name) 

2529 

2530 def _iter_pack_names(self) -> Iterator[str]: 

2531 raise NotImplementedError(self._iter_pack_names) 

2532 

2533 def _get_pack(self, name) -> Pack: 

2534 raise NotImplementedError(self._get_pack) 

2535 

2536 def _update_pack_cache(self): 

2537 pack_files = set(self._iter_pack_names()) 

2538 

2539 # Open newly appeared pack files 

2540 new_packs = [] 

2541 for f in pack_files: 

2542 if f not in self._pack_cache: 

2543 pack = self._get_pack(f) 

2544 new_packs.append(pack) 

2545 self._pack_cache[f] = pack 

2546 # Remove disappeared pack files 

2547 for f in set(self._pack_cache) - pack_files: 

2548 self._pack_cache.pop(f).close() 

2549 return new_packs 

2550 

2551 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2552 raise NotImplementedError 

2553 

2554 def add_pack(self): 

2555 """Add a new pack to this object store. 

2556 

2557 Returns: Fileobject to write to, a commit function to 

2558 call when the pack is finished and an abort 

2559 function. 

2560 """ 

2561 import tempfile 

2562 

2563 pf = tempfile.SpooledTemporaryFile( 

2564 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2565 ) 

2566 

2567 def commit(): 

2568 if pf.tell() == 0: 

2569 pf.close() 

2570 return None 

2571 

2572 pf.seek(0) 

2573 

2574 p = PackData(pf.name, pf) 

2575 entries = p.sorted_entries() 

2576 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2577 idxf = tempfile.SpooledTemporaryFile( 

2578 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2579 ) 

2580 checksum = p.get_stored_checksum() 

2581 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2582 idxf.seek(0) 

2583 idx = load_pack_index_file(basename + ".idx", idxf) 

2584 for pack in self.packs: 

2585 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2586 p.close() 

2587 idx.close() 

2588 pf.close() 

2589 idxf.close() 

2590 return pack 

2591 pf.seek(0) 

2592 idxf.seek(0) 

2593 self._upload_pack(basename, pf, idxf) 

2594 final_pack = Pack.from_objects(p, idx) 

2595 self._add_cached_pack(basename, final_pack) 

2596 pf.close() 

2597 idxf.close() 

2598 return final_pack 

2599 

2600 return pf, commit, pf.close 

2601 

2602 
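# Implementation sketch (illustrative): a concrete bucket-backed store only
# needs to supply the pack-level hooks below; the class name and bodies are
# hypothetical.
#
#     class MyBucketStore(BucketBasedObjectStore):
#         def _iter_pack_names(self):
#             ...   # yield pack basenames stored in the bucket
#         def _get_pack(self, name):
#             ...   # return a Pack for the given basename
#         def _upload_pack(self, basename, pack_file, index_file):
#             ...   # upload the .pack and .idx file objects
#         def _remove_pack_by_name(self, name):
#             ...   # delete the pack from the bucket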

2603def _collect_ancestors( 

2604 store: ObjectContainer, 

2605 heads, 

2606 common: frozenset[ObjectID] = frozenset(), 

2607 shallow: frozenset[ObjectID] = frozenset(), 

2608 get_parents=lambda commit: commit.parents, 

2609): 

2610 """Collect all ancestors of heads up to (excluding) those in common. 

2611 

2612 Args: 

2613 store: Object store to get commits from 

2614 heads: commits to start from 

2615 common: commits to end at, or empty set to walk repository 

2616 completely 

2617 shallow: Set of shallow commits 

2618 get_parents: Optional function for getting the parents of a 

2619 commit. 

2620 Returns: a tuple (A, B) where A is the set of all commits reachable 

2621 from heads but not present in common, and B is the set of common 

2622 (shared) commits that are directly reachable from heads 

2623 """ 

2624 bases = set() 

2625 commits = set() 

2626 queue = [] 

2627 queue.extend(heads) 

2628 

2629 # Try to use commit graph if available 

2630 commit_graph = store.get_commit_graph() 

2631 

2632 while queue: 

2633 e = queue.pop(0) 

2634 if e in common: 

2635 bases.add(e) 

2636 elif e not in commits: 

2637 commits.add(e) 

2638 if e in shallow: 

2639 continue 

2640 

2641 # Try to use commit graph for parent lookup 

2642 parents = None 

2643 if commit_graph: 

2644 parents = commit_graph.get_parents(e) 

2645 

2646 if parents is None: 

2647 # Fall back to loading the object 

2648 cmt = store[e] 

2649 parents = get_parents(cmt) 

2650 

2651 queue.extend(parents) 

2652 return (commits, bases) 

2653 

2654 
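# Usage sketch for _collect_ancestors (internal helper, illustrative;
# ``want_sha`` and ``have_sha`` are hypothetical commit IDs in ``store``):
#
#     missing, common = _collect_ancestors(
#         store, [want_sha], common=frozenset([have_sha]))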

2655def iter_tree_contents( 

2656 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2657): 

2658 """Iterate the contents of a tree and all subtrees. 

2659 

2660 Iteration is depth-first pre-order, as in e.g. os.walk. 

2661 

2662 Args: 

2663 store: Object store to get trees from 

2664 tree_id: SHA1 of the tree. 

2665 include_trees: If True, include tree objects in the iteration. 

2666 

2667 Yields: TreeEntry namedtuples for all the objects in a tree. 

2668 """ 

2669 if tree_id is None: 

2670 return 

2671 # This could be fairly easily generalized to >2 trees if we find a use 

2672 # case. 

2673 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2674 while todo: 

2675 entry = todo.pop() 

2676 if stat.S_ISDIR(entry.mode): 

2677 extra = [] 

2678 tree = store[entry.sha] 

2679 assert isinstance(tree, Tree) 

2680 for subentry in tree.iteritems(name_order=True): 

2681 extra.append(subentry.in_path(entry.path)) 

2682 todo.extend(reversed(extra)) 

2683 if not stat.S_ISDIR(entry.mode) or include_trees: 

2684 yield entry 

2685 

2686 
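# Usage sketch for iter_tree_contents (illustrative): list every file in a
# commit's tree.
#
#     for entry in iter_tree_contents(store, commit.tree):
#         print(entry.path, oct(entry.mode), entry.sha)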

2687def iter_commit_contents( 

2688 store: ObjectContainer, 

2689 commit: Union[Commit, bytes], 

2690 *, 

2691 include: Optional[Sequence[Union[str, bytes, Path]]] = None, 

2692): 

2693 """Iterate the contents of the repository at the specified commit. 

2694 

2695 This is a wrapper around iter_tree_contents() and 

2696 tree_lookup_path() to simplify the common task of getting the 

2697 contents of a repo at a particular commit. See also 

2698 dulwich.index.build_file_from_blob() for writing individual files 

2699 to disk. 

2700 

2701 Args: 

2702 store: Object store to get trees from 

2703 commit: Commit object, or SHA1 of a commit 

2704 include: if provided, only the entries whose paths are in the 

2705 list, or whose parent tree is in the list, will be 

2706 included. Note that duplicate or overlapping paths 

2707 (e.g. ["foo", "foo/bar"]) may result in duplicate entries 

2708 

2709 Yields: TreeEntry namedtuples for all matching files in a commit. 

2710 """ 

2711 sha = commit.id if isinstance(commit, Commit) else commit 

2712 if not isinstance(obj := store[sha], Commit): 

2713 raise TypeError( 

2714 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}" 

2715 ) 

2716 commit = obj 

2717 encoding = commit.encoding or "utf-8" 

2718 include = ( 

2719 [ 

2720 path if isinstance(path, bytes) else str(path).encode(encoding) 

2721 for path in include 

2722 ] 

2723 if include is not None 

2724 else [b""] 

2725 ) 

2726 

2727 for path in include: 

2728 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path) 

2729 # Iterate all contained files if path points to a dir, otherwise just get that 

2730 # single file 

2731 if isinstance(store[obj_id], Tree): 

2732 for entry in iter_tree_contents(store, obj_id): 

2733 yield entry.in_path(path) 

2734 else: 

2735 yield TreeEntry(path, mode, obj_id) 

2736 

2737 
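# Usage sketch for iter_commit_contents (illustrative): restrict iteration to
# one subdirectory (assumes the commit's tree contains a b"docs" directory).
#
#     for entry in iter_commit_contents(store, commit_sha, include=[b"docs"]):
#         print(entry.path, entry.sha)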

2738def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2739 """Peel all tags from a SHA. 

2740 

2741 Args: 

2742 store: Object store to get objects from 

2743 sha: The object SHA to peel. 

2744 Returns: A tuple of (unpeeled, peeled) objects, where peeled is the 

2745 object reached after following all intermediate tags; if the 

2746 original SHA does not point to a tag, both elements are the same object. 

2747 """ 

2748 unpeeled = obj = store[sha] 

2749 obj_class = object_class(obj.type_name) 

2750 while obj_class is Tag: 

2751 assert isinstance(obj, Tag) 

2752 obj_class, sha = obj.object 

2753 obj = store[sha] 

2754 return unpeeled, obj
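# Usage sketch for peel_sha (illustrative): resolve a (possibly nested) tag to
# the object it ultimately points at.
#
#     unpeeled, peeled = peel_sha(store, tag_sha)
#     if peeled.type_name == b"commit":
#         print(peeled.id)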