Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%


1157 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from typing import ( 

36 TYPE_CHECKING, 

37 Callable, 

38 Optional, 

39 Protocol, 

40 Union, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile, _GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

85if TYPE_CHECKING: 

86 from .commit_graph import CommitGraph 

87 from .diff_tree import RenameDetector 

88 

89 

90class GraphWalker(Protocol): 

91 """Protocol for graph walker objects.""" 

92 

93 def __next__(self) -> Optional[bytes]: 

94 """Return the next object SHA to visit.""" 

95 ... 

96 

97 def ack(self, sha: bytes) -> None: 

98 """Acknowledge that an object has been received.""" 

99 ... 

100 

101 
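# Illustrative sketch (not part of object_store.py): a minimal class that
# satisfies the GraphWalker protocol above, as consumed by
# find_common_revisions().  It announces a fixed list of candidate SHAs and
# records which ones the local store acknowledged as already present.
class ListGraphWalker:
    def __init__(self, shas):
        self._pending = list(shas)
        self.acked = []

    def __next__(self):
        # Next SHA to offer, or None once the list is exhausted.
        return self._pending.pop(0) if self._pending else None

    def ack(self, sha):
        # Called by the store for every offered SHA it already has.
        self.acked.append(sha)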

102INFODIR = "info" 

103PACKDIR = "pack" 

104 

105# use permissions consistent with Git; just readable by everyone 

106# TODO: should packs also be non-writable on Windows? if so, that 

107# would require some rather significant adjustments to the test suite 

108PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

109 

110# Grace period for cleaning up temporary pack files (in seconds) 

111# Matches git's default of 2 weeks 

112DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

113 

114 

115def find_shallow( 

116 store: ObjectContainer, heads: Iterable[bytes], depth: int 

117) -> tuple[set[bytes], set[bytes]]: 

118 """Find shallow commits according to a given depth. 

119 

120 Args: 

121 store: An ObjectStore for looking up objects. 

122 heads: Iterable of head SHAs to start walking from. 

123 depth: The depth of ancestors to include. A depth of one includes 

124 only the heads themselves. 

125 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

126 considered shallow and unshallow according to the arguments. Note that 

127 these sets may overlap if a commit is reachable along multiple paths. 

128 """ 

129 parents: dict[bytes, list[bytes]] = {} 

130 commit_graph = store.get_commit_graph() 

131 

132 def get_parents(sha: bytes) -> list[bytes]: 

133 result = parents.get(sha, None) 

134 if not result: 

135 # Try to use commit graph first if available 

136 if commit_graph: 

137 graph_parents = commit_graph.get_parents(sha) 

138 if graph_parents is not None: 

139 result = graph_parents 

140 parents[sha] = result 

141 return result 

142 # Fall back to loading the object 

143 commit = store[sha] 

144 assert isinstance(commit, Commit) 

145 result = commit.parents 

146 parents[sha] = result 

147 return result 

148 

149 todo = [] # stack of (sha, depth) 

150 for head_sha in heads: 

151 obj = store[head_sha] 

152 # Peel tags if necessary 

153 while isinstance(obj, Tag): 

154 _, sha = obj.object 

155 obj = store[sha] 

156 if isinstance(obj, Commit): 

157 todo.append((obj.id, 1)) 

158 

159 not_shallow = set() 

160 shallow = set() 

161 while todo: 

162 sha, cur_depth = todo.pop() 

163 if cur_depth < depth: 

164 not_shallow.add(sha) 

165 new_depth = cur_depth + 1 

166 todo.extend((p, new_depth) for p in get_parents(sha)) 

167 else: 

168 shallow.add(sha) 

169 

170 return shallow, not_shallow 

171 

172 
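# Illustrative usage sketch (not part of object_store.py): build a small
# commit chain in a MemoryObjectStore and compute the shallow boundary for a
# depth-2 clone.  Author/committer identities and timestamps are arbitrary
# placeholder values.
from dulwich.object_store import MemoryObjectStore, find_shallow
from dulwich.objects import Commit, Tree

store = MemoryObjectStore()
tree = Tree()
store.add_object(tree)

parent_id = None
chain = []
for i in range(3):
    commit = Commit()
    commit.tree = tree.id
    commit.parents = [parent_id] if parent_id else []
    commit.author = commit.committer = b"Example <example@example.com>"
    commit.author_time = commit.commit_time = i
    commit.author_timezone = commit.commit_timezone = 0
    commit.message = b"commit %d" % i
    store.add_object(commit)
    chain.append(commit.id)
    parent_id = commit.id

# With depth=2 the head stays "not shallow" and its direct parent becomes the
# shallow boundary; the root commit is never visited.
shallow, not_shallow = find_shallow(store, [chain[-1]], 2)
assert shallow == {chain[1]} and not_shallow == {chain[2]}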

173def get_depth( 

174 store: ObjectContainer, 

175 head: bytes, 

176 get_parents: Callable = lambda commit: commit.parents, 

177 max_depth: Optional[int] = None, 

178) -> int: 

179 """Return the current available depth for the given head. 

180 

181 For commits with multiple parents, the largest possible depth will be 

182 returned. 

183 

184 Args: 

185 store: Object store to search in 

186 head: commit to start from 

187 get_parents: optional function for getting the parents of a commit 

188 max_depth: maximum depth to search 

189 """ 

190 if head not in store: 

191 return 0 

192 current_depth = 1 

193 queue = [(head, current_depth)] 

194 commit_graph = store.get_commit_graph() 

195 

196 while queue and (max_depth is None or current_depth < max_depth): 

197 e, depth = queue.pop(0) 

198 current_depth = max(current_depth, depth) 

199 

200 # Try to use commit graph for parent lookup if available 

201 parents = None 

202 if commit_graph: 

203 parents = commit_graph.get_parents(e) 

204 

205 if parents is None: 

206 # Fall back to loading the object 

207 cmt = store[e] 

208 if isinstance(cmt, Tag): 

209 _cls, sha = cmt.object 

210 cmt = store[sha] 

211 parents = get_parents(cmt) 

212 

213 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

214 return current_depth 

215 

216 
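# Illustrative usage sketch (not part of object_store.py): a lone root commit
# has depth 1, and an unknown SHA reports depth 0.  Identity and timestamp
# fields are placeholder values.
from dulwich.object_store import MemoryObjectStore, get_depth
from dulwich.objects import Commit, Tree

store = MemoryObjectStore()
tree = Tree()
store.add_object(tree)

root = Commit()
root.tree = tree.id
root.parents = []
root.author = root.committer = b"Example <example@example.com>"
root.author_time = root.commit_time = 0
root.author_timezone = root.commit_timezone = 0
root.message = b"root"
store.add_object(root)

assert get_depth(store, root.id) == 1
assert get_depth(store, b"0" * 40) == 0  # head not in store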

217class PackContainer(Protocol): 

218 """Protocol for containers that can accept pack files.""" 

219 

220 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

221 """Add a new pack.""" 

222 

223 

224class BaseObjectStore: 

225 """Object store interface.""" 

226 

227 def determine_wants_all( 

228 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

229 ) -> list[ObjectID]: 

230 """Determine which objects are wanted based on refs.""" 

231 

232 def _want_deepen(sha: bytes) -> bool: 

233 if not depth: 

234 return False 

235 if depth == DEPTH_INFINITE: 

236 return True 

237 return depth > self._get_depth(sha) 

238 

239 return [ 

240 sha 

241 for (ref, sha) in refs.items() 

242 if (sha not in self or _want_deepen(sha)) 

243 and not ref.endswith(PEELED_TAG_SUFFIX) 

244 and not sha == ZERO_SHA 

245 ] 

246 
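# Illustrative usage sketch (not part of object_store.py): with an empty
# store every ref target is wanted, except peeled tag entries (ending in
# "^{}") and the zero SHA.  The 40-hex values below are made-up examples.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import ZERO_SHA

store = MemoryObjectStore()
refs = {
    b"refs/heads/master": b"a" * 40,
    b"refs/tags/v1.0": b"b" * 40,
    b"refs/tags/v1.0^{}": b"c" * 40,  # peeled entry, filtered out
    b"refs/heads/broken": ZERO_SHA,   # zero SHA, filtered out
}
assert set(store.determine_wants_all(refs)) == {b"a" * 40, b"b" * 40}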

247 def contains_loose(self, sha: bytes) -> bool: 

248 """Check if a particular object is present by SHA1 and is loose.""" 

249 raise NotImplementedError(self.contains_loose) 

250 

251 def __contains__(self, sha1: bytes) -> bool: 

252 """Check if a particular object is present by SHA1. 

253 

254 This method makes no distinction between loose and packed objects. 

255 """ 

256 return self.contains_loose(sha1) 

257 

258 @property 

259 def packs(self) -> list[Pack]: 

260 """Iterable of pack objects.""" 

261 raise NotImplementedError 

262 

263 def get_raw(self, name: bytes) -> tuple[int, bytes]: 

264 """Obtain the raw text for an object. 

265 

266 Args: 

267 name: sha for the object. 

268 Returns: tuple with numeric type and object contents. 

269 """ 

270 raise NotImplementedError(self.get_raw) 

271 

272 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

273 """Obtain an object by SHA1.""" 

274 type_num, uncomp = self.get_raw(sha1) 

275 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

276 

277 def __iter__(self) -> Iterator[bytes]: 

278 """Iterate over the SHAs that are present in this store.""" 

279 raise NotImplementedError(self.__iter__) 

280 

281 def add_object(self, obj: ShaFile) -> None: 

282 """Add a single object to this object store.""" 

283 raise NotImplementedError(self.add_object) 

284 

285 def add_objects( 

286 self, 

287 objects: Sequence[tuple[ShaFile, Optional[str]]], 

288 progress: Optional[Callable] = None, 

289 ) -> Optional["Pack"]: 

290 """Add a set of objects to this object store. 

291 

292 Args: 

293 objects: Iterable over a list of (object, path) tuples 

294 progress: Optional progress callback 

295 """ 

296 raise NotImplementedError(self.add_objects) 

297 

298 def tree_changes( 

299 self, 

300 source: Optional[bytes], 

301 target: Optional[bytes], 

302 want_unchanged: bool = False, 

303 include_trees: bool = False, 

304 change_type_same: bool = False, 

305 rename_detector: Optional["RenameDetector"] = None, 

306 paths: Optional[list[bytes]] = None, 

307 ) -> Iterator[ 

308 tuple[ 

309 tuple[Optional[bytes], Optional[bytes]], 

310 tuple[Optional[int], Optional[int]], 

311 tuple[Optional[bytes], Optional[bytes]], 

312 ] 

313 ]: 

314 """Find the differences between the contents of two trees. 

315 

316 Args: 

317 source: SHA1 of the source tree 

318 target: SHA1 of the target tree 

319 want_unchanged: Whether unchanged files should be reported 

320 include_trees: Whether to include trees 

321 change_type_same: Whether to report files changing 

322 type in the same entry. 

323 rename_detector: RenameDetector object for detecting renames. 

324 paths: Optional list of paths to filter to (as bytes). 

325 Returns: Iterator over tuples with 

326 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

327 """ 

328 from .diff_tree import tree_changes 

329 

330 for change in tree_changes( 

331 self, 

332 source, 

333 target, 

334 want_unchanged=want_unchanged, 

335 include_trees=include_trees, 

336 change_type_same=change_type_same, 

337 rename_detector=rename_detector, 

338 paths=paths, 

339 ): 

340 yield ( 

341 (change.old.path, change.new.path), 

342 (change.old.mode, change.new.mode), 

343 (change.old.sha, change.new.sha), 

344 ) 

345 
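# Illustrative usage sketch (not part of object_store.py): compare two small
# trees held in a MemoryObjectStore and report the changed path, modes and
# blob SHAs.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b"old contents\n")
new_blob = Blob.from_string(b"new contents\n")
store.add_object(old_blob)
store.add_object(new_blob)

old_tree = Tree()
old_tree.add(b"file.txt", 0o100644, old_blob.id)
new_tree = Tree()
new_tree.add(b"file.txt", 0o100644, new_blob.id)
store.add_object(old_tree)
store.add_object(new_tree)

for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in store.tree_changes(
    old_tree.id, new_tree.id
):
    # A single modification: file.txt, same mode, different blob SHA.
    assert oldpath == newpath == b"file.txt"
    assert oldsha == old_blob.id and newsha == new_blob.id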

346 def iter_tree_contents( 

347 self, tree_id: bytes, include_trees: bool = False 

348 ) -> Iterator[tuple[bytes, int, bytes]]: 

349 """Iterate the contents of a tree and all subtrees. 

350 

351 Iteration is depth-first pre-order, as in e.g. os.walk. 

352 

353 Args: 

354 tree_id: SHA1 of the tree. 

355 include_trees: If True, include tree objects in the iteration. 

356 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

357 tree. 

358 """ 

359 warnings.warn( 

360 "Please use dulwich.object_store.iter_tree_contents", 

361 DeprecationWarning, 

362 stacklevel=2, 

363 ) 

364 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

365 

366 def iterobjects_subset( 

367 self, shas: Iterable[bytes], *, allow_missing: bool = False 

368 ) -> Iterator[ShaFile]: 

369 """Iterate over a subset of objects in the store. 

370 

371 Args: 

372 shas: Iterable of object SHAs to retrieve 

373 allow_missing: If True, skip missing objects; if False, raise KeyError 

374 

375 Returns: 

376 Iterator of ShaFile objects 

377 

378 Raises: 

379 KeyError: If an object is missing and allow_missing is False 

380 """ 

381 for sha in shas: 

382 try: 

383 yield self[sha] 

384 except KeyError: 

385 if not allow_missing: 

386 raise 

387 

388 def find_missing_objects( 

389 self, 

390 haves: Iterable[bytes], 

391 wants: Iterable[bytes], 

392 shallow: Optional[set[bytes]] = None, 

393 progress: Optional[Callable] = None, 

394 get_tagged: Optional[Callable] = None, 

395 get_parents: Callable = lambda commit: commit.parents, 

396 ) -> Iterator[tuple[bytes, Optional[bytes]]]: 

397 """Find the missing objects required for a set of revisions. 

398 

399 Args: 

400 haves: Iterable over SHAs already in common. 

401 wants: Iterable over SHAs of objects to fetch. 

402 shallow: Set of shallow commit SHA1s to skip 

403 progress: Simple progress function that will be called with 

404 updated progress strings. 

405 get_tagged: Function that returns a dict of pointed-to sha -> 

406 tag sha for including tags. 

407 get_parents: Optional function for getting the parents of a 

408 commit. 

409 Returns: Iterator over (sha, path) pairs. 

410 """ 

411 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

412 finder = MissingObjectFinder( 

413 self, 

414 haves=haves, 

415 wants=wants, 

416 shallow=shallow, 

417 progress=progress, 

418 get_tagged=get_tagged, 

419 get_parents=get_parents, 

420 ) 

421 return iter(finder) 

422 

423 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]: 

424 """Find which revisions this store has in common using graphwalker. 

425 

426 Args: 

427 graphwalker: A graphwalker object. 

428 Returns: List of SHAs that are in common 

429 """ 

430 haves = [] 

431 sha = next(graphwalker) 

432 while sha: 

433 if sha in self: 

434 haves.append(sha) 

435 graphwalker.ack(sha) 

436 sha = next(graphwalker) 

437 return haves 

438 

439 def generate_pack_data( 

440 self, 

441 have: Iterable[bytes], 

442 want: Iterable[bytes], 

443 shallow: Optional[set[bytes]] = None, 

444 progress: Optional[Callable] = None, 

445 ofs_delta: bool = True, 

446 ) -> tuple[int, Iterator[UnpackedObject]]: 

447 """Generate pack data objects for a set of wants/haves. 

448 

449 Args: 

450 have: List of SHA1s of objects that should not be sent 

451 want: List of SHA1s of objects that should be sent 

452 shallow: Set of shallow commit SHA1s to skip 

453 ofs_delta: Whether OFS deltas can be included 

454 progress: Optional progress reporting method 

455 """ 

456 # Note that the pack-specific implementation below is more efficient, 

457 # as it reuses deltas 

458 missing_objects = MissingObjectFinder( 

459 self, haves=have, wants=want, shallow=shallow, progress=progress 

460 ) 

461 object_ids = list(missing_objects) 

462 return pack_objects_to_data( 

463 [(self[oid], path) for oid, path in object_ids], 

464 ofs_delta=ofs_delta, 

465 progress=progress, 

466 ) 

467 
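# Illustrative usage sketch (not part of object_store.py): pair
# generate_pack_data() with dulwich.pack.write_pack_data() to produce an
# in-memory pack stream containing the objects the other side lacks.
import io

from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob
from dulwich.pack import write_pack_data

store = MemoryObjectStore()
blob = Blob.from_string(b"hello pack\n")
store.add_object(blob)

count, records = store.generate_pack_data(have=[], want=[blob.id])
buf = io.BytesIO()
write_pack_data(buf.write, records, num_records=count)
# buf now holds a complete pack stream containing the single blob.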

468 def peel_sha(self, sha: bytes) -> bytes: 

469 """Peel all tags from a SHA. 

470 

471 Args: 

472 sha: The object SHA to peel. 

473 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

474 intermediate tags; if the original ref does not point to a tag, 

475 this will equal the original SHA1. 

476 """ 

477 warnings.warn( 

478 "Please use dulwich.object_store.peel_sha()", 

479 DeprecationWarning, 

480 stacklevel=2, 

481 ) 

482 return peel_sha(self, sha)[1].id 

483 

484 def _get_depth( 

485 self, 

486 head: bytes, 

487 get_parents: Callable = lambda commit: commit.parents, 

488 max_depth: Optional[int] = None, 

489 ) -> int: 

490 """Return the current available depth for the given head. 

491 

492 For commits with multiple parents, the largest possible depth will be 

493 returned. 

494 

495 Args: 

496 head: commit to start from 

497 get_parents: optional function for getting the parents of a commit 

498 max_depth: maximum depth to search 

499 """ 

500 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

501 

502 def close(self) -> None: 

503 """Close any files opened by this object store.""" 

504 # Default implementation is a NO-OP 

505 

506 def prune(self, grace_period: Optional[int] = None) -> None: 

507 """Prune/clean up this object store. 

508 

509 This includes removing orphaned temporary files and other 

510 housekeeping tasks. Default implementation is a NO-OP. 

511 

512 Args: 

513 grace_period: Grace period in seconds for removing temporary files. 

514 If None, uses the default grace period. 

515 """ 

516 # Default implementation is a NO-OP 

517 

518 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

519 """Iterate over all SHA1s that start with a given prefix. 

520 

521 The default implementation is a naive iteration over all objects. 

522 However, subclasses may override this method with more efficient 

523 implementations. 

524 """ 

525 for sha in self: 

526 if sha.startswith(prefix): 

527 yield sha 

528 

529 def get_commit_graph(self) -> Optional["CommitGraph"]: 

530 """Get the commit graph for this object store. 

531 

532 Returns: 

533 CommitGraph object if available, None otherwise 

534 """ 

535 return None 

536 

537 def write_commit_graph( 

538 self, refs: Optional[list[bytes]] = None, reachable: bool = True 

539 ) -> None: 

540 """Write a commit graph file for this object store. 

541 

542 Args: 

543 refs: List of refs to include. If None, includes all refs from object store. 

544 reachable: If True, includes all commits reachable from refs. 

545 If False, only includes the direct ref targets. 

546 

547 Note: 

548 The default implementation raises NotImplementedError. Subscribers... Subclasses should 

549 override this method to provide commit graph writing functionality. 

550 """ 

551 raise NotImplementedError(self.write_commit_graph) 

552 

553 def get_object_mtime(self, sha: bytes) -> float: 

554 """Get the modification time of an object. 

555 

556 Args: 

557 sha: SHA1 of the object 

558 

559 Returns: 

560 Modification time as seconds since epoch 

561 

562 Raises: 

563 KeyError: if the object is not found 

564 """ 

565 # Default implementation raises KeyError 

566 # Subclasses should override to provide actual mtime 

567 raise KeyError(sha) 

568 

569 

570class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

571 """Object store that uses pack files for storage. 

572 

573 This class provides a base implementation for object stores that use 

574 Git pack files as their primary storage mechanism. It handles caching 

575 of open pack files and provides configuration for pack file operations. 

576 """ 

577 

578 def __init__( 

579 self, 

580 pack_compression_level: int = -1, 

581 pack_index_version: Optional[int] = None, 

582 pack_delta_window_size: Optional[int] = None, 

583 pack_window_memory: Optional[int] = None, 

584 pack_delta_cache_size: Optional[int] = None, 

585 pack_depth: Optional[int] = None, 

586 pack_threads: Optional[int] = None, 

587 pack_big_file_threshold: Optional[int] = None, 

588 ) -> None: 

589 """Initialize a PackBasedObjectStore. 

590 

591 Args: 

592 pack_compression_level: Compression level for pack files (-1 to 9) 

593 pack_index_version: Pack index version to use 

594 pack_delta_window_size: Window size for delta compression 

595 pack_window_memory: Maximum memory to use for delta window 

596 pack_delta_cache_size: Cache size for delta operations 

597 pack_depth: Maximum depth for pack deltas 

598 pack_threads: Number of threads to use for packing 

599 pack_big_file_threshold: Threshold for treating files as "big" 

600 """ 

601 self._pack_cache: dict[str, Pack] = {} 

602 self.pack_compression_level = pack_compression_level 

603 self.pack_index_version = pack_index_version 

604 self.pack_delta_window_size = pack_delta_window_size 

605 self.pack_window_memory = pack_window_memory 

606 self.pack_delta_cache_size = pack_delta_cache_size 

607 self.pack_depth = pack_depth 

608 self.pack_threads = pack_threads 

609 self.pack_big_file_threshold = pack_big_file_threshold 

610 

611 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

612 """Add a new pack to this object store.""" 

613 raise NotImplementedError(self.add_pack) 

614 

615 def add_pack_data( 

616 self, 

617 count: int, 

618 unpacked_objects: Iterator[UnpackedObject], 

619 progress: Optional[Callable] = None, 

620 ) -> Optional["Pack"]: 

621 """Add pack data to this object store. 

622 

623 Args: 

624 count: Number of items to add 

625 unpacked_objects: Iterator of UnpackedObject instances 

626 progress: Optional progress callback 

627 """ 

628 if count == 0: 

629 # Don't bother writing an empty pack file 

630 return None 

631 f, commit, abort = self.add_pack() 

632 try: 

633 write_pack_data( 

634 f.write, 

635 unpacked_objects, 

636 num_records=count, 

637 progress=progress, 

638 compression_level=self.pack_compression_level, 

639 ) 

640 except BaseException: 

641 abort() 

642 raise 

643 else: 

644 return commit() 

645 
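# Illustrative sketch (not part of object_store.py) of the add_pack()
# contract used above: it hands back a writable file object plus commit/abort
# callbacks.  The temporary directory stands in for a real objects directory.
import tempfile

from dulwich.object_store import DiskObjectStore

store = DiskObjectStore.init(tempfile.mkdtemp())
f, commit, abort = store.add_pack()
try:
    pass  # ... write a complete pack stream to f here ...
except BaseException:
    abort()
    raise
else:
    # With nothing written commit() just discards the temp file and returns
    # None; with real pack data it indexes the pack and returns a Pack.
    pack = commit()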

646 @property 

647 def alternates(self) -> list: 

648 """Return list of alternate object stores.""" 

649 return [] 

650 

651 def contains_packed(self, sha: bytes) -> bool: 

652 """Check if a particular object is present by SHA1 and is packed. 

653 

654 This does not check alternates. 

655 """ 

656 for pack in self.packs: 

657 try: 

658 if sha in pack: 

659 return True 

660 except PackFileDisappeared: 

661 pass 

662 return False 

663 

664 def __contains__(self, sha) -> bool: 

665 """Check if a particular object is present by SHA1. 

666 

667 This method makes no distinction between loose and packed objects. 

668 """ 

669 if self.contains_packed(sha) or self.contains_loose(sha): 

670 return True 

671 for alternate in self.alternates: 

672 if sha in alternate: 

673 return True 

674 return False 

675 

676 def _add_cached_pack(self, base_name: str, pack: Pack) -> None: 

677 """Add a newly appeared pack to the cache by path.""" 

678 prev_pack = self._pack_cache.get(base_name) 

679 if prev_pack is not pack: 

680 self._pack_cache[base_name] = pack 

681 if prev_pack: 

682 prev_pack.close() 

683 

684 def generate_pack_data( 

685 self, have, want, shallow=None, progress=None, ofs_delta=True 

686 ) -> tuple[int, Iterator[UnpackedObject]]: 

687 """Generate pack data objects for a set of wants/haves. 

688 

689 Args: 

690 have: List of SHA1s of objects that should not be sent 

691 want: List of SHA1s of objects that should be sent 

692 shallow: Set of shallow commit SHA1s to skip 

693 ofs_delta: Whether OFS deltas can be included 

694 progress: Optional progress reporting method 

695 """ 

696 missing_objects = MissingObjectFinder( 

697 self, haves=have, wants=want, shallow=shallow, progress=progress 

698 ) 

699 remote_has = missing_objects.get_remote_has() 

700 object_ids = list(missing_objects) 

701 return len(object_ids), generate_unpacked_objects( 

702 self, 

703 object_ids, 

704 progress=progress, 

705 ofs_delta=ofs_delta, 

706 other_haves=remote_has, 

707 ) 

708 

709 def _clear_cached_packs(self) -> None: 

710 pack_cache = self._pack_cache 

711 self._pack_cache = {} 

712 while pack_cache: 

713 (name, pack) = pack_cache.popitem() 

714 pack.close() 

715 

716 def _iter_cached_packs(self) -> Iterator[Pack]: 

717 return iter(self._pack_cache.values()) 

718 

719 def _update_pack_cache(self) -> list[Pack]: 

720 raise NotImplementedError(self._update_pack_cache) 

721 

722 def close(self) -> None: 

723 """Close the object store and release resources. 

724 

725 This method closes all cached pack files and frees associated resources. 

726 """ 

727 self._clear_cached_packs() 

728 

729 @property 

730 def packs(self) -> list[Pack]: 

731 """List with pack objects.""" 

732 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

733 

734 def count_pack_files(self) -> int: 

735 """Count the number of pack files. 

736 

737 Returns: 

738 Number of pack files (excluding those with .keep files) 

739 """ 

740 count = 0 

741 for pack in self.packs: 

742 # Check if there's a .keep file for this pack 

743 keep_path = pack._basename + ".keep" 

744 if not os.path.exists(keep_path): 

745 count += 1 

746 return count 

747 

748 def _iter_alternate_objects(self) -> Iterator[bytes]: 

749 """Iterate over the SHAs of all the objects in alternate stores.""" 

750 for alternate in self.alternates: 

751 yield from alternate 

752 

753 def _iter_loose_objects(self) -> Iterator[bytes]: 

754 """Iterate over the SHAs of all loose objects.""" 

755 raise NotImplementedError(self._iter_loose_objects) 

756 

757 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]: 

758 raise NotImplementedError(self._get_loose_object) 

759 

760 def delete_loose_object(self, sha: bytes) -> None: 

761 """Delete a loose object. 

762 

763 This method only handles loose objects. For packed objects, 

764 use repack(exclude=...) to exclude them during repacking. 

765 """ 

766 raise NotImplementedError(self.delete_loose_object) 

767 

768 def _remove_pack(self, pack: "Pack") -> None: 

769 raise NotImplementedError(self._remove_pack) 

770 

771 def pack_loose_objects(self) -> int: 

772 """Pack loose objects. 

773 

774 Returns: Number of objects packed 

775 """ 

776 objects: list[tuple[ShaFile, None]] = [] 

777 for sha in self._iter_loose_objects(): 

778 obj = self._get_loose_object(sha) 

779 if obj is not None: 

780 objects.append((obj, None)) 

781 self.add_objects(objects) 

782 for obj, path in objects: 

783 self.delete_loose_object(obj.id) 

784 return len(objects) 

785 

786 def repack(self, exclude: Optional[set] = None) -> int: 

787 """Repack the packs in this repository. 

788 

789 Note that this implementation is fairly naive and currently keeps all 

790 objects in memory while it repacks. 

791 

792 Args: 

793 exclude: Optional set of object SHAs to exclude from repacking 

794 """ 

795 if exclude is None: 

796 exclude = set() 

797 

798 loose_objects = set() 

799 excluded_loose_objects = set() 

800 for sha in self._iter_loose_objects(): 

801 if sha not in exclude: 

802 obj = self._get_loose_object(sha) 

803 if obj is not None: 

804 loose_objects.add(obj) 

805 else: 

806 excluded_loose_objects.add(sha) 

807 

808 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects} 

809 old_packs = {p.name(): p for p in self.packs} 

810 for name, pack in old_packs.items(): 

811 objects.update( 

812 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

813 ) 

814 

815 # Only create a new pack if there are objects to pack 

816 if objects: 

817 # The name of the consolidated pack might match the name of a 

818 # pre-existing pack. Take care not to remove the newly created 

819 # consolidated pack. 

820 consolidated = self.add_objects(list(objects)) 

821 if consolidated is not None: 

822 old_packs.pop(consolidated.name(), None) 

823 

824 # Delete loose objects that were packed 

825 for obj in loose_objects: 

826 if obj is not None: 

827 self.delete_loose_object(obj.id) 

828 # Delete excluded loose objects 

829 for sha in excluded_loose_objects: 

830 self.delete_loose_object(sha) 

831 for name, pack in old_packs.items(): 

832 self._remove_pack(pack) 

833 self._update_pack_cache() 

834 return len(objects) 

835 
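# Illustrative usage sketch (not part of object_store.py): loose objects
# written with add_object() can be consolidated into a single pack via
# repack().  The directory is a throwaway temporary location.
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

store = DiskObjectStore.init(tempfile.mkdtemp())
for data in (b"one\n", b"two\n", b"three\n"):
    store.add_object(Blob.from_string(data))

assert store.count_loose_objects() == 3
assert store.repack() == 3              # everything repacked
assert store.count_loose_objects() == 0
assert store.count_pack_files() == 1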

836 def __iter__(self): 

837 """Iterate over the SHAs that are present in this store.""" 

838 self._update_pack_cache() 

839 for pack in self._iter_cached_packs(): 

840 try: 

841 yield from pack 

842 except PackFileDisappeared: 

843 pass 

844 yield from self._iter_loose_objects() 

845 yield from self._iter_alternate_objects() 

846 

847 def contains_loose(self, sha): 

848 """Check if a particular object is present by SHA1 and is loose. 

849 

850 This does not check alternates. 

851 """ 

852 return self._get_loose_object(sha) is not None 

853 

854 def get_raw(self, name): 

855 """Obtain the raw fulltext for an object. 

856 

857 Args: 

858 name: sha for the object. 

859 Returns: tuple with numeric type and object contents. 

860 """ 

861 if name == ZERO_SHA: 

862 raise KeyError(name) 

863 if len(name) == 40: 

864 sha = hex_to_sha(name) 

865 hexsha = name 

866 elif len(name) == 20: 

867 sha = name 

868 hexsha = None 

869 else: 

870 raise AssertionError(f"Invalid object name {name!r}") 

871 for pack in self._iter_cached_packs(): 

872 try: 

873 return pack.get_raw(sha) 

874 except (KeyError, PackFileDisappeared): 

875 pass 

876 if hexsha is None: 

877 hexsha = sha_to_hex(name) 

878 ret = self._get_loose_object(hexsha) 

879 if ret is not None: 

880 return ret.type_num, ret.as_raw_string() 

881 # Maybe something else has added a pack with the object 

882 # in the mean time? 

883 for pack in self._update_pack_cache(): 

884 try: 

885 return pack.get_raw(sha) 

886 except KeyError: 

887 pass 

888 for alternate in self.alternates: 

889 try: 

890 return alternate.get_raw(hexsha) 

891 except KeyError: 

892 pass 

893 raise KeyError(hexsha) 

894 

895 def iter_unpacked_subset( 

896 self, 

897 shas: set[bytes], 

898 include_comp: bool = False, 

899 allow_missing: bool = False, 

900 convert_ofs_delta: bool = True, 

901 ) -> Iterator[UnpackedObject]: 

902 """Iterate over a subset of objects, yielding UnpackedObject instances. 

903 

904 Args: 

905 shas: Set of object SHAs to retrieve 

906 include_comp: Whether to include compressed data 

907 allow_missing: If True, skip missing objects; if False, raise KeyError 

908 convert_ofs_delta: Whether to convert OFS_DELTA objects 

909 

910 Returns: 

911 Iterator of UnpackedObject instances 

912 

913 Raises: 

914 KeyError: If an object is missing and allow_missing is False 

915 """ 

916 todo: set[bytes] = set(shas) 

917 for p in self._iter_cached_packs(): 

918 for unpacked in p.iter_unpacked_subset( 

919 todo, 

920 include_comp=include_comp, 

921 allow_missing=True, 

922 convert_ofs_delta=convert_ofs_delta, 

923 ): 

924 yield unpacked 

925 hexsha = sha_to_hex(unpacked.sha()) 

926 todo.remove(hexsha) 

927 # Maybe something else has added a pack with the object 

928 # in the mean time? 

929 for p in self._update_pack_cache(): 

930 for unpacked in p.iter_unpacked_subset( 

931 todo, 

932 include_comp=include_comp, 

933 allow_missing=True, 

934 convert_ofs_delta=convert_ofs_delta, 

935 ): 

936 yield unpacked 

937 hexsha = sha_to_hex(unpacked.sha()) 

938 todo.remove(hexsha) 

939 for alternate in self.alternates: 

940 for unpacked in alternate.iter_unpacked_subset( 

941 todo, 

942 include_comp=include_comp, 

943 allow_missing=True, 

944 convert_ofs_delta=convert_ofs_delta, 

945 ): 

946 yield unpacked 

947 hexsha = sha_to_hex(unpacked.sha()) 

948 todo.remove(hexsha) 

949 

950 def iterobjects_subset( 

951 self, shas: Iterable[bytes], *, allow_missing: bool = False 

952 ) -> Iterator[ShaFile]: 

953 """Iterate over a subset of objects in the store. 

954 

955 This method searches for objects in pack files, alternates, and loose storage. 

956 

957 Args: 

958 shas: Iterable of object SHAs to retrieve 

959 allow_missing: If True, skip missing objects; if False, raise KeyError 

960 

961 Returns: 

962 Iterator of ShaFile objects 

963 

964 Raises: 

965 KeyError: If an object is missing and allow_missing is False 

966 """ 

967 todo: set[bytes] = set(shas) 

968 for p in self._iter_cached_packs(): 

969 for o in p.iterobjects_subset(todo, allow_missing=True): 

970 yield o 

971 todo.remove(o.id) 

972 # Maybe something else has added a pack with the object 

973 # in the mean time? 

974 for p in self._update_pack_cache(): 

975 for o in p.iterobjects_subset(todo, allow_missing=True): 

976 yield o 

977 todo.remove(o.id) 

978 for alternate in self.alternates: 

979 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

980 yield o 

981 todo.remove(o.id) 

982 for oid in todo: 

983 loose_obj: Optional[ShaFile] = self._get_loose_object(oid) 

984 if loose_obj is not None: 

985 yield loose_obj 

986 elif not allow_missing: 

987 raise KeyError(oid) 

988 

989 def get_unpacked_object( 

990 self, sha1: bytes, *, include_comp: bool = False 

991 ) -> UnpackedObject: 

992 """Obtain the unpacked object. 

993 

994 Args: 

995 sha1: sha for the object. 

996 include_comp: Whether to include compression metadata. 

997 """ 

998 if sha1 == ZERO_SHA: 

999 raise KeyError(sha1) 

1000 if len(sha1) == 40: 

1001 sha = hex_to_sha(sha1) 

1002 hexsha = sha1 

1003 elif len(sha1) == 20: 

1004 sha = sha1 

1005 hexsha = None 

1006 else: 

1007 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

1008 for pack in self._iter_cached_packs(): 

1009 try: 

1010 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1011 except (KeyError, PackFileDisappeared): 

1012 pass 

1013 if hexsha is None: 

1014 hexsha = sha_to_hex(sha1) 

1015 # Maybe something else has added a pack with the object 

1016 # in the mean time? 

1017 for pack in self._update_pack_cache(): 

1018 try: 

1019 return pack.get_unpacked_object(sha, include_comp=include_comp) 

1020 except KeyError: 

1021 pass 

1022 for alternate in self.alternates: 

1023 try: 

1024 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

1025 except KeyError: 

1026 pass 

1027 raise KeyError(hexsha) 

1028 

1029 def add_objects( 

1030 self, 

1031 objects: Sequence[tuple[ShaFile, Optional[str]]], 

1032 progress: Optional[Callable[[str], None]] = None, 

1033 ) -> Optional["Pack"]: 

1034 """Add a set of objects to this object store. 

1035 

1036 Args: 

1037 objects: Iterable over (object, path) tuples, should support 

1038 __len__. 

1039 progress: Optional progress reporting function. 

1040 Returns: Pack object of the objects written. 

1041 """ 

1042 count = len(objects) 

1043 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

1044 return self.add_pack_data(count, record_iter, progress=progress) 

1045 

1046 

1047class DiskObjectStore(PackBasedObjectStore): 

1048 """Git-style object store that exists on disk.""" 

1049 

1050 path: Union[str, os.PathLike] 

1051 pack_dir: Union[str, os.PathLike] 

1052 _alternates: Optional[list["DiskObjectStore"]] 

1053 _commit_graph: Optional["CommitGraph"] 

1054 

1055 def __init__( 

1056 self, 

1057 path: Union[str, os.PathLike], 

1058 loose_compression_level=-1, 

1059 pack_compression_level=-1, 

1060 pack_index_version=None, 

1061 pack_delta_window_size=None, 

1062 pack_window_memory=None, 

1063 pack_delta_cache_size=None, 

1064 pack_depth=None, 

1065 pack_threads=None, 

1066 pack_big_file_threshold=None, 

1067 ) -> None: 

1068 """Open an object store. 

1069 

1070 Args: 

1071 path: Path of the object store. 

1072 loose_compression_level: zlib compression level for loose objects 

1073 pack_compression_level: zlib compression level for pack objects 

1074 pack_index_version: pack index version to use (1, 2, or 3) 

1075 pack_delta_window_size: sliding window size for delta compression 

1076 pack_window_memory: memory limit for delta window operations 

1077 pack_delta_cache_size: size of cache for delta operations 

1078 pack_depth: maximum delta chain depth 

1079 pack_threads: number of threads for pack operations 

1080 pack_big_file_threshold: threshold for treating files as big 

1081 """ 

1082 super().__init__( 

1083 pack_compression_level=pack_compression_level, 

1084 pack_index_version=pack_index_version, 

1085 pack_delta_window_size=pack_delta_window_size, 

1086 pack_window_memory=pack_window_memory, 

1087 pack_delta_cache_size=pack_delta_cache_size, 

1088 pack_depth=pack_depth, 

1089 pack_threads=pack_threads, 

1090 pack_big_file_threshold=pack_big_file_threshold, 

1091 ) 

1092 self.path = path 

1093 self.pack_dir = os.path.join(self.path, PACKDIR) 

1094 self._alternates = None 

1095 self.loose_compression_level = loose_compression_level 

1096 self.pack_compression_level = pack_compression_level 

1097 self.pack_index_version = pack_index_version 

1098 

1099 # Commit graph support - lazy loaded 

1100 self._commit_graph = None 

1101 self._use_commit_graph = True # Default to true 

1102 

1103 def __repr__(self) -> str: 

1104 """Return string representation of DiskObjectStore. 

1105 

1106 Returns: 

1107 String representation including the store path 

1108 """ 

1109 return f"<{self.__class__.__name__}({self.path!r})>" 

1110 

1111 @classmethod 

1112 def from_config(cls, path: Union[str, os.PathLike], config): 

1113 """Create a DiskObjectStore from a configuration object. 

1114 

1115 Args: 

1116 path: Path to the object store directory 

1117 config: Configuration object to read settings from 

1118 

1119 Returns: 

1120 New DiskObjectStore instance configured according to config 

1121 """ 

1122 try: 

1123 default_compression_level = int( 

1124 config.get((b"core",), b"compression").decode() 

1125 ) 

1126 except KeyError: 

1127 default_compression_level = -1 

1128 try: 

1129 loose_compression_level = int( 

1130 config.get((b"core",), b"looseCompression").decode() 

1131 ) 

1132 except KeyError: 

1133 loose_compression_level = default_compression_level 

1134 try: 

1135 pack_compression_level = int( 

1136 config.get((b"core",), "packCompression").decode() 

1137 ) 

1138 except KeyError: 

1139 pack_compression_level = default_compression_level 

1140 try: 

1141 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1142 except KeyError: 

1143 pack_index_version = None 

1144 

1145 # Read pack configuration options 

1146 try: 

1147 pack_delta_window_size = int( 

1148 config.get((b"pack",), b"deltaWindowSize").decode() 

1149 ) 

1150 except KeyError: 

1151 pack_delta_window_size = None 

1152 try: 

1153 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1154 except KeyError: 

1155 pack_window_memory = None 

1156 try: 

1157 pack_delta_cache_size = int( 

1158 config.get((b"pack",), b"deltaCacheSize").decode() 

1159 ) 

1160 except KeyError: 

1161 pack_delta_cache_size = None 

1162 try: 

1163 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1164 except KeyError: 

1165 pack_depth = None 

1166 try: 

1167 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1168 except KeyError: 

1169 pack_threads = None 

1170 try: 

1171 pack_big_file_threshold = int( 

1172 config.get((b"pack",), b"bigFileThreshold").decode() 

1173 ) 

1174 except KeyError: 

1175 pack_big_file_threshold = None 

1176 

1177 # Read core.commitGraph setting 

1178 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1179 

1180 instance = cls( 

1181 path, 

1182 loose_compression_level, 

1183 pack_compression_level, 

1184 pack_index_version, 

1185 pack_delta_window_size, 

1186 pack_window_memory, 

1187 pack_delta_cache_size, 

1188 pack_depth, 

1189 pack_threads, 

1190 pack_big_file_threshold, 

1191 ) 

1192 instance._use_commit_graph = use_commit_graph 

1193 return instance 

1194 

1195 @property 

1196 def alternates(self): 

1197 """Get the list of alternate object stores. 

1198 

1199 Reads from .git/objects/info/alternates if not already cached. 

1200 

1201 Returns: 

1202 List of DiskObjectStore instances for alternate object directories 

1203 """ 

1204 if self._alternates is not None: 

1205 return self._alternates 

1206 self._alternates = [] 

1207 for path in self._read_alternate_paths(): 

1208 self._alternates.append(DiskObjectStore(path)) 

1209 return self._alternates 

1210 

1211 def _read_alternate_paths(self): 

1212 try: 

1213 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1214 except FileNotFoundError: 

1215 return 

1216 with f: 

1217 for line in f.readlines(): 

1218 line = line.rstrip(b"\n") 

1219 if line.startswith(b"#"): 

1220 continue 

1221 if os.path.isabs(line): 

1222 yield os.fsdecode(line) 

1223 else: 

1224 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1225 

1226 def add_alternate_path(self, path) -> None: 

1227 """Add an alternate path to this object store.""" 

1228 try: 

1229 os.mkdir(os.path.join(self.path, INFODIR)) 

1230 except FileExistsError: 

1231 pass 

1232 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1233 with GitFile(alternates_path, "wb") as f: 

1234 try: 

1235 orig_f = open(alternates_path, "rb") 

1236 except FileNotFoundError: 

1237 pass 

1238 else: 

1239 with orig_f: 

1240 f.write(orig_f.read()) 

1241 f.write(os.fsencode(path) + b"\n") 

1242 

1243 if not os.path.isabs(path): 

1244 path = os.path.join(self.path, path) 

1245 self.alternates.append(DiskObjectStore(path)) 

1246 

1247 def _update_pack_cache(self): 

1248 """Read and iterate over new pack files and cache them.""" 

1249 try: 

1250 pack_dir_contents = os.listdir(self.pack_dir) 

1251 except FileNotFoundError: 

1252 self.close() 

1253 return [] 

1254 pack_files = set() 

1255 for name in pack_dir_contents: 

1256 if name.startswith("pack-") and name.endswith(".pack"): 

1257 # verify that idx exists first (otherwise the pack was not yet 

1258 # fully written) 

1259 idx_name = os.path.splitext(name)[0] + ".idx" 

1260 if idx_name in pack_dir_contents: 

1261 pack_name = name[: -len(".pack")] 

1262 pack_files.add(pack_name) 

1263 

1264 # Open newly appeared pack files 

1265 new_packs = [] 

1266 for f in pack_files: 

1267 if f not in self._pack_cache: 

1268 pack = Pack( 

1269 os.path.join(self.pack_dir, f), 

1270 delta_window_size=self.pack_delta_window_size, 

1271 window_memory=self.pack_window_memory, 

1272 delta_cache_size=self.pack_delta_cache_size, 

1273 depth=self.pack_depth, 

1274 threads=self.pack_threads, 

1275 big_file_threshold=self.pack_big_file_threshold, 

1276 ) 

1277 new_packs.append(pack) 

1278 self._pack_cache[f] = pack 

1279 # Remove disappeared pack files 

1280 for f in set(self._pack_cache) - pack_files: 

1281 self._pack_cache.pop(f).close() 

1282 return new_packs 

1283 

1284 def _get_shafile_path(self, sha): 

1285 # Check from object dir 

1286 return hex_to_filename(os.fspath(self.path), sha) 

1287 

1288 def _iter_loose_objects(self): 

1289 for base in os.listdir(self.path): 

1290 if len(base) != 2: 

1291 continue 

1292 for rest in os.listdir(os.path.join(self.path, base)): 

1293 sha = os.fsencode(base + rest) 

1294 if not valid_hexsha(sha): 

1295 continue 

1296 yield sha 

1297 

1298 def count_loose_objects(self) -> int: 

1299 """Count the number of loose objects in the object store. 

1300 

1301 Returns: 

1302 Number of loose objects 

1303 """ 

1304 count = 0 

1305 if not os.path.exists(self.path): 

1306 return 0 

1307 

1308 for i in range(256): 

1309 subdir = os.path.join(self.path, f"{i:02x}") 

1310 try: 

1311 count += len( 

1312 [ 

1313 name 

1314 for name in os.listdir(subdir) 

1315 if len(name) == 38 # 40 - 2 for the prefix 

1316 ] 

1317 ) 

1318 except FileNotFoundError: 

1319 # Directory may have been removed or is inaccessible 

1320 continue 

1321 

1322 return count 

1323 

1324 def _get_loose_object(self, sha): 

1325 path = self._get_shafile_path(sha) 

1326 try: 

1327 return ShaFile.from_path(path) 

1328 except FileNotFoundError: 

1329 return None 

1330 

1331 def delete_loose_object(self, sha) -> None: 

1332 """Delete a loose object from disk. 

1333 

1334 Args: 

1335 sha: SHA1 of the object to delete 

1336 

1337 Raises: 

1338 FileNotFoundError: If the object file doesn't exist 

1339 """ 

1340 os.remove(self._get_shafile_path(sha)) 

1341 

1342 def get_object_mtime(self, sha): 

1343 """Get the modification time of an object. 

1344 

1345 Args: 

1346 sha: SHA1 of the object 

1347 

1348 Returns: 

1349 Modification time as seconds since epoch 

1350 

1351 Raises: 

1352 KeyError: if the object is not found 

1353 """ 

1354 # First check if it's a loose object 

1355 if self.contains_loose(sha): 

1356 path = self._get_shafile_path(sha) 

1357 try: 

1358 return os.path.getmtime(path) 

1359 except FileNotFoundError: 

1360 pass 

1361 

1362 # Check if it's in a pack file 

1363 for pack in self.packs: 

1364 try: 

1365 if sha in pack: 

1366 # Use the pack file's mtime for packed objects 

1367 pack_path = pack._data_path 

1368 try: 

1369 return os.path.getmtime(pack_path) 

1370 except (FileNotFoundError, AttributeError): 

1371 pass 

1372 except PackFileDisappeared: 

1373 pass 

1374 

1375 raise KeyError(sha) 

1376 

1377 def _remove_pack(self, pack) -> None: 

1378 try: 

1379 del self._pack_cache[os.path.basename(pack._basename)] 

1380 except KeyError: 

1381 pass 

1382 pack.close() 

1383 os.remove(pack.data.path) 

1384 os.remove(pack.index.path) 

1385 

1386 def _get_pack_basepath(self, entries): 

1387 suffix_bytes = iter_sha1(entry[0] for entry in entries) 

1388 # TODO: Handle self.pack_dir being bytes 

1389 suffix = suffix_bytes.decode("ascii") 

1390 return os.path.join(self.pack_dir, "pack-" + suffix) 

1391 

1392 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1393 """Move a specific file containing a pack into the pack directory. 

1394 

1395 Note: The file should be on the same file system as the 

1396 packs directory. 

1397 

1398 Args: 

1399 f: Open file object for the pack. 

1400 path: Path to the pack file. 

1401 num_objects: Number of objects in the pack. 

1402 indexer: A PackIndexer for indexing the pack. 

1403 progress: Optional progress reporting function. 

1404 """ 

1405 entries = [] 

1406 for i, entry in enumerate(indexer): 

1407 if progress is not None: 

1408 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1409 entries.append(entry) 

1410 

1411 pack_sha, extra_entries = extend_pack( 

1412 f, 

1413 indexer.ext_refs, 

1414 get_raw=self.get_raw, 

1415 compression_level=self.pack_compression_level, 

1416 progress=progress, 

1417 ) 

1418 f.flush() 

1419 try: 

1420 fileno = f.fileno() 

1421 except AttributeError: 

1422 pass 

1423 else: 

1424 os.fsync(fileno) 

1425 f.close() 

1426 

1427 entries.extend(extra_entries) 

1428 

1429 # Move the pack in. 

1430 entries.sort() 

1431 pack_base_name = self._get_pack_basepath(entries) 

1432 

1433 for pack in self.packs: 

1434 if pack._basename == pack_base_name: 

1435 return pack 

1436 

1437 target_pack_path = pack_base_name + ".pack" 

1438 target_index_path = pack_base_name + ".idx" 

1439 if sys.platform == "win32": 

1440 # Windows might have the target pack file lingering. Attempt 

1441 # removal, silently passing if the target does not exist. 

1442 with suppress(FileNotFoundError): 

1443 os.remove(target_pack_path) 

1444 os.rename(path, target_pack_path) 

1445 

1446 # Write the index. 

1447 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1448 write_pack_index( 

1449 index_file, entries, pack_sha, version=self.pack_index_version 

1450 ) 

1451 

1452 # Add the pack to the store and return it. 

1453 final_pack = Pack( 

1454 pack_base_name, 

1455 delta_window_size=self.pack_delta_window_size, 

1456 window_memory=self.pack_window_memory, 

1457 delta_cache_size=self.pack_delta_cache_size, 

1458 depth=self.pack_depth, 

1459 threads=self.pack_threads, 

1460 big_file_threshold=self.pack_big_file_threshold, 

1461 ) 

1462 final_pack.check_length_and_checksum() 

1463 self._add_cached_pack(pack_base_name, final_pack) 

1464 return final_pack 

1465 

1466 def add_thin_pack(self, read_all, read_some, progress=None): 

1467 """Add a new thin pack to this object store. 

1468 

1469 Thin packs are packs that contain deltas with parents that exist 

1470 outside the pack. They should never be placed in the object store 

1471 directly, and always indexed and completed as they are copied. 

1472 

1473 Args: 

1474 read_all: Read function that blocks until the number of 

1475 requested bytes are read. 

1476 read_some: Read function that returns at least one byte, but may 

1477 not return the number of bytes requested. 

1478 progress: Optional progress reporting function. 

1479 Returns: A Pack object pointing at the now-completed thin pack in the 

1480 objects/pack directory. 

1481 """ 

1482 import tempfile 

1483 

1484 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1485 with os.fdopen(fd, "w+b") as f: 

1486 os.chmod(path, PACK_MODE) 

1487 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1488 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1489 copier.verify(progress=progress) 

1490 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1491 

1492 def add_pack(self): 

1493 """Add a new pack to this object store. 

1494 

1495 Returns: Fileobject to write to, a commit function to 

1496 call when the pack is finished and an abort 

1497 function. 

1498 """ 

1499 import tempfile 

1500 

1501 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1502 f = os.fdopen(fd, "w+b") 

1503 os.chmod(path, PACK_MODE) 

1504 

1505 def commit(): 

1506 if f.tell() > 0: 

1507 f.seek(0) 

1508 

1509 with PackData(path, f) as pd: 

1510 indexer = PackIndexer.for_pack_data( 

1511 pd, resolve_ext_ref=self.get_raw 

1512 ) 

1513 return self._complete_pack(f, path, len(pd), indexer) 

1514 else: 

1515 f.close() 

1516 os.remove(path) 

1517 return None 

1518 

1519 def abort() -> None: 

1520 f.close() 

1521 os.remove(path) 

1522 

1523 return f, commit, abort 

1524 

1525 def add_object(self, obj) -> None: 

1526 """Add a single object to this object store. 

1527 

1528 Args: 

1529 obj: Object to add 

1530 """ 

1531 path = self._get_shafile_path(obj.id) 

1532 dir = os.path.dirname(path) 

1533 try: 

1534 os.mkdir(dir) 

1535 except FileExistsError: 

1536 pass 

1537 if os.path.exists(path): 

1538 return # Already there, no need to write again 

1539 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1540 f.write( 

1541 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1542 ) 

1543 

1544 @classmethod 

1545 def init(cls, path: Union[str, os.PathLike]): 

1546 """Initialize a new disk object store. 

1547 

1548 Creates the necessary directory structure for a Git object store. 

1549 

1550 Args: 

1551 path: Path where the object store should be created 

1552 

1553 Returns: 

1554 New DiskObjectStore instance 

1555 """ 

1556 try: 

1557 os.mkdir(path) 

1558 except FileExistsError: 

1559 pass 

1560 os.mkdir(os.path.join(path, "info")) 

1561 os.mkdir(os.path.join(path, PACKDIR)) 

1562 return cls(path) 

1563 
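# Illustrative usage sketch (not part of object_store.py): initialize a
# fresh on-disk store in a temporary directory and round-trip one blob.
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

store = DiskObjectStore.init(tempfile.mkdtemp())
blob = Blob.from_string(b"hello object store\n")
store.add_object(blob)

assert store.contains_loose(blob.id)
type_num, raw = store.get_raw(blob.id)
assert raw == b"hello object store\n"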

1564 def iter_prefix(self, prefix): 

1565 """Iterate over all object SHAs with the given prefix. 

1566 

1567 Args: 

1568 prefix: Hex prefix to search for (as bytes) 

1569 

1570 Returns: 

1571 Iterator of object SHAs (as bytes) matching the prefix 

1572 """ 

1573 if len(prefix) < 2: 

1574 yield from super().iter_prefix(prefix) 

1575 return 

1576 seen = set() 

1577 dir = prefix[:2].decode() 

1578 rest = prefix[2:].decode() 

1579 try: 

1580 for name in os.listdir(os.path.join(self.path, dir)): 

1581 if name.startswith(rest): 

1582 sha = os.fsencode(dir + name) 

1583 if sha not in seen: 

1584 seen.add(sha) 

1585 yield sha 

1586 except FileNotFoundError: 

1587 pass 

1588 

1589 for p in self.packs: 

1590 bin_prefix = ( 

1591 binascii.unhexlify(prefix) 

1592 if len(prefix) % 2 == 0 

1593 else binascii.unhexlify(prefix[:-1]) 

1594 ) 

1595 for sha in p.index.iter_prefix(bin_prefix): 

1596 sha = sha_to_hex(sha) 

1597 if sha.startswith(prefix) and sha not in seen: 

1598 seen.add(sha) 

1599 yield sha 

1600 for alternate in self.alternates: 

1601 for sha in alternate.iter_prefix(prefix): 

1602 if sha not in seen: 

1603 seen.add(sha) 

1604 yield sha 

1605 
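# Illustrative usage sketch (not part of object_store.py): resolve an
# abbreviated hex SHA to the full object IDs that start with it.
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

store = DiskObjectStore.init(tempfile.mkdtemp())
blob = Blob.from_string(b"abbreviate me\n")
store.add_object(blob)

assert list(store.iter_prefix(blob.id[:7])) == [blob.id]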

1606 def get_commit_graph(self): 

1607 """Get the commit graph for this object store. 

1608 

1609 Returns: 

1610 CommitGraph object if available, None otherwise 

1611 """ 

1612 if not self._use_commit_graph: 

1613 return None 

1614 

1615 if self._commit_graph is None: 

1616 from .commit_graph import read_commit_graph 

1617 

1618 # Look for commit graph in our objects directory 

1619 graph_file = os.path.join(self.path, "info", "commit-graph") 

1620 if os.path.exists(graph_file): 

1621 self._commit_graph = read_commit_graph(graph_file) 

1622 return self._commit_graph 

1623 

1624 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1625 """Write a commit graph file for this object store. 

1626 

1627 Args: 

1628 refs: List of refs to include. If None, includes all refs from object store. 

1629 reachable: If True, includes all commits reachable from refs. 

1630 If False, only includes the direct ref targets. 

1631 """ 

1632 from .commit_graph import get_reachable_commits 

1633 

1634 if refs is None: 

1635 # Get all commit objects from the object store 

1636 all_refs = [] 

1637 # Iterate through all objects to find commits 

1638 for sha in self: 

1639 try: 

1640 obj = self[sha] 

1641 if obj.type_name == b"commit": 

1642 all_refs.append(sha) 

1643 except KeyError: 

1644 continue 

1645 else: 

1646 # Use provided refs 

1647 all_refs = refs 

1648 

1649 if not all_refs: 

1650 return # No commits to include 

1651 

1652 if reachable: 

1653 # Get all reachable commits 

1654 commit_ids = get_reachable_commits(self, all_refs) 

1655 else: 

1656 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1657 commit_ids = [] 

1658 for ref in all_refs: 

1659 if isinstance(ref, bytes) and len(ref) == 40: 

1660 # Already hex ObjectID 

1661 commit_ids.append(ref) 

1662 elif isinstance(ref, bytes) and len(ref) == 20: 

1663 # Binary SHA, convert to hex ObjectID 

1664 from .objects import sha_to_hex 

1665 

1666 commit_ids.append(sha_to_hex(ref)) 

1667 else: 

1668 # Assume it's already correct format 

1669 commit_ids.append(ref) 

1670 

1671 if commit_ids: 

1672 # Write commit graph directly to our object store path 

1673 # Generate the commit graph 

1674 from .commit_graph import generate_commit_graph 

1675 

1676 graph = generate_commit_graph(self, commit_ids) 

1677 

1678 if graph.entries: 

1679 # Ensure the info directory exists 

1680 info_dir = os.path.join(self.path, "info") 

1681 os.makedirs(info_dir, exist_ok=True) 

1682 

1683 # Write using GitFile for atomic operation 

1684 graph_path = os.path.join(info_dir, "commit-graph") 

1685 with GitFile(graph_path, "wb") as f: 

1686 assert isinstance( 

1687 f, _GitFile 

1688 ) # GitFile in write mode always returns _GitFile 

1689 graph.write_to_file(f) 

1690 

1691 # Clear cached commit graph so it gets reloaded 

1692 self._commit_graph = None 

1693 
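# Illustrative usage sketch (not part of object_store.py): write a
# commit-graph file for a single root commit and load it back lazily.
# Identity and timestamp fields are placeholder values.
import tempfile

from dulwich.object_store import DiskObjectStore
from dulwich.objects import Commit, Tree

store = DiskObjectStore.init(tempfile.mkdtemp())
tree = Tree()
store.add_object(tree)

commit = Commit()
commit.tree = tree.id
commit.parents = []
commit.author = commit.committer = b"Example <example@example.com>"
commit.author_time = commit.commit_time = 0
commit.author_timezone = commit.commit_timezone = 0
commit.message = b"initial"
store.add_object(commit)

store.write_commit_graph([commit.id])
assert store.get_commit_graph() is not None  # objects/info/commit-graph now exists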

1694 def prune(self, grace_period: Optional[int] = None) -> None: 

1695 """Prune/clean up this object store. 

1696 

1697 This removes temporary files that were left behind by interrupted 

1698 pack operations. These are files that start with ``tmp_pack_`` in the 

1699 repository directory or files with .pack extension but no corresponding 

1700 .idx file in the pack directory. 

1701 

1702 Args: 

1703 grace_period: Grace period in seconds for removing temporary files. 

1704 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1705 """ 

1706 import glob 

1707 

1708 if grace_period is None: 

1709 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1710 

1711 # Clean up tmp_pack_* files in the repository directory 

1712 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1713 # Check if file is old enough (more than grace period) 

1714 mtime = os.path.getmtime(tmp_file) 

1715 if time.time() - mtime > grace_period: 

1716 os.remove(tmp_file) 

1717 

1718 # Clean up orphaned .pack files without corresponding .idx files 

1719 try: 

1720 pack_dir_contents = os.listdir(self.pack_dir) 

1721 except FileNotFoundError: 

1722 return 

1723 

1724 pack_files = {} 

1725 idx_files = set() 

1726 

1727 for name in pack_dir_contents: 

1728 if name.endswith(".pack"): 

1729 base_name = name[:-5] # Remove .pack extension 

1730 pack_files[base_name] = name 

1731 elif name.endswith(".idx"): 

1732 base_name = name[:-4] # Remove .idx extension 

1733 idx_files.add(base_name) 

1734 

1735 # Remove .pack files without corresponding .idx files 

1736 for base_name, pack_name in pack_files.items(): 

1737 if base_name not in idx_files: 

1738 pack_path = os.path.join(self.pack_dir, pack_name) 

1739 # Check if file is old enough (more than grace period) 

1740 mtime = os.path.getmtime(pack_path) 

1741 if time.time() - mtime > grace_period: 

1742 os.remove(pack_path) 

1743 
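# Editor's illustrative sketch (not part of the dulwich source): how a
# caller might invoke prune() on a disk-backed store to clean up stale
# temporary pack files immediately rather than waiting out the default
# two-week grace period. ``disk_store`` is an assumed, caller-provided store.
def _example_prune(disk_store) -> None:
    # grace_period=0 removes leftover tmp_pack_* files and orphaned
    # .pack files regardless of their age.
    disk_store.prune(grace_period=0)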

1744 

1745class MemoryObjectStore(BaseObjectStore): 

1746 """Object store that keeps all objects in memory.""" 

1747 

1748 def __init__(self) -> None: 

1749 """Initialize a MemoryObjectStore. 

1750 

1751 Creates an empty in-memory object store. 

1752 """ 

1753 super().__init__() 

1754 self._data: dict[str, ShaFile] = {} 

1755 self.pack_compression_level = -1 

1756 

1757 def _to_hexsha(self, sha): 

1758 if len(sha) == 40: 

1759 return sha 

1760 elif len(sha) == 20: 

1761 return sha_to_hex(sha) 

1762 else: 

1763 raise ValueError(f"Invalid sha {sha!r}") 

1764 

1765 def contains_loose(self, sha): 

1766 """Check if a particular object is present by SHA1 and is loose.""" 

1767 return self._to_hexsha(sha) in self._data 

1768 

1769 def contains_packed(self, sha) -> bool: 

1770 """Check if a particular object is present by SHA1 and is packed.""" 

1771 return False 

1772 

1773 def __iter__(self): 

1774 """Iterate over the SHAs that are present in this store.""" 

1775 return iter(self._data.keys()) 

1776 

1777 @property 

1778 def packs(self): 

1779 """List with pack objects.""" 

1780 return [] 

1781 

1782 def get_raw(self, name: ObjectID): 

1783 """Obtain the raw text for an object. 

1784 

1785 Args: 

1786 name: sha for the object. 

1787 Returns: tuple with numeric type and object contents. 

1788 """ 

1789 obj = self[self._to_hexsha(name)] 

1790 return obj.type_num, obj.as_raw_string() 

1791 

1792 def __getitem__(self, name: ObjectID): 

1793 """Retrieve an object by SHA. 

1794 

1795 Args: 

1796 name: SHA of the object (as hex string or bytes) 

1797 

1798 Returns: 

1799 Copy of the ShaFile object 

1800 

1801 Raises: 

1802 KeyError: If the object is not found 

1803 """ 

1804 return self._data[self._to_hexsha(name)].copy() 

1805 

1806 def __delitem__(self, name: ObjectID) -> None: 

1807 """Delete an object from this store, for testing only.""" 

1808 del self._data[self._to_hexsha(name)] 

1809 

1810 def add_object(self, obj) -> None: 

1811 """Add a single object to this object store.""" 

1812 self._data[obj.id] = obj.copy() 

1813 

1814 def add_objects(self, objects, progress=None) -> None: 

1815 """Add a set of objects to this object store. 

1816 

1817 Args: 

1818 objects: Iterable over a list of (object, path) tuples 

1819 progress: Optional progress reporting function. 

1820 """ 

1821 for obj, path in objects: 

1822 self.add_object(obj) 

1823 

1824 def add_pack(self): 

1825 """Add a new pack to this object store. 

1826 

1827 Because this object store doesn't support packs, we extract and add the 

1828 individual objects. 

1829 

1830 Returns: Fileobject to write to, a commit function to 

1831 call when the pack is finished, and an abort function. 

1832 """ 

1833 from tempfile import SpooledTemporaryFile 

1834 

1835 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1836 

1837 def commit() -> None: 

1838 size = f.tell() 

1839 if size > 0: 

1840 f.seek(0) 

1841 

1842 p = PackData.from_file(f, size) 

1843 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1844 self.add_object(obj) 

1845 p.close() 

1846 f.close() 

1847 else: 

1848 f.close() 

1849 

1850 def abort() -> None: 

1851 f.close() 

1852 

1853 return f, commit, abort 

1854 

1855 def add_pack_data( 

1856 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1857 ) -> None: 

1858 """Add pack data to this object store. 

1859 

1860 Args: 

1861 count: Number of items to add 

1862 unpacked_objects: Iterator of UnpackedObject instances 

1863 progress: Optional progress reporting function. 

1864 """ 

1865 if count == 0: 

1866 return 

1867 

1868 # Since MemoryObjectStore doesn't support pack files, we need to 

1869 # extract individual objects. To handle deltas properly, we write 

1870 # to a temporary pack and then use PackInflater to resolve them. 

1871 f, commit, abort = self.add_pack() 

1872 try: 

1873 write_pack_data( 

1874 f.write, 

1875 unpacked_objects, 

1876 num_records=count, 

1877 progress=progress, 

1878 ) 

1879 except BaseException: 

1880 abort() 

1881 raise 

1882 else: 

1883 commit() 

1884 

1885 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1886 """Add a new thin pack to this object store. 

1887 

1888 Thin packs are packs that contain deltas with parents that exist 

1889 outside the pack. Because this object store doesn't support packs, we 

1890 extract and add the individual objects. 

1891 

1892 Args: 

1893 read_all: Read function that blocks until the number of 

1894 requested bytes are read. 

1895 read_some: Read function that returns at least one byte, but may 

1896 not return the number of bytes requested. 

1897 progress: Optional progress reporting function. 

1898 """ 

1899 f, commit, abort = self.add_pack() 

1900 try: 

1901 copier = PackStreamCopier(read_all, read_some, f) 

1902 copier.verify() 

1903 except BaseException: 

1904 abort() 

1905 raise 

1906 else: 

1907 commit() 

1908 
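# Editor's illustrative sketch (not part of the dulwich source): basic
# MemoryObjectStore usage - add a blob and read it back through the
# public accessors defined above.
def _example_memory_object_store() -> None:
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob

    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello world\n")
    store.add_object(blob)
    assert store.contains_loose(blob.id)
    type_num, raw = store.get_raw(blob.id)
    assert raw == b"hello world\n"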

1909 

1910class ObjectIterator(Protocol): 

1911 """Interface for iterating over objects.""" 

1912 

1913 def iterobjects(self) -> Iterator[ShaFile]: 

1914 """Iterate over all objects. 

1915 

1916 Returns: 

1917 Iterator of ShaFile objects 

1918 """ 

1919 raise NotImplementedError(self.iterobjects) 

1920 

1921 

1922def tree_lookup_path(lookup_obj, root_sha, path): 

1923 """Look up an object in a Git tree. 

1924 

1925 Args: 

1926 lookup_obj: Callback for retrieving object by SHA1 

1927 root_sha: SHA1 of the root tree 

1928 path: Path to lookup 

1929 Returns: A tuple of (mode, SHA) of the resulting path. 

1930 """ 

1931 tree = lookup_obj(root_sha) 

1932 if not isinstance(tree, Tree): 

1933 raise NotTreeError(root_sha) 

1934 return tree.lookup_path(lookup_obj, path) 

1935 
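# Editor's illustrative sketch (not part of the dulwich source): resolving
# a path inside a tree with tree_lookup_path(), using an in-memory store
# built just for the example.
def _example_tree_lookup_path() -> None:
    from dulwich.object_store import MemoryObjectStore, tree_lookup_path
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"contents\n")
    tree = Tree()
    tree.add(b"file.txt", 0o100644, blob.id)
    store.add_objects([(blob, None), (tree, None)])
    mode, sha = tree_lookup_path(store.__getitem__, tree.id, b"file.txt")
    assert (mode, sha) == (0o100644, blob.id)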

1936 

1937def _collect_filetree_revs( 

1938 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1939) -> None: 

1940 """Collect SHA1s of files and directories for specified tree. 

1941 

1942 Args: 

1943 obj_store: Object store to get objects by SHA from 

1944 tree_sha: tree reference to walk 

1945 kset: set to fill with references to files and directories 

1946 """ 

1947 filetree = obj_store[tree_sha] 

1948 assert isinstance(filetree, Tree) 

1949 for name, mode, sha in filetree.iteritems(): 

1950 if not S_ISGITLINK(mode) and sha not in kset: 

1951 kset.add(sha) 

1952 if stat.S_ISDIR(mode): 

1953 _collect_filetree_revs(obj_store, sha, kset) 

1954 

1955 

1956def _split_commits_and_tags( 

1957 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1958) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1959 """Split object id list into three lists with commit, tag, and other SHAs. 

1960 

1961 Commits referenced by tags are included in the commits 

1962 list as well. Only SHA1s known in this repository get 

1963 through; unless the ignore_unknown argument is True, a KeyError 

1964 is raised for any SHA1 missing from the repository 

1965 

1966 Args: 

1967 obj_store: Object store to get objects by SHA1 from 

1968 lst: Collection of commit and tag SHAs 

1969 ignore_unknown: True to skip SHA1 missing in the repository 

1970 silently. 

1971 Returns: A tuple of (commits, tags, others) SHA1s 

1972 """ 

1973 commits: set[bytes] = set() 

1974 tags: set[bytes] = set() 

1975 others: set[bytes] = set() 

1976 for e in lst: 

1977 try: 

1978 o = obj_store[e] 

1979 except KeyError: 

1980 if not ignore_unknown: 

1981 raise 

1982 else: 

1983 if isinstance(o, Commit): 

1984 commits.add(e) 

1985 elif isinstance(o, Tag): 

1986 tags.add(e) 

1987 tagged = o.object[1] 

1988 c, t, os = _split_commits_and_tags( 

1989 obj_store, [tagged], ignore_unknown=ignore_unknown 

1990 ) 

1991 commits |= c 

1992 tags |= t 

1993 others |= os 

1994 else: 

1995 others.add(e) 

1996 return (commits, tags, others) 

1997 

1998 

1999class MissingObjectFinder: 

2000 """Find the objects missing from another object store. 

2001 

2002 Args: 

2003 object_store: Object store containing at least all objects to be 

2004 sent 

2005 haves: SHA1s of commits not to send (already present in target) 

2006 wants: SHA1s of commits to send 

2007 progress: Optional function to report progress to. 

2008 get_tagged: Function that returns a dict of pointed-to sha -> tag 

2009 sha for including tags. 

2010 get_parents: Optional function for getting the parents of a commit. 

2011 """ 

2012 

2013 def __init__( 

2014 self, 

2015 object_store, 

2016 haves, 

2017 wants, 

2018 *, 

2019 shallow=None, 

2020 progress=None, 

2021 get_tagged=None, 

2022 get_parents=lambda commit: commit.parents, 

2023 ) -> None: 

2024 """Initialize a MissingObjectFinder. 

2025 

2026 Args: 

2027 object_store: Object store containing objects 

2028 haves: SHA1s of objects already present in target 

2029 wants: SHA1s of objects to send 

2030 shallow: Set of shallow commit SHA1s 

2031 progress: Optional progress reporting callback 

2032 get_tagged: Function returning dict of pointed-to sha -> tag sha 

2033 get_parents: Function for getting commit parents 

2034 """ 

2035 self.object_store = object_store 

2036 if shallow is None: 

2037 shallow = set() 

2038 self._get_parents = get_parents 

2039 # process Commits and Tags differently 

2040 # Note: while haves may list commits/tags not available locally 

2041 # (such SHAs are filtered out by _split_commits_and_tags), 

2042 # wants must list only known SHAs; otherwise 

2043 # _split_commits_and_tags fails with KeyError 

2044 have_commits, have_tags, have_others = _split_commits_and_tags( 

2045 object_store, haves, ignore_unknown=True 

2046 ) 

2047 want_commits, want_tags, want_others = _split_commits_and_tags( 

2048 object_store, wants, ignore_unknown=False 

2049 ) 

2050 # all_ancestors is a set of commits that shall not be sent 

2051 # (complete repository up to 'haves') 

2052 all_ancestors = _collect_ancestors( 

2053 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

2054 )[0] 

2055 # missing_commits - complete set of commits between haves and wants 

2056 # common_commits - commits from all_ancestors we encounter while 

2057 # traversing the parent hierarchy of wants 

2058 missing_commits, common_commits = _collect_ancestors( 

2059 object_store, 

2060 want_commits, 

2061 all_ancestors, 

2062 shallow=shallow, 

2063 get_parents=self._get_parents, 

2064 ) 

2065 self.remote_has: set[bytes] = set() 

2066 # Now, fill sha_done with commits and revisions of 

2067 # files and directories known to be both locally 

2068 # and on target. Thus these commits and files 

2069 # won't get selected for fetch 

2070 for h in common_commits: 

2071 self.remote_has.add(h) 

2072 cmt = object_store[h] 

2073 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

2074 # record tags we have as visited, too 

2075 for t in have_tags: 

2076 self.remote_has.add(t) 

2077 self.sha_done = set(self.remote_has) 

2078 

2079 # in fact, what we 'want' is commits, tags, and others 

2080 # we've found missing 

2081 self.objects_to_send: set[ 

2082 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

2083 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

2084 missing_tags = want_tags.difference(have_tags) 

2085 self.objects_to_send.update( 

2086 {(w, None, Tag.type_num, False) for w in missing_tags} 

2087 ) 

2088 missing_others = want_others.difference(have_others) 

2089 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

2090 

2091 if progress is None: 

2092 self.progress = lambda x: None 

2093 else: 

2094 self.progress = progress 

2095 self._tagged = (get_tagged and get_tagged()) or {} 

2096 

2097 def get_remote_has(self): 

2098 """Get the set of SHAs the remote has. 

2099 

2100 Returns: 

2101 Set of SHA1s that the remote side already has 

2102 """ 

2103 return self.remote_has 

2104 

2105 def add_todo( 

2106 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

2107 ) -> None: 

2108 """Add objects to the todo list. 

2109 

2110 Args: 

2111 entries: Iterable of tuples (sha, name, type_num, is_leaf) 

2112 """ 

2113 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

2114 

2115 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

2116 """Get the next object to send. 

2117 

2118 Returns: 

2119 Tuple of (sha, pack_hint) 

2120 

2121 Raises: 

2122 StopIteration: When no more objects to send 

2123 """ 

2124 while True: 

2125 if not self.objects_to_send: 

2126 self.progress( 

2127 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

2128 ) 

2129 raise StopIteration 

2130 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

2131 if sha not in self.sha_done: 

2132 break 

2133 if not leaf: 

2134 o = self.object_store[sha] 

2135 if isinstance(o, Commit): 

2136 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

2137 elif isinstance(o, Tree): 

2138 self.add_todo( 

2139 [ 

2140 ( 

2141 s, 

2142 n, 

2143 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

2144 not stat.S_ISDIR(m), 

2145 ) 

2146 for n, m, s in o.iteritems() 

2147 if not S_ISGITLINK(m) 

2148 ] 

2149 ) 

2150 elif isinstance(o, Tag): 

2151 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

2152 if sha in self._tagged: 

2153 self.add_todo([(self._tagged[sha], None, None, True)]) 

2154 self.sha_done.add(sha) 

2155 if len(self.sha_done) % 1000 == 0: 

2156 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

2157 if type_num is None: 

2158 pack_hint = None 

2159 else: 

2160 pack_hint = (type_num, name) 

2161 return (sha, pack_hint) 

2162 

2163 def __iter__(self): 

2164 """Return iterator over objects to send. 

2165 

2166 Returns: 

2167 Self (this class implements the iterator protocol) 

2168 """ 

2169 return self 

2170 
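# Editor's illustrative sketch (not part of the dulwich source): driving
# MissingObjectFinder to enumerate the objects a fetch/push would have to
# transfer. ``store``, ``haves`` and ``wants`` are assumed to be supplied
# by the caller (e.g. a server answering a fetch request).
def _example_missing_object_finder(store, haves, wants):
    finder = MissingObjectFinder(store, haves=haves, wants=wants)
    for sha, pack_hint in finder:
        # sha is a hex ObjectID; pack_hint is (type_num, name) or None and
        # can be passed on to pack generation.
        yield sha, pack_hint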

2171 

2172class ObjectStoreGraphWalker: 

2173 """Graph walker that finds what commits are missing from an object store.""" 

2174 

2175 heads: set[ObjectID] 

2176 """Revisions without descendants in the local repo.""" 

2177 

2178 get_parents: Callable[[ObjectID], list[ObjectID]] 

2179 """Function to retrieve parents in the local repo.""" 

2180 

2181 shallow: set[ObjectID] 

2182 

2183 def __init__( 

2184 self, 

2185 local_heads: Iterable[ObjectID], 

2186 get_parents, 

2187 shallow: Optional[set[ObjectID]] = None, 

2188 update_shallow=None, 

2189 ) -> None: 

2190 """Create a new instance. 

2191 

2192 Args: 

2193 local_heads: Heads to start search with 

2194 get_parents: Function for finding the parents of a SHA1. 

2195 shallow: Set of shallow commits. 

2196 update_shallow: Function to update shallow commits. 

2197 """ 

2198 self.heads = set(local_heads) 

2199 self.get_parents = get_parents 

2200 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

2201 if shallow is None: 

2202 shallow = set() 

2203 self.shallow = shallow 

2204 self.update_shallow = update_shallow 

2205 

2206 def nak(self) -> None: 

2207 """Nothing in common was found.""" 

2208 

2209 def ack(self, sha: ObjectID) -> None: 

2210 """Ack that a revision and its ancestors are present in the source.""" 

2211 if len(sha) != 40: 

2212 raise ValueError(f"unexpected sha {sha!r} received") 

2213 ancestors = {sha} 

2214 

2215 # stop if we run out of heads to remove 

2216 while self.heads: 

2217 for a in ancestors: 

2218 if a in self.heads: 

2219 self.heads.remove(a) 

2220 

2221 # collect all ancestors 

2222 new_ancestors = set() 

2223 for a in ancestors: 

2224 ps = self.parents.get(a) 

2225 if ps is not None: 

2226 new_ancestors.update(ps) 

2227 self.parents[a] = None 

2228 

2229 # no more ancestors; stop 

2230 if not new_ancestors: 

2231 break 

2232 

2233 ancestors = new_ancestors 

2234 

2235 def next(self): 

2236 """Iterate over ancestors of heads in the target.""" 

2237 if self.heads: 

2238 ret = self.heads.pop() 

2239 try: 

2240 ps = self.get_parents(ret) 

2241 except KeyError: 

2242 return None 

2243 self.parents[ret] = ps 

2244 self.heads.update([p for p in ps if p not in self.parents]) 

2245 return ret 

2246 return None 

2247 

2248 __next__ = next 

2249 
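# Editor's illustrative sketch (not part of the dulwich source): a typical
# negotiation loop around ObjectStoreGraphWalker. ``store`` is assumed to
# be a local object store and ``remote_has`` a set of commit SHAs the
# other side is known to have.
def _example_graph_walker(store, local_heads, remote_has) -> None:
    walker = ObjectStoreGraphWalker(local_heads, lambda sha: store[sha].parents)
    sha = walker.next()
    while sha is not None:
        if sha in remote_has:
            # Mark this commit (and therefore its ancestors) as common.
            walker.ack(sha)
        sha = walker.next()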

2250 

2251def commit_tree_changes(object_store, tree, changes): 

2252 """Commit a specified set of changes to a tree structure. 

2253 

2254 This will apply a set of changes on top of an existing tree, storing new 

2255 objects in object_store. 

2256 

2257 changes are a list of tuples with (path, mode, object_sha). 

2258 Paths can be both blobs and trees. Setting the mode and 

2259 object sha to None deletes the path. 

2260 

2261 This method works especially well if there are only a small 

2262 number of changes to a big tree. For a large number of changes 

2263 to a large tree, use e.g. commit_tree. 

2264 

2265 Args: 

2266 object_store: Object store to store new objects in 

2267 and retrieve old ones from. 

2268 tree: Original tree root 

2269 changes: changes to apply 

2270 Returns: New tree root object 

2271 """ 

2272 # TODO(jelmer): Save up the objects and add them using .add_objects 

2273 # rather than with individual calls to .add_object. 

2274 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {} 

2275 for path, new_mode, new_sha in changes: 

2276 try: 

2277 (dirname, subpath) = path.split(b"/", 1) 

2278 except ValueError: 

2279 if new_sha is None: 

2280 del tree[path] 

2281 else: 

2282 tree[path] = (new_mode, new_sha) 

2283 else: 

2284 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2285 for name, subchanges in nested_changes.items(): 

2286 try: 

2287 orig_subtree = object_store[tree[name][1]] 

2288 except KeyError: 

2289 orig_subtree = Tree() 

2290 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

2291 if len(subtree) == 0: 

2292 del tree[name] 

2293 else: 

2294 tree[name] = (stat.S_IFDIR, subtree.id) 

2295 object_store.add_object(tree) 

2296 return tree 

2297 
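# Editor's illustrative sketch (not part of the dulwich source): applying
# a small set of changes to a tree with commit_tree_changes(). Paths are
# added or updated by giving (path, mode, sha) and deleted by giving
# (path, None, None).
def _example_commit_tree_changes() -> None:
    from dulwich.object_store import MemoryObjectStore, commit_tree_changes
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"updated\n")
    store.add_object(blob)
    root = Tree()
    store.add_object(root)
    new_root = commit_tree_changes(
        store, root, [(b"docs/readme.txt", 0o100644, blob.id)]
    )
    # The new root tree (and the new "docs" subtree) were added to the store.
    assert store.contains_loose(new_root.id)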

2298 

2299class OverlayObjectStore(BaseObjectStore): 

2300 """Object store that can overlay multiple object stores.""" 

2301 

2302 def __init__(self, bases, add_store=None) -> None: 

2303 """Initialize an OverlayObjectStore. 

2304 

2305 Args: 

2306 bases: List of base object stores to overlay 

2307 add_store: Optional store to write new objects to 

2308 """ 

2309 self.bases = bases 

2310 self.add_store = add_store 

2311 

2312 def add_object(self, object): 

2313 """Add a single object to the store. 

2314 

2315 Args: 

2316 object: Object to add 

2317 

2318 Raises: 

2319 NotImplementedError: If no add_store was provided 

2320 """ 

2321 if self.add_store is None: 

2322 raise NotImplementedError(self.add_object) 

2323 return self.add_store.add_object(object) 

2324 

2325 def add_objects(self, objects, progress=None): 

2326 """Add multiple objects to the store. 

2327 

2328 Args: 

2329 objects: Iterator of objects to add 

2330 progress: Optional progress reporting callback 

2331 

2332 Raises: 

2333 NotImplementedError: If no add_store was provided 

2334 """ 

2335 if self.add_store is None: 

2336 raise NotImplementedError(self.add_object) 

2337 return self.add_store.add_objects(objects, progress) 

2338 

2339 @property 

2340 def packs(self): 

2341 """Get the list of packs from all overlaid stores. 

2342 

2343 Returns: 

2344 Combined list of packs from all base stores 

2345 """ 

2346 ret = [] 

2347 for b in self.bases: 

2348 ret.extend(b.packs) 

2349 return ret 

2350 

2351 def __iter__(self): 

2352 """Iterate over all object SHAs in the overlaid stores. 

2353 

2354 Returns: 

2355 Iterator of object SHAs (deduped across stores) 

2356 """ 

2357 done = set() 

2358 for b in self.bases: 

2359 for o_id in b: 

2360 if o_id not in done: 

2361 yield o_id 

2362 done.add(o_id) 

2363 

2364 def iterobjects_subset( 

2365 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2366 ) -> Iterator[ShaFile]: 

2367 """Iterate over a subset of objects from the overlaid stores. 

2368 

2369 Args: 

2370 shas: Iterable of object SHAs to retrieve 

2371 allow_missing: If True, skip missing objects; if False, raise KeyError 

2372 

2373 Returns: 

2374 Iterator of ShaFile objects 

2375 

2376 Raises: 

2377 KeyError: If an object is missing and allow_missing is False 

2378 """ 

2379 todo = set(shas) 

2380 found: set[bytes] = set() 

2381 

2382 for b in self.bases: 

2383 # Create a copy of todo for each base to avoid modifying 

2384 # the set while iterating through it 

2385 current_todo = todo - found 

2386 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2387 yield o 

2388 found.add(o.id) 

2389 

2390 # Check for any remaining objects not found 

2391 missing = todo - found 

2392 if missing and not allow_missing: 

2393 raise KeyError(next(iter(missing))) 

2394 

2395 def iter_unpacked_subset( 

2396 self, 

2397 shas: Iterable[bytes], 

2398 *, 

2399 include_comp=False, 

2400 allow_missing: bool = False, 

2401 convert_ofs_delta=True, 

2402 ) -> Iterator[ShaFile]: 

2403 """Iterate over unpacked objects from the overlaid stores. 

2404 

2405 Args: 

2406 shas: Iterable of object SHAs to retrieve 

2407 include_comp: Whether to include compressed data 

2408 allow_missing: If True, skip missing objects; if False, raise KeyError 

2409 convert_ofs_delta: Whether to convert OFS_DELTA objects 

2410 

2411 Returns: 

2412 Iterator of unpacked objects 

2413 

2414 Raises: 

2415 KeyError: If an object is missing and allow_missing is False 

2416 """ 

2417 todo = set(shas) 

2418 for b in self.bases: 

2419 for o in b.iter_unpacked_subset( 

2420 todo, 

2421 include_comp=include_comp, 

2422 allow_missing=True, 

2423 convert_ofs_delta=convert_ofs_delta, 

2424 ): 

2425 yield o 

2426 todo.remove(o.id) 

2427 if todo and not allow_missing: 

2428 raise KeyError(next(iter(todo))) 

2429 

2430 def get_raw(self, sha_id): 

2431 """Get the raw object data from the overlaid stores. 

2432 

2433 Args: 

2434 sha_id: SHA of the object 

2435 

2436 Returns: 

2437 Tuple of (type_num, raw_data) 

2438 

2439 Raises: 

2440 KeyError: If object not found in any base store 

2441 """ 

2442 for b in self.bases: 

2443 try: 

2444 return b.get_raw(sha_id) 

2445 except KeyError: 

2446 pass 

2447 raise KeyError(sha_id) 

2448 

2449 def contains_packed(self, sha) -> bool: 

2450 """Check if an object is packed in any base store. 

2451 

2452 Args: 

2453 sha: SHA of the object 

2454 

2455 Returns: 

2456 True if object is packed in any base store 

2457 """ 

2458 for b in self.bases: 

2459 if b.contains_packed(sha): 

2460 return True 

2461 return False 

2462 

2463 def contains_loose(self, sha) -> bool: 

2464 """Check if an object is loose in any base store. 

2465 

2466 Args: 

2467 sha: SHA of the object 

2468 

2469 Returns: 

2470 True if object is loose in any base store 

2471 """ 

2472 for b in self.bases: 

2473 if b.contains_loose(sha): 

2474 return True 

2475 return False 

2476 
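# Editor's illustrative sketch (not part of the dulwich source): stacking
# two stores with OverlayObjectStore so reads fall through the bases while
# writes go to a designated store. Both stores here are in-memory only to
# keep the example self-contained.
def _example_overlay_object_store() -> None:
    from dulwich.object_store import MemoryObjectStore, OverlayObjectStore
    from dulwich.objects import Blob

    base = MemoryObjectStore()
    scratch = MemoryObjectStore()
    overlay = OverlayObjectStore([scratch, base], add_store=scratch)

    blob = Blob.from_string(b"shared data\n")
    base.add_object(blob)
    # Reads search every base in order...
    assert overlay.get_raw(blob.id)[1] == b"shared data\n"
    # ...while writes land in the add_store only.
    new_blob = Blob.from_string(b"new data\n")
    overlay.add_object(new_blob)
    assert scratch.contains_loose(new_blob.id)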

2477 

2478def read_packs_file(f): 

2479 """Yield the packs listed in a packs file.""" 

2480 for line in f.read().splitlines(): 

2481 if not line: 

2482 continue 

2483 (kind, name) = line.split(b" ", 1) 

2484 if kind != b"P": 

2485 continue 

2486 yield os.fsdecode(name) 

2487 
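# Editor's illustrative sketch (not part of the dulwich source): the
# objects/info/packs format parsed by read_packs_file() - one "P <name>"
# entry per pack. The pack name below is a made-up example value.
def _example_read_packs_file() -> None:
    from io import BytesIO
    from dulwich.object_store import read_packs_file

    listing = BytesIO(
        b"P pack-0123456789abcdef0123456789abcdef01234567.pack\n"
    )
    assert list(read_packs_file(listing)) == [
        "pack-0123456789abcdef0123456789abcdef01234567.pack"
    ]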

2488 

2489class BucketBasedObjectStore(PackBasedObjectStore): 

2490 """Object store implementation that uses a bucket store like S3 as backend.""" 

2491 

2492 def _iter_loose_objects(self): 

2493 """Iterate over the SHAs of all loose objects.""" 

2494 return iter([]) 

2495 

2496 def _get_loose_object(self, sha) -> None: 

2497 return None 

2498 

2499 def delete_loose_object(self, sha) -> None: 

2500 """Delete a loose object (no-op for bucket stores). 

2501 

2502 Bucket-based stores don't have loose objects, so this is a no-op. 

2503 

2504 Args: 

2505 sha: SHA of the object to delete 

2506 """ 

2507 # Doesn't exist. 

2508 

2509 def pack_loose_objects(self) -> int: 

2510 """Pack loose objects. Returns number of objects packed. 

2511 

2512 BucketBasedObjectStore doesn't support loose objects, so this is a no-op. 

2513 """ 

2514 return 0 

2515 

2516 def _remove_pack_by_name(self, name: str) -> None: 

2517 """Remove a pack by name. Subclasses should implement this.""" 

2518 raise NotImplementedError(self._remove_pack_by_name) 

2519 

2520 def _iter_pack_names(self) -> Iterator[str]: 

2521 raise NotImplementedError(self._iter_pack_names) 

2522 

2523 def _get_pack(self, name) -> Pack: 

2524 raise NotImplementedError(self._get_pack) 

2525 

2526 def _update_pack_cache(self): 

2527 pack_files = set(self._iter_pack_names()) 

2528 

2529 # Open newly appeared pack files 

2530 new_packs = [] 

2531 for f in pack_files: 

2532 if f not in self._pack_cache: 

2533 pack = self._get_pack(f) 

2534 new_packs.append(pack) 

2535 self._pack_cache[f] = pack 

2536 # Remove disappeared pack files 

2537 for f in set(self._pack_cache) - pack_files: 

2538 self._pack_cache.pop(f).close() 

2539 return new_packs 

2540 

2541 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2542 raise NotImplementedError 

2543 

2544 def add_pack(self): 

2545 """Add a new pack to this object store. 

2546 

2547 Returns: Fileobject to write to, a commit function to 

2548 call when the pack is finished and an abort 

2549 function. 

2550 """ 

2551 import tempfile 

2552 

2553 pf = tempfile.SpooledTemporaryFile( 

2554 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2555 ) 

2556 

2557 def commit(): 

2558 if pf.tell() == 0: 

2559 pf.close() 

2560 return None 

2561 

2562 pf.seek(0) 

2563 

2564 p = PackData(pf.name, pf) 

2565 entries = p.sorted_entries() 

2566 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2567 idxf = tempfile.SpooledTemporaryFile( 

2568 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2569 ) 

2570 checksum = p.get_stored_checksum() 

2571 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2572 idxf.seek(0) 

2573 idx = load_pack_index_file(basename + ".idx", idxf) 

2574 for pack in self.packs: 

2575 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2576 p.close() 

2577 idx.close() 

2578 pf.close() 

2579 idxf.close() 

2580 return pack 

2581 pf.seek(0) 

2582 idxf.seek(0) 

2583 self._upload_pack(basename, pf, idxf) 

2584 final_pack = Pack.from_objects(p, idx) 

2585 self._add_cached_pack(basename, final_pack) 

2586 pf.close() 

2587 idxf.close() 

2588 return final_pack 

2589 

2590 return pf, commit, pf.close 

2591 
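# Editor's illustrative skeleton (not part of the dulwich source): the
# hooks a concrete bucket-backed store has to provide. How each hook talks
# to the actual bucket (S3, GCS, ...) is left to the implementer; nothing
# below names a real storage API, and the Iterator/Pack annotations reuse
# this module's own imports.
class _ExampleBucketObjectStore(BucketBasedObjectStore):
    def _iter_pack_names(self) -> Iterator[str]:
        # List the pack basenames stored in the bucket, e.g. by listing
        # keys under an "objects/pack/" prefix.
        raise NotImplementedError

    def _get_pack(self, name) -> Pack:
        # Fetch <name>.pack and <name>.idx from the bucket and build a
        # Pack object from them.
        raise NotImplementedError

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        # Store the two file objects under <basename>.pack / <basename>.idx.
        raise NotImplementedError

    def _remove_pack_by_name(self, name: str) -> None:
        # Delete both files for the named pack from the bucket.
        raise NotImplementedError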

2592 

2593def _collect_ancestors( 

2594 store: ObjectContainer, 

2595 heads, 

2596 common: frozenset[ObjectID] = frozenset(), 

2597 shallow: frozenset[ObjectID] = frozenset(), 

2598 get_parents=lambda commit: commit.parents, 

2599): 

2600 """Collect all ancestors of heads up to (excluding) those in common. 

2601 

2602 Args: 

2603 store: Object store to get commits from 

2604 heads: commits to start from 

2605 common: commits to end at, or empty set to walk repository 

2606 completely 

2607 shallow: Set of shallow commits 

2608 get_parents: Optional function for getting the parents of a 

2609 commit. 

2610 Returns: a tuple (A, B) where A is the set of all commits reachable 

2611 from heads but not present in common, and B is the set of common 

2612 (shared) elements that are directly reachable from heads 

2613 """ 

2614 bases = set() 

2615 commits = set() 

2616 queue = [] 

2617 queue.extend(heads) 

2618 

2619 # Try to use commit graph if available 

2620 commit_graph = store.get_commit_graph() 

2621 

2622 while queue: 

2623 e = queue.pop(0) 

2624 if e in common: 

2625 bases.add(e) 

2626 elif e not in commits: 

2627 commits.add(e) 

2628 if e in shallow: 

2629 continue 

2630 

2631 # Try to use commit graph for parent lookup 

2632 parents = None 

2633 if commit_graph: 

2634 parents = commit_graph.get_parents(e) 

2635 

2636 if parents is None: 

2637 # Fall back to loading the object 

2638 cmt = store[e] 

2639 parents = get_parents(cmt) 

2640 

2641 queue.extend(parents) 

2642 return (commits, bases) 

2643 

2644 

2645def iter_tree_contents( 

2646 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2647): 

2648 """Iterate the contents of a tree and all subtrees. 

2649 

2650 Iteration is depth-first pre-order, as in e.g. os.walk. 

2651 

2652 Args: 

2653 store: Object store to get trees from 

2654 tree_id: SHA1 of the tree. 

2655 include_trees: If True, include tree objects in the iteration. 

2656 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

2657 tree. 

2658 """ 

2659 if tree_id is None: 

2660 return 

2661 # This could be fairly easily generalized to >2 trees if we find a use 

2662 # case. 

2663 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2664 while todo: 

2665 entry = todo.pop() 

2666 if stat.S_ISDIR(entry.mode): 

2667 extra = [] 

2668 tree = store[entry.sha] 

2669 assert isinstance(tree, Tree) 

2670 for subentry in tree.iteritems(name_order=True): 

2671 extra.append(subentry.in_path(entry.path)) 

2672 todo.extend(reversed(extra)) 

2673 if not stat.S_ISDIR(entry.mode) or include_trees: 

2674 yield entry 

2675 
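# Editor's illustrative sketch (not part of the dulwich source): walking a
# tree recursively with iter_tree_contents(), using a tiny in-memory store.
def _example_iter_tree_contents() -> None:
    from dulwich.object_store import MemoryObjectStore, iter_tree_contents
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello\n")
    tree = Tree()
    tree.add(b"hello.txt", 0o100644, blob.id)
    store.add_objects([(blob, None), (tree, None)])
    for entry in iter_tree_contents(store, tree.id):
        # entry is a TreeEntry(path, mode, sha); only blobs are yielded
        # unless include_trees=True is passed.
        print(entry.path, oct(entry.mode), entry.sha)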

2676 

2677def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2678 """Peel all tags from a SHA. 

2679 

2680 Args: 

2681 store: Object store to get objects from 

2682 sha: The object SHA to peel. 

2683 Returns: A tuple of (unpeeled, peeled) ShaFile objects, where peeled is 

2684 the object reached after following all intermediate tags; if the 

2685 original SHA does not point to a tag, both are the same object. 

2686 """ 

2687 unpeeled = obj = store[sha] 

2688 obj_class = object_class(obj.type_name) 

2689 while obj_class is Tag: 

2690 assert isinstance(obj, Tag) 

2691 obj_class, sha = obj.object 

2692 obj = store[sha] 

2693 return unpeeled, obj
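# Editor's illustrative sketch (not part of the dulwich source): peel_sha()
# follows a (possibly nested) chain of tag objects down to the tagged
# object. ``store`` and ``ref_sha`` are assumed to come from the caller.
def _example_peel_sha(store, ref_sha):
    unpeeled, peeled = peel_sha(store, ref_sha)
    # For a ref that points straight at a commit, both are the same object;
    # for an annotated tag, peeled is the commit (or tree/blob) it tags.
    return peeled.id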