Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1137 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from typing import ( 

36 Callable, 

37 Optional, 

38 Protocol, 

39 Union, 

40 cast, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile, _GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

# Subdirectories of the objects/ directory used by this store.
INFODIR = "info"

PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks

96 

97 

def find_shallow(store, heads, depth):
    """Find shallow commits according to a given depth.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
      considered shallow and unshallow according to the arguments. Note that
      these sets may overlap if a commit is reachable along multiple paths.
    """
    parents = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha):
        """Return the (memoized) parent SHAs of ``sha``."""
        result = parents.get(sha)
        # Check for None explicitly: a cached empty parent list (a root
        # commit) is falsy, and a truthiness test would re-fetch such
        # commits from the commit graph/store on every visit.
        if result is None:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            result = store[sha].parents
            parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel tags if necessary
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        # Non-commit heads (e.g. a tag pointing at a blob) are skipped.
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            shallow.add(sha)

    return shallow, not_shallow

150 

151 

def get_depth(
    store,
    head,
    get_parents=lambda commit: commit.parents,
    max_depth=None,
):
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to look up commits in.
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    Returns: Number of commits in the longest parent chain found, or 0 if
      ``head`` is not present in ``store``.
    """
    # Local import (matching this module's style of function-level imports)
    # so the top-level import block stays unchanged.
    from collections import deque

    if head not in store:
        return 0
    current_depth = 1
    # Breadth-first queue of (sha, depth). A deque gives O(1) popleft;
    # the previous list.pop(0) was O(n) per dequeue, making deep
    # histories accidentally quadratic.
    queue = deque([(head, current_depth)])
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.popleft()
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            if isinstance(cmt, Tag):
                # Peel an annotated tag down to the object it points at.
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        # Only follow parents that are actually present in this store.
        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth

192 

193 

class PackContainer(Protocol):
    """Structural interface for stores that can receive new pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Begin adding a new pack.

        Returns:
          A tuple of (writable buffer for the pack data, commit callback
          to finalize the pack, abort callback to discard it).
        """

199 

200 

class BaseObjectStore:
    """Object store interface.

    Abstract base class: subclasses provide actual storage (loose files,
    packs, memory, ...) by implementing the ``raise NotImplementedError``
    methods below.
    """

    def determine_wants_all(
        self, refs: dict[Ref, ObjectID], depth: Optional[int] = None
    ) -> list[ObjectID]:
        """Determine all objects that are wanted by the client.

        Args:
          refs: Dictionary mapping ref names to object IDs
          depth: Shallow fetch depth (None for full fetch)

        Returns:
          List of object IDs that are wanted
        """

        def _want_deepen(sha):
            # A known sha is still wanted when the requested depth exceeds
            # the depth currently available for it in this store.
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        # Skip peeled-tag refs (ref^{}) and the all-zeros sha; include
        # unknown objects and known ones that need deepening.
        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
            and not sha == ZERO_SHA
        ]

    def contains_loose(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def __contains__(self, sha1: bytes) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects, progress=None) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress-reporting callback
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(
        self,
        source,
        target,
        want_unchanged=False,
        include_trees=False,
        change_type_same=False,
        rename_detector=None,
        paths=None,
    ):
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Function-local import; presumably to avoid an import cycle with
        # diff_tree at module load time.
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            # Flatten the TreeChange object into plain nested tuples.
            yield (
                (change.old.path, change.new.path),
                (change.old.mode, change.new.mode),
                (change.old.sha, change.new.sha),
            )

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Deprecated: use the module-level
        :func:`dulwich.object_store.iter_tree_contents` instead.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
            tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Naive default implementation: one ``__getitem__`` lookup per sha.
        Subclasses may override with something more efficient.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves,
        wants,
        shallow=None,
        progress=None,
        get_tagged=None,
        get_parents=lambda commit: commit.parents,
    ):
        """Find the missing objects required for a set of revisions.

        Deprecated: use :class:`MissingObjectFinder` directly.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        # The walker signals exhaustion with a falsy value (e.g. None).
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        Returns: tuple of (number of objects, iterator over UnpackedObject)
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        Deprecated: use the module-level
        :func:`dulwich.object_store.peel_sha` instead.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1]

    def _get_depth(
        self,
        head,
        get_parents=lambda commit: commit.parents,
        max_depth=None,
    ):
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self):
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(self, refs=None, reachable=True) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Note:
          Default implementation does nothing. Subclasses should override
          this method to provide commit graph writing functionality.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha):
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)

532 

533 

class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level=-1,
        pack_index_version=None,
        pack_delta_window_size=None,
        pack_window_memory=None,
        pack_delta_cache_size=None,
        pack_depth=None,
        pack_threads=None,
        pack_big_file_threshold=None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
        """
        # Cache of open Pack objects, keyed by pack base name.
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: tuple of (file to write pack data to, commit callback,
          abort callback).
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> None:
        """Add pack data to this object store.

        Args:
          count: Number of items to add
          unpacked_objects: Iterator over the objects to write
          progress: Optional progress-reporting callback
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                compression_level=self.pack_compression_level,
            )
        except BaseException:
            # Abort on *any* failure (including KeyboardInterrupt) so a
            # half-written pack never gets committed.
            abort()
            raise
        else:
            return commit()

    @property
    def alternates(self):
        """Get the list of alternate object stores.

        Returns:
          List of alternate BaseObjectStore instances
        """
        return []

    def contains_packed(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                # Pack was removed underneath us (e.g. concurrent repack);
                # just keep checking the remaining packs.
                pass
        return False

    def __contains__(self, sha) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name, pack) -> None:
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            # Close the pack object we displaced, if any.
            if prev_pack:
                prev_pack.close()

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        More efficient than the base-class implementation because it can
        reuse existing deltas from the packs.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            cast(PackedObjectContainer, self),
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

    def _clear_cached_packs(self) -> None:
        """Close and forget all cached pack objects."""
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self):
        """Iterate over the currently cached Pack objects."""
        return self._pack_cache.values()

    def _update_pack_cache(self) -> list[Pack]:
        """Refresh the pack cache; return newly discovered Pack objects."""
        raise NotImplementedError(self._update_pack_cache)

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        """
        self._clear_cached_packs()

    @property
    def packs(self):
        """List with pack objects."""
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())

    def count_pack_files(self) -> int:
        """Count the number of pack files.

        Returns:
          Number of pack files (excluding those with .keep files)
        """
        count = 0
        for pack in self.packs:
            # Check if there's a .keep file for this pack
            keep_path = pack._basename + ".keep"
            if not os.path.exists(keep_path):
                count += 1
        return count

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha) -> Optional[ShaFile]:
        """Return the loose object for sha, or None if not stored loose."""
        raise NotImplementedError(self._get_loose_object)

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.
        """
        raise NotImplementedError(self.delete_loose_object)

    def _remove_pack(self, name) -> None:
        """Remove a pack (and its index) from the store."""
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self):
        """Pack loose objects.

        Returns: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        # Only delete the loose copies after the pack has been written.
        for obj, path in objects:
            self.delete_loose_object(obj.id)
        return len(objects)

    def repack(self, exclude=None):
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.

        Args:
          exclude: Optional set of object SHAs to exclude from repacking
        """
        if exclude is None:
            exclude = set()

        loose_objects = set()
        excluded_loose_objects = set()
        for sha in self._iter_loose_objects():
            if sha not in exclude:
                loose_objects.add(self._get_loose_object(sha))
            else:
                excluded_loose_objects.add(sha)

        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update(
                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
            )

        # Only create a new pack if there are objects to pack
        if objects:
            # The name of the consolidated pack might match the name of a
            # pre-existing pack. Take care not to remove the newly created
            # consolidated pack.
            consolidated = self.add_objects(objects)
            old_packs.pop(consolidated.name(), None)

        # Delete loose objects that were packed
        for obj in loose_objects:
            self.delete_loose_object(obj.id)
        # Delete excluded loose objects
        for sha in excluded_loose_objects:
            self.delete_loose_object(sha)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                yield from pack
            except PackFileDisappeared:
                # Pack vanished mid-iteration; skip it.
                pass
        yield from self._iter_loose_objects()
        yield from self._iter_alternate_objects()

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        Args:
          name: sha for the object, as 40-byte hex or 20-byte binary.
        Returns: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        # Normalize: keep both the binary sha (for pack lookups) and the
        # hex sha (for loose/alternate lookups), converting lazily.
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def iter_unpacked_subset(
        self,
        shas: set[bytes],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
          shas: Set of object SHAs to retrieve
          include_comp: Whether to include compressed data
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # ``todo`` shrinks as objects are found; each search stage only
        # looks for what the previous stages did not yield.
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        This method searches for objects in pack files, alternates, and loose storage.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # ``todo`` shrinks as objects are found; loose storage is the
        # final fallback for whatever remains.
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for alternate in self.alternates:
            for o in alternate.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for oid in todo:
            o = self._get_loose_object(oid)
            if o is not None:
                yield o
            elif not allow_missing:
                raise KeyError(oid)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
          sha1: sha for the object, as 40-byte hex or 20-byte binary.
          include_comp: Whether to include compressed data
        """
        if sha1 == ZERO_SHA:
            raise KeyError(sha1)
        if len(sha1) == 40:
            sha = hex_to_sha(sha1)
            hexsha = sha1
        elif len(sha1) == 20:
            sha = sha1
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(sha1)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable[[str], None]] = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over (object, path) tuples, should support
            __len__.
          progress: Optional progress-reporting callback
        Returns: Pack object of the objects written.
        """
        count = len(objects)
        record_iter = (full_unpacked_object(o) for (o, p) in objects)
        return self.add_pack_data(count, record_iter, progress=progress)

1000 

1001 

class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    path: Union[str, os.PathLike]
    pack_dir: Union[str, os.PathLike]

    def __init__(
        self,
        path: Union[str, os.PathLike],
        loose_compression_level=-1,
        pack_compression_level=-1,
        pack_index_version=None,
        pack_delta_window_size=None,
        pack_window_memory=None,
        pack_delta_cache_size=None,
        pack_depth=None,
        pack_threads=None,
        pack_big_file_threshold=None,
    ) -> None:
        """Open an object store.

        Args:
            path: Path of the object store.
            loose_compression_level: zlib compression level for loose objects
            pack_compression_level: zlib compression level for pack objects
            pack_index_version: pack index version to use (1, 2, or 3)
            pack_delta_window_size: sliding window size for delta compression
            pack_window_memory: memory limit for delta window operations
            pack_delta_cache_size: size of cache for delta operations
            pack_depth: maximum delta chain depth
            pack_threads: number of threads for pack operations
            pack_big_file_threshold: threshold for treating files as big
        """
        super().__init__(
            pack_compression_level=pack_compression_level,
            pack_index_version=pack_index_version,
            pack_delta_window_size=pack_delta_window_size,
            pack_window_memory=pack_window_memory,
            pack_delta_cache_size=pack_delta_cache_size,
            pack_depth=pack_depth,
            pack_threads=pack_threads,
            pack_big_file_threshold=pack_big_file_threshold,
        )
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._alternates = None
        self.loose_compression_level = loose_compression_level
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version

        # Commit graph support - lazy loaded
        self._commit_graph = None
        self._use_commit_graph = True  # Default to true

    def __repr__(self) -> str:
        """Return string representation of DiskObjectStore.

        Returns:
            String representation including the store path
        """
        return f"<{self.__class__.__name__}({self.path!r})>"

    @classmethod
    def from_config(cls, path: Union[str, os.PathLike], config):
        """Create a DiskObjectStore from a configuration object.

        Args:
            path: Path to the object store directory
            config: Configuration object to read settings from

        Returns:
            New DiskObjectStore instance configured according to config
        """
        try:
            default_compression_level = int(
                config.get((b"core",), b"compression").decode()
            )
        except KeyError:
            default_compression_level = -1
        try:
            loose_compression_level = int(
                config.get((b"core",), b"looseCompression").decode()
            )
        except KeyError:
            loose_compression_level = default_compression_level
        try:
            # Use a bytes key for consistency with every other lookup here.
            pack_compression_level = int(
                config.get((b"core",), b"packCompression").decode()
            )
        except KeyError:
            pack_compression_level = default_compression_level
        try:
            pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
        except KeyError:
            pack_index_version = None

        # Read pack configuration options
        try:
            pack_delta_window_size = int(
                config.get((b"pack",), b"deltaWindowSize").decode()
            )
        except KeyError:
            pack_delta_window_size = None
        try:
            pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
        except KeyError:
            pack_window_memory = None
        try:
            pack_delta_cache_size = int(
                config.get((b"pack",), b"deltaCacheSize").decode()
            )
        except KeyError:
            pack_delta_cache_size = None
        try:
            pack_depth = int(config.get((b"pack",), b"depth").decode())
        except KeyError:
            pack_depth = None
        try:
            pack_threads = int(config.get((b"pack",), b"threads").decode())
        except KeyError:
            pack_threads = None
        try:
            pack_big_file_threshold = int(
                config.get((b"pack",), b"bigFileThreshold").decode()
            )
        except KeyError:
            pack_big_file_threshold = None

        # Read core.commitGraph setting
        use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

        instance = cls(
            path,
            loose_compression_level,
            pack_compression_level,
            pack_index_version,
            pack_delta_window_size,
            pack_window_memory,
            pack_delta_cache_size,
            pack_depth,
            pack_threads,
            pack_big_file_threshold,
        )
        instance._use_commit_graph = use_commit_graph
        return instance

    @property
    def alternates(self):
        """Get the list of alternate object stores.

        Reads from .git/objects/info/alternates if not already cached.

        Returns:
            List of DiskObjectStore instances for alternate object directories
        """
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self):
        """Yield the paths listed in objects/info/alternates, if present."""
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
        except FileNotFoundError:
            return
        with f:
            for line in f.readlines():
                line = line.rstrip(b"\n")
                if line.startswith(b"#"):
                    continue
                if os.path.isabs(line):
                    yield os.fsdecode(line)
                else:
                    # Relative paths are resolved against this store's path.
                    yield os.fsdecode(os.path.join(os.fsencode(self.path), line))

    def add_alternate_path(self, path) -> None:
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except FileExistsError:
            pass
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, "wb") as f:
            # Preserve any existing entries before appending the new one.
            try:
                orig_f = open(alternates_path, "rb")
            except FileNotFoundError:
                pass
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(os.fsencode(path) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self):
        """Read and iterate over new pack files and cache them."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except FileNotFoundError:
            self.close()
            return []
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[: -len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = Pack(
                    os.path.join(self.pack_dir, f),
                    delta_window_size=self.pack_delta_window_size,
                    window_memory=self.pack_window_memory,
                    delta_cache_size=self.pack_delta_cache_size,
                    depth=self.pack_depth,
                    threads=self.pack_threads,
                    big_file_threshold=self.pack_big_file_threshold,
                )
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _get_shafile_path(self, sha):
        """Return the on-disk path for a loose object with the given hex sha."""
        # Check from object dir
        return hex_to_filename(self.path, sha)

    def _iter_loose_objects(self):
        """Iterate over the hex shas of all loose objects on disk."""
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                sha = os.fsencode(base + rest)
                if not valid_hexsha(sha):
                    continue
                yield sha

    def count_loose_objects(self) -> int:
        """Count the number of loose objects in the object store.

        Returns:
            Number of loose objects
        """
        count = 0
        if not os.path.exists(self.path):
            return 0

        for i in range(256):
            subdir = os.path.join(self.path, f"{i:02x}")
            try:
                count += len(
                    [
                        name
                        for name in os.listdir(subdir)
                        if len(name) == 38  # 40 - 2 for the prefix
                    ]
                )
            except FileNotFoundError:
                # Directory may have been removed or is inaccessible
                continue

        return count

    def _get_loose_object(self, sha):
        """Load a loose object by hex sha, or None if it does not exist."""
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except FileNotFoundError:
            return None

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object from disk.

        Args:
            sha: SHA1 of the object to delete

        Raises:
            FileNotFoundError: If the object file doesn't exist
        """
        os.remove(self._get_shafile_path(sha))

    def get_object_mtime(self, sha):
        """Get the modification time of an object.

        Args:
            sha: SHA1 of the object

        Returns:
            Modification time as seconds since epoch

        Raises:
            KeyError: if the object is not found
        """
        # First check if it's a loose object
        if self.contains_loose(sha):
            path = self._get_shafile_path(sha)
            try:
                return os.path.getmtime(path)
            except FileNotFoundError:
                pass

        # Check if it's in a pack file
        for pack in self.packs:
            try:
                if sha in pack:
                    # Use the pack file's mtime for packed objects
                    pack_path = pack._data_path
                    try:
                        return os.path.getmtime(pack_path)
                    except (FileNotFoundError, AttributeError):
                        pass
            except PackFileDisappeared:
                pass

        raise KeyError(sha)

    def _remove_pack(self, pack) -> None:
        """Close a pack, drop it from the cache and remove its files."""
        try:
            del self._pack_cache[os.path.basename(pack._basename)]
        except KeyError:
            pass
        pack.close()
        os.remove(pack.data.path)
        os.remove(pack.index.path)

    def _get_pack_basepath(self, entries):
        """Return the base path (without extension) for a pack of entries."""
        suffix = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix.decode("ascii")
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_pack(self, f, path, num_objects, indexer, progress=None):
        """Move a specific file containing a pack into the pack directory.

        Note: The file should be on the same file system as the
        packs directory.

        Args:
            f: Open file object for the pack.
            path: Path to the pack file.
            num_objects: Number of objects in the pack, for progress
                reporting.
            indexer: A PackIndexer for indexing the pack.
            progress: Optional progress reporting callback.
        """
        entries = []
        for i, entry in enumerate(indexer):
            if progress is not None:
                progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
            entries.append(entry)

        pack_sha, extra_entries = extend_pack(
            f,
            indexer.ext_refs(),
            get_raw=self.get_raw,
            compression_level=self.pack_compression_level,
            progress=progress,
        )
        f.flush()
        try:
            fileno = f.fileno()
        except AttributeError:
            pass
        else:
            os.fsync(fileno)
        f.close()

        entries.extend(extra_entries)

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)

        for pack in self.packs:
            if pack._basename == pack_base_name:
                return pack

        target_pack_path = pack_base_name + ".pack"
        target_index_path = pack_base_name + ".idx"
        if sys.platform == "win32":
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            with suppress(FileNotFoundError):
                os.remove(target_pack_path)
        os.rename(path, target_pack_path)

        # Write the index.
        with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
            write_pack_index(
                index_file, entries, pack_sha, version=self.pack_index_version
            )

        # Add the pack to the store and return it.
        final_pack = Pack(
            pack_base_name,
            delta_window_size=self.pack_delta_window_size,
            window_memory=self.pack_window_memory,
            delta_cache_size=self.pack_delta_cache_size,
            depth=self.pack_depth,
            threads=self.pack_threads,
            big_file_threshold=self.pack_big_file_threshold,
        )
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(self, read_all, read_some, progress=None):
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
        Returns: A Pack object pointing at the now-completed thin pack in the
            objects/pack directory.
        """
        import tempfile

        fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
        with os.fdopen(fd, "w+b") as f:
            os.chmod(path, PACK_MODE)
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
            copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
            copier.verify(progress=progress)
            return self._complete_pack(f, path, len(copier), indexer, progress=progress)

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, "w+b")
        os.chmod(path, PACK_MODE)

        def commit():
            if f.tell() > 0:
                f.seek(0)
                with PackData(path, f) as pd:
                    indexer = PackIndexer.for_pack_data(
                        pd, resolve_ext_ref=self.get_raw
                    )
                    return self._complete_pack(f, path, len(pd), indexer)
            else:
                # Nothing was written; discard the temporary file.
                f.close()
                os.remove(path)
                return None

        def abort() -> None:
            f.close()
            os.remove(path)

        return f, commit, abort

    def add_object(self, obj) -> None:
        """Add a single object to this object store.

        Args:
            obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except FileExistsError:
            pass
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, "wb", mask=PACK_MODE) as f:
            f.write(
                obj.as_legacy_object(compression_level=self.loose_compression_level)
            )

    @classmethod
    def init(cls, path: Union[str, os.PathLike]):
        """Initialize a new disk object store.

        Creates the necessary directory structure for a Git object store.

        Args:
            path: Path where the object store should be created

        Returns:
            New DiskObjectStore instance
        """
        try:
            os.mkdir(path)
        except FileExistsError:
            pass
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)

    def iter_prefix(self, prefix):
        """Iterate over all object SHAs with the given prefix.

        Args:
            prefix: Hex prefix to search for (as bytes)

        Returns:
            Iterator of object SHAs (as bytes) matching the prefix
        """
        if len(prefix) < 2:
            yield from super().iter_prefix(prefix)
            return
        seen = set()
        dir = prefix[:2].decode()
        rest = prefix[2:].decode()
        try:
            for name in os.listdir(os.path.join(self.path, dir)):
                if name.startswith(rest):
                    sha = os.fsencode(dir + name)
                    if sha not in seen:
                        seen.add(sha)
                        yield sha
        except FileNotFoundError:
            pass

        # The binary prefix is loop-invariant; compute it once rather than
        # per pack. Odd-length prefixes drop the trailing nibble for the
        # index lookup and rely on the startswith() check below.
        bin_prefix = (
            binascii.unhexlify(prefix)
            if len(prefix) % 2 == 0
            else binascii.unhexlify(prefix[:-1])
        )
        for p in self.packs:
            for sha in p.index.iter_prefix(bin_prefix):
                sha = sha_to_hex(sha)
                if sha.startswith(prefix) and sha not in seen:
                    seen.add(sha)
                    yield sha
        for alternate in self.alternates:
            for sha in alternate.iter_prefix(prefix):
                if sha not in seen:
                    seen.add(sha)
                    yield sha

    def get_commit_graph(self):
        """Get the commit graph for this object store.

        Returns:
            CommitGraph object if available, None otherwise
        """
        if not self._use_commit_graph:
            return None

        if self._commit_graph is None:
            from .commit_graph import read_commit_graph

            # Look for commit graph in our objects directory
            graph_file = os.path.join(self.path, "info", "commit-graph")
            if os.path.exists(graph_file):
                self._commit_graph = read_commit_graph(graph_file)
        return self._commit_graph

    def write_commit_graph(self, refs=None, reachable=True) -> None:
        """Write a commit graph file for this object store.

        Args:
            refs: List of refs to include. If None, includes all refs from object store.
            reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.
        """
        from .commit_graph import get_reachable_commits

        if refs is None:
            # Get all commit objects from the object store
            all_refs = []
            # Iterate through all objects to find commits
            for sha in self:
                try:
                    obj = self[sha]
                    if obj.type_name == b"commit":
                        all_refs.append(sha)
                except KeyError:
                    continue
        else:
            # Use provided refs
            all_refs = refs

        if not all_refs:
            return  # No commits to include

        if reachable:
            # Get all reachable commits
            commit_ids = get_reachable_commits(self, all_refs)
        else:
            # Just use the direct ref targets - ensure they're hex ObjectIDs
            commit_ids = []
            for ref in all_refs:
                if isinstance(ref, bytes) and len(ref) == 40:
                    # Already hex ObjectID
                    commit_ids.append(ref)
                elif isinstance(ref, bytes) and len(ref) == 20:
                    # Binary SHA, convert to hex ObjectID
                    from .objects import sha_to_hex

                    commit_ids.append(sha_to_hex(ref))
                else:
                    # Assume it's already correct format
                    commit_ids.append(ref)

        if commit_ids:
            # Write commit graph directly to our object store path
            # Generate the commit graph
            from .commit_graph import generate_commit_graph

            graph = generate_commit_graph(self, commit_ids)

            if graph.entries:
                # Ensure the info directory exists
                info_dir = os.path.join(self.path, "info")
                os.makedirs(info_dir, exist_ok=True)

                # Write using GitFile for atomic operation
                graph_path = os.path.join(info_dir, "commit-graph")
                with GitFile(graph_path, "wb") as f:
                    assert isinstance(
                        f, _GitFile
                    )  # GitFile in write mode always returns _GitFile
                    graph.write_to_file(f)

        # Clear cached commit graph so it gets reloaded
        self._commit_graph = None

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This removes temporary files that were left behind by interrupted
        pack operations. These are files that start with ``tmp_pack_`` in the
        repository directory or files with .pack extension but no corresponding
        .idx file in the pack directory.

        Args:
            grace_period: Grace period in seconds for removing temporary files.
                If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
        """
        import glob

        if grace_period is None:
            grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD

        # Clean up tmp_pack_* files in the repository directory
        for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
            # Check if file is old enough (more than grace period)
            mtime = os.path.getmtime(tmp_file)
            if time.time() - mtime > grace_period:
                os.remove(tmp_file)

        # Clean up orphaned .pack files without corresponding .idx files
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except FileNotFoundError:
            return

        pack_files = {}
        idx_files = set()

        for name in pack_dir_contents:
            if name.endswith(".pack"):
                base_name = name[:-5]  # Remove .pack extension
                pack_files[base_name] = name
            elif name.endswith(".idx"):
                base_name = name[:-4]  # Remove .idx extension
                idx_files.add(base_name)

        # Remove .pack files without corresponding .idx files
        for base_name, pack_name in pack_files.items():
            if base_name not in idx_files:
                pack_path = os.path.join(self.pack_dir, pack_name)
                # Check if file is old enough (more than grace period)
                mtime = os.path.getmtime(pack_path)
                if time.time() - mtime > grace_period:
                    os.remove(pack_path)

1693 

class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self) -> None:
        """Initialize a MemoryObjectStore.

        Creates an empty in-memory object store.
        """
        super().__init__()
        # Maps hex object ID (bytes, as produced by obj.id / sha_to_hex)
        # to the stored ShaFile. The previous dict[str, ShaFile] annotation
        # was wrong: keys are bytes throughout.
        self._data: dict[bytes, ShaFile] = {}
        self.pack_compression_level = -1

    def _to_hexsha(self, sha):
        """Normalize a hex (40) or binary (20) sha to hex form.

        Raises:
            ValueError: if the sha has an invalid length.
        """
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        # Memory stores never hold packed objects.
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name: ObjectID):
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID):
        """Retrieve an object by SHA.

        Args:
            name: SHA of the object (as hex string or bytes)

        Returns:
            Copy of the ShaFile object

        Raises:
            KeyError: If the object is not found
        """
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj) -> None:
        """Add a single object to this object store."""
        # Store a copy so later mutation of obj does not affect the store.
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects, progress=None) -> None:
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress reporting callback (unused here)
        """
        for obj, _path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        from tempfile import SpooledTemporaryFile

        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            size = f.tell()
            if size > 0:
                f.seek(0)
                p = PackData.from_file(f, size)
                for obj in PackInflater.for_pack_data(p, self.get_raw):
                    self.add_object(obj)
                p.close()
                f.close()
            else:
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort

    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> None:
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator over the objects to add
            progress: Optional progress reporting callback
        """
        if count == 0:
            return

        # Since MemoryObjectStore doesn't support pack files, we need to
        # extract individual objects. To handle deltas properly, we write
        # to a temporary pack and then use PackInflater to resolve them.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
            )
        except BaseException:
            abort()
            raise
        else:
            commit()

    def add_thin_pack(self, read_all, read_some, progress=None) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
            progress: Optional progress reporting callback
        """
        f, commit, abort = self.add_pack()
        try:
            copier = PackStreamCopier(read_all, read_some, f)
            copier.verify()
        except BaseException:
            abort()
            raise
        else:
            commit()

1853 

class ObjectIterator(Protocol):
    """Protocol describing containers that can yield all of their objects."""

    def iterobjects(self) -> Iterator[ShaFile]:
        """Iterate over every object held by this container.

        Returns:
            Iterator yielding ShaFile instances
        """
        raise NotImplementedError(self.iterobjects)

1864 

1865 

def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup
    Returns: A tuple of (mode, SHA) of the resulting path.
    Raises:
        NotTreeError: if root_sha does not refer to a tree object.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        raise NotTreeError(root_sha)
    # Delegate the per-component traversal to the Tree implementation.
    return root.lookup_path(lookup_obj, path)

1879 

1880 

def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Collect SHA1s of files and directories for specified tree.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories
    """
    tree = obj_store[tree_sha]
    assert isinstance(tree, Tree)
    for _name, mode, sha in tree.iteritems():
        # Skip submodule (gitlink) entries and anything already collected.
        if S_ISGITLINK(mode) or sha in kset:
            continue
        kset.add(sha)
        if stat.S_ISDIR(mode):
            # Recurse into subtrees to gather nested entries.
            _collect_filetree_revs(obj_store, sha, kset)

1898 

1899 

def _split_commits_and_tags(
    obj_store: ObjectContainer, lst, *, ignore_unknown=False
) -> tuple[set[bytes], set[bytes], set[bytes]]:
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    Args:
        obj_store: Object store to get objects by SHA1 from
        lst: Collection of commit and tag SHAs
        ignore_unknown: True to skip SHA1 missing in the repository
            silently.
    Returns: A tuple of (commits, tags, others) SHA1s
    """
    commits: set[bytes] = set()
    tags: set[bytes] = set()
    others: set[bytes] = set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if not ignore_unknown:
                raise
            continue
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # A tag drags along its target; classify it recursively,
            # since tags may point at other tags.
            tagged = obj.object[1]
            sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                obj_store, [tagged], ignore_unknown=ignore_unknown
            )
            commits |= sub_commits
            tags |= sub_tags
            others |= sub_others
        else:
            others.add(sha)
    return (commits, tags, others)

1941 

1942 

class MissingObjectFinder:
    """Find the objects missing from another object store.

    Args:
      object_store: Object store containing at least all objects to be
        sent
      haves: SHA1s of commits not to send (already present in target)
      wants: SHA1s of commits to send
      progress: Optional function to report progress to.
      get_tagged: Function that returns a dict of pointed-to sha -> tag
        sha for including tags.
      get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(
        self,
        object_store,
        haves,
        wants,
        *,
        shallow=None,
        progress=None,
        get_tagged=None,
        get_parents=lambda commit: commit.parents,
    ) -> None:
        """Initialize a MissingObjectFinder.

        Args:
          object_store: Object store containing objects
          haves: SHA1s of objects already present in target
          wants: SHA1s of objects to send
          shallow: Set of shallow commit SHA1s
          progress: Optional progress reporting callback
          get_tagged: Function returning dict of pointed-to sha -> tag sha
          get_parents: Function for getting commit parents
        """
        self.object_store = object_store
        shallow = shallow if shallow is not None else set()
        self._get_parents = get_parents
        # Commits and tags are processed separately.  While 'haves' may
        # name commits/tags that are not available locally (such SHAs get
        # filtered out by _split_commits_and_tags), every sha in 'wants'
        # must be known, or _split_commits_and_tags raises KeyError.
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, ignore_unknown=True
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, ignore_unknown=False
        )
        # Everything reachable from 'haves' must not be sent (the target
        # already has the complete repository up to those commits).
        all_ancestors = _collect_ancestors(
            object_store, have_commits, shallow=shallow, get_parents=self._get_parents
        )[0]
        # missing_commits: complete set of commits between haves and wants;
        # common_commits: members of all_ancestors reached while walking
        # the parent hierarchy of wants.
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            all_ancestors,
            shallow=shallow,
            get_parents=self._get_parents,
        )
        self.remote_has: set[bytes] = set()
        # Seed sha_done with the commits -- and the file/directory
        # revisions they reference -- known to exist both locally and on
        # the target, so none of them is selected for fetch.
        for commit_sha in common_commits:
            self.remote_has.add(commit_sha)
            commit_obj = object_store[commit_sha]
            _collect_filetree_revs(object_store, commit_obj.tree, self.remote_has)
        # Tags the target already has count as visited as well.
        self.remote_has.update(have_tags)
        self.sha_done = set(self.remote_has)

        # What we actually 'want' is the union of the commits, tags and
        # other objects found missing above.
        self.objects_to_send: set[
            tuple[ObjectID, Optional[bytes], Optional[int], bool]
        ] = {(sha, None, Commit.type_num, False) for sha in missing_commits}
        self.objects_to_send.update(
            (sha, None, Tag.type_num, False)
            for sha in want_tags.difference(have_tags)
        )
        self.objects_to_send.update(
            (sha, None, None, False) for sha in want_others.difference(have_others)
        )

        self.progress = progress if progress is not None else (lambda x: None)
        self._tagged = (get_tagged and get_tagged()) or {}

    def get_remote_has(self):
        """Get the set of SHAs the remote has.

        Returns:
          Set of SHA1s that the remote side already has
        """
        return self.remote_has

    def add_todo(
        self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
    ) -> None:
        """Add objects to the todo list.

        Args:
          entries: Iterable of tuples (sha, name, type_num, is_leaf)
        """
        self.objects_to_send.update(
            entry for entry in entries if entry[0] not in self.sha_done
        )

    def __next__(self) -> tuple[bytes, Optional[PackHint]]:
        """Get the next object to send.

        Returns:
          Tuple of (sha, pack_hint)

        Raises:
          StopIteration: When no more objects to send
        """
        # Keep popping until we hit a sha we have not processed yet.
        while True:
            if not self.objects_to_send:
                self.progress(
                    f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
                )
                raise StopIteration
            (sha, name, type_num, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            # Non-leaf objects may reference further objects to send.
            obj = self.object_store[sha]
            if isinstance(obj, Commit):
                self.add_todo([(obj.tree, b"", Tree.type_num, False)])
            elif isinstance(obj, Tree):
                subentries = []
                for entry_name, entry_mode, entry_sha in obj.iteritems():
                    # Submodule links point outside this repository.
                    if S_ISGITLINK(entry_mode):
                        continue
                    if stat.S_ISREG(entry_mode):
                        entry_type = Blob.type_num
                    else:
                        entry_type = Tree.type_num
                    subentries.append(
                        (
                            entry_sha,
                            entry_name,
                            entry_type,
                            not stat.S_ISDIR(entry_mode),
                        )
                    )
                self.add_todo(subentries)
            elif isinstance(obj, Tag):
                self.add_todo([(obj.object[1], None, obj.object[0].type_num, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, None, True)])
        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
        pack_hint = None if type_num is None else (type_num, name)
        return (sha, pack_hint)

    def __iter__(self):
        """Return iterator over objects to send.

        Returns:
          Self (this class implements the iterator protocol)
        """
        return self

2114 

2115 

class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    heads: set[ObjectID]
    """Revisions without descendants in the local repo."""

    get_parents: Callable[[ObjectID], ObjectID]
    """Function to retrieve parents in the local repo."""

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents,
        shallow: Optional[set[ObjectID]] = None,
        update_shallow=None,
    ) -> None:
        """Create a new instance.

        Args:
          local_heads: Heads to start search with
          get_parents: Function for finding the parents of a SHA1.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
        self.shallow = shallow if shallow is not None else set()
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError(f"unexpected sha {sha!r} received")
        frontier = {sha}

        # Peel off one ancestor generation per pass, pruning any that are
        # still listed as heads; stop once no heads remain to remove.
        while self.heads:
            self.heads.difference_update(frontier)

            # Gather the next generation of ancestors, marking each node
            # in the current frontier as fully processed (parents -> None).
            next_frontier = set()
            for node in frontier:
                known_parents = self.parents.get(node)
                if known_parents is not None:
                    next_frontier.update(known_parents)
                self.parents[node] = None

            # Frontier exhausted; nothing further to walk.
            if not next_frontier:
                break

            frontier = next_frontier

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        ret = self.heads.pop()
        try:
            parent_shas = self.get_parents(ret)
        except KeyError:
            return None
        self.parents[ret] = parent_shas
        # Queue parents we have not seen yet as new heads to walk.
        self.heads.update(p for p in parent_shas if p not in self.parents)
        return ret

    __next__ = next

2191 

2192 

def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. See the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
      object_store: Object store to store new objects in
        and retrieve old ones from.
      tree: Original tree root
      changes: changes to apply
    Returns: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    nested_changes = {}
    for path, new_mode, new_sha in changes:
        if b"/" in path:
            # Change applies inside a subdirectory: bucket it by the first
            # path component for a recursive pass below.
            dirname, subpath = path.split(b"/", 1)
            nested_changes.setdefault(dirname, []).append(
                (subpath, new_mode, new_sha)
            )
        elif new_sha is None:
            # A None sha means "delete this entry".
            del tree[path]
        else:
            tree[path] = (new_mode, new_sha)
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree = object_store[tree[name][1]]
        except KeyError:
            # Subdirectory does not exist yet; start from an empty tree.
            orig_subtree = Tree()
        subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
        if len(subtree) == 0:
            # Deleting the last entry removes the subdirectory itself.
            del tree[name]
        else:
            tree[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree)
    return tree

2239 

2240 

class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores.

    Reads consult each store in ``bases`` in order; writes go to
    ``add_store`` when one is provided.
    """

    def __init__(self, bases, add_store=None) -> None:
        """Initialize an OverlayObjectStore.

        Args:
          bases: List of base object stores to overlay
          add_store: Optional store to write new objects to
        """
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object):
        """Add a single object to the store.

        Args:
          object: Object to add

        Raises:
          NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects, progress=None):
        """Add multiple objects to the store.

        Args:
          objects: Iterator of objects to add
          progress: Optional progress reporting callback

        Raises:
          NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Fix: previously referenced self.add_object, misreporting
            # which method is unimplemented.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self):
        """Get the list of packs from all overlaid stores.

        Returns:
          Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
          Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[bytes] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[bytes],
        *,
        include_comp=False,
        allow_missing: bool = False,
        convert_ofs_delta=True,
    ) -> Iterator[ShaFile]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
          shas: Iterable of object SHAs to retrieve
          include_comp: Whether to include compressed data
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
          Iterator of unpacked objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[bytes] = set()
        for b in self.bases:
            # Pass each base a snapshot so we never mutate a set a base
            # store may still be iterating over (previously this removed
            # from `todo` mid-iteration).
            current_todo = todo - found
            for o in b.iter_unpacked_subset(
                current_todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                found.add(o.id)
        # Fix: raise for an object that is actually missing.  The old code
        # raised KeyError(o.id) -- the id of the last *found* object -- and
        # crashed with NameError when nothing had been yielded at all.
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def get_raw(self, sha_id):
        """Get the raw object data from the overlaid stores.

        Args:
          sha_id: SHA of the object

        Returns:
          Tuple of (type_num, raw_data)

        Raises:
          KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha) -> bool:
        """Check if an object is packed in any base store.

        Args:
          sha: SHA of the object

        Returns:
          True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha) -> bool:
        """Check if an object is loose in any base store.

        Args:
          sha: SHA of the object

        Returns:
          True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False

2418 

2419 

def read_packs_file(f):
    """Yield the packs listed in a packs file."""
    contents = f.read()
    for line in contents.splitlines():
        # Skip blank lines; every other line is "<kind> <name>".
        if not line:
            continue
        (kind, name) = line.split(b" ", 1)
        # Only 'P' entries name pack files.
        if kind == b"P":
            yield os.fsdecode(name)

2429 

2430 

class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend."""

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        # Bucket stores keep everything in packs.
        return iter([])

    def _get_loose_object(self, sha) -> None:
        # No loose objects exist in a bucket store.
        return None

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
          sha: SHA of the object to delete
        """
        # Nothing to delete: loose objects never exist here.

    def _remove_pack(self, name) -> None:
        raise NotImplementedError(self._remove_pack)

    def _iter_pack_names(self) -> Iterator[str]:
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name) -> Pack:
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self):
        # Synchronise the in-memory pack cache with the packs currently
        # present in the bucket, returning the newly opened ones.
        current_names = set(self._iter_pack_names())

        # Open pack files that appeared since the last refresh.
        opened = []
        for pack_name in current_names:
            if pack_name not in self._pack_cache:
                pack = self._get_pack(pack_name)
                opened.append(pack)
                self._pack_cache[pack_name] = pack
        # Close and drop cache entries for packs that disappeared.
        for pack_name in set(self._pack_cache) - current_names:
            self._pack_cache.pop(pack_name).close()
        return opened

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        raise NotImplementedError

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit():
            # Nothing was written: nothing to commit.
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)
            p = PackData(pf.name, pf)
            entries = p.sorted_entries()
            # The pack's canonical name is the sha1 over its object ids.
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf)
            # If an identical pack is already present, reuse it rather
            # than uploading a duplicate.
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        # Closing the spooled file doubles as the abort function.
        return pf, commit, pf.close

2524 

2525 

def _collect_ancestors(
    store: ObjectContainer,
    heads,
    common: frozenset[ObjectID] = frozenset(),
    shallow: frozenset[ObjectID] = frozenset(),
    get_parents=lambda commit: commit.parents,
):
    """Collect all ancestors of heads up to (excluding) those in common.

    Args:
      heads: commits to start from
      common: commits to end at, or empty set to walk repository
        completely
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: a tuple (A, B) where A - all commits reachable
      from heads but not present in common, B - common (shared) elements
      that are directly reachable from heads
    """
    bases = set()
    commits = set()
    queue = list(heads)

    # The commit graph, when available, yields parents without having to
    # load each commit object.
    commit_graph = store.get_commit_graph()

    while queue:
        sha = queue.pop(0)
        if sha in common:
            # Hit the boundary: record it and do not walk past it.
            bases.add(sha)
            continue
        if sha in commits:
            # Already visited.
            continue
        commits.add(sha)
        if sha in shallow:
            # Shallow commits have no (visible) parents.
            continue

        parents = commit_graph.get_parents(sha) if commit_graph else None
        if parents is None:
            # No commit-graph entry; fall back to loading the object.
            parents = get_parents(store[sha])

        queue.extend(parents)
    return (commits, bases)

2574 

2575 

def iter_tree_contents(
    store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
):
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
      tree_id: SHA1 of the tree.
      include_trees: If True, include tree objects in the iteration.
    Returns: Iterator over TreeEntry namedtuples for all the objects in a
      tree.
    """
    if tree_id is None:
        return
    # Explicit stack for depth-first traversal; children are pushed in
    # reverse name order so they pop (and yield) in name order.
    stack = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while stack:
        entry = stack.pop()
        is_dir = stat.S_ISDIR(entry.mode)
        if is_dir:
            tree = store[entry.sha]
            assert isinstance(tree, Tree)
            children = [
                subentry.in_path(entry.path)
                for subentry in tree.iteritems(name_order=True)
            ]
            stack.extend(reversed(children))
        if include_trees or not is_dir:
            yield entry

2605 

2606 

def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
      sha: The object SHA to peel.
    Returns: The fully-peeled SHA1 of a tag object, after peeling all
      intermediate tags; if the original ref does not point to a tag,
      this will equal the original SHA1.
    """
    unpeeled = store[sha]
    current = unpeeled
    pointed_class = object_class(current.type_name)
    # Follow the tag chain; each Tag records (target_class, target_sha)
    # in .object, which drives the next loop test.
    while pointed_class is Tag:
        assert isinstance(current, Tag)
        pointed_class, sha = current.object
        current = store[sha]
    return unpeeled, current