Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

1137 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from typing import ( 

36 Callable, 

37 Optional, 

38 Protocol, 

39 Union, 

40 cast, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

85INFODIR = "info" 

86PACKDIR = "pack" 

87 

88# use permissions consistent with Git; just readable by everyone 

89# TODO: should packs also be non-writable on Windows? if so, that 

90# would require some rather significant adjustments to the test suite

91PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

92 

93# Grace period for cleaning up temporary pack files (in seconds) 

94# Matches git's default of 2 weeks 

95DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

96 

97 

98def find_shallow(store, heads, depth): 

99 """Find shallow commits according to a given depth. 

100 

101 Args: 

102 store: An ObjectStore for looking up objects. 

103 heads: Iterable of head SHAs to start walking from. 

104 depth: The depth of ancestors to include. A depth of one includes 

105 only the heads themselves. 

106 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

107 considered shallow and unshallow according to the arguments. Note that 

108 these sets may overlap if a commit is reachable along multiple paths. 

109 """ 

110 parents = {} 

111 commit_graph = store.get_commit_graph() 

112 

113 def get_parents(sha): 

114 result = parents.get(sha, None) 

115 if not result: 

116 # Try to use commit graph first if available 

117 if commit_graph: 

118 graph_parents = commit_graph.get_parents(sha) 

119 if graph_parents is not None: 

120 result = graph_parents 

121 parents[sha] = result 

122 return result 

123 # Fall back to loading the object 

124 result = store[sha].parents 

125 parents[sha] = result 

126 return result 

127 

128 todo = [] # stack of (sha, depth) 

129 for head_sha in heads: 

130 obj = store[head_sha] 

131 # Peel tags if necessary 

132 while isinstance(obj, Tag): 

133 _, sha = obj.object 

134 obj = store[sha] 

135 if isinstance(obj, Commit): 

136 todo.append((obj.id, 1)) 

137 

138 not_shallow = set() 

139 shallow = set() 

140 while todo: 

141 sha, cur_depth = todo.pop() 

142 if cur_depth < depth: 

143 not_shallow.add(sha) 

144 new_depth = cur_depth + 1 

145 todo.extend((p, new_depth) for p in get_parents(sha)) 

146 else: 

147 shallow.add(sha) 

148 

149 return shallow, not_shallow 
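
# Illustrative usage sketch (not part of the dulwich source): exercising
# find_shallow() against a MemoryObjectStore (defined later in this module).
# The _make_commit() helper is hypothetical scaffolding; the Blob/Tree/Commit
# attribute names are the usual dulwich.objects ones.
#
#   store = MemoryObjectStore()
#
#   def _make_commit(parents, message):
#       blob = Blob.from_string(message)
#       tree = Tree()
#       tree.add(b"file", 0o100644, blob.id)
#       commit = Commit()
#       commit.tree = tree.id
#       commit.parents = parents
#       commit.author = commit.committer = b"Example <example@example.com>"
#       commit.author_time = commit.commit_time = 0
#       commit.author_timezone = commit.commit_timezone = 0
#       commit.message = message
#       for obj in (blob, tree, commit):
#           store.add_object(obj)
#       return commit.id
#
#   c1 = _make_commit([], b"first")
#   c2 = _make_commit([c1], b"second")
#   c3 = _make_commit([c2], b"third")
#
#   # With depth=2, c3 (depth 1) stays un-shallow and c2 becomes the boundary:
#   shallow, not_shallow = find_shallow(store, [c3], depth=2)
#   # shallow == {c2}, not_shallow == {c3}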

150 

151 

152def get_depth( 

153 store, 

154 head, 

155 get_parents=lambda commit: commit.parents, 

156 max_depth=None, 

157): 

158 """Return the current available depth for the given head. 

159 For commits with multiple parents, the largest possible depth will be 

160 returned. 

161 

162 Args: 

163 head: commit to start from 

164 get_parents: optional function for getting the parents of a commit 

165 max_depth: maximum depth to search 

166 """ 

167 if head not in store: 

168 return 0 

169 current_depth = 1 

170 queue = [(head, current_depth)] 

171 commit_graph = store.get_commit_graph() 

172 

173 while queue and (max_depth is None or current_depth < max_depth): 

174 e, depth = queue.pop(0) 

175 current_depth = max(current_depth, depth) 

176 

177 # Try to use commit graph for parent lookup if available 

178 parents = None 

179 if commit_graph: 

180 parents = commit_graph.get_parents(e) 

181 

182 if parents is None: 

183 # Fall back to loading the object 

184 cmt = store[e] 

185 if isinstance(cmt, Tag): 

186 _cls, sha = cmt.object 

187 cmt = store[sha] 

188 parents = get_parents(cmt) 

189 

190 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

191 return current_depth 
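
# Illustrative sketch (not part of the dulwich source): with the three-commit
# chain from the find_shallow() sketch above, get_depth() reports the longest
# parent chain reachable from the head.
#
#   get_depth(store, c3)                # -> 3 (c3 -> c2 -> c1)
#   get_depth(store, c3, max_depth=2)   # -> 2, stops once the cap is reached
#   get_depth(store, b"0" * 40)         # -> 0 for a head the store lacks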

192 

193 

194class PackContainer(Protocol): 

195 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

196 """Add a new pack.""" 

197 

198 

199class BaseObjectStore: 

200 """Object store interface.""" 

201 

202 def determine_wants_all( 

203 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

204 ) -> list[ObjectID]: 

205 def _want_deepen(sha): 

206 if not depth: 

207 return False 

208 if depth == DEPTH_INFINITE: 

209 return True 

210 return depth > self._get_depth(sha) 

211 

212 return [ 

213 sha 

214 for (ref, sha) in refs.items() 

215 if (sha not in self or _want_deepen(sha)) 

216 and not ref.endswith(PEELED_TAG_SUFFIX) 

217 and not sha == ZERO_SHA 

218 ] 

219 

220 def contains_loose(self, sha) -> bool: 

221 """Check if a particular object is present by SHA1 and is loose.""" 

222 raise NotImplementedError(self.contains_loose) 

223 

224 def __contains__(self, sha1: bytes) -> bool: 

225 """Check if a particular object is present by SHA1. 

226 

227 This method makes no distinction between loose and packed objects. 

228 """ 

229 return self.contains_loose(sha1) 

230 

231 @property 

232 def packs(self): 

233 """Iterable of pack objects.""" 

234 raise NotImplementedError 

235 

236 def get_raw(self, name) -> tuple[int, bytes]: 

237 """Obtain the raw text for an object. 

238 

239 Args: 

240 name: sha for the object. 

241 Returns: tuple with numeric type and object contents. 

242 """ 

243 raise NotImplementedError(self.get_raw) 

244 

245 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

246 """Obtain an object by SHA1.""" 

247 type_num, uncomp = self.get_raw(sha1) 

248 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

249 

250 def __iter__(self): 

251 """Iterate over the SHAs that are present in this store.""" 

252 raise NotImplementedError(self.__iter__) 

253 

254 def add_object(self, obj) -> None: 

255 """Add a single object to this object store.""" 

256 raise NotImplementedError(self.add_object) 

257 

258 def add_objects(self, objects, progress=None) -> None: 

259 """Add a set of objects to this object store. 

260 

261 Args: 

262 objects: Iterable over a list of (object, path) tuples 

263 """ 

264 raise NotImplementedError(self.add_objects) 

265 

266 def tree_changes( 

267 self, 

268 source, 

269 target, 

270 want_unchanged=False, 

271 include_trees=False, 

272 change_type_same=False, 

273 rename_detector=None, 

274 paths=None, 

275 ): 

276 """Find the differences between the contents of two trees. 

277 

278 Args: 

279 source: SHA1 of the source tree 

280 target: SHA1 of the target tree 

281 want_unchanged: Whether unchanged files should be reported 

282 include_trees: Whether to include trees 

283 change_type_same: Whether to report files changing 

284 type in the same entry. 

285 rename_detector: RenameDetector object for detecting renames. 

286 paths: Optional list of paths to filter to (as bytes). 

287 Returns: Iterator over tuples with 

288 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

289 """ 

290 from .diff_tree import tree_changes 

291 

292 for change in tree_changes( 

293 self, 

294 source, 

295 target, 

296 want_unchanged=want_unchanged, 

297 include_trees=include_trees, 

298 change_type_same=change_type_same, 

299 rename_detector=rename_detector, 

300 paths=paths, 

301 ): 

302 yield ( 

303 (change.old.path, change.new.path), 

304 (change.old.mode, change.new.mode), 

305 (change.old.sha, change.new.sha), 

306 ) 
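
# Illustrative sketch (not part of the dulwich source): comparing two
# single-entry trees held in a MemoryObjectStore; the yielded tuples follow
# the ((oldpath, newpath), (oldmode, newmode), (oldsha, newsha)) layout
# documented above.
#
#   store = MemoryObjectStore()
#   old_blob, new_blob = Blob.from_string(b"old"), Blob.from_string(b"new")
#   old_tree, new_tree = Tree(), Tree()
#   old_tree.add(b"a.txt", 0o100644, old_blob.id)
#   new_tree.add(b"a.txt", 0o100644, new_blob.id)
#   for obj in (old_blob, new_blob, old_tree, new_tree):
#       store.add_object(obj)
#   changes = list(store.tree_changes(old_tree.id, new_tree.id))
#   # -> [((b"a.txt", b"a.txt"), (0o100644, 0o100644),
#   #      (old_blob.id, new_blob.id))]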

307 

308 def iter_tree_contents(self, tree_id, include_trees=False): 

309 """Iterate the contents of a tree and all subtrees. 

310 

311 Iteration is depth-first pre-order, as in e.g. os.walk. 

312 

313 Args: 

314 tree_id: SHA1 of the tree. 

315 include_trees: If True, include tree objects in the iteration. 

316 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

317 tree. 

318 """ 

319 warnings.warn( 

320 "Please use dulwich.object_store.iter_tree_contents", 

321 DeprecationWarning, 

322 stacklevel=2, 

323 ) 

324 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

325 

326 def iterobjects_subset( 

327 self, shas: Iterable[bytes], *, allow_missing: bool = False 

328 ) -> Iterator[ShaFile]: 

329 for sha in shas: 

330 try: 

331 yield self[sha] 

332 except KeyError: 

333 if not allow_missing: 

334 raise 

335 

336 def find_missing_objects( 

337 self, 

338 haves, 

339 wants, 

340 shallow=None, 

341 progress=None, 

342 get_tagged=None, 

343 get_parents=lambda commit: commit.parents, 

344 ): 

345 """Find the missing objects required for a set of revisions. 

346 

347 Args: 

348 haves: Iterable over SHAs already in common. 

349 wants: Iterable over SHAs of objects to fetch. 

350 shallow: Set of shallow commit SHA1s to skip 

351 progress: Simple progress function that will be called with 

352 updated progress strings. 

353 get_tagged: Function that returns a dict of pointed-to sha -> 

354 tag sha for including tags. 

355 get_parents: Optional function for getting the parents of a 

356 commit. 

357 Returns: Iterator over (sha, path) pairs. 

358 """ 

359 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

360 finder = MissingObjectFinder( 

361 self, 

362 haves=haves, 

363 wants=wants, 

364 shallow=shallow, 

365 progress=progress, 

366 get_tagged=get_tagged, 

367 get_parents=get_parents, 

368 ) 

369 return iter(finder) 

370 

371 def find_common_revisions(self, graphwalker): 

372 """Find which revisions this store has in common using graphwalker. 

373 

374 Args: 

375 graphwalker: A graphwalker object. 

376 Returns: List of SHAs that are in common 

377 """ 

378 haves = [] 

379 sha = next(graphwalker) 

380 while sha: 

381 if sha in self: 

382 haves.append(sha) 

383 graphwalker.ack(sha) 

384 sha = next(graphwalker) 

385 return haves 

386 

387 def generate_pack_data( 

388 self, have, want, shallow=None, progress=None, ofs_delta=True 

389 ) -> tuple[int, Iterator[UnpackedObject]]: 

390 """Generate pack data objects for a set of wants/haves. 

391 

392 Args: 

393 have: List of SHA1s of objects that should not be sent 

394 want: List of SHA1s of objects that should be sent 

395 shallow: Set of shallow commit SHA1s to skip 

396 ofs_delta: Whether OFS deltas can be included 

397 progress: Optional progress reporting method 

398 """ 

399 # Note that the pack-specific implementation below is more efficient, 

400 # as it reuses deltas 

401 missing_objects = MissingObjectFinder( 

402 self, haves=have, wants=want, shallow=shallow, progress=progress 

403 ) 

404 object_ids = list(missing_objects) 

405 return pack_objects_to_data( 

406 [(self[oid], path) for oid, path in object_ids], 

407 ofs_delta=ofs_delta, 

408 progress=progress, 

409 ) 

410 

411 def peel_sha(self, sha): 

412 """Peel all tags from a SHA. 

413 

414 Args: 

415 sha: The object SHA to peel. 

416 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

417 intermediate tags; if the original ref does not point to a tag, 

418 this will equal the original SHA1. 

419 """ 

420 warnings.warn( 

421 "Please use dulwich.object_store.peel_sha()", 

422 DeprecationWarning, 

423 stacklevel=2, 

424 ) 

425 return peel_sha(self, sha)[1] 

426 

427 def _get_depth( 

428 self, 

429 head, 

430 get_parents=lambda commit: commit.parents, 

431 max_depth=None, 

432 ): 

433 """Return the current available depth for the given head. 

434 For commits with multiple parents, the largest possible depth will be 

435 returned. 

436 

437 Args: 

438 head: commit to start from 

439 get_parents: optional function for getting the parents of a commit 

440 max_depth: maximum depth to search 

441 """ 

442 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

443 

444 def close(self) -> None: 

445 """Close any files opened by this object store.""" 

446 # Default implementation is a NO-OP 

447 

448 def prune(self, grace_period: Optional[int] = None) -> None: 

449 """Prune/clean up this object store. 

450 

451 This includes removing orphaned temporary files and other 

452 housekeeping tasks. Default implementation is a NO-OP. 

453 

454 Args: 

455 grace_period: Grace period in seconds for removing temporary files. 

456 If None, uses the default grace period. 

457 """ 

458 # Default implementation is a NO-OP 

459 

460 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

461 """Iterate over all SHA1s that start with a given prefix. 

462 

463 The default implementation is a naive iteration over all objects. 

464 However, subclasses may override this method with more efficient 

465 implementations. 

466 """ 

467 for sha in self: 

468 if sha.startswith(prefix): 

469 yield sha 
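
# Illustrative sketch (not part of the dulwich source): the default prefix
# scan is a plain full iteration, so any store works.
#
#   store = MemoryObjectStore()
#   blob = Blob.from_string(b"hello")
#   store.add_object(blob)
#   list(store.iter_prefix(blob.id[:6]))   # -> [blob.id]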

470 

471 def get_commit_graph(self): 

472 """Get the commit graph for this object store. 

473 

474 Returns: 

475 CommitGraph object if available, None otherwise 

476 """ 

477 return None 

478 

479 def write_commit_graph(self, refs=None, reachable=True) -> None: 

480 """Write a commit graph file for this object store. 

481 

482 Args: 

483 refs: List of refs to include. If None, includes all refs from object store. 

484 reachable: If True, includes all commits reachable from refs. 

485 If False, only includes the direct ref targets. 

486 

487 Note: 

1488 The base implementation raises NotImplementedError; subclasses should

1489 override this method to provide commit graph writing functionality.

490 """ 

491 raise NotImplementedError(self.write_commit_graph) 

492 

493 def get_object_mtime(self, sha): 

494 """Get the modification time of an object. 

495 

496 Args: 

497 sha: SHA1 of the object 

498 

499 Returns: 

500 Modification time as seconds since epoch 

501 

502 Raises: 

503 KeyError: if the object is not found 

504 """ 

505 # Default implementation raises KeyError 

506 # Subclasses should override to provide actual mtime 

507 raise KeyError(sha) 

508 

509 

510class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

511 def __init__( 

512 self, 

513 pack_compression_level=-1, 

514 pack_index_version=None, 

515 pack_delta_window_size=None, 

516 pack_window_memory=None, 

517 pack_delta_cache_size=None, 

518 pack_depth=None, 

519 pack_threads=None, 

520 pack_big_file_threshold=None, 

521 ) -> None: 

522 self._pack_cache: dict[str, Pack] = {} 

523 self.pack_compression_level = pack_compression_level 

524 self.pack_index_version = pack_index_version 

525 self.pack_delta_window_size = pack_delta_window_size 

526 self.pack_window_memory = pack_window_memory 

527 self.pack_delta_cache_size = pack_delta_cache_size 

528 self.pack_depth = pack_depth 

529 self.pack_threads = pack_threads 

530 self.pack_big_file_threshold = pack_big_file_threshold 

531 

532 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

533 """Add a new pack to this object store.""" 

534 raise NotImplementedError(self.add_pack) 

535 

536 def add_pack_data( 

537 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

538 ) -> None: 

539 """Add pack data to this object store. 

540 

541 Args: 

542 count: Number of items to add 

543 """ 

544 if count == 0: 

545 # Don't bother writing an empty pack file 

546 return 

547 f, commit, abort = self.add_pack() 

548 try: 

549 write_pack_data( 

550 f.write, 

551 unpacked_objects, 

552 num_records=count, 

553 progress=progress, 

554 compression_level=self.pack_compression_level, 

555 ) 

556 except BaseException: 

557 abort() 

558 raise 

559 else: 

560 return commit() 

561 

562 @property 

563 def alternates(self): 

564 return [] 

565 

566 def contains_packed(self, sha) -> bool: 

567 """Check if a particular object is present by SHA1 and is packed. 

568 

569 This does not check alternates. 

570 """ 

571 for pack in self.packs: 

572 try: 

573 if sha in pack: 

574 return True 

575 except PackFileDisappeared: 

576 pass 

577 return False 

578 

579 def __contains__(self, sha) -> bool: 

580 """Check if a particular object is present by SHA1. 

581 

582 This method makes no distinction between loose and packed objects. 

583 """ 

584 if self.contains_packed(sha) or self.contains_loose(sha): 

585 return True 

586 for alternate in self.alternates: 

587 if sha in alternate: 

588 return True 

589 return False 

590 

591 def _add_cached_pack(self, base_name, pack) -> None: 

592 """Add a newly appeared pack to the cache by path.""" 

593 prev_pack = self._pack_cache.get(base_name) 

594 if prev_pack is not pack: 

595 self._pack_cache[base_name] = pack 

596 if prev_pack: 

597 prev_pack.close() 

598 

599 def generate_pack_data( 

600 self, have, want, shallow=None, progress=None, ofs_delta=True 

601 ) -> tuple[int, Iterator[UnpackedObject]]: 

602 """Generate pack data objects for a set of wants/haves. 

603 

604 Args: 

605 have: List of SHA1s of objects that should not be sent 

606 want: List of SHA1s of objects that should be sent 

607 shallow: Set of shallow commit SHA1s to skip 

608 ofs_delta: Whether OFS deltas can be included 

609 progress: Optional progress reporting method 

610 """ 

611 missing_objects = MissingObjectFinder( 

612 self, haves=have, wants=want, shallow=shallow, progress=progress 

613 ) 

614 remote_has = missing_objects.get_remote_has() 

615 object_ids = list(missing_objects) 

616 return len(object_ids), generate_unpacked_objects( 

617 cast(PackedObjectContainer, self), 

618 object_ids, 

619 progress=progress, 

620 ofs_delta=ofs_delta, 

621 other_haves=remote_has, 

622 ) 
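
# Illustrative sketch (not part of the dulwich source): the (count, iterator)
# pair returned above can be fed straight to write_pack_data(), e.g. when
# serving a fetch. head_sha stands in for a commit the client asked for.
#
#   count, records = store.generate_pack_data(have=[], want=[head_sha])
#   with open("out.pack", "wb") as f:
#       write_pack_data(f.write, records, num_records=count)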

623 

624 def _clear_cached_packs(self) -> None: 

625 pack_cache = self._pack_cache 

626 self._pack_cache = {} 

627 while pack_cache: 

628 (name, pack) = pack_cache.popitem() 

629 pack.close() 

630 

631 def _iter_cached_packs(self): 

632 return self._pack_cache.values() 

633 

634 def _update_pack_cache(self) -> list[Pack]: 

635 raise NotImplementedError(self._update_pack_cache) 

636 

637 def close(self) -> None: 

638 self._clear_cached_packs() 

639 

640 @property 

641 def packs(self): 

642 """List with pack objects.""" 

643 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

644 

645 def count_pack_files(self) -> int: 

646 """Count the number of pack files. 

647 

648 Returns: 

649 Number of pack files (excluding those with .keep files) 

650 """ 

651 count = 0 

652 for pack in self.packs: 

653 # Check if there's a .keep file for this pack 

654 keep_path = pack._basename + ".keep" 

655 if not os.path.exists(keep_path): 

656 count += 1 

657 return count 

658 

659 def _iter_alternate_objects(self): 

660 """Iterate over the SHAs of all the objects in alternate stores.""" 

661 for alternate in self.alternates: 

662 yield from alternate 

663 

664 def _iter_loose_objects(self): 

665 """Iterate over the SHAs of all loose objects.""" 

666 raise NotImplementedError(self._iter_loose_objects) 

667 

668 def _get_loose_object(self, sha) -> Optional[ShaFile]: 

669 raise NotImplementedError(self._get_loose_object) 

670 

671 def delete_loose_object(self, sha) -> None: 

672 """Delete a loose object. 

673 

674 This method only handles loose objects. For packed objects, 

675 use repack(exclude=...) to exclude them during repacking. 

676 """ 

677 raise NotImplementedError(self.delete_loose_object) 

678 

679 def _remove_pack(self, name) -> None: 

680 raise NotImplementedError(self._remove_pack) 

681 

682 def pack_loose_objects(self): 

683 """Pack loose objects. 

684 

685 Returns: Number of objects packed 

686 """ 

687 objects = set() 

688 for sha in self._iter_loose_objects(): 

689 objects.add((self._get_loose_object(sha), None)) 

690 self.add_objects(list(objects)) 

691 for obj, path in objects: 

692 self.delete_loose_object(obj.id) 

693 return len(objects) 

694 

695 def repack(self, exclude=None): 

696 """Repack the packs in this repository. 

697 

698 Note that this implementation is fairly naive and currently keeps all 

699 objects in memory while it repacks. 

700 

701 Args: 

702 exclude: Optional set of object SHAs to exclude from repacking 

703 """ 

704 if exclude is None: 

705 exclude = set() 

706 

707 loose_objects = set() 

708 excluded_loose_objects = set() 

709 for sha in self._iter_loose_objects(): 

710 if sha not in exclude: 

711 loose_objects.add(self._get_loose_object(sha)) 

712 else: 

713 excluded_loose_objects.add(sha) 

714 

715 objects = {(obj, None) for obj in loose_objects} 

716 old_packs = {p.name(): p for p in self.packs} 

717 for name, pack in old_packs.items(): 

718 objects.update( 

719 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

720 ) 

721 

722 # Only create a new pack if there are objects to pack 

723 if objects: 

724 # The name of the consolidated pack might match the name of a 

725 # pre-existing pack. Take care not to remove the newly created 

726 # consolidated pack. 

727 consolidated = self.add_objects(objects) 

728 old_packs.pop(consolidated.name(), None) 

729 

730 # Delete loose objects that were packed 

731 for obj in loose_objects: 

732 self.delete_loose_object(obj.id) 

733 # Delete excluded loose objects 

734 for sha in excluded_loose_objects: 

735 self.delete_loose_object(sha) 

736 for name, pack in old_packs.items(): 

737 self._remove_pack(pack) 

738 self._update_pack_cache() 

739 return len(objects) 
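
# Illustrative sketch (not part of the dulwich source): consolidating the
# store into a single pack, optionally leaving out known-garbage objects.
# unwanted_sha stands in for a hex ObjectID to drop.
#
#   store.repack()                        # pack loose objects and merge packs
#   store.repack(exclude={unwanted_sha})  # same, but skip one object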

740 

741 def __iter__(self): 

742 """Iterate over the SHAs that are present in this store.""" 

743 self._update_pack_cache() 

744 for pack in self._iter_cached_packs(): 

745 try: 

746 yield from pack 

747 except PackFileDisappeared: 

748 pass 

749 yield from self._iter_loose_objects() 

750 yield from self._iter_alternate_objects() 

751 

752 def contains_loose(self, sha): 

753 """Check if a particular object is present by SHA1 and is loose. 

754 

755 This does not check alternates. 

756 """ 

757 return self._get_loose_object(sha) is not None 

758 

759 def get_raw(self, name): 

760 """Obtain the raw fulltext for an object. 

761 

762 Args: 

763 name: sha for the object. 

764 Returns: tuple with numeric type and object contents. 

765 """ 

766 if name == ZERO_SHA: 

767 raise KeyError(name) 

768 if len(name) == 40: 

769 sha = hex_to_sha(name) 

770 hexsha = name 

771 elif len(name) == 20: 

772 sha = name 

773 hexsha = None 

774 else: 

775 raise AssertionError(f"Invalid object name {name!r}") 

776 for pack in self._iter_cached_packs(): 

777 try: 

778 return pack.get_raw(sha) 

779 except (KeyError, PackFileDisappeared): 

780 pass 

781 if hexsha is None: 

782 hexsha = sha_to_hex(name) 

783 ret = self._get_loose_object(hexsha) 

784 if ret is not None: 

785 return ret.type_num, ret.as_raw_string() 

786 # Maybe something else has added a pack with the object 

787 # in the meantime?

788 for pack in self._update_pack_cache(): 

789 try: 

790 return pack.get_raw(sha) 

791 except KeyError: 

792 pass 

793 for alternate in self.alternates: 

794 try: 

795 return alternate.get_raw(hexsha) 

796 except KeyError: 

797 pass 

798 raise KeyError(hexsha) 

799 

800 def iter_unpacked_subset( 

801 self, 

802 shas: set[bytes], 

803 include_comp: bool = False, 

804 allow_missing: bool = False, 

805 convert_ofs_delta: bool = True, 

806 ) -> Iterator[UnpackedObject]: 

807 todo: set[bytes] = set(shas) 

808 for p in self._iter_cached_packs(): 

809 for unpacked in p.iter_unpacked_subset( 

810 todo, 

811 include_comp=include_comp, 

812 allow_missing=True, 

813 convert_ofs_delta=convert_ofs_delta, 

814 ): 

815 yield unpacked 

816 hexsha = sha_to_hex(unpacked.sha()) 

817 todo.remove(hexsha) 

818 # Maybe something else has added a pack with the object 

819 # in the meantime?

820 for p in self._update_pack_cache(): 

821 for unpacked in p.iter_unpacked_subset( 

822 todo, 

823 include_comp=include_comp, 

824 allow_missing=True, 

825 convert_ofs_delta=convert_ofs_delta, 

826 ): 

827 yield unpacked 

828 hexsha = sha_to_hex(unpacked.sha()) 

829 todo.remove(hexsha) 

830 for alternate in self.alternates: 

831 for unpacked in alternate.iter_unpacked_subset( 

832 todo, 

833 include_comp=include_comp, 

834 allow_missing=True, 

835 convert_ofs_delta=convert_ofs_delta, 

836 ): 

837 yield unpacked 

838 hexsha = sha_to_hex(unpacked.sha()) 

839 todo.remove(hexsha) 

840 

841 def iterobjects_subset( 

842 self, shas: Iterable[bytes], *, allow_missing: bool = False 

843 ) -> Iterator[ShaFile]: 

844 todo: set[bytes] = set(shas) 

845 for p in self._iter_cached_packs(): 

846 for o in p.iterobjects_subset(todo, allow_missing=True): 

847 yield o 

848 todo.remove(o.id) 

849 # Maybe something else has added a pack with the object 

850 # in the meantime?

851 for p in self._update_pack_cache(): 

852 for o in p.iterobjects_subset(todo, allow_missing=True): 

853 yield o 

854 todo.remove(o.id) 

855 for alternate in self.alternates: 

856 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

857 yield o 

858 todo.remove(o.id) 

859 for oid in todo: 

860 o = self._get_loose_object(oid) 

861 if o is not None: 

862 yield o 

863 elif not allow_missing: 

864 raise KeyError(oid) 

865 

866 def get_unpacked_object( 

867 self, sha1: bytes, *, include_comp: bool = False 

868 ) -> UnpackedObject: 

869 """Obtain the unpacked object. 

870 

871 Args: 

872 sha1: sha for the object. 

873 """ 

874 if sha1 == ZERO_SHA: 

875 raise KeyError(sha1) 

876 if len(sha1) == 40: 

877 sha = hex_to_sha(sha1) 

878 hexsha = sha1 

879 elif len(sha1) == 20: 

880 sha = sha1 

881 hexsha = None 

882 else: 

883 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

884 for pack in self._iter_cached_packs(): 

885 try: 

886 return pack.get_unpacked_object(sha, include_comp=include_comp) 

887 except (KeyError, PackFileDisappeared): 

888 pass 

889 if hexsha is None: 

890 hexsha = sha_to_hex(sha1) 

891 # Maybe something else has added a pack with the object 

892 # in the meantime?

893 for pack in self._update_pack_cache(): 

894 try: 

895 return pack.get_unpacked_object(sha, include_comp=include_comp) 

896 except KeyError: 

897 pass 

898 for alternate in self.alternates: 

899 try: 

900 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

901 except KeyError: 

902 pass 

903 raise KeyError(hexsha) 

904 

905 def add_objects( 

906 self, 

907 objects: Sequence[tuple[ShaFile, Optional[str]]], 

908 progress: Optional[Callable[[str], None]] = None, 

909 ) -> None: 

910 """Add a set of objects to this object store. 

911 

912 Args: 

913 objects: Iterable over (object, path) tuples, should support 

914 __len__. 

915 Returns: Pack object of the objects written. 

916 """ 

917 count = len(objects) 

918 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

919 return self.add_pack_data(count, record_iter, progress=progress) 

920 

921 

922class DiskObjectStore(PackBasedObjectStore): 

923 """Git-style object store that exists on disk.""" 

924 

925 path: Union[str, os.PathLike] 

926 pack_dir: Union[str, os.PathLike] 

927 

928 def __init__( 

929 self, 

930 path: Union[str, os.PathLike], 

931 loose_compression_level=-1, 

932 pack_compression_level=-1, 

933 pack_index_version=None, 

934 pack_delta_window_size=None, 

935 pack_window_memory=None, 

936 pack_delta_cache_size=None, 

937 pack_depth=None, 

938 pack_threads=None, 

939 pack_big_file_threshold=None, 

940 ) -> None: 

941 """Open an object store. 

942 

943 Args: 

944 path: Path of the object store. 

945 loose_compression_level: zlib compression level for loose objects 

946 pack_compression_level: zlib compression level for pack objects 

947 pack_index_version: pack index version to use (1, 2, or 3) 

948 pack_delta_window_size: sliding window size for delta compression 

949 pack_window_memory: memory limit for delta window operations 

950 pack_delta_cache_size: size of cache for delta operations 

951 pack_depth: maximum delta chain depth 

952 pack_threads: number of threads for pack operations 

953 pack_big_file_threshold: threshold for treating files as big 

954 """ 

955 super().__init__( 

956 pack_compression_level=pack_compression_level, 

957 pack_index_version=pack_index_version, 

958 pack_delta_window_size=pack_delta_window_size, 

959 pack_window_memory=pack_window_memory, 

960 pack_delta_cache_size=pack_delta_cache_size, 

961 pack_depth=pack_depth, 

962 pack_threads=pack_threads, 

963 pack_big_file_threshold=pack_big_file_threshold, 

964 ) 

965 self.path = path 

966 self.pack_dir = os.path.join(self.path, PACKDIR) 

967 self._alternates = None 

968 self.loose_compression_level = loose_compression_level 

969 self.pack_compression_level = pack_compression_level 

970 self.pack_index_version = pack_index_version 

971 

972 # Commit graph support - lazy loaded 

973 self._commit_graph = None 

974 self._use_commit_graph = True # Default to true 

975 

976 def __repr__(self) -> str: 

977 return f"<{self.__class__.__name__}({self.path!r})>" 

978 

979 @classmethod 

980 def from_config(cls, path: Union[str, os.PathLike], config): 

981 try: 

982 default_compression_level = int( 

983 config.get((b"core",), b"compression").decode() 

984 ) 

985 except KeyError: 

986 default_compression_level = -1 

987 try: 

988 loose_compression_level = int( 

989 config.get((b"core",), b"looseCompression").decode() 

990 ) 

991 except KeyError: 

992 loose_compression_level = default_compression_level 

993 try: 

994 pack_compression_level = int( 

995 config.get((b"core",), b"packCompression").decode()

996 ) 

997 except KeyError: 

998 pack_compression_level = default_compression_level 

999 try: 

1000 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

1001 except KeyError: 

1002 pack_index_version = None 

1003 

1004 # Read pack configuration options 

1005 try: 

1006 pack_delta_window_size = int( 

1007 config.get((b"pack",), b"deltaWindowSize").decode() 

1008 ) 

1009 except KeyError: 

1010 pack_delta_window_size = None 

1011 try: 

1012 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode()) 

1013 except KeyError: 

1014 pack_window_memory = None 

1015 try: 

1016 pack_delta_cache_size = int( 

1017 config.get((b"pack",), b"deltaCacheSize").decode() 

1018 ) 

1019 except KeyError: 

1020 pack_delta_cache_size = None 

1021 try: 

1022 pack_depth = int(config.get((b"pack",), b"depth").decode()) 

1023 except KeyError: 

1024 pack_depth = None 

1025 try: 

1026 pack_threads = int(config.get((b"pack",), b"threads").decode()) 

1027 except KeyError: 

1028 pack_threads = None 

1029 try: 

1030 pack_big_file_threshold = int( 

1031 config.get((b"pack",), b"bigFileThreshold").decode() 

1032 ) 

1033 except KeyError: 

1034 pack_big_file_threshold = None 

1035 

1036 # Read core.commitGraph setting 

1037 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

1038 

1039 instance = cls( 

1040 path, 

1041 loose_compression_level, 

1042 pack_compression_level, 

1043 pack_index_version, 

1044 pack_delta_window_size, 

1045 pack_window_memory, 

1046 pack_delta_cache_size, 

1047 pack_depth, 

1048 pack_threads, 

1049 pack_big_file_threshold, 

1050 ) 

1051 instance._use_commit_graph = use_commit_graph 

1052 return instance 

1053 

1054 @property 

1055 def alternates(self): 

1056 if self._alternates is not None: 

1057 return self._alternates 

1058 self._alternates = [] 

1059 for path in self._read_alternate_paths(): 

1060 self._alternates.append(DiskObjectStore(path)) 

1061 return self._alternates 

1062 

1063 def _read_alternate_paths(self): 

1064 try: 

1065 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

1066 except FileNotFoundError: 

1067 return 

1068 with f: 

1069 for line in f.readlines(): 

1070 line = line.rstrip(b"\n") 

1071 if line.startswith(b"#"): 

1072 continue 

1073 if os.path.isabs(line): 

1074 yield os.fsdecode(line) 

1075 else: 

1076 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1077 

1078 def add_alternate_path(self, path) -> None: 

1079 """Add an alternate path to this object store.""" 

1080 try: 

1081 os.mkdir(os.path.join(self.path, INFODIR)) 

1082 except FileExistsError: 

1083 pass 

1084 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1085 with GitFile(alternates_path, "wb") as f: 

1086 try: 

1087 orig_f = open(alternates_path, "rb") 

1088 except FileNotFoundError: 

1089 pass 

1090 else: 

1091 with orig_f: 

1092 f.write(orig_f.read()) 

1093 f.write(os.fsencode(path) + b"\n") 

1094 

1095 if not os.path.isabs(path): 

1096 path = os.path.join(self.path, path) 

1097 self.alternates.append(DiskObjectStore(path)) 

1098 

1099 def _update_pack_cache(self): 

1100 """Read and iterate over new pack files and cache them.""" 

1101 try: 

1102 pack_dir_contents = os.listdir(self.pack_dir) 

1103 except FileNotFoundError: 

1104 self.close() 

1105 return [] 

1106 pack_files = set() 

1107 for name in pack_dir_contents: 

1108 if name.startswith("pack-") and name.endswith(".pack"): 

1109 # verify that idx exists first (otherwise the pack was not yet 

1110 # fully written) 

1111 idx_name = os.path.splitext(name)[0] + ".idx" 

1112 if idx_name in pack_dir_contents: 

1113 pack_name = name[: -len(".pack")] 

1114 pack_files.add(pack_name) 

1115 

1116 # Open newly appeared pack files 

1117 new_packs = [] 

1118 for f in pack_files: 

1119 if f not in self._pack_cache: 

1120 pack = Pack( 

1121 os.path.join(self.pack_dir, f), 

1122 delta_window_size=self.pack_delta_window_size, 

1123 window_memory=self.pack_window_memory, 

1124 delta_cache_size=self.pack_delta_cache_size, 

1125 depth=self.pack_depth, 

1126 threads=self.pack_threads, 

1127 big_file_threshold=self.pack_big_file_threshold, 

1128 ) 

1129 new_packs.append(pack) 

1130 self._pack_cache[f] = pack 

1131 # Remove disappeared pack files 

1132 for f in set(self._pack_cache) - pack_files: 

1133 self._pack_cache.pop(f).close() 

1134 return new_packs 

1135 

1136 def _get_shafile_path(self, sha): 

1137 # Check from object dir 

1138 return hex_to_filename(self.path, sha) 

1139 

1140 def _iter_loose_objects(self): 

1141 for base in os.listdir(self.path): 

1142 if len(base) != 2: 

1143 continue 

1144 for rest in os.listdir(os.path.join(self.path, base)): 

1145 sha = os.fsencode(base + rest) 

1146 if not valid_hexsha(sha): 

1147 continue 

1148 yield sha 

1149 

1150 def count_loose_objects(self) -> int: 

1151 """Count the number of loose objects in the object store. 

1152 

1153 Returns: 

1154 Number of loose objects 

1155 """ 

1156 count = 0 

1157 if not os.path.exists(self.path): 

1158 return 0 

1159 

1160 for i in range(256): 

1161 subdir = os.path.join(self.path, f"{i:02x}") 

1162 try: 

1163 count += len( 

1164 [ 

1165 name 

1166 for name in os.listdir(subdir) 

1167 if len(name) == 38 # 40 - 2 for the prefix 

1168 ] 

1169 ) 

1170 except FileNotFoundError: 

1171 # Directory may have been removed or is inaccessible 

1172 continue 

1173 

1174 return count 

1175 

1176 def _get_loose_object(self, sha): 

1177 path = self._get_shafile_path(sha) 

1178 try: 

1179 return ShaFile.from_path(path) 

1180 except FileNotFoundError: 

1181 return None 

1182 

1183 def delete_loose_object(self, sha) -> None: 

1184 os.remove(self._get_shafile_path(sha)) 

1185 

1186 def get_object_mtime(self, sha): 

1187 """Get the modification time of an object. 

1188 

1189 Args: 

1190 sha: SHA1 of the object 

1191 

1192 Returns: 

1193 Modification time as seconds since epoch 

1194 

1195 Raises: 

1196 KeyError: if the object is not found 

1197 """ 

1198 # First check if it's a loose object 

1199 if self.contains_loose(sha): 

1200 path = self._get_shafile_path(sha) 

1201 try: 

1202 return os.path.getmtime(path) 

1203 except FileNotFoundError: 

1204 pass 

1205 

1206 # Check if it's in a pack file 

1207 for pack in self.packs: 

1208 try: 

1209 if sha in pack: 

1210 # Use the pack file's mtime for packed objects 

1211 pack_path = pack._data_path 

1212 try: 

1213 return os.path.getmtime(pack_path) 

1214 except (FileNotFoundError, AttributeError): 

1215 pass 

1216 except PackFileDisappeared: 

1217 pass 

1218 

1219 raise KeyError(sha) 

1220 

1221 def _remove_pack(self, pack) -> None: 

1222 try: 

1223 del self._pack_cache[os.path.basename(pack._basename)] 

1224 except KeyError: 

1225 pass 

1226 pack.close() 

1227 os.remove(pack.data.path) 

1228 os.remove(pack.index.path) 

1229 

1230 def _get_pack_basepath(self, entries): 

1231 suffix = iter_sha1(entry[0] for entry in entries) 

1232 # TODO: Handle self.pack_dir being bytes 

1233 suffix = suffix.decode("ascii") 

1234 return os.path.join(self.pack_dir, "pack-" + suffix) 

1235 

1236 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1237 """Move a specific file containing a pack into the pack directory. 

1238 

1239 Note: The file should be on the same file system as the 

1240 packs directory. 

1241 

1242 Args: 

1243 f: Open file object for the pack. 

1244 path: Path to the pack file. 

1245 indexer: A PackIndexer for indexing the pack. 

1246 """ 

1247 entries = [] 

1248 for i, entry in enumerate(indexer): 

1249 if progress is not None: 

1250 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1251 entries.append(entry) 

1252 

1253 pack_sha, extra_entries = extend_pack( 

1254 f, 

1255 indexer.ext_refs(), 

1256 get_raw=self.get_raw, 

1257 compression_level=self.pack_compression_level, 

1258 progress=progress, 

1259 ) 

1260 f.flush() 

1261 try: 

1262 fileno = f.fileno() 

1263 except AttributeError: 

1264 pass 

1265 else: 

1266 os.fsync(fileno) 

1267 f.close() 

1268 

1269 entries.extend(extra_entries) 

1270 

1271 # Move the pack in. 

1272 entries.sort() 

1273 pack_base_name = self._get_pack_basepath(entries) 

1274 

1275 for pack in self.packs: 

1276 if pack._basename == pack_base_name: 

1277 return pack 

1278 

1279 target_pack_path = pack_base_name + ".pack" 

1280 target_index_path = pack_base_name + ".idx" 

1281 if sys.platform == "win32": 

1282 # Windows might have the target pack file lingering. Attempt 

1283 # removal, silently passing if the target does not exist. 

1284 with suppress(FileNotFoundError): 

1285 os.remove(target_pack_path) 

1286 os.rename(path, target_pack_path) 

1287 

1288 # Write the index. 

1289 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1290 write_pack_index( 

1291 index_file, entries, pack_sha, version=self.pack_index_version 

1292 ) 

1293 

1294 # Add the pack to the store and return it. 

1295 final_pack = Pack( 

1296 pack_base_name, 

1297 delta_window_size=self.pack_delta_window_size, 

1298 window_memory=self.pack_window_memory, 

1299 delta_cache_size=self.pack_delta_cache_size, 

1300 depth=self.pack_depth, 

1301 threads=self.pack_threads, 

1302 big_file_threshold=self.pack_big_file_threshold, 

1303 ) 

1304 final_pack.check_length_and_checksum() 

1305 self._add_cached_pack(pack_base_name, final_pack) 

1306 return final_pack 

1307 

1308 def add_thin_pack(self, read_all, read_some, progress=None): 

1309 """Add a new thin pack to this object store. 

1310 

1311 Thin packs are packs that contain deltas with parents that exist 

1312 outside the pack. They should never be placed in the object store 

1313 directly, and always indexed and completed as they are copied. 

1314 

1315 Args: 

1316 read_all: Read function that blocks until the number of 

1317 requested bytes are read. 

1318 read_some: Read function that returns at least one byte, but may 

1319 not return the number of bytes requested. 

1320 Returns: A Pack object pointing at the now-completed thin pack in the 

1321 objects/pack directory. 

1322 """ 

1323 import tempfile 

1324 

1325 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1326 with os.fdopen(fd, "w+b") as f: 

1327 os.chmod(path, PACK_MODE) 

1328 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1329 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1330 copier.verify(progress=progress) 

1331 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1332 

1333 def add_pack(self): 

1334 """Add a new pack to this object store. 

1335 

1336 Returns: Fileobject to write to, a commit function to 

1337 call when the pack is finished and an abort 

1338 function. 

1339 """ 

1340 import tempfile 

1341 

1342 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1343 f = os.fdopen(fd, "w+b") 

1344 os.chmod(path, PACK_MODE) 

1345 

1346 def commit(): 

1347 if f.tell() > 0: 

1348 f.seek(0) 

1349 with PackData(path, f) as pd: 

1350 indexer = PackIndexer.for_pack_data( 

1351 pd, resolve_ext_ref=self.get_raw 

1352 ) 

1353 return self._complete_pack(f, path, len(pd), indexer) 

1354 else: 

1355 f.close() 

1356 os.remove(path) 

1357 return None 

1358 

1359 def abort() -> None: 

1360 f.close() 

1361 os.remove(path) 

1362 

1363 return f, commit, abort 

1364 

1365 def add_object(self, obj) -> None: 

1366 """Add a single object to this object store. 

1367 

1368 Args: 

1369 obj: Object to add 

1370 """ 

1371 path = self._get_shafile_path(obj.id) 

1372 dir = os.path.dirname(path) 

1373 try: 

1374 os.mkdir(dir) 

1375 except FileExistsError: 

1376 pass 

1377 if os.path.exists(path): 

1378 return # Already there, no need to write again 

1379 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1380 f.write( 

1381 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1382 ) 

1383 

1384 @classmethod 

1385 def init(cls, path: Union[str, os.PathLike]): 

1386 try: 

1387 os.mkdir(path) 

1388 except FileExistsError: 

1389 pass 

1390 os.mkdir(os.path.join(path, "info")) 

1391 os.mkdir(os.path.join(path, PACKDIR)) 

1392 return cls(path) 
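
# Illustrative sketch (not part of the dulwich source): creating a bare object
# directory on disk and round-tripping a loose object through it. The
# temporary directory is just example scaffolding.
#
#   import tempfile
#   store = DiskObjectStore.init(tempfile.mkdtemp())
#   blob = Blob.from_string(b"hello")
#   store.add_object(blob)
#   assert blob.id in store and store.contains_loose(blob.id)
#   type_num, raw = store.get_raw(blob.id)   # -> (Blob.type_num, b"hello")
#   store.close()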

1393 

1394 def iter_prefix(self, prefix): 

1395 if len(prefix) < 2: 

1396 yield from super().iter_prefix(prefix) 

1397 return 

1398 seen = set() 

1399 dir = prefix[:2].decode() 

1400 rest = prefix[2:].decode() 

1401 try: 

1402 for name in os.listdir(os.path.join(self.path, dir)): 

1403 if name.startswith(rest): 

1404 sha = os.fsencode(dir + name) 

1405 if sha not in seen: 

1406 seen.add(sha) 

1407 yield sha 

1408 except FileNotFoundError: 

1409 pass 

1410 

1411 for p in self.packs: 

1412 bin_prefix = ( 

1413 binascii.unhexlify(prefix) 

1414 if len(prefix) % 2 == 0 

1415 else binascii.unhexlify(prefix[:-1]) 

1416 ) 

1417 for sha in p.index.iter_prefix(bin_prefix): 

1418 sha = sha_to_hex(sha) 

1419 if sha.startswith(prefix) and sha not in seen: 

1420 seen.add(sha) 

1421 yield sha 

1422 for alternate in self.alternates: 

1423 for sha in alternate.iter_prefix(prefix): 

1424 if sha not in seen: 

1425 seen.add(sha) 

1426 yield sha 

1427 

1428 def get_commit_graph(self): 

1429 """Get the commit graph for this object store. 

1430 

1431 Returns: 

1432 CommitGraph object if available, None otherwise 

1433 """ 

1434 if not self._use_commit_graph: 

1435 return None 

1436 

1437 if self._commit_graph is None: 

1438 from .commit_graph import read_commit_graph 

1439 

1440 # Look for commit graph in our objects directory 

1441 graph_file = os.path.join(self.path, "info", "commit-graph") 

1442 if os.path.exists(graph_file): 

1443 self._commit_graph = read_commit_graph(graph_file) 

1444 return self._commit_graph 

1445 

1446 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1447 """Write a commit graph file for this object store. 

1448 

1449 Args: 

1450 refs: List of refs to include. If None, includes all refs from object store. 

1451 reachable: If True, includes all commits reachable from refs. 

1452 If False, only includes the direct ref targets. 

1453 """ 

1454 from .commit_graph import get_reachable_commits 

1455 

1456 if refs is None: 

1457 # Get all commit objects from the object store 

1458 all_refs = [] 

1459 # Iterate through all objects to find commits 

1460 for sha in self: 

1461 try: 

1462 obj = self[sha] 

1463 if obj.type_name == b"commit": 

1464 all_refs.append(sha) 

1465 except KeyError: 

1466 continue 

1467 else: 

1468 # Use provided refs 

1469 all_refs = refs 

1470 

1471 if not all_refs: 

1472 return # No commits to include 

1473 

1474 if reachable: 

1475 # Get all reachable commits 

1476 commit_ids = get_reachable_commits(self, all_refs) 

1477 else: 

1478 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1479 commit_ids = [] 

1480 for ref in all_refs: 

1481 if isinstance(ref, bytes) and len(ref) == 40: 

1482 # Already hex ObjectID 

1483 commit_ids.append(ref) 

1484 elif isinstance(ref, bytes) and len(ref) == 20: 

1485 # Binary SHA, convert to hex ObjectID 

1486 from .objects import sha_to_hex 

1487 

1488 commit_ids.append(sha_to_hex(ref)) 

1489 else: 

1490 # Assume it's already correct format 

1491 commit_ids.append(ref) 

1492 

1493 if commit_ids: 

1494 # Write commit graph directly to our object store path 

1495 # Generate the commit graph 

1496 from .commit_graph import generate_commit_graph 

1497 

1498 graph = generate_commit_graph(self, commit_ids) 

1499 

1500 if graph.entries: 

1501 # Ensure the info directory exists 

1502 info_dir = os.path.join(self.path, "info") 

1503 os.makedirs(info_dir, exist_ok=True) 

1504 

1505 # Write using GitFile for atomic operation 

1506 graph_path = os.path.join(info_dir, "commit-graph") 

1507 with GitFile(graph_path, "wb") as f: 

1508 graph.write_to_file(f) 

1509 

1510 # Clear cached commit graph so it gets reloaded 

1511 self._commit_graph = None 
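
# Illustrative sketch (not part of the dulwich source): once commits exist in
# the store, the graph can be written once and is then consulted implicitly by
# parent lookups such as find_shallow() and get_depth().
#
#   store.write_commit_graph()         # writes <objects dir>/info/commit-graph
#   graph = store.get_commit_graph()   # parsed CommitGraph, or None if absent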

1512 

1513 def prune(self, grace_period: Optional[int] = None) -> None: 

1514 """Prune/clean up this object store. 

1515 

1516 This removes temporary files that were left behind by interrupted 

1517 pack operations. These are files that start with ``tmp_pack_`` in the 

1518 repository directory or files with .pack extension but no corresponding 

1519 .idx file in the pack directory. 

1520 

1521 Args: 

1522 grace_period: Grace period in seconds for removing temporary files. 

1523 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1524 """ 

1525 import glob 

1526 

1527 if grace_period is None: 

1528 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1529 

1530 # Clean up tmp_pack_* files in the repository directory 

1531 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1532 # Check if file is old enough (more than grace period) 

1533 mtime = os.path.getmtime(tmp_file) 

1534 if time.time() - mtime > grace_period: 

1535 os.remove(tmp_file) 

1536 

1537 # Clean up orphaned .pack files without corresponding .idx files 

1538 try: 

1539 pack_dir_contents = os.listdir(self.pack_dir) 

1540 except FileNotFoundError: 

1541 return 

1542 

1543 pack_files = {} 

1544 idx_files = set() 

1545 

1546 for name in pack_dir_contents: 

1547 if name.endswith(".pack"): 

1548 base_name = name[:-5] # Remove .pack extension 

1549 pack_files[base_name] = name 

1550 elif name.endswith(".idx"): 

1551 base_name = name[:-4] # Remove .idx extension 

1552 idx_files.add(base_name) 

1553 

1554 # Remove .pack files without corresponding .idx files 

1555 for base_name, pack_name in pack_files.items(): 

1556 if base_name not in idx_files: 

1557 pack_path = os.path.join(self.pack_dir, pack_name) 

1558 # Check if file is old enough (more than grace period) 

1559 mtime = os.path.getmtime(pack_path) 

1560 if time.time() - mtime > grace_period: 

1561 os.remove(pack_path) 
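
# Illustrative sketch (not part of the dulwich source): a zero grace period
# removes leftover tmp_pack_* files and orphaned .pack files immediately
# instead of waiting the default two weeks.
#
#   store.prune(grace_period=0)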

1562 

1563 

1564class MemoryObjectStore(BaseObjectStore): 

1565 """Object store that keeps all objects in memory.""" 

1566 

1567 def __init__(self) -> None: 

1568 super().__init__() 

1569 self._data: dict[str, ShaFile] = {} 

1570 self.pack_compression_level = -1 

1571 

1572 def _to_hexsha(self, sha): 

1573 if len(sha) == 40: 

1574 return sha 

1575 elif len(sha) == 20: 

1576 return sha_to_hex(sha) 

1577 else: 

1578 raise ValueError(f"Invalid sha {sha!r}") 

1579 

1580 def contains_loose(self, sha): 

1581 """Check if a particular object is present by SHA1 and is loose.""" 

1582 return self._to_hexsha(sha) in self._data 

1583 

1584 def contains_packed(self, sha) -> bool: 

1585 """Check if a particular object is present by SHA1 and is packed.""" 

1586 return False 

1587 

1588 def __iter__(self): 

1589 """Iterate over the SHAs that are present in this store.""" 

1590 return iter(self._data.keys()) 

1591 

1592 @property 

1593 def packs(self): 

1594 """List with pack objects.""" 

1595 return [] 

1596 

1597 def get_raw(self, name: ObjectID): 

1598 """Obtain the raw text for an object. 

1599 

1600 Args: 

1601 name: sha for the object. 

1602 Returns: tuple with numeric type and object contents. 

1603 """ 

1604 obj = self[self._to_hexsha(name)] 

1605 return obj.type_num, obj.as_raw_string() 

1606 

1607 def __getitem__(self, name: ObjectID): 

1608 return self._data[self._to_hexsha(name)].copy() 

1609 

1610 def __delitem__(self, name: ObjectID) -> None: 

1611 """Delete an object from this store, for testing only.""" 

1612 del self._data[self._to_hexsha(name)] 

1613 

1614 def add_object(self, obj) -> None: 

1615 """Add a single object to this object store.""" 

1616 self._data[obj.id] = obj.copy() 

1617 

1618 def add_objects(self, objects, progress=None) -> None: 

1619 """Add a set of objects to this object store. 

1620 

1621 Args: 

1622 objects: Iterable over a list of (object, path) tuples 

1623 """ 

1624 for obj, path in objects: 

1625 self.add_object(obj) 

1626 

1627 def add_pack(self): 

1628 """Add a new pack to this object store. 

1629 

1630 Because this object store doesn't support packs, we extract and add the 

1631 individual objects. 

1632 

1633 Returns: Fileobject to write to, a commit function to call when

1634 the pack is finished, and an abort function.

1635 """ 

1636 from tempfile import SpooledTemporaryFile 

1637 

1638 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1639 

1640 def commit() -> None: 

1641 size = f.tell() 

1642 if size > 0: 

1643 f.seek(0) 

1644 p = PackData.from_file(f, size) 

1645 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1646 self.add_object(obj) 

1647 p.close() 

1648 f.close() 

1649 else: 

1650 f.close() 

1651 

1652 def abort() -> None: 

1653 f.close() 

1654 

1655 return f, commit, abort 
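
# Illustrative sketch (not part of the dulwich source): the f/commit/abort
# protocol shared with PackContainer. pack_bytes stands in for a complete,
# already-serialized pack stream.
#
#   f, commit, abort = store.add_pack()
#   try:
#       f.write(pack_bytes)
#   except Exception:
#       abort()
#       raise
#   else:
#       commit()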

1656 

1657 def add_pack_data( 

1658 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1659 ) -> None: 

1660 """Add pack data to this object store. 

1661 

1662 Args: 

1663 count: Number of items to add 

1664 """ 

1665 if count == 0: 

1666 return 

1667 

1668 # Since MemoryObjectStore doesn't support pack files, we need to 

1669 # extract individual objects. To handle deltas properly, we write 

1670 # to a temporary pack and then use PackInflater to resolve them. 

1671 f, commit, abort = self.add_pack() 

1672 try: 

1673 write_pack_data( 

1674 f.write, 

1675 unpacked_objects, 

1676 num_records=count, 

1677 progress=progress, 

1678 ) 

1679 except BaseException: 

1680 abort() 

1681 raise 

1682 else: 

1683 commit() 

1684 

1685 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1686 """Add a new thin pack to this object store. 

1687 

1688 Thin packs are packs that contain deltas with parents that exist 

1689 outside the pack. Because this object store doesn't support packs, we 

1690 extract and add the individual objects. 

1691 

1692 Args: 

1693 read_all: Read function that blocks until the number of 

1694 requested bytes are read. 

1695 read_some: Read function that returns at least one byte, but may 

1696 not return the number of bytes requested. 

1697 """ 

1698 f, commit, abort = self.add_pack() 

1699 try: 

1700 copier = PackStreamCopier(read_all, read_some, f) 

1701 copier.verify() 

1702 except BaseException: 

1703 abort() 

1704 raise 

1705 else: 

1706 commit() 

1707 

1708 

1709class ObjectIterator(Protocol): 

1710 """Interface for iterating over objects.""" 

1711 

1712 def iterobjects(self) -> Iterator[ShaFile]: 

1713 raise NotImplementedError(self.iterobjects) 

1714 

1715 

1716def tree_lookup_path(lookup_obj, root_sha, path): 

1717 """Look up an object in a Git tree. 

1718 

1719 Args: 

1720 lookup_obj: Callback for retrieving object by SHA1 

1721 root_sha: SHA1 of the root tree 

1722 path: Path to lookup 

1723 Returns: A tuple of (mode, SHA) of the resulting path. 

1724 """ 

1725 tree = lookup_obj(root_sha) 

1726 if not isinstance(tree, Tree): 

1727 raise NotTreeError(root_sha) 

1728 return tree.lookup_path(lookup_obj, path) 
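
# Illustrative sketch (not part of the dulwich source): resolving a nested path
# against a root tree kept in a MemoryObjectStore, using store.__getitem__ as
# the lookup callback.
#
#   store = MemoryObjectStore()
#   blob = Blob.from_string(b"content")
#   subtree = Tree()
#   subtree.add(b"file.txt", 0o100644, blob.id)
#   root = Tree()
#   root.add(b"subdir", 0o040000, subtree.id)
#   for obj in (blob, subtree, root):
#       store.add_object(obj)
#   mode, sha = tree_lookup_path(store.__getitem__, root.id, b"subdir/file.txt")
#   # -> (0o100644, blob.id)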

1729 

1730 

1731def _collect_filetree_revs( 

1732 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1733) -> None: 

1734 """Collect SHA1s of files and directories for specified tree. 

1735 

1736 Args: 

1737 obj_store: Object store to get objects by SHA from 

1738 tree_sha: tree reference to walk 

1739 kset: set to fill with references to files and directories 

1740 """ 

1741 filetree = obj_store[tree_sha] 

1742 assert isinstance(filetree, Tree) 

1743 for name, mode, sha in filetree.iteritems(): 

1744 if not S_ISGITLINK(mode) and sha not in kset: 

1745 kset.add(sha) 

1746 if stat.S_ISDIR(mode): 

1747 _collect_filetree_revs(obj_store, sha, kset) 

1748 

1749 

1750def _split_commits_and_tags( 

1751 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1752) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1753 """Split object id list into three lists with commit, tag, and other SHAs. 

1754 

1755 Commits referenced by tags are included in the commits

1756 list as well. Only SHA1s known in this repository will get

1757 through; unless the ignore_unknown argument is True, a KeyError

1758 is raised for SHA1s missing from the repository.

1759 

1760 Args: 

1761 obj_store: Object store to get objects by SHA1 from 

1762 lst: Collection of commit and tag SHAs 

1763 ignore_unknown: True to skip SHA1 missing in the repository 

1764 silently. 

1765 Returns: A tuple of (commits, tags, others) SHA1s 

1766 """ 

1767 commits: set[bytes] = set() 

1768 tags: set[bytes] = set() 

1769 others: set[bytes] = set() 

1770 for e in lst: 

1771 try: 

1772 o = obj_store[e] 

1773 except KeyError: 

1774 if not ignore_unknown: 

1775 raise 

1776 else: 

1777 if isinstance(o, Commit): 

1778 commits.add(e) 

1779 elif isinstance(o, Tag): 

1780 tags.add(e) 

1781 tagged = o.object[1] 

1782 c, t, os = _split_commits_and_tags( 

1783 obj_store, [tagged], ignore_unknown=ignore_unknown 

1784 ) 

1785 commits |= c 

1786 tags |= t 

1787 others |= os 

1788 else: 

1789 others.add(e) 

1790 return (commits, tags, others) 

1791 

1792 

1793class MissingObjectFinder: 

1794 """Find the objects missing from another object store. 

1795 

1796 Args: 

1797 object_store: Object store containing at least all objects to be 

1798 sent 

1799 haves: SHA1s of commits not to send (already present in target) 

1800 wants: SHA1s of commits to send 

1801 progress: Optional function to report progress to. 

1802 get_tagged: Function that returns a dict of pointed-to sha -> tag 

1803 sha for including tags. 

1804 get_parents: Optional function for getting the parents of a commit. 

1805 """ 

1806 

1807 def __init__( 

1808 self, 

1809 object_store, 

1810 haves, 

1811 wants, 

1812 *, 

1813 shallow=None, 

1814 progress=None, 

1815 get_tagged=None, 

1816 get_parents=lambda commit: commit.parents, 

1817 ) -> None: 

1818 self.object_store = object_store 

1819 if shallow is None: 

1820 shallow = set() 

1821 self._get_parents = get_parents 

1822 # process Commits and Tags differently 

1823 # Note: haves may list commits/tags not available locally; 

1824 # such SHAs are filtered out by _split_commits_and_tags. 

1825 # wants, however, must list only known SHAs, otherwise 

1826 # _split_commits_and_tags raises KeyError 

1827 have_commits, have_tags, have_others = _split_commits_and_tags( 

1828 object_store, haves, ignore_unknown=True 

1829 ) 

1830 want_commits, want_tags, want_others = _split_commits_and_tags( 

1831 object_store, wants, ignore_unknown=False 

1832 ) 

1833 # all_ancestors is a set of commits that shall not be sent 

1834 # (complete repository up to 'haves') 

1835 all_ancestors = _collect_ancestors( 

1836 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

1837 )[0] 

1838 # missing_commits - complete set of commits between haves and wants 

1839 # common_commits - commits from all_ancestors encountered while 

1840 # traversing the parent hierarchy of wants 

1841 missing_commits, common_commits = _collect_ancestors( 

1842 object_store, 

1843 want_commits, 

1844 all_ancestors, 

1845 shallow=shallow, 

1846 get_parents=self._get_parents, 

1847 ) 

1848 self.remote_has: set[bytes] = set() 

1849 # Now, fill sha_done with commits and the SHAs of 

1850 # files and directories known to be present both locally 

1851 # and on the target. These commits and files 

1852 # won't get selected for fetch 

1853 for h in common_commits: 

1854 self.remote_has.add(h) 

1855 cmt = object_store[h] 

1856 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

1857 # record tags we have as visited, too 

1858 for t in have_tags: 

1859 self.remote_has.add(t) 

1860 self.sha_done = set(self.remote_has) 

1861 

1862 # in fact, what we 'want' is commits, tags, and others 

1863 # we've found missing 

1864 self.objects_to_send: set[ 

1865 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

1866 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

1867 missing_tags = want_tags.difference(have_tags) 

1868 self.objects_to_send.update( 

1869 {(w, None, Tag.type_num, False) for w in missing_tags} 

1870 ) 

1871 missing_others = want_others.difference(have_others) 

1872 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

1873 

1874 if progress is None: 

1875 self.progress = lambda x: None 

1876 else: 

1877 self.progress = progress 

1878 self._tagged = (get_tagged and get_tagged()) or {} 

1879 

1880 def get_remote_has(self): 

1881 return self.remote_has 

1882 

1883 def add_todo( 

1884 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

1885 ) -> None: 

1886 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

1887 

1888 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

1889 while True: 

1890 if not self.objects_to_send: 

1891 self.progress( 

1892 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

1893 ) 

1894 raise StopIteration 

1895 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

1896 if sha not in self.sha_done: 

1897 break 

1898 if not leaf: 

1899 o = self.object_store[sha] 

1900 if isinstance(o, Commit): 

1901 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

1902 elif isinstance(o, Tree): 

1903 self.add_todo( 

1904 [ 

1905 ( 

1906 s, 

1907 n, 

1908 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

1909 not stat.S_ISDIR(m), 

1910 ) 

1911 for n, m, s in o.iteritems() 

1912 if not S_ISGITLINK(m) 

1913 ] 

1914 ) 

1915 elif isinstance(o, Tag): 

1916 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

1917 if sha in self._tagged: 

1918 self.add_todo([(self._tagged[sha], None, None, True)]) 

1919 self.sha_done.add(sha) 

1920 if len(self.sha_done) % 1000 == 0: 

1921 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

1922 if type_num is None: 

1923 pack_hint = None 

1924 else: 

1925 pack_hint = (type_num, name) 

1926 return (sha, pack_hint) 

1927 

1928 def __iter__(self): 

1929 return self 

1930 

1931 
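A minimal sketch of driving MissingObjectFinder against an in-memory store; make_commit is an illustrative helper, and the example assumes the store exposes get_commit_graph() (returning None is fine), as recent dulwich object stores do:

from dulwich.object_store import MemoryObjectStore, MissingObjectFinder
from dulwich.objects import Blob, Commit, Tree


def make_commit(store, parents, marker):
    # Build a single-file tree plus a commit pointing at it.
    blob = Blob.from_string(marker)
    tree = Tree()
    tree.add(b"file", 0o100644, blob.id)
    commit = Commit()
    commit.tree = tree.id
    commit.parents = parents
    commit.author = commit.committer = b"Example <example@example.com>"
    commit.commit_time = commit.author_time = 0
    commit.commit_timezone = commit.author_timezone = 0
    commit.message = b"commit " + marker
    for obj in (blob, tree, commit):
        store.add_object(obj)
    return commit


store = MemoryObjectStore()
base = make_commit(store, [], b"base")
tip = make_commit(store, [base.id], b"tip")

# The receiving side already has `base`, so only objects reachable from
# `tip` but not from `base` are reported.
finder = MissingObjectFinder(store, haves=[base.id], wants=[tip.id])
for sha, pack_hint in finder:
    print(sha, pack_hint)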

1932class ObjectStoreGraphWalker: 

1933 """Graph walker that finds what commits are missing from an object store.""" 

1934 

1935 heads: set[ObjectID] 

1936 """Revisions without descendants in the local repo.""" 

1937 

1938 get_parents: Callable[[ObjectID], list[ObjectID]] 

1939 """Function to retrieve parents in the local repo.""" 

1940 

1941 shallow: set[ObjectID] 

1942 

1943 def __init__( 

1944 self, 

1945 local_heads: Iterable[ObjectID], 

1946 get_parents, 

1947 shallow: Optional[set[ObjectID]] = None, 

1948 update_shallow=None, 

1949 ) -> None: 

1950 """Create a new instance. 

1951 

1952 Args: 

1953 local_heads: Heads to start search with 

1954 get_parents: Function for finding the parents of a SHA1. 

1955 """ 

1956 self.heads = set(local_heads) 

1957 self.get_parents = get_parents 

1958 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

1959 if shallow is None: 

1960 shallow = set() 

1961 self.shallow = shallow 

1962 self.update_shallow = update_shallow 

1963 

1964 def nak(self) -> None: 

1965 """Nothing in common was found.""" 

1966 

1967 def ack(self, sha: ObjectID) -> None: 

1968 """Ack that a revision and its ancestors are present in the source.""" 

1969 if len(sha) != 40: 

1970 raise ValueError(f"unexpected sha {sha!r} received") 

1971 ancestors = {sha} 

1972 

1973 # stop if we run out of heads to remove 

1974 while self.heads: 

1975 for a in ancestors: 

1976 if a in self.heads: 

1977 self.heads.remove(a) 

1978 

1979 # collect all ancestors 

1980 new_ancestors = set() 

1981 for a in ancestors: 

1982 ps = self.parents.get(a) 

1983 if ps is not None: 

1984 new_ancestors.update(ps) 

1985 self.parents[a] = None 

1986 

1987 # no more ancestors; stop 

1988 if not new_ancestors: 

1989 break 

1990 

1991 ancestors = new_ancestors 

1992 

1993 def next(self): 

1994 """Iterate over ancestors of heads in the target.""" 

1995 if self.heads: 

1996 ret = self.heads.pop() 

1997 try: 

1998 ps = self.get_parents(ret) 

1999 except KeyError: 

2000 return None 

2001 self.parents[ret] = ps 

2002 self.heads.update([p for p in ps if p not in self.parents]) 

2003 return ret 

2004 return None 

2005 

2006 __next__ = next 

2007 

2008 
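A minimal sketch of the have/ack negotiation loop with ObjectStoreGraphWalker, using a plain dict as the local parent lookup (the 40-byte placeholder SHAs are illustrative):

from dulwich.object_store import ObjectStoreGraphWalker

parents = {
    b"c3" * 20: [b"c2" * 20],
    b"c2" * 20: [b"c1" * 20],
    b"c1" * 20: [],
}
walker = ObjectStoreGraphWalker([b"c3" * 20], parents.__getitem__)

sha = walker.next()
while sha is not None:
    # A real client would send `sha` as a "have" line; here we pretend the
    # remote acknowledges c2, so its ancestors stop being offered.
    if sha == b"c2" * 20:
        walker.ack(sha)
    sha = walker.next()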

2009def commit_tree_changes(object_store, tree, changes): 

2010 """Commit a specified set of changes to a tree structure. 

2011 

2012 This will apply a set of changes on top of an existing tree, storing new 

2013 objects in object_store. 

2014 

2015 changes are a list of tuples with (path, mode, object_sha). 

2016 Paths can be both blobs and trees. Setting the mode and 

2017 object sha to None deletes the path. 

2018 

2019 This method works especially well if there are only a small 

2020 number of changes to a big tree. For a large number of changes 

2021 to a large tree, use e.g. commit_tree. 

2022 

2023 Args: 

2024 object_store: Object store to store new objects in 

2025 and retrieve old ones from. 

2026 tree: Original tree root 

2027 changes: changes to apply 

2028 Returns: New tree root object 

2029 """ 

2030 # TODO(jelmer): Save up the objects and add them using .add_objects 

2031 # rather than with individual calls to .add_object. 

2032 nested_changes = {} 

2033 for path, new_mode, new_sha in changes: 

2034 try: 

2035 (dirname, subpath) = path.split(b"/", 1) 

2036 except ValueError: 

2037 if new_sha is None: 

2038 del tree[path] 

2039 else: 

2040 tree[path] = (new_mode, new_sha) 

2041 else: 

2042 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

2043 for name, subchanges in nested_changes.items(): 

2044 try: 

2045 orig_subtree = object_store[tree[name][1]] 

2046 except KeyError: 

2047 orig_subtree = Tree() 

2048 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

2049 if len(subtree) == 0: 

2050 del tree[name] 

2051 else: 

2052 tree[name] = (stat.S_IFDIR, subtree.id) 

2053 object_store.add_object(tree) 

2054 return tree 

2055 

2056 
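A minimal sketch of commit_tree_changes against a MemoryObjectStore: one existing entry is replaced and one nested path is added (paths and contents are illustrative):

from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob = Blob.from_string(b"old contents\n")
new_blob = Blob.from_string(b"new contents\n")
tree = Tree()
tree.add(b"README", 0o100644, old_blob.id)
for obj in (old_blob, new_blob, tree):
    store.add_object(obj)

new_tree = commit_tree_changes(
    store,
    tree,
    [
        (b"README", 0o100644, new_blob.id),          # replace an existing entry
        (b"docs/guide.txt", 0o100644, new_blob.id),  # create a nested path
    ],
)
assert new_tree.id in store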

2057class OverlayObjectStore(BaseObjectStore): 

2058 """Object store that can overlay multiple object stores.""" 

2059 

2060 def __init__(self, bases, add_store=None) -> None: 

2061 self.bases = bases 

2062 self.add_store = add_store 

2063 

2064 def add_object(self, object): 

2065 if self.add_store is None: 

2066 raise NotImplementedError(self.add_object) 

2067 return self.add_store.add_object(object) 

2068 

2069 def add_objects(self, objects, progress=None): 

2070 if self.add_store is None: 

2071 raise NotImplementedError(self.add_object) 

2072 return self.add_store.add_objects(objects, progress) 

2073 

2074 @property 

2075 def packs(self): 

2076 ret = [] 

2077 for b in self.bases: 

2078 ret.extend(b.packs) 

2079 return ret 

2080 

2081 def __iter__(self): 

2082 done = set() 

2083 for b in self.bases: 

2084 for o_id in b: 

2085 if o_id not in done: 

2086 yield o_id 

2087 done.add(o_id) 

2088 

2089 def iterobjects_subset( 

2090 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2091 ) -> Iterator[ShaFile]: 

2092 todo = set(shas) 

2093 found: set[bytes] = set() 

2094 

2095 for b in self.bases: 

2096 # Create a copy of todo for each base to avoid modifying 

2097 # the set while iterating through it 

2098 current_todo = todo - found 

2099 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2100 yield o 

2101 found.add(o.id) 

2102 

2103 # Check for any remaining objects not found 

2104 missing = todo - found 

2105 if missing and not allow_missing: 

2106 raise KeyError(next(iter(missing))) 

2107 

2108 def iter_unpacked_subset( 

2109 self, 

2110 shas: Iterable[bytes], 

2111 *, 

2112 include_comp=False, 

2113 allow_missing: bool = False, 

2114 convert_ofs_delta=True, 

2115 ) -> Iterator[ShaFile]: 

2116 todo = set(shas) 

2117 for b in self.bases: 

2118 for o in b.iter_unpacked_subset( 

2119 todo, 

2120 include_comp=include_comp, 

2121 allow_missing=True, 

2122 convert_ofs_delta=convert_ofs_delta, 

2123 ): 

2124 yield o 

2125 todo.remove(o.id) 

2126 if todo and not allow_missing: 

2127 raise KeyError(next(iter(todo))) 

2128 

2129 def get_raw(self, sha_id): 

2130 for b in self.bases: 

2131 try: 

2132 return b.get_raw(sha_id) 

2133 except KeyError: 

2134 pass 

2135 raise KeyError(sha_id) 

2136 

2137 def contains_packed(self, sha) -> bool: 

2138 for b in self.bases: 

2139 if b.contains_packed(sha): 

2140 return True 

2141 return False 

2142 

2143 def contains_loose(self, sha) -> bool: 

2144 for b in self.bases: 

2145 if b.contains_loose(sha): 

2146 return True 

2147 return False 

2148 

2149 
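A minimal sketch of overlaying two in-memory stores: lookups fall through the bases in order, while writes go only to the designated add_store:

from dulwich.object_store import MemoryObjectStore, OverlayObjectStore
from dulwich.objects import Blob

base_a = MemoryObjectStore()
base_b = MemoryObjectStore()
shared = Blob.from_string(b"shared data\n")
base_b.add_object(shared)

overlay = OverlayObjectStore([base_a, base_b], add_store=base_a)
assert overlay[shared.id].data == b"shared data\n"  # served from base_b

written = Blob.from_string(b"written through the overlay\n")
overlay.add_object(written)
assert written.id in base_a and written.id not in base_b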

2150def read_packs_file(f): 

2151 """Yield the packs listed in a packs file.""" 

2152 for line in f.read().splitlines(): 

2153 if not line: 

2154 continue 

2155 (kind, name) = line.split(b" ", 1) 

2156 if kind != b"P": 

2157 continue 

2158 yield os.fsdecode(name) 

2159 

2160 
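A minimal sketch of read_packs_file parsing an objects/info/packs-style listing from a bytes buffer (the pack name is made up):

from io import BytesIO

from dulwich.object_store import read_packs_file

listing = BytesIO(
    b"P pack-4e1243bd22c66e76c2ba9eddc1f91394e57f9f83.pack\n"
    b"\n"
)
print(list(read_packs_file(listing)))
# ['pack-4e1243bd22c66e76c2ba9eddc1f91394e57f9f83.pack']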

2161class BucketBasedObjectStore(PackBasedObjectStore): 

2162 """Object store implementation that uses a bucket store like S3 as backend.""" 

2163 

2164 def _iter_loose_objects(self): 

2165 """Iterate over the SHAs of all loose objects.""" 

2166 return iter([]) 

2167 

2168 def _get_loose_object(self, sha) -> None: 

2169 return None 

2170 

2171 def delete_loose_object(self, sha) -> None: 

2172 # Loose objects don't exist in a bucket-based store; nothing to delete. 

2173 pass 

2174 

2175 def _remove_pack(self, name) -> None: 

2176 raise NotImplementedError(self._remove_pack) 

2177 

2178 def _iter_pack_names(self) -> Iterator[str]: 

2179 raise NotImplementedError(self._iter_pack_names) 

2180 

2181 def _get_pack(self, name) -> Pack: 

2182 raise NotImplementedError(self._get_pack) 

2183 

2184 def _update_pack_cache(self): 

2185 pack_files = set(self._iter_pack_names()) 

2186 

2187 # Open newly appeared pack files 

2188 new_packs = [] 

2189 for f in pack_files: 

2190 if f not in self._pack_cache: 

2191 pack = self._get_pack(f) 

2192 new_packs.append(pack) 

2193 self._pack_cache[f] = pack 

2194 # Remove disappeared pack files 

2195 for f in set(self._pack_cache) - pack_files: 

2196 self._pack_cache.pop(f).close() 

2197 return new_packs 

2198 

2199 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2200 raise NotImplementedError 

2201 

2202 def add_pack(self): 

2203 """Add a new pack to this object store. 

2204 

2205 Returns: Fileobject to write to, a commit function to 

2206 call when the pack is finished and an abort 

2207 function. 

2208 """ 

2209 import tempfile 

2210 

2211 pf = tempfile.SpooledTemporaryFile( 

2212 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2213 ) 

2214 

2215 def commit(): 

2216 if pf.tell() == 0: 

2217 pf.close() 

2218 return None 

2219 

2220 pf.seek(0) 

2221 p = PackData(pf.name, pf) 

2222 entries = p.sorted_entries() 

2223 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2224 idxf = tempfile.SpooledTemporaryFile( 

2225 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2226 ) 

2227 checksum = p.get_stored_checksum() 

2228 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2229 idxf.seek(0) 

2230 idx = load_pack_index_file(basename + ".idx", idxf) 

2231 for pack in self.packs: 

2232 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2233 p.close() 

2234 idx.close() 

2235 pf.close() 

2236 idxf.close() 

2237 return pack 

2238 pf.seek(0) 

2239 idxf.seek(0) 

2240 self._upload_pack(basename, pf, idxf) 

2241 final_pack = Pack.from_objects(p, idx) 

2242 self._add_cached_pack(basename, final_pack) 

2243 pf.close() 

2244 idxf.close() 

2245 return final_pack 

2246 

2247 return pf, commit, pf.close 

2248 

2249 
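A rough, untested sketch of what a BucketBasedObjectStore subclass could look like, with an in-memory dict standing in for the bucket; DictBucketStore and its storage layout are illustrative assumptions, not part of dulwich:

from io import BytesIO

from dulwich.object_store import BucketBasedObjectStore
from dulwich.pack import Pack, PackData, load_pack_index_file


class DictBucketStore(BucketBasedObjectStore):
    """Bucket store keeping (pack bytes, index bytes) in a dict."""

    def __init__(self) -> None:
        super().__init__()
        self._bucket: dict[str, tuple[bytes, bytes]] = {}

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        self._bucket[basename] = (pack_file.read(), index_file.read())

    def _iter_pack_names(self):
        return iter(self._bucket)

    def _get_pack(self, name):
        pack_bytes, idx_bytes = self._bucket[name]
        data = PackData(name + ".pack", file=BytesIO(pack_bytes))
        index = load_pack_index_file(name + ".idx", BytesIO(idx_bytes))
        return Pack.from_objects(data, index)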

2250def _collect_ancestors( 

2251 store: ObjectContainer, 

2252 heads, 

2253 common: frozenset[ObjectID] = frozenset(), 

2254 shallow: frozenset[ObjectID] = frozenset(), 

2255 get_parents=lambda commit: commit.parents, 

2256): 

2257 """Collect all ancestors of heads up to (excluding) those in common. 

2258 

2259 Args: 

2260 heads: commits to start from 

2261 common: commits to end at, or empty set to walk repository 

2262 completely 

2263 get_parents: Optional function for getting the parents of a 

2264 commit. 

2265 Returns: a tuple (A, B) where A is the set of all commits reachable 

2266 from heads but not present in common, and B is the set of common 

2267 (shared) elements that were reached directly from heads 

2268 """ 

2269 bases = set() 

2270 commits = set() 

2271 queue = [] 

2272 queue.extend(heads) 

2273 

2274 # Try to use commit graph if available 

2275 commit_graph = store.get_commit_graph() 

2276 

2277 while queue: 

2278 e = queue.pop(0) 

2279 if e in common: 

2280 bases.add(e) 

2281 elif e not in commits: 

2282 commits.add(e) 

2283 if e in shallow: 

2284 continue 

2285 

2286 # Try to use commit graph for parent lookup 

2287 parents = None 

2288 if commit_graph: 

2289 parents = commit_graph.get_parents(e) 

2290 

2291 if parents is None: 

2292 # Fall back to loading the object 

2293 cmt = store[e] 

2294 parents = get_parents(cmt) 

2295 

2296 queue.extend(parents) 

2297 return (commits, bases) 

2298 

2299 

2300def iter_tree_contents( 

2301 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2302): 

2303 """Iterate the contents of a tree and all subtrees. 

2304 

2305 Iteration is depth-first pre-order, as in e.g. os.walk. 

2306 

2307 Args: 

2308 tree_id: SHA1 of the tree. 

2309 include_trees: If True, include tree objects in the iteration. 

2310 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

2311 tree. 

2312 """ 

2313 if tree_id is None: 

2314 return 

2315 # This could be fairly easily generalized to >2 trees if we find a use 

2316 # case. 

2317 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2318 while todo: 

2319 entry = todo.pop() 

2320 if stat.S_ISDIR(entry.mode): 

2321 extra = [] 

2322 tree = store[entry.sha] 

2323 assert isinstance(tree, Tree) 

2324 for subentry in tree.iteritems(name_order=True): 

2325 extra.append(subentry.in_path(entry.path)) 

2326 todo.extend(reversed(extra)) 

2327 if not stat.S_ISDIR(entry.mode) or include_trees: 

2328 yield entry 

2329 

2330 
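A minimal sketch of iter_tree_contents walking a nested tree held in a MemoryObjectStore (names are illustrative):

import stat

from dulwich.object_store import MemoryObjectStore, iter_tree_contents
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"hello\n")
subtree = Tree()
subtree.add(b"inner.txt", 0o100644, blob.id)
root = Tree()
root.add(b"outer.txt", 0o100644, blob.id)
root.add(b"sub", stat.S_IFDIR, subtree.id)
for obj in (blob, subtree, root):
    store.add_object(obj)

for entry in iter_tree_contents(store, root.id):
    print(entry.path, oct(entry.mode), entry.sha)
# Yields b'outer.txt' first, then b'sub/inner.txt'; pass
# include_trees=True to also see the tree entries themselves.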

2331def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2332 """Peel all tags from a SHA. 

2333 

2334 Args: 

2335 sha: The object SHA to peel. 

2336 Returns: A tuple of (unpeeled, peeled) objects; peeled is the object 

2337 reached after following all intermediate tags, and will equal 

2338 unpeeled if the original SHA does not point to a tag. 

2339 """ 

2340 unpeeled = obj = store[sha] 

2341 obj_class = object_class(obj.type_name) 

2342 while obj_class is Tag: 

2343 assert isinstance(obj, Tag) 

2344 obj_class, sha = obj.object 

2345 obj = store[sha] 

2346 return unpeeled, obj
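A minimal sketch of peel_sha resolving an annotated tag to the commit it points at (all object contents are illustrative):

from dulwich.object_store import MemoryObjectStore, peel_sha
from dulwich.objects import Commit, Tag, Tree

store = MemoryObjectStore()
tree = Tree()
commit = Commit()
commit.tree = tree.id
commit.author = commit.committer = b"Example <example@example.com>"
commit.commit_time = commit.author_time = 0
commit.commit_timezone = commit.author_timezone = 0
commit.message = b"initial\n"

tag = Tag()
tag.object = (Commit, commit.id)
tag.name = b"v1.0"
tag.tagger = b"Example <example@example.com>"
tag.tag_time = 0
tag.tag_timezone = 0
tag.message = b"release v1.0\n"

for obj in (tree, commit, tag):
    store.add_object(obj)

unpeeled, peeled = peel_sha(store, tag.id)
assert unpeeled.id == tag.id and peeled.id == commit.id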