Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1107 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as public by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from typing import ( 

36 Callable, 

37 Optional, 

38 Protocol, 

39 Union, 

40 cast, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

85INFODIR = "info" 

86PACKDIR = "pack" 

87 

88# use permissions consistent with Git; just readable by everyone 

89# TODO: should packs also be non-writable on Windows? if so, that 

90# would requite some rather significant adjustments to the test suite 

91PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

92 

93# Grace period for cleaning up temporary pack files (in seconds) 

94# Matches git's default of 2 weeks 

95DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

96 

97 

98def find_shallow(store, heads, depth): 

99 """Find shallow commits according to a given depth. 

100 

101 Args: 

102 store: An ObjectStore for looking up objects. 

103 heads: Iterable of head SHAs to start walking from. 

104 depth: The depth of ancestors to include. A depth of one includes 

105 only the heads themselves. 

106 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

107 considered shallow and unshallow according to the arguments. Note that 

108 these sets may overlap if a commit is reachable along multiple paths. 

109 """ 

110 parents = {} 

111 commit_graph = store.get_commit_graph() 

112 

113 def get_parents(sha): 

114 result = parents.get(sha, None) 

115 if not result: 

116 # Try to use commit graph first if available 

117 if commit_graph: 

118 graph_parents = commit_graph.get_parents(sha) 

119 if graph_parents is not None: 

120 result = graph_parents 

121 parents[sha] = result 

122 return result 

123 # Fall back to loading the object 

124 result = store[sha].parents 

125 parents[sha] = result 

126 return result 

127 

128 todo = [] # stack of (sha, depth) 

129 for head_sha in heads: 

130 obj = store[head_sha] 

131 # Peel tags if necessary 

132 while isinstance(obj, Tag): 

133 _, sha = obj.object 

134 obj = store[sha] 

135 if isinstance(obj, Commit): 

136 todo.append((obj.id, 1)) 

137 

138 not_shallow = set() 

139 shallow = set() 

140 while todo: 

141 sha, cur_depth = todo.pop() 

142 if cur_depth < depth: 

143 not_shallow.add(sha) 

144 new_depth = cur_depth + 1 

145 todo.extend((p, new_depth) for p in get_parents(sha)) 

146 else: 

147 shallow.add(sha) 

148 

149 return shallow, not_shallow 

150 

151 

152def get_depth( 

153 store, 

154 head, 

155 get_parents=lambda commit: commit.parents, 

156 max_depth=None, 

157): 

158 """Return the current available depth for the given head. 

159 For commits with multiple parents, the largest possible depth will be 

160 returned. 

161 

162 Args: 

163 head: commit to start from 

164 get_parents: optional function for getting the parents of a commit 

165 max_depth: maximum depth to search 

166 """ 

167 if head not in store: 

168 return 0 

169 current_depth = 1 

170 queue = [(head, current_depth)] 

171 commit_graph = store.get_commit_graph() 

172 

173 while queue and (max_depth is None or current_depth < max_depth): 

174 e, depth = queue.pop(0) 

175 current_depth = max(current_depth, depth) 

176 

177 # Try to use commit graph for parent lookup if available 

178 parents = None 

179 if commit_graph: 

180 parents = commit_graph.get_parents(e) 

181 

182 if parents is None: 

183 # Fall back to loading the object 

184 cmt = store[e] 

185 if isinstance(cmt, Tag): 

186 _cls, sha = cmt.object 

187 cmt = store[sha] 

188 parents = get_parents(cmt) 

189 

190 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

191 return current_depth 

192 

193 

194class PackContainer(Protocol): 

195 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

196 """Add a new pack.""" 

197 

198 

199class BaseObjectStore: 

200 """Object store interface.""" 

201 

202 def determine_wants_all( 

203 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

204 ) -> list[ObjectID]: 

205 def _want_deepen(sha): 

206 if not depth: 

207 return False 

208 if depth == DEPTH_INFINITE: 

209 return True 

210 return depth > self._get_depth(sha) 

211 

212 return [ 

213 sha 

214 for (ref, sha) in refs.items() 

215 if (sha not in self or _want_deepen(sha)) 

216 and not ref.endswith(PEELED_TAG_SUFFIX) 

217 and not sha == ZERO_SHA 

218 ] 

219 

220 def contains_loose(self, sha) -> bool: 

221 """Check if a particular object is present by SHA1 and is loose.""" 

222 raise NotImplementedError(self.contains_loose) 

223 

224 def __contains__(self, sha1: bytes) -> bool: 

225 """Check if a particular object is present by SHA1. 

226 

227 This method makes no distinction between loose and packed objects. 

228 """ 

229 return self.contains_loose(sha1) 

230 

231 @property 

232 def packs(self): 

233 """Iterable of pack objects.""" 

234 raise NotImplementedError 

235 

236 def get_raw(self, name) -> tuple[int, bytes]: 

237 """Obtain the raw text for an object. 

238 

239 Args: 

240 name: sha for the object. 

241 Returns: tuple with numeric type and object contents. 

242 """ 

243 raise NotImplementedError(self.get_raw) 

244 

245 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

246 """Obtain an object by SHA1.""" 

247 type_num, uncomp = self.get_raw(sha1) 

248 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

249 

250 def __iter__(self): 

251 """Iterate over the SHAs that are present in this store.""" 

252 raise NotImplementedError(self.__iter__) 

253 

254 def add_object(self, obj) -> None: 

255 """Add a single object to this object store.""" 

256 raise NotImplementedError(self.add_object) 

257 

258 def add_objects(self, objects, progress=None) -> None: 

259 """Add a set of objects to this object store. 

260 

261 Args: 

262 objects: Iterable over a list of (object, path) tuples 

263 """ 

264 raise NotImplementedError(self.add_objects) 

265 

266 def tree_changes( 

267 self, 

268 source, 

269 target, 

270 want_unchanged=False, 

271 include_trees=False, 

272 change_type_same=False, 

273 rename_detector=None, 

274 paths=None, 

275 ): 

276 """Find the differences between the contents of two trees. 

277 

278 Args: 

279 source: SHA1 of the source tree 

280 target: SHA1 of the target tree 

281 want_unchanged: Whether unchanged files should be reported 

282 include_trees: Whether to include trees 

283 change_type_same: Whether to report files changing 

284 type in the same entry. 

285 rename_detector: RenameDetector object for detecting renames. 

286 paths: Optional list of paths to filter to (as bytes). 

287 Returns: Iterator over tuples with 

288 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

289 """ 

290 from .diff_tree import tree_changes 

291 

292 for change in tree_changes( 

293 self, 

294 source, 

295 target, 

296 want_unchanged=want_unchanged, 

297 include_trees=include_trees, 

298 change_type_same=change_type_same, 

299 rename_detector=rename_detector, 

300 paths=paths, 

301 ): 

302 yield ( 

303 (change.old.path, change.new.path), 

304 (change.old.mode, change.new.mode), 

305 (change.old.sha, change.new.sha), 

306 ) 

307 

308 def iter_tree_contents(self, tree_id, include_trees=False): 

309 """Iterate the contents of a tree and all subtrees. 

310 

311 Iteration is depth-first pre-order, as in e.g. os.walk. 

312 

313 Args: 

314 tree_id: SHA1 of the tree. 

315 include_trees: If True, include tree objects in the iteration. 

316 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

317 tree. 

318 """ 

319 warnings.warn( 

320 "Please use dulwich.object_store.iter_tree_contents", 

321 DeprecationWarning, 

322 stacklevel=2, 

323 ) 

324 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

325 

326 def iterobjects_subset( 

327 self, shas: Iterable[bytes], *, allow_missing: bool = False 

328 ) -> Iterator[ShaFile]: 

329 for sha in shas: 

330 try: 

331 yield self[sha] 

332 except KeyError: 

333 if not allow_missing: 

334 raise 

335 

336 def find_missing_objects( 

337 self, 

338 haves, 

339 wants, 

340 shallow=None, 

341 progress=None, 

342 get_tagged=None, 

343 get_parents=lambda commit: commit.parents, 

344 ): 

345 """Find the missing objects required for a set of revisions. 

346 

347 Args: 

348 haves: Iterable over SHAs already in common. 

349 wants: Iterable over SHAs of objects to fetch. 

350 shallow: Set of shallow commit SHA1s to skip 

351 progress: Simple progress function that will be called with 

352 updated progress strings. 

353 get_tagged: Function that returns a dict of pointed-to sha -> 

354 tag sha for including tags. 

355 get_parents: Optional function for getting the parents of a 

356 commit. 

357 Returns: Iterator over (sha, path) pairs. 

358 """ 

359 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

360 finder = MissingObjectFinder( 

361 self, 

362 haves=haves, 

363 wants=wants, 

364 shallow=shallow, 

365 progress=progress, 

366 get_tagged=get_tagged, 

367 get_parents=get_parents, 

368 ) 

369 return iter(finder) 

370 

371 def find_common_revisions(self, graphwalker): 

372 """Find which revisions this store has in common using graphwalker. 

373 

374 Args: 

375 graphwalker: A graphwalker object. 

376 Returns: List of SHAs that are in common 

377 """ 

378 haves = [] 

379 sha = next(graphwalker) 

380 while sha: 

381 if sha in self: 

382 haves.append(sha) 

383 graphwalker.ack(sha) 

384 sha = next(graphwalker) 

385 return haves 

386 

387 def generate_pack_data( 

388 self, have, want, shallow=None, progress=None, ofs_delta=True 

389 ) -> tuple[int, Iterator[UnpackedObject]]: 

390 """Generate pack data objects for a set of wants/haves. 

391 

392 Args: 

393 have: List of SHA1s of objects that should not be sent 

394 want: List of SHA1s of objects that should be sent 

395 shallow: Set of shallow commit SHA1s to skip 

396 ofs_delta: Whether OFS deltas can be included 

397 progress: Optional progress reporting method 

398 """ 

399 # Note that the pack-specific implementation below is more efficient, 

400 # as it reuses deltas 

401 missing_objects = MissingObjectFinder( 

402 self, haves=have, wants=want, shallow=shallow, progress=progress 

403 ) 

404 object_ids = list(missing_objects) 

405 return pack_objects_to_data( 

406 [(self[oid], path) for oid, path in object_ids], 

407 ofs_delta=ofs_delta, 

408 progress=progress, 

409 ) 

410 

411 def peel_sha(self, sha): 

412 """Peel all tags from a SHA. 

413 

414 Args: 

415 sha: The object SHA to peel. 

416 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

417 intermediate tags; if the original ref does not point to a tag, 

418 this will equal the original SHA1. 

419 """ 

420 warnings.warn( 

421 "Please use dulwich.object_store.peel_sha()", 

422 DeprecationWarning, 

423 stacklevel=2, 

424 ) 

425 return peel_sha(self, sha)[1] 

426 

427 def _get_depth( 

428 self, 

429 head, 

430 get_parents=lambda commit: commit.parents, 

431 max_depth=None, 

432 ): 

433 """Return the current available depth for the given head. 

434 For commits with multiple parents, the largest possible depth will be 

435 returned. 

436 

437 Args: 

438 head: commit to start from 

439 get_parents: optional function for getting the parents of a commit 

440 max_depth: maximum depth to search 

441 """ 

442 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

443 

444 def close(self) -> None: 

445 """Close any files opened by this object store.""" 

446 # Default implementation is a NO-OP 

447 

448 def prune(self, grace_period: Optional[int] = None) -> None: 

449 """Prune/clean up this object store. 

450 

451 This includes removing orphaned temporary files and other 

452 housekeeping tasks. Default implementation is a NO-OP. 

453 

454 Args: 

455 grace_period: Grace period in seconds for removing temporary files. 

456 If None, uses the default grace period. 

457 """ 

458 # Default implementation is a NO-OP 

459 

460 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

461 """Iterate over all SHA1s that start with a given prefix. 

462 

463 The default implementation is a naive iteration over all objects. 

464 However, subclasses may override this method with more efficient 

465 implementations. 

466 """ 

467 for sha in self: 

468 if sha.startswith(prefix): 

469 yield sha 

470 

471 def get_commit_graph(self): 

472 """Get the commit graph for this object store. 

473 

474 Returns: 

475 CommitGraph object if available, None otherwise 

476 """ 

477 return None 

478 

479 def write_commit_graph(self, refs=None, reachable=True) -> None: 

480 """Write a commit graph file for this object store. 

481 

482 Args: 

483 refs: List of refs to include. If None, includes all refs from object store. 

484 reachable: If True, includes all commits reachable from refs. 

485 If False, only includes the direct ref targets. 

486 

487 Note: 

488 Default implementation does nothing. Subclasses should override 

489 this method to provide commit graph writing functionality. 

490 """ 

491 raise NotImplementedError(self.write_commit_graph) 

492 

493 def get_object_mtime(self, sha): 

494 """Get the modification time of an object. 

495 

496 Args: 

497 sha: SHA1 of the object 

498 

499 Returns: 

500 Modification time as seconds since epoch 

501 

502 Raises: 

503 KeyError: if the object is not found 

504 """ 

505 # Default implementation raises KeyError 

506 # Subclasses should override to provide actual mtime 

507 raise KeyError(sha) 

508 

509 

510class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

511 def __init__(self, pack_compression_level=-1, pack_index_version=None) -> None: 

512 self._pack_cache: dict[str, Pack] = {} 

513 self.pack_compression_level = pack_compression_level 

514 self.pack_index_version = pack_index_version 

515 

516 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

517 """Add a new pack to this object store.""" 

518 raise NotImplementedError(self.add_pack) 

519 

520 def add_pack_data( 

521 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

522 ) -> None: 

523 """Add pack data to this object store. 

524 

525 Args: 

526 count: Number of items to add 

527 """ 

528 if count == 0: 

529 # Don't bother writing an empty pack file 

530 return 

531 f, commit, abort = self.add_pack() 

532 try: 

533 write_pack_data( 

534 f.write, 

535 unpacked_objects, 

536 num_records=count, 

537 progress=progress, 

538 compression_level=self.pack_compression_level, 

539 ) 

540 except BaseException: 

541 abort() 

542 raise 

543 else: 

544 return commit() 

545 

546 @property 

547 def alternates(self): 

548 return [] 

549 

550 def contains_packed(self, sha) -> bool: 

551 """Check if a particular object is present by SHA1 and is packed. 

552 

553 This does not check alternates. 

554 """ 

555 for pack in self.packs: 

556 try: 

557 if sha in pack: 

558 return True 

559 except PackFileDisappeared: 

560 pass 

561 return False 

562 

563 def __contains__(self, sha) -> bool: 

564 """Check if a particular object is present by SHA1. 

565 

566 This method makes no distinction between loose and packed objects. 

567 """ 

568 if self.contains_packed(sha) or self.contains_loose(sha): 

569 return True 

570 for alternate in self.alternates: 

571 if sha in alternate: 

572 return True 

573 return False 

574 

575 def _add_cached_pack(self, base_name, pack) -> None: 

576 """Add a newly appeared pack to the cache by path.""" 

577 prev_pack = self._pack_cache.get(base_name) 

578 if prev_pack is not pack: 

579 self._pack_cache[base_name] = pack 

580 if prev_pack: 

581 prev_pack.close() 

582 

583 def generate_pack_data( 

584 self, have, want, shallow=None, progress=None, ofs_delta=True 

585 ) -> tuple[int, Iterator[UnpackedObject]]: 

586 """Generate pack data objects for a set of wants/haves. 

587 

588 Args: 

589 have: List of SHA1s of objects that should not be sent 

590 want: List of SHA1s of objects that should be sent 

591 shallow: Set of shallow commit SHA1s to skip 

592 ofs_delta: Whether OFS deltas can be included 

593 progress: Optional progress reporting method 

594 """ 

595 missing_objects = MissingObjectFinder( 

596 self, haves=have, wants=want, shallow=shallow, progress=progress 

597 ) 

598 remote_has = missing_objects.get_remote_has() 

599 object_ids = list(missing_objects) 

600 return len(object_ids), generate_unpacked_objects( 

601 cast(PackedObjectContainer, self), 

602 object_ids, 

603 progress=progress, 

604 ofs_delta=ofs_delta, 

605 other_haves=remote_has, 

606 ) 

607 

608 def _clear_cached_packs(self) -> None: 

609 pack_cache = self._pack_cache 

610 self._pack_cache = {} 

611 while pack_cache: 

612 (name, pack) = pack_cache.popitem() 

613 pack.close() 

614 

615 def _iter_cached_packs(self): 

616 return self._pack_cache.values() 

617 

618 def _update_pack_cache(self) -> list[Pack]: 

619 raise NotImplementedError(self._update_pack_cache) 

620 

621 def close(self) -> None: 

622 self._clear_cached_packs() 

623 

624 @property 

625 def packs(self): 

626 """List with pack objects.""" 

627 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

628 

629 def count_pack_files(self) -> int: 

630 """Count the number of pack files. 

631 

632 Returns: 

633 Number of pack files (excluding those with .keep files) 

634 """ 

635 count = 0 

636 for pack in self.packs: 

637 # Check if there's a .keep file for this pack 

638 keep_path = pack._basename + ".keep" 

639 if not os.path.exists(keep_path): 

640 count += 1 

641 return count 

642 

643 def _iter_alternate_objects(self): 

644 """Iterate over the SHAs of all the objects in alternate stores.""" 

645 for alternate in self.alternates: 

646 yield from alternate 

647 

648 def _iter_loose_objects(self): 

649 """Iterate over the SHAs of all loose objects.""" 

650 raise NotImplementedError(self._iter_loose_objects) 

651 

652 def _get_loose_object(self, sha) -> Optional[ShaFile]: 

653 raise NotImplementedError(self._get_loose_object) 

654 

655 def delete_loose_object(self, sha) -> None: 

656 """Delete a loose object. 

657 

658 This method only handles loose objects. For packed objects, 

659 use repack(exclude=...) to exclude them during repacking. 

660 """ 

661 raise NotImplementedError(self.delete_loose_object) 

662 

663 def _remove_pack(self, name) -> None: 

664 raise NotImplementedError(self._remove_pack) 

665 

666 def pack_loose_objects(self): 

667 """Pack loose objects. 

668 

669 Returns: Number of objects packed 

670 """ 

671 objects = set() 

672 for sha in self._iter_loose_objects(): 

673 objects.add((self._get_loose_object(sha), None)) 

674 self.add_objects(list(objects)) 

675 for obj, path in objects: 

676 self.delete_loose_object(obj.id) 

677 return len(objects) 

678 

679 def repack(self, exclude=None): 

680 """Repack the packs in this repository. 

681 

682 Note that this implementation is fairly naive and currently keeps all 

683 objects in memory while it repacks. 

684 

685 Args: 

686 exclude: Optional set of object SHAs to exclude from repacking 

687 """ 

688 if exclude is None: 

689 exclude = set() 

690 

691 loose_objects = set() 

692 excluded_loose_objects = set() 

693 for sha in self._iter_loose_objects(): 

694 if sha not in exclude: 

695 loose_objects.add(self._get_loose_object(sha)) 

696 else: 

697 excluded_loose_objects.add(sha) 

698 

699 objects = {(obj, None) for obj in loose_objects} 

700 old_packs = {p.name(): p for p in self.packs} 

701 for name, pack in old_packs.items(): 

702 objects.update( 

703 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

704 ) 

705 

706 # Only create a new pack if there are objects to pack 

707 if objects: 

708 # The name of the consolidated pack might match the name of a 

709 # pre-existing pack. Take care not to remove the newly created 

710 # consolidated pack. 

711 consolidated = self.add_objects(objects) 

712 old_packs.pop(consolidated.name(), None) 

713 

714 # Delete loose objects that were packed 

715 for obj in loose_objects: 

716 self.delete_loose_object(obj.id) 

717 # Delete excluded loose objects 

718 for sha in excluded_loose_objects: 

719 self.delete_loose_object(sha) 

720 for name, pack in old_packs.items(): 

721 self._remove_pack(pack) 

722 self._update_pack_cache() 

723 return len(objects) 

724 

725 def __iter__(self): 

726 """Iterate over the SHAs that are present in this store.""" 

727 self._update_pack_cache() 

728 for pack in self._iter_cached_packs(): 

729 try: 

730 yield from pack 

731 except PackFileDisappeared: 

732 pass 

733 yield from self._iter_loose_objects() 

734 yield from self._iter_alternate_objects() 

735 

736 def contains_loose(self, sha): 

737 """Check if a particular object is present by SHA1 and is loose. 

738 

739 This does not check alternates. 

740 """ 

741 return self._get_loose_object(sha) is not None 

742 

743 def get_raw(self, name): 

744 """Obtain the raw fulltext for an object. 

745 

746 Args: 

747 name: sha for the object. 

748 Returns: tuple with numeric type and object contents. 

749 """ 

750 if name == ZERO_SHA: 

751 raise KeyError(name) 

752 if len(name) == 40: 

753 sha = hex_to_sha(name) 

754 hexsha = name 

755 elif len(name) == 20: 

756 sha = name 

757 hexsha = None 

758 else: 

759 raise AssertionError(f"Invalid object name {name!r}") 

760 for pack in self._iter_cached_packs(): 

761 try: 

762 return pack.get_raw(sha) 

763 except (KeyError, PackFileDisappeared): 

764 pass 

765 if hexsha is None: 

766 hexsha = sha_to_hex(name) 

767 ret = self._get_loose_object(hexsha) 

768 if ret is not None: 

769 return ret.type_num, ret.as_raw_string() 

770 # Maybe something else has added a pack with the object 

771 # in the mean time? 

772 for pack in self._update_pack_cache(): 

773 try: 

774 return pack.get_raw(sha) 

775 except KeyError: 

776 pass 

777 for alternate in self.alternates: 

778 try: 

779 return alternate.get_raw(hexsha) 

780 except KeyError: 

781 pass 

782 raise KeyError(hexsha) 

783 

784 def iter_unpacked_subset( 

785 self, 

786 shas: set[bytes], 

787 include_comp: bool = False, 

788 allow_missing: bool = False, 

789 convert_ofs_delta: bool = True, 

790 ) -> Iterator[UnpackedObject]: 

791 todo: set[bytes] = set(shas) 

792 for p in self._iter_cached_packs(): 

793 for unpacked in p.iter_unpacked_subset( 

794 todo, 

795 include_comp=include_comp, 

796 allow_missing=True, 

797 convert_ofs_delta=convert_ofs_delta, 

798 ): 

799 yield unpacked 

800 hexsha = sha_to_hex(unpacked.sha()) 

801 todo.remove(hexsha) 

802 # Maybe something else has added a pack with the object 

803 # in the mean time? 

804 for p in self._update_pack_cache(): 

805 for unpacked in p.iter_unpacked_subset( 

806 todo, 

807 include_comp=include_comp, 

808 allow_missing=True, 

809 convert_ofs_delta=convert_ofs_delta, 

810 ): 

811 yield unpacked 

812 hexsha = sha_to_hex(unpacked.sha()) 

813 todo.remove(hexsha) 

814 for alternate in self.alternates: 

815 for unpacked in alternate.iter_unpacked_subset( 

816 todo, 

817 include_comp=include_comp, 

818 allow_missing=True, 

819 convert_ofs_delta=convert_ofs_delta, 

820 ): 

821 yield unpacked 

822 hexsha = sha_to_hex(unpacked.sha()) 

823 todo.remove(hexsha) 

824 

825 def iterobjects_subset( 

826 self, shas: Iterable[bytes], *, allow_missing: bool = False 

827 ) -> Iterator[ShaFile]: 

828 todo: set[bytes] = set(shas) 

829 for p in self._iter_cached_packs(): 

830 for o in p.iterobjects_subset(todo, allow_missing=True): 

831 yield o 

832 todo.remove(o.id) 

833 # Maybe something else has added a pack with the object 

834 # in the mean time? 

835 for p in self._update_pack_cache(): 

836 for o in p.iterobjects_subset(todo, allow_missing=True): 

837 yield o 

838 todo.remove(o.id) 

839 for alternate in self.alternates: 

840 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

841 yield o 

842 todo.remove(o.id) 

843 for oid in todo: 

844 o = self._get_loose_object(oid) 

845 if o is not None: 

846 yield o 

847 elif not allow_missing: 

848 raise KeyError(oid) 

849 

850 def get_unpacked_object( 

851 self, sha1: bytes, *, include_comp: bool = False 

852 ) -> UnpackedObject: 

853 """Obtain the unpacked object. 

854 

855 Args: 

856 sha1: sha for the object. 

857 """ 

858 if sha1 == ZERO_SHA: 

859 raise KeyError(sha1) 

860 if len(sha1) == 40: 

861 sha = hex_to_sha(sha1) 

862 hexsha = sha1 

863 elif len(sha1) == 20: 

864 sha = sha1 

865 hexsha = None 

866 else: 

867 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

868 for pack in self._iter_cached_packs(): 

869 try: 

870 return pack.get_unpacked_object(sha, include_comp=include_comp) 

871 except (KeyError, PackFileDisappeared): 

872 pass 

873 if hexsha is None: 

874 hexsha = sha_to_hex(sha1) 

875 # Maybe something else has added a pack with the object 

876 # in the mean time? 

877 for pack in self._update_pack_cache(): 

878 try: 

879 return pack.get_unpacked_object(sha, include_comp=include_comp) 

880 except KeyError: 

881 pass 

882 for alternate in self.alternates: 

883 try: 

884 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

885 except KeyError: 

886 pass 

887 raise KeyError(hexsha) 

888 

889 def add_objects( 

890 self, 

891 objects: Sequence[tuple[ShaFile, Optional[str]]], 

892 progress: Optional[Callable[[str], None]] = None, 

893 ) -> None: 

894 """Add a set of objects to this object store. 

895 

896 Args: 

897 objects: Iterable over (object, path) tuples, should support 

898 __len__. 

899 Returns: Pack object of the objects written. 

900 """ 

901 count = len(objects) 

902 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

903 return self.add_pack_data(count, record_iter, progress=progress) 

904 

905 

906class DiskObjectStore(PackBasedObjectStore): 

907 """Git-style object store that exists on disk.""" 

908 

909 path: Union[str, os.PathLike] 

910 pack_dir: Union[str, os.PathLike] 

911 

912 def __init__( 

913 self, 

914 path: Union[str, os.PathLike], 

915 loose_compression_level=-1, 

916 pack_compression_level=-1, 

917 pack_index_version=None, 

918 ) -> None: 

919 """Open an object store. 

920 

921 Args: 

922 path: Path of the object store. 

923 loose_compression_level: zlib compression level for loose objects 

924 pack_compression_level: zlib compression level for pack objects 

925 pack_index_version: pack index version to use (1, 2, or 3) 

926 """ 

927 super().__init__( 

928 pack_compression_level=pack_compression_level, 

929 pack_index_version=pack_index_version, 

930 ) 

931 self.path = path 

932 self.pack_dir = os.path.join(self.path, PACKDIR) 

933 self._alternates = None 

934 self.loose_compression_level = loose_compression_level 

935 self.pack_compression_level = pack_compression_level 

936 self.pack_index_version = pack_index_version 

937 

938 # Commit graph support - lazy loaded 

939 self._commit_graph = None 

940 self._use_commit_graph = True # Default to true 

941 

942 def __repr__(self) -> str: 

943 return f"<{self.__class__.__name__}({self.path!r})>" 

944 

945 @classmethod 

946 def from_config(cls, path: Union[str, os.PathLike], config): 

947 try: 

948 default_compression_level = int( 

949 config.get((b"core",), b"compression").decode() 

950 ) 

951 except KeyError: 

952 default_compression_level = -1 

953 try: 

954 loose_compression_level = int( 

955 config.get((b"core",), b"looseCompression").decode() 

956 ) 

957 except KeyError: 

958 loose_compression_level = default_compression_level 

959 try: 

960 pack_compression_level = int( 

961 config.get((b"core",), "packCompression").decode() 

962 ) 

963 except KeyError: 

964 pack_compression_level = default_compression_level 

965 try: 

966 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

967 except KeyError: 

968 pack_index_version = None 

969 

970 # Read core.commitGraph setting 

971 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

972 

973 instance = cls( 

974 path, loose_compression_level, pack_compression_level, pack_index_version 

975 ) 

976 instance._use_commit_graph = use_commit_graph 

977 return instance 

978 

979 @property 

980 def alternates(self): 

981 if self._alternates is not None: 

982 return self._alternates 

983 self._alternates = [] 

984 for path in self._read_alternate_paths(): 

985 self._alternates.append(DiskObjectStore(path)) 

986 return self._alternates 

987 

988 def _read_alternate_paths(self): 

989 try: 

990 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

991 except FileNotFoundError: 

992 return 

993 with f: 

994 for line in f.readlines(): 

995 line = line.rstrip(b"\n") 

996 if line.startswith(b"#"): 

997 continue 

998 if os.path.isabs(line): 

999 yield os.fsdecode(line) 

1000 else: 

1001 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

1002 

1003 def add_alternate_path(self, path) -> None: 

1004 """Add an alternate path to this object store.""" 

1005 try: 

1006 os.mkdir(os.path.join(self.path, INFODIR)) 

1007 except FileExistsError: 

1008 pass 

1009 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1010 with GitFile(alternates_path, "wb") as f: 

1011 try: 

1012 orig_f = open(alternates_path, "rb") 

1013 except FileNotFoundError: 

1014 pass 

1015 else: 

1016 with orig_f: 

1017 f.write(orig_f.read()) 

1018 f.write(os.fsencode(path) + b"\n") 

1019 

1020 if not os.path.isabs(path): 

1021 path = os.path.join(self.path, path) 

1022 self.alternates.append(DiskObjectStore(path)) 

1023 

1024 def _update_pack_cache(self): 

1025 """Read and iterate over new pack files and cache them.""" 

1026 try: 

1027 pack_dir_contents = os.listdir(self.pack_dir) 

1028 except FileNotFoundError: 

1029 self.close() 

1030 return [] 

1031 pack_files = set() 

1032 for name in pack_dir_contents: 

1033 if name.startswith("pack-") and name.endswith(".pack"): 

1034 # verify that idx exists first (otherwise the pack was not yet 

1035 # fully written) 

1036 idx_name = os.path.splitext(name)[0] + ".idx" 

1037 if idx_name in pack_dir_contents: 

1038 pack_name = name[: -len(".pack")] 

1039 pack_files.add(pack_name) 

1040 

1041 # Open newly appeared pack files 

1042 new_packs = [] 

1043 for f in pack_files: 

1044 if f not in self._pack_cache: 

1045 pack = Pack(os.path.join(self.pack_dir, f)) 

1046 new_packs.append(pack) 

1047 self._pack_cache[f] = pack 

1048 # Remove disappeared pack files 

1049 for f in set(self._pack_cache) - pack_files: 

1050 self._pack_cache.pop(f).close() 

1051 return new_packs 

1052 

1053 def _get_shafile_path(self, sha): 

1054 # Check from object dir 

1055 return hex_to_filename(self.path, sha) 

1056 

1057 def _iter_loose_objects(self): 

1058 for base in os.listdir(self.path): 

1059 if len(base) != 2: 

1060 continue 

1061 for rest in os.listdir(os.path.join(self.path, base)): 

1062 sha = os.fsencode(base + rest) 

1063 if not valid_hexsha(sha): 

1064 continue 

1065 yield sha 

1066 

1067 def count_loose_objects(self) -> int: 

1068 """Count the number of loose objects in the object store. 

1069 

1070 Returns: 

1071 Number of loose objects 

1072 """ 

1073 count = 0 

1074 if not os.path.exists(self.path): 

1075 return 0 

1076 

1077 for i in range(256): 

1078 subdir = os.path.join(self.path, f"{i:02x}") 

1079 try: 

1080 count += len( 

1081 [ 

1082 name 

1083 for name in os.listdir(subdir) 

1084 if len(name) == 38 # 40 - 2 for the prefix 

1085 ] 

1086 ) 

1087 except FileNotFoundError: 

1088 # Directory may have been removed or is inaccessible 

1089 continue 

1090 

1091 return count 

1092 

1093 def _get_loose_object(self, sha): 

1094 path = self._get_shafile_path(sha) 

1095 try: 

1096 return ShaFile.from_path(path) 

1097 except FileNotFoundError: 

1098 return None 

1099 

1100 def delete_loose_object(self, sha) -> None: 

1101 os.remove(self._get_shafile_path(sha)) 

1102 

1103 def get_object_mtime(self, sha): 

1104 """Get the modification time of an object. 

1105 

1106 Args: 

1107 sha: SHA1 of the object 

1108 

1109 Returns: 

1110 Modification time as seconds since epoch 

1111 

1112 Raises: 

1113 KeyError: if the object is not found 

1114 """ 

1115 # First check if it's a loose object 

1116 if self.contains_loose(sha): 

1117 path = self._get_shafile_path(sha) 

1118 try: 

1119 return os.path.getmtime(path) 

1120 except FileNotFoundError: 

1121 pass 

1122 

1123 # Check if it's in a pack file 

1124 for pack in self.packs: 

1125 try: 

1126 if sha in pack: 

1127 # Use the pack file's mtime for packed objects 

1128 pack_path = pack._data_path 

1129 try: 

1130 return os.path.getmtime(pack_path) 

1131 except (FileNotFoundError, AttributeError): 

1132 pass 

1133 except PackFileDisappeared: 

1134 pass 

1135 

1136 raise KeyError(sha) 

1137 

1138 def _remove_pack(self, pack) -> None: 

1139 try: 

1140 del self._pack_cache[os.path.basename(pack._basename)] 

1141 except KeyError: 

1142 pass 

1143 pack.close() 

1144 os.remove(pack.data.path) 

1145 os.remove(pack.index.path) 

1146 

1147 def _get_pack_basepath(self, entries): 

1148 suffix = iter_sha1(entry[0] for entry in entries) 

1149 # TODO: Handle self.pack_dir being bytes 

1150 suffix = suffix.decode("ascii") 

1151 return os.path.join(self.pack_dir, "pack-" + suffix) 

1152 

1153 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1154 """Move a specific file containing a pack into the pack directory. 

1155 

1156 Note: The file should be on the same file system as the 

1157 packs directory. 

1158 

1159 Args: 

1160 f: Open file object for the pack. 

1161 path: Path to the pack file. 

1162 indexer: A PackIndexer for indexing the pack. 

1163 """ 

1164 entries = [] 

1165 for i, entry in enumerate(indexer): 

1166 if progress is not None: 

1167 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1168 entries.append(entry) 

1169 

1170 pack_sha, extra_entries = extend_pack( 

1171 f, 

1172 indexer.ext_refs(), 

1173 get_raw=self.get_raw, 

1174 compression_level=self.pack_compression_level, 

1175 progress=progress, 

1176 ) 

1177 f.flush() 

1178 try: 

1179 fileno = f.fileno() 

1180 except AttributeError: 

1181 pass 

1182 else: 

1183 os.fsync(fileno) 

1184 f.close() 

1185 

1186 entries.extend(extra_entries) 

1187 

1188 # Move the pack in. 

1189 entries.sort() 

1190 pack_base_name = self._get_pack_basepath(entries) 

1191 

1192 for pack in self.packs: 

1193 if pack._basename == pack_base_name: 

1194 return pack 

1195 

1196 target_pack_path = pack_base_name + ".pack" 

1197 target_index_path = pack_base_name + ".idx" 

1198 if sys.platform == "win32": 

1199 # Windows might have the target pack file lingering. Attempt 

1200 # removal, silently passing if the target does not exist. 

1201 with suppress(FileNotFoundError): 

1202 os.remove(target_pack_path) 

1203 os.rename(path, target_pack_path) 

1204 

1205 # Write the index. 

1206 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1207 write_pack_index( 

1208 index_file, entries, pack_sha, version=self.pack_index_version 

1209 ) 

1210 

1211 # Add the pack to the store and return it. 

1212 final_pack = Pack(pack_base_name) 

1213 final_pack.check_length_and_checksum() 

1214 self._add_cached_pack(pack_base_name, final_pack) 

1215 return final_pack 

1216 

1217 def add_thin_pack(self, read_all, read_some, progress=None): 

1218 """Add a new thin pack to this object store. 

1219 

1220 Thin packs are packs that contain deltas with parents that exist 

1221 outside the pack. They should never be placed in the object store 

1222 directly, and always indexed and completed as they are copied. 

1223 

1224 Args: 

1225 read_all: Read function that blocks until the number of 

1226 requested bytes are read. 

1227 read_some: Read function that returns at least one byte, but may 

1228 not return the number of bytes requested. 

1229 Returns: A Pack object pointing at the now-completed thin pack in the 

1230 objects/pack directory. 

1231 """ 

1232 import tempfile 

1233 

1234 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1235 with os.fdopen(fd, "w+b") as f: 

1236 os.chmod(path, PACK_MODE) 

1237 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1238 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1239 copier.verify(progress=progress) 

1240 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1241 

1242 def add_pack(self): 

1243 """Add a new pack to this object store. 

1244 

1245 Returns: Fileobject to write to, a commit function to 

1246 call when the pack is finished and an abort 

1247 function. 

1248 """ 

1249 import tempfile 

1250 

1251 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1252 f = os.fdopen(fd, "w+b") 

1253 os.chmod(path, PACK_MODE) 

1254 

1255 def commit(): 

1256 if f.tell() > 0: 

1257 f.seek(0) 

1258 with PackData(path, f) as pd: 

1259 indexer = PackIndexer.for_pack_data( 

1260 pd, resolve_ext_ref=self.get_raw 

1261 ) 

1262 return self._complete_pack(f, path, len(pd), indexer) 

1263 else: 

1264 f.close() 

1265 os.remove(path) 

1266 return None 

1267 

1268 def abort() -> None: 

1269 f.close() 

1270 os.remove(path) 

1271 

1272 return f, commit, abort 

1273 

1274 def add_object(self, obj) -> None: 

1275 """Add a single object to this object store. 

1276 

1277 Args: 

1278 obj: Object to add 

1279 """ 

1280 path = self._get_shafile_path(obj.id) 

1281 dir = os.path.dirname(path) 

1282 try: 

1283 os.mkdir(dir) 

1284 except FileExistsError: 

1285 pass 

1286 if os.path.exists(path): 

1287 return # Already there, no need to write again 

1288 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1289 f.write( 

1290 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1291 ) 

1292 

1293 @classmethod 

1294 def init(cls, path: Union[str, os.PathLike]): 

1295 try: 

1296 os.mkdir(path) 

1297 except FileExistsError: 

1298 pass 

1299 os.mkdir(os.path.join(path, "info")) 

1300 os.mkdir(os.path.join(path, PACKDIR)) 

1301 return cls(path) 

1302 

1303 def iter_prefix(self, prefix): 

1304 if len(prefix) < 2: 

1305 yield from super().iter_prefix(prefix) 

1306 return 

1307 seen = set() 

1308 dir = prefix[:2].decode() 

1309 rest = prefix[2:].decode() 

1310 try: 

1311 for name in os.listdir(os.path.join(self.path, dir)): 

1312 if name.startswith(rest): 

1313 sha = os.fsencode(dir + name) 

1314 if sha not in seen: 

1315 seen.add(sha) 

1316 yield sha 

1317 except FileNotFoundError: 

1318 pass 

1319 

1320 for p in self.packs: 

1321 bin_prefix = ( 

1322 binascii.unhexlify(prefix) 

1323 if len(prefix) % 2 == 0 

1324 else binascii.unhexlify(prefix[:-1]) 

1325 ) 

1326 for sha in p.index.iter_prefix(bin_prefix): 

1327 sha = sha_to_hex(sha) 

1328 if sha.startswith(prefix) and sha not in seen: 

1329 seen.add(sha) 

1330 yield sha 

1331 for alternate in self.alternates: 

1332 for sha in alternate.iter_prefix(prefix): 

1333 if sha not in seen: 

1334 seen.add(sha) 

1335 yield sha 

1336 

1337 def get_commit_graph(self): 

1338 """Get the commit graph for this object store. 

1339 

1340 Returns: 

1341 CommitGraph object if available, None otherwise 

1342 """ 

1343 if not self._use_commit_graph: 

1344 return None 

1345 

1346 if self._commit_graph is None: 

1347 from .commit_graph import read_commit_graph 

1348 

1349 # Look for commit graph in our objects directory 

1350 graph_file = os.path.join(self.path, "info", "commit-graph") 

1351 if os.path.exists(graph_file): 

1352 self._commit_graph = read_commit_graph(graph_file) 

1353 return self._commit_graph 

1354 

1355 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1356 """Write a commit graph file for this object store. 

1357 

1358 Args: 

1359 refs: List of refs to include. If None, includes all refs from object store. 

1360 reachable: If True, includes all commits reachable from refs. 

1361 If False, only includes the direct ref targets. 

1362 """ 

1363 from .commit_graph import get_reachable_commits 

1364 

1365 if refs is None: 

1366 # Get all commit objects from the object store 

1367 all_refs = [] 

1368 # Iterate through all objects to find commits 

1369 for sha in self: 

1370 try: 

1371 obj = self[sha] 

1372 if obj.type_name == b"commit": 

1373 all_refs.append(sha) 

1374 except KeyError: 

1375 continue 

1376 else: 

1377 # Use provided refs 

1378 all_refs = refs 

1379 

1380 if not all_refs: 

1381 return # No commits to include 

1382 

1383 if reachable: 

1384 # Get all reachable commits 

1385 commit_ids = get_reachable_commits(self, all_refs) 

1386 else: 

1387 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1388 commit_ids = [] 

1389 for ref in all_refs: 

1390 if isinstance(ref, bytes) and len(ref) == 40: 

1391 # Already hex ObjectID 

1392 commit_ids.append(ref) 

1393 elif isinstance(ref, bytes) and len(ref) == 20: 

1394 # Binary SHA, convert to hex ObjectID 

1395 from .objects import sha_to_hex 

1396 

1397 commit_ids.append(sha_to_hex(ref)) 

1398 else: 

1399 # Assume it's already correct format 

1400 commit_ids.append(ref) 

1401 

1402 if commit_ids: 

1403 # Write commit graph directly to our object store path 

1404 # Generate the commit graph 

1405 from .commit_graph import generate_commit_graph 

1406 

1407 graph = generate_commit_graph(self, commit_ids) 

1408 

1409 if graph.entries: 

1410 # Ensure the info directory exists 

1411 info_dir = os.path.join(self.path, "info") 

1412 os.makedirs(info_dir, exist_ok=True) 

1413 

1414 # Write using GitFile for atomic operation 

1415 graph_path = os.path.join(info_dir, "commit-graph") 

1416 with GitFile(graph_path, "wb") as f: 

1417 graph.write_to_file(f) 

1418 

1419 # Clear cached commit graph so it gets reloaded 

1420 self._commit_graph = None 

1421 

1422 def prune(self, grace_period: Optional[int] = None) -> None: 

1423 """Prune/clean up this object store. 

1424 

1425 This removes temporary files that were left behind by interrupted 

1426 pack operations. These are files that start with ``tmp_pack_`` in the 

1427 repository directory or files with .pack extension but no corresponding 

1428 .idx file in the pack directory. 

1429 

1430 Args: 

1431 grace_period: Grace period in seconds for removing temporary files. 

1432 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1433 """ 

1434 import glob 

1435 

1436 if grace_period is None: 

1437 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1438 

1439 # Clean up tmp_pack_* files in the repository directory 

1440 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1441 # Check if file is old enough (more than grace period) 

1442 mtime = os.path.getmtime(tmp_file) 

1443 if time.time() - mtime > grace_period: 

1444 os.remove(tmp_file) 

1445 

1446 # Clean up orphaned .pack files without corresponding .idx files 

1447 try: 

1448 pack_dir_contents = os.listdir(self.pack_dir) 

1449 except FileNotFoundError: 

1450 return 

1451 

1452 pack_files = {} 

1453 idx_files = set() 

1454 

1455 for name in pack_dir_contents: 

1456 if name.endswith(".pack"): 

1457 base_name = name[:-5] # Remove .pack extension 

1458 pack_files[base_name] = name 

1459 elif name.endswith(".idx"): 

1460 base_name = name[:-4] # Remove .idx extension 

1461 idx_files.add(base_name) 

1462 

1463 # Remove .pack files without corresponding .idx files 

1464 for base_name, pack_name in pack_files.items(): 

1465 if base_name not in idx_files: 

1466 pack_path = os.path.join(self.pack_dir, pack_name) 

1467 # Check if file is old enough (more than grace period) 

1468 mtime = os.path.getmtime(pack_path) 

1469 if time.time() - mtime > grace_period: 

1470 os.remove(pack_path) 

1471 

1472 

1473class MemoryObjectStore(BaseObjectStore): 

1474 """Object store that keeps all objects in memory.""" 

1475 

1476 def __init__(self) -> None: 

1477 super().__init__() 

1478 self._data: dict[str, ShaFile] = {} 

1479 self.pack_compression_level = -1 

1480 

1481 def _to_hexsha(self, sha): 

1482 if len(sha) == 40: 

1483 return sha 

1484 elif len(sha) == 20: 

1485 return sha_to_hex(sha) 

1486 else: 

1487 raise ValueError(f"Invalid sha {sha!r}") 

1488 

1489 def contains_loose(self, sha): 

1490 """Check if a particular object is present by SHA1 and is loose.""" 

1491 return self._to_hexsha(sha) in self._data 

1492 

1493 def contains_packed(self, sha) -> bool: 

1494 """Check if a particular object is present by SHA1 and is packed.""" 

1495 return False 

1496 

1497 def __iter__(self): 

1498 """Iterate over the SHAs that are present in this store.""" 

1499 return iter(self._data.keys()) 

1500 

1501 @property 

1502 def packs(self): 

1503 """List with pack objects.""" 

1504 return [] 

1505 

1506 def get_raw(self, name: ObjectID): 

1507 """Obtain the raw text for an object. 

1508 

1509 Args: 

1510 name: sha for the object. 

1511 Returns: tuple with numeric type and object contents. 

1512 """ 

1513 obj = self[self._to_hexsha(name)] 

1514 return obj.type_num, obj.as_raw_string() 

1515 

1516 def __getitem__(self, name: ObjectID): 

1517 return self._data[self._to_hexsha(name)].copy() 

1518 

1519 def __delitem__(self, name: ObjectID) -> None: 

1520 """Delete an object from this store, for testing only.""" 

1521 del self._data[self._to_hexsha(name)] 

1522 

1523 def add_object(self, obj) -> None: 

1524 """Add a single object to this object store.""" 

1525 self._data[obj.id] = obj.copy() 

1526 

1527 def add_objects(self, objects, progress=None) -> None: 

1528 """Add a set of objects to this object store. 

1529 

1530 Args: 

1531 objects: Iterable over a list of (object, path) tuples 

1532 """ 

1533 for obj, path in objects: 

1534 self.add_object(obj) 

1535 

1536 def add_pack(self): 

1537 """Add a new pack to this object store. 

1538 

1539 Because this object store doesn't support packs, we extract and add the 

1540 individual objects. 

1541 

1542 Returns: Fileobject to write to and a commit function to 

1543 call when the pack is finished. 

1544 """ 

1545 from tempfile import SpooledTemporaryFile 

1546 

1547 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1548 

1549 def commit() -> None: 

1550 size = f.tell() 

1551 if size > 0: 

1552 f.seek(0) 

1553 p = PackData.from_file(f, size) 

1554 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1555 self.add_object(obj) 

1556 p.close() 

1557 f.close() 

1558 else: 

1559 f.close() 

1560 

1561 def abort() -> None: 

1562 f.close() 

1563 

1564 return f, commit, abort 

1565 

1566 def add_pack_data( 

1567 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1568 ) -> None: 

1569 """Add pack data to this object store. 

1570 

1571 Args: 

1572 count: Number of items to add 

1573 """ 

1574 if count == 0: 

1575 return 

1576 

1577 # Since MemoryObjectStore doesn't support pack files, we need to 

1578 # extract individual objects. To handle deltas properly, we write 

1579 # to a temporary pack and then use PackInflater to resolve them. 

1580 f, commit, abort = self.add_pack() 

1581 try: 

1582 write_pack_data( 

1583 f.write, 

1584 unpacked_objects, 

1585 num_records=count, 

1586 progress=progress, 

1587 ) 

1588 except BaseException: 

1589 abort() 

1590 raise 

1591 else: 

1592 commit() 

1593 

1594 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1595 """Add a new thin pack to this object store. 

1596 

1597 Thin packs are packs that contain deltas with parents that exist 

1598 outside the pack. Because this object store doesn't support packs, we 

1599 extract and add the individual objects. 

1600 

1601 Args: 

1602 read_all: Read function that blocks until the number of 

1603 requested bytes are read. 

1604 read_some: Read function that returns at least one byte, but may 

1605 not return the number of bytes requested. 

1606 """ 

1607 f, commit, abort = self.add_pack() 

1608 try: 

1609 copier = PackStreamCopier(read_all, read_some, f) 

1610 copier.verify() 

1611 except BaseException: 

1612 abort() 

1613 raise 

1614 else: 

1615 commit() 

1616 

1617 

1618class ObjectIterator(Protocol): 

1619 """Interface for iterating over objects.""" 

1620 

1621 def iterobjects(self) -> Iterator[ShaFile]: 

1622 raise NotImplementedError(self.iterobjects) 

1623 

1624 

1625def tree_lookup_path(lookup_obj, root_sha, path): 

1626 """Look up an object in a Git tree. 

1627 

1628 Args: 

1629 lookup_obj: Callback for retrieving object by SHA1 

1630 root_sha: SHA1 of the root tree 

1631 path: Path to lookup 

1632 Returns: A tuple of (mode, SHA) of the resulting path. 

1633 """ 

1634 tree = lookup_obj(root_sha) 

1635 if not isinstance(tree, Tree): 

1636 raise NotTreeError(root_sha) 

1637 return tree.lookup_path(lookup_obj, path) 

1638 

1639 

1640def _collect_filetree_revs( 

1641 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1642) -> None: 

1643 """Collect SHA1s of files and directories for specified tree. 

1644 

1645 Args: 

1646 obj_store: Object store to get objects by SHA from 

1647 tree_sha: tree reference to walk 

1648 kset: set to fill with references to files and directories 

1649 """ 

1650 filetree = obj_store[tree_sha] 

1651 assert isinstance(filetree, Tree) 

1652 for name, mode, sha in filetree.iteritems(): 

1653 if not S_ISGITLINK(mode) and sha not in kset: 

1654 kset.add(sha) 

1655 if stat.S_ISDIR(mode): 

1656 _collect_filetree_revs(obj_store, sha, kset) 

1657 

1658 

1659def _split_commits_and_tags( 

1660 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1661) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1662 """Split object id list into three lists with commit, tag, and other SHAs. 

1663 

1664 Commits referenced by tags are included into commits 

1665 list as well. Only SHA1s known in this repository will get 

1666 through, and unless ignore_unknown argument is True, KeyError 

1667 is thrown for SHA1 missing in the repository 

1668 

1669 Args: 

1670 obj_store: Object store to get objects by SHA1 from 

1671 lst: Collection of commit and tag SHAs 

1672 ignore_unknown: True to skip SHA1 missing in the repository 

1673 silently. 

1674 Returns: A tuple of (commits, tags, others) SHA1s 

1675 """ 

1676 commits: set[bytes] = set() 

1677 tags: set[bytes] = set() 

1678 others: set[bytes] = set() 

1679 for e in lst: 

1680 try: 

1681 o = obj_store[e] 

1682 except KeyError: 

1683 if not ignore_unknown: 

1684 raise 

1685 else: 

1686 if isinstance(o, Commit): 

1687 commits.add(e) 

1688 elif isinstance(o, Tag): 

1689 tags.add(e) 

1690 tagged = o.object[1] 

1691 c, t, os = _split_commits_and_tags( 

1692 obj_store, [tagged], ignore_unknown=ignore_unknown 

1693 ) 

1694 commits |= c 

1695 tags |= t 

1696 others |= os 

1697 else: 

1698 others.add(e) 

1699 return (commits, tags, others) 

1700 

1701 

1702class MissingObjectFinder: 

1703 """Find the objects missing from another object store. 

1704 

1705 Args: 

1706 object_store: Object store containing at least all objects to be 

1707 sent 

1708 haves: SHA1s of commits not to send (already present in target) 

1709 wants: SHA1s of commits to send 

1710 progress: Optional function to report progress to. 

1711 get_tagged: Function that returns a dict of pointed-to sha -> tag 

1712 sha for including tags. 

1713 get_parents: Optional function for getting the parents of a commit. 

1714 """ 

1715 

1716 def __init__( 

1717 self, 

1718 object_store, 

1719 haves, 

1720 wants, 

1721 *, 

1722 shallow=None, 

1723 progress=None, 

1724 get_tagged=None, 

1725 get_parents=lambda commit: commit.parents, 

1726 ) -> None: 

1727 self.object_store = object_store 

1728 if shallow is None: 

1729 shallow = set() 

1730 self._get_parents = get_parents 

1731 # process Commits and Tags differently 

1732 # Note, while haves may list commits/tags not available locally, 

1733 # and such SHAs would get filtered out by _split_commits_and_tags, 

1734 # wants shall list only known SHAs, and otherwise 

1735 # _split_commits_and_tags fails with KeyError 

1736 have_commits, have_tags, have_others = _split_commits_and_tags( 

1737 object_store, haves, ignore_unknown=True 

1738 ) 

1739 want_commits, want_tags, want_others = _split_commits_and_tags( 

1740 object_store, wants, ignore_unknown=False 

1741 ) 

1742 # all_ancestors is a set of commits that shall not be sent 

1743 # (complete repository up to 'haves') 

1744 all_ancestors = _collect_ancestors( 

1745 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

1746 )[0] 

1747 # all_missing - complete set of commits between haves and wants 

1748 # common - commits from all_ancestors we hit into while 

1749 # traversing parent hierarchy of wants 

1750 missing_commits, common_commits = _collect_ancestors( 

1751 object_store, 

1752 want_commits, 

1753 all_ancestors, 

1754 shallow=shallow, 

1755 get_parents=self._get_parents, 

1756 ) 

1757 self.remote_has: set[bytes] = set() 

1758 # Now, fill sha_done with commits and revisions of 

1759 # files and directories known to be both locally 

1760 # and on target. Thus these commits and files 

1761 # won't get selected for fetch 

1762 for h in common_commits: 

1763 self.remote_has.add(h) 

1764 cmt = object_store[h] 

1765 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

1766 # record tags we have as visited, too 

1767 for t in have_tags: 

1768 self.remote_has.add(t) 

1769 self.sha_done = set(self.remote_has) 

1770 

1771 # in fact, what we 'want' is commits, tags, and others 

1772 # we've found missing 

1773 self.objects_to_send: set[ 

1774 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

1775 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

1776 missing_tags = want_tags.difference(have_tags) 

1777 self.objects_to_send.update( 

1778 {(w, None, Tag.type_num, False) for w in missing_tags} 

1779 ) 

1780 missing_others = want_others.difference(have_others) 

1781 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

1782 

1783 if progress is None: 

1784 self.progress = lambda x: None 

1785 else: 

1786 self.progress = progress 

1787 self._tagged = (get_tagged and get_tagged()) or {} 

1788 

1789 def get_remote_has(self): 

1790 return self.remote_has 

1791 

1792 def add_todo( 

1793 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

1794 ) -> None: 

1795 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

1796 

1797 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

1798 while True: 

1799 if not self.objects_to_send: 

1800 self.progress( 

1801 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

1802 ) 

1803 raise StopIteration 

1804 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

1805 if sha not in self.sha_done: 

1806 break 

1807 if not leaf: 

1808 o = self.object_store[sha] 

1809 if isinstance(o, Commit): 

1810 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

1811 elif isinstance(o, Tree): 

1812 self.add_todo( 

1813 [ 

1814 ( 

1815 s, 

1816 n, 

1817 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

1818 not stat.S_ISDIR(m), 

1819 ) 

1820 for n, m, s in o.iteritems() 

1821 if not S_ISGITLINK(m) 

1822 ] 

1823 ) 

1824 elif isinstance(o, Tag): 

1825 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

1826 if sha in self._tagged: 

1827 self.add_todo([(self._tagged[sha], None, None, True)]) 

1828 self.sha_done.add(sha) 

1829 if len(self.sha_done) % 1000 == 0: 

1830 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

1831 if type_num is None: 

1832 pack_hint = None 

1833 else: 

1834 pack_hint = (type_num, name) 

1835 return (sha, pack_hint) 

1836 

1837 def __iter__(self): 

1838 return self 

1839 

1840 

1841class ObjectStoreGraphWalker: 

1842 """Graph walker that finds what commits are missing from an object store.""" 

1843 

1844 heads: set[ObjectID] 

1845 """Revisions without descendants in the local repo.""" 

1846 

1847 get_parents: Callable[[ObjectID], ObjectID] 

1848 """Function to retrieve parents in the local repo.""" 

1849 

1850 shallow: set[ObjectID] 

1851 

1852 def __init__( 

1853 self, 

1854 local_heads: Iterable[ObjectID], 

1855 get_parents, 

1856 shallow: Optional[set[ObjectID]] = None, 

1857 update_shallow=None, 

1858 ) -> None: 

1859 """Create a new instance. 

1860 

1861 Args: 

1862 local_heads: Heads to start search with 

1863 get_parents: Function for finding the parents of a SHA1. 

1864 """ 

1865 self.heads = set(local_heads) 

1866 self.get_parents = get_parents 

1867 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

1868 if shallow is None: 

1869 shallow = set() 

1870 self.shallow = shallow 

1871 self.update_shallow = update_shallow 

1872 

1873 def nak(self) -> None: 

1874 """Nothing in common was found.""" 

1875 

1876 def ack(self, sha: ObjectID) -> None: 

1877 """Ack that a revision and its ancestors are present in the source.""" 

1878 if len(sha) != 40: 

1879 raise ValueError(f"unexpected sha {sha!r} received") 

1880 ancestors = {sha} 

1881 

1882 # stop if we run out of heads to remove 

1883 while self.heads: 

1884 for a in ancestors: 

1885 if a in self.heads: 

1886 self.heads.remove(a) 

1887 

1888 # collect all ancestors 

1889 new_ancestors = set() 

1890 for a in ancestors: 

1891 ps = self.parents.get(a) 

1892 if ps is not None: 

1893 new_ancestors.update(ps) 

1894 self.parents[a] = None 

1895 

1896 # no more ancestors; stop 

1897 if not new_ancestors: 

1898 break 

1899 

1900 ancestors = new_ancestors 

1901 

1902 def next(self): 

1903 """Iterate over ancestors of heads in the target.""" 

1904 if self.heads: 

1905 ret = self.heads.pop() 

1906 try: 

1907 ps = self.get_parents(ret) 

1908 except KeyError: 

1909 return None 

1910 self.parents[ret] = ps 

1911 self.heads.update([p for p in ps if p not in self.parents]) 

1912 return ret 

1913 return None 

1914 

1915 __next__ = next 

1916 

1917 

1918def commit_tree_changes(object_store, tree, changes): 

1919 """Commit a specified set of changes to a tree structure. 

1920 

1921 This will apply a set of changes on top of an existing tree, storing new 

1922 objects in object_store. 

1923 

1924 changes are a list of tuples with (path, mode, object_sha). 

1925 Paths can be both blobs and trees. See the mode and 

1926 object sha to None deletes the path. 

1927 

1928 This method works especially well if there are only a small 

1929 number of changes to a big tree. For a large number of changes 

1930 to a large tree, use e.g. commit_tree. 

1931 

1932 Args: 

1933 object_store: Object store to store new objects in 

1934 and retrieve old ones from. 

1935 tree: Original tree root 

1936 changes: changes to apply 

1937 Returns: New tree root object 

1938 """ 

1939 # TODO(jelmer): Save up the objects and add them using .add_objects 

1940 # rather than with individual calls to .add_object. 

1941 nested_changes = {} 

1942 for path, new_mode, new_sha in changes: 

1943 try: 

1944 (dirname, subpath) = path.split(b"/", 1) 

1945 except ValueError: 

1946 if new_sha is None: 

1947 del tree[path] 

1948 else: 

1949 tree[path] = (new_mode, new_sha) 

1950 else: 

1951 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

1952 for name, subchanges in nested_changes.items(): 

1953 try: 

1954 orig_subtree = object_store[tree[name][1]] 

1955 except KeyError: 

1956 orig_subtree = Tree() 

1957 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

1958 if len(subtree) == 0: 

1959 del tree[name] 

1960 else: 

1961 tree[name] = (stat.S_IFDIR, subtree.id) 

1962 object_store.add_object(tree) 

1963 return tree 

1964 

1965 

1966class OverlayObjectStore(BaseObjectStore): 

1967 """Object store that can overlay multiple object stores.""" 

1968 

1969 def __init__(self, bases, add_store=None) -> None: 

1970 self.bases = bases 

1971 self.add_store = add_store 

1972 

1973 def add_object(self, object): 

1974 if self.add_store is None: 

1975 raise NotImplementedError(self.add_object) 

1976 return self.add_store.add_object(object) 

1977 

1978 def add_objects(self, objects, progress=None): 

1979 if self.add_store is None: 

1980 raise NotImplementedError(self.add_object) 

1981 return self.add_store.add_objects(objects, progress) 

1982 

1983 @property 

1984 def packs(self): 

1985 ret = [] 

1986 for b in self.bases: 

1987 ret.extend(b.packs) 

1988 return ret 

1989 

1990 def __iter__(self): 

1991 done = set() 

1992 for b in self.bases: 

1993 for o_id in b: 

1994 if o_id not in done: 

1995 yield o_id 

1996 done.add(o_id) 

1997 

1998 def iterobjects_subset( 

1999 self, shas: Iterable[bytes], *, allow_missing: bool = False 

2000 ) -> Iterator[ShaFile]: 

2001 todo = set(shas) 

2002 found: set[bytes] = set() 

2003 

2004 for b in self.bases: 

2005 # Create a copy of todo for each base to avoid modifying 

2006 # the set while iterating through it 

2007 current_todo = todo - found 

2008 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2009 yield o 

2010 found.add(o.id) 

2011 

2012 # Check for any remaining objects not found 

2013 missing = todo - found 

2014 if missing and not allow_missing: 

2015 raise KeyError(next(iter(missing))) 

2016 

2017 def iter_unpacked_subset( 

2018 self, 

2019 shas: Iterable[bytes], 

2020 *, 

2021 include_comp=False, 

2022 allow_missing: bool = False, 

2023 convert_ofs_delta=True, 

2024 ) -> Iterator[ShaFile]: 

2025 todo = set(shas) 

2026 for b in self.bases: 

2027 for o in b.iter_unpacked_subset( 

2028 todo, 

2029 include_comp=include_comp, 

2030 allow_missing=True, 

2031 convert_ofs_delta=convert_ofs_delta, 

2032 ): 

2033 yield o 

2034 todo.remove(o.id) 

2035 if todo and not allow_missing: 

2036 raise KeyError(o.id) 

2037 

2038 def get_raw(self, sha_id): 

2039 for b in self.bases: 

2040 try: 

2041 return b.get_raw(sha_id) 

2042 except KeyError: 

2043 pass 

2044 raise KeyError(sha_id) 

2045 

2046 def contains_packed(self, sha) -> bool: 

2047 for b in self.bases: 

2048 if b.contains_packed(sha): 

2049 return True 

2050 return False 

2051 

2052 def contains_loose(self, sha) -> bool: 

2053 for b in self.bases: 

2054 if b.contains_loose(sha): 

2055 return True 

2056 return False 

2057 

2058 

2059def read_packs_file(f): 

2060 """Yield the packs listed in a packs file.""" 

2061 for line in f.read().splitlines(): 

2062 if not line: 

2063 continue 

2064 (kind, name) = line.split(b" ", 1) 

2065 if kind != b"P": 

2066 continue 

2067 yield os.fsdecode(name) 

2068 

2069 

2070class BucketBasedObjectStore(PackBasedObjectStore): 

2071 """Object store implementation that uses a bucket store like S3 as backend.""" 

2072 

2073 def _iter_loose_objects(self): 

2074 """Iterate over the SHAs of all loose objects.""" 

2075 return iter([]) 

2076 

2077 def _get_loose_object(self, sha) -> None: 

2078 return None 

2079 

2080 def delete_loose_object(self, sha) -> None: 

2081 # Doesn't exist.. 

2082 pass 

2083 

2084 def _remove_pack(self, name) -> None: 

2085 raise NotImplementedError(self._remove_pack) 

2086 

2087 def _iter_pack_names(self) -> Iterator[str]: 

2088 raise NotImplementedError(self._iter_pack_names) 

2089 

2090 def _get_pack(self, name) -> Pack: 

2091 raise NotImplementedError(self._get_pack) 

2092 

2093 def _update_pack_cache(self): 

2094 pack_files = set(self._iter_pack_names()) 

2095 

2096 # Open newly appeared pack files 

2097 new_packs = [] 

2098 for f in pack_files: 

2099 if f not in self._pack_cache: 

2100 pack = self._get_pack(f) 

2101 new_packs.append(pack) 

2102 self._pack_cache[f] = pack 

2103 # Remove disappeared pack files 

2104 for f in set(self._pack_cache) - pack_files: 

2105 self._pack_cache.pop(f).close() 

2106 return new_packs 

2107 

2108 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2109 raise NotImplementedError 

2110 

2111 def add_pack(self): 

2112 """Add a new pack to this object store. 

2113 

2114 Returns: Fileobject to write to, a commit function to 

2115 call when the pack is finished and an abort 

2116 function. 

2117 """ 

2118 import tempfile 

2119 

2120 pf = tempfile.SpooledTemporaryFile( 

2121 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2122 ) 

2123 

2124 def commit(): 

2125 if pf.tell() == 0: 

2126 pf.close() 

2127 return None 

2128 

2129 pf.seek(0) 

2130 p = PackData(pf.name, pf) 

2131 entries = p.sorted_entries() 

2132 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2133 idxf = tempfile.SpooledTemporaryFile( 

2134 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2135 ) 

2136 checksum = p.get_stored_checksum() 

2137 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2138 idxf.seek(0) 

2139 idx = load_pack_index_file(basename + ".idx", idxf) 

2140 for pack in self.packs: 

2141 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2142 p.close() 

2143 idx.close() 

2144 pf.close() 

2145 idxf.close() 

2146 return pack 

2147 pf.seek(0) 

2148 idxf.seek(0) 

2149 self._upload_pack(basename, pf, idxf) 

2150 final_pack = Pack.from_objects(p, idx) 

2151 self._add_cached_pack(basename, final_pack) 

2152 pf.close() 

2153 idxf.close() 

2154 return final_pack 

2155 

2156 return pf, commit, pf.close 

2157 

2158 

2159def _collect_ancestors( 

2160 store: ObjectContainer, 

2161 heads, 

2162 common: frozenset[ObjectID] = frozenset(), 

2163 shallow: frozenset[ObjectID] = frozenset(), 

2164 get_parents=lambda commit: commit.parents, 

2165): 

2166 """Collect all ancestors of heads up to (excluding) those in common. 

2167 

2168 Args: 

2169 heads: commits to start from 

2170 common: commits to end at, or empty set to walk repository 

2171 completely 

2172 get_parents: Optional function for getting the parents of a 

2173 commit. 

2174 Returns: a tuple (A, B) where A - all commits reachable 

2175 from heads but not present in common, B - common (shared) elements 

2176 that are directly reachable from heads 

2177 """ 

2178 bases = set() 

2179 commits = set() 

2180 queue = [] 

2181 queue.extend(heads) 

2182 

2183 # Try to use commit graph if available 

2184 commit_graph = store.get_commit_graph() 

2185 

2186 while queue: 

2187 e = queue.pop(0) 

2188 if e in common: 

2189 bases.add(e) 

2190 elif e not in commits: 

2191 commits.add(e) 

2192 if e in shallow: 

2193 continue 

2194 

2195 # Try to use commit graph for parent lookup 

2196 parents = None 

2197 if commit_graph: 

2198 parents = commit_graph.get_parents(e) 

2199 

2200 if parents is None: 

2201 # Fall back to loading the object 

2202 cmt = store[e] 

2203 parents = get_parents(cmt) 

2204 

2205 queue.extend(parents) 

2206 return (commits, bases) 

2207 

2208 

2209def iter_tree_contents( 

2210 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2211): 

2212 """Iterate the contents of a tree and all subtrees. 

2213 

2214 Iteration is depth-first pre-order, as in e.g. os.walk. 

2215 

2216 Args: 

2217 tree_id: SHA1 of the tree. 

2218 include_trees: If True, include tree objects in the iteration. 

2219 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

2220 tree. 

2221 """ 

2222 if tree_id is None: 

2223 return 

2224 # This could be fairly easily generalized to >2 trees if we find a use 

2225 # case. 

2226 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2227 while todo: 

2228 entry = todo.pop() 

2229 if stat.S_ISDIR(entry.mode): 

2230 extra = [] 

2231 tree = store[entry.sha] 

2232 assert isinstance(tree, Tree) 

2233 for subentry in tree.iteritems(name_order=True): 

2234 extra.append(subentry.in_path(entry.path)) 

2235 todo.extend(reversed(extra)) 

2236 if not stat.S_ISDIR(entry.mode) or include_trees: 

2237 yield entry 

2238 

2239 

2240def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2241 """Peel all tags from a SHA. 

2242 

2243 Args: 

2244 sha: The object SHA to peel. 

2245 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

2246 intermediate tags; if the original ref does not point to a tag, 

2247 this will equal the original SHA1. 

2248 """ 

2249 unpeeled = obj = store[sha] 

2250 obj_class = object_class(obj.type_name) 

2251 while obj_class is Tag: 

2252 assert isinstance(obj, Tag) 

2253 obj_class, sha = obj.object 

2254 obj = store[sha] 

2255 return unpeeled, obj