Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%

1107 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23 

24"""Git object store interfaces and implementation.""" 

25 

26import binascii 

27import os 

28import stat 

29import sys 

30import time 

31import warnings 

32from collections.abc import Iterable, Iterator, Sequence 

33from contextlib import suppress 

34from io import BytesIO 

35from typing import ( 

36 Callable, 

37 Optional, 

38 Protocol, 

39 Union, 

40 cast, 

41) 

42 

43from .errors import NotTreeError 

44from .file import GitFile 

45from .objects import ( 

46 S_ISGITLINK, 

47 ZERO_SHA, 

48 Blob, 

49 Commit, 

50 ObjectID, 

51 ShaFile, 

52 Tag, 

53 Tree, 

54 TreeEntry, 

55 hex_to_filename, 

56 hex_to_sha, 

57 object_class, 

58 sha_to_hex, 

59 valid_hexsha, 

60) 

61from .pack import ( 

62 PACK_SPOOL_FILE_MAX_SIZE, 

63 ObjectContainer, 

64 Pack, 

65 PackData, 

66 PackedObjectContainer, 

67 PackFileDisappeared, 

68 PackHint, 

69 PackIndexer, 

70 PackInflater, 

71 PackStreamCopier, 

72 UnpackedObject, 

73 extend_pack, 

74 full_unpacked_object, 

75 generate_unpacked_objects, 

76 iter_sha1, 

77 load_pack_index_file, 

78 pack_objects_to_data, 

79 write_pack_data, 

80 write_pack_index, 

81) 

82from .protocol import DEPTH_INFINITE 

83from .refs import PEELED_TAG_SUFFIX, Ref 

84 

85INFODIR = "info" 

86PACKDIR = "pack" 

87 

88# use permissions consistent with Git; just readable by everyone 

89# TODO: should packs also be non-writable on Windows? if so, that 

90# would require some rather significant adjustments to the test suite 

91PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

92 

93# Grace period for cleaning up temporary pack files (in seconds) 

94# Matches git's default of 2 weeks 

95DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks 

96 

97 

98def find_shallow(store, heads, depth): 

99 """Find shallow commits according to a given depth. 

100 

101 Args: 

102 store: An ObjectStore for looking up objects. 

103 heads: Iterable of head SHAs to start walking from. 

104 depth: The depth of ancestors to include. A depth of one includes 

105 only the heads themselves. 

106 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be 

107 considered shallow and unshallow according to the arguments. Note that 

108 these sets may overlap if a commit is reachable along multiple paths. 

109 """ 

110 parents = {} 

111 commit_graph = store.get_commit_graph() 

112 

113 def get_parents(sha): 

114 result = parents.get(sha, None) 

115 if not result: 

116 # Try to use commit graph first if available 

117 if commit_graph: 

118 graph_parents = commit_graph.get_parents(sha) 

119 if graph_parents is not None: 

120 result = graph_parents 

121 parents[sha] = result 

122 return result 

123 # Fall back to loading the object 

124 result = store[sha].parents 

125 parents[sha] = result 

126 return result 

127 

128 todo = [] # stack of (sha, depth) 

129 for head_sha in heads: 

130 obj = store[head_sha] 

131 # Peel tags if necessary 

132 while isinstance(obj, Tag): 

133 _, sha = obj.object 

134 obj = store[sha] 

135 if isinstance(obj, Commit): 

136 todo.append((obj.id, 1)) 

137 

138 not_shallow = set() 

139 shallow = set() 

140 while todo: 

141 sha, cur_depth = todo.pop() 

142 if cur_depth < depth: 

143 not_shallow.add(sha) 

144 new_depth = cur_depth + 1 

145 todo.extend((p, new_depth) for p in get_parents(sha)) 

146 else: 

147 shallow.add(sha) 

148 

149 return shallow, not_shallow 
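
# Editorial sketch (not part of dulwich): find_shallow() applied to a small
# three-commit chain held in a MemoryObjectStore. The _example_find_shallow
# helper name is hypothetical; with depth=2 only the head stays "unshallow"
# and its parent becomes the shallow boundary.
def _example_find_shallow():
    from dulwich.object_store import MemoryObjectStore, find_shallow
    from dulwich.objects import Commit, Tree

    store = MemoryObjectStore()
    tree = Tree()  # empty tree, enough for the commits to reference
    store.add_object(tree)

    parent_id = None
    chain = []
    for i in range(3):
        c = Commit()
        c.tree = tree.id
        c.author = c.committer = b"Example <example@example.com>"
        c.author_time = c.commit_time = 1700000000 + i
        c.author_timezone = c.commit_timezone = 0
        c.message = b"commit %d" % i
        if parent_id is not None:
            c.parents = [parent_id]
        store.add_object(c)
        chain.append(c.id)
        parent_id = c.id

    shallow, not_shallow = find_shallow(store, [chain[-1]], depth=2)
    assert not_shallow == {chain[-1]}   # the head itself, at depth 1
    assert shallow == {chain[-2]}       # its parent hits the depth limit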

150 

151 

152def get_depth( 

153 store, 

154 head, 

155 get_parents=lambda commit: commit.parents, 

156 max_depth=None, 

157): 

158 """Return the current available depth for the given head. 

159 For commits with multiple parents, the largest possible depth will be 

160 returned. 

161 

162 Args: 

163 head: commit to start from 

164 get_parents: optional function for getting the parents of a commit 

165 max_depth: maximum depth to search 

166 """ 

167 if head not in store: 

168 return 0 

169 current_depth = 1 

170 queue = [(head, current_depth)] 

171 commit_graph = store.get_commit_graph() 

172 

173 while queue and (max_depth is None or current_depth < max_depth): 

174 e, depth = queue.pop(0) 

175 current_depth = max(current_depth, depth) 

176 

177 # Try to use commit graph for parent lookup if available 

178 parents = None 

179 if commit_graph: 

180 parents = commit_graph.get_parents(e) 

181 

182 if parents is None: 

183 # Fall back to loading the object 

184 cmt = store[e] 

185 if isinstance(cmt, Tag): 

186 _cls, sha = cmt.object 

187 cmt = store[sha] 

188 parents = get_parents(cmt) 

189 

190 queue.extend((parent, depth + 1) for parent in parents if parent in store) 

191 return current_depth 
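
# Editorial sketch (not part of dulwich): get_depth() on a two-commit chain.
# The _example_get_depth helper name is hypothetical.
def _example_get_depth():
    from dulwich.object_store import MemoryObjectStore, get_depth
    from dulwich.objects import Commit, Tree

    store = MemoryObjectStore()
    tree = Tree()
    store.add_object(tree)

    root = Commit()
    root.tree = tree.id
    root.author = root.committer = b"Example <example@example.com>"
    root.author_time = root.commit_time = 1700000000
    root.author_timezone = root.commit_timezone = 0
    root.message = b"root"
    store.add_object(root)

    tip = Commit()
    tip.tree = tree.id
    tip.parents = [root.id]
    tip.author = tip.committer = b"Example <example@example.com>"
    tip.author_time = tip.commit_time = 1700000001
    tip.author_timezone = tip.commit_timezone = 0
    tip.message = b"tip"
    store.add_object(tip)

    assert get_depth(store, tip.id) == 2
    assert get_depth(store, b"0" * 40) == 0  # heads missing from the store have depth 0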

192 

193 

194class PackContainer(Protocol): 

195 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

196 """Add a new pack.""" 

197 

198 

199class BaseObjectStore: 

200 """Object store interface.""" 

201 

202 def determine_wants_all( 

203 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None 

204 ) -> list[ObjectID]: 

205 def _want_deepen(sha): 

206 if not depth: 

207 return False 

208 if depth == DEPTH_INFINITE: 

209 return True 

210 return depth > self._get_depth(sha) 

211 

212 return [ 

213 sha 

214 for (ref, sha) in refs.items() 

215 if (sha not in self or _want_deepen(sha)) 

216 and not ref.endswith(PEELED_TAG_SUFFIX) 

217 and not sha == ZERO_SHA 

218 ] 
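
# Editorial sketch (not part of dulwich): determine_wants_all() reduces a refs
# dict to the object IDs that still need fetching. The ref names and the
# missing SHA below are made up for illustration.
def _example_determine_wants_all():
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import ZERO_SHA, Blob

    store = MemoryObjectStore()
    have = Blob.from_string(b"already here\n")
    store.add_object(have)

    missing_sha = b"1" * 40  # hypothetical object we do not have yet
    refs = {
        b"refs/heads/main": missing_sha,   # wanted: not present locally
        b"refs/heads/local": have.id,      # skipped: already in the store
        b"refs/tags/v1^{}": missing_sha,   # skipped: peeled tag ref
        b"refs/heads/empty": ZERO_SHA,     # skipped: null SHA
    }
    assert store.determine_wants_all(refs) == [missing_sha]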

219 

220 def contains_loose(self, sha) -> bool: 

221 """Check if a particular object is present by SHA1 and is loose.""" 

222 raise NotImplementedError(self.contains_loose) 

223 

224 def __contains__(self, sha1: bytes) -> bool: 

225 """Check if a particular object is present by SHA1. 

226 

227 This method makes no distinction between loose and packed objects. 

228 """ 

229 return self.contains_loose(sha1) 

230 

231 @property 

232 def packs(self): 

233 """Iterable of pack objects.""" 

234 raise NotImplementedError 

235 

236 def get_raw(self, name) -> tuple[int, bytes]: 

237 """Obtain the raw text for an object. 

238 

239 Args: 

240 name: sha for the object. 

241 Returns: tuple with numeric type and object contents. 

242 """ 

243 raise NotImplementedError(self.get_raw) 

244 

245 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

246 """Obtain an object by SHA1.""" 

247 type_num, uncomp = self.get_raw(sha1) 

248 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 

249 

250 def __iter__(self): 

251 """Iterate over the SHAs that are present in this store.""" 

252 raise NotImplementedError(self.__iter__) 

253 

254 def add_object(self, obj) -> None: 

255 """Add a single object to this object store.""" 

256 raise NotImplementedError(self.add_object) 

257 

258 def add_objects(self, objects, progress=None) -> None: 

259 """Add a set of objects to this object store. 

260 

261 Args: 

262 objects: Iterable of (object, path) tuples 

263 """ 

264 raise NotImplementedError(self.add_objects) 

265 

266 def tree_changes( 

267 self, 

268 source, 

269 target, 

270 want_unchanged=False, 

271 include_trees=False, 

272 change_type_same=False, 

273 rename_detector=None, 

274 ): 

275 """Find the differences between the contents of two trees. 

276 

277 Args: 

278 source: SHA1 of the source tree 

279 target: SHA1 of the target tree 

280 want_unchanged: Whether unchanged files should be reported 

281 include_trees: Whether to include trees 

282 change_type_same: Whether to report files changing 

283 type in the same entry. 

284 Returns: Iterator over tuples with 

285 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

286 """ 

287 from .diff_tree import tree_changes 

288 

289 for change in tree_changes( 

290 self, 

291 source, 

292 target, 

293 want_unchanged=want_unchanged, 

294 include_trees=include_trees, 

295 change_type_same=change_type_same, 

296 rename_detector=rename_detector, 

297 ): 

298 yield ( 

299 (change.old.path, change.new.path), 

300 (change.old.mode, change.new.mode), 

301 (change.old.sha, change.new.sha), 

302 ) 
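
# Editorial sketch (not part of dulwich): diffing two single-file trees with
# tree_changes(). The _example_tree_changes helper name is hypothetical.
def _example_tree_changes():
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    old_blob = Blob.from_string(b"old contents\n")
    new_blob = Blob.from_string(b"new contents\n")
    old_tree = Tree()
    old_tree.add(b"a.txt", 0o100644, old_blob.id)
    new_tree = Tree()
    new_tree.add(b"a.txt", 0o100644, new_blob.id)
    for obj in (old_blob, new_blob, old_tree, new_tree):
        store.add_object(obj)

    (paths, modes, shas) = next(store.tree_changes(old_tree.id, new_tree.id))
    # A single modification: same path and mode, different blob SHAs.
    assert paths == (b"a.txt", b"a.txt")
    assert modes == (0o100644, 0o100644)
    assert shas == (old_blob.id, new_blob.id)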

303 

304 def iter_tree_contents(self, tree_id, include_trees=False): 

305 """Iterate the contents of a tree and all subtrees. 

306 

307 Iteration is depth-first pre-order, as in e.g. os.walk. 

308 

309 Args: 

310 tree_id: SHA1 of the tree. 

311 include_trees: If True, include tree objects in the iteration. 

312 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

313 tree. 

314 """ 

315 warnings.warn( 

316 "Please use dulwich.object_store.iter_tree_contents", 

317 DeprecationWarning, 

318 stacklevel=2, 

319 ) 

320 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

321 

322 def iterobjects_subset( 

323 self, shas: Iterable[bytes], *, allow_missing: bool = False 

324 ) -> Iterator[ShaFile]: 

325 for sha in shas: 

326 try: 

327 yield self[sha] 

328 except KeyError: 

329 if not allow_missing: 

330 raise 

331 

332 def find_missing_objects( 

333 self, 

334 haves, 

335 wants, 

336 shallow=None, 

337 progress=None, 

338 get_tagged=None, 

339 get_parents=lambda commit: commit.parents, 

340 ): 

341 """Find the missing objects required for a set of revisions. 

342 

343 Args: 

344 haves: Iterable over SHAs already in common. 

345 wants: Iterable over SHAs of objects to fetch. 

346 shallow: Set of shallow commit SHA1s to skip 

347 progress: Simple progress function that will be called with 

348 updated progress strings. 

349 get_tagged: Function that returns a dict of pointed-to sha -> 

350 tag sha for including tags. 

351 get_parents: Optional function for getting the parents of a 

352 commit. 

353 Returns: Iterator over (sha, path) pairs. 

354 """ 

355 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

356 finder = MissingObjectFinder( 

357 self, 

358 haves=haves, 

359 wants=wants, 

360 shallow=shallow, 

361 progress=progress, 

362 get_tagged=get_tagged, 

363 get_parents=get_parents, 

364 ) 

365 return iter(finder) 

366 

367 def find_common_revisions(self, graphwalker): 

368 """Find which revisions this store has in common using graphwalker. 

369 

370 Args: 

371 graphwalker: A graphwalker object. 

372 Returns: List of SHAs that are in common 

373 """ 

374 haves = [] 

375 sha = next(graphwalker) 

376 while sha: 

377 if sha in self: 

378 haves.append(sha) 

379 graphwalker.ack(sha) 

380 sha = next(graphwalker) 

381 return haves 

382 

383 def generate_pack_data( 

384 self, have, want, shallow=None, progress=None, ofs_delta=True 

385 ) -> tuple[int, Iterator[UnpackedObject]]: 

386 """Generate pack data objects for a set of wants/haves. 

387 

388 Args: 

389 have: List of SHA1s of objects that should not be sent 

390 want: List of SHA1s of objects that should be sent 

391 shallow: Set of shallow commit SHA1s to skip 

392 ofs_delta: Whether OFS deltas can be included 

393 progress: Optional progress reporting method 

394 """ 

395 # Note that the pack-specific implementation below is more efficient, 

396 # as it reuses deltas 

397 missing_objects = MissingObjectFinder( 

398 self, haves=have, wants=want, shallow=shallow, progress=progress 

399 ) 

400 object_ids = list(missing_objects) 

401 return pack_objects_to_data( 

402 [(self[oid], path) for oid, path in object_ids], 

403 ofs_delta=ofs_delta, 

404 progress=progress, 

405 ) 

406 

407 def peel_sha(self, sha): 

408 """Peel all tags from a SHA. 

409 

410 Args: 

411 sha: The object SHA to peel. 

412 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

413 intermediate tags; if the original ref does not point to a tag, 

414 this will equal the original SHA1. 

415 """ 

416 warnings.warn( 

417 "Please use dulwich.object_store.peel_sha()", 

418 DeprecationWarning, 

419 stacklevel=2, 

420 ) 

421 return peel_sha(self, sha)[1] 

422 

423 def _get_depth( 

424 self, 

425 head, 

426 get_parents=lambda commit: commit.parents, 

427 max_depth=None, 

428 ): 

429 """Return the current available depth for the given head. 

430 For commits with multiple parents, the largest possible depth will be 

431 returned. 

432 

433 Args: 

434 head: commit to start from 

435 get_parents: optional function for getting the parents of a commit 

436 max_depth: maximum depth to search 

437 """ 

438 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth) 

439 

440 def close(self) -> None: 

441 """Close any files opened by this object store.""" 

442 # Default implementation is a NO-OP 

443 

444 def prune(self, grace_period: Optional[int] = None) -> None: 

445 """Prune/clean up this object store. 

446 

447 This includes removing orphaned temporary files and other 

448 housekeeping tasks. Default implementation is a NO-OP. 

449 

450 Args: 

451 grace_period: Grace period in seconds for removing temporary files. 

452 If None, uses the default grace period. 

453 """ 

454 # Default implementation is a NO-OP 

455 

456 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]: 

457 """Iterate over all SHA1s that start with a given prefix. 

458 

459 The default implementation is a naive iteration over all objects. 

460 However, subclasses may override this method with more efficient 

461 implementations. 

462 """ 

463 for sha in self: 

464 if sha.startswith(prefix): 

465 yield sha 
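
# Editorial sketch (not part of dulwich): the default iter_prefix() scans
# every SHA in the store and keeps those starting with the given prefix.
def _example_iter_prefix():
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob

    store = MemoryObjectStore()
    blob = Blob.from_string(b"some data\n")
    store.add_object(blob)

    # Look up the object by the first few hex digits of its ID.
    assert list(store.iter_prefix(blob.id[:4])) == [blob.id]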

466 

467 def get_commit_graph(self): 

468 """Get the commit graph for this object store. 

469 

470 Returns: 

471 CommitGraph object if available, None otherwise 

472 """ 

473 return None 

474 

475 def write_commit_graph(self, refs=None, reachable=True) -> None: 

476 """Write a commit graph file for this object store. 

477 

478 Args: 

479 refs: List of refs to include. If None, includes all refs from object store. 

480 reachable: If True, includes all commits reachable from refs. 

481 If False, only includes the direct ref targets. 

482 

483 Note: 

484 Default implementation does nothing. Subclasses should override 

485 this method to provide commit graph writing functionality. 

486 """ 

487 raise NotImplementedError(self.write_commit_graph) 

488 

489 def get_object_mtime(self, sha): 

490 """Get the modification time of an object. 

491 

492 Args: 

493 sha: SHA1 of the object 

494 

495 Returns: 

496 Modification time as seconds since epoch 

497 

498 Raises: 

499 KeyError: if the object is not found 

500 """ 

501 # Default implementation raises KeyError 

502 # Subclasses should override to provide actual mtime 

503 raise KeyError(sha) 

504 

505 

506class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer): 

507 def __init__(self, pack_compression_level=-1, pack_index_version=None) -> None: 

508 self._pack_cache: dict[str, Pack] = {} 

509 self.pack_compression_level = pack_compression_level 

510 self.pack_index_version = pack_index_version 

511 

512 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

513 """Add a new pack to this object store.""" 

514 raise NotImplementedError(self.add_pack) 

515 

516 def add_pack_data( 

517 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

518 ) -> Optional[Pack]: 

519 """Add pack data to this object store. 

520 

521 Args: 

522 count: Number of items to add 

523 """ 

524 if count == 0: 

525 # Don't bother writing an empty pack file 

526 return 

527 f, commit, abort = self.add_pack() 

528 try: 

529 write_pack_data( 

530 f.write, 

531 unpacked_objects, 

532 num_records=count, 

533 progress=progress, 

534 compression_level=self.pack_compression_level, 

535 ) 

536 except BaseException: 

537 abort() 

538 raise 

539 else: 

540 return commit() 

541 

542 @property 

543 def alternates(self): 

544 return [] 

545 

546 def contains_packed(self, sha) -> bool: 

547 """Check if a particular object is present by SHA1 and is packed. 

548 

549 This does not check alternates. 

550 """ 

551 for pack in self.packs: 

552 try: 

553 if sha in pack: 

554 return True 

555 except PackFileDisappeared: 

556 pass 

557 return False 

558 

559 def __contains__(self, sha) -> bool: 

560 """Check if a particular object is present by SHA1. 

561 

562 This method makes no distinction between loose and packed objects. 

563 """ 

564 if self.contains_packed(sha) or self.contains_loose(sha): 

565 return True 

566 for alternate in self.alternates: 

567 if sha in alternate: 

568 return True 

569 return False 

570 

571 def _add_cached_pack(self, base_name, pack) -> None: 

572 """Add a newly appeared pack to the cache by path.""" 

573 prev_pack = self._pack_cache.get(base_name) 

574 if prev_pack is not pack: 

575 self._pack_cache[base_name] = pack 

576 if prev_pack: 

577 prev_pack.close() 

578 

579 def generate_pack_data( 

580 self, have, want, shallow=None, progress=None, ofs_delta=True 

581 ) -> tuple[int, Iterator[UnpackedObject]]: 

582 """Generate pack data objects for a set of wants/haves. 

583 

584 Args: 

585 have: List of SHA1s of objects that should not be sent 

586 want: List of SHA1s of objects that should be sent 

587 shallow: Set of shallow commit SHA1s to skip 

588 ofs_delta: Whether OFS deltas can be included 

589 progress: Optional progress reporting method 

590 """ 

591 missing_objects = MissingObjectFinder( 

592 self, haves=have, wants=want, shallow=shallow, progress=progress 

593 ) 

594 remote_has = missing_objects.get_remote_has() 

595 object_ids = list(missing_objects) 

596 return len(object_ids), generate_unpacked_objects( 

597 cast(PackedObjectContainer, self), 

598 object_ids, 

599 progress=progress, 

600 ofs_delta=ofs_delta, 

601 other_haves=remote_has, 

602 ) 

603 

604 def _clear_cached_packs(self) -> None: 

605 pack_cache = self._pack_cache 

606 self._pack_cache = {} 

607 while pack_cache: 

608 (name, pack) = pack_cache.popitem() 

609 pack.close() 

610 

611 def _iter_cached_packs(self): 

612 return self._pack_cache.values() 

613 

614 def _update_pack_cache(self) -> list[Pack]: 

615 raise NotImplementedError(self._update_pack_cache) 

616 

617 def close(self) -> None: 

618 self._clear_cached_packs() 

619 

620 @property 

621 def packs(self): 

622 """List with pack objects.""" 

623 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

624 

625 def count_pack_files(self) -> int: 

626 """Count the number of pack files. 

627 

628 Returns: 

629 Number of pack files (excluding those with .keep files) 

630 """ 

631 count = 0 

632 for pack in self.packs: 

633 # Check if there's a .keep file for this pack 

634 keep_path = pack._basename + ".keep" 

635 if not os.path.exists(keep_path): 

636 count += 1 

637 return count 

638 

639 def _iter_alternate_objects(self): 

640 """Iterate over the SHAs of all the objects in alternate stores.""" 

641 for alternate in self.alternates: 

642 yield from alternate 

643 

644 def _iter_loose_objects(self): 

645 """Iterate over the SHAs of all loose objects.""" 

646 raise NotImplementedError(self._iter_loose_objects) 

647 

648 def _get_loose_object(self, sha) -> Optional[ShaFile]: 

649 raise NotImplementedError(self._get_loose_object) 

650 

651 def delete_loose_object(self, sha) -> None: 

652 """Delete a loose object. 

653 

654 This method only handles loose objects. For packed objects, 

655 use repack(exclude=...) to exclude them during repacking. 

656 """ 

657 raise NotImplementedError(self.delete_loose_object) 

658 

659 def _remove_pack(self, name) -> None: 

660 raise NotImplementedError(self._remove_pack) 

661 

662 def pack_loose_objects(self): 

663 """Pack loose objects. 

664 

665 Returns: Number of objects packed 

666 """ 

667 objects = set() 

668 for sha in self._iter_loose_objects(): 

669 objects.add((self._get_loose_object(sha), None)) 

670 self.add_objects(list(objects)) 

671 for obj, path in objects: 

672 self.delete_loose_object(obj.id) 

673 return len(objects) 

674 

675 def repack(self, exclude=None): 

676 """Repack the packs in this repository. 

677 

678 Note that this implementation is fairly naive and currently keeps all 

679 objects in memory while it repacks. 

680 

681 Args: 

682 exclude: Optional set of object SHAs to exclude from repacking 

683 """ 

684 if exclude is None: 

685 exclude = set() 

686 

687 loose_objects = set() 

688 excluded_loose_objects = set() 

689 for sha in self._iter_loose_objects(): 

690 if sha not in exclude: 

691 loose_objects.add(self._get_loose_object(sha)) 

692 else: 

693 excluded_loose_objects.add(sha) 

694 

695 objects = {(obj, None) for obj in loose_objects} 

696 old_packs = {p.name(): p for p in self.packs} 

697 for name, pack in old_packs.items(): 

698 objects.update( 

699 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude 

700 ) 

701 

702 # Only create a new pack if there are objects to pack 

703 if objects: 

704 # The name of the consolidated pack might match the name of a 

705 # pre-existing pack. Take care not to remove the newly created 

706 # consolidated pack. 

707 consolidated = self.add_objects(objects) 

708 old_packs.pop(consolidated.name(), None) 

709 

710 # Delete loose objects that were packed 

711 for obj in loose_objects: 

712 self.delete_loose_object(obj.id) 

713 # Delete excluded loose objects 

714 for sha in excluded_loose_objects: 

715 self.delete_loose_object(sha) 

716 for name, pack in old_packs.items(): 

717 self._remove_pack(pack) 

718 self._update_pack_cache() 

719 return len(objects) 
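
# Editorial sketch (not part of dulwich): repacking a DiskObjectStore created
# in a temporary directory consolidates its loose objects into a single pack.
def _example_repack():
    import tempfile
    from dulwich.object_store import DiskObjectStore
    from dulwich.objects import Blob

    store = DiskObjectStore.init(tempfile.mkdtemp())
    blob = Blob.from_string(b"loose object\n")
    store.add_object(blob)
    assert store.contains_loose(blob.id)

    packed = store.repack()  # number of objects written to the new pack
    assert packed == 1
    assert store.contains_packed(blob.id)
    assert not store.contains_loose(blob.id)
    store.close()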

720 

721 def __iter__(self): 

722 """Iterate over the SHAs that are present in this store.""" 

723 self._update_pack_cache() 

724 for pack in self._iter_cached_packs(): 

725 try: 

726 yield from pack 

727 except PackFileDisappeared: 

728 pass 

729 yield from self._iter_loose_objects() 

730 yield from self._iter_alternate_objects() 

731 

732 def contains_loose(self, sha): 

733 """Check if a particular object is present by SHA1 and is loose. 

734 

735 This does not check alternates. 

736 """ 

737 return self._get_loose_object(sha) is not None 

738 

739 def get_raw(self, name): 

740 """Obtain the raw fulltext for an object. 

741 

742 Args: 

743 name: sha for the object. 

744 Returns: tuple with numeric type and object contents. 

745 """ 

746 if name == ZERO_SHA: 

747 raise KeyError(name) 

748 if len(name) == 40: 

749 sha = hex_to_sha(name) 

750 hexsha = name 

751 elif len(name) == 20: 

752 sha = name 

753 hexsha = None 

754 else: 

755 raise AssertionError(f"Invalid object name {name!r}") 

756 for pack in self._iter_cached_packs(): 

757 try: 

758 return pack.get_raw(sha) 

759 except (KeyError, PackFileDisappeared): 

760 pass 

761 if hexsha is None: 

762 hexsha = sha_to_hex(name) 

763 ret = self._get_loose_object(hexsha) 

764 if ret is not None: 

765 return ret.type_num, ret.as_raw_string() 

766 # Maybe something else has added a pack with the object 

767 # in the mean time? 

768 for pack in self._update_pack_cache(): 

769 try: 

770 return pack.get_raw(sha) 

771 except KeyError: 

772 pass 

773 for alternate in self.alternates: 

774 try: 

775 return alternate.get_raw(hexsha) 

776 except KeyError: 

777 pass 

778 raise KeyError(hexsha) 

779 

780 def iter_unpacked_subset( 

781 self, 

782 shas: set[bytes], 

783 include_comp: bool = False, 

784 allow_missing: bool = False, 

785 convert_ofs_delta: bool = True, 

786 ) -> Iterator[UnpackedObject]: 

787 todo: set[bytes] = set(shas) 

788 for p in self._iter_cached_packs(): 

789 for unpacked in p.iter_unpacked_subset( 

790 todo, 

791 include_comp=include_comp, 

792 allow_missing=True, 

793 convert_ofs_delta=convert_ofs_delta, 

794 ): 

795 yield unpacked 

796 hexsha = sha_to_hex(unpacked.sha()) 

797 todo.remove(hexsha) 

798 # Maybe something else has added a pack with the object 

799 # in the mean time? 

800 for p in self._update_pack_cache(): 

801 for unpacked in p.iter_unpacked_subset( 

802 todo, 

803 include_comp=include_comp, 

804 allow_missing=True, 

805 convert_ofs_delta=convert_ofs_delta, 

806 ): 

807 yield unpacked 

808 hexsha = sha_to_hex(unpacked.sha()) 

809 todo.remove(hexsha) 

810 for alternate in self.alternates: 

811 for unpacked in alternate.iter_unpacked_subset( 

812 todo, 

813 include_comp=include_comp, 

814 allow_missing=True, 

815 convert_ofs_delta=convert_ofs_delta, 

816 ): 

817 yield unpacked 

818 hexsha = sha_to_hex(unpacked.sha()) 

819 todo.remove(hexsha) 

820 

821 def iterobjects_subset( 

822 self, shas: Iterable[bytes], *, allow_missing: bool = False 

823 ) -> Iterator[ShaFile]: 

824 todo: set[bytes] = set(shas) 

825 for p in self._iter_cached_packs(): 

826 for o in p.iterobjects_subset(todo, allow_missing=True): 

827 yield o 

828 todo.remove(o.id) 

829 # Maybe something else has added a pack with the object 

830 # in the mean time? 

831 for p in self._update_pack_cache(): 

832 for o in p.iterobjects_subset(todo, allow_missing=True): 

833 yield o 

834 todo.remove(o.id) 

835 for alternate in self.alternates: 

836 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

837 yield o 

838 todo.remove(o.id) 

839 for oid in todo: 

840 o = self._get_loose_object(oid) 

841 if o is not None: 

842 yield o 

843 elif not allow_missing: 

844 raise KeyError(oid) 

845 

846 def get_unpacked_object( 

847 self, sha1: bytes, *, include_comp: bool = False 

848 ) -> UnpackedObject: 

849 """Obtain the unpacked object. 

850 

851 Args: 

852 sha1: sha for the object. 

853 """ 

854 if sha1 == ZERO_SHA: 

855 raise KeyError(sha1) 

856 if len(sha1) == 40: 

857 sha = hex_to_sha(sha1) 

858 hexsha = sha1 

859 elif len(sha1) == 20: 

860 sha = sha1 

861 hexsha = None 

862 else: 

863 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

864 for pack in self._iter_cached_packs(): 

865 try: 

866 return pack.get_unpacked_object(sha, include_comp=include_comp) 

867 except (KeyError, PackFileDisappeared): 

868 pass 

869 if hexsha is None: 

870 hexsha = sha_to_hex(sha1) 

871 # Maybe something else has added a pack with the object 

872 # in the mean time? 

873 for pack in self._update_pack_cache(): 

874 try: 

875 return pack.get_unpacked_object(sha, include_comp=include_comp) 

876 except KeyError: 

877 pass 

878 for alternate in self.alternates: 

879 try: 

880 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

881 except KeyError: 

882 pass 

883 raise KeyError(hexsha) 

884 

885 def add_objects( 

886 self, 

887 objects: Sequence[tuple[ShaFile, Optional[str]]], 

888 progress: Optional[Callable[[str], None]] = None, 

889 ) -> Optional[Pack]: 

890 """Add a set of objects to this object store. 

891 

892 Args: 

893 objects: Iterable over (object, path) tuples, should support 

894 __len__. 

895 Returns: Pack object of the objects written, or None if no objects were given. 

896 """ 

897 count = len(objects) 

898 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

899 return self.add_pack_data(count, record_iter, progress=progress) 

900 

901 

902class DiskObjectStore(PackBasedObjectStore): 

903 """Git-style object store that exists on disk.""" 

904 

905 path: Union[str, os.PathLike] 

906 pack_dir: Union[str, os.PathLike] 

907 

908 def __init__( 

909 self, 

910 path: Union[str, os.PathLike], 

911 loose_compression_level=-1, 

912 pack_compression_level=-1, 

913 pack_index_version=None, 

914 ) -> None: 

915 """Open an object store. 

916 

917 Args: 

918 path: Path of the object store. 

919 loose_compression_level: zlib compression level for loose objects 

920 pack_compression_level: zlib compression level for pack objects 

921 pack_index_version: pack index version to use (1, 2, or 3) 

922 """ 

923 super().__init__( 

924 pack_compression_level=pack_compression_level, 

925 pack_index_version=pack_index_version, 

926 ) 

927 self.path = path 

928 self.pack_dir = os.path.join(self.path, PACKDIR) 

929 self._alternates = None 

930 self.loose_compression_level = loose_compression_level 

931 self.pack_compression_level = pack_compression_level 

932 self.pack_index_version = pack_index_version 

933 

934 # Commit graph support - lazy loaded 

935 self._commit_graph = None 

936 self._use_commit_graph = True # Default to true 

937 

938 def __repr__(self) -> str: 

939 return f"<{self.__class__.__name__}({self.path!r})>" 

940 

941 @classmethod 

942 def from_config(cls, path: Union[str, os.PathLike], config): 

943 try: 

944 default_compression_level = int( 

945 config.get((b"core",), b"compression").decode() 

946 ) 

947 except KeyError: 

948 default_compression_level = -1 

949 try: 

950 loose_compression_level = int( 

951 config.get((b"core",), b"looseCompression").decode() 

952 ) 

953 except KeyError: 

954 loose_compression_level = default_compression_level 

955 try: 

956 pack_compression_level = int( 

957 config.get((b"core",), "packCompression").decode() 

958 ) 

959 except KeyError: 

960 pack_compression_level = default_compression_level 

961 try: 

962 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode()) 

963 except KeyError: 

964 pack_index_version = None 

965 

966 # Read core.commitGraph setting 

967 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True) 

968 

969 instance = cls( 

970 path, loose_compression_level, pack_compression_level, pack_index_version 

971 ) 

972 instance._use_commit_graph = use_commit_graph 

973 return instance 
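
# Editorial sketch (not part of dulwich): building a DiskObjectStore whose
# compression and commit-graph settings come from a config object. The path
# below is hypothetical; the constructor does not touch the filesystem.
def _example_from_config():
    from dulwich.config import ConfigFile
    from dulwich.object_store import DiskObjectStore

    config = ConfigFile()
    config.set((b"core",), b"compression", b"9")
    config.set((b"core",), b"commitGraph", b"false")

    store = DiskObjectStore.from_config("/tmp/example-objects", config)
    assert store.pack_compression_level == 9   # falls back to core.compression
    assert store.get_commit_graph() is None    # commit graph support disabled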

974 

975 @property 

976 def alternates(self): 

977 if self._alternates is not None: 

978 return self._alternates 

979 self._alternates = [] 

980 for path in self._read_alternate_paths(): 

981 self._alternates.append(DiskObjectStore(path)) 

982 return self._alternates 

983 

984 def _read_alternate_paths(self): 

985 try: 

986 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

987 except FileNotFoundError: 

988 return 

989 with f: 

990 for line in f.readlines(): 

991 line = line.rstrip(b"\n") 

992 if line.startswith(b"#"): 

993 continue 

994 if os.path.isabs(line): 

995 yield os.fsdecode(line) 

996 else: 

997 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

998 

999 def add_alternate_path(self, path) -> None: 

1000 """Add an alternate path to this object store.""" 

1001 try: 

1002 os.mkdir(os.path.join(self.path, INFODIR)) 

1003 except FileExistsError: 

1004 pass 

1005 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

1006 with GitFile(alternates_path, "wb") as f: 

1007 try: 

1008 orig_f = open(alternates_path, "rb") 

1009 except FileNotFoundError: 

1010 pass 

1011 else: 

1012 with orig_f: 

1013 f.write(orig_f.read()) 

1014 f.write(os.fsencode(path) + b"\n") 

1015 

1016 if not os.path.isabs(path): 

1017 path = os.path.join(self.path, path) 

1018 self.alternates.append(DiskObjectStore(path)) 
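
# Editorial sketch (not part of dulwich): objects stored in an alternate
# object directory become visible through the main store without being
# copied into it.
def _example_alternates():
    import tempfile
    from dulwich.object_store import DiskObjectStore
    from dulwich.objects import Blob

    alt_store = DiskObjectStore.init(tempfile.mkdtemp())
    blob = Blob.from_string(b"shared object\n")
    alt_store.add_object(blob)

    main_store = DiskObjectStore.init(tempfile.mkdtemp())
    main_store.add_alternate_path(alt_store.path)
    assert blob.id in main_store                    # resolved via the alternate
    assert not main_store.contains_loose(blob.id)   # not stored locally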

1019 

1020 def _update_pack_cache(self): 

1021 """Read and iterate over new pack files and cache them.""" 

1022 try: 

1023 pack_dir_contents = os.listdir(self.pack_dir) 

1024 except FileNotFoundError: 

1025 self.close() 

1026 return [] 

1027 pack_files = set() 

1028 for name in pack_dir_contents: 

1029 if name.startswith("pack-") and name.endswith(".pack"): 

1030 # verify that idx exists first (otherwise the pack was not yet 

1031 # fully written) 

1032 idx_name = os.path.splitext(name)[0] + ".idx" 

1033 if idx_name in pack_dir_contents: 

1034 pack_name = name[: -len(".pack")] 

1035 pack_files.add(pack_name) 

1036 

1037 # Open newly appeared pack files 

1038 new_packs = [] 

1039 for f in pack_files: 

1040 if f not in self._pack_cache: 

1041 pack = Pack(os.path.join(self.pack_dir, f)) 

1042 new_packs.append(pack) 

1043 self._pack_cache[f] = pack 

1044 # Remove disappeared pack files 

1045 for f in set(self._pack_cache) - pack_files: 

1046 self._pack_cache.pop(f).close() 

1047 return new_packs 

1048 

1049 def _get_shafile_path(self, sha): 

1050 # Check from object dir 

1051 return hex_to_filename(self.path, sha) 

1052 

1053 def _iter_loose_objects(self): 

1054 for base in os.listdir(self.path): 

1055 if len(base) != 2: 

1056 continue 

1057 for rest in os.listdir(os.path.join(self.path, base)): 

1058 sha = os.fsencode(base + rest) 

1059 if not valid_hexsha(sha): 

1060 continue 

1061 yield sha 

1062 

1063 def count_loose_objects(self) -> int: 

1064 """Count the number of loose objects in the object store. 

1065 

1066 Returns: 

1067 Number of loose objects 

1068 """ 

1069 count = 0 

1070 if not os.path.exists(self.path): 

1071 return 0 

1072 

1073 for i in range(256): 

1074 subdir = os.path.join(self.path, f"{i:02x}") 

1075 try: 

1076 count += len( 

1077 [ 

1078 name 

1079 for name in os.listdir(subdir) 

1080 if len(name) == 38 # 40 - 2 for the prefix 

1081 ] 

1082 ) 

1083 except FileNotFoundError: 

1084 # Directory may have been removed or is inaccessible 

1085 continue 

1086 

1087 return count 

1088 

1089 def _get_loose_object(self, sha): 

1090 path = self._get_shafile_path(sha) 

1091 try: 

1092 return ShaFile.from_path(path) 

1093 except FileNotFoundError: 

1094 return None 

1095 

1096 def delete_loose_object(self, sha) -> None: 

1097 os.remove(self._get_shafile_path(sha)) 

1098 

1099 def get_object_mtime(self, sha): 

1100 """Get the modification time of an object. 

1101 

1102 Args: 

1103 sha: SHA1 of the object 

1104 

1105 Returns: 

1106 Modification time as seconds since epoch 

1107 

1108 Raises: 

1109 KeyError: if the object is not found 

1110 """ 

1111 # First check if it's a loose object 

1112 if self.contains_loose(sha): 

1113 path = self._get_shafile_path(sha) 

1114 try: 

1115 return os.path.getmtime(path) 

1116 except FileNotFoundError: 

1117 pass 

1118 

1119 # Check if it's in a pack file 

1120 for pack in self.packs: 

1121 try: 

1122 if sha in pack: 

1123 # Use the pack file's mtime for packed objects 

1124 pack_path = pack._data_path 

1125 try: 

1126 return os.path.getmtime(pack_path) 

1127 except (FileNotFoundError, AttributeError): 

1128 pass 

1129 except PackFileDisappeared: 

1130 pass 

1131 

1132 raise KeyError(sha) 

1133 

1134 def _remove_pack(self, pack) -> None: 

1135 try: 

1136 del self._pack_cache[os.path.basename(pack._basename)] 

1137 except KeyError: 

1138 pass 

1139 pack.close() 

1140 os.remove(pack.data.path) 

1141 os.remove(pack.index.path) 

1142 

1143 def _get_pack_basepath(self, entries): 

1144 suffix = iter_sha1(entry[0] for entry in entries) 

1145 # TODO: Handle self.pack_dir being bytes 

1146 suffix = suffix.decode("ascii") 

1147 return os.path.join(self.pack_dir, "pack-" + suffix) 

1148 

1149 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

1150 """Move a specific file containing a pack into the pack directory. 

1151 

1152 Note: The file should be on the same file system as the 

1153 packs directory. 

1154 

1155 Args: 

1156 f: Open file object for the pack. 

1157 path: Path to the pack file. 

1158 indexer: A PackIndexer for indexing the pack. 

1159 """ 

1160 entries = [] 

1161 for i, entry in enumerate(indexer): 

1162 if progress is not None: 

1163 progress(f"generating index: {i}/{num_objects}\r".encode("ascii")) 

1164 entries.append(entry) 

1165 

1166 pack_sha, extra_entries = extend_pack( 

1167 f, 

1168 indexer.ext_refs(), 

1169 get_raw=self.get_raw, 

1170 compression_level=self.pack_compression_level, 

1171 progress=progress, 

1172 ) 

1173 f.flush() 

1174 try: 

1175 fileno = f.fileno() 

1176 except AttributeError: 

1177 pass 

1178 else: 

1179 os.fsync(fileno) 

1180 f.close() 

1181 

1182 entries.extend(extra_entries) 

1183 

1184 # Move the pack in. 

1185 entries.sort() 

1186 pack_base_name = self._get_pack_basepath(entries) 

1187 

1188 for pack in self.packs: 

1189 if pack._basename == pack_base_name: 

1190 return pack 

1191 

1192 target_pack_path = pack_base_name + ".pack" 

1193 target_index_path = pack_base_name + ".idx" 

1194 if sys.platform == "win32": 

1195 # Windows might have the target pack file lingering. Attempt 

1196 # removal, silently passing if the target does not exist. 

1197 with suppress(FileNotFoundError): 

1198 os.remove(target_pack_path) 

1199 os.rename(path, target_pack_path) 

1200 

1201 # Write the index. 

1202 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

1203 write_pack_index( 

1204 index_file, entries, pack_sha, version=self.pack_index_version 

1205 ) 

1206 

1207 # Add the pack to the store and return it. 

1208 final_pack = Pack(pack_base_name) 

1209 final_pack.check_length_and_checksum() 

1210 self._add_cached_pack(pack_base_name, final_pack) 

1211 return final_pack 

1212 

1213 def add_thin_pack(self, read_all, read_some, progress=None): 

1214 """Add a new thin pack to this object store. 

1215 

1216 Thin packs are packs that contain deltas with parents that exist 

1217 outside the pack. They should never be placed in the object store 

1218 directly, and always indexed and completed as they are copied. 

1219 

1220 Args: 

1221 read_all: Read function that blocks until the number of 

1222 requested bytes are read. 

1223 read_some: Read function that returns at least one byte, but may 

1224 not return the number of bytes requested. 

1225 Returns: A Pack object pointing at the now-completed thin pack in the 

1226 objects/pack directory. 

1227 """ 

1228 import tempfile 

1229 

1230 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

1231 with os.fdopen(fd, "w+b") as f: 

1232 os.chmod(path, PACK_MODE) 

1233 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

1234 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

1235 copier.verify(progress=progress) 

1236 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

1237 

1238 def add_pack(self): 

1239 """Add a new pack to this object store. 

1240 

1241 Returns: Fileobject to write to, a commit function to 

1242 call when the pack is finished and an abort 

1243 function. 

1244 """ 

1245 import tempfile 

1246 

1247 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

1248 f = os.fdopen(fd, "w+b") 

1249 os.chmod(path, PACK_MODE) 

1250 

1251 def commit(): 

1252 if f.tell() > 0: 

1253 f.seek(0) 

1254 with PackData(path, f) as pd: 

1255 indexer = PackIndexer.for_pack_data( 

1256 pd, resolve_ext_ref=self.get_raw 

1257 ) 

1258 return self._complete_pack(f, path, len(pd), indexer) 

1259 else: 

1260 f.close() 

1261 os.remove(path) 

1262 return None 

1263 

1264 def abort() -> None: 

1265 f.close() 

1266 os.remove(path) 

1267 

1268 return f, commit, abort 

1269 

1270 def add_object(self, obj) -> None: 

1271 """Add a single object to this object store. 

1272 

1273 Args: 

1274 obj: Object to add 

1275 """ 

1276 path = self._get_shafile_path(obj.id) 

1277 dir = os.path.dirname(path) 

1278 try: 

1279 os.mkdir(dir) 

1280 except FileExistsError: 

1281 pass 

1282 if os.path.exists(path): 

1283 return # Already there, no need to write again 

1284 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1285 f.write( 

1286 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1287 ) 

1288 

1289 @classmethod 

1290 def init(cls, path: Union[str, os.PathLike]): 

1291 try: 

1292 os.mkdir(path) 

1293 except FileExistsError: 

1294 pass 

1295 os.mkdir(os.path.join(path, "info")) 

1296 os.mkdir(os.path.join(path, PACKDIR)) 

1297 return cls(path) 

1298 

1299 def iter_prefix(self, prefix): 

1300 if len(prefix) < 2: 

1301 yield from super().iter_prefix(prefix) 

1302 return 

1303 seen = set() 

1304 dir = prefix[:2].decode() 

1305 rest = prefix[2:].decode() 

1306 try: 

1307 for name in os.listdir(os.path.join(self.path, dir)): 

1308 if name.startswith(rest): 

1309 sha = os.fsencode(dir + name) 

1310 if sha not in seen: 

1311 seen.add(sha) 

1312 yield sha 

1313 except FileNotFoundError: 

1314 pass 

1315 

1316 for p in self.packs: 

1317 bin_prefix = ( 

1318 binascii.unhexlify(prefix) 

1319 if len(prefix) % 2 == 0 

1320 else binascii.unhexlify(prefix[:-1]) 

1321 ) 

1322 for sha in p.index.iter_prefix(bin_prefix): 

1323 sha = sha_to_hex(sha) 

1324 if sha.startswith(prefix) and sha not in seen: 

1325 seen.add(sha) 

1326 yield sha 

1327 for alternate in self.alternates: 

1328 for sha in alternate.iter_prefix(prefix): 

1329 if sha not in seen: 

1330 seen.add(sha) 

1331 yield sha 

1332 

1333 def get_commit_graph(self): 

1334 """Get the commit graph for this object store. 

1335 

1336 Returns: 

1337 CommitGraph object if available, None otherwise 

1338 """ 

1339 if not self._use_commit_graph: 

1340 return None 

1341 

1342 if self._commit_graph is None: 

1343 from .commit_graph import read_commit_graph 

1344 

1345 # Look for commit graph in our objects directory 

1346 graph_file = os.path.join(self.path, "info", "commit-graph") 

1347 if os.path.exists(graph_file): 

1348 self._commit_graph = read_commit_graph(graph_file) 

1349 return self._commit_graph 

1350 

1351 def write_commit_graph(self, refs=None, reachable=True) -> None: 

1352 """Write a commit graph file for this object store. 

1353 

1354 Args: 

1355 refs: List of refs to include. If None, includes all refs from object store. 

1356 reachable: If True, includes all commits reachable from refs. 

1357 If False, only includes the direct ref targets. 

1358 """ 

1359 from .commit_graph import get_reachable_commits 

1360 

1361 if refs is None: 

1362 # Get all commit objects from the object store 

1363 all_refs = [] 

1364 # Iterate through all objects to find commits 

1365 for sha in self: 

1366 try: 

1367 obj = self[sha] 

1368 if obj.type_name == b"commit": 

1369 all_refs.append(sha) 

1370 except KeyError: 

1371 continue 

1372 else: 

1373 # Use provided refs 

1374 all_refs = refs 

1375 

1376 if not all_refs: 

1377 return # No commits to include 

1378 

1379 if reachable: 

1380 # Get all reachable commits 

1381 commit_ids = get_reachable_commits(self, all_refs) 

1382 else: 

1383 # Just use the direct ref targets - ensure they're hex ObjectIDs 

1384 commit_ids = [] 

1385 for ref in all_refs: 

1386 if isinstance(ref, bytes) and len(ref) == 40: 

1387 # Already hex ObjectID 

1388 commit_ids.append(ref) 

1389 elif isinstance(ref, bytes) and len(ref) == 20: 

1390 # Binary SHA, convert to hex ObjectID 

1391 from .objects import sha_to_hex 

1392 

1393 commit_ids.append(sha_to_hex(ref)) 

1394 else: 

1395 # Assume it's already correct format 

1396 commit_ids.append(ref) 

1397 

1398 if commit_ids: 

1399 # Write commit graph directly to our object store path 

1400 # Generate the commit graph 

1401 from .commit_graph import generate_commit_graph 

1402 

1403 graph = generate_commit_graph(self, commit_ids) 

1404 

1405 if graph.entries: 

1406 # Ensure the info directory exists 

1407 info_dir = os.path.join(self.path, "info") 

1408 os.makedirs(info_dir, exist_ok=True) 

1409 

1410 # Write using GitFile for atomic operation 

1411 graph_path = os.path.join(info_dir, "commit-graph") 

1412 with GitFile(graph_path, "wb") as f: 

1413 graph.write_to_file(f) 

1414 

1415 # Clear cached commit graph so it gets reloaded 

1416 self._commit_graph = None 

1417 

1418 def prune(self, grace_period: Optional[int] = None) -> None: 

1419 """Prune/clean up this object store. 

1420 

1421 This removes temporary files that were left behind by interrupted 

1422 pack operations. These are files that start with ``tmp_pack_`` in the 

1423 repository directory or files with .pack extension but no corresponding 

1424 .idx file in the pack directory. 

1425 

1426 Args: 

1427 grace_period: Grace period in seconds for removing temporary files. 

1428 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD. 

1429 """ 

1430 import glob 

1431 

1432 if grace_period is None: 

1433 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD 

1434 

1435 # Clean up tmp_pack_* files in the repository directory 

1436 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")): 

1437 # Check if file is old enough (more than grace period) 

1438 mtime = os.path.getmtime(tmp_file) 

1439 if time.time() - mtime > grace_period: 

1440 os.remove(tmp_file) 

1441 

1442 # Clean up orphaned .pack files without corresponding .idx files 

1443 try: 

1444 pack_dir_contents = os.listdir(self.pack_dir) 

1445 except FileNotFoundError: 

1446 return 

1447 

1448 pack_files = {} 

1449 idx_files = set() 

1450 

1451 for name in pack_dir_contents: 

1452 if name.endswith(".pack"): 

1453 base_name = name[:-5] # Remove .pack extension 

1454 pack_files[base_name] = name 

1455 elif name.endswith(".idx"): 

1456 base_name = name[:-4] # Remove .idx extension 

1457 idx_files.add(base_name) 

1458 

1459 # Remove .pack files without corresponding .idx files 

1460 for base_name, pack_name in pack_files.items(): 

1461 if base_name not in idx_files: 

1462 pack_path = os.path.join(self.pack_dir, pack_name) 

1463 # Check if file is old enough (more than grace period) 

1464 mtime = os.path.getmtime(pack_path) 

1465 if time.time() - mtime > grace_period: 

1466 os.remove(pack_path) 
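
# Editorial sketch (not part of dulwich): prune() removes temporary pack
# files that are older than the grace period. The leftover file is backdated
# so it exceeds the default two-week grace period.
def _example_prune():
    import os
    import tempfile
    from dulwich.object_store import DiskObjectStore

    store = DiskObjectStore.init(tempfile.mkdtemp())
    stale = os.path.join(store.path, "tmp_pack_leftover")
    with open(stale, "wb") as f:
        f.write(b"interrupted pack transfer")
    os.utime(stale, (0, 0))  # pretend the file has been there since 1970

    store.prune()
    assert not os.path.exists(stale)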

1467 

1468 

1469class MemoryObjectStore(BaseObjectStore): 

1470 """Object store that keeps all objects in memory.""" 

1471 

1472 def __init__(self) -> None: 

1473 super().__init__() 

1474 self._data: dict[str, ShaFile] = {} 

1475 self.pack_compression_level = -1 

1476 

1477 def _to_hexsha(self, sha): 

1478 if len(sha) == 40: 

1479 return sha 

1480 elif len(sha) == 20: 

1481 return sha_to_hex(sha) 

1482 else: 

1483 raise ValueError(f"Invalid sha {sha!r}") 

1484 

1485 def contains_loose(self, sha): 

1486 """Check if a particular object is present by SHA1 and is loose.""" 

1487 return self._to_hexsha(sha) in self._data 

1488 

1489 def contains_packed(self, sha) -> bool: 

1490 """Check if a particular object is present by SHA1 and is packed.""" 

1491 return False 

1492 

1493 def __iter__(self): 

1494 """Iterate over the SHAs that are present in this store.""" 

1495 return iter(self._data.keys()) 

1496 

1497 @property 

1498 def packs(self): 

1499 """List with pack objects.""" 

1500 return [] 

1501 

1502 def get_raw(self, name: ObjectID): 

1503 """Obtain the raw text for an object. 

1504 

1505 Args: 

1506 name: sha for the object. 

1507 Returns: tuple with numeric type and object contents. 

1508 """ 

1509 obj = self[self._to_hexsha(name)] 

1510 return obj.type_num, obj.as_raw_string() 
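
# Editorial sketch (not part of dulwich): thanks to _to_hexsha(), get_raw()
# accepts both 40-byte hex object IDs and 20-byte binary SHAs.
def _example_memory_get_raw():
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob, hex_to_sha

    store = MemoryObjectStore()
    blob = Blob.from_string(b"payload\n")
    store.add_object(blob)

    type_num, raw = store.get_raw(blob.id)  # hex object ID
    assert (type_num, raw) == (Blob.type_num, b"payload\n")
    assert store.get_raw(hex_to_sha(blob.id)) == (type_num, raw)  # binary SHA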

1511 

1512 def __getitem__(self, name: ObjectID): 

1513 return self._data[self._to_hexsha(name)].copy() 

1514 

1515 def __delitem__(self, name: ObjectID) -> None: 

1516 """Delete an object from this store, for testing only.""" 

1517 del self._data[self._to_hexsha(name)] 

1518 

1519 def add_object(self, obj) -> None: 

1520 """Add a single object to this object store.""" 

1521 self._data[obj.id] = obj.copy() 

1522 

1523 def add_objects(self, objects, progress=None) -> None: 

1524 """Add a set of objects to this object store. 

1525 

1526 Args: 

1527 objects: Iterable of (object, path) tuples 

1528 """ 

1529 for obj, path in objects: 

1530 self.add_object(obj) 

1531 

1532 def add_pack(self): 

1533 """Add a new pack to this object store. 

1534 

1535 Because this object store doesn't support packs, we extract and add the 

1536 individual objects. 

1537 

1538 Returns: Fileobject to write to and a commit function to 

1539 call when the pack is finished. 

1540 """ 

1541 from tempfile import SpooledTemporaryFile 

1542 

1543 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1544 

1545 def commit() -> None: 

1546 size = f.tell() 

1547 if size > 0: 

1548 f.seek(0) 

1549 p = PackData.from_file(f, size) 

1550 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1551 self.add_object(obj) 

1552 p.close() 

1553 f.close() 

1554 else: 

1555 f.close() 

1556 

1557 def abort() -> None: 

1558 f.close() 

1559 

1560 return f, commit, abort 

1561 

1562 def add_pack_data( 

1563 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1564 ) -> None: 

1565 """Add pack data to this object store. 

1566 

1567 Args: 

1568 count: Number of items to add 

1569 """ 

1570 if count == 0: 

1571 return 

1572 

1573 # Since MemoryObjectStore doesn't support pack files, we need to 

1574 # extract individual objects. To handle deltas properly, we write 

1575 # to a temporary pack and then use PackInflater to resolve them. 

1576 f, commit, abort = self.add_pack() 

1577 try: 

1578 write_pack_data( 

1579 f.write, 

1580 unpacked_objects, 

1581 num_records=count, 

1582 progress=progress, 

1583 ) 

1584 except BaseException: 

1585 abort() 

1586 raise 

1587 else: 

1588 commit() 

1589 

1590 def add_thin_pack(self, read_all, read_some, progress=None) -> None: 

1591 """Add a new thin pack to this object store. 

1592 

1593 Thin packs are packs that contain deltas with parents that exist 

1594 outside the pack. Because this object store doesn't support packs, we 

1595 extract and add the individual objects. 

1596 

1597 Args: 

1598 read_all: Read function that blocks until the number of 

1599 requested bytes are read. 

1600 read_some: Read function that returns at least one byte, but may 

1601 not return the number of bytes requested. 

1602 """ 

1603 f, commit, abort = self.add_pack() 

1604 try: 

1605 copier = PackStreamCopier(read_all, read_some, f) 

1606 copier.verify() 

1607 except BaseException: 

1608 abort() 

1609 raise 

1610 else: 

1611 commit() 

1612 

1613 

1614class ObjectIterator(Protocol): 

1615 """Interface for iterating over objects.""" 

1616 

1617 def iterobjects(self) -> Iterator[ShaFile]: 

1618 raise NotImplementedError(self.iterobjects) 

1619 

1620 

1621def tree_lookup_path(lookup_obj, root_sha, path): 

1622 """Look up an object in a Git tree. 

1623 

1624 Args: 

1625 lookup_obj: Callback for retrieving object by SHA1 

1626 root_sha: SHA1 of the root tree 

1627 path: Path to lookup 

1628 Returns: A tuple of (mode, SHA) of the resulting path. 

1629 """ 

1630 tree = lookup_obj(root_sha) 

1631 if not isinstance(tree, Tree): 

1632 raise NotTreeError(root_sha) 

1633 return tree.lookup_path(lookup_obj, path) 
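
# Editorial sketch (not part of dulwich): resolving a nested path inside a
# tree with tree_lookup_path(). The helper name is hypothetical.
def _example_tree_lookup_path():
    from dulwich.object_store import MemoryObjectStore, tree_lookup_path
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"print('hi')\n")
    subtree = Tree()
    subtree.add(b"main.py", 0o100644, blob.id)
    root = Tree()
    root.add(b"src", 0o040000, subtree.id)
    for obj in (blob, subtree, root):
        store.add_object(obj)

    mode, sha = tree_lookup_path(store.__getitem__, root.id, b"src/main.py")
    assert (mode, sha) == (0o100644, blob.id)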

1634 

1635 

1636def _collect_filetree_revs( 

1637 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID] 

1638) -> None: 

1639 """Collect SHA1s of files and directories for specified tree. 

1640 

1641 Args: 

1642 obj_store: Object store to get objects by SHA from 

1643 tree_sha: tree reference to walk 

1644 kset: set to fill with references to files and directories 

1645 """ 

1646 filetree = obj_store[tree_sha] 

1647 assert isinstance(filetree, Tree) 

1648 for name, mode, sha in filetree.iteritems(): 

1649 if not S_ISGITLINK(mode) and sha not in kset: 

1650 kset.add(sha) 

1651 if stat.S_ISDIR(mode): 

1652 _collect_filetree_revs(obj_store, sha, kset) 

1653 

1654 

1655def _split_commits_and_tags( 

1656 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1657) -> tuple[set[bytes], set[bytes], set[bytes]]: 

1658 """Split object id list into three lists with commit, tag, and other SHAs. 

1659 

1660 Commits referenced by tags are included in the commits 

1661 list as well. Only SHA1s known to this repository are returned; 

1662 unless the ignore_unknown argument is True, a KeyError 

1663 is raised for any SHA1 missing from the repository. 

1664 

1665 Args: 

1666 obj_store: Object store to get objects by SHA1 from 

1667 lst: Collection of commit and tag SHAs 

1668 ignore_unknown: True to skip SHA1 missing in the repository 

1669 silently. 

1670 Returns: A tuple of (commits, tags, others) SHA1s 

1671 """ 

1672 commits: set[bytes] = set() 

1673 tags: set[bytes] = set() 

1674 others: set[bytes] = set() 

1675 for e in lst: 

1676 try: 

1677 o = obj_store[e] 

1678 except KeyError: 

1679 if not ignore_unknown: 

1680 raise 

1681 else: 

1682 if isinstance(o, Commit): 

1683 commits.add(e) 

1684 elif isinstance(o, Tag): 

1685 tags.add(e) 

1686 tagged = o.object[1] 

1687 c, t, os = _split_commits_and_tags( 

1688 obj_store, [tagged], ignore_unknown=ignore_unknown 

1689 ) 

1690 commits |= c 

1691 tags |= t 

1692 others |= os 

1693 else: 

1694 others.add(e) 

1695 return (commits, tags, others) 

1696 

1697 

1698class MissingObjectFinder: 

1699 """Find the objects missing from another object store. 

1700 

1701 Args: 

1702 object_store: Object store containing at least all objects to be 

1703 sent 

1704 haves: SHA1s of commits not to send (already present in target) 

1705 wants: SHA1s of commits to send 

1706 progress: Optional function to report progress to. 

1707 get_tagged: Function that returns a dict of pointed-to sha -> tag 

1708 sha for including tags. 

1709 get_parents: Optional function for getting the parents of a commit. 

1710 """ 

1711 

1712 def __init__( 

1713 self, 

1714 object_store, 

1715 haves, 

1716 wants, 

1717 *, 

1718 shallow=None, 

1719 progress=None, 

1720 get_tagged=None, 

1721 get_parents=lambda commit: commit.parents, 

1722 ) -> None: 

1723 self.object_store = object_store 

1724 if shallow is None: 

1725 shallow = set() 

1726 self._get_parents = get_parents 

1727 # process Commits and Tags differently 

1728 # Note: haves may list commits/tags that are not available locally; 

1729 # such SHAs are filtered out by _split_commits_and_tags. wants, however, 

1730 # must list only SHAs known to this repository; otherwise 

1731 # _split_commits_and_tags raises KeyError. 

1732 have_commits, have_tags, have_others = _split_commits_and_tags( 

1733 object_store, haves, ignore_unknown=True 

1734 ) 

1735 want_commits, want_tags, want_others = _split_commits_and_tags( 

1736 object_store, wants, ignore_unknown=False 

1737 ) 

1738 # all_ancestors is a set of commits that shall not be sent 

1739 # (complete repository up to 'haves') 

1740 all_ancestors = _collect_ancestors( 

1741 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

1742 )[0] 

1743 # all_missing - complete set of commits between haves and wants 

1744 # common - commits from all_ancestors we hit into while 

1745 # traversing parent hierarchy of wants 

1746 missing_commits, common_commits = _collect_ancestors( 

1747 object_store, 

1748 want_commits, 

1749 all_ancestors, 

1750 shallow=shallow, 

1751 get_parents=self._get_parents, 

1752 ) 

1753 self.remote_has: set[bytes] = set() 

1754 # Now, fill sha_done with the commits and the SHAs of 

1755 # files and directories known to exist both locally 

1756 # and on the target. These commits and files 

1757 # won't get selected for fetch. 

1758 for h in common_commits: 

1759 self.remote_has.add(h) 

1760 cmt = object_store[h] 

1761 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

1762 # record tags we have as visited, too 

1763 for t in have_tags: 

1764 self.remote_has.add(t) 

1765 self.sha_done = set(self.remote_has) 

1766 

1767 # in fact, what we 'want' is commits, tags, and others 

1768 # we've found missing 

1769 self.objects_to_send: set[ 

1770 tuple[ObjectID, Optional[bytes], Optional[int], bool] 

1771 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

1772 missing_tags = want_tags.difference(have_tags) 

1773 self.objects_to_send.update( 

1774 {(w, None, Tag.type_num, False) for w in missing_tags} 

1775 ) 

1776 missing_others = want_others.difference(have_others) 

1777 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

1778 

1779 if progress is None: 

1780 self.progress = lambda x: None 

1781 else: 

1782 self.progress = progress 

1783 self._tagged = (get_tagged and get_tagged()) or {} 

1784 

1785 def get_remote_has(self): 

1786 return self.remote_has 

1787 

1788 def add_todo( 

1789 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

1790 ) -> None: 

1791 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

1792 

1793 def __next__(self) -> tuple[bytes, Optional[PackHint]]: 

1794 while True: 

1795 if not self.objects_to_send: 

1796 self.progress( 

1797 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii") 

1798 ) 

1799 raise StopIteration 

1800 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

1801 if sha not in self.sha_done: 

1802 break 

1803 if not leaf: 

1804 o = self.object_store[sha] 

1805 if isinstance(o, Commit): 

1806 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

1807 elif isinstance(o, Tree): 

1808 self.add_todo( 

1809 [ 

1810 ( 

1811 s, 

1812 n, 

1813 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

1814 not stat.S_ISDIR(m), 

1815 ) 

1816 for n, m, s in o.iteritems() 

1817 if not S_ISGITLINK(m) 

1818 ] 

1819 ) 

1820 elif isinstance(o, Tag): 

1821 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

1822 if sha in self._tagged: 

1823 self.add_todo([(self._tagged[sha], None, None, True)]) 

1824 self.sha_done.add(sha) 

1825 if len(self.sha_done) % 1000 == 0: 

1826 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii")) 

1827 if type_num is None: 

1828 pack_hint = None 

1829 else: 

1830 pack_hint = (type_num, name) 

1831 return (sha, pack_hint) 

1832 

1833 def __iter__(self): 

1834 return self 
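A rough usage sketch; local_store, old_commit and new_commit are assumed to come from an existing repository in which new_commit is a descendant of old_commit and the remote side already has old_commit:

# Illustrative sketch, not part of the module.
finder = MissingObjectFinder(
    local_store,
    haves=[old_commit.id],
    wants=[new_commit.id],
)
# Iterating yields (sha, pack_hint) tuples for every object the remote
# is missing; these can be fed into pack generation.
missing = list(finder)
shas_to_send = [sha for sha, _hint in missing]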

1835 

1836 

1837class ObjectStoreGraphWalker: 

1838 """Graph walker that finds what commits are missing from an object store.""" 

1839 

1840 heads: set[ObjectID] 

1841 """Revisions without descendants in the local repo.""" 

1842 

1843 get_parents: Callable[[ObjectID], list[ObjectID]]

1844 """Function to retrieve parents in the local repo.""" 

1845 

1846 shallow: set[ObjectID] 

1847 

1848 def __init__( 

1849 self, 

1850 local_heads: Iterable[ObjectID], 

1851 get_parents, 

1852 shallow: Optional[set[ObjectID]] = None, 

1853 update_shallow=None, 

1854 ) -> None: 

1855 """Create a new instance. 

1856 

1857 Args: 

1858 local_heads: Heads to start search with 

1859 get_parents: Function for finding the parents of a SHA1. 

1860 """ 

1861 self.heads = set(local_heads) 

1862 self.get_parents = get_parents 

1863 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {} 

1864 if shallow is None: 

1865 shallow = set() 

1866 self.shallow = shallow 

1867 self.update_shallow = update_shallow 

1868 

1869 def nak(self) -> None: 

1870 """Nothing in common was found.""" 

1871 

1872 def ack(self, sha: ObjectID) -> None: 

1873 """Ack that a revision and its ancestors are present in the source.""" 

1874 if len(sha) != 40: 

1875 raise ValueError(f"unexpected sha {sha!r} received") 

1876 ancestors = {sha} 

1877 

1878 # stop if we run out of heads to remove 

1879 while self.heads: 

1880 for a in ancestors: 

1881 if a in self.heads: 

1882 self.heads.remove(a) 

1883 

1884 # collect all ancestors 

1885 new_ancestors = set() 

1886 for a in ancestors: 

1887 ps = self.parents.get(a) 

1888 if ps is not None: 

1889 new_ancestors.update(ps) 

1890 self.parents[a] = None 

1891 

1892 # no more ancestors; stop 

1893 if not new_ancestors: 

1894 break 

1895 

1896 ancestors = new_ancestors 

1897 

1898 def next(self): 

1899 """Iterate over ancestors of heads in the target.""" 

1900 if self.heads: 

1901 ret = self.heads.pop() 

1902 try: 

1903 ps = self.get_parents(ret) 

1904 except KeyError: 

1905 return None 

1906 self.parents[ret] = ps 

1907 self.heads.update([p for p in ps if p not in self.parents]) 

1908 return ret 

1909 return None 

1910 

1911 __next__ = next 
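A minimal sketch of the have/ack negotiation loop this walker drives; local_store and head_sha are assumed to come from an open repository, and remote_has() is a hypothetical stand-in for the server-side membership check:

# Illustrative sketch, not part of the module.
walker = ObjectStoreGraphWalker(
    [head_sha],
    get_parents=lambda sha: local_store[sha].parents,
)
while True:
    sha = next(walker)
    if sha is None:          # local history exhausted
        break
    if remote_has(sha):      # hypothetical: ask the server
        walker.ack(sha)      # drop this revision and its ancestors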

1912 

1913 

1914def commit_tree_changes(object_store, tree, changes): 

1915 """Commit a specified set of changes to a tree structure. 

1916 

1917 This will apply a set of changes on top of an existing tree, storing new 

1918 objects in object_store. 

1919 

1920 changes are a list of tuples with (path, mode, object_sha).

1921 Paths can refer to both blobs and trees. Setting the mode and

1922 object sha to None deletes the path.

1923 

1924 This method works especially well if there are only a small 

1925 number of changes to a big tree. For a large number of changes 

1926 to a large tree, use e.g. commit_tree. 

1927 

1928 Args: 

1929 object_store: Object store to store new objects in 

1930 and retrieve old ones from. 

1931 tree: Original tree root 

1932 changes: changes to apply 

1933 Returns: New tree root object 

1934 """ 

1935 # TODO(jelmer): Save up the objects and add them using .add_objects 

1936 # rather than with individual calls to .add_object. 

1937 nested_changes = {} 

1938 for path, new_mode, new_sha in changes: 

1939 try: 

1940 (dirname, subpath) = path.split(b"/", 1) 

1941 except ValueError: 

1942 if new_sha is None: 

1943 del tree[path] 

1944 else: 

1945 tree[path] = (new_mode, new_sha) 

1946 else: 

1947 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

1948 for name, subchanges in nested_changes.items(): 

1949 try: 

1950 orig_subtree = object_store[tree[name][1]] 

1951 except KeyError: 

1952 orig_subtree = Tree() 

1953 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

1954 if len(subtree) == 0: 

1955 del tree[name] 

1956 else: 

1957 tree[name] = (stat.S_IFDIR, subtree.id) 

1958 object_store.add_object(tree) 

1959 return tree 
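A usage sketch, under the assumption that store and old_tree come from an open repository and that b"obsolete.txt" exists in old_tree:

# Illustrative sketch, not part of the module.
new_blob = Blob.from_string(b"updated contents\n")
store.add_object(new_blob)
new_root = commit_tree_changes(
    store,
    old_tree,
    [
        (b"docs/readme.txt", 0o100644, new_blob.id),  # add or update a file
        (b"obsolete.txt", None, None),                # delete this path
    ],
)
# new_root.id is the SHA of the rewritten root tree; subtrees without
# changes are reused as-is.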

1960 

1961 

1962class OverlayObjectStore(BaseObjectStore): 

1963 """Object store that can overlay multiple object stores.""" 

1964 

1965 def __init__(self, bases, add_store=None) -> None: 

1966 self.bases = bases 

1967 self.add_store = add_store 

1968 

1969 def add_object(self, object): 

1970 if self.add_store is None: 

1971 raise NotImplementedError(self.add_object) 

1972 return self.add_store.add_object(object) 

1973 

1974 def add_objects(self, objects, progress=None): 

1975 if self.add_store is None: 

1976 raise NotImplementedError(self.add_object) 

1977 return self.add_store.add_objects(objects, progress) 

1978 

1979 @property 

1980 def packs(self): 

1981 ret = [] 

1982 for b in self.bases: 

1983 ret.extend(b.packs) 

1984 return ret 

1985 

1986 def __iter__(self): 

1987 done = set() 

1988 for b in self.bases: 

1989 for o_id in b: 

1990 if o_id not in done: 

1991 yield o_id 

1992 done.add(o_id) 

1993 

1994 def iterobjects_subset( 

1995 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1996 ) -> Iterator[ShaFile]: 

1997 todo = set(shas) 

1998 found: set[bytes] = set() 

1999 

2000 for b in self.bases: 

2001 # Create a copy of todo for each base to avoid modifying 

2002 # the set while iterating through it 

2003 current_todo = todo - found 

2004 for o in b.iterobjects_subset(current_todo, allow_missing=True): 

2005 yield o 

2006 found.add(o.id) 

2007 

2008 # Check for any remaining objects not found 

2009 missing = todo - found 

2010 if missing and not allow_missing: 

2011 raise KeyError(next(iter(missing))) 

2012 

2013 def iter_unpacked_subset( 

2014 self, 

2015 shas: Iterable[bytes], 

2016 *, 

2017 include_comp=False, 

2018 allow_missing: bool = False, 

2019 convert_ofs_delta=True, 

2020 ) -> Iterator[ShaFile]: 

2021 todo = set(shas) 

2022 for b in self.bases: 

2023 for o in b.iter_unpacked_subset( 

2024 todo, 

2025 include_comp=include_comp, 

2026 allow_missing=True, 

2027 convert_ofs_delta=convert_ofs_delta, 

2028 ): 

2029 yield o 

2030 todo.remove(o.id) 

2031 if todo and not allow_missing:

2032 raise KeyError(next(iter(todo)))

2033 

2034 def get_raw(self, sha_id): 

2035 for b in self.bases: 

2036 try: 

2037 return b.get_raw(sha_id) 

2038 except KeyError: 

2039 pass 

2040 raise KeyError(sha_id) 

2041 

2042 def contains_packed(self, sha) -> bool: 

2043 for b in self.bases: 

2044 if b.contains_packed(sha): 

2045 return True 

2046 return False 

2047 

2048 def contains_loose(self, sha) -> bool: 

2049 for b in self.bases: 

2050 if b.contains_loose(sha): 

2051 return True 

2052 return False 
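A short sketch, assuming two already-populated stores; reads consult each base in order, while writes go to the designated add_store:

# Illustrative sketch, not part of the module.
overlay = OverlayObjectStore(
    [fast_store, slow_store],   # lookup order
    add_store=fast_store,       # new objects land here
)
obj = overlay[some_sha]                         # served by whichever base has it first
overlay.add_object(Blob.from_string(b"data"))   # forwarded to fast_store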

2053 

2054 

2055def read_packs_file(f): 

2056 """Yield the packs listed in a packs file.""" 

2057 for line in f.read().splitlines(): 

2058 if not line: 

2059 continue 

2060 (kind, name) = line.split(b" ", 1) 

2061 if kind != b"P": 

2062 continue 

2063 yield os.fsdecode(name) 
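For illustration, the objects/info/packs format this parses is one "P <pack name>" entry per line; the pack name below is arbitrary:

# Illustrative sketch, not part of the module.
from io import BytesIO

packs_blob = BytesIO(b"P pack-1234567890abcdef1234567890abcdef12345678.pack\n")
print(list(read_packs_file(packs_blob)))
# -> ['pack-1234567890abcdef1234567890abcdef12345678.pack']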

2064 

2065 

2066class BucketBasedObjectStore(PackBasedObjectStore): 

2067 """Object store implementation that uses a bucket store like S3 as backend.""" 

2068 

2069 def _iter_loose_objects(self): 

2070 """Iterate over the SHAs of all loose objects.""" 

2071 return iter([]) 

2072 

2073 def _get_loose_object(self, sha) -> None: 

2074 return None 

2075 

2076 def delete_loose_object(self, sha) -> None: 

2077 # Loose objects don't exist in this store, so there is nothing to delete.

2078 pass 

2079 

2080 def _remove_pack(self, name) -> None: 

2081 raise NotImplementedError(self._remove_pack) 

2082 

2083 def _iter_pack_names(self) -> Iterator[str]: 

2084 raise NotImplementedError(self._iter_pack_names) 

2085 

2086 def _get_pack(self, name) -> Pack: 

2087 raise NotImplementedError(self._get_pack) 

2088 

2089 def _update_pack_cache(self): 

2090 pack_files = set(self._iter_pack_names()) 

2091 

2092 # Open newly appeared pack files 

2093 new_packs = [] 

2094 for f in pack_files: 

2095 if f not in self._pack_cache: 

2096 pack = self._get_pack(f) 

2097 new_packs.append(pack) 

2098 self._pack_cache[f] = pack 

2099 # Remove disappeared pack files 

2100 for f in set(self._pack_cache) - pack_files: 

2101 self._pack_cache.pop(f).close() 

2102 return new_packs 

2103 

2104 def _upload_pack(self, basename, pack_file, index_file) -> None: 

2105 raise NotImplementedError 

2106 

2107 def add_pack(self): 

2108 """Add a new pack to this object store. 

2109 

2110 Returns: File object to write to, a commit function to

2111 call when the pack is finished, and an abort

2112 function.

2113 """ 

2114 import tempfile 

2115 

2116 pf = tempfile.SpooledTemporaryFile( 

2117 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2118 ) 

2119 

2120 def commit(): 

2121 if pf.tell() == 0: 

2122 pf.close() 

2123 return None 

2124 

2125 pf.seek(0) 

2126 p = PackData(pf.name, pf) 

2127 entries = p.sorted_entries() 

2128 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

2129 idxf = tempfile.SpooledTemporaryFile( 

2130 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

2131 ) 

2132 checksum = p.get_stored_checksum() 

2133 write_pack_index(idxf, entries, checksum, version=self.pack_index_version) 

2134 idxf.seek(0) 

2135 idx = load_pack_index_file(basename + ".idx", idxf) 

2136 for pack in self.packs: 

2137 if pack.get_stored_checksum() == p.get_stored_checksum(): 

2138 p.close() 

2139 idx.close() 

2140 pf.close() 

2141 idxf.close() 

2142 return pack 

2143 pf.seek(0) 

2144 idxf.seek(0) 

2145 self._upload_pack(basename, pf, idxf) 

2146 final_pack = Pack.from_objects(p, idx) 

2147 self._add_cached_pack(basename, final_pack) 

2148 pf.close() 

2149 idxf.close() 

2150 return final_pack 

2151 

2152 return pf, commit, pf.close 
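A skeletal subclass sketch showing which hooks a concrete backend has to fill in; the bucket client and its list/upload methods are entirely hypothetical:

# Illustrative sketch, not part of the module.
from dulwich.object_store import BucketBasedObjectStore


class ExampleBucketObjectStore(BucketBasedObjectStore):
    """Hypothetical store keeping pack files under a 'packs/' prefix."""

    def __init__(self, bucket) -> None:
        super().__init__()
        # `bucket` is assumed to expose list(prefix) and upload(key, fileobj).
        self.bucket = bucket

    def _iter_pack_names(self):
        for key in self.bucket.list("packs/"):
            if key.endswith(".pack"):
                yield key[len("packs/") : -len(".pack")]

    def _get_pack(self, name):
        # A real backend would return a Pack whose data and index are
        # fetched (or streamed) from the bucket.
        raise NotImplementedError(self._get_pack)

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        self.bucket.upload(f"packs/{basename}.pack", pack_file)
        self.bucket.upload(f"packs/{basename}.idx", index_file)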

2153 

2154 

2155def _collect_ancestors( 

2156 store: ObjectContainer, 

2157 heads, 

2158 common: frozenset[ObjectID] = frozenset(), 

2159 shallow: frozenset[ObjectID] = frozenset(), 

2160 get_parents=lambda commit: commit.parents, 

2161): 

2162 """Collect all ancestors of heads up to (excluding) those in common. 

2163 

2164 Args: 

2165 heads: commits to start from 

2166 common: commits to end at, or empty set to walk repository 

2167 completely 

2168 get_parents: Optional function for getting the parents of a 

2169 commit. 

2170 Returns: a tuple (A, B) where A is the set of all commits reachable

2171 from heads but not present in common, and B is the set of common

2172 (shared) elements that were reached while traversing from heads

2173 """ 

2174 bases = set() 

2175 commits = set() 

2176 queue = [] 

2177 queue.extend(heads) 

2178 

2179 # Try to use commit graph if available 

2180 commit_graph = store.get_commit_graph() 

2181 

2182 while queue: 

2183 e = queue.pop(0) 

2184 if e in common: 

2185 bases.add(e) 

2186 elif e not in commits: 

2187 commits.add(e) 

2188 if e in shallow: 

2189 continue 

2190 

2191 # Try to use commit graph for parent lookup 

2192 parents = None 

2193 if commit_graph: 

2194 parents = commit_graph.get_parents(e) 

2195 

2196 if parents is None: 

2197 # Fall back to loading the object 

2198 cmt = store[e] 

2199 parents = get_parents(cmt) 

2200 

2201 queue.extend(parents) 

2202 return (commits, bases) 
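A small sketch of the typical call, assuming store, branch_head and remote_head come from an open repository:

# Illustrative sketch, not part of the module.
missing, shared = _collect_ancestors(
    store,
    [branch_head],
    common=frozenset([remote_head]),
)
# `missing` holds the commits reachable from branch_head, walking until
# remote_head is hit; `shared` holds the members of `common` (here just
# remote_head) that the walk actually reached.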

2203 

2204 

2205def iter_tree_contents( 

2206 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

2207): 

2208 """Iterate the contents of a tree and all subtrees. 

2209 

2210 Iteration is depth-first pre-order, as in e.g. os.walk. 

2211 

2212 Args: 

2213 tree_id: SHA1 of the tree. 

2214 include_trees: If True, include tree objects in the iteration. 

2215 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

2216 tree. 

2217 """ 

2218 if tree_id is None: 

2219 return 

2220 # This could be fairly easily generalized to >2 trees if we find a use 

2221 # case. 

2222 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

2223 while todo: 

2224 entry = todo.pop() 

2225 if stat.S_ISDIR(entry.mode): 

2226 extra = [] 

2227 tree = store[entry.sha] 

2228 assert isinstance(tree, Tree) 

2229 for subentry in tree.iteritems(name_order=True): 

2230 extra.append(subentry.in_path(entry.path)) 

2231 todo.extend(reversed(extra)) 

2232 if not stat.S_ISDIR(entry.mode) or include_trees: 

2233 yield entry 
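A minimal usage sketch; store and tree_id are assumed to come from an open repository:

# Illustrative sketch, not part of the module.
for entry in iter_tree_contents(store, tree_id):
    # entry is a TreeEntry(path, mode, sha); tree entries themselves are
    # skipped unless include_trees=True is passed.
    print(entry.path.decode(), oct(entry.mode), entry.sha.decode())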

2234 

2235 

2236def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]: 

2237 """Peel all tags from a SHA. 

2238 

2239 Args: 

2240 sha: The object SHA to peel. 

2241 Returns: A tuple of (unpeeled, peeled) objects, where peeled is the

2242 object obtained after following all intermediate tags; if the original

2243 SHA does not name a tag, both elements are the same object.

2244 """ 

2245 unpeeled = obj = store[sha] 

2246 obj_class = object_class(obj.type_name) 

2247 while obj_class is Tag: 

2248 assert isinstance(obj, Tag) 

2249 obj_class, sha = obj.object 

2250 obj = store[sha] 

2251 return unpeeled, obj
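A closing sketch; store and tag_sha are assumed to come from an open repository in which tag_sha names an annotated tag:

# Illustrative sketch, not part of the module.
unpeeled, peeled = peel_sha(store, tag_sha)
# `unpeeled` is the Tag object itself, while `peeled` is whatever the
# chain of tags ultimately points at (typically a Commit).
assert isinstance(unpeeled, Tag)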