1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
22
23
24"""Git object store interfaces and implementation."""
25
26import binascii
27import os
28import stat
29import sys
30import time
31import warnings
32from collections.abc import Iterable, Iterator, Mapping, Sequence, Set
33from contextlib import suppress
34from io import BytesIO
35from pathlib import Path
36from typing import (
37 TYPE_CHECKING,
38 BinaryIO,
39 Callable,
40 Optional,
41 Protocol,
42 Union,
43)
44
45from .errors import NotTreeError
46from .file import GitFile, _GitFile
47from .objects import (
48 S_ISGITLINK,
49 ZERO_SHA,
50 Blob,
51 Commit,
52 ObjectID,
53 ShaFile,
54 Tag,
55 Tree,
56 TreeEntry,
57 hex_to_filename,
58 hex_to_sha,
59 object_class,
60 sha_to_hex,
61 valid_hexsha,
62)
63from .pack import (
64 PACK_SPOOL_FILE_MAX_SIZE,
65 ObjectContainer,
66 Pack,
67 PackData,
68 PackedObjectContainer,
69 PackFileDisappeared,
70 PackHint,
71 PackIndexer,
72 PackInflater,
73 PackStreamCopier,
74 UnpackedObject,
75 extend_pack,
76 full_unpacked_object,
77 generate_unpacked_objects,
78 iter_sha1,
79 load_pack_index_file,
80 pack_objects_to_data,
81 write_pack_data,
82 write_pack_index,
83)
84from .protocol import DEPTH_INFINITE
85from .refs import PEELED_TAG_SUFFIX, Ref
86
87if TYPE_CHECKING:
88 from .commit_graph import CommitGraph
89 from .config import Config
90 from .diff_tree import RenameDetector
91
92
93class GraphWalker(Protocol):
94 """Protocol for graph walker objects."""
95
96 def __next__(self) -> Optional[bytes]:
97 """Return the next object SHA to visit."""
98 ...
99
100 def ack(self, sha: bytes) -> None:
101 """Acknowledge that an object has been received."""
102 ...
103
104 def nak(self) -> None:
105 """Nothing in common was found."""
106 ...
107
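# A minimal GraphWalker could look like the sketch below (illustrative only;
# the names are placeholders and real implementations live in the protocol
# client/server code):
#
#     class _ListGraphWalker:
#         def __init__(self, shas: list[bytes]) -> None:
#             self._shas = list(shas)
#
#         def __next__(self) -> Optional[bytes]:
#             return self._shas.pop() if self._shas else None
#
#         def ack(self, sha: bytes) -> None:
#             pass  # remember that the peer has this object
#
#         def nak(self) -> None:
#             pass  # nothing in common was found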
108
109INFODIR = "info"
110PACKDIR = "pack"
111
112# use permissions consistent with Git; just readable by everyone
113# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
115PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
116
117# Grace period for cleaning up temporary pack files (in seconds)
118# Matches git's default of 2 weeks
119DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
120
121
122def find_shallow(
123 store: ObjectContainer, heads: Iterable[bytes], depth: int
124) -> tuple[set[bytes], set[bytes]]:
125 """Find shallow commits according to a given depth.
126
127 Args:
128 store: An ObjectStore for looking up objects.
129 heads: Iterable of head SHAs to start walking from.
130 depth: The depth of ancestors to include. A depth of one includes
131 only the heads themselves.
132 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
133 considered shallow and unshallow according to the arguments. Note that
134 these sets may overlap if a commit is reachable along multiple paths.
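
    Example (illustrative; ``store`` and ``head`` are placeholders for an
    existing object store and commit SHA)::

        shallow, not_shallow = find_shallow(store, [head], depth=2)
        # ``shallow`` holds the commits whose parents should be omitted;
        # ``not_shallow`` holds the commits that are fully included.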
135 """
136 parents: dict[bytes, list[bytes]] = {}
137 commit_graph = store.get_commit_graph()
138
139 def get_parents(sha: bytes) -> list[bytes]:
140 result = parents.get(sha, None)
141 if not result:
142 # Try to use commit graph first if available
143 if commit_graph:
144 graph_parents = commit_graph.get_parents(sha)
145 if graph_parents is not None:
146 result = graph_parents
147 parents[sha] = result
148 return result
149 # Fall back to loading the object
150 commit = store[sha]
151 assert isinstance(commit, Commit)
152 result = commit.parents
153 parents[sha] = result
154 return result
155
156 todo = [] # stack of (sha, depth)
157 for head_sha in heads:
158 obj = store[head_sha]
159 # Peel tags if necessary
160 while isinstance(obj, Tag):
161 _, sha = obj.object
162 obj = store[sha]
163 if isinstance(obj, Commit):
164 todo.append((obj.id, 1))
165
166 not_shallow = set()
167 shallow = set()
168 while todo:
169 sha, cur_depth = todo.pop()
170 if cur_depth < depth:
171 not_shallow.add(sha)
172 new_depth = cur_depth + 1
173 todo.extend((p, new_depth) for p in get_parents(sha))
174 else:
175 shallow.add(sha)
176
177 return shallow, not_shallow
178
179
180def get_depth(
181 store: ObjectContainer,
182 head: bytes,
183 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
184 max_depth: Optional[int] = None,
185) -> int:
186 """Return the current available depth for the given head.
187
188 For commits with multiple parents, the largest possible depth will be
189 returned.
190
191 Args:
192 store: Object store to search in
193 head: commit to start from
194 get_parents: optional function for getting the parents of a commit
195 max_depth: maximum depth to search
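
    Example (illustrative; ``store`` and ``head`` are placeholders)::

        depth = get_depth(store, head, max_depth=100)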
196 """
197 if head not in store:
198 return 0
199 current_depth = 1
200 queue = [(head, current_depth)]
201 commit_graph = store.get_commit_graph()
202
203 while queue and (max_depth is None or current_depth < max_depth):
204 e, depth = queue.pop(0)
205 current_depth = max(current_depth, depth)
206
207 # Try to use commit graph for parent lookup if available
208 parents = None
209 if commit_graph:
210 parents = commit_graph.get_parents(e)
211
212 if parents is None:
213 # Fall back to loading the object
214 cmt = store[e]
215 if isinstance(cmt, Tag):
216 _cls, sha = cmt.object
217 cmt = store[sha]
218 parents = get_parents(cmt)
219
220 queue.extend((parent, depth + 1) for parent in parents if parent in store)
221 return current_depth
222
223
224class PackContainer(Protocol):
225 """Protocol for containers that can accept pack files."""
226
    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
228 """Add a new pack."""
229
230
231class BaseObjectStore:
232 """Object store interface."""
233
234 def determine_wants_all(
235 self, refs: Mapping[Ref, ObjectID], depth: Optional[int] = None
236 ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs.

        Args:
          refs: Mapping of ref name to object SHA.
          depth: Optional shallow fetch depth; when given, refs that are
            already present may still be wanted in order to deepen history.
        Returns: List of object SHAs to fetch.
        """
238
239 def _want_deepen(sha: bytes) -> bool:
240 if not depth:
241 return False
242 if depth == DEPTH_INFINITE:
243 return True
244 return depth > self._get_depth(sha)
245
246 return [
247 sha
248 for (ref, sha) in refs.items()
249 if (sha not in self or _want_deepen(sha))
250 and not ref.endswith(PEELED_TAG_SUFFIX)
251 and not sha == ZERO_SHA
252 ]
253
254 def contains_loose(self, sha: bytes) -> bool:
255 """Check if a particular object is present by SHA1 and is loose."""
256 raise NotImplementedError(self.contains_loose)
257
258 def contains_packed(self, sha: bytes) -> bool:
259 """Check if a particular object is present by SHA1 and is packed."""
260 return False # Default implementation for stores that don't support packing
261
262 def __contains__(self, sha1: bytes) -> bool:
263 """Check if a particular object is present by SHA1.
264
265 This method makes no distinction between loose and packed objects.
266 """
267 return self.contains_loose(sha1)
268
269 @property
270 def packs(self) -> list[Pack]:
271 """Iterable of pack objects."""
272 raise NotImplementedError
273
274 def get_raw(self, name: bytes) -> tuple[int, bytes]:
275 """Obtain the raw text for an object.
276
277 Args:
278 name: sha for the object.
279 Returns: tuple with numeric type and object contents.
280 """
281 raise NotImplementedError(self.get_raw)
282
283 def __getitem__(self, sha1: ObjectID) -> ShaFile:
284 """Obtain an object by SHA1."""
285 type_num, uncomp = self.get_raw(sha1)
286 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
287
288 def __iter__(self) -> Iterator[bytes]:
289 """Iterate over the SHAs that are present in this store."""
290 raise NotImplementedError(self.__iter__)
291
292 def add_object(self, obj: ShaFile) -> None:
293 """Add a single object to this object store."""
294 raise NotImplementedError(self.add_object)
295
296 def add_objects(
297 self,
298 objects: Sequence[tuple[ShaFile, Optional[str]]],
299 progress: Optional[Callable[..., None]] = None,
300 ) -> Optional["Pack"]:
301 """Add a set of objects to this object store.
302
303 Args:
          objects: Iterable of (object, path) tuples
305 progress: Optional progress callback
306 """
307 raise NotImplementedError(self.add_objects)
308
309 def tree_changes(
310 self,
311 source: Optional[bytes],
312 target: Optional[bytes],
313 want_unchanged: bool = False,
314 include_trees: bool = False,
315 change_type_same: bool = False,
316 rename_detector: Optional["RenameDetector"] = None,
317 paths: Optional[Sequence[bytes]] = None,
318 ) -> Iterator[
319 tuple[
320 tuple[Optional[bytes], Optional[bytes]],
321 tuple[Optional[int], Optional[int]],
322 tuple[Optional[bytes], Optional[bytes]],
323 ]
324 ]:
325 """Find the differences between the contents of two trees.
326
327 Args:
328 source: SHA1 of the source tree
329 target: SHA1 of the target tree
330 want_unchanged: Whether unchanged files should be reported
331 include_trees: Whether to include trees
332 change_type_same: Whether to report files changing
333 type in the same entry.
334 rename_detector: RenameDetector object for detecting renames.
335 paths: Optional list of paths to filter to (as bytes).
336 Returns: Iterator over tuples with
337 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
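
        Example (illustrative sketch; ``store``, ``old_tree`` and ``new_tree``
        are placeholders)::

            for (paths, modes, shas) in store.tree_changes(old_tree, new_tree):
                old_path, new_path = paths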
338 """
339 from .diff_tree import tree_changes
340
341 for change in tree_changes(
342 self,
343 source,
344 target,
345 want_unchanged=want_unchanged,
346 include_trees=include_trees,
347 change_type_same=change_type_same,
348 rename_detector=rename_detector,
349 paths=paths,
350 ):
351 old_path = change.old.path if change.old is not None else None
352 new_path = change.new.path if change.new is not None else None
353 old_mode = change.old.mode if change.old is not None else None
354 new_mode = change.new.mode if change.new is not None else None
355 old_sha = change.old.sha if change.old is not None else None
356 new_sha = change.new.sha if change.new is not None else None
357 yield (
358 (old_path, new_path),
359 (old_mode, new_mode),
360 (old_sha, new_sha),
361 )
362
363 def iter_tree_contents(
364 self, tree_id: bytes, include_trees: bool = False
365 ) -> Iterator[TreeEntry]:
366 """Iterate the contents of a tree and all subtrees.
367
368 Iteration is depth-first pre-order, as in e.g. os.walk.
369
370 Args:
371 tree_id: SHA1 of the tree.
372 include_trees: If True, include tree objects in the iteration.
373 Returns: Iterator over TreeEntry namedtuples for all the objects in a
374 tree.
375 """
376 warnings.warn(
377 "Please use dulwich.object_store.iter_tree_contents",
378 DeprecationWarning,
379 stacklevel=2,
380 )
381 return iter_tree_contents(self, tree_id, include_trees=include_trees)
382
383 def iterobjects_subset(
384 self, shas: Iterable[bytes], *, allow_missing: bool = False
385 ) -> Iterator[ShaFile]:
386 """Iterate over a subset of objects in the store.
387
388 Args:
389 shas: Iterable of object SHAs to retrieve
390 allow_missing: If True, skip missing objects; if False, raise KeyError
391
392 Returns:
393 Iterator of ShaFile objects
394
395 Raises:
396 KeyError: If an object is missing and allow_missing is False
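
        Example (illustrative; ``store`` and ``shas`` are placeholders)::

            objects = list(store.iterobjects_subset(shas, allow_missing=True))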
397 """
398 for sha in shas:
399 try:
400 yield self[sha]
401 except KeyError:
402 if not allow_missing:
403 raise
404
405 def iter_unpacked_subset(
406 self,
407 shas: Iterable[bytes],
408 include_comp: bool = False,
409 allow_missing: bool = False,
410 convert_ofs_delta: bool = True,
411 ) -> "Iterator[UnpackedObject]":
412 """Iterate over unpacked objects for a subset of SHAs.
413
414 Default implementation that converts ShaFile objects to UnpackedObject.
415 Subclasses may override for more efficient unpacked access.
416
417 Args:
418 shas: Iterable of object SHAs to retrieve
419 include_comp: Whether to include compressed data (ignored in base implementation)
420 allow_missing: If True, skip missing objects; if False, raise KeyError
421 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation)
422
423 Returns:
424 Iterator of UnpackedObject instances
425
426 Raises:
427 KeyError: If an object is missing and allow_missing is False
428 """
429 from .pack import UnpackedObject
430
431 for sha in shas:
432 try:
433 obj = self[sha]
434 # Convert ShaFile to UnpackedObject
435 unpacked = UnpackedObject(
436 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
437 )
438 yield unpacked
439 except KeyError:
440 if not allow_missing:
441 raise
442
443 def find_missing_objects(
444 self,
445 haves: Iterable[bytes],
446 wants: Iterable[bytes],
447 shallow: Optional[Set[bytes]] = None,
448 progress: Optional[Callable[..., None]] = None,
449 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
450 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
451 ) -> Iterator[tuple[bytes, Optional[PackHint]]]:
452 """Find the missing objects required for a set of revisions.
453
454 Args:
455 haves: Iterable over SHAs already in common.
456 wants: Iterable over SHAs of objects to fetch.
457 shallow: Set of shallow commit SHA1s to skip
458 progress: Simple progress function that will be called with
459 updated progress strings.
460 get_tagged: Function that returns a dict of pointed-to sha ->
461 tag sha for including tags.
462 get_parents: Optional function for getting the parents of a
463 commit.
        Returns: Iterator over (sha, pack_hint) tuples.
465 """
466 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
467 finder = MissingObjectFinder(
468 self,
469 haves=haves,
470 wants=wants,
471 shallow=shallow,
472 progress=progress,
473 get_tagged=get_tagged,
474 get_parents=get_parents,
475 )
476 return iter(finder)
477
478 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]:
479 """Find which revisions this store has in common using graphwalker.
480
481 Args:
482 graphwalker: A graphwalker object.
483 Returns: List of SHAs that are in common
484 """
485 haves = []
486 sha = next(graphwalker)
487 while sha:
488 if sha in self:
489 haves.append(sha)
490 graphwalker.ack(sha)
491 sha = next(graphwalker)
492 return haves
493
494 def generate_pack_data(
495 self,
496 have: Iterable[bytes],
497 want: Iterable[bytes],
498 shallow: Optional[Set[bytes]] = None,
499 progress: Optional[Callable[..., None]] = None,
500 ofs_delta: bool = True,
501 ) -> tuple[int, Iterator[UnpackedObject]]:
502 """Generate pack data objects for a set of wants/haves.
503
504 Args:
505 have: List of SHA1s of objects that should not be sent
506 want: List of SHA1s of objects that should be sent
507 shallow: Set of shallow commit SHA1s to skip
508 ofs_delta: Whether OFS deltas can be included
509 progress: Optional progress reporting method
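
        Example (illustrative sketch; ``store``, ``wants`` and ``out`` are
        placeholders for an object store, wanted SHAs and a writable file)::

            count, records = store.generate_pack_data(have=[], want=wants)
            write_pack_data(out.write, records, num_records=count)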
510 """
511 # Note that the pack-specific implementation below is more efficient,
512 # as it reuses deltas
513 missing_objects = MissingObjectFinder(
514 self, haves=have, wants=want, shallow=shallow, progress=progress
515 )
516 object_ids = list(missing_objects)
517 return pack_objects_to_data(
518 [(self[oid], path) for oid, path in object_ids],
519 ofs_delta=ofs_delta,
520 progress=progress,
521 )
522
523 def peel_sha(self, sha: bytes) -> bytes:
524 """Peel all tags from a SHA.
525
526 Args:
527 sha: The object SHA to peel.
528 Returns: The fully-peeled SHA1 of a tag object, after peeling all
529 intermediate tags; if the original ref does not point to a tag,
530 this will equal the original SHA1.
531 """
532 warnings.warn(
533 "Please use dulwich.object_store.peel_sha()",
534 DeprecationWarning,
535 stacklevel=2,
536 )
537 return peel_sha(self, sha)[1].id
538
539 def _get_depth(
540 self,
541 head: bytes,
542 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
543 max_depth: Optional[int] = None,
544 ) -> int:
545 """Return the current available depth for the given head.
546
547 For commits with multiple parents, the largest possible depth will be
548 returned.
549
550 Args:
551 head: commit to start from
552 get_parents: optional function for getting the parents of a commit
553 max_depth: maximum depth to search
554 """
555 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
556
557 def close(self) -> None:
558 """Close any files opened by this object store."""
559 # Default implementation is a NO-OP
560
561 def prune(self, grace_period: Optional[int] = None) -> None:
562 """Prune/clean up this object store.
563
564 This includes removing orphaned temporary files and other
565 housekeeping tasks. Default implementation is a NO-OP.
566
567 Args:
568 grace_period: Grace period in seconds for removing temporary files.
569 If None, uses the default grace period.
570 """
571 # Default implementation is a NO-OP
572
573 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
574 """Iterate over all SHA1s that start with a given prefix.
575
576 The default implementation is a naive iteration over all objects.
577 However, subclasses may override this method with more efficient
578 implementations.
579 """
580 for sha in self:
581 if sha.startswith(prefix):
582 yield sha
583
584 def get_commit_graph(self) -> Optional["CommitGraph"]:
585 """Get the commit graph for this object store.
586
587 Returns:
588 CommitGraph object if available, None otherwise
589 """
590 return None
591
592 def write_commit_graph(
593 self, refs: Optional[Sequence[bytes]] = None, reachable: bool = True
594 ) -> None:
595 """Write a commit graph file for this object store.
596
597 Args:
598 refs: List of refs to include. If None, includes all refs from object store.
599 reachable: If True, includes all commits reachable from refs.
600 If False, only includes the direct ref targets.
601
        Note:
            The default implementation raises NotImplementedError. Subclasses
            should override this method to provide commit graph writing
            functionality.
605 """
606 raise NotImplementedError(self.write_commit_graph)
607
608 def get_object_mtime(self, sha: bytes) -> float:
609 """Get the modification time of an object.
610
611 Args:
612 sha: SHA1 of the object
613
614 Returns:
615 Modification time as seconds since epoch
616
617 Raises:
618 KeyError: if the object is not found
619 """
620 # Default implementation raises KeyError
621 # Subclasses should override to provide actual mtime
622 raise KeyError(sha)
623
624
625class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
626 """Object store that supports pack operations.
627
628 This is a base class for object stores that can handle pack files,
629 including both disk-based and memory-based stores.
630 """
631
632 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
633 """Add a new pack to this object store.
634
635 Returns: Tuple of (file, commit_func, abort_func)
636 """
637 raise NotImplementedError(self.add_pack)
638
639 def add_pack_data(
640 self,
641 count: int,
642 unpacked_objects: Iterator["UnpackedObject"],
643 progress: Optional[Callable[..., None]] = None,
644 ) -> Optional["Pack"]:
645 """Add pack data to this object store.
646
647 Args:
648 count: Number of objects
649 unpacked_objects: Iterator over unpacked objects
650 progress: Optional progress callback
651 """
652 raise NotImplementedError(self.add_pack_data)
653
654 def get_unpacked_object(
655 self, sha1: bytes, *, include_comp: bool = False
656 ) -> "UnpackedObject":
657 """Get a raw unresolved object.
658
659 Args:
660 sha1: SHA-1 hash of the object
661 include_comp: Whether to include compressed data
662
663 Returns:
664 UnpackedObject instance
665 """
666 from .pack import UnpackedObject
667
668 obj = self[sha1]
669 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())
670
671 def iterobjects_subset(
672 self, shas: Iterable[bytes], *, allow_missing: bool = False
673 ) -> Iterator[ShaFile]:
674 """Iterate over a subset of objects.
675
676 Args:
677 shas: Iterable of object SHAs to retrieve
678 allow_missing: If True, skip missing objects
679
680 Returns:
681 Iterator of ShaFile objects
682 """
683 for sha in shas:
684 try:
685 yield self[sha]
686 except KeyError:
687 if not allow_missing:
688 raise
689
690
691class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
692 """Object store that uses pack files for storage.
693
694 This class provides a base implementation for object stores that use
695 Git pack files as their primary storage mechanism. It handles caching
696 of open pack files and provides configuration for pack file operations.
697 """
698
699 def __init__(
700 self,
701 pack_compression_level: int = -1,
702 pack_index_version: Optional[int] = None,
703 pack_delta_window_size: Optional[int] = None,
704 pack_window_memory: Optional[int] = None,
705 pack_delta_cache_size: Optional[int] = None,
706 pack_depth: Optional[int] = None,
707 pack_threads: Optional[int] = None,
708 pack_big_file_threshold: Optional[int] = None,
709 ) -> None:
710 """Initialize a PackBasedObjectStore.
711
712 Args:
713 pack_compression_level: Compression level for pack files (-1 to 9)
714 pack_index_version: Pack index version to use
715 pack_delta_window_size: Window size for delta compression
716 pack_window_memory: Maximum memory to use for delta window
717 pack_delta_cache_size: Cache size for delta operations
718 pack_depth: Maximum depth for pack deltas
719 pack_threads: Number of threads to use for packing
720 pack_big_file_threshold: Threshold for treating files as "big"
721 """
722 self._pack_cache: dict[str, Pack] = {}
723 self.pack_compression_level = pack_compression_level
724 self.pack_index_version = pack_index_version
725 self.pack_delta_window_size = pack_delta_window_size
726 self.pack_window_memory = pack_window_memory
727 self.pack_delta_cache_size = pack_delta_cache_size
728 self.pack_depth = pack_depth
729 self.pack_threads = pack_threads
730 self.pack_big_file_threshold = pack_big_file_threshold
731
732 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
733 """Add a new pack to this object store."""
734 raise NotImplementedError(self.add_pack)
735
736 def add_pack_data(
737 self,
738 count: int,
739 unpacked_objects: Iterator[UnpackedObject],
740 progress: Optional[Callable[..., None]] = None,
741 ) -> Optional["Pack"]:
742 """Add pack data to this object store.
743
744 Args:
745 count: Number of items to add
746 unpacked_objects: Iterator of UnpackedObject instances
747 progress: Optional progress callback
748 """
749 if count == 0:
750 # Don't bother writing an empty pack file
751 return None
752 f, commit, abort = self.add_pack()
753 try:
754 write_pack_data(
755 f.write,
756 unpacked_objects,
757 num_records=count,
758 progress=progress,
759 compression_level=self.pack_compression_level,
760 )
761 except BaseException:
762 abort()
763 raise
764 else:
765 return commit()
766
767 @property
768 def alternates(self) -> list["BaseObjectStore"]:
769 """Return list of alternate object stores."""
770 return []
771
772 def contains_packed(self, sha: bytes) -> bool:
773 """Check if a particular object is present by SHA1 and is packed.
774
775 This does not check alternates.
776 """
777 for pack in self.packs:
778 try:
779 if sha in pack:
780 return True
781 except PackFileDisappeared:
782 pass
783 return False
784
785 def __contains__(self, sha: bytes) -> bool:
786 """Check if a particular object is present by SHA1.
787
788 This method makes no distinction between loose and packed objects.
789 """
790 if self.contains_packed(sha) or self.contains_loose(sha):
791 return True
792 for alternate in self.alternates:
793 if sha in alternate:
794 return True
795 return False
796
797 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
798 """Add a newly appeared pack to the cache by path."""
799 prev_pack = self._pack_cache.get(base_name)
800 if prev_pack is not pack:
801 self._pack_cache[base_name] = pack
802 if prev_pack:
803 prev_pack.close()
804
805 def generate_pack_data(
806 self,
807 have: Iterable[bytes],
808 want: Iterable[bytes],
809 shallow: Optional[Set[bytes]] = None,
810 progress: Optional[Callable[..., None]] = None,
811 ofs_delta: bool = True,
812 ) -> tuple[int, Iterator[UnpackedObject]]:
813 """Generate pack data objects for a set of wants/haves.
814
815 Args:
816 have: List of SHA1s of objects that should not be sent
817 want: List of SHA1s of objects that should be sent
818 shallow: Set of shallow commit SHA1s to skip
819 ofs_delta: Whether OFS deltas can be included
820 progress: Optional progress reporting method
821 """
822 missing_objects = MissingObjectFinder(
823 self, haves=have, wants=want, shallow=shallow, progress=progress
824 )
825 remote_has = missing_objects.get_remote_has()
826 object_ids = list(missing_objects)
827 return len(object_ids), generate_unpacked_objects(
828 self,
829 object_ids,
830 progress=progress,
831 ofs_delta=ofs_delta,
832 other_haves=remote_has,
833 )
834
835 def _clear_cached_packs(self) -> None:
836 pack_cache = self._pack_cache
837 self._pack_cache = {}
838 while pack_cache:
839 (_name, pack) = pack_cache.popitem()
840 pack.close()
841
842 def _iter_cached_packs(self) -> Iterator[Pack]:
843 return iter(self._pack_cache.values())
844
845 def _update_pack_cache(self) -> list[Pack]:
846 raise NotImplementedError(self._update_pack_cache)
847
848 def close(self) -> None:
849 """Close the object store and release resources.
850
851 This method closes all cached pack files and frees associated resources.
852 """
853 self._clear_cached_packs()
854
855 @property
856 def packs(self) -> list[Pack]:
857 """List with pack objects."""
858 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
859
860 def count_pack_files(self) -> int:
861 """Count the number of pack files.
862
863 Returns:
864 Number of pack files (excluding those with .keep files)
865 """
866 count = 0
867 for pack in self.packs:
868 # Check if there's a .keep file for this pack
869 keep_path = pack._basename + ".keep"
870 if not os.path.exists(keep_path):
871 count += 1
872 return count
873
874 def _iter_alternate_objects(self) -> Iterator[bytes]:
875 """Iterate over the SHAs of all the objects in alternate stores."""
876 for alternate in self.alternates:
877 yield from alternate
878
879 def _iter_loose_objects(self) -> Iterator[bytes]:
880 """Iterate over the SHAs of all loose objects."""
881 raise NotImplementedError(self._iter_loose_objects)
882
883 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]:
884 raise NotImplementedError(self._get_loose_object)
885
886 def delete_loose_object(self, sha: bytes) -> None:
887 """Delete a loose object.
888
889 This method only handles loose objects. For packed objects,
890 use repack(exclude=...) to exclude them during repacking.
891 """
892 raise NotImplementedError(self.delete_loose_object)
893
894 def _remove_pack(self, pack: "Pack") -> None:
895 raise NotImplementedError(self._remove_pack)
896
897 def pack_loose_objects(
898 self, progress: Optional[Callable[[str], None]] = None
899 ) -> int:
900 """Pack loose objects.
901
902 Args:
903 progress: Optional progress reporting callback
904
905 Returns: Number of objects packed
906 """
907 objects: list[tuple[ShaFile, None]] = []
908 for sha in self._iter_loose_objects():
909 obj = self._get_loose_object(sha)
910 if obj is not None:
911 objects.append((obj, None))
912 self.add_objects(objects, progress=progress)
913 for obj, path in objects:
914 self.delete_loose_object(obj.id)
915 return len(objects)
916
917 def repack(
918 self,
919 exclude: Optional[Set[bytes]] = None,
920 progress: Optional[Callable[[str], None]] = None,
921 ) -> int:
922 """Repack the packs in this repository.
923
924 Note that this implementation is fairly naive and currently keeps all
925 objects in memory while it repacks.
926
927 Args:
928 exclude: Optional set of object SHAs to exclude from repacking
929 progress: Optional progress reporting callback
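
        Example (illustrative; ``store`` is a placeholder)::

            packed = store.repack()  # consolidate loose objects and packs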
930 """
931 if exclude is None:
932 exclude = set()
933
934 loose_objects = set()
935 excluded_loose_objects = set()
936 for sha in self._iter_loose_objects():
937 if sha not in exclude:
938 obj = self._get_loose_object(sha)
939 if obj is not None:
940 loose_objects.add(obj)
941 else:
942 excluded_loose_objects.add(sha)
943
944 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
945 old_packs = {p.name(): p for p in self.packs}
946 for name, pack in old_packs.items():
947 objects.update(
948 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
949 )
950
951 # Only create a new pack if there are objects to pack
952 if objects:
953 # The name of the consolidated pack might match the name of a
954 # pre-existing pack. Take care not to remove the newly created
955 # consolidated pack.
956 consolidated = self.add_objects(list(objects), progress=progress)
957 if consolidated is not None:
958 old_packs.pop(consolidated.name(), None)
959
960 # Delete loose objects that were packed
961 for obj in loose_objects:
962 if obj is not None:
963 self.delete_loose_object(obj.id)
964 # Delete excluded loose objects
965 for sha in excluded_loose_objects:
966 self.delete_loose_object(sha)
967 for name, pack in old_packs.items():
968 self._remove_pack(pack)
969 self._update_pack_cache()
970 return len(objects)
971
972 def __iter__(self) -> Iterator[bytes]:
973 """Iterate over the SHAs that are present in this store."""
974 self._update_pack_cache()
975 for pack in self._iter_cached_packs():
976 try:
977 yield from pack
978 except PackFileDisappeared:
979 pass
980 yield from self._iter_loose_objects()
981 yield from self._iter_alternate_objects()
982
983 def contains_loose(self, sha: bytes) -> bool:
984 """Check if a particular object is present by SHA1 and is loose.
985
986 This does not check alternates.
987 """
988 return self._get_loose_object(sha) is not None
989
990 def get_raw(self, name: bytes) -> tuple[int, bytes]:
991 """Obtain the raw fulltext for an object.
992
993 Args:
994 name: sha for the object.
995 Returns: tuple with numeric type and object contents.
996 """
997 if name == ZERO_SHA:
998 raise KeyError(name)
999 if len(name) == 40:
1000 sha = hex_to_sha(name)
1001 hexsha = name
1002 elif len(name) == 20:
1003 sha = name
1004 hexsha = None
1005 else:
1006 raise AssertionError(f"Invalid object name {name!r}")
1007 for pack in self._iter_cached_packs():
1008 try:
1009 return pack.get_raw(sha)
1010 except (KeyError, PackFileDisappeared):
1011 pass
1012 if hexsha is None:
1013 hexsha = sha_to_hex(name)
1014 ret = self._get_loose_object(hexsha)
1015 if ret is not None:
1016 return ret.type_num, ret.as_raw_string()
1017 # Maybe something else has added a pack with the object
1018 # in the mean time?
1019 for pack in self._update_pack_cache():
1020 try:
1021 return pack.get_raw(sha)
1022 except KeyError:
1023 pass
1024 for alternate in self.alternates:
1025 try:
1026 return alternate.get_raw(hexsha)
1027 except KeyError:
1028 pass
1029 raise KeyError(hexsha)
1030
1031 def iter_unpacked_subset(
1032 self,
1033 shas: Iterable[bytes],
1034 include_comp: bool = False,
1035 allow_missing: bool = False,
1036 convert_ofs_delta: bool = True,
1037 ) -> Iterator[UnpackedObject]:
1038 """Iterate over a subset of objects, yielding UnpackedObject instances.
1039
1040 Args:
1041 shas: Set of object SHAs to retrieve
1042 include_comp: Whether to include compressed data
1043 allow_missing: If True, skip missing objects; if False, raise KeyError
1044 convert_ofs_delta: Whether to convert OFS_DELTA objects
1045
1046 Returns:
1047 Iterator of UnpackedObject instances
1048
1049 Raises:
1050 KeyError: If an object is missing and allow_missing is False
1051 """
1052 todo: set[bytes] = set(shas)
1053 for p in self._iter_cached_packs():
1054 for unpacked in p.iter_unpacked_subset(
1055 todo,
1056 include_comp=include_comp,
1057 allow_missing=True,
1058 convert_ofs_delta=convert_ofs_delta,
1059 ):
1060 yield unpacked
1061 hexsha = sha_to_hex(unpacked.sha())
1062 todo.remove(hexsha)
1063 # Maybe something else has added a pack with the object
1064 # in the mean time?
1065 for p in self._update_pack_cache():
1066 for unpacked in p.iter_unpacked_subset(
1067 todo,
1068 include_comp=include_comp,
1069 allow_missing=True,
1070 convert_ofs_delta=convert_ofs_delta,
1071 ):
1072 yield unpacked
1073 hexsha = sha_to_hex(unpacked.sha())
1074 todo.remove(hexsha)
1075 for alternate in self.alternates:
1076 assert isinstance(alternate, PackBasedObjectStore)
1077 for unpacked in alternate.iter_unpacked_subset(
1078 todo,
1079 include_comp=include_comp,
1080 allow_missing=True,
1081 convert_ofs_delta=convert_ofs_delta,
1082 ):
1083 yield unpacked
1084 hexsha = sha_to_hex(unpacked.sha())
1085 todo.remove(hexsha)
1086
1087 def iterobjects_subset(
1088 self, shas: Iterable[bytes], *, allow_missing: bool = False
1089 ) -> Iterator[ShaFile]:
1090 """Iterate over a subset of objects in the store.
1091
1092 This method searches for objects in pack files, alternates, and loose storage.
1093
1094 Args:
1095 shas: Iterable of object SHAs to retrieve
1096 allow_missing: If True, skip missing objects; if False, raise KeyError
1097
1098 Returns:
1099 Iterator of ShaFile objects
1100
1101 Raises:
1102 KeyError: If an object is missing and allow_missing is False
1103 """
1104 todo: set[bytes] = set(shas)
1105 for p in self._iter_cached_packs():
1106 for o in p.iterobjects_subset(todo, allow_missing=True):
1107 yield o
1108 todo.remove(o.id)
1109 # Maybe something else has added a pack with the object
1110 # in the mean time?
1111 for p in self._update_pack_cache():
1112 for o in p.iterobjects_subset(todo, allow_missing=True):
1113 yield o
1114 todo.remove(o.id)
1115 for alternate in self.alternates:
1116 for o in alternate.iterobjects_subset(todo, allow_missing=True):
1117 yield o
1118 todo.remove(o.id)
1119 for oid in todo:
1120 loose_obj: Optional[ShaFile] = self._get_loose_object(oid)
1121 if loose_obj is not None:
1122 yield loose_obj
1123 elif not allow_missing:
1124 raise KeyError(oid)
1125
1126 def get_unpacked_object(
1127 self, sha1: bytes, *, include_comp: bool = False
1128 ) -> UnpackedObject:
1129 """Obtain the unpacked object.
1130
1131 Args:
1132 sha1: sha for the object.
1133 include_comp: Whether to include compression metadata.
1134 """
1135 if sha1 == ZERO_SHA:
1136 raise KeyError(sha1)
1137 if len(sha1) == 40:
1138 sha = hex_to_sha(sha1)
1139 hexsha = sha1
1140 elif len(sha1) == 20:
1141 sha = sha1
1142 hexsha = None
1143 else:
1144 raise AssertionError(f"Invalid object sha1 {sha1!r}")
1145 for pack in self._iter_cached_packs():
1146 try:
1147 return pack.get_unpacked_object(sha, include_comp=include_comp)
1148 except (KeyError, PackFileDisappeared):
1149 pass
1150 if hexsha is None:
1151 hexsha = sha_to_hex(sha1)
1152 # Maybe something else has added a pack with the object
1153 # in the mean time?
1154 for pack in self._update_pack_cache():
1155 try:
1156 return pack.get_unpacked_object(sha, include_comp=include_comp)
1157 except KeyError:
1158 pass
1159 for alternate in self.alternates:
1160 assert isinstance(alternate, PackBasedObjectStore)
1161 try:
1162 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
1163 except KeyError:
1164 pass
1165 raise KeyError(hexsha)
1166
1167 def add_objects(
1168 self,
1169 objects: Sequence[tuple[ShaFile, Optional[str]]],
1170 progress: Optional[Callable[[str], None]] = None,
1171 ) -> Optional["Pack"]:
1172 """Add a set of objects to this object store.
1173
1174 Args:
1175 objects: Iterable over (object, path) tuples, should support
1176 __len__.
1177 progress: Optional progress reporting function.
1178 Returns: Pack object of the objects written.
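
        Example (illustrative sketch; ``store`` is a placeholder)::

            blob = Blob.from_string(b"example data")
            store.add_objects([(blob, None)])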
1179 """
1180 count = len(objects)
1181 record_iter = (full_unpacked_object(o) for (o, p) in objects)
1182 return self.add_pack_data(count, record_iter, progress=progress)
1183
1184
1185class DiskObjectStore(PackBasedObjectStore):
1186 """Git-style object store that exists on disk."""
1187
1188 path: Union[str, os.PathLike[str]]
1189 pack_dir: Union[str, os.PathLike[str]]
1190 _alternates: Optional[list["BaseObjectStore"]]
1191 _commit_graph: Optional["CommitGraph"]
1192
1193 def __init__(
1194 self,
1195 path: Union[str, os.PathLike[str]],
1196 loose_compression_level: int = -1,
1197 pack_compression_level: int = -1,
1198 pack_index_version: Optional[int] = None,
1199 pack_delta_window_size: Optional[int] = None,
1200 pack_window_memory: Optional[int] = None,
1201 pack_delta_cache_size: Optional[int] = None,
1202 pack_depth: Optional[int] = None,
1203 pack_threads: Optional[int] = None,
1204 pack_big_file_threshold: Optional[int] = None,
1205 ) -> None:
1206 """Open an object store.
1207
1208 Args:
1209 path: Path of the object store.
1210 loose_compression_level: zlib compression level for loose objects
1211 pack_compression_level: zlib compression level for pack objects
1212 pack_index_version: pack index version to use (1, 2, or 3)
1213 pack_delta_window_size: sliding window size for delta compression
1214 pack_window_memory: memory limit for delta window operations
1215 pack_delta_cache_size: size of cache for delta operations
1216 pack_depth: maximum delta chain depth
1217 pack_threads: number of threads for pack operations
1218 pack_big_file_threshold: threshold for treating files as big
1219 """
1220 super().__init__(
1221 pack_compression_level=pack_compression_level,
1222 pack_index_version=pack_index_version,
1223 pack_delta_window_size=pack_delta_window_size,
1224 pack_window_memory=pack_window_memory,
1225 pack_delta_cache_size=pack_delta_cache_size,
1226 pack_depth=pack_depth,
1227 pack_threads=pack_threads,
1228 pack_big_file_threshold=pack_big_file_threshold,
1229 )
1230 self.path = path
1231 self.pack_dir = os.path.join(self.path, PACKDIR)
1232 self._alternates = None
1233 self.loose_compression_level = loose_compression_level
1234 self.pack_compression_level = pack_compression_level
1235 self.pack_index_version = pack_index_version
1236
1237 # Commit graph support - lazy loaded
1238 self._commit_graph = None
1239 self._use_commit_graph = True # Default to true
1240
1241 def __repr__(self) -> str:
1242 """Return string representation of DiskObjectStore.
1243
1244 Returns:
1245 String representation including the store path
1246 """
1247 return f"<{self.__class__.__name__}({self.path!r})>"
1248
1249 @classmethod
1250 def from_config(
1251 cls, path: Union[str, os.PathLike[str]], config: "Config"
1252 ) -> "DiskObjectStore":
1253 """Create a DiskObjectStore from a configuration object.
1254
1255 Args:
1256 path: Path to the object store directory
1257 config: Configuration object to read settings from
1258
1259 Returns:
1260 New DiskObjectStore instance configured according to config
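
        Example (illustrative sketch; ``repo_config`` is a placeholder for an
        existing Config instance and the path is hypothetical)::

            store = DiskObjectStore.from_config(
                "/path/to/repo/.git/objects", repo_config
            )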
1261 """
1262 try:
1263 default_compression_level = int(
1264 config.get((b"core",), b"compression").decode()
1265 )
1266 except KeyError:
1267 default_compression_level = -1
1268 try:
1269 loose_compression_level = int(
1270 config.get((b"core",), b"looseCompression").decode()
1271 )
1272 except KeyError:
1273 loose_compression_level = default_compression_level
1274 try:
1275 pack_compression_level = int(
                config.get((b"core",), b"packCompression").decode()
1277 )
1278 except KeyError:
1279 pack_compression_level = default_compression_level
1280 try:
1281 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1282 except KeyError:
1283 pack_index_version = None
1284
1285 # Read pack configuration options
1286 try:
1287 pack_delta_window_size = int(
1288 config.get((b"pack",), b"deltaWindowSize").decode()
1289 )
1290 except KeyError:
1291 pack_delta_window_size = None
1292 try:
1293 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1294 except KeyError:
1295 pack_window_memory = None
1296 try:
1297 pack_delta_cache_size = int(
1298 config.get((b"pack",), b"deltaCacheSize").decode()
1299 )
1300 except KeyError:
1301 pack_delta_cache_size = None
1302 try:
1303 pack_depth = int(config.get((b"pack",), b"depth").decode())
1304 except KeyError:
1305 pack_depth = None
1306 try:
1307 pack_threads = int(config.get((b"pack",), b"threads").decode())
1308 except KeyError:
1309 pack_threads = None
1310 try:
1311 pack_big_file_threshold = int(
1312 config.get((b"pack",), b"bigFileThreshold").decode()
1313 )
1314 except KeyError:
1315 pack_big_file_threshold = None
1316
1317 # Read core.commitGraph setting
1318 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1319
1320 instance = cls(
1321 path,
1322 loose_compression_level,
1323 pack_compression_level,
1324 pack_index_version,
1325 pack_delta_window_size,
1326 pack_window_memory,
1327 pack_delta_cache_size,
1328 pack_depth,
1329 pack_threads,
1330 pack_big_file_threshold,
1331 )
1332 instance._use_commit_graph = use_commit_graph
1333 return instance
1334
1335 @property
1336 def alternates(self) -> list["BaseObjectStore"]:
1337 """Get the list of alternate object stores.
1338
1339 Reads from .git/objects/info/alternates if not already cached.
1340
1341 Returns:
1342 List of DiskObjectStore instances for alternate object directories
1343 """
1344 if self._alternates is not None:
1345 return self._alternates
1346 self._alternates = []
1347 for path in self._read_alternate_paths():
1348 self._alternates.append(DiskObjectStore(path))
1349 return self._alternates
1350
1351 def _read_alternate_paths(self) -> Iterator[str]:
1352 try:
1353 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1354 except FileNotFoundError:
1355 return
1356 with f:
1357 for line in f.readlines():
1358 line = line.rstrip(b"\n")
1359 if line.startswith(b"#"):
1360 continue
1361 if os.path.isabs(line):
1362 yield os.fsdecode(line)
1363 else:
1364 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1365
1366 def add_alternate_path(self, path: Union[str, os.PathLike[str]]) -> None:
1367 """Add an alternate path to this object store."""
1368 try:
1369 os.mkdir(os.path.join(self.path, INFODIR))
1370 except FileExistsError:
1371 pass
1372 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1373 with GitFile(alternates_path, "wb") as f:
1374 try:
1375 orig_f = open(alternates_path, "rb")
1376 except FileNotFoundError:
1377 pass
1378 else:
1379 with orig_f:
1380 f.write(orig_f.read())
1381 f.write(os.fsencode(path) + b"\n")
1382
1383 if not os.path.isabs(path):
1384 path = os.path.join(self.path, path)
1385 self.alternates.append(DiskObjectStore(path))
1386
1387 def _update_pack_cache(self) -> list[Pack]:
1388 """Read and iterate over new pack files and cache them."""
1389 try:
1390 pack_dir_contents = os.listdir(self.pack_dir)
1391 except FileNotFoundError:
1392 self.close()
1393 return []
1394 pack_files = set()
1395 for name in pack_dir_contents:
1396 if name.startswith("pack-") and name.endswith(".pack"):
1397 # verify that idx exists first (otherwise the pack was not yet
1398 # fully written)
1399 idx_name = os.path.splitext(name)[0] + ".idx"
1400 if idx_name in pack_dir_contents:
1401 pack_name = name[: -len(".pack")]
1402 pack_files.add(pack_name)
1403
1404 # Open newly appeared pack files
1405 new_packs = []
1406 for f in pack_files:
1407 if f not in self._pack_cache:
1408 pack = Pack(
1409 os.path.join(self.pack_dir, f),
1410 delta_window_size=self.pack_delta_window_size,
1411 window_memory=self.pack_window_memory,
1412 delta_cache_size=self.pack_delta_cache_size,
1413 depth=self.pack_depth,
1414 threads=self.pack_threads,
1415 big_file_threshold=self.pack_big_file_threshold,
1416 )
1417 new_packs.append(pack)
1418 self._pack_cache[f] = pack
1419 # Remove disappeared pack files
1420 for f in set(self._pack_cache) - pack_files:
1421 self._pack_cache.pop(f).close()
1422 return new_packs
1423
1424 def _get_shafile_path(self, sha: bytes) -> str:
1425 # Check from object dir
1426 return hex_to_filename(os.fspath(self.path), sha)
1427
1428 def _iter_loose_objects(self) -> Iterator[bytes]:
1429 for base in os.listdir(self.path):
1430 if len(base) != 2:
1431 continue
1432 for rest in os.listdir(os.path.join(self.path, base)):
1433 sha = os.fsencode(base + rest)
1434 if not valid_hexsha(sha):
1435 continue
1436 yield sha
1437
1438 def count_loose_objects(self) -> int:
1439 """Count the number of loose objects in the object store.
1440
1441 Returns:
1442 Number of loose objects
1443 """
1444 count = 0
1445 if not os.path.exists(self.path):
1446 return 0
1447
1448 for i in range(256):
1449 subdir = os.path.join(self.path, f"{i:02x}")
1450 try:
1451 count += len(
1452 [
1453 name
1454 for name in os.listdir(subdir)
1455 if len(name) == 38 # 40 - 2 for the prefix
1456 ]
1457 )
1458 except FileNotFoundError:
1459 # Directory may have been removed or is inaccessible
1460 continue
1461
1462 return count
1463
1464 def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]:
1465 path = self._get_shafile_path(sha)
1466 try:
1467 return ShaFile.from_path(path)
1468 except FileNotFoundError:
1469 return None
1470
1471 def delete_loose_object(self, sha: bytes) -> None:
1472 """Delete a loose object from disk.
1473
1474 Args:
1475 sha: SHA1 of the object to delete
1476
1477 Raises:
1478 FileNotFoundError: If the object file doesn't exist
1479 """
1480 os.remove(self._get_shafile_path(sha))
1481
1482 def get_object_mtime(self, sha: bytes) -> float:
1483 """Get the modification time of an object.
1484
1485 Args:
1486 sha: SHA1 of the object
1487
1488 Returns:
1489 Modification time as seconds since epoch
1490
1491 Raises:
1492 KeyError: if the object is not found
1493 """
1494 # First check if it's a loose object
1495 if self.contains_loose(sha):
1496 path = self._get_shafile_path(sha)
1497 try:
1498 return os.path.getmtime(path)
1499 except FileNotFoundError:
1500 pass
1501
1502 # Check if it's in a pack file
1503 for pack in self.packs:
1504 try:
1505 if sha in pack:
1506 # Use the pack file's mtime for packed objects
1507 pack_path = pack._data_path
1508 try:
1509 return os.path.getmtime(pack_path)
1510 except (FileNotFoundError, AttributeError):
1511 pass
1512 except PackFileDisappeared:
1513 pass
1514
1515 raise KeyError(sha)
1516
1517 def _remove_pack(self, pack: Pack) -> None:
1518 try:
1519 del self._pack_cache[os.path.basename(pack._basename)]
1520 except KeyError:
1521 pass
1522 pack.close()
1523 os.remove(pack.data.path)
1524 if hasattr(pack.index, "path"):
1525 os.remove(pack.index.path)
1526
1527 def _get_pack_basepath(
1528 self, entries: Iterable[tuple[bytes, int, Union[int, None]]]
1529 ) -> str:
1530 suffix_bytes = iter_sha1(entry[0] for entry in entries)
1531 # TODO: Handle self.pack_dir being bytes
1532 suffix = suffix_bytes.decode("ascii")
1533 return os.path.join(self.pack_dir, "pack-" + suffix)
1534
1535 def _complete_pack(
1536 self,
1537 f: BinaryIO,
1538 path: str,
1539 num_objects: int,
1540 indexer: PackIndexer,
1541 progress: Optional[Callable[..., None]] = None,
1542 ) -> Pack:
1543 """Move a specific file containing a pack into the pack directory.
1544
1545 Note: The file should be on the same file system as the
1546 packs directory.
1547
1548 Args:
1549 f: Open file object for the pack.
1550 path: Path to the pack file.
1551 num_objects: Number of objects in the pack.
1552 indexer: A PackIndexer for indexing the pack.
1553 progress: Optional progress reporting function.
1554 """
1555 entries = []
1556 for i, entry in enumerate(indexer):
1557 if progress is not None:
1558 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1559 entries.append(entry)
1560
1561 pack_sha, extra_entries = extend_pack(
1562 f,
1563 set(indexer.ext_refs()),
1564 get_raw=self.get_raw,
1565 compression_level=self.pack_compression_level,
1566 progress=progress,
1567 )
1568 f.flush()
1569 try:
1570 fileno = f.fileno()
1571 except AttributeError:
1572 pass
1573 else:
1574 os.fsync(fileno)
1575 f.close()
1576
1577 entries.extend(extra_entries)
1578
1579 # Move the pack in.
1580 entries.sort()
1581 pack_base_name = self._get_pack_basepath(entries)
1582
1583 for pack in self.packs:
1584 if pack._basename == pack_base_name:
1585 return pack
1586
1587 target_pack_path = pack_base_name + ".pack"
1588 target_index_path = pack_base_name + ".idx"
1589 if sys.platform == "win32":
1590 # Windows might have the target pack file lingering. Attempt
1591 # removal, silently passing if the target does not exist.
1592 with suppress(FileNotFoundError):
1593 os.remove(target_pack_path)
1594 os.rename(path, target_pack_path)
1595
1596 # Write the index.
1597 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
1598 write_pack_index(
1599 index_file, entries, pack_sha, version=self.pack_index_version
1600 )
1601
1602 # Add the pack to the store and return it.
1603 final_pack = Pack(
1604 pack_base_name,
1605 delta_window_size=self.pack_delta_window_size,
1606 window_memory=self.pack_window_memory,
1607 delta_cache_size=self.pack_delta_cache_size,
1608 depth=self.pack_depth,
1609 threads=self.pack_threads,
1610 big_file_threshold=self.pack_big_file_threshold,
1611 )
1612 final_pack.check_length_and_checksum()
1613 self._add_cached_pack(pack_base_name, final_pack)
1614 return final_pack
1615
1616 def add_thin_pack(
1617 self,
1618 read_all: Callable[[int], bytes],
1619 read_some: Optional[Callable[[int], bytes]],
1620 progress: Optional[Callable[..., None]] = None,
1621 ) -> "Pack":
1622 """Add a new thin pack to this object store.
1623
1624 Thin packs are packs that contain deltas with parents that exist
1625 outside the pack. They should never be placed in the object store
1626 directly, and always indexed and completed as they are copied.
1627
1628 Args:
1629 read_all: Read function that blocks until the number of
1630 requested bytes are read.
1631 read_some: Read function that returns at least one byte, but may
1632 not return the number of bytes requested.
1633 progress: Optional progress reporting function.
1634 Returns: A Pack object pointing at the now-completed thin pack in the
1635 objects/pack directory.
1636 """
1637 import tempfile
1638
1639 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1640 with os.fdopen(fd, "w+b") as f:
1641 os.chmod(path, PACK_MODE)
1642 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type]
1643 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type]
1644 copier.verify(progress=progress)
1645 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1646
1647 def add_pack(
1648 self,
1649 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1650 """Add a new pack to this object store.
1651
        Returns: File object to write to, a commit function to call when the
            pack is finished, and an abort function.
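
        Example (illustrative sketch of the commit/abort protocol;
        ``pack_bytes`` is a placeholder for externally produced pack data)::

            f, commit, abort = store.add_pack()
            try:
                f.write(pack_bytes)
            except BaseException:
                abort()
                raise
            else:
                commit()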
1655 """
1656 import tempfile
1657
1658 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1659 f = os.fdopen(fd, "w+b")
1660 os.chmod(path, PACK_MODE)
1661
1662 def commit() -> Optional["Pack"]:
1663 if f.tell() > 0:
1664 f.seek(0)
1665
1666 with PackData(path, f) as pd:
1667 indexer = PackIndexer.for_pack_data(
1668 pd,
1669 resolve_ext_ref=self.get_raw, # type: ignore[arg-type]
1670 )
1671 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type]
1672 else:
1673 f.close()
1674 os.remove(path)
1675 return None
1676
1677 def abort() -> None:
1678 f.close()
1679 os.remove(path)
1680
1681 return f, commit, abort # type: ignore[return-value]
1682
1683 def add_object(self, obj: ShaFile) -> None:
1684 """Add a single object to this object store.
1685
1686 Args:
1687 obj: Object to add
1688 """
1689 path = self._get_shafile_path(obj.id)
1690 dir = os.path.dirname(path)
1691 try:
1692 os.mkdir(dir)
1693 except FileExistsError:
1694 pass
1695 if os.path.exists(path):
1696 return # Already there, no need to write again
1697 with GitFile(path, "wb", mask=PACK_MODE) as f:
1698 f.write(
1699 obj.as_legacy_object(compression_level=self.loose_compression_level)
1700 )
1701
1702 @classmethod
1703 def init(cls, path: Union[str, os.PathLike[str]]) -> "DiskObjectStore":
1704 """Initialize a new disk object store.
1705
1706 Creates the necessary directory structure for a Git object store.
1707
1708 Args:
1709 path: Path where the object store should be created
1710
1711 Returns:
1712 New DiskObjectStore instance
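
        Example (illustrative; the path is hypothetical)::

            store = DiskObjectStore.init("/tmp/example/objects")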
1713 """
1714 try:
1715 os.mkdir(path)
1716 except FileExistsError:
1717 pass
1718 os.mkdir(os.path.join(path, "info"))
1719 os.mkdir(os.path.join(path, PACKDIR))
1720 return cls(path)
1721
1722 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]:
1723 """Iterate over all object SHAs with the given prefix.
1724
1725 Args:
1726 prefix: Hex prefix to search for (as bytes)
1727
1728 Returns:
1729 Iterator of object SHAs (as bytes) matching the prefix
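
        Example (illustrative; ``store`` is a placeholder)::

            matches = list(store.iter_prefix(b"ab12"))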
1730 """
1731 if len(prefix) < 2:
1732 yield from super().iter_prefix(prefix)
1733 return
1734 seen = set()
1735 dir = prefix[:2].decode()
1736 rest = prefix[2:].decode()
1737 try:
1738 for name in os.listdir(os.path.join(self.path, dir)):
1739 if name.startswith(rest):
1740 sha = os.fsencode(dir + name)
1741 if sha not in seen:
1742 seen.add(sha)
1743 yield sha
1744 except FileNotFoundError:
1745 pass
1746
1747 for p in self.packs:
1748 bin_prefix = (
1749 binascii.unhexlify(prefix)
1750 if len(prefix) % 2 == 0
1751 else binascii.unhexlify(prefix[:-1])
1752 )
1753 for sha in p.index.iter_prefix(bin_prefix):
1754 sha = sha_to_hex(sha)
1755 if sha.startswith(prefix) and sha not in seen:
1756 seen.add(sha)
1757 yield sha
1758 for alternate in self.alternates:
1759 for sha in alternate.iter_prefix(prefix):
1760 if sha not in seen:
1761 seen.add(sha)
1762 yield sha
1763
1764 def get_commit_graph(self) -> Optional["CommitGraph"]:
1765 """Get the commit graph for this object store.
1766
1767 Returns:
1768 CommitGraph object if available, None otherwise
1769 """
1770 if not self._use_commit_graph:
1771 return None
1772
1773 if self._commit_graph is None:
1774 from .commit_graph import read_commit_graph
1775
1776 # Look for commit graph in our objects directory
1777 graph_file = os.path.join(self.path, "info", "commit-graph")
1778 if os.path.exists(graph_file):
1779 self._commit_graph = read_commit_graph(graph_file)
1780 return self._commit_graph
1781
1782 def write_commit_graph(
1783 self, refs: Optional[Iterable[bytes]] = None, reachable: bool = True
1784 ) -> None:
1785 """Write a commit graph file for this object store.
1786
1787 Args:
1788 refs: List of refs to include. If None, includes all refs from object store.
1789 reachable: If True, includes all commits reachable from refs.
1790 If False, only includes the direct ref targets.
1791 """
1792 from .commit_graph import get_reachable_commits
1793
1794 if refs is None:
1795 # Get all commit objects from the object store
1796 all_refs = []
1797 # Iterate through all objects to find commits
1798 for sha in self:
1799 try:
1800 obj = self[sha]
1801 if obj.type_name == b"commit":
1802 all_refs.append(sha)
1803 except KeyError:
1804 continue
1805 else:
1806 # Use provided refs
1807 all_refs = list(refs)
1808
1809 if not all_refs:
1810 return # No commits to include
1811
1812 if reachable:
1813 # Get all reachable commits
1814 commit_ids = get_reachable_commits(self, all_refs)
1815 else:
1816 # Just use the direct ref targets - ensure they're hex ObjectIDs
1817 commit_ids = []
1818 for ref in all_refs:
1819 if isinstance(ref, bytes) and len(ref) == 40:
1820 # Already hex ObjectID
1821 commit_ids.append(ref)
1822 elif isinstance(ref, bytes) and len(ref) == 20:
1823 # Binary SHA, convert to hex ObjectID
1824 from .objects import sha_to_hex
1825
1826 commit_ids.append(sha_to_hex(ref))
1827 else:
1828 # Assume it's already correct format
1829 commit_ids.append(ref)
1830
1831 if commit_ids:
1832 # Write commit graph directly to our object store path
1833 # Generate the commit graph
1834 from .commit_graph import generate_commit_graph
1835
1836 graph = generate_commit_graph(self, commit_ids)
1837
1838 if graph.entries:
1839 # Ensure the info directory exists
1840 info_dir = os.path.join(self.path, "info")
1841 os.makedirs(info_dir, exist_ok=True)
1842
1843 # Write using GitFile for atomic operation
1844 graph_path = os.path.join(info_dir, "commit-graph")
1845 with GitFile(graph_path, "wb") as f:
1846 assert isinstance(
1847 f, _GitFile
1848 ) # GitFile in write mode always returns _GitFile
1849 graph.write_to_file(f)
1850
1851 # Clear cached commit graph so it gets reloaded
1852 self._commit_graph = None
1853
1854 def prune(self, grace_period: Optional[int] = None) -> None:
1855 """Prune/clean up this object store.
1856
1857 This removes temporary files that were left behind by interrupted
1858 pack operations. These are files that start with ``tmp_pack_`` in the
1859 repository directory or files with .pack extension but no corresponding
1860 .idx file in the pack directory.
1861
1862 Args:
1863 grace_period: Grace period in seconds for removing temporary files.
1864 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
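
        Example (illustrative sketch)::

            store.prune()                # honour the default two-week grace period
            store.prune(grace_period=0)  # remove leftover temporary files immediately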
1865 """
1866 import glob
1867
1868 if grace_period is None:
1869 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1870
1871 # Clean up tmp_pack_* files in the repository directory
1872 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1873 # Check if file is old enough (more than grace period)
1874 mtime = os.path.getmtime(tmp_file)
1875 if time.time() - mtime > grace_period:
1876 os.remove(tmp_file)
1877
1878 # Clean up orphaned .pack files without corresponding .idx files
1879 try:
1880 pack_dir_contents = os.listdir(self.pack_dir)
1881 except FileNotFoundError:
1882 return
1883
1884 pack_files = {}
1885 idx_files = set()
1886
1887 for name in pack_dir_contents:
1888 if name.endswith(".pack"):
1889 base_name = name[:-5] # Remove .pack extension
1890 pack_files[base_name] = name
1891 elif name.endswith(".idx"):
1892 base_name = name[:-4] # Remove .idx extension
1893 idx_files.add(base_name)
1894
1895 # Remove .pack files without corresponding .idx files
1896 for base_name, pack_name in pack_files.items():
1897 if base_name not in idx_files:
1898 pack_path = os.path.join(self.pack_dir, pack_name)
1899 # Check if file is old enough (more than grace period)
1900 mtime = os.path.getmtime(pack_path)
1901 if time.time() - mtime > grace_period:
1902 os.remove(pack_path)
1903
1904
1905class MemoryObjectStore(PackCapableObjectStore):
1906 """Object store that keeps all objects in memory."""
1907
1908 def __init__(self) -> None:
1909 """Initialize a MemoryObjectStore.
1910
1911 Creates an empty in-memory object store.
1912 """
1913 super().__init__()
1914 self._data: dict[bytes, ShaFile] = {}
1915 self.pack_compression_level = -1
1916
1917 def _to_hexsha(self, sha: bytes) -> bytes:
1918 if len(sha) == 40:
1919 return sha
1920 elif len(sha) == 20:
1921 return sha_to_hex(sha)
1922 else:
1923 raise ValueError(f"Invalid sha {sha!r}")
1924
1925 def contains_loose(self, sha: bytes) -> bool:
1926 """Check if a particular object is present by SHA1 and is loose."""
1927 return self._to_hexsha(sha) in self._data
1928
1929 def contains_packed(self, sha: bytes) -> bool:
1930 """Check if a particular object is present by SHA1 and is packed."""
1931 return False
1932
1933 def __iter__(self) -> Iterator[bytes]:
1934 """Iterate over the SHAs that are present in this store."""
1935 return iter(self._data.keys())
1936
1937 @property
1938 def packs(self) -> list[Pack]:
1939 """List with pack objects."""
1940 return []
1941
1942 def get_raw(self, name: ObjectID) -> tuple[int, bytes]:
1943 """Obtain the raw text for an object.
1944
1945 Args:
1946 name: sha for the object.
1947 Returns: tuple with numeric type and object contents.
1948 """
1949 obj = self[self._to_hexsha(name)]
1950 return obj.type_num, obj.as_raw_string()
1951
1952 def __getitem__(self, name: ObjectID) -> ShaFile:
1953 """Retrieve an object by SHA.
1954
1955 Args:
1956 name: SHA of the object (as hex string or bytes)
1957
1958 Returns:
1959 Copy of the ShaFile object
1960
1961 Raises:
1962 KeyError: If the object is not found
1963 """
1964 return self._data[self._to_hexsha(name)].copy()
1965
1966 def __delitem__(self, name: ObjectID) -> None:
1967 """Delete an object from this store, for testing only."""
1968 del self._data[self._to_hexsha(name)]
1969
1970 def add_object(self, obj: ShaFile) -> None:
1971 """Add a single object to this object store."""
1972 self._data[obj.id] = obj.copy()
1973
1974 def add_objects(
1975 self,
1976 objects: Iterable[tuple[ShaFile, Optional[str]]],
1977 progress: Optional[Callable[[str], None]] = None,
1978 ) -> None:
1979 """Add a set of objects to this object store.
1980
1981 Args:
1982 objects: Iterable over a list of (object, path) tuples
1983 progress: Optional progress reporting function.
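
        Example (illustrative sketch)::

            from dulwich.objects import Blob

            store = MemoryObjectStore()
            blob = Blob.from_string(b"hello world")
            store.add_objects([(blob, None)])
            assert blob.id in store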
1984 """
1985 for obj, path in objects:
1986 self.add_object(obj)
1987
1988 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1989 """Add a new pack to this object store.
1990
1991 Because this object store doesn't support packs, we extract and add the
1992 individual objects.
1993
        Returns: Fileobject to write to, a commit function to call when
            the pack is finished, and an abort function.
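
        Example (illustrative sketch; ``pack_bytes`` is assumed to hold a
        complete pack stream)::

            f, commit, abort = store.add_pack()
            try:
                f.write(pack_bytes)
            except Exception:
                abort()
                raise
            else:
                commit()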
1996 """
1997 from tempfile import SpooledTemporaryFile
1998
1999 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
2000
2001 def commit() -> None:
2002 size = f.tell()
2003 if size > 0:
2004 f.seek(0)
2005
2006 p = PackData.from_file(f, size)
2007 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type]
2008 self.add_object(obj)
2009 p.close()
2010 f.close()
2011 else:
2012 f.close()
2013
2014 def abort() -> None:
2015 f.close()
2016
2017 return f, commit, abort # type: ignore[return-value]
2018
2019 def add_pack_data(
2020 self,
2021 count: int,
2022 unpacked_objects: Iterator[UnpackedObject],
2023 progress: Optional[Callable[[str], None]] = None,
2024 ) -> None:
2025 """Add pack data to this object store.
2026
2027 Args:
2028 count: Number of items to add
2029 unpacked_objects: Iterator of UnpackedObject instances
2030 progress: Optional progress reporting function.
2031 """
2032 if count == 0:
2033 return
2034
2035 # Since MemoryObjectStore doesn't support pack files, we need to
2036 # extract individual objects. To handle deltas properly, we write
2037 # to a temporary pack and then use PackInflater to resolve them.
2038 f, commit, abort = self.add_pack()
2039 try:
2040 write_pack_data(
2041 f.write,
2042 unpacked_objects,
2043 num_records=count,
2044 progress=progress,
2045 )
2046 except BaseException:
2047 abort()
2048 raise
2049 else:
2050 commit()
2051
2052 def add_thin_pack(
2053 self,
2054 read_all: Callable[[], bytes],
2055 read_some: Callable[[int], bytes],
2056 progress: Optional[Callable[[str], None]] = None,
2057 ) -> None:
2058 """Add a new thin pack to this object store.
2059
2060 Thin packs are packs that contain deltas with parents that exist
2061 outside the pack. Because this object store doesn't support packs, we
2062 extract and add the individual objects.
2063
2064 Args:
2065 read_all: Read function that blocks until the number of
2066 requested bytes are read.
2067 read_some: Read function that returns at least one byte, but may
2068 not return the number of bytes requested.
2069 progress: Optional progress reporting function.
2070 """
2071 f, commit, abort = self.add_pack()
2072 try:
2073 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type]
2074 copier.verify()
2075 except BaseException:
2076 abort()
2077 raise
2078 else:
2079 commit()
2080
2081
2082class ObjectIterator(Protocol):
2083 """Interface for iterating over objects."""
2084
2085 def iterobjects(self) -> Iterator[ShaFile]:
2086 """Iterate over all objects.
2087
2088 Returns:
2089 Iterator of ShaFile objects
2090 """
2091 raise NotImplementedError(self.iterobjects)
2092
2093
2094def tree_lookup_path(
2095 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes
2096) -> tuple[int, bytes]:
2097 """Look up an object in a Git tree.
2098
2099 Args:
2100 lookup_obj: Callback for retrieving object by SHA1
2101 root_sha: SHA1 of the root tree
2102 path: Path to lookup
2103 Returns: A tuple of (mode, SHA) of the resulting path.
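
    Example (illustrative sketch; the path is a placeholder)::

        mode, sha = tree_lookup_path(store.__getitem__, commit.tree, b"docs/index.md")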
2104 """
2105 tree = lookup_obj(root_sha)
2106 if not isinstance(tree, Tree):
2107 raise NotTreeError(root_sha)
2108 return tree.lookup_path(lookup_obj, path)
2109
2110
2111def _collect_filetree_revs(
2112 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
2113) -> None:
2114 """Collect SHA1s of files and directories for specified tree.
2115
2116 Args:
2117 obj_store: Object store to get objects by SHA from
2118 tree_sha: tree reference to walk
2119 kset: set to fill with references to files and directories
2120 """
2121 filetree = obj_store[tree_sha]
2122 assert isinstance(filetree, Tree)
2123 for name, mode, sha in filetree.iteritems():
2124 assert mode is not None
2125 assert sha is not None
2126 if not S_ISGITLINK(mode) and sha not in kset:
2127 kset.add(sha)
2128 if stat.S_ISDIR(mode):
2129 _collect_filetree_revs(obj_store, sha, kset)
2130
2131
2132def _split_commits_and_tags(
2133 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False
2134) -> tuple[set[bytes], set[bytes], set[bytes]]:
2135 """Split object id list into three lists with commit, tag, and other SHAs.
2136
    Commits referenced by tags are included in the commits
    list as well. Only SHA1s known in this repository will get
    through, and unless the ignore_unknown argument is True, a
    KeyError is raised for any SHA1 missing from the repository.
2141
2142 Args:
2143 obj_store: Object store to get objects by SHA1 from
2144 lst: Collection of commit and tag SHAs
2145 ignore_unknown: True to skip SHA1 missing in the repository
2146 silently.
2147 Returns: A tuple of (commits, tags, others) SHA1s
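
    Example (illustrative sketch; ``refs_to_send`` is a placeholder
    collection of SHAs)::

        commits, tags, others = _split_commits_and_tags(
            store, refs_to_send, ignore_unknown=True
        )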
2148 """
2149 commits: set[bytes] = set()
2150 tags: set[bytes] = set()
2151 others: set[bytes] = set()
2152 for e in lst:
2153 try:
2154 o = obj_store[e]
2155 except KeyError:
2156 if not ignore_unknown:
2157 raise
2158 else:
2159 if isinstance(o, Commit):
2160 commits.add(e)
2161 elif isinstance(o, Tag):
2162 tags.add(e)
2163 tagged = o.object[1]
2164 c, t, os = _split_commits_and_tags(
2165 obj_store, [tagged], ignore_unknown=ignore_unknown
2166 )
2167 commits |= c
2168 tags |= t
2169 others |= os
2170 else:
2171 others.add(e)
2172 return (commits, tags, others)
2173
2174
2175class MissingObjectFinder:
2176 """Find the objects missing from another object store.
2177
2178 Args:
2179 object_store: Object store containing at least all objects to be
2180 sent
2181 haves: SHA1s of commits not to send (already present in target)
2182 wants: SHA1s of commits to send
2183 progress: Optional function to report progress to.
2184 get_tagged: Function that returns a dict of pointed-to sha -> tag
2185 sha for including tags.
2186 get_parents: Optional function for getting the parents of a commit.
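
    Example (illustrative sketch; ``old_head`` and ``new_head`` are placeholder
    commit SHAs)::

        finder = MissingObjectFinder(store, haves=[old_head], wants=[new_head])
        for sha, pack_hint in finder:
            ...  # each yielded sha is absent from the target and must be sent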
2187 """
2188
2189 def __init__(
2190 self,
2191 object_store: BaseObjectStore,
2192 haves: Iterable[bytes],
2193 wants: Iterable[bytes],
2194 *,
2195 shallow: Optional[Set[bytes]] = None,
2196 progress: Optional[Callable[[bytes], None]] = None,
2197 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
2198 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2199 ) -> None:
2200 """Initialize a MissingObjectFinder.
2201
2202 Args:
2203 object_store: Object store containing objects
2204 haves: SHA1s of objects already present in target
2205 wants: SHA1s of objects to send
2206 shallow: Set of shallow commit SHA1s
2207 progress: Optional progress reporting callback
2208 get_tagged: Function returning dict of pointed-to sha -> tag sha
2209 get_parents: Function for getting commit parents
2210 """
2211 self.object_store = object_store
2212 if shallow is None:
2213 shallow = set()
2214 self._get_parents = get_parents
2215 # process Commits and Tags differently
        # Note: while haves may list commits/tags not available locally,
        # such SHAs are simply filtered out by _split_commits_and_tags;
        # wants, however, must list only known SHAs, otherwise
        # _split_commits_and_tags raises KeyError.
2220 have_commits, have_tags, have_others = _split_commits_and_tags(
2221 object_store, haves, ignore_unknown=True
2222 )
2223 want_commits, want_tags, want_others = _split_commits_and_tags(
2224 object_store, wants, ignore_unknown=False
2225 )
2226 # all_ancestors is a set of commits that shall not be sent
2227 # (complete repository up to 'haves')
2228 all_ancestors = _collect_ancestors(
2229 object_store,
2230 have_commits,
2231 shallow=frozenset(shallow),
2232 get_parents=self._get_parents,
2233 )[0]
        # missing_commits - complete set of commits between haves and wants
        # common_commits - commits from all_ancestors we hit while traversing
        # the parent hierarchy of wants
2237 missing_commits, common_commits = _collect_ancestors(
2238 object_store,
2239 want_commits,
2240 frozenset(all_ancestors),
2241 shallow=frozenset(shallow),
2242 get_parents=self._get_parents,
2243 )
2244 self.remote_has: set[bytes] = set()
        # Now, fill remote_has (and thus the initial sha_done) with commits
        # and revisions of files and directories known to exist both locally
        # and on the target, so they won't get selected for fetch.
2249 for h in common_commits:
2250 self.remote_has.add(h)
2251 cmt = object_store[h]
2252 assert isinstance(cmt, Commit)
2253 _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
2254 # record tags we have as visited, too
2255 for t in have_tags:
2256 self.remote_has.add(t)
2257 self.sha_done = set(self.remote_has)
2258
2259 # in fact, what we 'want' is commits, tags, and others
2260 # we've found missing
2261 self.objects_to_send: set[
2262 tuple[ObjectID, Optional[bytes], Optional[int], bool]
2263 ] = {(w, None, Commit.type_num, False) for w in missing_commits}
2264 missing_tags = want_tags.difference(have_tags)
2265 self.objects_to_send.update(
2266 {(w, None, Tag.type_num, False) for w in missing_tags}
2267 )
2268 missing_others = want_others.difference(have_others)
2269 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
2270
2271 if progress is None:
2272 self.progress: Callable[[bytes], None] = lambda x: None
2273 else:
2274 self.progress = progress
2275 self._tagged = (get_tagged and get_tagged()) or {}
2276
2277 def get_remote_has(self) -> set[bytes]:
2278 """Get the set of SHAs the remote has.
2279
2280 Returns:
2281 Set of SHA1s that the remote side already has
2282 """
2283 return self.remote_has
2284
2285 def add_todo(
2286 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
2287 ) -> None:
2288 """Add objects to the todo list.
2289
2290 Args:
2291 entries: Iterable of tuples (sha, name, type_num, is_leaf)
2292 """
2293 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
2294
2295 def __next__(self) -> tuple[bytes, Optional[PackHint]]:
2296 """Get the next object to send.
2297
2298 Returns:
2299 Tuple of (sha, pack_hint)
2300
2301 Raises:
2302 StopIteration: When no more objects to send
2303 """
2304 while True:
2305 if not self.objects_to_send:
2306 self.progress(
2307 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
2308 )
2309 raise StopIteration
2310 (sha, name, type_num, leaf) = self.objects_to_send.pop()
2311 if sha not in self.sha_done:
2312 break
2313 if not leaf:
2314 o = self.object_store[sha]
2315 if isinstance(o, Commit):
2316 self.add_todo([(o.tree, b"", Tree.type_num, False)])
2317 elif isinstance(o, Tree):
2318 todos = []
2319 for n, m, s in o.iteritems():
2320 assert m is not None
2321 assert n is not None
2322 assert s is not None
2323 if not S_ISGITLINK(m):
2324 todos.append(
2325 (
2326 s,
2327 n,
2328 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
2329 not stat.S_ISDIR(m),
2330 )
2331 )
2332 self.add_todo(todos)
2333 elif isinstance(o, Tag):
2334 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
2335 if sha in self._tagged:
2336 self.add_todo([(self._tagged[sha], None, None, True)])
2337 self.sha_done.add(sha)
2338 if len(self.sha_done) % 1000 == 0:
2339 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
2340 if type_num is None:
2341 pack_hint = None
2342 else:
2343 pack_hint = (type_num, name)
2344 return (sha, pack_hint)
2345
2346 def __iter__(self) -> Iterator[tuple[bytes, Optional[PackHint]]]:
2347 """Return iterator over objects to send.
2348
2349 Returns:
2350 Self (this class implements the iterator protocol)
2351 """
2352 return self
2353
2354
2355class ObjectStoreGraphWalker:
2356 """Graph walker that finds what commits are missing from an object store."""
2357
2358 heads: set[ObjectID]
2359 """Revisions without descendants in the local repo."""
2360
2361 get_parents: Callable[[ObjectID], list[ObjectID]]
2362 """Function to retrieve parents in the local repo."""
2363
2364 shallow: set[ObjectID]
2365
2366 def __init__(
2367 self,
2368 local_heads: Iterable[ObjectID],
2369 get_parents: Callable[[ObjectID], list[ObjectID]],
2370 shallow: Optional[set[ObjectID]] = None,
2371 update_shallow: Optional[
2372 Callable[[Optional[set[ObjectID]], Optional[set[ObjectID]]], None]
2373 ] = None,
2374 ) -> None:
2375 """Create a new instance.
2376
2377 Args:
2378 local_heads: Heads to start search with
2379 get_parents: Function for finding the parents of a SHA1.
2380 shallow: Set of shallow commits.
2381 update_shallow: Function to update shallow commits.
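
        Example (illustrative sketch; looking parents up through an object
        store is an assumption, any callable with the same signature works)::

            walker = ObjectStoreGraphWalker(
                [head_sha], lambda sha: store[sha].parents
            )
            candidate = next(walker)  # a commit the remote may already have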
2382 """
2383 self.heads = set(local_heads)
2384 self.get_parents = get_parents
2385 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
2386 if shallow is None:
2387 shallow = set()
2388 self.shallow = shallow
2389 self.update_shallow = update_shallow
2390
2391 def nak(self) -> None:
2392 """Nothing in common was found."""
2393
2394 def ack(self, sha: ObjectID) -> None:
2395 """Ack that a revision and its ancestors are present in the source."""
2396 if len(sha) != 40:
2397 raise ValueError(f"unexpected sha {sha!r} received")
2398 ancestors = {sha}
2399
2400 # stop if we run out of heads to remove
2401 while self.heads:
2402 for a in ancestors:
2403 if a in self.heads:
2404 self.heads.remove(a)
2405
2406 # collect all ancestors
2407 new_ancestors = set()
2408 for a in ancestors:
2409 ps = self.parents.get(a)
2410 if ps is not None:
2411 new_ancestors.update(ps)
2412 self.parents[a] = None
2413
2414 # no more ancestors; stop
2415 if not new_ancestors:
2416 break
2417
2418 ancestors = new_ancestors
2419
2420 def next(self) -> Optional[ObjectID]:
2421 """Iterate over ancestors of heads in the target."""
2422 if self.heads:
2423 ret = self.heads.pop()
2424 try:
2425 ps = self.get_parents(ret)
2426 except KeyError:
2427 return None
2428 self.parents[ret] = ps
2429 self.heads.update([p for p in ps if p not in self.parents])
2430 return ret
2431 return None
2432
2433 __next__ = next
2434
2435
2436def commit_tree_changes(
2437 object_store: BaseObjectStore,
2438 tree: Union[ObjectID, Tree],
2439 changes: Sequence[tuple[bytes, Optional[int], Optional[bytes]]],
2440) -> ObjectID:
2441 """Commit a specified set of changes to a tree structure.
2442
2443 This will apply a set of changes on top of an existing tree, storing new
2444 objects in object_store.
2445
2446 changes are a list of tuples with (path, mode, object_sha).
    Paths can refer to both blobs and trees. Setting the mode and
    object sha to None deletes the path.
2449
2450 This method works especially well if there are only a small
2451 number of changes to a big tree. For a large number of changes
2452 to a large tree, use e.g. commit_tree.
2453
2454 Args:
2455 object_store: Object store to store new objects in
2456 and retrieve old ones from.
2457 tree: Original tree root (SHA or Tree object)
2458 changes: changes to apply
2459 Returns: New tree root object
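
    Example (illustrative sketch; ``blob`` is assumed to already be in the
    store and the paths are placeholders)::

        new_root_id = commit_tree_changes(
            store,
            old_tree_id,
            [
                (b"docs/new.txt", 0o100644, blob.id),  # add or update a file
                (b"obsolete.txt", None, None),         # delete this path
            ],
        )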
2460 """
2461 # TODO(jelmer): Save up the objects and add them using .add_objects
2462 # rather than with individual calls to .add_object.
2463 # Handle both Tree object and SHA
2464 if isinstance(tree, Tree):
2465 tree_obj: Tree = tree
2466 else:
2467 sha_obj = object_store[tree]
2468 assert isinstance(sha_obj, Tree)
2469 tree_obj = sha_obj
2470 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {}
2471 for path, new_mode, new_sha in changes:
2472 try:
2473 (dirname, subpath) = path.split(b"/", 1)
2474 except ValueError:
2475 if new_sha is None:
2476 del tree_obj[path]
2477 else:
2478 assert new_mode is not None
2479 tree_obj[path] = (new_mode, new_sha)
2480 else:
2481 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
2482 for name, subchanges in nested_changes.items():
2483 try:
2484 orig_subtree_id: Union[bytes, Tree] = tree_obj[name][1]
2485 except KeyError:
2486 # For new directories, pass an empty Tree object
2487 orig_subtree_id = Tree()
2488 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
2489 subtree = object_store[subtree_id]
2490 assert isinstance(subtree, Tree)
2491 if len(subtree) == 0:
2492 del tree_obj[name]
2493 else:
2494 tree_obj[name] = (stat.S_IFDIR, subtree.id)
2495 object_store.add_object(tree_obj)
2496 return tree_obj.id
2497
2498
2499class OverlayObjectStore(BaseObjectStore):
2500 """Object store that can overlay multiple object stores."""
2501
2502 def __init__(
2503 self,
2504 bases: list[BaseObjectStore],
2505 add_store: Optional[BaseObjectStore] = None,
2506 ) -> None:
2507 """Initialize an OverlayObjectStore.
2508
2509 Args:
2510 bases: List of base object stores to overlay
2511 add_store: Optional store to write new objects to
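
        Example (illustrative sketch; the store names are placeholders)::

            overlay = OverlayObjectStore([fast_store, slow_store], add_store=fast_store)
            obj = overlay[some_sha]  # reads consult every base; writes go to add_store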
2512 """
2513 self.bases = bases
2514 self.add_store = add_store
2515
2516 def add_object(self, object: ShaFile) -> None:
2517 """Add a single object to the store.
2518
2519 Args:
2520 object: Object to add
2521
2522 Raises:
2523 NotImplementedError: If no add_store was provided
2524 """
2525 if self.add_store is None:
2526 raise NotImplementedError(self.add_object)
2527 return self.add_store.add_object(object)
2528
2529 def add_objects(
2530 self,
2531 objects: Sequence[tuple[ShaFile, Optional[str]]],
2532 progress: Optional[Callable[[str], None]] = None,
2533 ) -> Optional[Pack]:
2534 """Add multiple objects to the store.
2535
2536 Args:
            objects: Sequence of (object, path) tuples to add
2538 progress: Optional progress reporting callback
2539
2540 Raises:
2541 NotImplementedError: If no add_store was provided
2542 """
2543 if self.add_store is None:
            raise NotImplementedError(self.add_objects)
2545 return self.add_store.add_objects(objects, progress)
2546
2547 @property
2548 def packs(self) -> list[Pack]:
2549 """Get the list of packs from all overlaid stores.
2550
2551 Returns:
2552 Combined list of packs from all base stores
2553 """
2554 ret = []
2555 for b in self.bases:
2556 ret.extend(b.packs)
2557 return ret
2558
2559 def __iter__(self) -> Iterator[ObjectID]:
2560 """Iterate over all object SHAs in the overlaid stores.
2561
2562 Returns:
2563 Iterator of object SHAs (deduped across stores)
2564 """
2565 done = set()
2566 for b in self.bases:
2567 for o_id in b:
2568 if o_id not in done:
2569 yield o_id
2570 done.add(o_id)
2571
2572 def iterobjects_subset(
2573 self, shas: Iterable[bytes], *, allow_missing: bool = False
2574 ) -> Iterator[ShaFile]:
2575 """Iterate over a subset of objects from the overlaid stores.
2576
2577 Args:
2578 shas: Iterable of object SHAs to retrieve
2579 allow_missing: If True, skip missing objects; if False, raise KeyError
2580
2581 Returns:
2582 Iterator of ShaFile objects
2583
2584 Raises:
2585 KeyError: If an object is missing and allow_missing is False
2586 """
2587 todo = set(shas)
2588 found: set[bytes] = set()
2589
2590 for b in self.bases:
2591 # Create a copy of todo for each base to avoid modifying
2592 # the set while iterating through it
2593 current_todo = todo - found
2594 for o in b.iterobjects_subset(current_todo, allow_missing=True):
2595 yield o
2596 found.add(o.id)
2597
2598 # Check for any remaining objects not found
2599 missing = todo - found
2600 if missing and not allow_missing:
2601 raise KeyError(next(iter(missing)))
2602
2603 def iter_unpacked_subset(
2604 self,
2605 shas: Iterable[bytes],
2606 include_comp: bool = False,
2607 allow_missing: bool = False,
2608 convert_ofs_delta: bool = True,
2609 ) -> Iterator[UnpackedObject]:
2610 """Iterate over unpacked objects from the overlaid stores.
2611
2612 Args:
2613 shas: Iterable of object SHAs to retrieve
2614 include_comp: Whether to include compressed data
2615 allow_missing: If True, skip missing objects; if False, raise KeyError
2616 convert_ofs_delta: Whether to convert OFS_DELTA objects
2617
2618 Returns:
2619 Iterator of unpacked objects
2620
2621 Raises:
2622 KeyError: If an object is missing and allow_missing is False
2623 """
2624 todo = set(shas)
2625 for b in self.bases:
2626 for o in b.iter_unpacked_subset(
2627 todo,
2628 include_comp=include_comp,
2629 allow_missing=True,
2630 convert_ofs_delta=convert_ofs_delta,
2631 ):
2632 yield o
2633 todo.remove(o.sha())
2634 if todo and not allow_missing:
2635 raise KeyError(next(iter(todo)))
2636
2637 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]:
2638 """Get the raw object data from the overlaid stores.
2639
2640 Args:
2641 sha_id: SHA of the object
2642
2643 Returns:
2644 Tuple of (type_num, raw_data)
2645
2646 Raises:
2647 KeyError: If object not found in any base store
2648 """
2649 for b in self.bases:
2650 try:
2651 return b.get_raw(sha_id)
2652 except KeyError:
2653 pass
2654 raise KeyError(sha_id)
2655
2656 def contains_packed(self, sha: bytes) -> bool:
2657 """Check if an object is packed in any base store.
2658
2659 Args:
2660 sha: SHA of the object
2661
2662 Returns:
2663 True if object is packed in any base store
2664 """
2665 for b in self.bases:
2666 if b.contains_packed(sha):
2667 return True
2668 return False
2669
2670 def contains_loose(self, sha: bytes) -> bool:
2671 """Check if an object is loose in any base store.
2672
2673 Args:
2674 sha: SHA of the object
2675
2676 Returns:
2677 True if object is loose in any base store
2678 """
2679 for b in self.bases:
2680 if b.contains_loose(sha):
2681 return True
2682 return False
2683
2684
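# An objects/info/packs file lists one pack per line; for example
# (illustrative): "P pack-a1b2c3d4e5f6.pack". read_packs_file() yields
# the pack names from such "P" lines.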
2685def read_packs_file(f: BinaryIO) -> Iterator[str]:
2686 """Yield the packs listed in a packs file."""
2687 for line in f.read().splitlines():
2688 if not line:
2689 continue
2690 (kind, name) = line.split(b" ", 1)
2691 if kind != b"P":
2692 continue
2693 yield os.fsdecode(name)
2694
2695
2696class BucketBasedObjectStore(PackBasedObjectStore):
2697 """Object store implementation that uses a bucket store like S3 as backend."""
2698
2699 def _iter_loose_objects(self) -> Iterator[bytes]:
2700 """Iterate over the SHAs of all loose objects."""
2701 return iter([])
2702
2703 def _get_loose_object(self, sha: bytes) -> None:
2704 return None
2705
2706 def delete_loose_object(self, sha: bytes) -> None:
2707 """Delete a loose object (no-op for bucket stores).
2708
2709 Bucket-based stores don't have loose objects, so this is a no-op.
2710
2711 Args:
2712 sha: SHA of the object to delete
2713 """
        # Bucket-based stores have no loose objects; nothing to delete.
2715
2716 def pack_loose_objects(
2717 self, progress: Optional[Callable[[str], None]] = None
2718 ) -> int:
2719 """Pack loose objects. Returns number of objects packed.
2720
2721 BucketBasedObjectStore doesn't support loose objects, so this is a no-op.
2722
        Args:
            progress: Optional progress reporting callback (ignored)

        Returns:
            0, since bucket stores never have loose objects
        """
2726 return 0
2727
2728 def _remove_pack_by_name(self, name: str) -> None:
2729 """Remove a pack by name. Subclasses should implement this."""
2730 raise NotImplementedError(self._remove_pack_by_name)
2731
2732 def _iter_pack_names(self) -> Iterator[str]:
2733 raise NotImplementedError(self._iter_pack_names)
2734
2735 def _get_pack(self, name: str) -> Pack:
2736 raise NotImplementedError(self._get_pack)
2737
2738 def _update_pack_cache(self) -> list[Pack]:
2739 pack_files = set(self._iter_pack_names())
2740
2741 # Open newly appeared pack files
2742 new_packs = []
2743 for f in pack_files:
2744 if f not in self._pack_cache:
2745 pack = self._get_pack(f)
2746 new_packs.append(pack)
2747 self._pack_cache[f] = pack
2748 # Remove disappeared pack files
2749 for f in set(self._pack_cache) - pack_files:
2750 self._pack_cache.pop(f).close()
2751 return new_packs
2752
2753 def _upload_pack(
2754 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
2755 ) -> None:
        raise NotImplementedError(self._upload_pack)
2757
2758 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2759 """Add a new pack to this object store.
2760
2761 Returns: Fileobject to write to, a commit function to
2762 call when the pack is finished and an abort
2763 function.
2764 """
2765 import tempfile
2766
2767 pf = tempfile.SpooledTemporaryFile(
2768 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2769 )
2770
2771 def commit() -> Optional[Pack]:
2772 if pf.tell() == 0:
2773 pf.close()
2774 return None
2775
2776 pf.seek(0)
2777
2778 p = PackData(pf.name, pf)
2779 entries = p.sorted_entries()
2780 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
2781 idxf = tempfile.SpooledTemporaryFile(
2782 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2783 )
2784 checksum = p.get_stored_checksum()
2785 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
2786 idxf.seek(0)
2787 idx = load_pack_index_file(basename + ".idx", idxf)
2788 for pack in self.packs:
2789 if pack.get_stored_checksum() == p.get_stored_checksum():
2790 p.close()
2791 idx.close()
2792 pf.close()
2793 idxf.close()
2794 return pack
2795 pf.seek(0)
2796 idxf.seek(0)
2797 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type]
2798 final_pack = Pack.from_objects(p, idx)
2799 self._add_cached_pack(basename, final_pack)
2800 pf.close()
2801 idxf.close()
2802 return final_pack
2803
2804 return pf, commit, pf.close # type: ignore[return-value]
2805
2806
2807def _collect_ancestors(
2808 store: ObjectContainer,
2809 heads: Iterable[ObjectID],
2810 common: frozenset[ObjectID] = frozenset(),
2811 shallow: frozenset[ObjectID] = frozenset(),
2812 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2813) -> tuple[set[ObjectID], set[ObjectID]]:
2814 """Collect all ancestors of heads up to (excluding) those in common.
2815
2816 Args:
2817 store: Object store to get commits from
2818 heads: commits to start from
2819 common: commits to end at, or empty set to walk repository
2820 completely
2821 shallow: Set of shallow commits
2822 get_parents: Optional function for getting the parents of a
2823 commit.
    Returns: a tuple (A, B) where A is the set of all commits reachable
        from heads but not present in common, and B is the subset of common
        (shared) commits that are directly reachable from heads.
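
    Example (illustrative sketch; the SHAs are placeholders)::

        missing, common = _collect_ancestors(
            store, {new_head}, common=frozenset({old_head})
        )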
2827 """
2828 bases = set()
2829 commits = set()
2830 queue: list[ObjectID] = []
2831 queue.extend(heads)
2832
2833 # Try to use commit graph if available
2834 commit_graph = store.get_commit_graph()
2835
2836 while queue:
2837 e = queue.pop(0)
2838 if e in common:
2839 bases.add(e)
2840 elif e not in commits:
2841 commits.add(e)
2842 if e in shallow:
2843 continue
2844
2845 # Try to use commit graph for parent lookup
2846 parents = None
2847 if commit_graph:
2848 parents = commit_graph.get_parents(e)
2849
2850 if parents is None:
2851 # Fall back to loading the object
2852 cmt = store[e]
2853 assert isinstance(cmt, Commit)
2854 parents = get_parents(cmt)
2855
2856 queue.extend(parents)
2857 return (commits, bases)
2858
2859
2860def iter_tree_contents(
2861 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
2862) -> Iterator[TreeEntry]:
2863 """Iterate the contents of a tree and all subtrees.
2864
2865 Iteration is depth-first pre-order, as in e.g. os.walk.
2866
2867 Args:
2868 store: Object store to get trees from
2869 tree_id: SHA1 of the tree.
2870 include_trees: If True, include tree objects in the iteration.
2871
2872 Yields: TreeEntry namedtuples for all the objects in a tree.
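
    Example (illustrative sketch)::

        for entry in iter_tree_contents(store, commit.tree):
            print(entry.path.decode(), oct(entry.mode), entry.sha.decode("ascii"))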
2873 """
2874 if tree_id is None:
2875 return
2876 # This could be fairly easily generalized to >2 trees if we find a use
2877 # case.
2878 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
2879 while todo:
2880 entry = todo.pop()
2881 assert entry.mode is not None
2882 if stat.S_ISDIR(entry.mode):
2883 extra = []
2884 assert entry.sha is not None
2885 tree = store[entry.sha]
2886 assert isinstance(tree, Tree)
2887 for subentry in tree.iteritems(name_order=True):
2888 assert entry.path is not None
2889 extra.append(subentry.in_path(entry.path))
2890 todo.extend(reversed(extra))
2891 if not stat.S_ISDIR(entry.mode) or include_trees:
2892 yield entry
2893
2894
2895def iter_commit_contents(
2896 store: ObjectContainer,
2897 commit: Union[Commit, bytes],
2898 *,
2899 include: Optional[Sequence[Union[str, bytes, Path]]] = None,
2900) -> Iterator[TreeEntry]:
2901 """Iterate the contents of the repository at the specified commit.
2902
2903 This is a wrapper around iter_tree_contents() and
2904 tree_lookup_path() to simplify the common task of getting the
    contents of a repo at a particular commit. See also
2906 dulwich.index.build_file_from_blob() for writing individual files
2907 to disk.
2908
2909 Args:
2910 store: Object store to get trees from
2911 commit: Commit object, or SHA1 of a commit
2912 include: if provided, only the entries whose paths are in the
2913 list, or whose parent tree is in the list, will be
2914 included. Note that duplicate or overlapping paths
            (e.g. ["foo", "foo/bar"]) may result in duplicate entries.
2916
2917 Yields: TreeEntry namedtuples for all matching files in a commit.
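
    Example (illustrative sketch; ``b"docs"`` is a placeholder path)::

        for entry in iter_commit_contents(store, head_sha, include=[b"docs"]):
            data = store[entry.sha].data  # blob contents of each matched file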
2918 """
2919 sha = commit.id if isinstance(commit, Commit) else commit
2920 if not isinstance(obj := store[sha], Commit):
2921 raise TypeError(
2922 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
2923 )
2924 commit = obj
2925 encoding = commit.encoding or "utf-8"
2926 include_bytes: list[bytes] = (
2927 [
2928 path if isinstance(path, bytes) else str(path).encode(encoding)
2929 for path in include
2930 ]
2931 if include is not None
2932 else [b""]
2933 )
2934
2935 for path in include_bytes:
2936 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
2937 # Iterate all contained files if path points to a dir, otherwise just get that
2938 # single file
2939 if isinstance(store[obj_id], Tree):
2940 for entry in iter_tree_contents(store, obj_id):
2941 yield entry.in_path(path)
2942 else:
2943 yield TreeEntry(path, mode, obj_id)
2944
2945
2946def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
2947 """Peel all tags from a SHA.
2948
2949 Args:
2950 store: Object store to get objects from
2951 sha: The object SHA to peel.
    Returns: Tuple of (unpeeled, peeled) objects, where peeled is the object
        reached after peeling all intermediate tags; if the original SHA does
        not point to a tag, both elements are the same object.
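
    Example (illustrative sketch; ``tag_sha`` is a placeholder)::

        unpeeled, peeled = peel_sha(store, tag_sha)
        # ``peeled`` is the object the annotated tag chain ultimately points to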
2955 """
2956 unpeeled = obj = store[sha]
2957 obj_class = object_class(obj.type_name)
2958 while obj_class is Tag:
2959 assert isinstance(obj, Tag)
2960 obj_class, sha = obj.object
2961 obj = store[sha]
2962 return unpeeled, obj