Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
"""Git object store interfaces and implementation."""

# Explicit public API of this module; anything not listed here is internal.
__all__ = [
    "DEFAULT_TEMPFILE_GRACE_PERIOD",
    "INFODIR",
    "PACKDIR",
    "PACK_MODE",
    "BaseObjectStore",
    "BitmapReachability",
    "BucketBasedObjectStore",
    "DiskObjectStore",
    "GraphTraversalReachability",
    "GraphWalker",
    "MemoryObjectStore",
    "MissingObjectFinder",
    "ObjectIterator",
    "ObjectReachabilityProvider",
    "ObjectStoreGraphWalker",
    "OverlayObjectStore",
    "PackBasedObjectStore",
    "PackCapableObjectStore",
    "PackContainer",
    "commit_tree_changes",
    "find_shallow",
    "get_depth",
    "iter_commit_contents",
    "iter_tree_contents",
    "peel_sha",
    "read_packs_file",
    "tree_lookup_path",
]
import binascii
import os
import stat
import sys
import time
import warnings
from collections import deque
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
from contextlib import suppress
from io import BytesIO
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    BinaryIO,
    Protocol,
    cast,
)
73if TYPE_CHECKING:
74 from .object_format import ObjectFormat
76from .errors import NotTreeError
77from .file import GitFile, _GitFile
78from .midx import MultiPackIndex, load_midx
79from .objects import (
80 S_ISGITLINK,
81 Blob,
82 Commit,
83 ObjectID,
84 RawObjectID,
85 ShaFile,
86 Tag,
87 Tree,
88 TreeEntry,
89 hex_to_filename,
90 hex_to_sha,
91 object_class,
92 sha_to_hex,
93 valid_hexsha,
94)
95from .pack import (
96 PACK_SPOOL_FILE_MAX_SIZE,
97 ObjectContainer,
98 Pack,
99 PackData,
100 PackedObjectContainer,
101 PackFileDisappeared,
102 PackHint,
103 PackIndexer,
104 PackInflater,
105 PackStreamCopier,
106 UnpackedObject,
107 extend_pack,
108 full_unpacked_object,
109 generate_unpacked_objects,
110 iter_sha1,
111 load_pack_index_file,
112 pack_objects_to_data,
113 write_pack_data,
114 write_pack_index,
115)
116from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX
117from .refs import Ref
119if TYPE_CHECKING:
120 from .bitmap import EWAHBitmap
121 from .commit_graph import CommitGraph
122 from .config import Config
123 from .diff_tree import RenameDetector
124 from .pack import Pack
class GraphWalker(Protocol):
    """Protocol for graph walker objects.

    A graph walker drives have/want negotiation: it yields candidate
    commit SHAs and is told which of them the other side already has.
    """

    def __next__(self) -> ObjectID | None:
        """Return the next object SHA to visit, or None when exhausted."""
        ...

    def ack(self, sha: ObjectID) -> None:
        """Acknowledge that an object has been received."""
        ...

    def nak(self) -> None:
        """Nothing in common was found."""
        ...
class ObjectReachabilityProvider(Protocol):
    """Protocol for computing object reachability queries.

    This abstraction allows reachability computations to be backed by either
    naive graph traversal or optimized bitmap indexes, with a consistent
    interface.  See GraphTraversalReachability and BitmapReachability for
    concrete implementations.
    """

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Args:
          heads: Starting commit SHAs
          exclude: Commit SHAs to exclude (and their ancestors)
          shallow: Set of shallow commit boundaries (traversal stops here)

        Returns:
          Set of commit SHAs reachable from heads but not from exclude
        """
        ...

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
          commits: Starting commit SHAs
          exclude_commits: Commits whose objects should be excluded

        Returns:
          Set of all object SHAs (commits, trees, blobs, tags)
        """
        ...

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
          tree_shas: Starting tree SHAs

        Returns:
          Set of tree and blob SHAs
        """
        ...
# Names of the subdirectories inside the object store directory.
INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks
212def find_shallow(
213 store: ObjectContainer, heads: Iterable[ObjectID], depth: int
214) -> tuple[set[ObjectID], set[ObjectID]]:
215 """Find shallow commits according to a given depth.
217 Args:
218 store: An ObjectStore for looking up objects.
219 heads: Iterable of head SHAs to start walking from.
220 depth: The depth of ancestors to include. A depth of one includes
221 only the heads themselves.
222 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
223 considered shallow and unshallow according to the arguments. Note that
224 these sets may overlap if a commit is reachable along multiple paths.
225 """
226 parents: dict[ObjectID, list[ObjectID]] = {}
227 commit_graph = store.get_commit_graph()
229 def get_parents(sha: ObjectID) -> list[ObjectID]:
230 result = parents.get(sha, None)
231 if not result:
232 # Try to use commit graph first if available
233 if commit_graph:
234 graph_parents = commit_graph.get_parents(sha)
235 if graph_parents is not None:
236 result = graph_parents
237 parents[sha] = result
238 return result
239 # Fall back to loading the object
240 commit = store[sha]
241 assert isinstance(commit, Commit)
242 result = commit.parents
243 parents[sha] = result
244 return result
246 todo = [] # stack of (sha, depth)
247 for head_sha in heads:
248 obj = store[head_sha]
249 # Peel tags if necessary
250 while isinstance(obj, Tag):
251 _, sha = obj.object
252 obj = store[sha]
253 if isinstance(obj, Commit):
254 todo.append((obj.id, 1))
256 not_shallow = set()
257 shallow = set()
258 while todo:
259 sha, cur_depth = todo.pop()
260 if cur_depth < depth:
261 not_shallow.add(sha)
262 new_depth = cur_depth + 1
263 todo.extend((p, new_depth) for p in get_parents(sha))
264 else:
265 shallow.add(sha)
267 return shallow, not_shallow
270def get_depth(
271 store: ObjectContainer,
272 head: ObjectID,
273 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
274 max_depth: int | None = None,
275) -> int:
276 """Return the current available depth for the given head.
278 For commits with multiple parents, the largest possible depth will be
279 returned.
281 Args:
282 store: Object store to search in
283 head: commit to start from
284 get_parents: optional function for getting the parents of a commit
285 max_depth: maximum depth to search
286 """
287 if head not in store:
288 return 0
289 current_depth = 1
290 queue = [(head, current_depth)]
291 commit_graph = store.get_commit_graph()
293 while queue and (max_depth is None or current_depth < max_depth):
294 e, depth = queue.pop(0)
295 current_depth = max(current_depth, depth)
297 # Try to use commit graph for parent lookup if available
298 parents = None
299 if commit_graph:
300 parents = commit_graph.get_parents(e)
302 if parents is None:
303 # Fall back to loading the object
304 cmt = store[e]
305 if isinstance(cmt, Tag):
306 _cls, sha = cmt.object
307 cmt = store[sha]
308 parents = get_parents(cmt)
310 queue.extend((parent, depth + 1) for parent in parents if parent in store)
311 return current_depth
class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack.

        Returns: Tuple of (file-like to write pack data to, commit
          callable, abort callable).
        """
class BaseObjectStore:
    """Object store interface.

    Abstract base class defining the contract shared by all object stores
    (disk, memory, overlay, ...).  Subclasses must implement at least
    contains_loose, get_raw, __iter__, add_object and add_objects.
    """

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize object store.

        Args:
          object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
        """
        # Local import; presumably avoids a module-level import cycle with
        # .object_format — TODO confirm.
        from .object_format import DEFAULT_OBJECT_FORMAT

        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT

    def determine_wants_all(
        self, refs: Mapping[Ref, ObjectID], depth: int | None = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs.

        Args:
          refs: Mapping of ref name to object SHA.
          depth: Optional desired fetch depth; DEPTH_INFINITE means full
            history is wanted.
        Returns: SHAs that are missing locally or need deepening, excluding
          peeled-tag refs (those ending in PEELED_TAG_SUFFIX).
        """

        def _want_deepen(sha: ObjectID) -> bool:
            # An already-present sha is still wanted when the requested
            # depth exceeds the history currently available for it.
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
        ]

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        return False  # Default implementation for stores that don't support packing

    def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
        """Obtain an object by SHA1.

        Raises:
          KeyError: if the object is not present (propagated from get_raw).
        """
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(
            type_num, uncomp, sha=sha1, object_format=self.object_format
        )

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def get_reachability_provider(
        self, prefer_bitmap: bool = True
    ) -> ObjectReachabilityProvider:
        """Get a reachability provider for this object store.

        Returns an ObjectReachabilityProvider that can efficiently compute
        object reachability queries. Subclasses can override this to provide
        optimized implementations (e.g., using bitmap indexes).

        Args:
          prefer_bitmap: Whether to prefer bitmap-based reachability if
            available.  Ignored here: the base implementation always uses
            plain graph traversal.

        Returns:
          ObjectReachabilityProvider instance
        """
        return GraphTraversalReachability(self)

    def tree_changes(
        self,
        source: ObjectID | None,
        target: ObjectID | None,
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: "RenameDetector | None" = None,
        paths: Sequence[bytes] | None = None,
    ) -> Iterator[
        tuple[
            tuple[bytes | None, bytes | None],
            tuple[int | None, int | None],
            tuple[ObjectID | None, ObjectID | None],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
          (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        from .diff_tree import tree_changes

        # Flatten TreeChange objects into plain tuples; a missing side
        # (add/delete) is represented as None in each position.
        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            old_path = change.old.path if change.old is not None else None
            new_path = change.new.path if change.new is not None else None
            old_mode = change.old.mode if change.old is not None else None
            new_mode = change.new.mode if change.new is not None else None
            old_sha = change.old.sha if change.old is not None else None
            new_sha = change.new.sha if change.new is not None else None
            yield (
                (old_path, new_path),
                (old_mode, new_mode),
                (old_sha, new_sha),
            )

    def iter_tree_contents(
        self, tree_id: ObjectID, include_trees: bool = False
    ) -> Iterator[TreeEntry]:
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Deprecated: use the module-level iter_tree_contents() instead.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
          tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> "Iterator[UnpackedObject]":
        """Iterate over unpacked objects for a subset of SHAs.

        Default implementation that converts ShaFile objects to UnpackedObject.
        Subclasses may override for more efficient unpacked access.

        Args:
          shas: Iterable of object SHAs to retrieve
          include_comp: Whether to include compressed data (ignored in base
            implementation)
          allow_missing: If True, skip missing objects; if False, raise
            KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
            base implementation)

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        from .pack import UnpackedObject

        for sha in shas:
            try:
                obj = self[sha]
                # Convert ShaFile to UnpackedObject
                unpacked = UnpackedObject(
                    obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
                )
                yield unpacked
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    ) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Find the missing objects required for a set of revisions.

        Deprecated: use MissingObjectFinder(store) directly.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        # The walker terminates the stream by yielding a falsy value (None).
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        # Note that the pack-specific implementation (in PackBasedObjectStore)
        # is more efficient, as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Peel all tags from a SHA.

        Deprecated: use the module-level peel_sha() instead.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
          intermediate tags; if the original ref does not point to a tag,
          this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: ObjectID,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
        max_depth: int | None = None,
    ) -> int:
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: int | None = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> "CommitGraph | None":
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Raises:
          NotImplementedError: always, in this base implementation.
            Subclasses that support commit graphs must override this method.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: ObjectID) -> float:
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)
class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that supports pack operations.

    Base class for stores able to deal with pack files, whether they keep
    their data on disk or in memory.  The pack-writing entry points here are
    abstract; the retrieval helpers have simple generic implementations.
    """

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)

        Raises:
          NotImplementedError: subclasses must provide pack creation.
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator["UnpackedObject"],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add pack data to this object store.

        Args:
          count: Number of objects
          unpacked_objects: Iterator over unpacked objects
          progress: Optional progress callback

        Raises:
          NotImplementedError: subclasses must provide pack writing.
        """
        raise NotImplementedError(self.add_pack_data)

    def get_unpacked_object(
        self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
    ) -> "UnpackedObject":
        """Get a raw unresolved object.

        Args:
          sha1: SHA-1 hash of the object
          include_comp: Whether to include compressed data

        Returns:
          UnpackedObject instance
        """
        from .pack import UnpackedObject

        resolved = self[sha1]
        raw_chunks = resolved.as_raw_chunks()
        return UnpackedObject(resolved.type_num, sha=sha1, decomp_chunks=raw_chunks)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: for a missing object when allow_missing is False
        """
        for object_id in shas:
            try:
                found = self[object_id]
            except KeyError:
                if allow_missing:
                    continue
                raise
            yield found
814class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
815 """Object store that uses pack files for storage.
817 This class provides a base implementation for object stores that use
818 Git pack files as their primary storage mechanism. It handles caching
819 of open pack files and provides configuration for pack file operations.
820 """
    def __init__(
        self,
        pack_compression_level: int = -1,
        pack_index_version: int | None = None,
        pack_delta_window_size: int | None = None,
        pack_window_memory: int | None = None,
        pack_delta_cache_size: int | None = None,
        pack_depth: int | None = None,
        pack_threads: int | None = None,
        pack_big_file_threshold: int | None = None,
        *,
        object_format: "ObjectFormat | None" = None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
          object_format: Hash algorithm to use
        """
        super().__init__(object_format=object_format)
        # Cache of open Pack objects, keyed by pack base name; see
        # _add_cached_pack / _clear_cached_packs / _update_pack_cache.
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold
859 def get_reachability_provider(
860 self,
861 prefer_bitmaps: bool = True,
862 ) -> ObjectReachabilityProvider:
863 """Get the best reachability provider for the object store.
865 Args:
866 prefer_bitmaps: Whether to use bitmaps if available
868 Returns:
869 ObjectReachabilityProvider implementation (either bitmap-accelerated
870 or graph traversal)
871 """
872 if prefer_bitmaps:
873 # Check if any packs have bitmaps
874 has_bitmap = False
875 for pack in self.packs:
876 try:
877 # Try to access bitmap property
878 if pack.bitmap is not None:
879 has_bitmap = True
880 break
881 except FileNotFoundError:
882 # Bitmap file doesn't exist for this pack
883 continue
885 if has_bitmap:
886 return BitmapReachability(self)
888 # Fall back to graph traversal
889 return GraphTraversalReachability(self)
    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)

        Raises:
          NotImplementedError: subclasses must provide pack creation.
        """
        raise NotImplementedError(self.add_pack)
895 def add_pack_data(
896 self,
897 count: int,
898 unpacked_objects: Iterator[UnpackedObject],
899 progress: Callable[..., None] | None = None,
900 ) -> "Pack | None":
901 """Add pack data to this object store.
903 Args:
904 count: Number of items to add
905 unpacked_objects: Iterator of UnpackedObject instances
906 progress: Optional progress callback
907 """
908 if count == 0:
909 # Don't bother writing an empty pack file
910 return None
911 f, commit, abort = self.add_pack()
912 try:
913 write_pack_data(
914 f.write,
915 unpacked_objects,
916 num_records=count,
917 progress=progress,
918 compression_level=self.pack_compression_level,
919 object_format=self.object_format,
920 )
921 except BaseException:
922 abort()
923 raise
924 else:
925 return commit()
    @property
    def alternates(self) -> list["BaseObjectStore"]:
        """Return list of alternate object stores.

        The base implementation has none; DiskObjectStore overrides this to
        honour objects/info/alternates.
        """
        return []
932 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
933 """Check if a particular object is present by SHA1 and is packed.
935 This does not check alternates.
936 """
937 for pack in self.packs:
938 try:
939 if sha in pack:
940 return True
941 except PackFileDisappeared:
942 pass
943 return False
945 def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
946 """Check if a particular object is present by SHA1.
948 This method makes no distinction between loose and packed objects.
949 """
950 if self.contains_packed(sha) or self.contains_loose(sha):
951 return True
952 for alternate in self.alternates:
953 if sha in alternate:
954 return True
955 return False
957 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
958 """Add a newly appeared pack to the cache by path."""
959 prev_pack = self._pack_cache.get(base_name)
960 if prev_pack is not pack:
961 self._pack_cache[base_name] = pack
962 if prev_pack:
963 prev_pack.close()
965 def generate_pack_data(
966 self,
967 have: Iterable[ObjectID],
968 want: Iterable[ObjectID],
969 *,
970 shallow: Set[ObjectID] | None = None,
971 progress: Callable[..., None] | None = None,
972 ofs_delta: bool = True,
973 ) -> tuple[int, Iterator[UnpackedObject]]:
974 """Generate pack data objects for a set of wants/haves.
976 Args:
977 have: List of SHA1s of objects that should not be sent
978 want: List of SHA1s of objects that should be sent
979 shallow: Set of shallow commit SHA1s to skip
980 ofs_delta: Whether OFS deltas can be included
981 progress: Optional progress reporting method
982 """
983 missing_objects = MissingObjectFinder(
984 self, haves=have, wants=want, shallow=shallow, progress=progress
985 )
986 remote_has = missing_objects.get_remote_has()
987 object_ids = list(missing_objects)
988 return len(object_ids), generate_unpacked_objects(
989 self,
990 object_ids,
991 progress=progress,
992 ofs_delta=ofs_delta,
993 other_haves=remote_has,
994 )
996 def _clear_cached_packs(self) -> None:
997 pack_cache = self._pack_cache
998 self._pack_cache = {}
999 while pack_cache:
1000 (_name, pack) = pack_cache.popitem()
1001 pack.close()
    def _iter_cached_packs(self) -> Iterator[Pack]:
        """Iterate over the packs currently held in the in-memory cache."""
        return iter(self._pack_cache.values())
    def _update_pack_cache(self) -> list[Pack]:
        """Refresh the pack cache from backing storage.

        Subclasses must rescan storage and return the packs that were not
        already cached (the ``packs`` property concatenates this with the
        cached ones).
        """
        raise NotImplementedError(self._update_pack_cache)
    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        Can be called multiple times safely.
        """
        self._clear_cached_packs()
    def __del__(self) -> None:
        """Warn if the object store is being deleted with unclosed packs."""
        if self._pack_cache:
            # Local import even though warnings is imported at module level;
            # presumably guards against module globals being torn down at
            # interpreter shutdown — TODO confirm.
            import warnings

            warnings.warn(
                f"ObjectStore {self!r} was destroyed with {len(self._pack_cache)} "
                "unclosed pack(s). Please call close() explicitly.",
                ResourceWarning,
                stacklevel=2,
            )
            self.close()
    @property
    def packs(self) -> list[Pack]:
        """List with pack objects.

        Combines the already-cached packs with whatever _update_pack_cache
        discovers in backing storage.
        """
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())
1035 def count_pack_files(self) -> int:
1036 """Count the number of pack files.
1038 Returns:
1039 Number of pack files (excluding those with .keep files)
1040 """
1041 count = 0
1042 for pack in self.packs:
1043 # Check if there's a .keep file for this pack
1044 keep_path = pack._basename + ".keep"
1045 if not os.path.exists(keep_path):
1046 count += 1
1047 return count
    def _iter_alternate_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate
    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)
    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
        """Return the loose object for sha, or None if it is not stored loose."""
        raise NotImplementedError(self._get_loose_object)
def delete_loose_object(self, sha: ObjectID) -> None:
    """Delete a loose object.

    This method only handles loose objects. For packed objects,
    use repack(exclude=...) to exclude them during repacking.

    Args:
        sha: SHA of the loose object to delete.
    """
    raise NotImplementedError(self.delete_loose_object)
def _remove_pack(self, pack: "Pack") -> None:
    """Remove a pack from storage (close it and delete its files)."""
    raise NotImplementedError(self._remove_pack)
def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
    """Pack loose objects.

    Reads all loose objects into memory, writes them into a single new
    pack, and then removes the loose copies.

    Args:
        progress: Optional progress reporting callback

    Returns: Number of objects packed
    """
    objects: list[tuple[ShaFile, None]] = []
    for sha in self._iter_loose_objects():
        obj = self._get_loose_object(sha)
        # The object may have vanished between listing and reading; skip it.
        if obj is not None:
            objects.append((obj, None))
    self.add_objects(objects, progress=progress)
    # Only delete the loose copies once they are safely in the new pack.
    # (The second tuple element is always None and is deliberately unused.)
    for obj, _path in objects:
        self.delete_loose_object(obj.id)
    return len(objects)
def repack(
    self,
    exclude: Set[bytes] | None = None,
    progress: Callable[[str], None] | None = None,
) -> int:
    """Repack the packs in this repository.

    Note that this implementation is fairly naive and currently keeps all
    objects in memory while it repacks.

    Args:
        exclude: Optional set of object SHAs to exclude from repacking
        progress: Optional progress reporting callback

    Returns: Number of objects in the consolidated pack
    """
    if exclude is None:
        exclude = set()

    loose_objects = set()
    excluded_loose_objects = set()
    for sha in self._iter_loose_objects():
        if sha not in exclude:
            obj = self._get_loose_object(sha)
            # Skip objects that disappeared between listing and reading.
            if obj is not None:
                loose_objects.add(obj)
        else:
            excluded_loose_objects.add(sha)

    objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
    old_packs = {p.name(): p for p in self.packs}
    for pack in old_packs.values():
        objects.update(
            (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
        )

    # Only create a new pack if there are objects to pack
    if objects:
        # The name of the consolidated pack might match the name of a
        # pre-existing pack. Take care not to remove the newly created
        # consolidated pack.
        consolidated = self.add_objects(list(objects), progress=progress)
        if consolidated is not None:
            old_packs.pop(consolidated.name(), None)

    # Delete loose objects that were packed. loose_objects only ever
    # contains real ShaFile instances, so no None check is needed here.
    for obj in loose_objects:
        self.delete_loose_object(obj.id)
    # Delete excluded loose objects
    for sha in excluded_loose_objects:
        self.delete_loose_object(sha)
    for pack in old_packs.values():
        self._remove_pack(pack)
    self._update_pack_cache()
    return len(objects)
def generate_pack_bitmaps(
    self,
    refs: dict[Ref, ObjectID],
    *,
    commit_interval: int | None = None,
    progress: Callable[[str], None] | None = None,
) -> int:
    """Generate bitmap indexes for all packs that don't have them.

    This generates .bitmap files for packfiles, enabling fast reachability
    queries. Equivalent to the bitmap generation part of 'git repack -b'.

    Args:
        refs: Dictionary of ref names to commit SHAs
        commit_interval: Include every Nth commit in bitmap index (None for default)
        progress: Optional progress reporting callback

    Returns:
        Number of bitmaps generated
    """
    generated = 0
    for pack in self.packs:
        pack.ensure_bitmap(
            self, refs, commit_interval=commit_interval, progress=progress
        )
        generated += 1

    # Refresh the cache so newly written bitmaps become visible.
    self._update_pack_cache()

    return generated
def __iter__(self) -> Iterator[ObjectID]:
    """Iterate over the SHAs that are present in this store."""
    self._update_pack_cache()
    for pack in self._iter_cached_packs():
        # A pack may be deleted out from under us; skip it if so.
        try:
            for sha in pack:
                yield sha
        except PackFileDisappeared:
            pass
    yield from self._iter_loose_objects()
    yield from self._iter_alternate_objects()
def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
    """Check if a particular object is present by SHA1 and is loose.

    This does not check alternates.
    """
    obj = self._get_loose_object(sha)
    return obj is not None
def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
    """Obtain the raw fulltext for an object.

    Lookup order: cached packs, then loose storage, then any packs that
    appeared since the cache was last refreshed, then alternates.

    Args:
        name: sha for the object.
    Returns: tuple with numeric type and object contents.
    Raises:
        KeyError: if the object is not found anywhere.
    """
    sha: RawObjectID
    # Accept either a hex name or a raw binary OID.
    if len(name) == self.object_format.hex_length:
        sha = hex_to_sha(ObjectID(name))
        hexsha = name
    elif len(name) == self.object_format.oid_length:
        sha = RawObjectID(name)
        hexsha = None  # computed lazily below, only if actually needed
    else:
        raise AssertionError(f"Invalid object name {name!r}")
    for pack in self._iter_cached_packs():
        try:
            return pack.get_raw(sha)
        except (KeyError, PackFileDisappeared):
            pass
    if hexsha is None:
        hexsha = sha_to_hex(sha)
    ret = self._get_loose_object(hexsha)
    if ret is not None:
        return ret.type_num, ret.as_raw_string()
    # Maybe something else has added a pack with the object
    # in the mean time?
    for pack in self._update_pack_cache():
        try:
            return pack.get_raw(sha)
        except KeyError:
            pass
    for alternate in self.alternates:
        try:
            return alternate.get_raw(hexsha)
        except KeyError:
            pass
    raise KeyError(hexsha)
def iter_unpacked_subset(
    self,
    shas: Iterable[ObjectID | RawObjectID],
    include_comp: bool = False,
    allow_missing: bool = False,
    convert_ofs_delta: bool = True,
) -> Iterator[UnpackedObject]:
    """Iterate over a subset of objects, yielding UnpackedObject instances.

    Searches cached packs first, then packs that appeared since the cache
    was refreshed, then alternates. Each found SHA is removed from the
    working set so an object is yielded at most once.

    Args:
        shas: Set of object SHAs to retrieve
        include_comp: Whether to include compressed data
        allow_missing: If True, skip missing objects; if False, raise KeyError
        convert_ofs_delta: Whether to convert OFS_DELTA objects

    Returns:
        Iterator of UnpackedObject instances

    Raises:
        KeyError: If an object is missing and allow_missing is False
    """
    # NOTE(review): entries still in `todo` at the end are silently dropped
    # regardless of allow_missing — confirm whether a final KeyError (as in
    # iterobjects_subset) was intended. Also, removal uses
    # sha_to_hex(unpacked.sha()), which assumes callers pass hex SHAs.
    todo: set[ObjectID | RawObjectID] = set(shas)
    for p in self._iter_cached_packs():
        for unpacked in p.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            hexsha = sha_to_hex(unpacked.sha())
            todo.remove(hexsha)
    # Maybe something else has added a pack with the object
    # in the mean time?
    for p in self._update_pack_cache():
        for unpacked in p.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            hexsha = sha_to_hex(unpacked.sha())
            todo.remove(hexsha)
    for alternate in self.alternates:
        assert isinstance(alternate, PackBasedObjectStore)
        for unpacked in alternate.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            hexsha = sha_to_hex(unpacked.sha())
            todo.remove(hexsha)
def iterobjects_subset(
    self, shas: Iterable[ObjectID], *, allow_missing: bool = False
) -> Iterator[ShaFile]:
    """Iterate over a subset of objects in the store.

    This method searches for objects in pack files, alternates, and loose storage.

    Args:
        shas: Iterable of object SHAs to retrieve
        allow_missing: If True, skip missing objects; if False, raise KeyError

    Returns:
        Iterator of ShaFile objects

    Raises:
        KeyError: If an object is missing and allow_missing is False
    """
    # Found SHAs are removed from `todo` so each object is yielded once.
    todo: set[ObjectID] = set(shas)
    for p in self._iter_cached_packs():
        for o in p.iterobjects_subset(todo, allow_missing=True):
            yield o
            todo.remove(o.id)
    # Maybe something else has added a pack with the object
    # in the mean time?
    for p in self._update_pack_cache():
        for o in p.iterobjects_subset(todo, allow_missing=True):
            yield o
            todo.remove(o.id)
    for alternate in self.alternates:
        for o in alternate.iterobjects_subset(todo, allow_missing=True):
            yield o
            todo.remove(o.id)
    # Anything left must be loose (or is genuinely missing).
    for oid in todo:
        loose_obj: ShaFile | None = self._get_loose_object(oid)
        if loose_obj is not None:
            yield loose_obj
        elif not allow_missing:
            raise KeyError(oid)
def get_unpacked_object(
    self, sha1: bytes, *, include_comp: bool = False
) -> UnpackedObject:
    """Obtain the unpacked object.

    Lookup order: cached packs, then freshly discovered packs, then
    alternates (loose storage is not consulted here).

    Args:
        sha1: sha for the object.
        include_comp: Whether to include compression metadata.

    Raises:
        KeyError: if the object is not found in any pack or alternate.
    """
    # Accept either a hex name or a raw binary OID.
    if len(sha1) == self.object_format.hex_length:
        sha = hex_to_sha(cast(ObjectID, sha1))
        hexsha = cast(ObjectID, sha1)
    elif len(sha1) == self.object_format.oid_length:
        sha = cast(RawObjectID, sha1)
        hexsha = None  # computed lazily below, only if needed
    else:
        raise AssertionError(f"Invalid object sha1 {sha1!r}")
    for pack in self._iter_cached_packs():
        try:
            return pack.get_unpacked_object(sha, include_comp=include_comp)
        except (KeyError, PackFileDisappeared):
            pass
    if hexsha is None:
        hexsha = sha_to_hex(sha)
    # Maybe something else has added a pack with the object
    # in the mean time?
    for pack in self._update_pack_cache():
        try:
            return pack.get_unpacked_object(sha, include_comp=include_comp)
        except KeyError:
            pass
    for alternate in self.alternates:
        assert isinstance(alternate, PackBasedObjectStore)
        try:
            return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
        except KeyError:
            pass
    raise KeyError(hexsha)
def add_objects(
    self,
    objects: Sequence[tuple[ShaFile, str | None]],
    progress: Callable[[str], None] | None = None,
) -> "Pack | None":
    """Add a set of objects to this object store.

    Args:
        objects: Iterable over (object, path) tuples, should support
            __len__.
        progress: Optional progress reporting function.
    Returns: Pack object of the objects written.
    """
    num_objects = len(objects)
    # The path component of each tuple is not needed for packing.
    unpacked = (full_unpacked_object(obj) for (obj, _path) in objects)
    return self.add_pack_data(num_objects, unpacked, progress=progress)
1387class DiskObjectStore(PackBasedObjectStore):
1388 """Git-style object store that exists on disk."""
1390 path: str | os.PathLike[str]
1391 pack_dir: str | os.PathLike[str]
1392 _alternates: "list[BaseObjectStore] | None"
1393 _commit_graph: "CommitGraph | None"
def __init__(
    self,
    path: str | os.PathLike[str],
    *,
    loose_compression_level: int = -1,
    pack_compression_level: int = -1,
    pack_index_version: int | None = None,
    pack_delta_window_size: int | None = None,
    pack_window_memory: int | None = None,
    pack_delta_cache_size: int | None = None,
    pack_depth: int | None = None,
    pack_threads: int | None = None,
    pack_big_file_threshold: int | None = None,
    fsync_object_files: bool = False,
    pack_write_bitmaps: bool = False,
    pack_write_bitmap_hash_cache: bool = True,
    pack_write_bitmap_lookup_table: bool = True,
    file_mode: int | None = None,
    dir_mode: int | None = None,
    object_format: "ObjectFormat | None" = None,
) -> None:
    """Open an object store.

    Args:
        path: Path of the object store.
        loose_compression_level: zlib compression level for loose objects
        pack_compression_level: zlib compression level for pack objects
        pack_index_version: pack index version to use (1, 2, or 3)
        pack_delta_window_size: sliding window size for delta compression
        pack_window_memory: memory limit for delta window operations
        pack_delta_cache_size: size of cache for delta operations
        pack_depth: maximum delta chain depth
        pack_threads: number of threads for pack operations
        pack_big_file_threshold: threshold for treating files as big
        fsync_object_files: whether to fsync object files for durability
        pack_write_bitmaps: whether to write bitmap indexes for packs
        pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
        pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
        file_mode: File permission mask for shared repository
        dir_mode: Directory permission mask for shared repository
        object_format: Hash algorithm to use (SHA1 or SHA256)
    """
    # Import here to avoid circular dependency
    from .object_format import DEFAULT_OBJECT_FORMAT

    # Pack-related tuning knobs are handled by the base class.
    super().__init__(
        pack_compression_level=pack_compression_level,
        pack_index_version=pack_index_version,
        pack_delta_window_size=pack_delta_window_size,
        pack_window_memory=pack_window_memory,
        pack_delta_cache_size=pack_delta_cache_size,
        pack_depth=pack_depth,
        pack_threads=pack_threads,
        pack_big_file_threshold=pack_big_file_threshold,
        object_format=object_format if object_format else DEFAULT_OBJECT_FORMAT,
    )
    self.path = path
    self.pack_dir = os.path.join(self.path, PACKDIR)
    # Alternates are read lazily from info/alternates (see `alternates`).
    self._alternates = None
    self.loose_compression_level = loose_compression_level
    self.pack_compression_level = pack_compression_level
    self.pack_index_version = pack_index_version
    self.fsync_object_files = fsync_object_files
    self.pack_write_bitmaps = pack_write_bitmaps
    self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
    self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
    self.file_mode = file_mode
    self.dir_mode = dir_mode

    # Commit graph support - lazy loaded
    self._commit_graph = None
    self._use_commit_graph = True  # Default to true

    # Multi-pack-index support - lazy loaded
    self._midx: MultiPackIndex | None = None
    self._use_midx = True  # Default to true
def __repr__(self) -> str:
    """Return string representation of DiskObjectStore.

    Returns:
        String representation including the store path
    """
    return "<{}({!r})>".format(self.__class__.__name__, self.path)
@classmethod
def from_config(
    cls,
    path: str | os.PathLike[str],
    config: "Config",
    *,
    file_mode: int | None = None,
    dir_mode: int | None = None,
) -> "DiskObjectStore":
    """Create a DiskObjectStore from a configuration object.

    Args:
        path: Path to the object store directory
        config: Configuration object to read settings from
        file_mode: Optional file permission mask for shared repository
        dir_mode: Optional directory permission mask for shared repository

    Returns:
        New DiskObjectStore instance configured according to config
    """

    def get_int(
        section: tuple[bytes, ...], name: bytes, default: int | None
    ) -> int | None:
        # Missing keys fall back to the default; malformed values still
        # raise ValueError, preserving the previous behaviour.
        try:
            return int(config.get(section, name).decode())
        except KeyError:
            return default

    default_compression_level = get_int((b"core",), b"compression", -1)
    loose_compression_level = get_int(
        (b"core",), b"looseCompression", default_compression_level
    )
    # Key passed as bytes for consistency with the sibling lookups (it was
    # previously a str, relying on Config.get's key normalization).
    pack_compression_level = get_int(
        (b"core",), b"packCompression", default_compression_level
    )
    pack_index_version = get_int((b"pack",), b"indexVersion", None)

    # Read pack configuration options
    pack_delta_window_size = get_int((b"pack",), b"deltaWindowSize", None)
    pack_window_memory = get_int((b"pack",), b"windowMemory", None)
    pack_delta_cache_size = get_int((b"pack",), b"deltaCacheSize", None)
    pack_depth = get_int((b"pack",), b"depth", None)
    pack_threads = get_int((b"pack",), b"threads", None)
    pack_big_file_threshold = get_int((b"pack",), b"bigFileThreshold", None)

    # Read core.commitGraph setting
    use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

    # Read core.multiPackIndex setting
    use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)

    # Read core.fsyncObjectFiles setting
    fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)

    # Read bitmap settings
    pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
    pack_write_bitmap_hash_cache = config.get_boolean(
        (b"pack",), b"writeBitmapHashCache", True
    )
    pack_write_bitmap_lookup_table = config.get_boolean(
        (b"pack",), b"writeBitmapLookupTable", True
    )
    # Also check repack.writeBitmaps for backwards compatibility
    if not pack_write_bitmaps:
        pack_write_bitmaps = config.get_boolean(
            (b"repack",), b"writeBitmaps", False
        )

    # Get hash algorithm from config
    from .object_format import get_object_format

    object_format = None
    try:
        try:
            version = int(config.get((b"core",), b"repositoryformatversion"))
        except KeyError:
            version = 0
        if version == 1:
            # extensions.objectFormat is only honoured for format version 1.
            try:
                object_format_name = config.get((b"extensions",), b"objectformat")
            except KeyError:
                object_format_name = b"sha1"
            object_format = get_object_format(object_format_name.decode("ascii"))
    except (KeyError, ValueError):
        pass

    instance = cls(
        path,
        loose_compression_level=loose_compression_level,
        pack_compression_level=pack_compression_level,
        pack_index_version=pack_index_version,
        pack_delta_window_size=pack_delta_window_size,
        pack_window_memory=pack_window_memory,
        pack_delta_cache_size=pack_delta_cache_size,
        pack_depth=pack_depth,
        pack_threads=pack_threads,
        pack_big_file_threshold=pack_big_file_threshold,
        fsync_object_files=fsync_object_files,
        pack_write_bitmaps=pack_write_bitmaps,
        pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
        pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
        file_mode=file_mode,
        dir_mode=dir_mode,
        object_format=object_format,
    )
    instance._use_commit_graph = use_commit_graph
    instance._use_midx = use_midx
    return instance
@property
def alternates(self) -> list["BaseObjectStore"]:
    """Get the list of alternate object stores.

    Reads from .git/objects/info/alternates if not already cached.

    Returns:
        List of DiskObjectStore instances for alternate object directories
    """
    if self._alternates is None:
        self._alternates = [
            DiskObjectStore(alt_path) for alt_path in self._read_alternate_paths()
        ]
    return self._alternates
def _read_alternate_paths(self) -> Iterator[str]:
    """Yield the alternate store paths listed in info/alternates."""
    try:
        f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
    except FileNotFoundError:
        return
    with f:
        for raw_line in f.readlines():
            entry = raw_line.rstrip(b"\n")
            # Lines starting with '#' are comments.
            if entry.startswith(b"#"):
                continue
            if os.path.isabs(entry):
                yield os.fsdecode(entry)
            else:
                # Relative entries are resolved against the store path.
                yield os.fsdecode(os.path.join(os.fsencode(self.path), entry))
def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
    """Add an alternate path to this object store."""
    # Ensure the info directory exists, applying dir_mode when configured.
    info_dir = os.path.join(self.path, INFODIR)
    try:
        os.mkdir(info_dir)
        if self.dir_mode is not None:
            os.chmod(info_dir, self.dir_mode)
    except FileExistsError:
        pass
    alternates_path = os.path.join(self.path, INFODIR, "alternates")
    mask = 0o644 if self.file_mode is None else self.file_mode
    with GitFile(alternates_path, "wb", mask=mask) as f:
        # GitFile writes to a temporary file, so the original alternates
        # file (if any) is still readable; copy it before appending.
        try:
            orig_f = open(alternates_path, "rb")
        except FileNotFoundError:
            pass
        else:
            with orig_f:
                f.write(orig_f.read())
        f.write(os.fsencode(path) + b"\n")

    # Register the new alternate in the in-memory list as well.
    if not os.path.isabs(path):
        path = os.path.join(self.path, path)
    self.alternates.append(DiskObjectStore(path))
def _update_pack_cache(self) -> list[Pack]:
    """Read and iterate over new pack files and cache them."""
    try:
        pack_dir_contents = os.listdir(self.pack_dir)
    except FileNotFoundError:
        # No pack directory means no packs.
        return []
    pack_files = set()
    for name in pack_dir_contents:
        if name.startswith("pack-") and name.endswith(".pack"):
            # verify that idx exists first (otherwise the pack was not yet
            # fully written)
            idx_name = os.path.splitext(name)[0] + ".idx"
            if idx_name in pack_dir_contents:
                # Extract just the hash (remove "pack-" prefix and ".pack" suffix)
                pack_hash = name[len("pack-") : -len(".pack")]
                pack_files.add(pack_hash)

    # Open newly appeared pack files
    new_packs = []
    for pack_hash in pack_files:
        if pack_hash not in self._pack_cache:
            pack = Pack(
                os.path.join(self.pack_dir, "pack-" + pack_hash),
                object_format=self.object_format,
                delta_window_size=self.pack_delta_window_size,
                window_memory=self.pack_window_memory,
                delta_cache_size=self.pack_delta_cache_size,
                depth=self.pack_depth,
                threads=self.pack_threads,
                big_file_threshold=self.pack_big_file_threshold,
            )
            new_packs.append(pack)
            self._pack_cache[pack_hash] = pack
    # Remove disappeared pack files (close them so their file handles
    # are released promptly).
    for f in set(self._pack_cache) - pack_files:
        self._pack_cache.pop(f).close()
    return new_packs
def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str:
    """Return the on-disk path of the loose object with the given SHA."""
    path = hex_to_filename(os.fspath(self.path), sha)
    return path
def _iter_loose_objects(self) -> Iterator[ObjectID]:
    """Yield the hex SHA of every loose object under the store path."""
    for fan_out in os.listdir(self.path):
        # Loose objects live in two-character fan-out directories.
        if len(fan_out) != 2:
            continue
        for filename in os.listdir(os.path.join(self.path, fan_out)):
            candidate = os.fsencode(fan_out + filename)
            if valid_hexsha(candidate):
                yield ObjectID(candidate)
def count_loose_objects(self) -> int:
    """Count the number of loose objects in the object store.

    Returns:
        Number of loose objects
    """
    # Loose object filenames omit the two leading fan-out characters.
    name_len = self.object_format.hex_length - 2
    if not os.path.exists(self.path):
        return 0

    total = 0
    for i in range(256):
        subdir = os.path.join(self.path, f"{i:02x}")
        try:
            entries = os.listdir(subdir)
        except FileNotFoundError:
            # Directory may have been removed or is inaccessible
            continue
        total += sum(1 for name in entries if len(name) == name_len)

    return total
def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
    """Load a loose object from disk, or return None if it is absent."""
    path = self._get_shafile_path(sha)
    try:
        # Accept either a raw binary OID or a hex ObjectID; normalize to hex
        # before loading so the file contents can be verified against it.
        if len(sha) == self.object_format.oid_length:
            hex_sha: ObjectID = sha_to_hex(RawObjectID(sha))
        else:
            hex_sha = ObjectID(sha)
        return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
    except FileNotFoundError:
        return None
def delete_loose_object(self, sha: ObjectID) -> None:
    """Delete a loose object from disk.

    Args:
        sha: SHA1 of the object to delete

    Raises:
        FileNotFoundError: If the object file doesn't exist
    """
    path = self._get_shafile_path(sha)
    os.remove(path)
def get_object_mtime(self, sha: ObjectID) -> float:
    """Get the modification time of an object.

    Args:
        sha: SHA1 of the object

    Returns:
        Modification time as seconds since epoch

    Raises:
        KeyError: if the object is not found
    """
    # Prefer the loose copy when one exists.
    if self.contains_loose(sha):
        with suppress(FileNotFoundError):
            return os.path.getmtime(self._get_shafile_path(sha))

    # Otherwise use the mtime of the pack file that contains the object.
    for pack in self.packs:
        with suppress(PackFileDisappeared):
            if sha in pack:
                pack_path = pack._data_path
                with suppress(FileNotFoundError, AttributeError):
                    return os.path.getmtime(pack_path)

    raise KeyError(sha)
def _remove_pack(self, pack: Pack) -> None:
    """Close a pack, drop it from the cache and delete its files."""
    cache_key = os.path.basename(pack._basename)
    self._pack_cache.pop(cache_key, None)
    # Store paths before closing to avoid re-opening files on Windows
    data_path = pack._data_path
    idx_path = pack._idx_path
    pack.close()
    os.remove(data_path)
    if os.path.exists(idx_path):
        os.remove(idx_path)
def _get_pack_basepath(
    self, entries: Iterable[tuple[bytes, int, int | None]]
) -> str:
    """Compute the base path (without extension) for a pack of these entries."""
    digest = iter_sha1(entry[0] for entry in entries)
    # TODO: Handle self.pack_dir being bytes
    name = "pack-" + digest.decode("ascii")
    return os.path.join(self.pack_dir, name)
def _complete_pack(
    self,
    f: BinaryIO,
    path: str,
    num_objects: int,
    indexer: PackIndexer,
    progress: Callable[..., None] | None = None,
    refs: dict[Ref, ObjectID] | None = None,
) -> Pack:
    """Move a specific file containing a pack into the pack directory.

    Note: The file should be on the same file system as the
    packs directory.

    Args:
        f: Open file object for the pack.
        path: Path to the pack file.
        num_objects: Number of objects in the pack.
        indexer: A PackIndexer for indexing the pack.
        progress: Optional progress reporting function.
        refs: Optional dictionary of refs for bitmap generation.

    Returns:
        The completed Pack, registered in the pack cache.
    """
    entries = []
    for i, entry in enumerate(indexer):
        if progress is not None:
            progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
        entries.append(entry)

    # Append any externally referenced delta bases (thin-pack completion).
    pack_sha, extra_entries = extend_pack(
        f,
        set(indexer.ext_refs()),
        get_raw=self.get_raw,
        compression_level=self.pack_compression_level,
        progress=progress,
        object_format=self.object_format,
    )
    f.flush()
    if self.fsync_object_files:
        try:
            fileno = f.fileno()
        except AttributeError as e:
            raise OSError("fsync requested but file has no fileno()") from e
        else:
            os.fsync(fileno)
    f.close()

    entries.extend(extra_entries)

    # Move the pack in.
    entries.sort()
    pack_base_name = self._get_pack_basepath(entries)

    # If a pack with the same name already exists, reuse it instead of
    # overwriting it.
    for pack in self.packs:
        if pack._basename == pack_base_name:
            return pack

    target_pack_path = pack_base_name + ".pack"
    target_index_path = pack_base_name + ".idx"
    if sys.platform == "win32":
        # Windows might have the target pack file lingering. Attempt
        # removal, silently passing if the target does not exist.
        with suppress(FileNotFoundError):
            os.remove(target_pack_path)
    os.rename(path, target_pack_path)

    # Write the index.
    mask = self.file_mode if self.file_mode is not None else PACK_MODE
    with GitFile(
        target_index_path,
        "wb",
        mask=mask,
        fsync=self.fsync_object_files,
    ) as index_file:
        write_pack_index(
            index_file, entries, pack_sha, version=self.pack_index_version
        )

    # Generate bitmap if configured and refs are available
    if self.pack_write_bitmaps and refs:
        from .bitmap import generate_bitmap, write_bitmap
        from .pack import load_pack_index_file

        if progress:
            progress("Generating bitmap index\r".encode("ascii"))

        # Load the index we just wrote
        with open(target_index_path, "rb") as idx_file:
            pack_index = load_pack_index_file(
                os.path.basename(target_index_path),
                idx_file,
                self.object_format,
            )

        # Generate the bitmap
        bitmap = generate_bitmap(
            pack_index=pack_index,
            object_store=self,
            refs=refs,
            pack_checksum=pack_sha,
            include_hash_cache=self.pack_write_bitmap_hash_cache,
            include_lookup_table=self.pack_write_bitmap_lookup_table,
            progress=lambda msg: (
                progress(msg.encode("ascii"))
                if progress and isinstance(msg, str)
                else None
            ),
        )

        # Write the bitmap
        target_bitmap_path = pack_base_name + ".bitmap"
        write_bitmap(target_bitmap_path, bitmap)

        if progress:
            progress("Bitmap index written\r".encode("ascii"))

    # Add the pack to the store and return it.
    final_pack = Pack(
        pack_base_name,
        object_format=self.object_format,
        delta_window_size=self.pack_delta_window_size,
        window_memory=self.pack_window_memory,
        delta_cache_size=self.pack_delta_cache_size,
        depth=self.pack_depth,
        threads=self.pack_threads,
        big_file_threshold=self.pack_big_file_threshold,
    )
    final_pack.check_length_and_checksum()
    # Extract just the hash from pack_base_name (/path/to/pack-HASH -> HASH)
    pack_hash = os.path.basename(pack_base_name)[len("pack-") :]
    self._add_cached_pack(pack_hash, final_pack)
    return final_pack
def add_thin_pack(
    self,
    read_all: Callable[[int], bytes],
    read_some: Callable[[int], bytes] | None,
    progress: Callable[..., None] | None = None,
) -> "Pack":
    """Add a new thin pack to this object store.

    Thin packs are packs that contain deltas with parents that exist
    outside the pack. They should never be placed in the object store
    directly, and always indexed and completed as they are copied.

    Args:
        read_all: Read function that blocks until the number of
            requested bytes are read.
        read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        progress: Optional progress reporting function.
    Returns: A Pack object pointing at the now-completed thin pack in the
        objects/pack directory.
    """
    import tempfile

    fd, tmp_path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
    with os.fdopen(fd, "w+b") as tmp_file:
        os.chmod(tmp_path, PACK_MODE)
        # Index while copying; external delta bases are resolved via get_raw.
        indexer = PackIndexer(
            tmp_file,
            self.object_format.hash_func,
            resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
        )
        copier = PackStreamCopier(
            self.object_format.hash_func,
            read_all,
            read_some,
            tmp_file,
            delta_iter=indexer,  # type: ignore[arg-type]
        )
        copier.verify(progress=progress)
        return self._complete_pack(
            tmp_file, tmp_path, len(copier), indexer, progress=progress
        )
def add_pack(
    self,
) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
    """Add a new pack to this object store.

    Returns: Fileobject to write to, a commit function to
        call when the pack is finished and an abort
        function.
    """
    import tempfile

    # Write into the pack directory itself so the final rename in
    # _complete_pack stays on the same filesystem.
    fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
    f = os.fdopen(fd, "w+b")
    mask = self.file_mode if self.file_mode is not None else PACK_MODE
    os.chmod(path, mask)

    def commit() -> "Pack | None":
        # An empty file means no objects were written; just clean up.
        if f.tell() > 0:
            f.seek(0)

            with PackData(path, file=f, object_format=self.object_format) as pd:
                indexer = PackIndexer.for_pack_data(
                    pd,
                    resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
                )
                return self._complete_pack(f, path, len(pd), indexer)  # type: ignore[arg-type]
        else:
            f.close()
            os.remove(path)
            return None

    def abort() -> None:
        # Discard the partially written pack file.
        f.close()
        os.remove(path)

    return f, commit, abort  # type: ignore[return-value]
def add_object(self, obj: ShaFile) -> None:
    """Add a single object to this object store.

    Args:
        obj: Object to add
    """
    # Use the correct hash algorithm for the object ID
    obj_id = ObjectID(obj.get_id(self.object_format))
    path = self._get_shafile_path(obj_id)
    fan_out_dir = os.path.dirname(path)
    try:
        os.mkdir(fan_out_dir)
        if self.dir_mode is not None:
            os.chmod(fan_out_dir, self.dir_mode)
    except FileExistsError:
        pass
    if os.path.exists(path):
        # Already there, no need to write again
        return
    mask = PACK_MODE if self.file_mode is None else self.file_mode
    with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f:
        f.write(
            obj.as_legacy_object(compression_level=self.loose_compression_level)
        )
2066 @classmethod
2067 def init(
2068 cls,
2069 path: str | os.PathLike[str],
2070 *,
2071 file_mode: int | None = None,
2072 dir_mode: int | None = None,
2073 object_format: "ObjectFormat | None" = None,
2074 ) -> "DiskObjectStore":
2075 """Initialize a new disk object store.
2077 Creates the necessary directory structure for a Git object store.
2079 Args:
2080 path: Path where the object store should be created
2081 file_mode: Optional file permission mask for shared repository
2082 dir_mode: Optional directory permission mask for shared repository
2083 object_format: Hash algorithm to use (SHA1 or SHA256)
2085 Returns:
2086 New DiskObjectStore instance
2087 """
2088 try:
2089 os.mkdir(path)
2090 if dir_mode is not None:
2091 os.chmod(path, dir_mode)
2092 except FileExistsError:
2093 pass
2094 info_path = os.path.join(path, "info")
2095 pack_path = os.path.join(path, PACKDIR)
2096 os.mkdir(info_path)
2097 os.mkdir(pack_path)
2098 if dir_mode is not None:
2099 os.chmod(info_path, dir_mode)
2100 os.chmod(pack_path, dir_mode)
2101 return cls(
2102 path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format
2103 )
2105 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
2106 """Iterate over all object SHAs with the given prefix.
2108 Args:
2109 prefix: Hex prefix to search for (as bytes)
2111 Returns:
2112 Iterator of object SHAs (as ObjectID) matching the prefix
2113 """
2114 if len(prefix) < 2:
2115 yield from super().iter_prefix(prefix)
2116 return
2117 seen = set()
2118 dir = prefix[:2].decode()
2119 rest = prefix[2:].decode()
2120 try:
2121 for name in os.listdir(os.path.join(self.path, dir)):
2122 if name.startswith(rest):
2123 sha = ObjectID(os.fsencode(dir + name))
2124 if sha not in seen:
2125 seen.add(sha)
2126 yield sha
2127 except FileNotFoundError:
2128 pass
2130 for p in self.packs:
2131 bin_prefix = (
2132 binascii.unhexlify(prefix)
2133 if len(prefix) % 2 == 0
2134 else binascii.unhexlify(prefix[:-1])
2135 )
2136 for bin_sha in p.index.iter_prefix(bin_prefix):
2137 sha = sha_to_hex(bin_sha)
2138 if sha.startswith(prefix) and sha not in seen:
2139 seen.add(sha)
2140 yield sha
2141 for alternate in self.alternates:
2142 for sha in alternate.iter_prefix(prefix):
2143 if sha not in seen:
2144 seen.add(sha)
2145 yield sha
2147 def get_commit_graph(self) -> "CommitGraph | None":
2148 """Get the commit graph for this object store.
2150 Returns:
2151 CommitGraph object if available, None otherwise
2152 """
2153 if not self._use_commit_graph:
2154 return None
2156 if self._commit_graph is None:
2157 from .commit_graph import read_commit_graph
2159 # Look for commit graph in our objects directory
2160 graph_file = os.path.join(self.path, "info", "commit-graph")
2161 if os.path.exists(graph_file):
2162 self._commit_graph = read_commit_graph(graph_file)
2163 return self._commit_graph
2165 def get_midx(self) -> MultiPackIndex | None:
2166 """Get the multi-pack-index for this object store.
2168 Returns:
2169 MultiPackIndex object if available, None otherwise
2171 Raises:
2172 ValueError: If MIDX file is corrupt
2173 OSError: If MIDX file cannot be read
2174 """
2175 if not self._use_midx:
2176 return None
2178 if self._midx is None:
2179 # Look for MIDX in pack directory
2180 midx_file = os.path.join(self.pack_dir, "multi-pack-index")
2181 if os.path.exists(midx_file):
2182 self._midx = load_midx(midx_file)
2183 return self._midx
2185 def _get_pack_by_name(self, pack_name: str) -> Pack:
2186 """Get a pack by its base name.
2188 Args:
2189 pack_name: Base name of the pack (e.g., 'pack-abc123.pack' or 'pack-abc123.idx')
2191 Returns:
2192 Pack object
2194 Raises:
2195 KeyError: If pack doesn't exist
2196 """
2197 # Remove .pack or .idx extension if present
2198 if pack_name.endswith(".pack"):
2199 base_name = pack_name[:-5]
2200 elif pack_name.endswith(".idx"):
2201 base_name = pack_name[:-4]
2202 else:
2203 base_name = pack_name
2205 # Check if already in cache
2206 if base_name in self._pack_cache:
2207 return self._pack_cache[base_name]
2209 # Load the pack
2210 pack_path = os.path.join(self.pack_dir, base_name)
2211 if not os.path.exists(pack_path + ".pack"):
2212 raise KeyError(f"Pack {pack_name} not found")
2214 pack = Pack(
2215 pack_path,
2216 object_format=self.object_format,
2217 delta_window_size=self.pack_delta_window_size,
2218 window_memory=self.pack_window_memory,
2219 delta_cache_size=self.pack_delta_cache_size,
2220 depth=self.pack_depth,
2221 threads=self.pack_threads,
2222 big_file_threshold=self.pack_big_file_threshold,
2223 )
2224 self._pack_cache[base_name] = pack
2225 return pack
2227 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
2228 """Check if a particular object is present by SHA1 and is packed.
2230 This checks the MIDX first if available, then falls back to checking
2231 individual pack indexes.
2233 Args:
2234 sha: Binary SHA of the object
2236 Returns:
2237 True if the object is in a pack file
2238 """
2239 # Check MIDX first for faster lookup
2240 midx = self.get_midx()
2241 if midx is not None and sha in midx:
2242 return True
2244 # Fall back to checking individual packs
2245 return super().contains_packed(sha)
    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw fulltext for an object.

        This uses the MIDX if available for faster lookups.

        Args:
            name: SHA for the object (20 bytes binary or 40 bytes hex)

        Returns:
            Tuple with numeric type and object contents

        Raises:
            KeyError: If object not found
        """
        # Dispatch on length: hex IDs are 40 (SHA1) or 64 (SHA256) bytes,
        # binary IDs 20 or 32. Anything else is a programming error.
        sha: RawObjectID
        if len(name) in (40, 64):
            # name is ObjectID (hex), convert to RawObjectID
            # Support both SHA1 (40) and SHA256 (64)
            sha = hex_to_sha(cast(ObjectID, name))
        elif len(name) in (20, 32):
            # name is already RawObjectID (binary)
            # Support both SHA1 (20) and SHA256 (32)
            sha = RawObjectID(name)
        else:
            raise AssertionError(f"Invalid object name {name!r}")

        # Try MIDX first for faster lookup
        midx = self.get_midx()
        if midx is not None:
            result = midx.object_offset(sha)
            if result is not None:
                pack_name, _offset = result
                try:
                    pack = self._get_pack_by_name(pack_name)
                    # NOTE: the offset from the MIDX is not used here; the
                    # pack resolves the object by SHA again.
                    return pack.get_raw(sha)
                except (KeyError, PackFileDisappeared):
                    # Pack disappeared or object not found, fall through to standard lookup
                    pass

        # Fall back to the standard implementation
        return super().get_raw(name)
2289 def write_midx(self) -> bytes:
2290 """Write a multi-pack-index file for this object store.
2292 Creates a MIDX file that indexes all pack files in the pack directory.
2294 Returns:
2295 SHA-1 checksum of the written MIDX file
2297 Raises:
2298 OSError: If the pack directory doesn't exist or MIDX can't be written
2299 """
2300 from .midx import write_midx_file
2302 # Get all pack files
2303 packs = self.packs
2304 if not packs:
2305 # No packs to index
2306 return b"\x00" * 20
2308 # Collect entries from all packs
2309 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
2311 for pack in packs:
2312 # Git stores .idx extension in MIDX, not .pack
2313 pack_name = os.path.basename(pack._basename) + ".idx"
2314 entries = list(pack.index.iterentries())
2315 pack_entries.append((pack_name, entries))
2317 # Write MIDX file
2318 midx_path = os.path.join(self.pack_dir, "multi-pack-index")
2319 return write_midx_file(midx_path, pack_entries)
    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
            refs: List of refs to include. If None, includes all refs from object store.
            reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.
        """
        from .commit_graph import get_reachable_commits

        if refs is None:
            # Get all commit objects from the object store
            all_refs = []
            # Iterate through all objects to find commits
            for sha in self:
                try:
                    obj = self[sha]
                    if obj.type_name == b"commit":
                        all_refs.append(sha)
                except KeyError:
                    # Object disappeared between listing and lookup; skip it.
                    continue
        else:
            # Use provided refs
            all_refs = list(refs)

        if not all_refs:
            return  # No commits to include

        if reachable:
            # Get all reachable commits
            commit_ids = get_reachable_commits(self, all_refs)
        else:
            # Just use the direct ref targets - ensure they're hex ObjectIDs
            commit_ids = []
            for ref in all_refs:
                if isinstance(ref, bytes) and len(ref) == self.object_format.hex_length:
                    # Already hex ObjectID
                    commit_ids.append(ref)
                elif (
                    isinstance(ref, bytes) and len(ref) == self.object_format.oid_length
                ):
                    # Binary SHA, convert to hex ObjectID
                    from .objects import sha_to_hex

                    commit_ids.append(sha_to_hex(RawObjectID(ref)))
                else:
                    # Assume it's already correct format
                    commit_ids.append(ref)

        if commit_ids:
            # Write commit graph directly to our object store path
            # Generate the commit graph
            from .commit_graph import generate_commit_graph

            graph = generate_commit_graph(self, commit_ids)

            if graph.entries:
                # Ensure the info directory exists
                info_dir = os.path.join(self.path, "info")
                os.makedirs(info_dir, exist_ok=True)
                if self.dir_mode is not None:
                    os.chmod(info_dir, self.dir_mode)

                # Write using GitFile for atomic operation
                graph_path = os.path.join(info_dir, "commit-graph")
                mask = self.file_mode if self.file_mode is not None else 0o644
                with GitFile(graph_path, "wb", mask=mask) as f:
                    assert isinstance(
                        f, _GitFile
                    )  # GitFile in write mode always returns _GitFile
                    graph.write_to_file(f)

        # Clear cached commit graph so it gets reloaded
        self._commit_graph = None
2398 def prune(self, grace_period: int | None = None) -> None:
2399 """Prune/clean up this object store.
2401 This removes temporary files that were left behind by interrupted
2402 pack operations. These are files that start with ``tmp_pack_`` in the
2403 repository directory or files with .pack extension but no corresponding
2404 .idx file in the pack directory.
2406 Args:
2407 grace_period: Grace period in seconds for removing temporary files.
2408 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
2409 """
2410 import glob
2412 if grace_period is None:
2413 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
2415 # Clean up tmp_pack_* files in the repository directory
2416 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
2417 # Check if file is old enough (more than grace period)
2418 mtime = os.path.getmtime(tmp_file)
2419 if time.time() - mtime > grace_period:
2420 os.remove(tmp_file)
2422 # Clean up orphaned .pack files without corresponding .idx files
2423 try:
2424 pack_dir_contents = os.listdir(self.pack_dir)
2425 except FileNotFoundError:
2426 return
2428 pack_files = {}
2429 idx_files = set()
2431 for name in pack_dir_contents:
2432 if name.endswith(".pack"):
2433 base_name = name[:-5] # Remove .pack extension
2434 pack_files[base_name] = name
2435 elif name.endswith(".idx"):
2436 base_name = name[:-4] # Remove .idx extension
2437 idx_files.add(base_name)
2439 # Remove .pack files without corresponding .idx files
2440 for base_name, pack_name in pack_files.items():
2441 if base_name not in idx_files:
2442 pack_path = os.path.join(self.pack_dir, pack_name)
2443 # Check if file is old enough (more than grace period)
2444 mtime = os.path.getmtime(pack_path)
2445 if time.time() - mtime > grace_period:
2446 os.remove(pack_path)
2448 def close(self) -> None:
2449 """Close the object store and release resources.
2451 This method closes all cached pack files, MIDX, and frees associated resources.
2452 Can be called multiple times safely.
2453 """
2454 # Close MIDX if it's loaded
2455 if self._midx is not None:
2456 self._midx.close()
2457 self._midx = None
2459 # Close alternates
2460 if self._alternates is not None:
2461 for alt in self._alternates:
2462 alt.close()
2463 self._alternates = None
2465 # Call parent class close to handle pack files
2466 super().close()
class MemoryObjectStore(PackCapableObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize a MemoryObjectStore.

        Creates an empty in-memory object store.

        Args:
            object_format: Hash algorithm to use (defaults to SHA1)
        """
        super().__init__(object_format=object_format)
        # Maps hex ObjectID -> ShaFile; the sole backing storage.
        self._data: dict[ObjectID, ShaFile] = {}
        self.pack_compression_level = -1

    def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        # Normalize either the hex or the binary SHA form to the hex form
        # used as the dictionary key.
        if len(sha) == self.object_format.hex_length:
            return cast(ObjectID, sha)
        elif len(sha) == self.object_format.oid_length:
            return sha_to_hex(cast(RawObjectID, sha))
        else:
            raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        # Nothing is ever packed in a memory store.
        return False

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self) -> list[Pack]:
        """List with pack objects."""
        return []

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile:
        """Retrieve an object by SHA.

        Args:
            name: SHA of the object (as hex string or bytes)

        Returns:
            Copy of the ShaFile object

        Raises:
            KeyError: If the object is not found
        """
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        # Store a copy so later mutation of the caller's object does not
        # change what the store returns.
        self._data[obj.id] = obj.copy()

    def add_objects(
        self,
        objects: Iterable[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress reporting function.
        """
        # The path component is ignored; only the objects are stored.
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        from tempfile import SpooledTemporaryFile

        # Spools to disk only once the incoming pack exceeds the size cap.
        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            size = f.tell()
            if size > 0:
                f.seek(0)
                # Inflate the received pack and add its objects one by one.
                p = PackData.from_file(f, self.object_format, size)
                for obj in PackInflater.for_pack_data(p, self.get_raw):  # type: ignore[arg-type]
                    self.add_object(obj)
                p.close()
                f.close()
            else:
                # Nothing was written; just discard the buffer.
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort  # type: ignore[return-value]

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator of UnpackedObject instances
            progress: Optional progress reporting function.
        """
        if count == 0:
            return

        # Since MemoryObjectStore doesn't support pack files, we need to
        # extract individual objects. To handle deltas properly, we write
        # to a temporary pack and then use PackInflater to resolve them.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                object_format=self.object_format,
            )
        except BaseException:
            abort()
            raise
        else:
            commit()

    def add_thin_pack(
        self,
        read_all: Callable[[int], bytes],
        read_some: Callable[[int], bytes] | None,
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
            progress: Optional progress reporting function.
        """
        f, commit, abort = self.add_pack()
        try:
            # Copy the incoming stream into the temporary pack; commit()
            # then inflates it (resolving external delta bases via get_raw).
            copier = PackStreamCopier(
                self.object_format.hash_func,
                read_all,
                read_some,
                f,
            )
            copier.verify()
        except BaseException:
            abort()
            raise
        else:
            commit()
class ObjectIterator(Protocol):
    """Interface for iterating over objects.

    Structural protocol: any object providing a compatible ``iterobjects``
    method satisfies it, without inheriting from this class.
    """

    def iterobjects(self) -> Iterator[ShaFile]:
        """Iterate over all objects.

        Returns:
            Iterator of ShaFile objects
        """
        raise NotImplementedError(self.iterobjects)
def tree_lookup_path(
    lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile],
    root_sha: ObjectID | RawObjectID,
    path: bytes,
) -> tuple[int, ObjectID]:
    """Look up an object in a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup
    Returns: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        # The given SHA must name a tree, not a blob/commit/tag.
        raise NotTreeError(root_sha)
    # Delegate the per-component walk to the Tree itself.
    return root.lookup_path(lookup_obj, path)
def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Collect SHA1s of files and directories for specified tree.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    assert isinstance(filetree, Tree)
    for _name, mode, entry_sha in filetree.iteritems():
        assert mode is not None
        assert entry_sha is not None
        # Skip submodule (gitlink) entries — they live in another repo —
        # and anything we have already collected.
        if S_ISGITLINK(mode) or entry_sha in kset:
            continue
        kset.add(entry_sha)
        if stat.S_ISDIR(mode):
            # Recurse into subtrees.
            _collect_filetree_revs(obj_store, entry_sha, kset)
def _split_commits_and_tags(
    obj_store: ObjectContainer,
    lst: Iterable[ObjectID],
    *,
    unknown: str = "error",
) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]:
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, controlled by the unknown parameter.

    Args:
        obj_store: Object store to get objects by SHA1 from
        lst: Collection of commit and tag SHAs
        unknown: How to handle unknown objects: "error", "warn", or "ignore"
    Returns: A tuple of (commits, tags, others) SHA1s
    """
    import logging

    if unknown not in ("error", "warn", "ignore"):
        raise ValueError(
            f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}"
        )

    commits: set[ObjectID] = set()
    tags: set[ObjectID] = set()
    others: set[ObjectID] = set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if unknown == "error":
                raise
            if unknown == "warn":
                logging.warning(
                    "Object %s not found in object store", sha.decode("ascii")
                )
            # unknown == "ignore": silently drop it.
            continue
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # Follow the tag to whatever it points at; commits reached this
            # way are folded into the commits set as well.
            pointed_to = obj.object[1]
            sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                obj_store, [pointed_to], unknown=unknown
            )
            commits |= sub_commits
            tags |= sub_tags
            others |= sub_others
        else:
            others.add(sha)
    return (commits, tags, others)
class MissingObjectFinder:
    """Find the objects missing from another object store.

    Args:
        object_store: Object store containing at least all objects to be
            sent
        haves: SHA1s of commits not to send (already present in target)
        wants: SHA1s of commits to send
        progress: Optional function to report progress to.
        get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(
        self,
        object_store: BaseObjectStore,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[[bytes], None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
    ) -> None:
        """Initialize a MissingObjectFinder.

        Args:
            object_store: Object store containing objects
            haves: SHA1s of objects already present in target
            wants: SHA1s of objects to send
            shallow: Set of shallow commit SHA1s
            progress: Optional progress reporting callback
            get_tagged: Function returning dict of pointed-to sha -> tag sha
            get_parents: Function for getting commit parents
        """
        self.object_store = object_store
        if shallow is None:
            shallow = set()
        self._get_parents = get_parents
        reachability = object_store.get_reachability_provider()
        # process Commits and Tags differently
        # haves may list commits/tags not available locally (silently ignore them).
        # wants should only contain valid SHAs (fail fast if not).
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, unknown="ignore"
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, unknown="error"
        )
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = reachability.get_reachable_commits(
            have_commits, exclude=None, shallow=shallow
        )
        # all_missing - complete set of commits between haves and wants
        # common_commits - boundary commits directly encountered when traversing wants
        # We use _collect_ancestors here because we need the exact boundary behavior:
        # commits that are in all_ancestors and directly reachable from wants,
        # but we don't traverse past them. This is hard to express with the
        # reachability abstraction alone.
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            frozenset(all_ancestors),
            shallow=frozenset(shallow),
            get_parents=self._get_parents,
        )

        self.remote_has: set[ObjectID] = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.remote_has.add(h)
            cmt = object_store[h]
            assert isinstance(cmt, Commit)
            # Get tree objects for this commit
            tree_objects = reachability.get_tree_objects([cmt.tree])
            self.remote_has.update(tree_objects)
        # record tags we have as visited, too
        for t in have_tags:
            self.remote_has.add(t)
        # sha_done doubles as the "already processed" set during iteration.
        self.sha_done = set(self.remote_has)

        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        # Each queue entry is (sha, name-hint, type_num-hint, is_leaf).
        self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
            (w, None, Commit.type_num, False) for w in missing_commits
        }
        missing_tags = want_tags.difference(have_tags)
        self.objects_to_send.update(
            {(w, None, Tag.type_num, False) for w in missing_tags}
        )
        missing_others = want_others.difference(have_others)
        self.objects_to_send.update({(w, None, None, False) for w in missing_others})

        if progress is None:
            # No-op progress sink keeps __next__ free of None checks.
            self.progress: Callable[[bytes], None] = lambda x: None
        else:
            self.progress = progress
        self._tagged = (get_tagged and get_tagged()) or {}

    def get_remote_has(self) -> set[ObjectID]:
        """Get the set of SHAs the remote has.

        Returns:
            Set of SHA1s that the remote side already has
        """
        return self.remote_has

    def add_todo(
        self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
    ) -> None:
        """Add objects to the todo list.

        Args:
            entries: Iterable of tuples (sha, name, type_num, is_leaf)
        """
        # Drop anything that has already been processed.
        self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])

    def __next__(self) -> tuple[ObjectID, PackHint | None]:
        """Get the next object to send.

        Returns:
            Tuple of (sha, pack_hint)

        Raises:
            StopIteration: When no more objects to send
        """
        # Pop entries until we find one not yet processed.
        while True:
            if not self.objects_to_send:
                self.progress(
                    f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
                )
                raise StopIteration
            (sha, name, type_num, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            # Queue whatever this object references: a commit queues its
            # tree, a tree its entries, a tag its target.
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, b"", Tree.type_num, False)])
            elif isinstance(o, Tree):
                todos = []
                for n, m, s in o.iteritems():
                    assert m is not None
                    assert n is not None
                    assert s is not None
                    if not S_ISGITLINK(m):
                        todos.append(
                            (
                                s,
                                n,
                                (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
                                not stat.S_ISDIR(m),
                            )
                        )
                self.add_todo(todos)
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, None, True)])
        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
        if type_num is None:
            pack_hint = None
        else:
            pack_hint = (type_num, name)
        return (sha, pack_hint)

    def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Return iterator over objects to send.

        Returns:
            Self (this class implements the iterator protocol)
        """
        return self
class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    heads: set[ObjectID]
    """Revisions without descendants in the local repo."""

    get_parents: Callable[[ObjectID], list[ObjectID]]
    """Function to retrieve parents in the local repo."""

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents: Callable[[ObjectID], list[ObjectID]],
        shallow: set[ObjectID] | None = None,
        update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
        | None = None,
    ) -> None:
        """Create a new instance.

        Args:
            local_heads: Heads to start search with
            get_parents: Function for finding the parents of a SHA1.
            shallow: Set of shallow commits.
            update_shallow: Function to update shallow commits.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps a visited sha to its parent list; the value becomes None
        # once ack() has fully processed the entry.
        self.parents: dict[ObjectID, list[ObjectID] | None] = {}
        self.shallow = shallow if shallow is not None else set()
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            # TODO: support SHA256
            raise ValueError(f"unexpected sha {sha!r} received")
        frontier = {sha}

        # Walk backwards through already-visited history, dropping every
        # acked revision from the candidate heads. Stop when no heads are
        # left or no known ancestors remain to follow.
        while self.heads:
            self.heads -= frontier
            expanded: set[ObjectID] = set()
            for node in frontier:
                known = self.parents.get(node)
                if known is not None:
                    expanded.update(known)
                # Mark as fully processed.
                self.parents[node] = None
            if not expanded:
                break
            frontier = expanded

    def next(self) -> ObjectID | None:
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        candidate = self.heads.pop()
        try:
            parent_shas = self.get_parents(candidate)
        except KeyError:
            return None
        self.parents[candidate] = parent_shas
        # Unvisited parents become new candidate heads.
        self.heads.update(p for p in parent_shas if p not in self.parents)
        return candidate

    __next__ = next
def commit_tree_changes(
    object_store: BaseObjectStore,
    tree: ObjectID | Tree,
    changes: Sequence[tuple[bytes, int | None, ObjectID | None]],
) -> ObjectID:
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. See the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
        object_store: Object store to store new objects in
            and retrieve old ones from.
        tree: Original tree root (SHA or Tree object)
        changes: changes to apply
    Returns: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    # Handle both Tree object and SHA
    if isinstance(tree, Tree):
        tree_obj: Tree = tree
    else:
        sha_obj = object_store[tree]
        assert isinstance(sha_obj, Tree)
        tree_obj = sha_obj
    # Changes below an immediate subdirectory are grouped per subdirectory
    # and applied via a recursive call; top-level changes apply directly.
    nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {}
    for path, new_mode, new_sha in changes:
        try:
            (dirname, subpath) = path.split(b"/", 1)
        except ValueError:
            # No slash: this entry lives directly in the current tree.
            if new_sha is None:
                del tree_obj[path]
            else:
                assert new_mode is not None
                tree_obj[path] = (new_mode, new_sha)
        else:
            nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree_id: ObjectID | Tree = tree_obj[name][1]
        except KeyError:
            # For new directories, pass an empty Tree object
            orig_subtree_id = Tree()
        subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
        subtree = object_store[subtree_id]
        assert isinstance(subtree, Tree)
        if len(subtree) == 0:
            # Subtree became empty: remove the directory entry entirely.
            del tree_obj[name]
        else:
            tree_obj[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree_obj)
    return tree_obj.id
class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores.

    Reads consult each store in ``bases`` in order; writes go to
    ``add_store`` when one is provided, otherwise write operations raise
    NotImplementedError.
    """

    def __init__(
        self,
        bases: list[BaseObjectStore],
        add_store: BaseObjectStore | None = None,
    ) -> None:
        """Initialize an OverlayObjectStore.

        Args:
            bases: List of base object stores to overlay
            add_store: Optional store to write new objects to

        Raises:
            ValueError: If stores have different hash algorithms
        """
        from .object_format import verify_same_object_format

        # Verify all stores use the same hash algorithm
        store_algorithms = [store.object_format for store in bases]
        # Explicit None test (not truthiness) for consistency with the
        # `self.add_store is None` checks in add_object/add_objects.
        if add_store is not None:
            store_algorithms.append(add_store.object_format)

        object_format = verify_same_object_format(*store_algorithms)

        super().__init__(object_format=object_format)
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object: ShaFile) -> None:
        """Add a single object to the store.

        Args:
            object: Object to add

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> Pack | None:
        """Add multiple objects to the store.

        Args:
            objects: Iterator of objects to add
            progress: Optional progress reporting callback

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Fixed: previously referenced self.add_object in this error.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self) -> list[Pack]:
        """Get the list of packs from all overlaid stores.

        Returns:
            Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
            Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[ObjectID] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of unpacked objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo: set[ObjectID | RawObjectID] = set(shas)
        found: set[ObjectID | RawObjectID] = set()
        for b in self.bases:
            # Pass a fresh difference set so we never mutate a set the
            # base store's generator may still be iterating over
            # (mirrors the pattern used in iterobjects_subset; the old
            # code called todo.remove() mid-iteration).
            current_todo = todo - found
            for o in b.iter_unpacked_subset(
                current_todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                found.add(o.sha())
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]:
        """Get the raw object data from the overlaid stores.

        Args:
            sha_id: SHA of the object

        Returns:
            Tuple of (type_num, raw_data)

        Raises:
            KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is packed in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is loose in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False
def read_packs_file(f: BinaryIO) -> Iterator[str]:
    """Yield the packs listed in a packs file.

    Args:
        f: Binary file object positioned at the start of a packs file;
           lines of the form ``P <name>`` yield ``<name>``, other
           non-empty lines are ignored.
    """
    for entry in filter(None, f.read().splitlines()):
        prefix, rest = entry.split(b" ", 1)
        if prefix == b"P":
            yield os.fsdecode(rest)
class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend.

    Such backends hold pack files only; there is no loose-object storage,
    so all loose-object operations below are no-ops. Subclasses implement
    the _iter_pack_names/_get_pack/_upload_pack hooks for the actual bucket.
    """

    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects (always empty here)."""
        return iter([])

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None:
        # Bucket stores keep no loose objects, so every lookup misses.
        return None

    def delete_loose_object(self, sha: ObjectID) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
            sha: SHA of the object to delete
        """
        # Doesn't exist..

    def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
        """Pack loose objects. Returns number of objects packed.

        BucketBasedObjectStore doesn't support loose objects, so this is a no-op.

        Args:
            progress: Optional progress reporting callback (ignored)
        """
        return 0

    def _remove_pack_by_name(self, name: str) -> None:
        """Remove a pack by name. Subclasses should implement this."""
        raise NotImplementedError(self._remove_pack_by_name)

    def _iter_pack_names(self) -> Iterator[str]:
        """Yield the names of all packs in the bucket (subclass hook)."""
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name: str) -> Pack:
        """Open the named pack from the bucket (subclass hook)."""
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self) -> list[Pack]:
        """Sync the in-memory pack cache with the bucket's current pack list.

        Returns:
            List of packs that appeared since the last refresh.
        """
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(
        self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
    ) -> None:
        """Upload a pack file and its index to the bucket (subclass hook)."""
        raise NotImplementedError

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        # Spool the incoming pack in memory; only spill to disk once it
        # exceeds PACK_SPOOL_FILE_MAX_SIZE.
        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit() -> Pack | None:
            # Nothing was written: treat as an empty/no-op pack.
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)

            p = PackData(pf.name, file=pf, object_format=self.object_format)
            entries = p.sorted_entries()
            # NOTE(review): basename is derived via iter_sha1 even though the
            # store carries a configurable object_format — confirm behavior
            # for non-SHA-1 formats.
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf, self.object_format)
            # If a pack with the same checksum already exists, skip the
            # upload entirely and hand back the existing pack.
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)  # type: ignore[arg-type]
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        # Aborting is simply closing the spooled temporary file.
        return pf, commit, pf.close  # type: ignore[return-value]
def _collect_ancestors(
    store: ObjectContainer,
    heads: Iterable[ObjectID],
    common: frozenset[ObjectID] = frozenset(),
    shallow: frozenset[ObjectID] = frozenset(),
    get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
) -> tuple[set[ObjectID], set[ObjectID]]:
    """Collect all ancestors of heads up to (excluding) those in common.

    Args:
      store: Object store to get commits from
      heads: commits to start from
      common: commits to end at, or empty set to walk repository
        completely
      shallow: Set of shallow commits
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: a tuple (A, B) where A - all commits reachable
        from heads but not present in common, B - common (shared) elements
        that are directly reachable from heads
    """
    from collections import deque

    bases = set()
    commits = set()
    # deque gives O(1) popleft; the previous list.pop(0) was O(n) per
    # dequeue, making this BFS quadratic on long histories. FIFO order
    # is preserved exactly.
    queue: deque[ObjectID] = deque(heads)

    # Try to use commit graph if available
    commit_graph = store.get_commit_graph()

    while queue:
        e = queue.popleft()
        if e in common:
            bases.add(e)
        elif e not in commits:
            commits.add(e)
            if e in shallow:
                # Shallow boundary: record the commit but don't walk past it.
                continue

            # Try to use commit graph for parent lookup
            parents = None
            if commit_graph:
                parents = commit_graph.get_parents(e)

            if parents is None:
                # Fall back to loading the object
                cmt = store[e]
                assert isinstance(cmt, Commit)
                parents = get_parents(cmt)

            queue.extend(parents)
    return (commits, bases)
def iter_tree_contents(
    store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
) -> Iterator[TreeEntry]:
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
      store: Object store to get trees from
      tree_id: SHA1 of the tree.
      include_trees: If True, include tree objects in the iteration.

    Yields: TreeEntry namedtuples for all the objects in a tree.
    """
    if tree_id is None:
        return
    pending = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while pending:
        current = pending.pop()
        assert current.mode is not None
        is_dir = stat.S_ISDIR(current.mode)
        if is_dir:
            assert current.sha is not None
            subtree = store[current.sha]
            assert isinstance(subtree, Tree)
            assert current.path is not None
            children = [
                item.in_path(current.path)
                for item in subtree.iteritems(name_order=True)
            ]
            # Push in reverse so the name-ordered children pop first-to-last.
            pending.extend(reversed(children))
        if include_trees or not is_dir:
            yield current
def iter_commit_contents(
    store: ObjectContainer,
    commit: Commit | ObjectID | RawObjectID,
    *,
    include: Sequence[str | bytes | Path] | None = None,
) -> Iterator[TreeEntry]:
    """Iterate the contents of the repository at the specified commit.

    This is a wrapper around iter_tree_contents() and
    tree_lookup_path() to simplify the common task of getting the
    contents of a repo at a particular commit. See also
    dulwich.index.build_file_from_blob() for writing individual files
    to disk.

    Args:
      store: Object store to get trees from
      commit: Commit object, or SHA1 of a commit
      include: if provided, only the entries whose paths are in the
        list, or whose parent tree is in the list, will be
        included. Note that duplicate or overlapping paths
        (e.g. ["foo", "foo/bar"]) may result in duplicate entries

    Yields: TreeEntry namedtuples for all matching files in a commit.
    """
    if isinstance(commit, Commit):
        sha = commit.id
    else:
        sha = commit
    obj = store[sha]
    if not isinstance(obj, Commit):
        raise TypeError(
            f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
        )
    commit = obj
    # Paths supplied as str/Path are encoded with the commit's own encoding.
    encoding = commit.encoding or "utf-8"
    if include is None:
        # No filter: a single empty path means "the whole tree".
        include_bytes: list[bytes] = [b""]
    else:
        include_bytes = []
        for candidate in include:
            if isinstance(candidate, bytes):
                include_bytes.append(candidate)
            else:
                include_bytes.append(str(candidate).encode(encoding))

    for path in include_bytes:
        mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
        # A directory path expands to every file beneath it; a file path
        # yields just that single entry.
        target = store[obj_id]
        if isinstance(target, Tree):
            for entry in iter_tree_contents(store, obj_id):
                yield entry.in_path(path)
        else:
            yield TreeEntry(path, mode, obj_id)
def peel_sha(
    store: ObjectContainer, sha: ObjectID | RawObjectID
) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
      store: Object store to get objects from
      sha: The object SHA to peel.
    Returns: The fully-peeled SHA1 of a tag object, after peeling all
        intermediate tags; if the original ref does not point to a tag,
        this will equal the original SHA1.
    """
    unpeeled = store[sha]
    current = unpeeled
    kind = object_class(current.type_name)
    # Follow the chain of tag objects until a non-tag is reached.
    while kind is Tag:
        assert isinstance(current, Tag)
        kind, sha = current.object
        current = store[sha]
    return unpeeled, current
class GraphTraversalReachability:
    """Naive graph traversal implementation of ObjectReachabilityProvider.

    This implementation wraps existing graph traversal functions
    (_collect_ancestors, _collect_filetree_revs) to provide the standard
    reachability interface without any performance optimizations.
    """

    def __init__(self, object_store: BaseObjectStore) -> None:
        """Initialize the graph traversal provider.

        Args:
            object_store: Object store to query
        """
        self.store = object_store

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Uses _collect_ancestors for commit traversal.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        excluded = frozenset(exclude) if exclude else frozenset()
        boundaries = frozenset(shallow) if shallow else frozenset()
        reachable, _shared = _collect_ancestors(
            self.store, heads, excluded, boundaries
        )
        return reachable

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Uses _collect_filetree_revs for tree traversal.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        collected: set[ObjectID] = set()
        for root_sha in tree_shas:
            _collect_filetree_revs(self.store, root_sha, collected)
        return collected

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs)
        """
        wanted = set(commits)
        reachable = set(wanted)

        # Gather the root trees of every commit we can load.
        roots: list[ObjectID] = []
        for commit_sha in wanted:
            try:
                candidate = self.store[commit_sha]
            except KeyError:
                # Commit not in store, skip
                continue
            if isinstance(candidate, Commit):
                roots.append(candidate.tree)

        # Fold in every tree/blob under those roots.
        reachable.update(self.get_tree_objects(roots))

        # Subtract everything reachable from the excluded commits.
        if exclude_commits:
            reachable -= self.get_reachable_objects(exclude_commits, None)

        return reachable
class BitmapReachability:
    """Bitmap-accelerated implementation of ObjectReachabilityProvider.

    This implementation uses packfile bitmap indexes where available to
    accelerate reachability queries. Falls back to graph traversal when
    bitmaps don't cover the requested commits.
    """

    def __init__(self, object_store: "PackBasedObjectStore") -> None:
        """Initialize the bitmap provider.

        Args:
            object_store: Pack-based object store with bitmap support
        """
        self.store = object_store
        # Fallback to graph traversal for operations not yet optimized
        self._fallback = GraphTraversalReachability(object_store)

    def _combine_commit_bitmaps(
        self,
        commit_shas: set[ObjectID],
        exclude_shas: set[ObjectID] | None = None,
    ) -> tuple["EWAHBitmap", "Pack"] | None:
        """Combine bitmaps for multiple commits using OR, with optional exclusion.

        Args:
            commit_shas: Set of commit SHAs to combine
            exclude_shas: Optional set of commit SHAs to exclude

        Returns:
            Tuple of (combined_bitmap, pack) or None if bitmaps unavailable;
            a None return signals callers to fall back to graph traversal.
        """
        from .bitmap import find_commit_bitmaps

        # Find bitmaps for the commits
        commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)

        # If we can't find bitmaps for all commits, return None
        if len(commit_bitmaps) < len(commit_shas):
            return None

        # Combine bitmaps using OR
        combined_bitmap = None
        result_pack = None

        for commit_sha in commit_shas:
            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)

            if commit_bitmap is None:
                return None

            if combined_bitmap is None:
                combined_bitmap = commit_bitmap
                result_pack = pack
            elif pack == result_pack:
                # Same pack, can OR directly
                combined_bitmap = combined_bitmap | commit_bitmap
            else:
                # Different packs, can't combine: bit positions are only
                # meaningful relative to a single pack's index.
                return None

        # Handle exclusions if provided
        if exclude_shas and result_pack and combined_bitmap:
            # Excludes must come from the same pack as the combined result.
            exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])

            if len(exclude_bitmaps) == len(exclude_shas):
                # All excludes have bitmaps, compute exclusion
                exclude_combined = None

                for commit_sha in exclude_shas:
                    _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)

                    # NOTE(review): breaking here leaves exclude_combined
                    # holding only the excludes combined so far, and that
                    # partial union is still subtracted below — confirm
                    # partial exclusion is intended rather than returning
                    # None to force the graph-traversal fallback.
                    if exclude_bitmap is None:
                        break

                    if exclude_combined is None:
                        exclude_combined = exclude_bitmap
                    else:
                        exclude_combined = exclude_combined | exclude_bitmap

                # Subtract excludes using set difference
                if exclude_combined:
                    combined_bitmap = combined_bitmap - exclude_combined

        if combined_bitmap and result_pack:
            return (combined_bitmap, result_pack)
        return None

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads using bitmaps where possible.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        from .bitmap import bitmap_to_object_shas

        # If shallow is specified, fall back to graph traversal
        # (bitmaps don't support shallow boundaries well)
        if shallow:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        heads_set = set(heads)
        exclude_set = set(exclude) if exclude else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(heads_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        combined_bitmap, result_pack = result

        # Convert bitmap to commit SHAs, filtering for commits only
        pack_bitmap = result_pack.bitmap
        if pack_bitmap is None:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)
        commit_type_filter = pack_bitmap.commit_bitmap
        return bitmap_to_object_shas(
            combined_bitmap, result_pack.index, commit_type_filter
        )

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        # Tree traversal doesn't benefit much from bitmaps, use fallback
        return self._fallback.get_tree_objects(tree_shas)

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects reachable from commits using bitmaps.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs)
        """
        from .bitmap import bitmap_to_object_shas

        commits_set = set(commits)
        exclude_set = set(exclude_commits) if exclude_commits else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(commits_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_objects(commits, exclude_commits)

        combined_bitmap, result_pack = result

        # Convert bitmap to all object SHAs (no type filter)
        return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)