1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
24"""Git object store interfaces and implementation."""
26__all__ = [
27 "DEFAULT_TEMPFILE_GRACE_PERIOD",
28 "INFODIR",
29 "PACKDIR",
30 "PACK_MODE",
31 "BaseObjectStore",
32 "BitmapReachability",
33 "BucketBasedObjectStore",
34 "DiskObjectStore",
35 "GraphTraversalReachability",
36 "GraphWalker",
37 "MemoryObjectStore",
38 "MissingObjectFinder",
39 "ObjectIterator",
40 "ObjectReachabilityProvider",
41 "ObjectStoreGraphWalker",
42 "OverlayObjectStore",
43 "PackBasedObjectStore",
44 "PackCapableObjectStore",
45 "PackContainer",
46 "commit_tree_changes",
47 "find_shallow",
48 "get_depth",
49 "iter_commit_contents",
50 "iter_tree_contents",
51 "peel_sha",
52 "read_packs_file",
53 "tree_lookup_path",
54]
56import binascii
57import os
58import stat
59import sys
60import time
61import warnings
62from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
63from contextlib import suppress
64from io import BytesIO
65from pathlib import Path
66from typing import (
67 TYPE_CHECKING,
68 BinaryIO,
69 Protocol,
70 cast,
71)
73from .errors import NotTreeError
74from .file import GitFile, _GitFile
75from .midx import MultiPackIndex, load_midx
76from .objects import (
77 S_ISGITLINK,
78 ZERO_SHA,
79 Blob,
80 Commit,
81 ObjectID,
82 RawObjectID,
83 ShaFile,
84 Tag,
85 Tree,
86 TreeEntry,
87 hex_to_filename,
88 hex_to_sha,
89 object_class,
90 sha_to_hex,
91 valid_hexsha,
92)
93from .pack import (
94 PACK_SPOOL_FILE_MAX_SIZE,
95 ObjectContainer,
96 Pack,
97 PackData,
98 PackedObjectContainer,
99 PackFileDisappeared,
100 PackHint,
101 PackIndexer,
102 PackInflater,
103 PackStreamCopier,
104 UnpackedObject,
105 extend_pack,
106 full_unpacked_object,
107 generate_unpacked_objects,
108 iter_sha1,
109 load_pack_index_file,
110 pack_objects_to_data,
111 write_pack_data,
112 write_pack_index,
113)
114from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX
115from .refs import Ref
117if TYPE_CHECKING:
118 from .bitmap import EWAHBitmap
119 from .commit_graph import CommitGraph
120 from .config import Config
121 from .diff_tree import RenameDetector
122 from .pack import Pack
125class GraphWalker(Protocol):
126 """Protocol for graph walker objects."""
128 def __next__(self) -> ObjectID | None:
129 """Return the next object SHA to visit."""
130 ...
132 def ack(self, sha: ObjectID) -> None:
133 """Acknowledge that an object has been received."""
134 ...
136 def nak(self) -> None:
137 """Nothing in common was found."""
138 ...
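# Illustrative sketch (not part of dulwich's API): a minimal in-memory walker
# that satisfies the GraphWalker protocol above. The class name and behaviour
# are invented for demonstration; real callers normally use
# ObjectStoreGraphWalker, defined later in this module.
class _ExampleListGraphWalker:
    """Propose candidate SHAs from a fixed list and record acknowledgements."""

    def __init__(self, candidates: list[ObjectID]) -> None:
        self._candidates = iter(candidates)
        self.acked: list[ObjectID] = []
        self.nak_called = False

    def __next__(self) -> ObjectID | None:
        # Return None once there is nothing left to propose.
        return next(self._candidates, None)

    def ack(self, sha: ObjectID) -> None:
        self.acked.append(sha)

    def nak(self) -> None:
        self.nak_called = True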
141class ObjectReachabilityProvider(Protocol):
142 """Protocol for computing object reachability queries.
144 This abstraction allows reachability computations to be backed by either
145 naive graph traversal or optimized bitmap indexes, with a consistent interface.
146 """
148 def get_reachable_commits(
149 self,
150 heads: Iterable[ObjectID],
151 exclude: Iterable[ObjectID] | None = None,
152 shallow: Set[ObjectID] | None = None,
153 ) -> set[ObjectID]:
154 """Get all commits reachable from heads, excluding those in exclude.
156 Args:
157 heads: Starting commit SHAs
158 exclude: Commit SHAs to exclude (and their ancestors)
159 shallow: Set of shallow commit boundaries (traversal stops here)
161 Returns:
162 Set of commit SHAs reachable from heads but not from exclude
163 """
164 ...
166 def get_reachable_objects(
167 self,
168 commits: Iterable[ObjectID],
169 exclude_commits: Iterable[ObjectID] | None = None,
170 ) -> set[ObjectID]:
171 """Get all objects (commits + trees + blobs) reachable from commits.
173 Args:
174 commits: Starting commit SHAs
175 exclude_commits: Commits whose objects should be excluded
177 Returns:
178 Set of all object SHAs (commits, trees, blobs, tags)
179 """
180 ...
182 def get_tree_objects(
183 self,
184 tree_shas: Iterable[ObjectID],
185 ) -> set[ObjectID]:
186 """Get all trees and blobs reachable from the given trees.
188 Args:
189 tree_shas: Starting tree SHAs
191 Returns:
192 Set of tree and blob SHAs
193 """
194 ...
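# Illustrative sketch (not part of dulwich's API): how the protocol above is
# typically consumed. get_reachability_provider() is defined on BaseObjectStore
# below, and GraphTraversalReachability / BitmapReachability are the concrete
# providers later in this module; the helper name here is invented.
def _example_reachability(store: "BaseObjectStore", head: ObjectID) -> set[ObjectID]:
    """Return every object reachable from ``head`` using the best provider."""
    provider = store.get_reachability_provider()
    commits = provider.get_reachable_commits([head])
    return provider.get_reachable_objects(commits)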
197INFODIR = "info"
198PACKDIR = "pack"
200# use permissions consistent with Git; just readable by everyone
201# TODO: should packs also be non-writable on Windows? if so, that
202# would require some rather significant adjustments to the test suite
203PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
205# Grace period for cleaning up temporary pack files (in seconds)
206# Matches git's default of 2 weeks
207DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
210def find_shallow(
211 store: ObjectContainer, heads: Iterable[ObjectID], depth: int
212) -> tuple[set[ObjectID], set[ObjectID]]:
213 """Find shallow commits according to a given depth.
215 Args:
216 store: An ObjectStore for looking up objects.
217 heads: Iterable of head SHAs to start walking from.
218 depth: The depth of ancestors to include. A depth of one includes
219 only the heads themselves.
220 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
221 considered shallow and unshallow according to the arguments. Note that
222 these sets may overlap if a commit is reachable along multiple paths.
223 """
224 parents: dict[ObjectID, list[ObjectID]] = {}
225 commit_graph = store.get_commit_graph()
227 def get_parents(sha: ObjectID) -> list[ObjectID]:
228 result = parents.get(sha, None)
229 if not result:
230 # Try to use commit graph first if available
231 if commit_graph:
232 graph_parents = commit_graph.get_parents(sha)
233 if graph_parents is not None:
234 result = graph_parents
235 parents[sha] = result
236 return result
237 # Fall back to loading the object
238 commit = store[sha]
239 assert isinstance(commit, Commit)
240 result = commit.parents
241 parents[sha] = result
242 return result
244 todo = [] # stack of (sha, depth)
245 for head_sha in heads:
246 obj = store[head_sha]
247 # Peel tags if necessary
248 while isinstance(obj, Tag):
249 _, sha = obj.object
250 obj = store[sha]
251 if isinstance(obj, Commit):
252 todo.append((obj.id, 1))
254 not_shallow = set()
255 shallow = set()
256 while todo:
257 sha, cur_depth = todo.pop()
258 if cur_depth < depth:
259 not_shallow.add(sha)
260 new_depth = cur_depth + 1
261 todo.extend((p, new_depth) for p in get_parents(sha))
262 else:
263 shallow.add(sha)
265 return shallow, not_shallow
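# Illustrative sketch (not part of dulwich's API): exercising find_shallow() on
# a tiny linear history held in a MemoryObjectStore (defined later in this
# module, so the name resolves at call time). All contents and identities below
# are invented for the example.
def _example_find_shallow() -> tuple[set[ObjectID], set[ObjectID]]:
    """Build c1 <- c2 <- c3 and report the shallow boundary for depth=2."""
    store = MemoryObjectStore()
    blob = Blob.from_string(b"example contents\n")
    tree = Tree()
    tree.add(b"file", 0o100644, blob.id)
    store.add_object(blob)
    store.add_object(tree)
    parents: list[ObjectID] = []
    head = ZERO_SHA
    for i in range(3):
        commit = Commit()
        commit.tree = tree.id
        commit.parents = parents
        commit.author = commit.committer = b"Example <example@example.com>"
        commit.author_time = commit.commit_time = i
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"commit %d" % i
        store.add_object(commit)
        parents = [commit.id]
        head = commit.id
    # With depth=2 the head is "not shallow" and its parent is the shallow
    # boundary; the grandparent is never reached.
    return find_shallow(store, [head], depth=2)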
268def get_depth(
269 store: ObjectContainer,
270 head: ObjectID,
271 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
272 max_depth: int | None = None,
273) -> int:
274 """Return the current available depth for the given head.
276 For commits with multiple parents, the largest possible depth will be
277 returned.
279 Args:
280 store: Object store to search in
281 head: commit to start from
282 get_parents: optional function for getting the parents of a commit
283 max_depth: maximum depth to search
284 """
285 if head not in store:
286 return 0
287 current_depth = 1
288 queue = [(head, current_depth)]
289 commit_graph = store.get_commit_graph()
291 while queue and (max_depth is None or current_depth < max_depth):
292 e, depth = queue.pop(0)
293 current_depth = max(current_depth, depth)
295 # Try to use commit graph for parent lookup if available
296 parents = None
297 if commit_graph:
298 parents = commit_graph.get_parents(e)
300 if parents is None:
301 # Fall back to loading the object
302 cmt = store[e]
303 if isinstance(cmt, Tag):
304 _cls, sha = cmt.object
305 cmt = store[sha]
306 parents = get_parents(cmt)
308 queue.extend((parent, depth + 1) for parent in parents if parent in store)
309 return current_depth
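# Illustrative sketch (not part of dulwich's API): get_depth() on the same kind
# of three-commit chain as the previous example, this time against an empty
# tree. Expected result: 3. Identities are invented for the example.
def _example_get_depth() -> int:
    """Return the depth of a freshly built three-commit chain."""
    store = MemoryObjectStore()
    tree = Tree()
    store.add_object(tree)
    parents: list[ObjectID] = []
    head = ZERO_SHA
    for i in range(3):
        commit = Commit()
        commit.tree = tree.id
        commit.parents = parents
        commit.author = commit.committer = b"Example <example@example.com>"
        commit.author_time = commit.commit_time = i
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"commit %d" % i
        store.add_object(commit)
        parents = [commit.id]
        head = commit.id
    # max_depth caps the walk: get_depth(store, head, max_depth=2) returns 2.
    return get_depth(store, head)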
312class PackContainer(Protocol):
313 """Protocol for containers that can accept pack files."""
315 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
316 """Add a new pack."""
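# Illustrative sketch (not part of dulwich's API): the commit/abort discipline
# behind the add_pack() contract that PackContainer describes. The helper name
# is invented; the pack bytes are whatever the caller already has (for example
# streamed from a client).
def _example_fill_pack(container: PackContainer, pack_bytes: bytes) -> None:
    """Copy ready-made pack data into a container, committing on success."""
    f, commit, abort = container.add_pack()
    try:
        f.write(pack_bytes)
    except BaseException:
        abort()
        raise
    else:
        commit()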
319class BaseObjectStore:
320 """Object store interface."""
322 def determine_wants_all(
323 self, refs: Mapping[Ref, ObjectID], depth: int | None = None
324 ) -> list[ObjectID]:
325 """Determine which objects are wanted based on refs."""
327 def _want_deepen(sha: ObjectID) -> bool:
328 if not depth:
329 return False
330 if depth == DEPTH_INFINITE:
331 return True
332 return depth > self._get_depth(sha)
334 return [
335 sha
336 for (ref, sha) in refs.items()
337 if (sha not in self or _want_deepen(sha))
338 and not ref.endswith(PEELED_TAG_SUFFIX)
339 and not sha == ZERO_SHA
340 ]
342 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
343 """Check if a particular object is present by SHA1 and is loose."""
344 raise NotImplementedError(self.contains_loose)
346 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
347 """Check if a particular object is present by SHA1 and is packed."""
348 return False # Default implementation for stores that don't support packing
350 def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
351 """Check if a particular object is present by SHA1.
353 This method makes no distinction between loose and packed objects.
354 """
355 return self.contains_loose(sha1)
357 @property
358 def packs(self) -> list[Pack]:
359 """Iterable of pack objects."""
360 raise NotImplementedError
362 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
363 """Obtain the raw text for an object.
365 Args:
366 name: sha for the object.
367 Returns: tuple with numeric type and object contents.
368 """
369 raise NotImplementedError(self.get_raw)
371 def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
372 """Obtain an object by SHA1."""
373 type_num, uncomp = self.get_raw(sha1)
374 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
376 def __iter__(self) -> Iterator[ObjectID]:
377 """Iterate over the SHAs that are present in this store."""
378 raise NotImplementedError(self.__iter__)
380 def add_object(self, obj: ShaFile) -> None:
381 """Add a single object to this object store."""
382 raise NotImplementedError(self.add_object)
384 def add_objects(
385 self,
386 objects: Sequence[tuple[ShaFile, str | None]],
387 progress: Callable[..., None] | None = None,
388 ) -> "Pack | None":
389 """Add a set of objects to this object store.
391 Args:
392 objects: Iterable over a list of (object, path) tuples
393 progress: Optional progress callback
394 """
395 raise NotImplementedError(self.add_objects)
397 def get_reachability_provider(
398 self, prefer_bitmap: bool = True
399 ) -> ObjectReachabilityProvider:
400 """Get a reachability provider for this object store.
402 Returns an ObjectReachabilityProvider that can efficiently compute
403 object reachability queries. Subclasses can override this to provide
404 optimized implementations (e.g., using bitmap indexes).
406 Args:
407 prefer_bitmap: Whether to prefer bitmap-based reachability if
408 available.
410 Returns:
411 ObjectReachabilityProvider instance
412 """
413 return GraphTraversalReachability(self)
415 def tree_changes(
416 self,
417 source: ObjectID | None,
418 target: ObjectID | None,
419 want_unchanged: bool = False,
420 include_trees: bool = False,
421 change_type_same: bool = False,
422 rename_detector: "RenameDetector | None" = None,
423 paths: Sequence[bytes] | None = None,
424 ) -> Iterator[
425 tuple[
426 tuple[bytes | None, bytes | None],
427 tuple[int | None, int | None],
428 tuple[ObjectID | None, ObjectID | None],
429 ]
430 ]:
431 """Find the differences between the contents of two trees.
433 Args:
434 source: SHA1 of the source tree
435 target: SHA1 of the target tree
436 want_unchanged: Whether unchanged files should be reported
437 include_trees: Whether to include trees
438 change_type_same: Whether to report files changing
439 type in the same entry.
440 rename_detector: RenameDetector object for detecting renames.
441 paths: Optional list of paths to filter to (as bytes).
442 Returns: Iterator over tuples with
443 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
444 """
445 from .diff_tree import tree_changes
447 for change in tree_changes(
448 self,
449 source,
450 target,
451 want_unchanged=want_unchanged,
452 include_trees=include_trees,
453 change_type_same=change_type_same,
454 rename_detector=rename_detector,
455 paths=paths,
456 ):
457 old_path = change.old.path if change.old is not None else None
458 new_path = change.new.path if change.new is not None else None
459 old_mode = change.old.mode if change.old is not None else None
460 new_mode = change.new.mode if change.new is not None else None
461 old_sha = change.old.sha if change.old is not None else None
462 new_sha = change.new.sha if change.new is not None else None
463 yield (
464 (old_path, new_path),
465 (old_mode, new_mode),
466 (old_sha, new_sha),
467 )
469 def iter_tree_contents(
470 self, tree_id: ObjectID, include_trees: bool = False
471 ) -> Iterator[TreeEntry]:
472 """Iterate the contents of a tree and all subtrees.
474 Iteration is depth-first pre-order, as in e.g. os.walk.
476 Args:
477 tree_id: SHA1 of the tree.
478 include_trees: If True, include tree objects in the iteration.
479 Returns: Iterator over TreeEntry namedtuples for all the objects in a
480 tree.
481 """
482 warnings.warn(
483 "Please use dulwich.object_store.iter_tree_contents",
484 DeprecationWarning,
485 stacklevel=2,
486 )
487 return iter_tree_contents(self, tree_id, include_trees=include_trees)
489 def iterobjects_subset(
490 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
491 ) -> Iterator[ShaFile]:
492 """Iterate over a subset of objects in the store.
494 Args:
495 shas: Iterable of object SHAs to retrieve
496 allow_missing: If True, skip missing objects; if False, raise KeyError
498 Returns:
499 Iterator of ShaFile objects
501 Raises:
502 KeyError: If an object is missing and allow_missing is False
503 """
504 for sha in shas:
505 try:
506 yield self[sha]
507 except KeyError:
508 if not allow_missing:
509 raise
511 def iter_unpacked_subset(
512 self,
513 shas: Iterable[ObjectID | RawObjectID],
514 include_comp: bool = False,
515 allow_missing: bool = False,
516 convert_ofs_delta: bool = True,
517 ) -> "Iterator[UnpackedObject]":
518 """Iterate over unpacked objects for a subset of SHAs.
520 Default implementation that converts ShaFile objects to UnpackedObject.
521 Subclasses may override for more efficient unpacked access.
523 Args:
524 shas: Iterable of object SHAs to retrieve
525 include_comp: Whether to include compressed data (ignored in base
526 implementation)
527 allow_missing: If True, skip missing objects; if False, raise
528 KeyError
529 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
530 base implementation)
532 Returns:
533 Iterator of UnpackedObject instances
535 Raises:
536 KeyError: If an object is missing and allow_missing is False
537 """
538 from .pack import UnpackedObject
540 for sha in shas:
541 try:
542 obj = self[sha]
543 # Convert ShaFile to UnpackedObject
544 unpacked = UnpackedObject(
545 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
546 )
547 yield unpacked
548 except KeyError:
549 if not allow_missing:
550 raise
552 def find_missing_objects(
553 self,
554 haves: Iterable[ObjectID],
555 wants: Iterable[ObjectID],
556 shallow: Set[ObjectID] | None = None,
557 progress: Callable[..., None] | None = None,
558 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
559 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
560 ) -> Iterator[tuple[ObjectID, PackHint | None]]:
561 """Find the missing objects required for a set of revisions.
563 Args:
564 haves: Iterable over SHAs already in common.
565 wants: Iterable over SHAs of objects to fetch.
566 shallow: Set of shallow commit SHA1s to skip
567 progress: Simple progress function that will be called with
568 updated progress strings.
569 get_tagged: Function that returns a dict of pointed-to sha ->
570 tag sha for including tags.
571 get_parents: Optional function for getting the parents of a
572 commit.
573 Returns: Iterator over (sha, pack hint) tuples.
574 """
575 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
576 finder = MissingObjectFinder(
577 self,
578 haves=haves,
579 wants=wants,
580 shallow=shallow,
581 progress=progress,
582 get_tagged=get_tagged,
583 get_parents=get_parents,
584 )
585 return iter(finder)
587 def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
588 """Find which revisions this store has in common using graphwalker.
590 Args:
591 graphwalker: A graphwalker object.
592 Returns: List of SHAs that are in common
593 """
594 haves = []
595 sha = next(graphwalker)
596 while sha:
597 if sha in self:
598 haves.append(sha)
599 graphwalker.ack(sha)
600 sha = next(graphwalker)
601 return haves
603 def generate_pack_data(
604 self,
605 have: Iterable[ObjectID],
606 want: Iterable[ObjectID],
607 *,
608 shallow: Set[ObjectID] | None = None,
609 progress: Callable[..., None] | None = None,
610 ofs_delta: bool = True,
611 ) -> tuple[int, Iterator[UnpackedObject]]:
612 """Generate pack data objects for a set of wants/haves.
614 Args:
615 have: List of SHA1s of objects that should not be sent
616 want: List of SHA1s of objects that should be sent
617 shallow: Set of shallow commit SHA1s to skip
618 ofs_delta: Whether OFS deltas can be included
619 progress: Optional progress reporting method
620 """
621 # Note that the pack-specific implementation below is more efficient,
622 # as it reuses deltas
623 missing_objects = MissingObjectFinder(
624 self, haves=have, wants=want, shallow=shallow, progress=progress
625 )
626 object_ids = list(missing_objects)
627 return pack_objects_to_data(
628 [(self[oid], path) for oid, path in object_ids],
629 ofs_delta=ofs_delta,
630 progress=progress,
631 )
633 def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
634 """Peel all tags from a SHA.
636 Args:
637 sha: The object SHA to peel.
638 Returns: The fully-peeled SHA1 of a tag object, after peeling all
639 intermediate tags; if the original ref does not point to a tag,
640 this will equal the original SHA1.
641 """
642 warnings.warn(
643 "Please use dulwich.object_store.peel_sha()",
644 DeprecationWarning,
645 stacklevel=2,
646 )
647 return peel_sha(self, sha)[1].id
649 def _get_depth(
650 self,
651 head: ObjectID,
652 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
653 max_depth: int | None = None,
654 ) -> int:
655 """Return the current available depth for the given head.
657 For commits with multiple parents, the largest possible depth will be
658 returned.
660 Args:
661 head: commit to start from
662 get_parents: optional function for getting the parents of a commit
663 max_depth: maximum depth to search
664 """
665 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
667 def close(self) -> None:
668 """Close any files opened by this object store."""
669 # Default implementation is a NO-OP
671 def prune(self, grace_period: int | None = None) -> None:
672 """Prune/clean up this object store.
674 This includes removing orphaned temporary files and other
675 housekeeping tasks. Default implementation is a NO-OP.
677 Args:
678 grace_period: Grace period in seconds for removing temporary files.
679 If None, uses the default grace period.
680 """
681 # Default implementation is a NO-OP
683 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
684 """Iterate over all SHA1s that start with a given prefix.
686 The default implementation is a naive iteration over all objects.
687 However, subclasses may override this method with more efficient
688 implementations.
689 """
690 for sha in self:
691 if sha.startswith(prefix):
692 yield sha
694 def get_commit_graph(self) -> "CommitGraph | None":
695 """Get the commit graph for this object store.
697 Returns:
698 CommitGraph object if available, None otherwise
699 """
700 return None
702 def write_commit_graph(
703 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
704 ) -> None:
705 """Write a commit graph file for this object store.
707 Args:
708 refs: List of refs to include. If None, includes all refs from object store.
709 reachable: If True, includes all commits reachable from refs.
710 If False, only includes the direct ref targets.
712 Note:
713 The base implementation raises NotImplementedError. Subclasses should
714 override this method to provide commit graph writing functionality.
715 """
716 raise NotImplementedError(self.write_commit_graph)
718 def get_object_mtime(self, sha: ObjectID) -> float:
719 """Get the modification time of an object.
721 Args:
722 sha: SHA1 of the object
724 Returns:
725 Modification time as seconds since epoch
727 Raises:
728 KeyError: if the object is not found
729 """
730 # Default implementation raises KeyError
731 # Subclasses should override to provide actual mtime
732 raise KeyError(sha)
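# Illustrative sketch (not part of dulwich's API): the dictionary-style surface
# that BaseObjectStore defines, exercised through MemoryObjectStore (declared
# later in this module). The blob contents and helper name are invented.
def _example_base_store_usage() -> None:
    """Store a blob and read it back through the BaseObjectStore interface."""
    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello, object store\n")
    store.add_object(blob)
    assert blob.id in store
    type_num, raw = store.get_raw(blob.id)
    assert type_num == Blob.type_num and raw == blob.as_raw_string()
    # Prefix lookup and subset iteration use the naive default implementations.
    assert list(store.iter_prefix(blob.id[:4])) == [blob.id]
    assert [o.id for o in store.iterobjects_subset([blob.id])] == [blob.id]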
735class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
736 """Object store that supports pack operations.
738 This is a base class for object stores that can handle pack files,
739 including both disk-based and memory-based stores.
740 """
742 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
743 """Add a new pack to this object store.
745 Returns: Tuple of (file, commit_func, abort_func)
746 """
747 raise NotImplementedError(self.add_pack)
749 def add_pack_data(
750 self,
751 count: int,
752 unpacked_objects: Iterator["UnpackedObject"],
753 progress: Callable[..., None] | None = None,
754 ) -> "Pack | None":
755 """Add pack data to this object store.
757 Args:
758 count: Number of objects
759 unpacked_objects: Iterator over unpacked objects
760 progress: Optional progress callback
761 """
762 raise NotImplementedError(self.add_pack_data)
764 def get_unpacked_object(
765 self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
766 ) -> "UnpackedObject":
767 """Get a raw unresolved object.
769 Args:
770 sha1: SHA-1 hash of the object
771 include_comp: Whether to include compressed data
773 Returns:
774 UnpackedObject instance
775 """
776 from .pack import UnpackedObject
778 obj = self[sha1]
779 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())
781 def iterobjects_subset(
782 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
783 ) -> Iterator[ShaFile]:
784 """Iterate over a subset of objects.
786 Args:
787 shas: Iterable of object SHAs to retrieve
788 allow_missing: If True, skip missing objects
790 Returns:
791 Iterator of ShaFile objects
792 """
793 for sha in shas:
794 try:
795 yield self[sha]
796 except KeyError:
797 if not allow_missing:
798 raise
801class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
802 """Object store that uses pack files for storage.
804 This class provides a base implementation for object stores that use
805 Git pack files as their primary storage mechanism. It handles caching
806 of open pack files and provides configuration for pack file operations.
807 """
809 def __init__(
810 self,
811 pack_compression_level: int = -1,
812 pack_index_version: int | None = None,
813 pack_delta_window_size: int | None = None,
814 pack_window_memory: int | None = None,
815 pack_delta_cache_size: int | None = None,
816 pack_depth: int | None = None,
817 pack_threads: int | None = None,
818 pack_big_file_threshold: int | None = None,
819 ) -> None:
820 """Initialize a PackBasedObjectStore.
822 Args:
823 pack_compression_level: Compression level for pack files (-1 to 9)
824 pack_index_version: Pack index version to use
825 pack_delta_window_size: Window size for delta compression
826 pack_window_memory: Maximum memory to use for delta window
827 pack_delta_cache_size: Cache size for delta operations
828 pack_depth: Maximum depth for pack deltas
829 pack_threads: Number of threads to use for packing
830 pack_big_file_threshold: Threshold for treating files as "big"
831 """
832 self._pack_cache: dict[str, Pack] = {}
833 self.pack_compression_level = pack_compression_level
834 self.pack_index_version = pack_index_version
835 self.pack_delta_window_size = pack_delta_window_size
836 self.pack_window_memory = pack_window_memory
837 self.pack_delta_cache_size = pack_delta_cache_size
838 self.pack_depth = pack_depth
839 self.pack_threads = pack_threads
840 self.pack_big_file_threshold = pack_big_file_threshold
842 def get_reachability_provider(
843 self,
844 prefer_bitmaps: bool = True,
845 ) -> ObjectReachabilityProvider:
846 """Get the best reachability provider for the object store.
848 Args:
849 prefer_bitmaps: Whether to use bitmaps if available
851 Returns:
852 ObjectReachabilityProvider implementation (either bitmap-accelerated
853 or graph traversal)
854 """
855 if prefer_bitmaps:
856 # Check if any packs have bitmaps
857 has_bitmap = False
858 for pack in self.packs:
859 try:
860 # Try to access bitmap property
861 if pack.bitmap is not None:
862 has_bitmap = True
863 break
864 except FileNotFoundError:
865 # Bitmap file doesn't exist for this pack
866 continue
868 if has_bitmap:
869 return BitmapReachability(self)
871 # Fall back to graph traversal
872 return GraphTraversalReachability(self)
874 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
875 """Add a new pack to this object store."""
876 raise NotImplementedError(self.add_pack)
878 def add_pack_data(
879 self,
880 count: int,
881 unpacked_objects: Iterator[UnpackedObject],
882 progress: Callable[..., None] | None = None,
883 ) -> "Pack | None":
884 """Add pack data to this object store.
886 Args:
887 count: Number of items to add
888 unpacked_objects: Iterator of UnpackedObject instances
889 progress: Optional progress callback
890 """
891 if count == 0:
892 # Don't bother writing an empty pack file
893 return None
894 f, commit, abort = self.add_pack()
895 try:
896 write_pack_data(
897 f.write,
898 unpacked_objects,
899 num_records=count,
900 progress=progress,
901 compression_level=self.pack_compression_level,
902 )
903 except BaseException:
904 abort()
905 raise
906 else:
907 return commit()
909 @property
910 def alternates(self) -> list["BaseObjectStore"]:
911 """Return list of alternate object stores."""
912 return []
914 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
915 """Check if a particular object is present by SHA1 and is packed.
917 This does not check alternates.
918 """
919 for pack in self.packs:
920 try:
921 if sha in pack:
922 return True
923 except PackFileDisappeared:
924 pass
925 return False
927 def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
928 """Check if a particular object is present by SHA1.
930 This method makes no distinction between loose and packed objects.
931 """
932 if self.contains_packed(sha) or self.contains_loose(sha):
933 return True
934 for alternate in self.alternates:
935 if sha in alternate:
936 return True
937 return False
939 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
940 """Add a newly appeared pack to the cache by path."""
941 prev_pack = self._pack_cache.get(base_name)
942 if prev_pack is not pack:
943 self._pack_cache[base_name] = pack
944 if prev_pack:
945 prev_pack.close()
947 def generate_pack_data(
948 self,
949 have: Iterable[ObjectID],
950 want: Iterable[ObjectID],
951 *,
952 shallow: Set[ObjectID] | None = None,
953 progress: Callable[..., None] | None = None,
954 ofs_delta: bool = True,
955 ) -> tuple[int, Iterator[UnpackedObject]]:
956 """Generate pack data objects for a set of wants/haves.
958 Args:
959 have: List of SHA1s of objects that should not be sent
960 want: List of SHA1s of objects that should be sent
961 shallow: Set of shallow commit SHA1s to skip
962 ofs_delta: Whether OFS deltas can be included
963 progress: Optional progress reporting method
964 """
965 missing_objects = MissingObjectFinder(
966 self, haves=have, wants=want, shallow=shallow, progress=progress
967 )
968 remote_has = missing_objects.get_remote_has()
969 object_ids = list(missing_objects)
970 return len(object_ids), generate_unpacked_objects(
971 self,
972 object_ids,
973 progress=progress,
974 ofs_delta=ofs_delta,
975 other_haves=remote_has,
976 )
978 def _clear_cached_packs(self) -> None:
979 pack_cache = self._pack_cache
980 self._pack_cache = {}
981 while pack_cache:
982 (_name, pack) = pack_cache.popitem()
983 pack.close()
985 def _iter_cached_packs(self) -> Iterator[Pack]:
986 return iter(self._pack_cache.values())
988 def _update_pack_cache(self) -> list[Pack]:
989 raise NotImplementedError(self._update_pack_cache)
991 def close(self) -> None:
992 """Close the object store and release resources.
994 This method closes all cached pack files and frees associated resources.
995 """
996 self._clear_cached_packs()
998 @property
999 def packs(self) -> list[Pack]:
1000 """List with pack objects."""
1001 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
1003 def count_pack_files(self) -> int:
1004 """Count the number of pack files.
1006 Returns:
1007 Number of pack files (excluding those with .keep files)
1008 """
1009 count = 0
1010 for pack in self.packs:
1011 # Check if there's a .keep file for this pack
1012 keep_path = pack._basename + ".keep"
1013 if not os.path.exists(keep_path):
1014 count += 1
1015 return count
1017 def _iter_alternate_objects(self) -> Iterator[ObjectID]:
1018 """Iterate over the SHAs of all the objects in alternate stores."""
1019 for alternate in self.alternates:
1020 yield from alternate
1022 def _iter_loose_objects(self) -> Iterator[ObjectID]:
1023 """Iterate over the SHAs of all loose objects."""
1024 raise NotImplementedError(self._iter_loose_objects)
1026 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
1027 raise NotImplementedError(self._get_loose_object)
1029 def delete_loose_object(self, sha: ObjectID) -> None:
1030 """Delete a loose object.
1032 This method only handles loose objects. For packed objects,
1033 use repack(exclude=...) to exclude them during repacking.
1034 """
1035 raise NotImplementedError(self.delete_loose_object)
1037 def _remove_pack(self, pack: "Pack") -> None:
1038 raise NotImplementedError(self._remove_pack)
1040 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
1041 """Pack loose objects.
1043 Args:
1044 progress: Optional progress reporting callback
1046 Returns: Number of objects packed
1047 """
1048 objects: list[tuple[ShaFile, None]] = []
1049 for sha in self._iter_loose_objects():
1050 obj = self._get_loose_object(sha)
1051 if obj is not None:
1052 objects.append((obj, None))
1053 self.add_objects(objects, progress=progress)
1054 for obj, path in objects:
1055 self.delete_loose_object(obj.id)
1056 return len(objects)
1058 def repack(
1059 self,
1060 exclude: Set[bytes] | None = None,
1061 progress: Callable[[str], None] | None = None,
1062 ) -> int:
1063 """Repack the packs in this repository.
1065 Note that this implementation is fairly naive and currently keeps all
1066 objects in memory while it repacks.
1068 Args:
1069 exclude: Optional set of object SHAs to exclude from repacking
1070 progress: Optional progress reporting callback
1071 """
1072 if exclude is None:
1073 exclude = set()
1075 loose_objects = set()
1076 excluded_loose_objects = set()
1077 for sha in self._iter_loose_objects():
1078 if sha not in exclude:
1079 obj = self._get_loose_object(sha)
1080 if obj is not None:
1081 loose_objects.add(obj)
1082 else:
1083 excluded_loose_objects.add(sha)
1085 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
1086 old_packs = {p.name(): p for p in self.packs}
1087 for name, pack in old_packs.items():
1088 objects.update(
1089 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
1090 )
1092 # Only create a new pack if there are objects to pack
1093 if objects:
1094 # The name of the consolidated pack might match the name of a
1095 # pre-existing pack. Take care not to remove the newly created
1096 # consolidated pack.
1097 consolidated = self.add_objects(list(objects), progress=progress)
1098 if consolidated is not None:
1099 old_packs.pop(consolidated.name(), None)
1101 # Delete loose objects that were packed
1102 for obj in loose_objects:
1103 if obj is not None:
1104 self.delete_loose_object(obj.id)
1105 # Delete excluded loose objects
1106 for sha in excluded_loose_objects:
1107 self.delete_loose_object(sha)
1108 for name, pack in old_packs.items():
1109 self._remove_pack(pack)
1110 self._update_pack_cache()
1111 return len(objects)
1113 def generate_pack_bitmaps(
1114 self,
1115 refs: dict[Ref, ObjectID],
1116 *,
1117 commit_interval: int | None = None,
1118 progress: Callable[[str], None] | None = None,
1119 ) -> int:
1120 """Generate bitmap indexes for all packs that don't have them.
1122 This generates .bitmap files for packfiles, enabling fast reachability
1123 queries. Equivalent to the bitmap generation part of 'git repack -b'.
1125 Args:
1126 refs: Dictionary of ref names to commit SHAs
1127 commit_interval: Include every Nth commit in bitmap index (None for default)
1128 progress: Optional progress reporting callback
1130 Returns:
1131 Number of bitmaps generated
1132 """
1133 count = 0
1134 for pack in self.packs:
1135 pack.ensure_bitmap(
1136 self, refs, commit_interval=commit_interval, progress=progress
1137 )
1138 count += 1
1140 # Update cache to pick up new bitmaps
1141 self._update_pack_cache()
1143 return count
1145 def __iter__(self) -> Iterator[ObjectID]:
1146 """Iterate over the SHAs that are present in this store."""
1147 self._update_pack_cache()
1148 for pack in self._iter_cached_packs():
1149 try:
1150 yield from pack
1151 except PackFileDisappeared:
1152 pass
1153 yield from self._iter_loose_objects()
1154 yield from self._iter_alternate_objects()
1156 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
1157 """Check if a particular object is present by SHA1 and is loose.
1159 This does not check alternates.
1160 """
1161 return self._get_loose_object(sha) is not None
1163 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
1164 """Obtain the raw fulltext for an object.
1166 Args:
1167 name: sha for the object.
1168 Returns: tuple with numeric type and object contents.
1169 """
1170 if name == ZERO_SHA:
1171 raise KeyError(name)
1172 if len(name) == 40:
1173 sha = hex_to_sha(cast(ObjectID, name))
1174 hexsha = cast(ObjectID, name)
1175 elif len(name) == 20:
1176 sha = cast(RawObjectID, name)
1177 hexsha = None
1178 else:
1179 raise AssertionError(f"Invalid object name {name!r}")
1180 for pack in self._iter_cached_packs():
1181 try:
1182 return pack.get_raw(sha)
1183 except (KeyError, PackFileDisappeared):
1184 pass
1185 if hexsha is None:
1186 hexsha = sha_to_hex(sha)
1187 ret = self._get_loose_object(hexsha)
1188 if ret is not None:
1189 return ret.type_num, ret.as_raw_string()
1190 # Maybe something else has added a pack with the object
1191 # in the mean time?
1192 for pack in self._update_pack_cache():
1193 try:
1194 return pack.get_raw(sha)
1195 except KeyError:
1196 pass
1197 for alternate in self.alternates:
1198 try:
1199 return alternate.get_raw(hexsha)
1200 except KeyError:
1201 pass
1202 raise KeyError(hexsha)
1204 def iter_unpacked_subset(
1205 self,
1206 shas: Iterable[ObjectID | RawObjectID],
1207 include_comp: bool = False,
1208 allow_missing: bool = False,
1209 convert_ofs_delta: bool = True,
1210 ) -> Iterator[UnpackedObject]:
1211 """Iterate over a subset of objects, yielding UnpackedObject instances.
1213 Args:
1214 shas: Set of object SHAs to retrieve
1215 include_comp: Whether to include compressed data
1216 allow_missing: If True, skip missing objects; if False, raise KeyError
1217 convert_ofs_delta: Whether to convert OFS_DELTA objects
1219 Returns:
1220 Iterator of UnpackedObject instances
1222 Raises:
1223 KeyError: If an object is missing and allow_missing is False
1224 """
1225 todo: set[ObjectID | RawObjectID] = set(shas)
1226 for p in self._iter_cached_packs():
1227 for unpacked in p.iter_unpacked_subset(
1228 todo,
1229 include_comp=include_comp,
1230 allow_missing=True,
1231 convert_ofs_delta=convert_ofs_delta,
1232 ):
1233 yield unpacked
1234 hexsha = sha_to_hex(unpacked.sha())
1235 todo.remove(hexsha)
1236 # Maybe something else has added a pack with the object
1237 # in the mean time?
1238 for p in self._update_pack_cache():
1239 for unpacked in p.iter_unpacked_subset(
1240 todo,
1241 include_comp=include_comp,
1242 allow_missing=True,
1243 convert_ofs_delta=convert_ofs_delta,
1244 ):
1245 yield unpacked
1246 hexsha = sha_to_hex(unpacked.sha())
1247 todo.remove(hexsha)
1248 for alternate in self.alternates:
1249 assert isinstance(alternate, PackBasedObjectStore)
1250 for unpacked in alternate.iter_unpacked_subset(
1251 todo,
1252 include_comp=include_comp,
1253 allow_missing=True,
1254 convert_ofs_delta=convert_ofs_delta,
1255 ):
1256 yield unpacked
1257 hexsha = sha_to_hex(unpacked.sha())
1258 todo.remove(hexsha)
1260 def iterobjects_subset(
1261 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
1262 ) -> Iterator[ShaFile]:
1263 """Iterate over a subset of objects in the store.
1265 This method searches for objects in pack files, alternates, and loose storage.
1267 Args:
1268 shas: Iterable of object SHAs to retrieve
1269 allow_missing: If True, skip missing objects; if False, raise KeyError
1271 Returns:
1272 Iterator of ShaFile objects
1274 Raises:
1275 KeyError: If an object is missing and allow_missing is False
1276 """
1277 todo: set[ObjectID] = set(shas)
1278 for p in self._iter_cached_packs():
1279 for o in p.iterobjects_subset(todo, allow_missing=True):
1280 yield o
1281 todo.remove(o.id)
1282 # Maybe something else has added a pack with the object
1283 # in the mean time?
1284 for p in self._update_pack_cache():
1285 for o in p.iterobjects_subset(todo, allow_missing=True):
1286 yield o
1287 todo.remove(o.id)
1288 for alternate in self.alternates:
1289 for o in alternate.iterobjects_subset(todo, allow_missing=True):
1290 yield o
1291 todo.remove(o.id)
1292 for oid in todo:
1293 loose_obj: ShaFile | None = self._get_loose_object(oid)
1294 if loose_obj is not None:
1295 yield loose_obj
1296 elif not allow_missing:
1297 raise KeyError(oid)
1299 def get_unpacked_object(
1300 self, sha1: bytes, *, include_comp: bool = False
1301 ) -> UnpackedObject:
1302 """Obtain the unpacked object.
1304 Args:
1305 sha1: sha for the object.
1306 include_comp: Whether to include compression metadata.
1307 """
1308 if sha1 == ZERO_SHA:
1309 raise KeyError(sha1)
1310 if len(sha1) == 40:
1311 sha = hex_to_sha(cast(ObjectID, sha1))
1312 hexsha = cast(ObjectID, sha1)
1313 elif len(sha1) == 20:
1314 sha = cast(RawObjectID, sha1)
1315 hexsha = None
1316 else:
1317 raise AssertionError(f"Invalid object sha1 {sha1!r}")
1318 for pack in self._iter_cached_packs():
1319 try:
1320 return pack.get_unpacked_object(sha, include_comp=include_comp)
1321 except (KeyError, PackFileDisappeared):
1322 pass
1323 if hexsha is None:
1324 hexsha = sha_to_hex(sha)
1325 # Maybe something else has added a pack with the object
1326 # in the mean time?
1327 for pack in self._update_pack_cache():
1328 try:
1329 return pack.get_unpacked_object(sha, include_comp=include_comp)
1330 except KeyError:
1331 pass
1332 for alternate in self.alternates:
1333 assert isinstance(alternate, PackBasedObjectStore)
1334 try:
1335 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
1336 except KeyError:
1337 pass
1338 raise KeyError(hexsha)
1340 def add_objects(
1341 self,
1342 objects: Sequence[tuple[ShaFile, str | None]],
1343 progress: Callable[[str], None] | None = None,
1344 ) -> "Pack | None":
1345 """Add a set of objects to this object store.
1347 Args:
1348 objects: Iterable over (object, path) tuples, should support
1349 __len__.
1350 progress: Optional progress reporting function.
1351 Returns: Pack object of the objects written.
1352 """
1353 count = len(objects)
1354 record_iter = (full_unpacked_object(o) for (o, p) in objects)
1355 return self.add_pack_data(count, record_iter, progress=progress)
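# Illustrative sketch (not part of dulwich's API): a DiskObjectStore round-trip
# in a temporary directory, assuming the DiskObjectStore.init() classmethod
# (which creates the info/ and pack/ layout) further down in this module. The
# path and blob contents are invented for the example.
def _example_disk_store_roundtrip() -> None:
    """Write a loose object, pack it, then repack the on-disk store."""
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        store = DiskObjectStore.init(os.path.join(tmpdir, "objects"))
        blob = Blob.from_string(b"packed eventually\n")
        store.add_object(blob)
        assert store.contains_loose(blob.id)
        # Move loose objects into a pack, then consolidate all packs.
        assert store.pack_loose_objects() == 1
        assert store.contains_packed(blob.id)
        store.repack()
        store.close()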
1358class DiskObjectStore(PackBasedObjectStore):
1359 """Git-style object store that exists on disk."""
1361 path: str | os.PathLike[str]
1362 pack_dir: str | os.PathLike[str]
1363 _alternates: "list[BaseObjectStore] | None"
1364 _commit_graph: "CommitGraph | None"
1366 def __init__(
1367 self,
1368 path: str | os.PathLike[str],
1369 *,
1370 loose_compression_level: int = -1,
1371 pack_compression_level: int = -1,
1372 pack_index_version: int | None = None,
1373 pack_delta_window_size: int | None = None,
1374 pack_window_memory: int | None = None,
1375 pack_delta_cache_size: int | None = None,
1376 pack_depth: int | None = None,
1377 pack_threads: int | None = None,
1378 pack_big_file_threshold: int | None = None,
1379 fsync_object_files: bool = False,
1380 pack_write_bitmaps: bool = False,
1381 pack_write_bitmap_hash_cache: bool = True,
1382 pack_write_bitmap_lookup_table: bool = True,
1383 file_mode: int | None = None,
1384 dir_mode: int | None = None,
1385 ) -> None:
1386 """Open an object store.
1388 Args:
1389 path: Path of the object store.
1390 loose_compression_level: zlib compression level for loose objects
1391 pack_compression_level: zlib compression level for pack objects
1392 pack_index_version: pack index version to use (1, 2, or 3)
1393 pack_delta_window_size: sliding window size for delta compression
1394 pack_window_memory: memory limit for delta window operations
1395 pack_delta_cache_size: size of cache for delta operations
1396 pack_depth: maximum delta chain depth
1397 pack_threads: number of threads for pack operations
1398 pack_big_file_threshold: threshold for treating files as big
1399 fsync_object_files: whether to fsync object files for durability
1400 pack_write_bitmaps: whether to write bitmap indexes for packs
1401 pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
1402 pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
1403 file_mode: File permission mask for shared repository
1404 dir_mode: Directory permission mask for shared repository
1405 """
1406 super().__init__(
1407 pack_compression_level=pack_compression_level,
1408 pack_index_version=pack_index_version,
1409 pack_delta_window_size=pack_delta_window_size,
1410 pack_window_memory=pack_window_memory,
1411 pack_delta_cache_size=pack_delta_cache_size,
1412 pack_depth=pack_depth,
1413 pack_threads=pack_threads,
1414 pack_big_file_threshold=pack_big_file_threshold,
1415 )
1416 self.path = path
1417 self.pack_dir = os.path.join(self.path, PACKDIR)
1418 self._alternates = None
1419 self.loose_compression_level = loose_compression_level
1420 self.pack_compression_level = pack_compression_level
1421 self.pack_index_version = pack_index_version
1422 self.fsync_object_files = fsync_object_files
1423 self.pack_write_bitmaps = pack_write_bitmaps
1424 self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
1425 self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
1426 self.file_mode = file_mode
1427 self.dir_mode = dir_mode
1429 # Commit graph support - lazy loaded
1430 self._commit_graph = None
1431 self._use_commit_graph = True # Default to true
1433 # Multi-pack-index support - lazy loaded
1434 self._midx: MultiPackIndex | None = None
1435 self._use_midx = True # Default to true
1437 def __repr__(self) -> str:
1438 """Return string representation of DiskObjectStore.
1440 Returns:
1441 String representation including the store path
1442 """
1443 return f"<{self.__class__.__name__}({self.path!r})>"
1445 @classmethod
1446 def from_config(
1447 cls,
1448 path: str | os.PathLike[str],
1449 config: "Config",
1450 *,
1451 file_mode: int | None = None,
1452 dir_mode: int | None = None,
1453 ) -> "DiskObjectStore":
1454 """Create a DiskObjectStore from a configuration object.
1456 Args:
1457 path: Path to the object store directory
1458 config: Configuration object to read settings from
1459 file_mode: Optional file permission mask for shared repository
1460 dir_mode: Optional directory permission mask for shared repository
1462 Returns:
1463 New DiskObjectStore instance configured according to config
1464 """
1465 try:
1466 default_compression_level = int(
1467 config.get((b"core",), b"compression").decode()
1468 )
1469 except KeyError:
1470 default_compression_level = -1
1471 try:
1472 loose_compression_level = int(
1473 config.get((b"core",), b"looseCompression").decode()
1474 )
1475 except KeyError:
1476 loose_compression_level = default_compression_level
1477 try:
1478 pack_compression_level = int(
1479 config.get((b"core",), b"packCompression").decode()
1480 )
1481 except KeyError:
1482 pack_compression_level = default_compression_level
1483 try:
1484 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1485 except KeyError:
1486 pack_index_version = None
1488 # Read pack configuration options
1489 try:
1490 pack_delta_window_size = int(
1491 config.get((b"pack",), b"deltaWindowSize").decode()
1492 )
1493 except KeyError:
1494 pack_delta_window_size = None
1495 try:
1496 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1497 except KeyError:
1498 pack_window_memory = None
1499 try:
1500 pack_delta_cache_size = int(
1501 config.get((b"pack",), b"deltaCacheSize").decode()
1502 )
1503 except KeyError:
1504 pack_delta_cache_size = None
1505 try:
1506 pack_depth = int(config.get((b"pack",), b"depth").decode())
1507 except KeyError:
1508 pack_depth = None
1509 try:
1510 pack_threads = int(config.get((b"pack",), b"threads").decode())
1511 except KeyError:
1512 pack_threads = None
1513 try:
1514 pack_big_file_threshold = int(
1515 config.get((b"pack",), b"bigFileThreshold").decode()
1516 )
1517 except KeyError:
1518 pack_big_file_threshold = None
1520 # Read core.commitGraph setting
1521 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1523 # Read core.multiPackIndex setting
1524 use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)
1526 # Read core.fsyncObjectFiles setting
1527 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
1529 # Read bitmap settings
1530 pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
1531 pack_write_bitmap_hash_cache = config.get_boolean(
1532 (b"pack",), b"writeBitmapHashCache", True
1533 )
1534 pack_write_bitmap_lookup_table = config.get_boolean(
1535 (b"pack",), b"writeBitmapLookupTable", True
1536 )
1537 # Also check repack.writeBitmaps for backwards compatibility
1538 if not pack_write_bitmaps:
1539 pack_write_bitmaps = config.get_boolean(
1540 (b"repack",), b"writeBitmaps", False
1541 )
1543 instance = cls(
1544 path,
1545 loose_compression_level=loose_compression_level,
1546 pack_compression_level=pack_compression_level,
1547 pack_index_version=pack_index_version,
1548 pack_delta_window_size=pack_delta_window_size,
1549 pack_window_memory=pack_window_memory,
1550 pack_delta_cache_size=pack_delta_cache_size,
1551 pack_depth=pack_depth,
1552 pack_threads=pack_threads,
1553 pack_big_file_threshold=pack_big_file_threshold,
1554 fsync_object_files=fsync_object_files,
1555 pack_write_bitmaps=pack_write_bitmaps,
1556 pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
1557 pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
1558 file_mode=file_mode,
1559 dir_mode=dir_mode,
1560 )
1561 instance._use_commit_graph = use_commit_graph
1562 instance._use_midx = use_midx
1563 return instance
1565 @property
1566 def alternates(self) -> list["BaseObjectStore"]:
1567 """Get the list of alternate object stores.
1569 Reads from .git/objects/info/alternates if not already cached.
1571 Returns:
1572 List of DiskObjectStore instances for alternate object directories
1573 """
1574 if self._alternates is not None:
1575 return self._alternates
1576 self._alternates = []
1577 for path in self._read_alternate_paths():
1578 self._alternates.append(DiskObjectStore(path))
1579 return self._alternates
1581 def _read_alternate_paths(self) -> Iterator[str]:
1582 try:
1583 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1584 except FileNotFoundError:
1585 return
1586 with f:
1587 for line in f.readlines():
1588 line = line.rstrip(b"\n")
1589 if line.startswith(b"#"):
1590 continue
1591 if os.path.isabs(line):
1592 yield os.fsdecode(line)
1593 else:
1594 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1596 def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
1597 """Add an alternate path to this object store."""
1598 info_dir = os.path.join(self.path, INFODIR)
1599 try:
1600 os.mkdir(info_dir)
1601 if self.dir_mode is not None:
1602 os.chmod(info_dir, self.dir_mode)
1603 except FileExistsError:
1604 pass
1605 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1606 mask = self.file_mode if self.file_mode is not None else 0o644
1607 with GitFile(alternates_path, "wb", mask=mask) as f:
1608 try:
1609 orig_f = open(alternates_path, "rb")
1610 except FileNotFoundError:
1611 pass
1612 else:
1613 with orig_f:
1614 f.write(orig_f.read())
1615 f.write(os.fsencode(path) + b"\n")
1617 if not os.path.isabs(path):
1618 path = os.path.join(self.path, path)
1619 self.alternates.append(DiskObjectStore(path))
1621 def _update_pack_cache(self) -> list[Pack]:
1622 """Read and iterate over new pack files and cache them."""
1623 try:
1624 pack_dir_contents = os.listdir(self.pack_dir)
1625 except FileNotFoundError:
1626 self.close()
1627 return []
1628 pack_files = set()
1629 for name in pack_dir_contents:
1630 if name.startswith("pack-") and name.endswith(".pack"):
1631 # verify that idx exists first (otherwise the pack was not yet
1632 # fully written)
1633 idx_name = os.path.splitext(name)[0] + ".idx"
1634 if idx_name in pack_dir_contents:
1635 pack_name = name[: -len(".pack")]
1636 pack_files.add(pack_name)
1638 # Open newly appeared pack files
1639 new_packs = []
1640 for f in pack_files:
1641 if f not in self._pack_cache:
1642 pack = Pack(
1643 os.path.join(self.pack_dir, f),
1644 delta_window_size=self.pack_delta_window_size,
1645 window_memory=self.pack_window_memory,
1646 delta_cache_size=self.pack_delta_cache_size,
1647 depth=self.pack_depth,
1648 threads=self.pack_threads,
1649 big_file_threshold=self.pack_big_file_threshold,
1650 )
1651 new_packs.append(pack)
1652 self._pack_cache[f] = pack
1653 # Remove disappeared pack files
1654 for f in set(self._pack_cache) - pack_files:
1655 self._pack_cache.pop(f).close()
1656 return new_packs
1658 def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str:
1659 # Check from object dir
1660 return hex_to_filename(os.fspath(self.path), sha)
1662 def _iter_loose_objects(self) -> Iterator[ObjectID]:
1663 for base in os.listdir(self.path):
1664 if len(base) != 2:
1665 continue
1666 for rest in os.listdir(os.path.join(self.path, base)):
1667 sha = os.fsencode(base + rest)
1668 if not valid_hexsha(sha):
1669 continue
1670 yield ObjectID(sha)
1672 def count_loose_objects(self) -> int:
1673 """Count the number of loose objects in the object store.
1675 Returns:
1676 Number of loose objects
1677 """
1678 count = 0
1679 if not os.path.exists(self.path):
1680 return 0
1682 for i in range(256):
1683 subdir = os.path.join(self.path, f"{i:02x}")
1684 try:
1685 count += len(
1686 [
1687 name
1688 for name in os.listdir(subdir)
1689 if len(name) == 38 # 40 - 2 for the prefix
1690 ]
1691 )
1692 except FileNotFoundError:
1693 # Directory may have been removed or is inaccessible
1694 continue
1696 return count
1698 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
1699 path = self._get_shafile_path(sha)
1700 try:
1701 return ShaFile.from_path(path)
1702 except FileNotFoundError:
1703 return None
1705 def delete_loose_object(self, sha: ObjectID) -> None:
1706 """Delete a loose object from disk.
1708 Args:
1709 sha: SHA1 of the object to delete
1711 Raises:
1712 FileNotFoundError: If the object file doesn't exist
1713 """
1714 os.remove(self._get_shafile_path(sha))
1716 def get_object_mtime(self, sha: ObjectID) -> float:
1717 """Get the modification time of an object.
1719 Args:
1720 sha: SHA1 of the object
1722 Returns:
1723 Modification time as seconds since epoch
1725 Raises:
1726 KeyError: if the object is not found
1727 """
1728 # First check if it's a loose object
1729 if self.contains_loose(sha):
1730 path = self._get_shafile_path(sha)
1731 try:
1732 return os.path.getmtime(path)
1733 except FileNotFoundError:
1734 pass
1736 # Check if it's in a pack file
1737 for pack in self.packs:
1738 try:
1739 if sha in pack:
1740 # Use the pack file's mtime for packed objects
1741 pack_path = pack._data_path
1742 try:
1743 return os.path.getmtime(pack_path)
1744 except (FileNotFoundError, AttributeError):
1745 pass
1746 except PackFileDisappeared:
1747 pass
1749 raise KeyError(sha)
1751 def _remove_pack(self, pack: Pack) -> None:
1752 try:
1753 del self._pack_cache[os.path.basename(pack._basename)]
1754 except KeyError:
1755 pass
1756 pack.close()
1757 os.remove(pack.data.path)
1758 if hasattr(pack.index, "path"):
1759 os.remove(pack.index.path)
1761 def _get_pack_basepath(
1762 self, entries: Iterable[tuple[bytes, int, int | None]]
1763 ) -> str:
1764 suffix_bytes = iter_sha1(entry[0] for entry in entries)
1765 # TODO: Handle self.pack_dir being bytes
1766 suffix = suffix_bytes.decode("ascii")
1767 return os.path.join(self.pack_dir, "pack-" + suffix)
1769 def _complete_pack(
1770 self,
1771 f: BinaryIO,
1772 path: str,
1773 num_objects: int,
1774 indexer: PackIndexer,
1775 progress: Callable[..., None] | None = None,
1776 refs: dict[Ref, ObjectID] | None = None,
1777 ) -> Pack:
1778 """Move a specific file containing a pack into the pack directory.
1780 Note: The file should be on the same file system as the
1781 packs directory.
1783 Args:
1784 f: Open file object for the pack.
1785 path: Path to the pack file.
1786 num_objects: Number of objects in the pack.
1787 indexer: A PackIndexer for indexing the pack.
1788 progress: Optional progress reporting function.
1789 refs: Optional dictionary of refs for bitmap generation.
1790 """
1791 entries = []
1792 for i, entry in enumerate(indexer):
1793 if progress is not None:
1794 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1795 entries.append(entry)
1797 pack_sha, extra_entries = extend_pack(
1798 f,
1799 set(indexer.ext_refs()),
1800 get_raw=self.get_raw,
1801 compression_level=self.pack_compression_level,
1802 progress=progress,
1803 )
1804 f.flush()
1805 if self.fsync_object_files:
1806 try:
1807 fileno = f.fileno()
1808 except AttributeError as e:
1809 raise OSError("fsync requested but file has no fileno()") from e
1810 else:
1811 os.fsync(fileno)
1812 f.close()
1814 entries.extend(extra_entries)
1816 # Move the pack in.
1817 entries.sort()
1818 pack_base_name = self._get_pack_basepath(entries)
1820 for pack in self.packs:
1821 if pack._basename == pack_base_name:
1822 return pack
1824 target_pack_path = pack_base_name + ".pack"
1825 target_index_path = pack_base_name + ".idx"
1826 if sys.platform == "win32":
1827 # Windows might have the target pack file lingering. Attempt
1828 # removal, silently passing if the target does not exist.
1829 with suppress(FileNotFoundError):
1830 os.remove(target_pack_path)
1831 os.rename(path, target_pack_path)
1833 # Write the index.
1834 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1835 with GitFile(
1836 target_index_path,
1837 "wb",
1838 mask=mask,
1839 fsync=self.fsync_object_files,
1840 ) as index_file:
1841 write_pack_index(
1842 index_file, entries, pack_sha, version=self.pack_index_version
1843 )
1845 # Generate bitmap if configured and refs are available
1846 if self.pack_write_bitmaps and refs:
1847 from .bitmap import generate_bitmap, write_bitmap
1848 from .pack import load_pack_index_file
1850 if progress:
1851 progress("Generating bitmap index\r".encode("ascii"))
1853 # Load the index we just wrote
1854 with open(target_index_path, "rb") as idx_file:
1855 pack_index = load_pack_index_file(
1856 os.path.basename(target_index_path), idx_file
1857 )
1859 # Generate the bitmap
1860 bitmap = generate_bitmap(
1861 pack_index=pack_index,
1862 object_store=self,
1863 refs=refs,
1864 pack_checksum=pack_sha,
1865 include_hash_cache=self.pack_write_bitmap_hash_cache,
1866 include_lookup_table=self.pack_write_bitmap_lookup_table,
1867 progress=lambda msg: progress(msg.encode("ascii"))
1868 if progress and isinstance(msg, str)
1869 else None,
1870 )
1872 # Write the bitmap
1873 target_bitmap_path = pack_base_name + ".bitmap"
1874 write_bitmap(target_bitmap_path, bitmap)
1876 if progress:
1877 progress("Bitmap index written\r".encode("ascii"))
1879 # Add the pack to the store and return it.
1880 final_pack = Pack(
1881 pack_base_name,
1882 delta_window_size=self.pack_delta_window_size,
1883 window_memory=self.pack_window_memory,
1884 delta_cache_size=self.pack_delta_cache_size,
1885 depth=self.pack_depth,
1886 threads=self.pack_threads,
1887 big_file_threshold=self.pack_big_file_threshold,
1888 )
1889 final_pack.check_length_and_checksum()
1890 self._add_cached_pack(pack_base_name, final_pack)
1891 return final_pack
1893 def add_thin_pack(
1894 self,
1895 read_all: Callable[[int], bytes],
1896 read_some: Callable[[int], bytes] | None,
1897 progress: Callable[..., None] | None = None,
1898 ) -> "Pack":
1899 """Add a new thin pack to this object store.
1901 Thin packs are packs that contain deltas with parents that exist
1902 outside the pack. They should never be placed in the object store
1903 directly, and should always be indexed and completed as they are copied.
1905 Args:
1906 read_all: Read function that blocks until the number of
1907 requested bytes are read.
1908 read_some: Read function that returns at least one byte, but may
1909 not return the number of bytes requested.
1910 progress: Optional progress reporting function.
1911 Returns: A Pack object pointing at the now-completed thin pack in the
1912 objects/pack directory.
1913 """
1914 import tempfile
1916 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1917 with os.fdopen(fd, "w+b") as f:
1918 os.chmod(path, PACK_MODE)
1919 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type]
1920 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type]
1921 copier.verify(progress=progress)
1922 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1924 def add_pack(
1925 self,
1926 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1927 """Add a new pack to this object store.
1929 Returns: Fileobject to write to, a commit function to
1930 call when the pack is finished and an abort
1931 function.
1932 """
1933 import tempfile
1935 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1936 f = os.fdopen(fd, "w+b")
1937 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1938 os.chmod(path, mask)
1940 def commit() -> "Pack | None":
1941 if f.tell() > 0:
1942 f.seek(0)
1944 with PackData(path, f) as pd:
1945 indexer = PackIndexer.for_pack_data(
1946 pd,
1947 resolve_ext_ref=self.get_raw, # type: ignore[arg-type]
1948 )
1949 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type]
1950 else:
1951 f.close()
1952 os.remove(path)
1953 return None
1955 def abort() -> None:
1956 f.close()
1957 os.remove(path)
1959 return f, commit, abort # type: ignore[return-value]
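# Illustrative sketch (not part of this module): the commit/abort contract
# returned by add_pack(), mirroring how add_pack_data() below drives it.
# `store`, `unpacked_objects` and `count` are assumed to come from elsewhere
# (e.g. pack_objects_to_data()).
#
#     f, commit, abort = store.add_pack()
#     try:
#         write_pack_data(f.write, unpacked_objects, num_records=count)
#     except BaseException:
#         abort()
#         raise
#     else:
#         commit()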
1961 def add_object(self, obj: ShaFile) -> None:
1962 """Add a single object to this object store.
1964 Args:
1965 obj: Object to add
1966 """
1967 path = self._get_shafile_path(obj.id)
1968 dir = os.path.dirname(path)
1969 try:
1970 os.mkdir(dir)
1971 if self.dir_mode is not None:
1972 os.chmod(dir, self.dir_mode)
1973 except FileExistsError:
1974 pass
1975 if os.path.exists(path):
1976 return # Already there, no need to write again
1977 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1978 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f:
1979 f.write(
1980 obj.as_legacy_object(compression_level=self.loose_compression_level)
1981 )
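# Illustrative sketch (not part of this module): adding a loose object and
# checking for it, assuming `store` is a DiskObjectStore.
#
#     from dulwich.objects import Blob
#     blob = Blob.from_string(b"hello world")
#     store.add_object(blob)
#     assert store.contains_loose(blob.id)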
1983 @classmethod
1984 def init(
1985 cls,
1986 path: str | os.PathLike[str],
1987 *,
1988 file_mode: int | None = None,
1989 dir_mode: int | None = None,
1990 ) -> "DiskObjectStore":
1991 """Initialize a new disk object store.
1993 Creates the necessary directory structure for a Git object store.
1995 Args:
1996 path: Path where the object store should be created
1997 file_mode: Optional file permission mask for shared repository
1998 dir_mode: Optional directory permission mask for shared repository
2000 Returns:
2001 New DiskObjectStore instance
2002 """
2003 try:
2004 os.mkdir(path)
2005 if dir_mode is not None:
2006 os.chmod(path, dir_mode)
2007 except FileExistsError:
2008 pass
2009 info_path = os.path.join(path, "info")
2010 pack_path = os.path.join(path, PACKDIR)
2011 os.mkdir(info_path)
2012 os.mkdir(pack_path)
2013 if dir_mode is not None:
2014 os.chmod(info_path, dir_mode)
2015 os.chmod(pack_path, dir_mode)
2016 return cls(path, file_mode=file_mode, dir_mode=dir_mode)
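# Illustrative sketch (not part of this module): creating a fresh object
# store on disk; the path is hypothetical.
#
#     store = DiskObjectStore.init("/tmp/example-repo/.git/objects")
#     # init() creates the "info" and "pack" subdirectories as shown above.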
2018 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
2019 """Iterate over all object SHAs with the given prefix.
2021 Args:
2022 prefix: Hex prefix to search for (as bytes)
2024 Returns:
2025 Iterator of object SHAs (as ObjectID) matching the prefix
2026 """
2027 if len(prefix) < 2:
2028 yield from super().iter_prefix(prefix)
2029 return
2030 seen = set()
2031 dir = prefix[:2].decode()
2032 rest = prefix[2:].decode()
2033 try:
2034 for name in os.listdir(os.path.join(self.path, dir)):
2035 if name.startswith(rest):
2036 sha = ObjectID(os.fsencode(dir + name))
2037 if sha not in seen:
2038 seen.add(sha)
2039 yield sha
2040 except FileNotFoundError:
2041 pass
2043 for p in self.packs:
2044 bin_prefix = (
2045 binascii.unhexlify(prefix)
2046 if len(prefix) % 2 == 0
2047 else binascii.unhexlify(prefix[:-1])
2048 )
2049 for bin_sha in p.index.iter_prefix(bin_prefix):
2050 sha = sha_to_hex(bin_sha)
2051 if sha.startswith(prefix) and sha not in seen:
2052 seen.add(sha)
2053 yield sha
2054 for alternate in self.alternates:
2055 for sha in alternate.iter_prefix(prefix):
2056 if sha not in seen:
2057 seen.add(sha)
2058 yield sha
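# Illustrative sketch (not part of this module): resolving an abbreviated
# SHA via iter_prefix(); the prefix value is hypothetical.
#
#     matches = list(store.iter_prefix(b"ab12"))
#     if len(matches) == 1:
#         obj = store[matches[0]]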
2060 def get_commit_graph(self) -> "CommitGraph | None":
2061 """Get the commit graph for this object store.
2063 Returns:
2064 CommitGraph object if available, None otherwise
2065 """
2066 if not self._use_commit_graph:
2067 return None
2069 if self._commit_graph is None:
2070 from .commit_graph import read_commit_graph
2072 # Look for commit graph in our objects directory
2073 graph_file = os.path.join(self.path, "info", "commit-graph")
2074 if os.path.exists(graph_file):
2075 self._commit_graph = read_commit_graph(graph_file)
2076 return self._commit_graph
2078 def get_midx(self) -> MultiPackIndex | None:
2079 """Get the multi-pack-index for this object store.
2081 Returns:
2082 MultiPackIndex object if available, None otherwise
2084 Raises:
2085 ValueError: If MIDX file is corrupt
2086 OSError: If MIDX file cannot be read
2087 """
2088 if not self._use_midx:
2089 return None
2091 if self._midx is None:
2092 # Look for MIDX in pack directory
2093 midx_file = os.path.join(self.pack_dir, "multi-pack-index")
2094 if os.path.exists(midx_file):
2095 self._midx = load_midx(midx_file)
2096 return self._midx
2098 def _get_pack_by_name(self, pack_name: str) -> Pack:
2099 """Get a pack by its base name.
2101 Args:
2102 pack_name: Name of the pack, with or without extension (e.g., 'pack-abc123', 'pack-abc123.pack', or 'pack-abc123.idx')
2104 Returns:
2105 Pack object
2107 Raises:
2108 KeyError: If pack doesn't exist
2109 """
2110 # Remove .pack or .idx extension if present
2111 if pack_name.endswith(".pack"):
2112 base_name = pack_name[:-5]
2113 elif pack_name.endswith(".idx"):
2114 base_name = pack_name[:-4]
2115 else:
2116 base_name = pack_name
2118 # Check if already in cache
2119 if base_name in self._pack_cache:
2120 return self._pack_cache[base_name]
2122 # Load the pack
2123 pack_path = os.path.join(self.pack_dir, base_name)
2124 if not os.path.exists(pack_path + ".pack"):
2125 raise KeyError(f"Pack {pack_name} not found")
2127 pack = Pack(
2128 pack_path,
2129 delta_window_size=self.pack_delta_window_size,
2130 window_memory=self.pack_window_memory,
2131 delta_cache_size=self.pack_delta_cache_size,
2132 depth=self.pack_depth,
2133 threads=self.pack_threads,
2134 big_file_threshold=self.pack_big_file_threshold,
2135 )
2136 self._pack_cache[base_name] = pack
2137 return pack
2139 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
2140 """Check if a particular object is present by SHA1 and is packed.
2142 This checks the MIDX first if available, then falls back to checking
2143 individual pack indexes.
2145 Args:
2146 sha: Binary SHA of the object
2148 Returns:
2149 True if the object is in a pack file
2150 """
2151 # Check MIDX first for faster lookup
2152 midx = self.get_midx()
2153 if midx is not None and sha in midx:
2154 return True
2156 # Fall back to checking individual packs
2157 return super().contains_packed(sha)
2159 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
2160 """Obtain the raw fulltext for an object.
2162 This uses the MIDX if available for faster lookups.
2164 Args:
2165 name: SHA for the object (20 bytes binary or 40 bytes hex)
2167 Returns:
2168 Tuple with numeric type and object contents
2170 Raises:
2171 KeyError: If object not found
2172 """
2173 if name == ZERO_SHA:
2174 raise KeyError(name)
2176 sha: RawObjectID
2177 if len(name) == 40:
2178 # name is ObjectID (hex), convert to RawObjectID
2179 sha = hex_to_sha(cast(ObjectID, name))
2180 elif len(name) == 20:
2181 # name is already RawObjectID (binary)
2182 sha = RawObjectID(name)
2183 else:
2184 raise AssertionError(f"Invalid object name {name!r}")
2186 # Try MIDX first for faster lookup
2187 midx = self.get_midx()
2188 if midx is not None:
2189 result = midx.object_offset(sha)
2190 if result is not None:
2191 pack_name, _offset = result
2192 try:
2193 pack = self._get_pack_by_name(pack_name)
2194 return pack.get_raw(sha)
2195 except (KeyError, PackFileDisappeared):
2196 # Pack disappeared or object not found, fall through to standard lookup
2197 pass
2199 # Fall back to the standard implementation
2200 return super().get_raw(name)
2202 def write_midx(self) -> bytes:
2203 """Write a multi-pack-index file for this object store.
2205 Creates a MIDX file that indexes all pack files in the pack directory.
2207 Returns:
2208 SHA-1 checksum of the written MIDX file
2210 Raises:
2211 OSError: If the pack directory doesn't exist or MIDX can't be written
2212 """
2213 from .midx import write_midx_file
2215 # Get all pack files
2216 packs = self.packs
2217 if not packs:
2218 # No packs to index
2219 return b"\x00" * 20
2221 # Collect entries from all packs
2222 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
2224 for pack in packs:
2225 # Git stores .idx extension in MIDX, not .pack
2226 pack_name = os.path.basename(pack._basename) + ".idx"
2227 entries = list(pack.index.iterentries())
2228 pack_entries.append((pack_name, entries))
2230 # Write MIDX file
2231 midx_path = os.path.join(self.pack_dir, "multi-pack-index")
2232 return write_midx_file(midx_path, pack_entries)
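# Illustrative sketch (not part of this module): writing a multi-pack-index
# and reading it back, assuming `store` is a DiskObjectStore with MIDX use
# enabled in its configuration.
#
#     checksum = store.write_midx()
#     midx = store.get_midx()  # returns None unless MIDX use is enabled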
2234 def write_commit_graph(
2235 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
2236 ) -> None:
2237 """Write a commit graph file for this object store.
2239 Args:
2240 refs: Commit (ref target) SHAs to include. If None, all commits found in the object store are included.
2241 reachable: If True, includes all commits reachable from refs.
2242 If False, only includes the direct ref targets.
2243 """
2244 from .commit_graph import get_reachable_commits
2246 if refs is None:
2247 # Get all commit objects from the object store
2248 all_refs = []
2249 # Iterate through all objects to find commits
2250 for sha in self:
2251 try:
2252 obj = self[sha]
2253 if obj.type_name == b"commit":
2254 all_refs.append(sha)
2255 except KeyError:
2256 continue
2257 else:
2258 # Use provided refs
2259 all_refs = list(refs)
2261 if not all_refs:
2262 return # No commits to include
2264 if reachable:
2265 # Get all reachable commits
2266 commit_ids = get_reachable_commits(self, all_refs)
2267 else:
2268 # Just use the direct ref targets (already ObjectIDs)
2269 commit_ids = all_refs
2271 if commit_ids:
2272 # Write commit graph directly to our object store path
2273 # Generate the commit graph
2274 from .commit_graph import generate_commit_graph
2276 graph = generate_commit_graph(self, commit_ids)
2278 if graph.entries:
2279 # Ensure the info directory exists
2280 info_dir = os.path.join(self.path, "info")
2281 os.makedirs(info_dir, exist_ok=True)
2282 if self.dir_mode is not None:
2283 os.chmod(info_dir, self.dir_mode)
2285 # Write using GitFile for atomic operation
2286 graph_path = os.path.join(info_dir, "commit-graph")
2287 mask = self.file_mode if self.file_mode is not None else 0o644
2288 with GitFile(graph_path, "wb", mask=mask) as f:
2289 assert isinstance(
2290 f, _GitFile
2291 ) # GitFile in write mode always returns _GitFile
2292 graph.write_to_file(f)
2294 # Clear cached commit graph so it gets reloaded
2295 self._commit_graph = None
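# Illustrative sketch (not part of this module): generating a commit graph
# for a set of heads and reading it back; `head_sha` is a hypothetical
# commit SHA.
#
#     store.write_commit_graph(refs=[head_sha], reachable=True)
#     graph = store.get_commit_graph()
#     if graph is not None:
#         parents = graph.get_parents(head_sha)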
2297 def prune(self, grace_period: int | None = None) -> None:
2298 """Prune/clean up this object store.
2300 This removes temporary files that were left behind by interrupted
2301 pack operations. These are files that start with ``tmp_pack_`` in the
2302 repository directory or files with .pack extension but no corresponding
2303 .idx file in the pack directory.
2305 Args:
2306 grace_period: Grace period in seconds for removing temporary files.
2307 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
2308 """
2309 import glob
2311 if grace_period is None:
2312 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
2314 # Clean up tmp_pack_* files in the repository directory
2315 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
2316 # Check if file is old enough (more than grace period)
2317 mtime = os.path.getmtime(tmp_file)
2318 if time.time() - mtime > grace_period:
2319 os.remove(tmp_file)
2321 # Clean up orphaned .pack files without corresponding .idx files
2322 try:
2323 pack_dir_contents = os.listdir(self.pack_dir)
2324 except FileNotFoundError:
2325 return
2327 pack_files = {}
2328 idx_files = set()
2330 for name in pack_dir_contents:
2331 if name.endswith(".pack"):
2332 base_name = name[:-5] # Remove .pack extension
2333 pack_files[base_name] = name
2334 elif name.endswith(".idx"):
2335 base_name = name[:-4] # Remove .idx extension
2336 idx_files.add(base_name)
2338 # Remove .pack files without corresponding .idx files
2339 for base_name, pack_name in pack_files.items():
2340 if base_name not in idx_files:
2341 pack_path = os.path.join(self.pack_dir, pack_name)
2342 # Check if file is old enough (more than grace period)
2343 mtime = os.path.getmtime(pack_path)
2344 if time.time() - mtime > grace_period:
2345 os.remove(pack_path)
2347 def close(self) -> None:
2348 """Close the object store and release resources.
2350 This method closes all cached pack files, MIDX, and frees associated resources.
2351 """
2352 # Close MIDX if it's loaded
2353 if self._midx is not None:
2354 self._midx.close()
2355 self._midx = None
2357 # Close alternates
2358 if self._alternates is not None:
2359 for alt in self._alternates:
2360 alt.close()
2361 self._alternates = None
2363 # Call parent class close to handle pack files
2364 super().close()
2367class MemoryObjectStore(PackCapableObjectStore):
2368 """Object store that keeps all objects in memory."""
2370 def __init__(self) -> None:
2371 """Initialize a MemoryObjectStore.
2373 Creates an empty in-memory object store.
2374 """
2375 super().__init__()
2376 self._data: dict[ObjectID, ShaFile] = {}
2377 self.pack_compression_level = -1
2379 def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
2380 if len(sha) == 40:
2381 return cast(ObjectID, sha)
2382 elif len(sha) == 20:
2383 return sha_to_hex(cast(RawObjectID, sha))
2384 else:
2385 raise ValueError(f"Invalid sha {sha!r}")
2387 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
2388 """Check if a particular object is present by SHA1 and is loose."""
2389 return self._to_hexsha(sha) in self._data
2391 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
2392 """Check if a particular object is present by SHA1 and is packed."""
2393 return False
2395 def __iter__(self) -> Iterator[ObjectID]:
2396 """Iterate over the SHAs that are present in this store."""
2397 return iter(self._data.keys())
2399 @property
2400 def packs(self) -> list[Pack]:
2401 """List with pack objects."""
2402 return []
2404 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
2405 """Obtain the raw text for an object.
2407 Args:
2408 name: sha for the object.
2409 Returns: tuple with numeric type and object contents.
2410 """
2411 obj = self[self._to_hexsha(name)]
2412 return obj.type_num, obj.as_raw_string()
2414 def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile:
2415 """Retrieve an object by SHA.
2417 Args:
2418 name: SHA of the object (as hex string or bytes)
2420 Returns:
2421 Copy of the ShaFile object
2423 Raises:
2424 KeyError: If the object is not found
2425 """
2426 return self._data[self._to_hexsha(name)].copy()
2428 def __delitem__(self, name: ObjectID) -> None:
2429 """Delete an object from this store, for testing only."""
2430 del self._data[self._to_hexsha(name)]
2432 def add_object(self, obj: ShaFile) -> None:
2433 """Add a single object to this object store."""
2434 self._data[obj.id] = obj.copy()
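# Illustrative sketch (not part of this module): a MemoryObjectStore
# round-trip with a single blob.
#
#     from dulwich.objects import Blob
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b"in-memory data")
#     store.add_object(blob)
#     assert store[blob.id].as_raw_string() == b"in-memory data"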
2436 def add_objects(
2437 self,
2438 objects: Iterable[tuple[ShaFile, str | None]],
2439 progress: Callable[[str], None] | None = None,
2440 ) -> None:
2441 """Add a set of objects to this object store.
2443 Args:
2444 objects: Iterable over a list of (object, path) tuples
2445 progress: Optional progress reporting function.
2446 """
2447 for obj, path in objects:
2448 self.add_object(obj)
2450 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2451 """Add a new pack to this object store.
2453 Because this object store doesn't support packs, we extract and add the
2454 individual objects.
2456 Returns: Fileobject to write to, a commit function to
2457 call when the pack is finished, and an abort function.
2458 """
2459 from tempfile import SpooledTemporaryFile
2461 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
2463 def commit() -> None:
2464 size = f.tell()
2465 if size > 0:
2466 f.seek(0)
2468 p = PackData.from_file(f, size)
2469 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type]
2470 self.add_object(obj)
2471 p.close()
2472 f.close()
2473 else:
2474 f.close()
2476 def abort() -> None:
2477 f.close()
2479 return f, commit, abort # type: ignore[return-value]
2481 def add_pack_data(
2482 self,
2483 count: int,
2484 unpacked_objects: Iterator[UnpackedObject],
2485 progress: Callable[[str], None] | None = None,
2486 ) -> None:
2487 """Add pack data to this object store.
2489 Args:
2490 count: Number of items to add
2491 unpacked_objects: Iterator of UnpackedObject instances
2492 progress: Optional progress reporting function.
2493 """
2494 if count == 0:
2495 return
2497 # Since MemoryObjectStore doesn't support pack files, we need to
2498 # extract individual objects. To handle deltas properly, we write
2499 # to a temporary pack and then use PackInflater to resolve them.
2500 f, commit, abort = self.add_pack()
2501 try:
2502 write_pack_data(
2503 f.write,
2504 unpacked_objects,
2505 num_records=count,
2506 progress=progress,
2507 )
2508 except BaseException:
2509 abort()
2510 raise
2511 else:
2512 commit()
2514 def add_thin_pack(
2515 self,
2516 read_all: Callable[[], bytes],
2517 read_some: Callable[[int], bytes],
2518 progress: Callable[[str], None] | None = None,
2519 ) -> None:
2520 """Add a new thin pack to this object store.
2522 Thin packs are packs that contain deltas with parents that exist
2523 outside the pack. Because this object store doesn't support packs, we
2524 extract and add the individual objects.
2526 Args:
2527 read_all: Read function that blocks until the number of
2528 requested bytes are read.
2529 read_some: Read function that returns at least one byte, but may
2530 not return the number of bytes requested.
2531 progress: Optional progress reporting function.
2532 """
2533 f, commit, abort = self.add_pack()
2534 try:
2535 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type]
2536 copier.verify()
2537 except BaseException:
2538 abort()
2539 raise
2540 else:
2541 commit()
2544class ObjectIterator(Protocol):
2545 """Interface for iterating over objects."""
2547 def iterobjects(self) -> Iterator[ShaFile]:
2548 """Iterate over all objects.
2550 Returns:
2551 Iterator of ShaFile objects
2552 """
2553 raise NotImplementedError(self.iterobjects)
2556def tree_lookup_path(
2557 lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile],
2558 root_sha: ObjectID | RawObjectID,
2559 path: bytes,
2560) -> tuple[int, ObjectID]:
2561 """Look up an object in a Git tree.
2563 Args:
2564 lookup_obj: Callback for retrieving object by SHA1
2565 root_sha: SHA1 of the root tree
2566 path: Path to lookup
2567 Returns: A tuple of (mode, SHA) of the resulting path.
2568 """
2569 tree = lookup_obj(root_sha)
2570 if not isinstance(tree, Tree):
2571 raise NotTreeError(root_sha)
2572 return tree.lookup_path(lookup_obj, path)
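# Illustrative sketch (not part of this module): looking up a nested path in
# a tree; `tree_id` and the path are hypothetical.
#
#     mode, sha = tree_lookup_path(store.__getitem__, tree_id, b"docs/README")
#     blob = store[sha]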
2575def _collect_filetree_revs(
2576 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
2577) -> None:
2578 """Collect SHA1s of files and directories for specified tree.
2580 Args:
2581 obj_store: Object store to get objects by SHA from
2582 tree_sha: tree reference to walk
2583 kset: set to fill with references to files and directories
2584 """
2585 filetree = obj_store[tree_sha]
2586 assert isinstance(filetree, Tree)
2587 for name, mode, sha in filetree.iteritems():
2588 assert mode is not None
2589 assert sha is not None
2590 if not S_ISGITLINK(mode) and sha not in kset:
2591 kset.add(sha)
2592 if stat.S_ISDIR(mode):
2593 _collect_filetree_revs(obj_store, sha, kset)
2596def _split_commits_and_tags(
2597 obj_store: ObjectContainer,
2598 lst: Iterable[ObjectID],
2599 *,
2600 unknown: str = "error",
2601) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]:
2602 """Split object id list into three lists with commit, tag, and other SHAs.
2604 Commits referenced by tags are included in the commits
2605 list as well. Only SHA1s known in this repository will get
2606 through; how unknown SHAs are handled is controlled by the unknown parameter.
2608 Args:
2609 obj_store: Object store to get objects by SHA1 from
2610 lst: Collection of commit and tag SHAs
2611 unknown: How to handle unknown objects: "error", "warn", or "ignore"
2612 Returns: A tuple of (commits, tags, others) SHA1s
2613 """
2614 import logging
2616 if unknown not in ("error", "warn", "ignore"):
2617 raise ValueError(
2618 f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}"
2619 )
2621 commits: set[ObjectID] = set()
2622 tags: set[ObjectID] = set()
2623 others: set[ObjectID] = set()
2624 for e in lst:
2625 try:
2626 o = obj_store[e]
2627 except KeyError:
2628 if unknown == "error":
2629 raise
2630 elif unknown == "warn":
2631 logging.warning(
2632 "Object %s not found in object store", e.decode("ascii")
2633 )
2634 # else: ignore
2635 else:
2636 if isinstance(o, Commit):
2637 commits.add(e)
2638 elif isinstance(o, Tag):
2639 tags.add(e)
2640 tagged = o.object[1]
2641 c, t, os = _split_commits_and_tags(obj_store, [tagged], unknown=unknown)
2642 commits |= c
2643 tags |= t
2644 others |= os
2645 else:
2646 others.add(e)
2647 return (commits, tags, others)
2650class MissingObjectFinder:
2651 """Find the objects missing from another object store.
2653 Args:
2654 object_store: Object store containing at least all objects to be
2655 sent
2656 haves: SHA1s of commits not to send (already present in target)
2657 wants: SHA1s of commits to send
2658 progress: Optional function to report progress to.
2659 get_tagged: Function that returns a dict of pointed-to sha -> tag
2660 sha for including tags.
2661 get_parents: Optional function for getting the parents of a commit.
2662 """
2664 def __init__(
2665 self,
2666 object_store: BaseObjectStore,
2667 haves: Iterable[ObjectID],
2668 wants: Iterable[ObjectID],
2669 *,
2670 shallow: Set[ObjectID] | None = None,
2671 progress: Callable[[bytes], None] | None = None,
2672 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
2673 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
2674 ) -> None:
2675 """Initialize a MissingObjectFinder.
2677 Args:
2678 object_store: Object store containing objects
2679 haves: SHA1s of objects already present in target
2680 wants: SHA1s of objects to send
2681 shallow: Set of shallow commit SHA1s
2682 progress: Optional progress reporting callback
2683 get_tagged: Function returning dict of pointed-to sha -> tag sha
2684 get_parents: Function for getting commit parents
2685 """
2686 self.object_store = object_store
2687 if shallow is None:
2688 shallow = set()
2689 self._get_parents = get_parents
2690 reachability = object_store.get_reachability_provider()
2691 # process Commits and Tags differently
2692 # haves may list commits/tags not available locally (silently ignore them).
2693 # wants should only contain valid SHAs (fail fast if not).
2694 have_commits, have_tags, have_others = _split_commits_and_tags(
2695 object_store, haves, unknown="ignore"
2696 )
2697 want_commits, want_tags, want_others = _split_commits_and_tags(
2698 object_store, wants, unknown="error"
2699 )
2700 # all_ancestors is a set of commits that shall not be sent
2701 # (complete repository up to 'haves')
2702 all_ancestors = reachability.get_reachable_commits(
2703 have_commits, exclude=None, shallow=shallow
2704 )
2705 # all_missing - complete set of commits between haves and wants
2706 # common_commits - boundary commits directly encountered when traversing wants
2707 # We use _collect_ancestors here because we need the exact boundary behavior:
2708 # commits that are in all_ancestors and directly reachable from wants,
2709 # but we don't traverse past them. This is hard to express with the
2710 # reachability abstraction alone.
2711 missing_commits, common_commits = _collect_ancestors(
2712 object_store,
2713 want_commits,
2714 frozenset(all_ancestors),
2715 shallow=frozenset(shallow),
2716 get_parents=self._get_parents,
2717 )
2719 self.remote_has: set[ObjectID] = set()
2720 # Now, fill sha_done with commits and revisions of
2721 # files and directories known to be present both locally
2722 # and on the target. Thus these commits and files
2723 # won't get selected for fetch.
2724 for h in common_commits:
2725 self.remote_has.add(h)
2726 cmt = object_store[h]
2727 assert isinstance(cmt, Commit)
2728 # Get tree objects for this commit
2729 tree_objects = reachability.get_tree_objects([cmt.tree])
2730 self.remote_has.update(tree_objects)
2732 # record tags we have as visited, too
2733 for t in have_tags:
2734 self.remote_has.add(t)
2735 self.sha_done = set(self.remote_has)
2737 # in fact, what we 'want' is commits, tags, and others
2738 # we've found missing
2739 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
2740 (w, None, Commit.type_num, False) for w in missing_commits
2741 }
2742 missing_tags = want_tags.difference(have_tags)
2743 self.objects_to_send.update(
2744 {(w, None, Tag.type_num, False) for w in missing_tags}
2745 )
2746 missing_others = want_others.difference(have_others)
2747 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
2749 if progress is None:
2750 self.progress: Callable[[bytes], None] = lambda x: None
2751 else:
2752 self.progress = progress
2753 self._tagged = (get_tagged and get_tagged()) or {}
2755 def get_remote_has(self) -> set[ObjectID]:
2756 """Get the set of SHAs the remote has.
2758 Returns:
2759 Set of SHA1s that the remote side already has
2760 """
2761 return self.remote_has
2763 def add_todo(
2764 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
2765 ) -> None:
2766 """Add objects to the todo list.
2768 Args:
2769 entries: Iterable of tuples (sha, name, type_num, is_leaf)
2770 """
2771 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
2773 def __next__(self) -> tuple[ObjectID, PackHint | None]:
2774 """Get the next object to send.
2776 Returns:
2777 Tuple of (sha, pack_hint)
2779 Raises:
2780 StopIteration: When no more objects to send
2781 """
2782 while True:
2783 if not self.objects_to_send:
2784 self.progress(
2785 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
2786 )
2787 raise StopIteration
2788 (sha, name, type_num, leaf) = self.objects_to_send.pop()
2789 if sha not in self.sha_done:
2790 break
2791 if not leaf:
2792 o = self.object_store[sha]
2793 if isinstance(o, Commit):
2794 self.add_todo([(o.tree, b"", Tree.type_num, False)])
2795 elif isinstance(o, Tree):
2796 todos = []
2797 for n, m, s in o.iteritems():
2798 assert m is not None
2799 assert n is not None
2800 assert s is not None
2801 if not S_ISGITLINK(m):
2802 todos.append(
2803 (
2804 s,
2805 n,
2806 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
2807 not stat.S_ISDIR(m),
2808 )
2809 )
2810 self.add_todo(todos)
2811 elif isinstance(o, Tag):
2812 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
2813 if sha in self._tagged:
2814 self.add_todo([(self._tagged[sha], None, None, True)])
2815 self.sha_done.add(sha)
2816 if len(self.sha_done) % 1000 == 0:
2817 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
2818 if type_num is None:
2819 pack_hint = None
2820 else:
2821 pack_hint = (type_num, name)
2822 return (sha, pack_hint)
2824 def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]:
2825 """Return iterator over objects to send.
2827 Returns:
2828 Self (this class implements the iterator protocol)
2829 """
2830 return self
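# Illustrative sketch (not part of this module): enumerating the objects a
# client is missing; `old_head` and `new_head` are hypothetical commit SHAs.
#
#     finder = MissingObjectFinder(store, haves=[old_head], wants=[new_head])
#     for sha, pack_hint in finder:
#         ...  # feed (sha, pack_hint) into pack generation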
2833class ObjectStoreGraphWalker:
2834 """Graph walker that finds what commits are missing from an object store."""
2836 heads: set[ObjectID]
2837 """Revisions without descendants in the local repo."""
2839 get_parents: Callable[[ObjectID], list[ObjectID]]
2840 """Function to retrieve parents in the local repo."""
2842 shallow: set[ObjectID]
2844 def __init__(
2845 self,
2846 local_heads: Iterable[ObjectID],
2847 get_parents: Callable[[ObjectID], list[ObjectID]],
2848 shallow: set[ObjectID] | None = None,
2849 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
2850 | None = None,
2851 ) -> None:
2852 """Create a new instance.
2854 Args:
2855 local_heads: Heads to start search with
2856 get_parents: Function for finding the parents of a SHA1.
2857 shallow: Set of shallow commits.
2858 update_shallow: Function to update shallow commits.
2859 """
2860 self.heads = set(local_heads)
2861 self.get_parents = get_parents
2862 self.parents: dict[ObjectID, list[ObjectID] | None] = {}
2863 if shallow is None:
2864 shallow = set()
2865 self.shallow = shallow
2866 self.update_shallow = update_shallow
2868 def nak(self) -> None:
2869 """Nothing in common was found."""
2871 def ack(self, sha: ObjectID) -> None:
2872 """Ack that a revision and its ancestors are present in the source."""
2873 if len(sha) != 40:
2874 raise ValueError(f"unexpected sha {sha!r} received")
2875 ancestors = {sha}
2877 # stop if we run out of heads to remove
2878 while self.heads:
2879 for a in ancestors:
2880 if a in self.heads:
2881 self.heads.remove(a)
2883 # collect all ancestors
2884 new_ancestors = set()
2885 for a in ancestors:
2886 ps = self.parents.get(a)
2887 if ps is not None:
2888 new_ancestors.update(ps)
2889 self.parents[a] = None
2891 # no more ancestors; stop
2892 if not new_ancestors:
2893 break
2895 ancestors = new_ancestors
2897 def next(self) -> ObjectID | None:
2898 """Iterate over ancestors of heads in the target."""
2899 if self.heads:
2900 ret = self.heads.pop()
2901 try:
2902 ps = self.get_parents(ret)
2903 except KeyError:
2904 return None
2905 self.parents[ret] = ps
2906 self.heads.update([p for p in ps if p not in self.parents])
2907 return ret
2908 return None
2910 __next__ = next
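# Illustrative sketch (not part of this module): walking local history to
# negotiate with a remote; `head` is a hypothetical commit SHA and `store`
# an object store.
#
#     def get_parents(sha):
#         return store[sha].parents
#
#     walker = ObjectStoreGraphWalker([head], get_parents)
#     sha = walker.next()      # next candidate to offer the remote
#     if sha is not None:
#         walker.ack(sha)      # remote reported it already has this commit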
2913def commit_tree_changes(
2914 object_store: BaseObjectStore,
2915 tree: ObjectID | Tree,
2916 changes: Sequence[tuple[bytes, int | None, ObjectID | None]],
2917) -> ObjectID:
2918 """Commit a specified set of changes to a tree structure.
2920 This will apply a set of changes on top of an existing tree, storing new
2921 objects in object_store.
2923 changes are a list of tuples with (path, mode, object_sha).
2924 Paths can refer to both blobs and trees. Setting the mode and
2925 object sha to None deletes the path.
2927 This method works especially well if there are only a small
2928 number of changes to a big tree. For a large number of changes
2929 to a large tree, use e.g. commit_tree.
2931 Args:
2932 object_store: Object store to store new objects in
2933 and retrieve old ones from.
2934 tree: Original tree root (SHA or Tree object)
2935 changes: changes to apply
2936 Returns: SHA1 of the new tree root
2937 """
2938 # TODO(jelmer): Save up the objects and add them using .add_objects
2939 # rather than with individual calls to .add_object.
2940 # Handle both Tree object and SHA
2941 if isinstance(tree, Tree):
2942 tree_obj: Tree = tree
2943 else:
2944 sha_obj = object_store[tree]
2945 assert isinstance(sha_obj, Tree)
2946 tree_obj = sha_obj
2947 nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {}
2948 for path, new_mode, new_sha in changes:
2949 try:
2950 (dirname, subpath) = path.split(b"/", 1)
2951 except ValueError:
2952 if new_sha is None:
2953 del tree_obj[path]
2954 else:
2955 assert new_mode is not None
2956 tree_obj[path] = (new_mode, new_sha)
2957 else:
2958 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
2959 for name, subchanges in nested_changes.items():
2960 try:
2961 orig_subtree_id: ObjectID | Tree = tree_obj[name][1]
2962 except KeyError:
2963 # For new directories, pass an empty Tree object
2964 orig_subtree_id = Tree()
2965 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
2966 subtree = object_store[subtree_id]
2967 assert isinstance(subtree, Tree)
2968 if len(subtree) == 0:
2969 del tree_obj[name]
2970 else:
2971 tree_obj[name] = (stat.S_IFDIR, subtree.id)
2972 object_store.add_object(tree_obj)
2973 return tree_obj.id
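# Illustrative sketch (not part of this module): applying a small change set
# on top of an existing tree; `root_tree_id` and `blob` are hypothetical.
#
#     new_root = commit_tree_changes(
#         store,
#         root_tree_id,
#         [
#             (b"docs/README", 0o100644, blob.id),  # add or update a file
#             (b"old/path.txt", None, None),        # delete a path
#         ],
#     )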
2976class OverlayObjectStore(BaseObjectStore):
2977 """Object store that can overlay multiple object stores."""
2979 def __init__(
2980 self,
2981 bases: list[BaseObjectStore],
2982 add_store: BaseObjectStore | None = None,
2983 ) -> None:
2984 """Initialize an OverlayObjectStore.
2986 Args:
2987 bases: List of base object stores to overlay
2988 add_store: Optional store to write new objects to
2989 """
2990 self.bases = bases
2991 self.add_store = add_store
2993 def add_object(self, object: ShaFile) -> None:
2994 """Add a single object to the store.
2996 Args:
2997 object: Object to add
2999 Raises:
3000 NotImplementedError: If no add_store was provided
3001 """
3002 if self.add_store is None:
3003 raise NotImplementedError(self.add_object)
3004 return self.add_store.add_object(object)
3006 def add_objects(
3007 self,
3008 objects: Sequence[tuple[ShaFile, str | None]],
3009 progress: Callable[[str], None] | None = None,
3010 ) -> Pack | None:
3011 """Add multiple objects to the store.
3013 Args:
3014 objects: Iterator of objects to add
3015 progress: Optional progress reporting callback
3017 Raises:
3018 NotImplementedError: If no add_store was provided
3019 """
3020 if self.add_store is None:
3021 raise NotImplementedError(self.add_object)
3022 return self.add_store.add_objects(objects, progress)
3024 @property
3025 def packs(self) -> list[Pack]:
3026 """Get the list of packs from all overlaid stores.
3028 Returns:
3029 Combined list of packs from all base stores
3030 """
3031 ret = []
3032 for b in self.bases:
3033 ret.extend(b.packs)
3034 return ret
3036 def __iter__(self) -> Iterator[ObjectID]:
3037 """Iterate over all object SHAs in the overlaid stores.
3039 Returns:
3040 Iterator of object SHAs (deduped across stores)
3041 """
3042 done = set()
3043 for b in self.bases:
3044 for o_id in b:
3045 if o_id not in done:
3046 yield o_id
3047 done.add(o_id)
3049 def iterobjects_subset(
3050 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
3051 ) -> Iterator[ShaFile]:
3052 """Iterate over a subset of objects from the overlaid stores.
3054 Args:
3055 shas: Iterable of object SHAs to retrieve
3056 allow_missing: If True, skip missing objects; if False, raise KeyError
3058 Returns:
3059 Iterator of ShaFile objects
3061 Raises:
3062 KeyError: If an object is missing and allow_missing is False
3063 """
3064 todo = set(shas)
3065 found: set[ObjectID] = set()
3067 for b in self.bases:
3068 # Create a copy of todo for each base to avoid modifying
3069 # the set while iterating through it
3070 current_todo = todo - found
3071 for o in b.iterobjects_subset(current_todo, allow_missing=True):
3072 yield o
3073 found.add(o.id)
3075 # Check for any remaining objects not found
3076 missing = todo - found
3077 if missing and not allow_missing:
3078 raise KeyError(next(iter(missing)))
3080 def iter_unpacked_subset(
3081 self,
3082 shas: Iterable[ObjectID | RawObjectID],
3083 include_comp: bool = False,
3084 allow_missing: bool = False,
3085 convert_ofs_delta: bool = True,
3086 ) -> Iterator[UnpackedObject]:
3087 """Iterate over unpacked objects from the overlaid stores.
3089 Args:
3090 shas: Iterable of object SHAs to retrieve
3091 include_comp: Whether to include compressed data
3092 allow_missing: If True, skip missing objects; if False, raise KeyError
3093 convert_ofs_delta: Whether to convert OFS_DELTA objects
3095 Returns:
3096 Iterator of unpacked objects
3098 Raises:
3099 KeyError: If an object is missing and allow_missing is False
3100 """
3101 todo: set[ObjectID | RawObjectID] = set(shas)
3102 for b in self.bases:
3103 for o in b.iter_unpacked_subset(
3104 todo,
3105 include_comp=include_comp,
3106 allow_missing=True,
3107 convert_ofs_delta=convert_ofs_delta,
3108 ):
3109 yield o
3110 todo.remove(o.sha())
3111 if todo and not allow_missing:
3112 raise KeyError(next(iter(todo)))
3114 def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]:
3115 """Get the raw object data from the overlaid stores.
3117 Args:
3118 sha_id: SHA of the object
3120 Returns:
3121 Tuple of (type_num, raw_data)
3123 Raises:
3124 KeyError: If object not found in any base store
3125 """
3126 for b in self.bases:
3127 try:
3128 return b.get_raw(sha_id)
3129 except KeyError:
3130 pass
3131 raise KeyError(sha_id)
3133 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
3134 """Check if an object is packed in any base store.
3136 Args:
3137 sha: SHA of the object
3139 Returns:
3140 True if object is packed in any base store
3141 """
3142 for b in self.bases:
3143 if b.contains_packed(sha):
3144 return True
3145 return False
3147 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
3148 """Check if an object is loose in any base store.
3150 Args:
3151 sha: SHA of the object
3153 Returns:
3154 True if object is loose in any base store
3155 """
3156 for b in self.bases:
3157 if b.contains_loose(sha):
3158 return True
3159 return False
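# Illustrative sketch (not part of this module): overlaying two stores so
# lookups fall through to either base, while writes go to the first;
# `some_sha` is hypothetical.
#
#     primary = MemoryObjectStore()
#     fallback = MemoryObjectStore()
#     overlay = OverlayObjectStore([primary, fallback], add_store=primary)
#     type_num, data = overlay.get_raw(some_sha)  # searched in both bases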
3162def read_packs_file(f: BinaryIO) -> Iterator[str]:
3163 """Yield the packs listed in a packs file."""
3164 for line in f.read().splitlines():
3165 if not line:
3166 continue
3167 (kind, name) = line.split(b" ", 1)
3168 if kind != b"P":
3169 continue
3170 yield os.fsdecode(name)
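# Illustrative sketch (not part of this module): parsing an objects/info/packs
# file, whose entries look like b"P pack-<name>.pack"; the name is hypothetical.
#
#     from io import BytesIO
#     names = list(read_packs_file(BytesIO(b"P pack-1234.pack\n")))
#     # names == ["pack-1234.pack"]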
3173class BucketBasedObjectStore(PackBasedObjectStore):
3174 """Object store implementation that uses a bucket store like S3 as backend."""
3176 def _iter_loose_objects(self) -> Iterator[ObjectID]:
3177 """Iterate over the SHAs of all loose objects."""
3178 return iter([])
3180 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None:
3181 return None
3183 def delete_loose_object(self, sha: ObjectID) -> None:
3184 """Delete a loose object (no-op for bucket stores).
3186 Bucket-based stores don't have loose objects, so this is a no-op.
3188 Args:
3189 sha: SHA of the object to delete
3190 """
3191 # Doesn't exist.
3193 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
3194 """Pack loose objects. Returns number of objects packed.
3196 BucketBasedObjectStore doesn't support loose objects, so this is a no-op.
3198 Args:
3199 progress: Optional progress reporting callback (ignored)
3200 """
3201 return 0
3203 def _remove_pack_by_name(self, name: str) -> None:
3204 """Remove a pack by name. Subclasses should implement this."""
3205 raise NotImplementedError(self._remove_pack_by_name)
3207 def _iter_pack_names(self) -> Iterator[str]:
3208 raise NotImplementedError(self._iter_pack_names)
3210 def _get_pack(self, name: str) -> Pack:
3211 raise NotImplementedError(self._get_pack)
3213 def _update_pack_cache(self) -> list[Pack]:
3214 pack_files = set(self._iter_pack_names())
3216 # Open newly appeared pack files
3217 new_packs = []
3218 for f in pack_files:
3219 if f not in self._pack_cache:
3220 pack = self._get_pack(f)
3221 new_packs.append(pack)
3222 self._pack_cache[f] = pack
3223 # Remove disappeared pack files
3224 for f in set(self._pack_cache) - pack_files:
3225 self._pack_cache.pop(f).close()
3226 return new_packs
3228 def _upload_pack(
3229 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
3230 ) -> None:
3231 raise NotImplementedError
3233 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
3234 """Add a new pack to this object store.
3236 Returns: Fileobject to write to, a commit function to
3237 call when the pack is finished and an abort
3238 function.
3239 """
3240 import tempfile
3242 pf = tempfile.SpooledTemporaryFile(
3243 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
3244 )
3246 def commit() -> Pack | None:
3247 if pf.tell() == 0:
3248 pf.close()
3249 return None
3251 pf.seek(0)
3253 p = PackData(pf.name, pf)
3254 entries = p.sorted_entries()
3255 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
3256 idxf = tempfile.SpooledTemporaryFile(
3257 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
3258 )
3259 checksum = p.get_stored_checksum()
3260 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
3261 idxf.seek(0)
3262 idx = load_pack_index_file(basename + ".idx", idxf)
3263 for pack in self.packs:
3264 if pack.get_stored_checksum() == p.get_stored_checksum():
3265 p.close()
3266 idx.close()
3267 pf.close()
3268 idxf.close()
3269 return pack
3270 pf.seek(0)
3271 idxf.seek(0)
3272 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type]
3273 final_pack = Pack.from_objects(p, idx)
3274 self._add_cached_pack(basename, final_pack)
3275 pf.close()
3276 idxf.close()
3277 return final_pack
3279 return pf, commit, pf.close # type: ignore[return-value]
3282def _collect_ancestors(
3283 store: ObjectContainer,
3284 heads: Iterable[ObjectID],
3285 common: frozenset[ObjectID] = frozenset(),
3286 shallow: frozenset[ObjectID] = frozenset(),
3287 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
3288) -> tuple[set[ObjectID], set[ObjectID]]:
3289 """Collect all ancestors of heads up to (excluding) those in common.
3291 Args:
3292 store: Object store to get commits from
3293 heads: commits to start from
3294 common: commits to end at, or empty set to walk repository
3295 completely
3296 shallow: Set of shallow commits
3297 get_parents: Optional function for getting the parents of a
3298 commit.
3299 Returns: A tuple (A, B), where A is the set of all commits reachable
3300 from heads but not present in common, and B is the set of common (shared)
3301 elements that are directly reachable from heads.
3302 """
3303 bases = set()
3304 commits = set()
3305 queue: list[ObjectID] = []
3306 queue.extend(heads)
3308 # Try to use commit graph if available
3309 commit_graph = store.get_commit_graph()
3311 while queue:
3312 e = queue.pop(0)
3313 if e in common:
3314 bases.add(e)
3315 elif e not in commits:
3316 commits.add(e)
3317 if e in shallow:
3318 continue
3320 # Try to use commit graph for parent lookup
3321 parents = None
3322 if commit_graph:
3323 parents = commit_graph.get_parents(e)
3325 if parents is None:
3326 # Fall back to loading the object
3327 cmt = store[e]
3328 assert isinstance(cmt, Commit)
3329 parents = get_parents(cmt)
3331 queue.extend(parents)
3332 return (commits, bases)
3335def iter_tree_contents(
3336 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
3337) -> Iterator[TreeEntry]:
3338 """Iterate the contents of a tree and all subtrees.
3340 Iteration is depth-first pre-order, as in e.g. os.walk.
3342 Args:
3343 store: Object store to get trees from
3344 tree_id: SHA1 of the tree.
3345 include_trees: If True, include tree objects in the iteration.
3347 Yields: TreeEntry namedtuples for all the objects in a tree.
3348 """
3349 if tree_id is None:
3350 return
3351 # This could be fairly easily generalized to >2 trees if we find a use
3352 # case.
3353 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
3354 while todo:
3355 entry = todo.pop()
3356 assert entry.mode is not None
3357 if stat.S_ISDIR(entry.mode):
3358 extra = []
3359 assert entry.sha is not None
3360 tree = store[entry.sha]
3361 assert isinstance(tree, Tree)
3362 for subentry in tree.iteritems(name_order=True):
3363 assert entry.path is not None
3364 extra.append(subentry.in_path(entry.path))
3365 todo.extend(reversed(extra))
3366 if not stat.S_ISDIR(entry.mode) or include_trees:
3367 yield entry
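# Illustrative sketch (not part of this module): listing every blob under a
# tree; `tree_id` is hypothetical.
#
#     for entry in iter_tree_contents(store, tree_id):
#         print(entry.path, oct(entry.mode), entry.sha)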
3370def iter_commit_contents(
3371 store: ObjectContainer,
3372 commit: Commit | ObjectID | RawObjectID,
3373 *,
3374 include: Sequence[str | bytes | Path] | None = None,
3375) -> Iterator[TreeEntry]:
3376 """Iterate the contents of the repository at the specified commit.
3378 This is a wrapper around iter_tree_contents() and
3379 tree_lookup_path() to simplify the common task of getting the
3380 contents of a repo at a particular commit. See also
3381 dulwich.index.build_file_from_blob() for writing individual files
3382 to disk.
3384 Args:
3385 store: Object store to get trees from
3386 commit: Commit object, or SHA1 of a commit
3387 include: if provided, only the entries whose paths are in the
3388 list, or whose parent tree is in the list, will be
3389 included. Note that duplicate or overlapping paths
3390 (e.g. ["foo", "foo/bar"]) may result in duplicate entries
3392 Yields: TreeEntry namedtuples for all matching files in a commit.
3393 """
3394 sha = commit.id if isinstance(commit, Commit) else commit
3395 if not isinstance(obj := store[sha], Commit):
3396 raise TypeError(
3397 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
3398 )
3399 commit = obj
3400 encoding = commit.encoding or "utf-8"
3401 include_bytes: list[bytes] = (
3402 [
3403 path if isinstance(path, bytes) else str(path).encode(encoding)
3404 for path in include
3405 ]
3406 if include is not None
3407 else [b""]
3408 )
3410 for path in include_bytes:
3411 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
3412 # Iterate all contained files if path points to a dir, otherwise just get that
3413 # single file
3414 if isinstance(store[obj_id], Tree):
3415 for entry in iter_tree_contents(store, obj_id):
3416 yield entry.in_path(path)
3417 else:
3418 yield TreeEntry(path, mode, obj_id)
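# Illustrative sketch (not part of this module): restricting iteration to a
# subdirectory of a commit; `commit_sha` is hypothetical.
#
#     for entry in iter_commit_contents(store, commit_sha, include=[b"docs"]):
#         ...  # only entries under docs/ are yielded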
3421def peel_sha(
3422 store: ObjectContainer, sha: ObjectID | RawObjectID
3423) -> tuple[ShaFile, ShaFile]:
3424 """Peel all tags from a SHA.
3426 Args:
3427 store: Object store to get objects from
3428 sha: The object SHA to peel.
3429 Returns: A tuple of (unpeeled, peeled) objects: the object for the
3430 original SHA and the object reached after peeling all intermediate tags.
3431 If the original SHA does not point to a tag, both are the same object.
3432 """
3433 unpeeled = obj = store[sha]
3434 obj_class = object_class(obj.type_name)
3435 while obj_class is Tag:
3436 assert isinstance(obj, Tag)
3437 obj_class, sha = obj.object
3438 obj = store[sha]
3439 return unpeeled, obj
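# Illustrative sketch (not part of this module): resolving an annotated tag
# down to the commit it points at; `tag_sha` is hypothetical.
#
#     unpeeled, peeled = peel_sha(store, tag_sha)
#     # `unpeeled` is the Tag object itself, `peeled` the tagged commit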
3442class GraphTraversalReachability:
3443 """Naive graph traversal implementation of ObjectReachabilityProvider.
3445 This implementation wraps existing graph traversal functions
3446 (_collect_ancestors, _collect_filetree_revs) to provide the standard
3447 reachability interface without any performance optimizations.
3448 """
3450 def __init__(self, object_store: BaseObjectStore) -> None:
3451 """Initialize the graph traversal provider.
3453 Args:
3454 object_store: Object store to query
3455 """
3456 self.store = object_store
3458 def get_reachable_commits(
3459 self,
3460 heads: Iterable[ObjectID],
3461 exclude: Iterable[ObjectID] | None = None,
3462 shallow: Set[ObjectID] | None = None,
3463 ) -> set[ObjectID]:
3464 """Get all commits reachable from heads, excluding those in exclude.
3466 Uses _collect_ancestors for commit traversal.
3468 Args:
3469 heads: Starting commit SHAs
3470 exclude: Commit SHAs to exclude (and their ancestors)
3471 shallow: Set of shallow commit boundaries
3473 Returns:
3474 Set of commit SHAs reachable from heads but not from exclude
3475 """
3476 exclude_set = frozenset(exclude) if exclude else frozenset()
3477 shallow_set = frozenset(shallow) if shallow else frozenset()
3478 commits, _bases = _collect_ancestors(
3479 self.store, heads, exclude_set, shallow_set
3480 )
3481 return commits
3483 def get_tree_objects(
3484 self,
3485 tree_shas: Iterable[ObjectID],
3486 ) -> set[ObjectID]:
3487 """Get all trees and blobs reachable from the given trees.
3489 Uses _collect_filetree_revs for tree traversal.
3491 Args:
3492 tree_shas: Starting tree SHAs
3494 Returns:
3495 Set of tree and blob SHAs
3496 """
3497 result: set[ObjectID] = set()
3498 for tree_sha in tree_shas:
3499 _collect_filetree_revs(self.store, tree_sha, result)
3500 return result
3502 def get_reachable_objects(
3503 self,
3504 commits: Iterable[ObjectID],
3505 exclude_commits: Iterable[ObjectID] | None = None,
3506 ) -> set[ObjectID]:
3507 """Get all objects (commits + trees + blobs) reachable from commits.
3509 Args:
3510 commits: Starting commit SHAs
3511 exclude_commits: Commits whose objects should be excluded
3513 Returns:
3514 Set of all object SHAs (commits, trees, blobs)
3515 """
3516 commits_set = set(commits)
3517 result = set(commits_set)
3519 # Get trees for all commits
3520 tree_shas = []
3521 for commit_sha in commits_set:
3522 try:
3523 commit = self.store[commit_sha]
3524 if isinstance(commit, Commit):
3525 tree_shas.append(commit.tree)
3526 except KeyError:
3527 # Commit not in store, skip
3528 continue
3530 # Collect all tree/blob objects
3531 result.update(self.get_tree_objects(tree_shas))
3533 # Exclude objects from exclude_commits if needed
3534 if exclude_commits:
3535 exclude_objects = self.get_reachable_objects(exclude_commits, None)
3536 result -= exclude_objects
3538 return result
3541class BitmapReachability:
3542 """Bitmap-accelerated implementation of ObjectReachabilityProvider.
3544 This implementation uses packfile bitmap indexes where available to
3545 accelerate reachability queries. Falls back to graph traversal when
3546 bitmaps don't cover the requested commits.
3547 """
3549 def __init__(self, object_store: "PackBasedObjectStore") -> None:
3550 """Initialize the bitmap provider.
3552 Args:
3553 object_store: Pack-based object store with bitmap support
3554 """
3555 self.store = object_store
3556 # Fallback to graph traversal for operations not yet optimized
3557 self._fallback = GraphTraversalReachability(object_store)
3559 def _combine_commit_bitmaps(
3560 self,
3561 commit_shas: set[ObjectID],
3562 exclude_shas: set[ObjectID] | None = None,
3563 ) -> tuple["EWAHBitmap", "Pack"] | None:
3564 """Combine bitmaps for multiple commits using OR, with optional exclusion.
3566 Args:
3567 commit_shas: Set of commit SHAs to combine
3568 exclude_shas: Optional set of commit SHAs to exclude
3570 Returns:
3571 Tuple of (combined_bitmap, pack) or None if bitmaps unavailable
3572 """
3573 from .bitmap import find_commit_bitmaps
3575 # Find bitmaps for the commits
3576 commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)
3578 # If we can't find bitmaps for all commits, return None
3579 if len(commit_bitmaps) < len(commit_shas):
3580 return None
3582 # Combine bitmaps using OR
3583 combined_bitmap = None
3584 result_pack = None
3586 for commit_sha in commit_shas:
3587 pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
3588 commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
3590 if commit_bitmap is None:
3591 return None
3593 if combined_bitmap is None:
3594 combined_bitmap = commit_bitmap
3595 result_pack = pack
3596 elif pack == result_pack:
3597 # Same pack, can OR directly
3598 combined_bitmap = combined_bitmap | commit_bitmap
3599 else:
3600 # Different packs, can't combine
3601 return None
3603 # Handle exclusions if provided
3604 if exclude_shas and result_pack and combined_bitmap:
3605 exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])
3607 if len(exclude_bitmaps) == len(exclude_shas):
3608 # All excludes have bitmaps, compute exclusion
3609 exclude_combined = None
3611 for commit_sha in exclude_shas:
3612 _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
3613 exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
3615 if exclude_bitmap is None:
3616 break
3618 if exclude_combined is None:
3619 exclude_combined = exclude_bitmap
3620 else:
3621 exclude_combined = exclude_combined | exclude_bitmap
3623 # Subtract excludes using set difference
3624 if exclude_combined:
3625 combined_bitmap = combined_bitmap - exclude_combined
3627 if combined_bitmap and result_pack:
3628 return (combined_bitmap, result_pack)
3629 return None
3631 def get_reachable_commits(
3632 self,
3633 heads: Iterable[ObjectID],
3634 exclude: Iterable[ObjectID] | None = None,
3635 shallow: Set[ObjectID] | None = None,
3636 ) -> set[ObjectID]:
3637 """Get all commits reachable from heads using bitmaps where possible.
3639 Args:
3640 heads: Starting commit SHAs
3641 exclude: Commit SHAs to exclude (and their ancestors)
3642 shallow: Set of shallow commit boundaries
3644 Returns:
3645 Set of commit SHAs reachable from heads but not from exclude
3646 """
3647 from .bitmap import bitmap_to_object_shas
3649 # If shallow is specified, fall back to graph traversal
3650 # (bitmaps don't support shallow boundaries well)
3651 if shallow:
3652 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3654 heads_set = set(heads)
3655 exclude_set = set(exclude) if exclude else None
3657 # Try to combine bitmaps
3658 result = self._combine_commit_bitmaps(heads_set, exclude_set)
3659 if result is None:
3660 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3662 combined_bitmap, result_pack = result
3664 # Convert bitmap to commit SHAs, filtering for commits only
3665 pack_bitmap = result_pack.bitmap
3666 if pack_bitmap is None:
3667 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3668 commit_type_filter = pack_bitmap.commit_bitmap
3669 return bitmap_to_object_shas(
3670 combined_bitmap, result_pack.index, commit_type_filter
3671 )
3673 def get_tree_objects(
3674 self,
3675 tree_shas: Iterable[ObjectID],
3676 ) -> set[ObjectID]:
3677 """Get all trees and blobs reachable from the given trees.
3679 Args:
3680 tree_shas: Starting tree SHAs
3682 Returns:
3683 Set of tree and blob SHAs
3684 """
3685 # Tree traversal doesn't benefit much from bitmaps, use fallback
3686 return self._fallback.get_tree_objects(tree_shas)
3688 def get_reachable_objects(
3689 self,
3690 commits: Iterable[ObjectID],
3691 exclude_commits: Iterable[ObjectID] | None = None,
3692 ) -> set[ObjectID]:
3693 """Get all objects reachable from commits using bitmaps.
3695 Args:
3696 commits: Starting commit SHAs
3697 exclude_commits: Commits whose objects should be excluded
3699 Returns:
3700 Set of all object SHAs (commits, trees, blobs)
3701 """
3702 from .bitmap import bitmap_to_object_shas
3704 commits_set = set(commits)
3705 exclude_set = set(exclude_commits) if exclude_commits else None
3707 # Try to combine bitmaps
3708 result = self._combine_commit_bitmaps(commits_set, exclude_set)
3709 if result is None:
3710 return self._fallback.get_reachable_objects(commits, exclude_commits)
3712 combined_bitmap, result_pack = result
3714 # Convert bitmap to all object SHAs (no type filter)
3715 return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)