Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%
1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
24"""Git object store interfaces and implementation."""
26import binascii
27import os
28import stat
29import sys
30import time
31import warnings
32from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
33from contextlib import suppress
34from io import BytesIO
35from pathlib import Path
36from typing import (
37 TYPE_CHECKING,
38 BinaryIO,
39 Protocol,
40)
42from .errors import NotTreeError
43from .file import GitFile, _GitFile
44from .objects import (
45 S_ISGITLINK,
46 ZERO_SHA,
47 Blob,
48 Commit,
49 ObjectID,
50 ShaFile,
51 Tag,
52 Tree,
53 TreeEntry,
54 hex_to_filename,
55 hex_to_sha,
56 object_class,
57 sha_to_hex,
58 valid_hexsha,
59)
60from .pack import (
61 PACK_SPOOL_FILE_MAX_SIZE,
62 ObjectContainer,
63 Pack,
64 PackData,
65 PackedObjectContainer,
66 PackFileDisappeared,
67 PackHint,
68 PackIndexer,
69 PackInflater,
70 PackStreamCopier,
71 UnpackedObject,
72 extend_pack,
73 full_unpacked_object,
74 generate_unpacked_objects,
75 iter_sha1,
76 load_pack_index_file,
77 pack_objects_to_data,
78 write_pack_data,
79 write_pack_index,
80)
81from .protocol import DEPTH_INFINITE
82from .refs import PEELED_TAG_SUFFIX, Ref
84if TYPE_CHECKING:
85 from .bitmap import EWAHBitmap
86 from .commit_graph import CommitGraph
87 from .config import Config
88 from .diff_tree import RenameDetector
89 from .pack import Pack
92class GraphWalker(Protocol):
93 """Protocol for graph walker objects."""
95 def __next__(self) -> bytes | None:
96 """Return the next object SHA to visit."""
97 ...
99 def ack(self, sha: bytes) -> None:
100 """Acknowledge that an object has been received."""
101 ...
103 def nak(self) -> None:
104 """Nothing in common was found."""
105 ...
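# Illustrative sketch (not part of the original module): a minimal in-memory
# GraphWalker. BaseObjectStore.find_common_revisions() below drives such a
# walker by calling next() until it yields None and ack()ing every SHA the
# store already has. The candidate SHAs here are hypothetical caller input.
class _ExampleGraphWalker:
    """Walk a fixed list of candidate SHAs, recording acknowledgements."""

    def __init__(self, candidates: list[bytes]) -> None:
        self._candidates = list(candidates)
        self.acked: list[bytes] = []

    def __next__(self) -> bytes | None:
        # Hand out candidates one at a time; None signals exhaustion.
        return self._candidates.pop(0) if self._candidates else None

    def ack(self, sha: bytes) -> None:
        self.acked.append(sha)

    def nak(self) -> None:
        pass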
108class ObjectReachabilityProvider(Protocol):
109 """Protocol for computing object reachability queries.
111 This abstraction allows reachability computations to be backed by either
112 naive graph traversal or optimized bitmap indexes, with a consistent interface.
113 """
115 def get_reachable_commits(
116 self,
117 heads: Iterable[bytes],
118 exclude: Iterable[bytes] | None = None,
119 shallow: Set[bytes] | None = None,
120 ) -> set[bytes]:
121 """Get all commits reachable from heads, excluding those in exclude.
123 Args:
124 heads: Starting commit SHAs
125 exclude: Commit SHAs to exclude (and their ancestors)
126 shallow: Set of shallow commit boundaries (traversal stops here)
128 Returns:
129 Set of commit SHAs reachable from heads but not from exclude
130 """
131 ...
133 def get_reachable_objects(
134 self,
135 commits: Iterable[bytes],
136 exclude_commits: Iterable[bytes] | None = None,
137 ) -> set[bytes]:
138 """Get all objects (commits + trees + blobs) reachable from commits.
140 Args:
141 commits: Starting commit SHAs
142 exclude_commits: Commits whose objects should be excluded
144 Returns:
145 Set of all object SHAs (commits, trees, blobs, tags)
146 """
147 ...
149 def get_tree_objects(
150 self,
151 tree_shas: Iterable[bytes],
152 ) -> set[bytes]:
153 """Get all trees and blobs reachable from the given trees.
155 Args:
156 tree_shas: Starting tree SHAs
158 Returns:
159 Set of tree and blob SHAs
160 """
161 ...
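# Illustrative sketch (not part of the original module): how a caller might use
# an ObjectReachabilityProvider obtained from a store. ``store`` is any
# BaseObjectStore subclass; ``local_heads`` and ``remote_heads`` are
# hypothetical commit SHAs supplied by the caller.
def _example_objects_to_send(
    store: "BaseObjectStore",
    local_heads: Iterable[bytes],
    remote_heads: Iterable[bytes],
) -> set[bytes]:
    """Return every object SHA reachable locally but not from ``remote_heads``."""
    provider = store.get_reachability_provider()
    commits = provider.get_reachable_commits(heads=local_heads, exclude=remote_heads)
    return provider.get_reachable_objects(commits, exclude_commits=remote_heads)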
164INFODIR = "info"
165PACKDIR = "pack"
167# use permissions consistent with Git; just readable by everyone
168# TODO: should packs also be non-writable on Windows? if so, that
 169# would require some rather significant adjustments to the test suite
170PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
172# Grace period for cleaning up temporary pack files (in seconds)
173# Matches git's default of 2 weeks
174DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
177def find_shallow(
178 store: ObjectContainer, heads: Iterable[bytes], depth: int
179) -> tuple[set[bytes], set[bytes]]:
180 """Find shallow commits according to a given depth.
182 Args:
183 store: An ObjectStore for looking up objects.
184 heads: Iterable of head SHAs to start walking from.
185 depth: The depth of ancestors to include. A depth of one includes
186 only the heads themselves.
187 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
188 considered shallow and unshallow according to the arguments. Note that
189 these sets may overlap if a commit is reachable along multiple paths.
190 """
191 parents: dict[bytes, list[bytes]] = {}
192 commit_graph = store.get_commit_graph()
194 def get_parents(sha: bytes) -> list[bytes]:
195 result = parents.get(sha, None)
196 if not result:
197 # Try to use commit graph first if available
198 if commit_graph:
199 graph_parents = commit_graph.get_parents(sha)
200 if graph_parents is not None:
201 result = graph_parents
202 parents[sha] = result
203 return result
204 # Fall back to loading the object
205 commit = store[sha]
206 assert isinstance(commit, Commit)
207 result = commit.parents
208 parents[sha] = result
209 return result
211 todo = [] # stack of (sha, depth)
212 for head_sha in heads:
213 obj = store[head_sha]
214 # Peel tags if necessary
215 while isinstance(obj, Tag):
216 _, sha = obj.object
217 obj = store[sha]
218 if isinstance(obj, Commit):
219 todo.append((obj.id, 1))
221 not_shallow = set()
222 shallow = set()
223 while todo:
224 sha, cur_depth = todo.pop()
225 if cur_depth < depth:
226 not_shallow.add(sha)
227 new_depth = cur_depth + 1
228 todo.extend((p, new_depth) for p in get_parents(sha))
229 else:
230 shallow.add(sha)
232 return shallow, not_shallow
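# Illustrative sketch (not part of the original module): using find_shallow() to
# honour a depth-limited fetch. ``store`` and ``head_shas`` are hypothetical
# caller-supplied values (e.g. the SHAs of advertised refs).
def _example_shallow_boundary(
    store: ObjectContainer, head_shas: Iterable[bytes], depth: int
) -> set[bytes]:
    """Return the commits a server would mark as shallow for ``depth``."""
    shallow, not_shallow = find_shallow(store, head_shas, depth)
    # A commit reachable within the depth along some path is never shallow,
    # even if another, longer path also reaches it.
    return shallow - not_shallow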
235def get_depth(
236 store: ObjectContainer,
237 head: bytes,
238 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
239 max_depth: int | None = None,
240) -> int:
241 """Return the current available depth for the given head.
243 For commits with multiple parents, the largest possible depth will be
244 returned.
246 Args:
247 store: Object store to search in
248 head: commit to start from
249 get_parents: optional function for getting the parents of a commit
250 max_depth: maximum depth to search
251 """
252 if head not in store:
253 return 0
254 current_depth = 1
255 queue = [(head, current_depth)]
256 commit_graph = store.get_commit_graph()
258 while queue and (max_depth is None or current_depth < max_depth):
259 e, depth = queue.pop(0)
260 current_depth = max(current_depth, depth)
262 # Try to use commit graph for parent lookup if available
263 parents = None
264 if commit_graph:
265 parents = commit_graph.get_parents(e)
267 if parents is None:
268 # Fall back to loading the object
269 cmt = store[e]
270 if isinstance(cmt, Tag):
271 _cls, sha = cmt.object
272 cmt = store[sha]
273 parents = get_parents(cmt)
275 queue.extend((parent, depth + 1) for parent in parents if parent in store)
276 return current_depth
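# Illustrative sketch (not part of the original module): checking whether a
# shallow clone already holds enough history before asking to deepen it.
# ``store`` and ``head`` are hypothetical caller-supplied values.
def _example_needs_deepening(
    store: ObjectContainer, head: bytes, wanted_depth: int
) -> bool:
    """Return True if ``head`` has fewer than ``wanted_depth`` ancestors locally."""
    return get_depth(store, head, max_depth=wanted_depth) < wanted_depth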
279class PackContainer(Protocol):
280 """Protocol for containers that can accept pack files."""
282 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
283 """Add a new pack."""
286class BaseObjectStore:
287 """Object store interface."""
289 def determine_wants_all(
290 self, refs: Mapping[Ref, ObjectID], depth: int | None = None
291 ) -> list[ObjectID]:
292 """Determine which objects are wanted based on refs."""
294 def _want_deepen(sha: bytes) -> bool:
295 if not depth:
296 return False
297 if depth == DEPTH_INFINITE:
298 return True
299 return depth > self._get_depth(sha)
301 return [
302 sha
303 for (ref, sha) in refs.items()
304 if (sha not in self or _want_deepen(sha))
305 and not ref.endswith(PEELED_TAG_SUFFIX)
306 and not sha == ZERO_SHA
307 ]
309 def contains_loose(self, sha: bytes) -> bool:
310 """Check if a particular object is present by SHA1 and is loose."""
311 raise NotImplementedError(self.contains_loose)
313 def contains_packed(self, sha: bytes) -> bool:
314 """Check if a particular object is present by SHA1 and is packed."""
315 return False # Default implementation for stores that don't support packing
317 def __contains__(self, sha1: bytes) -> bool:
318 """Check if a particular object is present by SHA1.
320 This method makes no distinction between loose and packed objects.
321 """
322 return self.contains_loose(sha1)
324 @property
325 def packs(self) -> list[Pack]:
326 """Iterable of pack objects."""
327 raise NotImplementedError
329 def get_raw(self, name: bytes) -> tuple[int, bytes]:
330 """Obtain the raw text for an object.
332 Args:
333 name: sha for the object.
334 Returns: tuple with numeric type and object contents.
335 """
336 raise NotImplementedError(self.get_raw)
338 def __getitem__(self, sha1: ObjectID) -> ShaFile:
339 """Obtain an object by SHA1."""
340 type_num, uncomp = self.get_raw(sha1)
341 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
343 def __iter__(self) -> Iterator[bytes]:
344 """Iterate over the SHAs that are present in this store."""
345 raise NotImplementedError(self.__iter__)
347 def add_object(self, obj: ShaFile) -> None:
348 """Add a single object to this object store."""
349 raise NotImplementedError(self.add_object)
351 def add_objects(
352 self,
353 objects: Sequence[tuple[ShaFile, str | None]],
354 progress: Callable[..., None] | None = None,
355 ) -> "Pack | None":
356 """Add a set of objects to this object store.
358 Args:
359 objects: Iterable over a list of (object, path) tuples
360 progress: Optional progress callback
361 """
362 raise NotImplementedError(self.add_objects)
364 def get_reachability_provider(
365 self, prefer_bitmap: bool = True
366 ) -> ObjectReachabilityProvider:
367 """Get a reachability provider for this object store.
369 Returns an ObjectReachabilityProvider that can efficiently compute
370 object reachability queries. Subclasses can override this to provide
371 optimized implementations (e.g., using bitmap indexes).
373 Args:
374 prefer_bitmap: Whether to prefer bitmap-based reachability if
375 available.
377 Returns:
378 ObjectReachabilityProvider instance
379 """
380 return GraphTraversalReachability(self)
382 def tree_changes(
383 self,
384 source: bytes | None,
385 target: bytes | None,
386 want_unchanged: bool = False,
387 include_trees: bool = False,
388 change_type_same: bool = False,
389 rename_detector: "RenameDetector | None" = None,
390 paths: Sequence[bytes] | None = None,
391 ) -> Iterator[
392 tuple[
393 tuple[bytes | None, bytes | None],
394 tuple[int | None, int | None],
395 tuple[bytes | None, bytes | None],
396 ]
397 ]:
398 """Find the differences between the contents of two trees.
400 Args:
401 source: SHA1 of the source tree
402 target: SHA1 of the target tree
403 want_unchanged: Whether unchanged files should be reported
404 include_trees: Whether to include trees
405 change_type_same: Whether to report files changing
406 type in the same entry.
407 rename_detector: RenameDetector object for detecting renames.
408 paths: Optional list of paths to filter to (as bytes).
409 Returns: Iterator over tuples with
410 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
411 """
412 from .diff_tree import tree_changes
414 for change in tree_changes(
415 self,
416 source,
417 target,
418 want_unchanged=want_unchanged,
419 include_trees=include_trees,
420 change_type_same=change_type_same,
421 rename_detector=rename_detector,
422 paths=paths,
423 ):
424 old_path = change.old.path if change.old is not None else None
425 new_path = change.new.path if change.new is not None else None
426 old_mode = change.old.mode if change.old is not None else None
427 new_mode = change.new.mode if change.new is not None else None
428 old_sha = change.old.sha if change.old is not None else None
429 new_sha = change.new.sha if change.new is not None else None
430 yield (
431 (old_path, new_path),
432 (old_mode, new_mode),
433 (old_sha, new_sha),
434 )
436 def iter_tree_contents(
437 self, tree_id: bytes, include_trees: bool = False
438 ) -> Iterator[TreeEntry]:
439 """Iterate the contents of a tree and all subtrees.
441 Iteration is depth-first pre-order, as in e.g. os.walk.
443 Args:
444 tree_id: SHA1 of the tree.
445 include_trees: If True, include tree objects in the iteration.
446 Returns: Iterator over TreeEntry namedtuples for all the objects in a
447 tree.
448 """
449 warnings.warn(
450 "Please use dulwich.object_store.iter_tree_contents",
451 DeprecationWarning,
452 stacklevel=2,
453 )
454 return iter_tree_contents(self, tree_id, include_trees=include_trees)
456 def iterobjects_subset(
457 self, shas: Iterable[bytes], *, allow_missing: bool = False
458 ) -> Iterator[ShaFile]:
459 """Iterate over a subset of objects in the store.
461 Args:
462 shas: Iterable of object SHAs to retrieve
463 allow_missing: If True, skip missing objects; if False, raise KeyError
465 Returns:
466 Iterator of ShaFile objects
468 Raises:
469 KeyError: If an object is missing and allow_missing is False
470 """
471 for sha in shas:
472 try:
473 yield self[sha]
474 except KeyError:
475 if not allow_missing:
476 raise
478 def iter_unpacked_subset(
479 self,
480 shas: Iterable[bytes],
481 include_comp: bool = False,
482 allow_missing: bool = False,
483 convert_ofs_delta: bool = True,
484 ) -> "Iterator[UnpackedObject]":
485 """Iterate over unpacked objects for a subset of SHAs.
487 Default implementation that converts ShaFile objects to UnpackedObject.
488 Subclasses may override for more efficient unpacked access.
490 Args:
491 shas: Iterable of object SHAs to retrieve
492 include_comp: Whether to include compressed data (ignored in base
493 implementation)
494 allow_missing: If True, skip missing objects; if False, raise
495 KeyError
496 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
497 base implementation)
499 Returns:
500 Iterator of UnpackedObject instances
502 Raises:
503 KeyError: If an object is missing and allow_missing is False
504 """
505 from .pack import UnpackedObject
507 for sha in shas:
508 try:
509 obj = self[sha]
510 # Convert ShaFile to UnpackedObject
511 unpacked = UnpackedObject(
512 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
513 )
514 yield unpacked
515 except KeyError:
516 if not allow_missing:
517 raise
519 def find_missing_objects(
520 self,
521 haves: Iterable[bytes],
522 wants: Iterable[bytes],
523 shallow: Set[bytes] | None = None,
524 progress: Callable[..., None] | None = None,
525 get_tagged: Callable[[], dict[bytes, bytes]] | None = None,
526 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
527 ) -> Iterator[tuple[bytes, PackHint | None]]:
528 """Find the missing objects required for a set of revisions.
530 Args:
531 haves: Iterable over SHAs already in common.
532 wants: Iterable over SHAs of objects to fetch.
533 shallow: Set of shallow commit SHA1s to skip
534 progress: Simple progress function that will be called with
535 updated progress strings.
536 get_tagged: Function that returns a dict of pointed-to sha ->
537 tag sha for including tags.
538 get_parents: Optional function for getting the parents of a
539 commit.
540 Returns: Iterator over (sha, path) pairs.
541 """
542 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
543 finder = MissingObjectFinder(
544 self,
545 haves=haves,
546 wants=wants,
547 shallow=shallow,
548 progress=progress,
549 get_tagged=get_tagged,
550 get_parents=get_parents,
551 )
552 return iter(finder)
554 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]:
555 """Find which revisions this store has in common using graphwalker.
557 Args:
558 graphwalker: A graphwalker object.
559 Returns: List of SHAs that are in common
560 """
561 haves = []
562 sha = next(graphwalker)
563 while sha:
564 if sha in self:
565 haves.append(sha)
566 graphwalker.ack(sha)
567 sha = next(graphwalker)
568 return haves
570 def generate_pack_data(
571 self,
572 have: Iterable[bytes],
573 want: Iterable[bytes],
574 *,
575 shallow: Set[bytes] | None = None,
576 progress: Callable[..., None] | None = None,
577 ofs_delta: bool = True,
578 ) -> tuple[int, Iterator[UnpackedObject]]:
579 """Generate pack data objects for a set of wants/haves.
581 Args:
582 have: List of SHA1s of objects that should not be sent
583 want: List of SHA1s of objects that should be sent
584 shallow: Set of shallow commit SHA1s to skip
585 ofs_delta: Whether OFS deltas can be included
586 progress: Optional progress reporting method
587 """
588 # Note that the pack-specific implementation below is more efficient,
589 # as it reuses deltas
590 missing_objects = MissingObjectFinder(
591 self, haves=have, wants=want, shallow=shallow, progress=progress
592 )
593 object_ids = list(missing_objects)
594 return pack_objects_to_data(
595 [(self[oid], path) for oid, path in object_ids],
596 ofs_delta=ofs_delta,
597 progress=progress,
598 )
600 def peel_sha(self, sha: bytes) -> bytes:
601 """Peel all tags from a SHA.
603 Args:
604 sha: The object SHA to peel.
605 Returns: The fully-peeled SHA1 of a tag object, after peeling all
606 intermediate tags; if the original ref does not point to a tag,
607 this will equal the original SHA1.
608 """
609 warnings.warn(
610 "Please use dulwich.object_store.peel_sha()",
611 DeprecationWarning,
612 stacklevel=2,
613 )
614 return peel_sha(self, sha)[1].id
616 def _get_depth(
617 self,
618 head: bytes,
619 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
620 max_depth: int | None = None,
621 ) -> int:
622 """Return the current available depth for the given head.
624 For commits with multiple parents, the largest possible depth will be
625 returned.
627 Args:
628 head: commit to start from
629 get_parents: optional function for getting the parents of a commit
630 max_depth: maximum depth to search
631 """
632 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
634 def close(self) -> None:
635 """Close any files opened by this object store."""
636 # Default implementation is a NO-OP
638 def prune(self, grace_period: int | None = None) -> None:
639 """Prune/clean up this object store.
641 This includes removing orphaned temporary files and other
642 housekeeping tasks. Default implementation is a NO-OP.
644 Args:
645 grace_period: Grace period in seconds for removing temporary files.
646 If None, uses the default grace period.
647 """
648 # Default implementation is a NO-OP
650 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
651 """Iterate over all SHA1s that start with a given prefix.
653 The default implementation is a naive iteration over all objects.
654 However, subclasses may override this method with more efficient
655 implementations.
656 """
657 for sha in self:
658 if sha.startswith(prefix):
659 yield sha
661 def get_commit_graph(self) -> "CommitGraph | None":
662 """Get the commit graph for this object store.
664 Returns:
665 CommitGraph object if available, None otherwise
666 """
667 return None
669 def write_commit_graph(
670 self, refs: Sequence[bytes] | None = None, reachable: bool = True
671 ) -> None:
672 """Write a commit graph file for this object store.
674 Args:
675 refs: List of refs to include. If None, includes all refs from object store.
676 reachable: If True, includes all commits reachable from refs.
677 If False, only includes the direct ref targets.
679 Note:
 680 The default implementation raises NotImplementedError. Subclasses should
 681 override this method to provide commit graph writing functionality.
682 """
683 raise NotImplementedError(self.write_commit_graph)
685 def get_object_mtime(self, sha: bytes) -> float:
686 """Get the modification time of an object.
688 Args:
689 sha: SHA1 of the object
691 Returns:
692 Modification time as seconds since epoch
694 Raises:
695 KeyError: if the object is not found
696 """
697 # Default implementation raises KeyError
698 # Subclasses should override to provide actual mtime
699 raise KeyError(sha)
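# Illustrative sketches (not part of the original module) of the BaseObjectStore
# API above. The SHAs and the output path are hypothetical; only functions
# defined or imported in this module are used.
def _example_write_pack(
    store: BaseObjectStore,
    have: Iterable[bytes],
    want: Iterable[bytes],
    out_path: str,
) -> None:
    """Write a raw pack stream containing ``want`` minus ``have`` to ``out_path``."""
    count, records = store.generate_pack_data(have, want)
    with open(out_path, "wb") as f:
        # write_pack_data emits the full pack (header, objects, trailing
        # checksum); a .idx file would still need to be generated separately.
        write_pack_data(f.write, records, num_records=count)


def _example_resolve_prefix(store: BaseObjectStore, prefix: bytes) -> ObjectID:
    """Resolve an abbreviated object id, failing if it is missing or ambiguous."""
    matches = list(store.iter_prefix(prefix))
    if not matches:
        raise KeyError(prefix)
    if len(matches) > 1:
        raise ValueError(f"ambiguous object id prefix {prefix!r}")
    return matches[0]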
702class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
703 """Object store that supports pack operations.
705 This is a base class for object stores that can handle pack files,
706 including both disk-based and memory-based stores.
707 """
709 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
710 """Add a new pack to this object store.
712 Returns: Tuple of (file, commit_func, abort_func)
713 """
714 raise NotImplementedError(self.add_pack)
716 def add_pack_data(
717 self,
718 count: int,
719 unpacked_objects: Iterator["UnpackedObject"],
720 progress: Callable[..., None] | None = None,
721 ) -> "Pack | None":
722 """Add pack data to this object store.
724 Args:
725 count: Number of objects
726 unpacked_objects: Iterator over unpacked objects
727 progress: Optional progress callback
728 """
729 raise NotImplementedError(self.add_pack_data)
731 def get_unpacked_object(
732 self, sha1: bytes, *, include_comp: bool = False
733 ) -> "UnpackedObject":
734 """Get a raw unresolved object.
736 Args:
737 sha1: SHA-1 hash of the object
738 include_comp: Whether to include compressed data
740 Returns:
741 UnpackedObject instance
742 """
743 from .pack import UnpackedObject
745 obj = self[sha1]
746 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())
748 def iterobjects_subset(
749 self, shas: Iterable[bytes], *, allow_missing: bool = False
750 ) -> Iterator[ShaFile]:
751 """Iterate over a subset of objects.
753 Args:
754 shas: Iterable of object SHAs to retrieve
755 allow_missing: If True, skip missing objects
757 Returns:
758 Iterator of ShaFile objects
759 """
760 for sha in shas:
761 try:
762 yield self[sha]
763 except KeyError:
764 if not allow_missing:
765 raise
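# Illustrative sketch (not part of the original module): fetching a batch of
# objects while tolerating gaps. ``store`` and ``shas`` are hypothetical.
def _example_fetch_present(
    store: PackCapableObjectStore, shas: Iterable[bytes]
) -> dict[bytes, ShaFile]:
    """Return the subset of ``shas`` that exist in ``store``, keyed by id."""
    return {obj.id: obj for obj in store.iterobjects_subset(shas, allow_missing=True)}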
768class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
769 """Object store that uses pack files for storage.
771 This class provides a base implementation for object stores that use
772 Git pack files as their primary storage mechanism. It handles caching
773 of open pack files and provides configuration for pack file operations.
774 """
776 def __init__(
777 self,
778 pack_compression_level: int = -1,
779 pack_index_version: int | None = None,
780 pack_delta_window_size: int | None = None,
781 pack_window_memory: int | None = None,
782 pack_delta_cache_size: int | None = None,
783 pack_depth: int | None = None,
784 pack_threads: int | None = None,
785 pack_big_file_threshold: int | None = None,
786 ) -> None:
787 """Initialize a PackBasedObjectStore.
789 Args:
790 pack_compression_level: Compression level for pack files (-1 to 9)
791 pack_index_version: Pack index version to use
792 pack_delta_window_size: Window size for delta compression
793 pack_window_memory: Maximum memory to use for delta window
794 pack_delta_cache_size: Cache size for delta operations
795 pack_depth: Maximum depth for pack deltas
796 pack_threads: Number of threads to use for packing
797 pack_big_file_threshold: Threshold for treating files as "big"
798 """
799 self._pack_cache: dict[str, Pack] = {}
800 self.pack_compression_level = pack_compression_level
801 self.pack_index_version = pack_index_version
802 self.pack_delta_window_size = pack_delta_window_size
803 self.pack_window_memory = pack_window_memory
804 self.pack_delta_cache_size = pack_delta_cache_size
805 self.pack_depth = pack_depth
806 self.pack_threads = pack_threads
807 self.pack_big_file_threshold = pack_big_file_threshold
809 def get_reachability_provider(
810 self,
811 prefer_bitmaps: bool = True,
812 ) -> ObjectReachabilityProvider:
813 """Get the best reachability provider for the object store.
815 Args:
817 prefer_bitmaps: Whether to use bitmaps if available
819 Returns:
820 ObjectReachabilityProvider implementation (either bitmap-accelerated
821 or graph traversal)
822 """
823 if prefer_bitmaps:
824 # Check if any packs have bitmaps
825 has_bitmap = False
826 for pack in self.packs:
827 try:
828 # Try to access bitmap property
829 if pack.bitmap is not None:
830 has_bitmap = True
831 break
832 except FileNotFoundError:
833 # Bitmap file doesn't exist for this pack
834 continue
836 if has_bitmap:
837 return BitmapReachability(self)
839 # Fall back to graph traversal
840 return GraphTraversalReachability(self)
842 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
843 """Add a new pack to this object store."""
844 raise NotImplementedError(self.add_pack)
846 def add_pack_data(
847 self,
848 count: int,
849 unpacked_objects: Iterator[UnpackedObject],
850 progress: Callable[..., None] | None = None,
851 ) -> "Pack | None":
852 """Add pack data to this object store.
854 Args:
855 count: Number of items to add
856 unpacked_objects: Iterator of UnpackedObject instances
857 progress: Optional progress callback
858 """
859 if count == 0:
860 # Don't bother writing an empty pack file
861 return None
862 f, commit, abort = self.add_pack()
863 try:
864 write_pack_data(
865 f.write,
866 unpacked_objects,
867 num_records=count,
868 progress=progress,
869 compression_level=self.pack_compression_level,
870 )
871 except BaseException:
872 abort()
873 raise
874 else:
875 return commit()
877 @property
878 def alternates(self) -> list["BaseObjectStore"]:
879 """Return list of alternate object stores."""
880 return []
882 def contains_packed(self, sha: bytes) -> bool:
883 """Check if a particular object is present by SHA1 and is packed.
885 This does not check alternates.
886 """
887 for pack in self.packs:
888 try:
889 if sha in pack:
890 return True
891 except PackFileDisappeared:
892 pass
893 return False
895 def __contains__(self, sha: bytes) -> bool:
896 """Check if a particular object is present by SHA1.
898 This method makes no distinction between loose and packed objects.
899 """
900 if self.contains_packed(sha) or self.contains_loose(sha):
901 return True
902 for alternate in self.alternates:
903 if sha in alternate:
904 return True
905 return False
907 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
908 """Add a newly appeared pack to the cache by path."""
909 prev_pack = self._pack_cache.get(base_name)
910 if prev_pack is not pack:
911 self._pack_cache[base_name] = pack
912 if prev_pack:
913 prev_pack.close()
915 def generate_pack_data(
916 self,
917 have: Iterable[bytes],
918 want: Iterable[bytes],
919 *,
920 shallow: Set[bytes] | None = None,
921 progress: Callable[..., None] | None = None,
922 ofs_delta: bool = True,
923 ) -> tuple[int, Iterator[UnpackedObject]]:
924 """Generate pack data objects for a set of wants/haves.
926 Args:
927 have: List of SHA1s of objects that should not be sent
928 want: List of SHA1s of objects that should be sent
929 shallow: Set of shallow commit SHA1s to skip
930 ofs_delta: Whether OFS deltas can be included
931 progress: Optional progress reporting method
932 """
933 missing_objects = MissingObjectFinder(
934 self, haves=have, wants=want, shallow=shallow, progress=progress
935 )
936 remote_has = missing_objects.get_remote_has()
937 object_ids = list(missing_objects)
938 return len(object_ids), generate_unpacked_objects(
939 self,
940 object_ids,
941 progress=progress,
942 ofs_delta=ofs_delta,
943 other_haves=remote_has,
944 )
946 def _clear_cached_packs(self) -> None:
947 pack_cache = self._pack_cache
948 self._pack_cache = {}
949 while pack_cache:
950 (_name, pack) = pack_cache.popitem()
951 pack.close()
953 def _iter_cached_packs(self) -> Iterator[Pack]:
954 return iter(self._pack_cache.values())
956 def _update_pack_cache(self) -> list[Pack]:
957 raise NotImplementedError(self._update_pack_cache)
959 def close(self) -> None:
960 """Close the object store and release resources.
962 This method closes all cached pack files and frees associated resources.
963 """
964 self._clear_cached_packs()
966 @property
967 def packs(self) -> list[Pack]:
968 """List with pack objects."""
969 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
971 def count_pack_files(self) -> int:
972 """Count the number of pack files.
974 Returns:
975 Number of pack files (excluding those with .keep files)
976 """
977 count = 0
978 for pack in self.packs:
979 # Check if there's a .keep file for this pack
980 keep_path = pack._basename + ".keep"
981 if not os.path.exists(keep_path):
982 count += 1
983 return count
985 def _iter_alternate_objects(self) -> Iterator[bytes]:
986 """Iterate over the SHAs of all the objects in alternate stores."""
987 for alternate in self.alternates:
988 yield from alternate
990 def _iter_loose_objects(self) -> Iterator[bytes]:
991 """Iterate over the SHAs of all loose objects."""
992 raise NotImplementedError(self._iter_loose_objects)
994 def _get_loose_object(self, sha: bytes) -> ShaFile | None:
995 raise NotImplementedError(self._get_loose_object)
997 def delete_loose_object(self, sha: bytes) -> None:
998 """Delete a loose object.
1000 This method only handles loose objects. For packed objects,
1001 use repack(exclude=...) to exclude them during repacking.
1002 """
1003 raise NotImplementedError(self.delete_loose_object)
1005 def _remove_pack(self, pack: "Pack") -> None:
1006 raise NotImplementedError(self._remove_pack)
1008 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
1009 """Pack loose objects.
1011 Args:
1012 progress: Optional progress reporting callback
1014 Returns: Number of objects packed
1015 """
1016 objects: list[tuple[ShaFile, None]] = []
1017 for sha in self._iter_loose_objects():
1018 obj = self._get_loose_object(sha)
1019 if obj is not None:
1020 objects.append((obj, None))
1021 self.add_objects(objects, progress=progress)
1022 for obj, path in objects:
1023 self.delete_loose_object(obj.id)
1024 return len(objects)
1026 def repack(
1027 self,
1028 exclude: Set[bytes] | None = None,
1029 progress: Callable[[str], None] | None = None,
1030 ) -> int:
1031 """Repack the packs in this repository.
1033 Note that this implementation is fairly naive and currently keeps all
1034 objects in memory while it repacks.
1036 Args:
1037 exclude: Optional set of object SHAs to exclude from repacking
1038 progress: Optional progress reporting callback
1039 """
1040 if exclude is None:
1041 exclude = set()
1043 loose_objects = set()
1044 excluded_loose_objects = set()
1045 for sha in self._iter_loose_objects():
1046 if sha not in exclude:
1047 obj = self._get_loose_object(sha)
1048 if obj is not None:
1049 loose_objects.add(obj)
1050 else:
1051 excluded_loose_objects.add(sha)
1053 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
1054 old_packs = {p.name(): p for p in self.packs}
1055 for name, pack in old_packs.items():
1056 objects.update(
1057 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
1058 )
1060 # Only create a new pack if there are objects to pack
1061 if objects:
1062 # The name of the consolidated pack might match the name of a
1063 # pre-existing pack. Take care not to remove the newly created
1064 # consolidated pack.
1065 consolidated = self.add_objects(list(objects), progress=progress)
1066 if consolidated is not None:
1067 old_packs.pop(consolidated.name(), None)
1069 # Delete loose objects that were packed
1070 for obj in loose_objects:
1071 if obj is not None:
1072 self.delete_loose_object(obj.id)
1073 # Delete excluded loose objects
1074 for sha in excluded_loose_objects:
1075 self.delete_loose_object(sha)
1076 for name, pack in old_packs.items():
1077 self._remove_pack(pack)
1078 self._update_pack_cache()
1079 return len(objects)
1081 def generate_pack_bitmaps(
1082 self,
1083 refs: dict[bytes, bytes],
1084 *,
1085 commit_interval: int | None = None,
1086 progress: Callable[[str], None] | None = None,
1087 ) -> int:
1088 """Generate bitmap indexes for all packs that don't have them.
1090 This generates .bitmap files for packfiles, enabling fast reachability
1091 queries. Equivalent to the bitmap generation part of 'git repack -b'.
1093 Args:
1094 refs: Dictionary of ref names to commit SHAs
1095 commit_interval: Include every Nth commit in bitmap index (None for default)
1096 progress: Optional progress reporting callback
1098 Returns:
1099 Number of bitmaps generated
1100 """
1101 count = 0
1102 for pack in self.packs:
1103 pack.ensure_bitmap(
1104 self, refs, commit_interval=commit_interval, progress=progress
1105 )
1106 count += 1
1108 # Update cache to pick up new bitmaps
1109 self._update_pack_cache()
1111 return count
1113 def __iter__(self) -> Iterator[bytes]:
1114 """Iterate over the SHAs that are present in this store."""
1115 self._update_pack_cache()
1116 for pack in self._iter_cached_packs():
1117 try:
1118 yield from pack
1119 except PackFileDisappeared:
1120 pass
1121 yield from self._iter_loose_objects()
1122 yield from self._iter_alternate_objects()
1124 def contains_loose(self, sha: bytes) -> bool:
1125 """Check if a particular object is present by SHA1 and is loose.
1127 This does not check alternates.
1128 """
1129 return self._get_loose_object(sha) is not None
1131 def get_raw(self, name: bytes) -> tuple[int, bytes]:
1132 """Obtain the raw fulltext for an object.
1134 Args:
1135 name: sha for the object.
1136 Returns: tuple with numeric type and object contents.
1137 """
1138 if name == ZERO_SHA:
1139 raise KeyError(name)
1140 if len(name) == 40:
1141 sha = hex_to_sha(name)
1142 hexsha = name
1143 elif len(name) == 20:
1144 sha = name
1145 hexsha = None
1146 else:
1147 raise AssertionError(f"Invalid object name {name!r}")
1148 for pack in self._iter_cached_packs():
1149 try:
1150 return pack.get_raw(sha)
1151 except (KeyError, PackFileDisappeared):
1152 pass
1153 if hexsha is None:
1154 hexsha = sha_to_hex(name)
1155 ret = self._get_loose_object(hexsha)
1156 if ret is not None:
1157 return ret.type_num, ret.as_raw_string()
1158 # Maybe something else has added a pack with the object
1159 # in the mean time?
1160 for pack in self._update_pack_cache():
1161 try:
1162 return pack.get_raw(sha)
1163 except KeyError:
1164 pass
1165 for alternate in self.alternates:
1166 try:
1167 return alternate.get_raw(hexsha)
1168 except KeyError:
1169 pass
1170 raise KeyError(hexsha)
1172 def iter_unpacked_subset(
1173 self,
1174 shas: Iterable[bytes],
1175 include_comp: bool = False,
1176 allow_missing: bool = False,
1177 convert_ofs_delta: bool = True,
1178 ) -> Iterator[UnpackedObject]:
1179 """Iterate over a subset of objects, yielding UnpackedObject instances.
1181 Args:
1182 shas: Set of object SHAs to retrieve
1183 include_comp: Whether to include compressed data
1184 allow_missing: If True, skip missing objects; if False, raise KeyError
1185 convert_ofs_delta: Whether to convert OFS_DELTA objects
1187 Returns:
1188 Iterator of UnpackedObject instances
1190 Raises:
1191 KeyError: If an object is missing and allow_missing is False
1192 """
1193 todo: set[bytes] = set(shas)
1194 for p in self._iter_cached_packs():
1195 for unpacked in p.iter_unpacked_subset(
1196 todo,
1197 include_comp=include_comp,
1198 allow_missing=True,
1199 convert_ofs_delta=convert_ofs_delta,
1200 ):
1201 yield unpacked
1202 hexsha = sha_to_hex(unpacked.sha())
1203 todo.remove(hexsha)
1204 # Maybe something else has added a pack with the object
1205 # in the mean time?
1206 for p in self._update_pack_cache():
1207 for unpacked in p.iter_unpacked_subset(
1208 todo,
1209 include_comp=include_comp,
1210 allow_missing=True,
1211 convert_ofs_delta=convert_ofs_delta,
1212 ):
1213 yield unpacked
1214 hexsha = sha_to_hex(unpacked.sha())
1215 todo.remove(hexsha)
1216 for alternate in self.alternates:
1217 assert isinstance(alternate, PackBasedObjectStore)
1218 for unpacked in alternate.iter_unpacked_subset(
1219 todo,
1220 include_comp=include_comp,
1221 allow_missing=True,
1222 convert_ofs_delta=convert_ofs_delta,
1223 ):
1224 yield unpacked
1225 hexsha = sha_to_hex(unpacked.sha())
1226 todo.remove(hexsha)
1228 def iterobjects_subset(
1229 self, shas: Iterable[bytes], *, allow_missing: bool = False
1230 ) -> Iterator[ShaFile]:
1231 """Iterate over a subset of objects in the store.
1233 This method searches for objects in pack files, alternates, and loose storage.
1235 Args:
1236 shas: Iterable of object SHAs to retrieve
1237 allow_missing: If True, skip missing objects; if False, raise KeyError
1239 Returns:
1240 Iterator of ShaFile objects
1242 Raises:
1243 KeyError: If an object is missing and allow_missing is False
1244 """
1245 todo: set[bytes] = set(shas)
1246 for p in self._iter_cached_packs():
1247 for o in p.iterobjects_subset(todo, allow_missing=True):
1248 yield o
1249 todo.remove(o.id)
1250 # Maybe something else has added a pack with the object
1251 # in the mean time?
1252 for p in self._update_pack_cache():
1253 for o in p.iterobjects_subset(todo, allow_missing=True):
1254 yield o
1255 todo.remove(o.id)
1256 for alternate in self.alternates:
1257 for o in alternate.iterobjects_subset(todo, allow_missing=True):
1258 yield o
1259 todo.remove(o.id)
1260 for oid in todo:
1261 loose_obj: ShaFile | None = self._get_loose_object(oid)
1262 if loose_obj is not None:
1263 yield loose_obj
1264 elif not allow_missing:
1265 raise KeyError(oid)
1267 def get_unpacked_object(
1268 self, sha1: bytes, *, include_comp: bool = False
1269 ) -> UnpackedObject:
1270 """Obtain the unpacked object.
1272 Args:
1273 sha1: sha for the object.
1274 include_comp: Whether to include compression metadata.
1275 """
1276 if sha1 == ZERO_SHA:
1277 raise KeyError(sha1)
1278 if len(sha1) == 40:
1279 sha = hex_to_sha(sha1)
1280 hexsha = sha1
1281 elif len(sha1) == 20:
1282 sha = sha1
1283 hexsha = None
1284 else:
1285 raise AssertionError(f"Invalid object sha1 {sha1!r}")
1286 for pack in self._iter_cached_packs():
1287 try:
1288 return pack.get_unpacked_object(sha, include_comp=include_comp)
1289 except (KeyError, PackFileDisappeared):
1290 pass
1291 if hexsha is None:
1292 hexsha = sha_to_hex(sha1)
1293 # Maybe something else has added a pack with the object
1294 # in the mean time?
1295 for pack in self._update_pack_cache():
1296 try:
1297 return pack.get_unpacked_object(sha, include_comp=include_comp)
1298 except KeyError:
1299 pass
1300 for alternate in self.alternates:
1301 assert isinstance(alternate, PackBasedObjectStore)
1302 try:
1303 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
1304 except KeyError:
1305 pass
1306 raise KeyError(hexsha)
1308 def add_objects(
1309 self,
1310 objects: Sequence[tuple[ShaFile, str | None]],
1311 progress: Callable[[str], None] | None = None,
1312 ) -> "Pack | None":
1313 """Add a set of objects to this object store.
1315 Args:
1316 objects: Iterable over (object, path) tuples, should support
1317 __len__.
1318 progress: Optional progress reporting function.
1319 Returns: Pack object of the objects written.
1320 """
1321 count = len(objects)
1322 record_iter = (full_unpacked_object(o) for (o, p) in objects)
1323 return self.add_pack_data(count, record_iter, progress=progress)
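# Illustrative sketches (not part of the original module) of pack-level
# housekeeping with the PackBasedObjectStore API above. The payloads and the
# set of unreachable SHAs are hypothetical caller input.
def _example_add_blobs(
    store: PackBasedObjectStore, payloads: Iterable[bytes]
) -> list[bytes]:
    """Store each payload as a blob; together they land in a single new pack."""
    blobs = [Blob.from_string(data) for data in payloads]
    store.add_objects([(blob, None) for blob in blobs])
    return [blob.id for blob in blobs]


def _example_drop_unreachable(
    store: PackBasedObjectStore, unreachable: Iterable[bytes]
) -> int:
    """Rewrite the store's packs without the objects in ``unreachable``."""
    # delete_loose_object() only handles loose objects; packed objects can only
    # be dropped by excluding them while repacking.
    return store.repack(exclude=set(unreachable))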
1326class DiskObjectStore(PackBasedObjectStore):
1327 """Git-style object store that exists on disk."""
1329 path: str | os.PathLike[str]
1330 pack_dir: str | os.PathLike[str]
1331 _alternates: "list[BaseObjectStore] | None"
1332 _commit_graph: "CommitGraph | None"
1334 def __init__(
1335 self,
1336 path: str | os.PathLike[str],
1337 *,
1338 loose_compression_level: int = -1,
1339 pack_compression_level: int = -1,
1340 pack_index_version: int | None = None,
1341 pack_delta_window_size: int | None = None,
1342 pack_window_memory: int | None = None,
1343 pack_delta_cache_size: int | None = None,
1344 pack_depth: int | None = None,
1345 pack_threads: int | None = None,
1346 pack_big_file_threshold: int | None = None,
1347 fsync_object_files: bool = False,
1348 pack_write_bitmaps: bool = False,
1349 pack_write_bitmap_hash_cache: bool = True,
1350 pack_write_bitmap_lookup_table: bool = True,
1351 file_mode: int | None = None,
1352 dir_mode: int | None = None,
1353 ) -> None:
1354 """Open an object store.
1356 Args:
1357 path: Path of the object store.
1358 loose_compression_level: zlib compression level for loose objects
1359 pack_compression_level: zlib compression level for pack objects
1360 pack_index_version: pack index version to use (1, 2, or 3)
1361 pack_delta_window_size: sliding window size for delta compression
1362 pack_window_memory: memory limit for delta window operations
1363 pack_delta_cache_size: size of cache for delta operations
1364 pack_depth: maximum delta chain depth
1365 pack_threads: number of threads for pack operations
1366 pack_big_file_threshold: threshold for treating files as big
1367 fsync_object_files: whether to fsync object files for durability
1368 pack_write_bitmaps: whether to write bitmap indexes for packs
1369 pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
1370 pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
1371 file_mode: File permission mask for shared repository
1372 dir_mode: Directory permission mask for shared repository
1373 """
1374 super().__init__(
1375 pack_compression_level=pack_compression_level,
1376 pack_index_version=pack_index_version,
1377 pack_delta_window_size=pack_delta_window_size,
1378 pack_window_memory=pack_window_memory,
1379 pack_delta_cache_size=pack_delta_cache_size,
1380 pack_depth=pack_depth,
1381 pack_threads=pack_threads,
1382 pack_big_file_threshold=pack_big_file_threshold,
1383 )
1384 self.path = path
1385 self.pack_dir = os.path.join(self.path, PACKDIR)
1386 self._alternates = None
1387 self.loose_compression_level = loose_compression_level
1388 self.pack_compression_level = pack_compression_level
1389 self.pack_index_version = pack_index_version
1390 self.fsync_object_files = fsync_object_files
1391 self.pack_write_bitmaps = pack_write_bitmaps
1392 self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
1393 self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
1394 self.file_mode = file_mode
1395 self.dir_mode = dir_mode
1397 # Commit graph support - lazy loaded
1398 self._commit_graph = None
1399 self._use_commit_graph = True # Default to true
1401 def __repr__(self) -> str:
1402 """Return string representation of DiskObjectStore.
1404 Returns:
1405 String representation including the store path
1406 """
1407 return f"<{self.__class__.__name__}({self.path!r})>"
1409 @classmethod
1410 def from_config(
1411 cls,
1412 path: str | os.PathLike[str],
1413 config: "Config",
1414 *,
1415 file_mode: int | None = None,
1416 dir_mode: int | None = None,
1417 ) -> "DiskObjectStore":
1418 """Create a DiskObjectStore from a configuration object.
1420 Args:
1421 path: Path to the object store directory
1422 config: Configuration object to read settings from
1423 file_mode: Optional file permission mask for shared repository
1424 dir_mode: Optional directory permission mask for shared repository
1426 Returns:
1427 New DiskObjectStore instance configured according to config
1428 """
1429 try:
1430 default_compression_level = int(
1431 config.get((b"core",), b"compression").decode()
1432 )
1433 except KeyError:
1434 default_compression_level = -1
1435 try:
1436 loose_compression_level = int(
1437 config.get((b"core",), b"looseCompression").decode()
1438 )
1439 except KeyError:
1440 loose_compression_level = default_compression_level
1441 try:
1442 pack_compression_level = int(
 1443 config.get((b"core",), b"packCompression").decode()
1444 )
1445 except KeyError:
1446 pack_compression_level = default_compression_level
1447 try:
1448 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1449 except KeyError:
1450 pack_index_version = None
1452 # Read pack configuration options
1453 try:
1454 pack_delta_window_size = int(
1455 config.get((b"pack",), b"deltaWindowSize").decode()
1456 )
1457 except KeyError:
1458 pack_delta_window_size = None
1459 try:
1460 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1461 except KeyError:
1462 pack_window_memory = None
1463 try:
1464 pack_delta_cache_size = int(
1465 config.get((b"pack",), b"deltaCacheSize").decode()
1466 )
1467 except KeyError:
1468 pack_delta_cache_size = None
1469 try:
1470 pack_depth = int(config.get((b"pack",), b"depth").decode())
1471 except KeyError:
1472 pack_depth = None
1473 try:
1474 pack_threads = int(config.get((b"pack",), b"threads").decode())
1475 except KeyError:
1476 pack_threads = None
1477 try:
1478 pack_big_file_threshold = int(
1479 config.get((b"pack",), b"bigFileThreshold").decode()
1480 )
1481 except KeyError:
1482 pack_big_file_threshold = None
1484 # Read core.commitGraph setting
1485 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1487 # Read core.fsyncObjectFiles setting
1488 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
1490 # Read bitmap settings
1491 pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
1492 pack_write_bitmap_hash_cache = config.get_boolean(
1493 (b"pack",), b"writeBitmapHashCache", True
1494 )
1495 pack_write_bitmap_lookup_table = config.get_boolean(
1496 (b"pack",), b"writeBitmapLookupTable", True
1497 )
1498 # Also check repack.writeBitmaps for backwards compatibility
1499 if not pack_write_bitmaps:
1500 pack_write_bitmaps = config.get_boolean(
1501 (b"repack",), b"writeBitmaps", False
1502 )
1504 instance = cls(
1505 path,
1506 loose_compression_level=loose_compression_level,
1507 pack_compression_level=pack_compression_level,
1508 pack_index_version=pack_index_version,
1509 pack_delta_window_size=pack_delta_window_size,
1510 pack_window_memory=pack_window_memory,
1511 pack_delta_cache_size=pack_delta_cache_size,
1512 pack_depth=pack_depth,
1513 pack_threads=pack_threads,
1514 pack_big_file_threshold=pack_big_file_threshold,
1515 fsync_object_files=fsync_object_files,
1516 pack_write_bitmaps=pack_write_bitmaps,
1517 pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
1518 pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
1519 file_mode=file_mode,
1520 dir_mode=dir_mode,
1521 )
1522 instance._use_commit_graph = use_commit_graph
1523 return instance
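    # Illustrative usage sketch (not part of the original module), assuming a
    # ConfigFile from dulwich.config and arbitrary example option values:
    #
    #     from dulwich.config import ConfigFile
    #     config = ConfigFile()
    #     config.set((b"core",), b"compression", b"6")
    #     config.set((b"pack",), b"depth", b"50")
    #     config.set((b"pack",), b"writeBitmaps", b"true")
    #     store = DiskObjectStore.from_config("/path/to/.git/objects", config)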
1525 @property
1526 def alternates(self) -> list["BaseObjectStore"]:
1527 """Get the list of alternate object stores.
1529 Reads from .git/objects/info/alternates if not already cached.
1531 Returns:
1532 List of DiskObjectStore instances for alternate object directories
1533 """
1534 if self._alternates is not None:
1535 return self._alternates
1536 self._alternates = []
1537 for path in self._read_alternate_paths():
1538 self._alternates.append(DiskObjectStore(path))
1539 return self._alternates
1541 def _read_alternate_paths(self) -> Iterator[str]:
1542 try:
1543 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1544 except FileNotFoundError:
1545 return
1546 with f:
1547 for line in f.readlines():
1548 line = line.rstrip(b"\n")
1549 if line.startswith(b"#"):
1550 continue
1551 if os.path.isabs(line):
1552 yield os.fsdecode(line)
1553 else:
1554 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1556 def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
1557 """Add an alternate path to this object store."""
1558 info_dir = os.path.join(self.path, INFODIR)
1559 try:
1560 os.mkdir(info_dir)
1561 if self.dir_mode is not None:
1562 os.chmod(info_dir, self.dir_mode)
1563 except FileExistsError:
1564 pass
1565 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1566 mask = self.file_mode if self.file_mode is not None else 0o644
1567 with GitFile(alternates_path, "wb", mask=mask) as f:
1568 try:
1569 orig_f = open(alternates_path, "rb")
1570 except FileNotFoundError:
1571 pass
1572 else:
1573 with orig_f:
1574 f.write(orig_f.read())
1575 f.write(os.fsencode(path) + b"\n")
1577 if not os.path.isabs(path):
1578 path = os.path.join(self.path, path)
1579 self.alternates.append(DiskObjectStore(path))
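    # Illustrative usage sketch (not part of the original module); the two
    # object-directory paths are hypothetical:
    #
    #     borrower = DiskObjectStore("/repo_a/.git/objects")
    #     borrower.add_alternate_path("/repo_b/.git/objects")
    #     # Objects that only exist in repo_b are now visible through
    #     # ``sha in borrower`` and ``borrower[sha]`` via the alternates list.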
1581 def _update_pack_cache(self) -> list[Pack]:
1582 """Read and iterate over new pack files and cache them."""
1583 try:
1584 pack_dir_contents = os.listdir(self.pack_dir)
1585 except FileNotFoundError:
1586 self.close()
1587 return []
1588 pack_files = set()
1589 for name in pack_dir_contents:
1590 if name.startswith("pack-") and name.endswith(".pack"):
1591 # verify that idx exists first (otherwise the pack was not yet
1592 # fully written)
1593 idx_name = os.path.splitext(name)[0] + ".idx"
1594 if idx_name in pack_dir_contents:
1595 pack_name = name[: -len(".pack")]
1596 pack_files.add(pack_name)
1598 # Open newly appeared pack files
1599 new_packs = []
1600 for f in pack_files:
1601 if f not in self._pack_cache:
1602 pack = Pack(
1603 os.path.join(self.pack_dir, f),
1604 delta_window_size=self.pack_delta_window_size,
1605 window_memory=self.pack_window_memory,
1606 delta_cache_size=self.pack_delta_cache_size,
1607 depth=self.pack_depth,
1608 threads=self.pack_threads,
1609 big_file_threshold=self.pack_big_file_threshold,
1610 )
1611 new_packs.append(pack)
1612 self._pack_cache[f] = pack
1613 # Remove disappeared pack files
1614 for f in set(self._pack_cache) - pack_files:
1615 self._pack_cache.pop(f).close()
1616 return new_packs
1618 def _get_shafile_path(self, sha: bytes) -> str:
1619 # Check from object dir
1620 return hex_to_filename(os.fspath(self.path), sha)
1622 def _iter_loose_objects(self) -> Iterator[bytes]:
1623 for base in os.listdir(self.path):
1624 if len(base) != 2:
1625 continue
1626 for rest in os.listdir(os.path.join(self.path, base)):
1627 sha = os.fsencode(base + rest)
1628 if not valid_hexsha(sha):
1629 continue
1630 yield sha
1632 def count_loose_objects(self) -> int:
1633 """Count the number of loose objects in the object store.
1635 Returns:
1636 Number of loose objects
1637 """
1638 count = 0
1639 if not os.path.exists(self.path):
1640 return 0
1642 for i in range(256):
1643 subdir = os.path.join(self.path, f"{i:02x}")
1644 try:
1645 count += len(
1646 [
1647 name
1648 for name in os.listdir(subdir)
1649 if len(name) == 38 # 40 - 2 for the prefix
1650 ]
1651 )
1652 except FileNotFoundError:
1653 # Directory may have been removed or is inaccessible
1654 continue
1656 return count
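    # Illustrative usage sketch (not part of the original module): a
    # ``git gc --auto`` style check. 6700 mirrors git's gc.auto default, but
    # the threshold here is only an example:
    #
    #     if store.count_loose_objects() >= 6700:
    #         store.pack_loose_objects()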
1658 def _get_loose_object(self, sha: bytes) -> ShaFile | None:
1659 path = self._get_shafile_path(sha)
1660 try:
1661 return ShaFile.from_path(path)
1662 except FileNotFoundError:
1663 return None
1665 def delete_loose_object(self, sha: bytes) -> None:
1666 """Delete a loose object from disk.
1668 Args:
1669 sha: SHA1 of the object to delete
1671 Raises:
1672 FileNotFoundError: If the object file doesn't exist
1673 """
1674 os.remove(self._get_shafile_path(sha))
1676 def get_object_mtime(self, sha: bytes) -> float:
1677 """Get the modification time of an object.
1679 Args:
1680 sha: SHA1 of the object
1682 Returns:
1683 Modification time as seconds since epoch
1685 Raises:
1686 KeyError: if the object is not found
1687 """
1688 # First check if it's a loose object
1689 if self.contains_loose(sha):
1690 path = self._get_shafile_path(sha)
1691 try:
1692 return os.path.getmtime(path)
1693 except FileNotFoundError:
1694 pass
1696 # Check if it's in a pack file
1697 for pack in self.packs:
1698 try:
1699 if sha in pack:
1700 # Use the pack file's mtime for packed objects
1701 pack_path = pack._data_path
1702 try:
1703 return os.path.getmtime(pack_path)
1704 except (FileNotFoundError, AttributeError):
1705 pass
1706 except PackFileDisappeared:
1707 pass
1709 raise KeyError(sha)
1711 def _remove_pack(self, pack: Pack) -> None:
1712 try:
1713 del self._pack_cache[os.path.basename(pack._basename)]
1714 except KeyError:
1715 pass
1716 pack.close()
1717 os.remove(pack.data.path)
1718 if hasattr(pack.index, "path"):
1719 os.remove(pack.index.path)
1721 def _get_pack_basepath(
1722 self, entries: Iterable[tuple[bytes, int, int | None]]
1723 ) -> str:
1724 suffix_bytes = iter_sha1(entry[0] for entry in entries)
1725 # TODO: Handle self.pack_dir being bytes
1726 suffix = suffix_bytes.decode("ascii")
1727 return os.path.join(self.pack_dir, "pack-" + suffix)
1729 def _complete_pack(
1730 self,
1731 f: BinaryIO,
1732 path: str,
1733 num_objects: int,
1734 indexer: PackIndexer,
1735 progress: Callable[..., None] | None = None,
1736 refs: dict[bytes, bytes] | None = None,
1737 ) -> Pack:
1738 """Move a specific file containing a pack into the pack directory.
1740 Note: The file should be on the same file system as the
1741 packs directory.
1743 Args:
1744 f: Open file object for the pack.
1745 path: Path to the pack file.
1746 num_objects: Number of objects in the pack.
1747 indexer: A PackIndexer for indexing the pack.
1748 progress: Optional progress reporting function.
1749 refs: Optional dictionary of refs for bitmap generation.
1750 """
1751 entries = []
1752 for i, entry in enumerate(indexer):
1753 if progress is not None:
1754 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1755 entries.append(entry)
1757 pack_sha, extra_entries = extend_pack(
1758 f,
1759 set(indexer.ext_refs()),
1760 get_raw=self.get_raw,
1761 compression_level=self.pack_compression_level,
1762 progress=progress,
1763 )
1764 f.flush()
1765 if self.fsync_object_files:
1766 try:
1767 fileno = f.fileno()
1768 except AttributeError as e:
1769 raise OSError("fsync requested but file has no fileno()") from e
1770 else:
1771 os.fsync(fileno)
1772 f.close()
1774 entries.extend(extra_entries)
1776 # Move the pack in.
1777 entries.sort()
1778 pack_base_name = self._get_pack_basepath(entries)
1780 for pack in self.packs:
1781 if pack._basename == pack_base_name:
1782 return pack
1784 target_pack_path = pack_base_name + ".pack"
1785 target_index_path = pack_base_name + ".idx"
1786 if sys.platform == "win32":
1787 # Windows might have the target pack file lingering. Attempt
1788 # removal, silently passing if the target does not exist.
1789 with suppress(FileNotFoundError):
1790 os.remove(target_pack_path)
1791 os.rename(path, target_pack_path)
1793 # Write the index.
1794 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1795 with GitFile(
1796 target_index_path,
1797 "wb",
1798 mask=mask,
1799 fsync=self.fsync_object_files,
1800 ) as index_file:
1801 write_pack_index(
1802 index_file, entries, pack_sha, version=self.pack_index_version
1803 )
1805 # Generate bitmap if configured and refs are available
1806 if self.pack_write_bitmaps and refs:
1807 from .bitmap import generate_bitmap, write_bitmap
1808 from .pack import load_pack_index_file
1810 if progress:
1811 progress("Generating bitmap index\r".encode("ascii"))
1813 # Load the index we just wrote
1814 with open(target_index_path, "rb") as idx_file:
1815 pack_index = load_pack_index_file(
1816 os.path.basename(target_index_path), idx_file
1817 )
1819 # Generate the bitmap
1820 bitmap = generate_bitmap(
1821 pack_index=pack_index,
1822 object_store=self,
1823 refs=refs,
1824 pack_checksum=pack_sha,
1825 include_hash_cache=self.pack_write_bitmap_hash_cache,
1826 include_lookup_table=self.pack_write_bitmap_lookup_table,
1827 progress=lambda msg: progress(msg.encode("ascii"))
1828 if progress and isinstance(msg, str)
1829 else None,
1830 )
1832 # Write the bitmap
1833 target_bitmap_path = pack_base_name + ".bitmap"
1834 write_bitmap(target_bitmap_path, bitmap)
1836 if progress:
1837 progress("Bitmap index written\r".encode("ascii"))
1839 # Add the pack to the store and return it.
1840 final_pack = Pack(
1841 pack_base_name,
1842 delta_window_size=self.pack_delta_window_size,
1843 window_memory=self.pack_window_memory,
1844 delta_cache_size=self.pack_delta_cache_size,
1845 depth=self.pack_depth,
1846 threads=self.pack_threads,
1847 big_file_threshold=self.pack_big_file_threshold,
1848 )
1849 final_pack.check_length_and_checksum()
1850 self._add_cached_pack(pack_base_name, final_pack)
1851 return final_pack
1853 def add_thin_pack(
1854 self,
1855 read_all: Callable[[int], bytes],
1856 read_some: Callable[[int], bytes] | None,
1857 progress: Callable[..., None] | None = None,
1858 ) -> "Pack":
1859 """Add a new thin pack to this object store.
1861 Thin packs are packs that contain deltas with parents that exist
1862 outside the pack. They should never be placed in the object store
1863 directly, and always indexed and completed as they are copied.
1865 Args:
1866 read_all: Read function that blocks until the number of
1867 requested bytes are read.
1868 read_some: Read function that returns at least one byte, but may
1869 not return the number of bytes requested.
1870 progress: Optional progress reporting function.
1871 Returns: A Pack object pointing at the now-completed thin pack in the
1872 objects/pack directory.
1873 """
1874 import tempfile
1876 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1877 with os.fdopen(fd, "w+b") as f:
1878 os.chmod(path, PACK_MODE)
1879 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type]
1880 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type]
1881 copier.verify(progress=progress)
1882 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1884 def add_pack(
1885 self,
1886 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1887 """Add a new pack to this object store.
1889 Returns: Fileobject to write to, a commit function to
1890 call when the pack is finished and an abort
1891 function.
1892 """
1893 import tempfile
1895 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1896 f = os.fdopen(fd, "w+b")
1897 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1898 os.chmod(path, mask)
1900 def commit() -> "Pack | None":
1901 if f.tell() > 0:
1902 f.seek(0)
1904 with PackData(path, f) as pd:
1905 indexer = PackIndexer.for_pack_data(
1906 pd,
1907 resolve_ext_ref=self.get_raw, # type: ignore[arg-type]
1908 )
1909 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type]
1910 else:
1911 f.close()
1912 os.remove(path)
1913 return None
1915 def abort() -> None:
1916 f.close()
1917 os.remove(path)
1919 return f, commit, abort # type: ignore[return-value]
1921 def add_object(self, obj: ShaFile) -> None:
1922 """Add a single object to this object store.
1924 Args:
1925 obj: Object to add
1926 """
1927 path = self._get_shafile_path(obj.id)
1928 dir = os.path.dirname(path)
1929 try:
1930 os.mkdir(dir)
1931 if self.dir_mode is not None:
1932 os.chmod(dir, self.dir_mode)
1933 except FileExistsError:
1934 pass
1935 if os.path.exists(path):
1936 return # Already there, no need to write again
1937 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1938 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f:
1939 f.write(
1940 obj.as_legacy_object(compression_level=self.loose_compression_level)
1941 )
1943 @classmethod
1944 def init(
1945 cls,
1946 path: str | os.PathLike[str],
1947 *,
1948 file_mode: int | None = None,
1949 dir_mode: int | None = None,
1950 ) -> "DiskObjectStore":
1951 """Initialize a new disk object store.
1953 Creates the necessary directory structure for a Git object store.
1955 Args:
1956 path: Path where the object store should be created
1957 file_mode: Optional file permission mask for shared repository
1958 dir_mode: Optional directory permission mask for shared repository
1960 Returns:
1961 New DiskObjectStore instance
1962 """
1963 try:
1964 os.mkdir(path)
1965 if dir_mode is not None:
1966 os.chmod(path, dir_mode)
1967 except FileExistsError:
1968 pass
1969 info_path = os.path.join(path, "info")
1970 pack_path = os.path.join(path, PACKDIR)
1971 os.mkdir(info_path)
1972 os.mkdir(pack_path)
1973 if dir_mode is not None:
1974 os.chmod(info_path, dir_mode)
1975 os.chmod(pack_path, dir_mode)
1976 return cls(path, file_mode=file_mode, dir_mode=dir_mode)
1978 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]:
1979 """Iterate over all object SHAs with the given prefix.
1981 Args:
1982 prefix: Hex prefix to search for (as bytes)
1984 Returns:
1985 Iterator of object SHAs (as bytes) matching the prefix
1986 """
1987 if len(prefix) < 2:
1988 yield from super().iter_prefix(prefix)
1989 return
1990 seen = set()
1991 dir = prefix[:2].decode()
1992 rest = prefix[2:].decode()
1993 try:
1994 for name in os.listdir(os.path.join(self.path, dir)):
1995 if name.startswith(rest):
1996 sha = os.fsencode(dir + name)
1997 if sha not in seen:
1998 seen.add(sha)
1999 yield sha
2000 except FileNotFoundError:
2001 pass
2003 for p in self.packs:
2004 bin_prefix = (
2005 binascii.unhexlify(prefix)
2006 if len(prefix) % 2 == 0
2007 else binascii.unhexlify(prefix[:-1])
2008 )
2009 for sha in p.index.iter_prefix(bin_prefix):
2010 sha = sha_to_hex(sha)
2011 if sha.startswith(prefix) and sha not in seen:
2012 seen.add(sha)
2013 yield sha
2014 for alternate in self.alternates:
2015 for sha in alternate.iter_prefix(prefix):
2016 if sha not in seen:
2017 seen.add(sha)
2018 yield sha
2020 def get_commit_graph(self) -> "CommitGraph | None":
2021 """Get the commit graph for this object store.
2023 Returns:
2024 CommitGraph object if available, None otherwise
2025 """
2026 if not self._use_commit_graph:
2027 return None
2029 if self._commit_graph is None:
2030 from .commit_graph import read_commit_graph
2032 # Look for commit graph in our objects directory
2033 graph_file = os.path.join(self.path, "info", "commit-graph")
2034 if os.path.exists(graph_file):
2035 self._commit_graph = read_commit_graph(graph_file)
2036 return self._commit_graph
2038 def write_commit_graph(
2039 self, refs: Iterable[bytes] | None = None, reachable: bool = True
2040 ) -> None:
2041 """Write a commit graph file for this object store.
2043 Args:
2044 refs: List of refs to include. If None, includes all refs from object store.
2045 reachable: If True, includes all commits reachable from refs.
2046 If False, only includes the direct ref targets.
2047 """
2048 from .commit_graph import get_reachable_commits
2050 if refs is None:
2051 # Get all commit objects from the object store
2052 all_refs = []
2053 # Iterate through all objects to find commits
2054 for sha in self:
2055 try:
2056 obj = self[sha]
2057 if obj.type_name == b"commit":
2058 all_refs.append(sha)
2059 except KeyError:
2060 continue
2061 else:
2062 # Use provided refs
2063 all_refs = list(refs)
2065 if not all_refs:
2066 return # No commits to include
2068 if reachable:
2069 # Get all reachable commits
2070 commit_ids = get_reachable_commits(self, all_refs)
2071 else:
2072 # Just use the direct ref targets - ensure they're hex ObjectIDs
2073 commit_ids = []
2074 for ref in all_refs:
2075 if isinstance(ref, bytes) and len(ref) == 40:
2076 # Already hex ObjectID
2077 commit_ids.append(ref)
2078 elif isinstance(ref, bytes) and len(ref) == 20:
2079 # Binary SHA, convert to hex ObjectID
2080 from .objects import sha_to_hex
2082 commit_ids.append(sha_to_hex(ref))
2083 else:
2084 # Assume it's already correct format
2085 commit_ids.append(ref)
2087 if commit_ids:
2088 # Write commit graph directly to our object store path
2089 # Generate the commit graph
2090 from .commit_graph import generate_commit_graph
2092 graph = generate_commit_graph(self, commit_ids)
2094 if graph.entries:
2095 # Ensure the info directory exists
2096 info_dir = os.path.join(self.path, "info")
2097 os.makedirs(info_dir, exist_ok=True)
2098 if self.dir_mode is not None:
2099 os.chmod(info_dir, self.dir_mode)
2101 # Write using GitFile for atomic operation
2102 graph_path = os.path.join(info_dir, "commit-graph")
2103 mask = self.file_mode if self.file_mode is not None else 0o644
2104 with GitFile(graph_path, "wb", mask=mask) as f:
2105 assert isinstance(
2106 f, _GitFile
2107 ) # GitFile in write mode always returns _GitFile
2108 graph.write_to_file(f)
2110 # Clear cached commit graph so it gets reloaded
2111 self._commit_graph = None
2113 def prune(self, grace_period: int | None = None) -> None:
2114 """Prune/clean up this object store.
2116 This removes temporary files that were left behind by interrupted
2117 pack operations. These are files that start with ``tmp_pack_`` in the
2118 repository directory or files with .pack extension but no corresponding
2119 .idx file in the pack directory.
2121 Args:
2122 grace_period: Grace period in seconds for removing temporary files.
2123 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
2124 """
2125 import glob
2127 if grace_period is None:
2128 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
2130 # Clean up tmp_pack_* files in the repository directory
2131 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
2132 # Check if file is old enough (more than grace period)
2133 mtime = os.path.getmtime(tmp_file)
2134 if time.time() - mtime > grace_period:
2135 os.remove(tmp_file)
2137 # Clean up orphaned .pack files without corresponding .idx files
2138 try:
2139 pack_dir_contents = os.listdir(self.pack_dir)
2140 except FileNotFoundError:
2141 return
2143 pack_files = {}
2144 idx_files = set()
2146 for name in pack_dir_contents:
2147 if name.endswith(".pack"):
2148 base_name = name[:-5] # Remove .pack extension
2149 pack_files[base_name] = name
2150 elif name.endswith(".idx"):
2151 base_name = name[:-4] # Remove .idx extension
2152 idx_files.add(base_name)
2154 # Remove .pack files without corresponding .idx files
2155 for base_name, pack_name in pack_files.items():
2156 if base_name not in idx_files:
2157 pack_path = os.path.join(self.pack_dir, pack_name)
2158 # Check if file is old enough (more than grace period)
2159 mtime = os.path.getmtime(pack_path)
2160 if time.time() - mtime > grace_period:
2161 os.remove(pack_path)
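# Editorial usage sketch (not part of dulwich): a minimal example, under the
# assumption of a fresh writable directory, of how the DiskObjectStore API
# above fits together -- initialise a store, add a loose object, and resolve
# an abbreviated SHA via iter_prefix(). The helper name is hypothetical.
def _example_disk_object_store(tmpdir: str) -> None:
    from dulwich.object_store import DiskObjectStore
    from dulwich.objects import Blob

    store = DiskObjectStore.init(tmpdir)   # creates tmpdir, tmpdir/info, tmpdir/pack
    blob = Blob.from_string(b"hello world\n")
    store.add_object(blob)                 # written as a loose object under xx/yyyy...
    # Abbreviated lookup: iter_prefix() yields the full hex SHAs matching the prefix.
    matches = list(store.iter_prefix(blob.id[:7]))
    assert blob.id in matches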
2164class MemoryObjectStore(PackCapableObjectStore):
2165 """Object store that keeps all objects in memory."""
2167 def __init__(self) -> None:
2168 """Initialize a MemoryObjectStore.
2170 Creates an empty in-memory object store.
2171 """
2172 super().__init__()
2173 self._data: dict[bytes, ShaFile] = {}
2174 self.pack_compression_level = -1
2176 def _to_hexsha(self, sha: bytes) -> bytes:
2177 if len(sha) == 40:
2178 return sha
2179 elif len(sha) == 20:
2180 return sha_to_hex(sha)
2181 else:
2182 raise ValueError(f"Invalid sha {sha!r}")
2184 def contains_loose(self, sha: bytes) -> bool:
2185 """Check if a particular object is present by SHA1 and is loose."""
2186 return self._to_hexsha(sha) in self._data
2188 def contains_packed(self, sha: bytes) -> bool:
2189 """Check if a particular object is present by SHA1 and is packed."""
2190 return False
2192 def __iter__(self) -> Iterator[bytes]:
2193 """Iterate over the SHAs that are present in this store."""
2194 return iter(self._data.keys())
2196 @property
2197 def packs(self) -> list[Pack]:
2198 """List with pack objects."""
2199 return []
2201 def get_raw(self, name: ObjectID) -> tuple[int, bytes]:
2202 """Obtain the raw text for an object.
2204 Args:
2205 name: sha for the object.
2206 Returns: tuple with numeric type and object contents.
2207 """
2208 obj = self[self._to_hexsha(name)]
2209 return obj.type_num, obj.as_raw_string()
2211 def __getitem__(self, name: ObjectID) -> ShaFile:
2212 """Retrieve an object by SHA.
2214 Args:
2215 name: SHA of the object (as hex string or bytes)
2217 Returns:
2218 Copy of the ShaFile object
2220 Raises:
2221 KeyError: If the object is not found
2222 """
2223 return self._data[self._to_hexsha(name)].copy()
2225 def __delitem__(self, name: ObjectID) -> None:
2226 """Delete an object from this store, for testing only."""
2227 del self._data[self._to_hexsha(name)]
2229 def add_object(self, obj: ShaFile) -> None:
2230 """Add a single object to this object store."""
2231 self._data[obj.id] = obj.copy()
2233 def add_objects(
2234 self,
2235 objects: Iterable[tuple[ShaFile, str | None]],
2236 progress: Callable[[str], None] | None = None,
2237 ) -> None:
2238 """Add a set of objects to this object store.
2240 Args:
2241 objects: Iterable over a list of (object, path) tuples
2242 progress: Optional progress reporting function.
2243 """
2244 for obj, path in objects:
2245 self.add_object(obj)
2247 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2248 """Add a new pack to this object store.
2250 Because this object store doesn't support packs, we extract and add the
2251 individual objects.
2253 Returns: Fileobject to write to, a commit function to
2254 call when the pack is finished, and an abort function.
2255 """
2256 from tempfile import SpooledTemporaryFile
2258 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
2260 def commit() -> None:
2261 size = f.tell()
2262 if size > 0:
2263 f.seek(0)
2265 p = PackData.from_file(f, size)
2266 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type]
2267 self.add_object(obj)
2268 p.close()
2269 f.close()
2270 else:
2271 f.close()
2273 def abort() -> None:
2274 f.close()
2276 return f, commit, abort # type: ignore[return-value]
2278 def add_pack_data(
2279 self,
2280 count: int,
2281 unpacked_objects: Iterator[UnpackedObject],
2282 progress: Callable[[str], None] | None = None,
2283 ) -> None:
2284 """Add pack data to this object store.
2286 Args:
2287 count: Number of items to add
2288 unpacked_objects: Iterator of UnpackedObject instances
2289 progress: Optional progress reporting function.
2290 """
2291 if count == 0:
2292 return
2294 # Since MemoryObjectStore doesn't support pack files, we need to
2295 # extract individual objects. To handle deltas properly, we write
2296 # to a temporary pack and then use PackInflater to resolve them.
2297 f, commit, abort = self.add_pack()
2298 try:
2299 write_pack_data(
2300 f.write,
2301 unpacked_objects,
2302 num_records=count,
2303 progress=progress,
2304 )
2305 except BaseException:
2306 abort()
2307 raise
2308 else:
2309 commit()
2311 def add_thin_pack(
2312 self,
2313 read_all: Callable[[int], bytes],
2314 read_some: Callable[[int], bytes],
2315 progress: Callable[[str], None] | None = None,
2316 ) -> None:
2317 """Add a new thin pack to this object store.
2319 Thin packs are packs that contain deltas with parents that exist
2320 outside the pack. Because this object store doesn't support packs, we
2321 extract and add the individual objects.
2323 Args:
2324 read_all: Read function that blocks until the number of
2325 requested bytes are read.
2326 read_some: Read function that returns at least one byte, but may
2327 not return the number of bytes requested.
2328 progress: Optional progress reporting function.
2329 """
2330 f, commit, abort = self.add_pack()
2331 try:
2332 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type]
2333 copier.verify()
2334 except BaseException:
2335 abort()
2336 raise
2337 else:
2338 commit()
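# Editorial usage sketch (not part of dulwich): MemoryObjectStore keeps every
# object in a dict, which makes it convenient for tests. The helper below is
# hypothetical and only illustrates the add_object/get_raw/contains behaviour
# documented above.
def _example_memory_object_store() -> None:
    from dulwich.object_store import MemoryObjectStore
    from dulwich.objects import Blob

    store = MemoryObjectStore()
    blob = Blob.from_string(b"in-memory data")
    store.add_object(blob)
    assert blob.id in store                 # membership check via the base store API
    type_num, raw = store.get_raw(blob.id)  # numeric type and raw contents
    assert raw == b"in-memory data"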
2341class ObjectIterator(Protocol):
2342 """Interface for iterating over objects."""
2344 def iterobjects(self) -> Iterator[ShaFile]:
2345 """Iterate over all objects.
2347 Returns:
2348 Iterator of ShaFile objects
2349 """
2350 raise NotImplementedError(self.iterobjects)
2353def tree_lookup_path(
2354 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes
2355) -> tuple[int, bytes]:
2356 """Look up an object in a Git tree.
2358 Args:
2359 lookup_obj: Callback for retrieving object by SHA1
2360 root_sha: SHA1 of the root tree
2361 path: Path to lookup
2362 Returns: A tuple of (mode, SHA) of the resulting path.
2363 """
2364 tree = lookup_obj(root_sha)
2365 if not isinstance(tree, Tree):
2366 raise NotTreeError(root_sha)
2367 return tree.lookup_path(lookup_obj, path)
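# Editorial usage sketch (not part of dulwich): tree_lookup_path() resolves a
# slash-separated path against a root tree and returns the (mode, sha) of the
# entry it finds. The in-memory store built here is illustrative only; any
# object container's __getitem__ works as the lookup callback.
def _example_tree_lookup_path() -> None:
    from dulwich.object_store import MemoryObjectStore, tree_lookup_path
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"print('hi')\n")
    subtree = Tree()
    subtree.add(b"main.py", 0o100644, blob.id)
    root = Tree()
    root.add(b"src", 0o040000, subtree.id)
    store.add_objects([(blob, None), (subtree, None), (root, None)])

    mode, sha = tree_lookup_path(store.__getitem__, root.id, b"src/main.py")
    assert (mode, sha) == (0o100644, blob.id)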
2370def _collect_filetree_revs(
2371 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
2372) -> None:
2373 """Collect SHA1s of files and directories for specified tree.
2375 Args:
2376 obj_store: Object store to get objects by SHA from
2377 tree_sha: tree reference to walk
2378 kset: set to fill with references to files and directories
2379 """
2380 filetree = obj_store[tree_sha]
2381 assert isinstance(filetree, Tree)
2382 for name, mode, sha in filetree.iteritems():
2383 assert mode is not None
2384 assert sha is not None
2385 if not S_ISGITLINK(mode) and sha not in kset:
2386 kset.add(sha)
2387 if stat.S_ISDIR(mode):
2388 _collect_filetree_revs(obj_store, sha, kset)
2391def _split_commits_and_tags(
2392 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False
2393) -> tuple[set[bytes], set[bytes], set[bytes]]:
2394 """Split object id list into three lists with commit, tag, and other SHAs.
2396 Commits referenced by tags are included in the commits
2397 list as well. Only SHA1s known in this repository will get
2398 through, and unless the ignore_unknown argument is True, a
2399 KeyError is raised for any SHA1 missing from the repository.
2401 Args:
2402 obj_store: Object store to get objects by SHA1 from
2403 lst: Collection of commit and tag SHAs
2404 ignore_unknown: True to skip SHA1 missing in the repository
2405 silently.
2406 Returns: A tuple of (commits, tags, others) SHA1s
2407 """
2408 commits: set[bytes] = set()
2409 tags: set[bytes] = set()
2410 others: set[bytes] = set()
2411 for e in lst:
2412 try:
2413 o = obj_store[e]
2414 except KeyError:
2415 if not ignore_unknown:
2416 raise
2417 else:
2418 if isinstance(o, Commit):
2419 commits.add(e)
2420 elif isinstance(o, Tag):
2421 tags.add(e)
2422 tagged = o.object[1]
2423 c, t, os = _split_commits_and_tags(
2424 obj_store, [tagged], ignore_unknown=ignore_unknown
2425 )
2426 commits |= c
2427 tags |= t
2428 others |= os
2429 else:
2430 others.add(e)
2431 return (commits, tags, others)
2434class MissingObjectFinder:
2435 """Find the objects missing from another object store.
2437 Args:
2438 object_store: Object store containing at least all objects to be
2439 sent
2440 haves: SHA1s of commits not to send (already present in target)
2441 wants: SHA1s of commits to send
2442 progress: Optional function to report progress to.
2443 get_tagged: Function that returns a dict of pointed-to sha -> tag
2444 sha for including tags.
2445 get_parents: Optional function for getting the parents of a commit.
2446 """
2448 def __init__(
2449 self,
2450 object_store: BaseObjectStore,
2451 haves: Iterable[bytes],
2452 wants: Iterable[bytes],
2453 *,
2454 shallow: Set[bytes] | None = None,
2455 progress: Callable[[bytes], None] | None = None,
2456 get_tagged: Callable[[], dict[bytes, bytes]] | None = None,
2457 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2458 ) -> None:
2459 """Initialize a MissingObjectFinder.
2461 Args:
2462 object_store: Object store containing objects
2463 haves: SHA1s of objects already present in target
2464 wants: SHA1s of objects to send
2465 shallow: Set of shallow commit SHA1s
2466 progress: Optional progress reporting callback
2467 get_tagged: Function returning dict of pointed-to sha -> tag sha
2468 get_parents: Function for getting commit parents
2469 """
2470 self.object_store = object_store
2471 if shallow is None:
2472 shallow = set()
2473 self._get_parents = get_parents
2474 reachability = object_store.get_reachability_provider()
2475 # process Commits and Tags differently
2476 # Note: while haves may list commits/tags not available locally
2477 # (such SHAs are filtered out by _split_commits_and_tags),
2478 # wants must list only known SHAs, otherwise
2479 # _split_commits_and_tags fails with KeyError
2480 have_commits, have_tags, have_others = _split_commits_and_tags(
2481 object_store, haves, ignore_unknown=True
2482 )
2483 want_commits, want_tags, want_others = _split_commits_and_tags(
2484 object_store, wants, ignore_unknown=False
2485 )
2486 # all_ancestors is a set of commits that shall not be sent
2487 # (complete repository up to 'haves')
2488 all_ancestors = reachability.get_reachable_commits(
2489 have_commits, exclude=None, shallow=shallow
2490 )
2491 # all_missing - complete set of commits between haves and wants
2492 # common_commits - boundary commits directly encountered when traversing wants
2493 # We use _collect_ancestors here because we need the exact boundary behavior:
2494 # commits that are in all_ancestors and directly reachable from wants,
2495 # but we don't traverse past them. This is hard to express with the
2496 # reachability abstraction alone.
2497 missing_commits, common_commits = _collect_ancestors(
2498 object_store,
2499 want_commits,
2500 frozenset(all_ancestors),
2501 shallow=frozenset(shallow),
2502 get_parents=self._get_parents,
2503 )
2505 self.remote_has: set[bytes] = set()
2506 # Now, fill sha_done with commits and revisions of
2507 # files and directories known to be present both locally
2508 # and on the target. Thus these commits and files
2509 # won't get selected for fetch
2510 for h in common_commits:
2511 self.remote_has.add(h)
2512 cmt = object_store[h]
2513 assert isinstance(cmt, Commit)
2514 # Get tree objects for this commit
2515 tree_objects = reachability.get_tree_objects([cmt.tree])
2516 self.remote_has.update(tree_objects)
2518 # record tags we have as visited, too
2519 for t in have_tags:
2520 self.remote_has.add(t)
2521 self.sha_done = set(self.remote_has)
2523 # in fact, what we 'want' is commits, tags, and others
2524 # we've found missing
2525 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
2526 (w, None, Commit.type_num, False) for w in missing_commits
2527 }
2528 missing_tags = want_tags.difference(have_tags)
2529 self.objects_to_send.update(
2530 {(w, None, Tag.type_num, False) for w in missing_tags}
2531 )
2532 missing_others = want_others.difference(have_others)
2533 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
2535 if progress is None:
2536 self.progress: Callable[[bytes], None] = lambda x: None
2537 else:
2538 self.progress = progress
2539 self._tagged = (get_tagged and get_tagged()) or {}
2541 def get_remote_has(self) -> set[bytes]:
2542 """Get the set of SHAs the remote has.
2544 Returns:
2545 Set of SHA1s that the remote side already has
2546 """
2547 return self.remote_has
2549 def add_todo(
2550 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
2551 ) -> None:
2552 """Add objects to the todo list.
2554 Args:
2555 entries: Iterable of tuples (sha, name, type_num, is_leaf)
2556 """
2557 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
2559 def __next__(self) -> tuple[bytes, PackHint | None]:
2560 """Get the next object to send.
2562 Returns:
2563 Tuple of (sha, pack_hint)
2565 Raises:
2566 StopIteration: When no more objects to send
2567 """
2568 while True:
2569 if not self.objects_to_send:
2570 self.progress(
2571 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
2572 )
2573 raise StopIteration
2574 (sha, name, type_num, leaf) = self.objects_to_send.pop()
2575 if sha not in self.sha_done:
2576 break
2577 if not leaf:
2578 o = self.object_store[sha]
2579 if isinstance(o, Commit):
2580 self.add_todo([(o.tree, b"", Tree.type_num, False)])
2581 elif isinstance(o, Tree):
2582 todos = []
2583 for n, m, s in o.iteritems():
2584 assert m is not None
2585 assert n is not None
2586 assert s is not None
2587 if not S_ISGITLINK(m):
2588 todos.append(
2589 (
2590 s,
2591 n,
2592 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
2593 not stat.S_ISDIR(m),
2594 )
2595 )
2596 self.add_todo(todos)
2597 elif isinstance(o, Tag):
2598 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
2599 if sha in self._tagged:
2600 self.add_todo([(self._tagged[sha], None, None, True)])
2601 self.sha_done.add(sha)
2602 if len(self.sha_done) % 1000 == 0:
2603 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
2604 if type_num is None:
2605 pack_hint = None
2606 else:
2607 pack_hint = (type_num, name)
2608 return (sha, pack_hint)
2610 def __iter__(self) -> Iterator[tuple[bytes, PackHint | None]]:
2611 """Return iterator over objects to send.
2613 Returns:
2614 Self (this class implements the iterator protocol)
2615 """
2616 return self
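# Editorial usage sketch (not part of dulwich): MissingObjectFinder is an
# iterator of (sha, pack_hint) pairs for every object that has to be sent to a
# peer that already has `haves`. The head SHAs below are placeholders for real
# commit ids present in the local store.
def _example_missing_object_finder(store, remote_head: bytes, local_head: bytes):
    from dulwich.object_store import MissingObjectFinder

    finder = MissingObjectFinder(store, haves=[remote_head], wants=[local_head])
    # Typically fed into pack generation; here we just collect the SHAs.
    return [sha for sha, _pack_hint in finder]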
2619class ObjectStoreGraphWalker:
2620 """Graph walker that finds what commits are missing from an object store."""
2622 heads: set[ObjectID]
2623 """Revisions without descendants in the local repo."""
2625 get_parents: Callable[[ObjectID], list[ObjectID]]
2626 """Function to retrieve parents in the local repo."""
2628 shallow: set[ObjectID]
2630 def __init__(
2631 self,
2632 local_heads: Iterable[ObjectID],
2633 get_parents: Callable[[ObjectID], list[ObjectID]],
2634 shallow: set[ObjectID] | None = None,
2635 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
2636 | None = None,
2637 ) -> None:
2638 """Create a new instance.
2640 Args:
2641 local_heads: Heads to start search with
2642 get_parents: Function for finding the parents of a SHA1.
2643 shallow: Set of shallow commits.
2644 update_shallow: Function to update shallow commits.
2645 """
2646 self.heads = set(local_heads)
2647 self.get_parents = get_parents
2648 self.parents: dict[ObjectID, list[ObjectID] | None] = {}
2649 if shallow is None:
2650 shallow = set()
2651 self.shallow = shallow
2652 self.update_shallow = update_shallow
2654 def nak(self) -> None:
2655 """Nothing in common was found."""
2657 def ack(self, sha: ObjectID) -> None:
2658 """Ack that a revision and its ancestors are present in the source."""
2659 if len(sha) != 40:
2660 raise ValueError(f"unexpected sha {sha!r} received")
2661 ancestors = {sha}
2663 # stop if we run out of heads to remove
2664 while self.heads:
2665 for a in ancestors:
2666 if a in self.heads:
2667 self.heads.remove(a)
2669 # collect all ancestors
2670 new_ancestors = set()
2671 for a in ancestors:
2672 ps = self.parents.get(a)
2673 if ps is not None:
2674 new_ancestors.update(ps)
2675 self.parents[a] = None
2677 # no more ancestors; stop
2678 if not new_ancestors:
2679 break
2681 ancestors = new_ancestors
2683 def next(self) -> ObjectID | None:
2684 """Iterate over ancestors of heads in the target."""
2685 if self.heads:
2686 ret = self.heads.pop()
2687 try:
2688 ps = self.get_parents(ret)
2689 except KeyError:
2690 return None
2691 self.parents[ret] = ps
2692 self.heads.update([p for p in ps if p not in self.parents])
2693 return ret
2694 return None
2696 __next__ = next
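# Editorial usage sketch (not part of dulwich): ObjectStoreGraphWalker is the
# "have" negotiation helper used during fetch. next() walks backwards from the
# local heads; ack() prunes everything reachable from a SHA the remote side
# confirmed. `store` and `local_heads` are placeholders for real objects.
def _example_graph_walker(store, local_heads):
    from dulwich.object_store import ObjectStoreGraphWalker

    def get_parents(sha):
        return store[sha].parents

    walker = ObjectStoreGraphWalker(local_heads, get_parents)
    first = next(walker)        # a local head, or None if there is nothing to offer
    if first is not None:
        walker.ack(first)       # pretend the remote already has it and its ancestors
    return first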
2699def commit_tree_changes(
2700 object_store: BaseObjectStore,
2701 tree: ObjectID | Tree,
2702 changes: Sequence[tuple[bytes, int | None, bytes | None]],
2703) -> ObjectID:
2704 """Commit a specified set of changes to a tree structure.
2706 This will apply a set of changes on top of an existing tree, storing new
2707 objects in object_store.
2709 changes are a list of tuples with (path, mode, object_sha).
2710 Paths can refer to both blobs and trees. Setting the mode and
2711 object sha to None deletes the path.
2713 This method works especially well if there are only a small
2714 number of changes to a big tree. For a large number of changes
2715 to a large tree, use e.g. commit_tree.
2717 Args:
2718 object_store: Object store to store new objects in
2719 and retrieve old ones from.
2720 tree: Original tree root (SHA or Tree object)
2721 changes: changes to apply
2722 Returns: New tree root object
2723 """
2724 # TODO(jelmer): Save up the objects and add them using .add_objects
2725 # rather than with individual calls to .add_object.
2726 # Handle both Tree object and SHA
2727 if isinstance(tree, Tree):
2728 tree_obj: Tree = tree
2729 else:
2730 sha_obj = object_store[tree]
2731 assert isinstance(sha_obj, Tree)
2732 tree_obj = sha_obj
2733 nested_changes: dict[bytes, list[tuple[bytes, int | None, bytes | None]]] = {}
2734 for path, new_mode, new_sha in changes:
2735 try:
2736 (dirname, subpath) = path.split(b"/", 1)
2737 except ValueError:
2738 if new_sha is None:
2739 del tree_obj[path]
2740 else:
2741 assert new_mode is not None
2742 tree_obj[path] = (new_mode, new_sha)
2743 else:
2744 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
2745 for name, subchanges in nested_changes.items():
2746 try:
2747 orig_subtree_id: bytes | Tree = tree_obj[name][1]
2748 except KeyError:
2749 # For new directories, pass an empty Tree object
2750 orig_subtree_id = Tree()
2751 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
2752 subtree = object_store[subtree_id]
2753 assert isinstance(subtree, Tree)
2754 if len(subtree) == 0:
2755 del tree_obj[name]
2756 else:
2757 tree_obj[name] = (stat.S_IFDIR, subtree.id)
2758 object_store.add_object(tree_obj)
2759 return tree_obj.id
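# Editorial usage sketch (not part of dulwich): commit_tree_changes() applies a
# small list of (path, mode, sha) edits on top of an existing tree and returns
# the new root tree id. The store, paths and blob below are illustrative only.
def _example_commit_tree_changes() -> None:
    from dulwich.object_store import MemoryObjectStore, commit_tree_changes
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    old_root = Tree()
    store.add_object(old_root)

    blob = Blob.from_string(b"new file contents\n")
    store.add_object(blob)
    new_root_id = commit_tree_changes(
        store,
        old_root.id,
        [(b"docs/readme.txt", 0o100644, blob.id)],  # a None mode/sha would delete the path
    )
    assert new_root_id != old_root.id  # intermediate "docs" tree is created automatically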
2762class OverlayObjectStore(BaseObjectStore):
2763 """Object store that can overlay multiple object stores."""
2765 def __init__(
2766 self,
2767 bases: list[BaseObjectStore],
2768 add_store: BaseObjectStore | None = None,
2769 ) -> None:
2770 """Initialize an OverlayObjectStore.
2772 Args:
2773 bases: List of base object stores to overlay
2774 add_store: Optional store to write new objects to
2775 """
2776 self.bases = bases
2777 self.add_store = add_store
2779 def add_object(self, object: ShaFile) -> None:
2780 """Add a single object to the store.
2782 Args:
2783 object: Object to add
2785 Raises:
2786 NotImplementedError: If no add_store was provided
2787 """
2788 if self.add_store is None:
2789 raise NotImplementedError(self.add_object)
2790 return self.add_store.add_object(object)
2792 def add_objects(
2793 self,
2794 objects: Sequence[tuple[ShaFile, str | None]],
2795 progress: Callable[[str], None] | None = None,
2796 ) -> Pack | None:
2797 """Add multiple objects to the store.
2799 Args:
2800 objects: Iterator of objects to add
2801 progress: Optional progress reporting callback
2803 Raises:
2804 NotImplementedError: If no add_store was provided
2805 """
2806 if self.add_store is None:
2807 raise NotImplementedError(self.add_object)
2808 return self.add_store.add_objects(objects, progress)
2810 @property
2811 def packs(self) -> list[Pack]:
2812 """Get the list of packs from all overlaid stores.
2814 Returns:
2815 Combined list of packs from all base stores
2816 """
2817 ret = []
2818 for b in self.bases:
2819 ret.extend(b.packs)
2820 return ret
2822 def __iter__(self) -> Iterator[ObjectID]:
2823 """Iterate over all object SHAs in the overlaid stores.
2825 Returns:
2826 Iterator of object SHAs (deduped across stores)
2827 """
2828 done = set()
2829 for b in self.bases:
2830 for o_id in b:
2831 if o_id not in done:
2832 yield o_id
2833 done.add(o_id)
2835 def iterobjects_subset(
2836 self, shas: Iterable[bytes], *, allow_missing: bool = False
2837 ) -> Iterator[ShaFile]:
2838 """Iterate over a subset of objects from the overlaid stores.
2840 Args:
2841 shas: Iterable of object SHAs to retrieve
2842 allow_missing: If True, skip missing objects; if False, raise KeyError
2844 Returns:
2845 Iterator of ShaFile objects
2847 Raises:
2848 KeyError: If an object is missing and allow_missing is False
2849 """
2850 todo = set(shas)
2851 found: set[bytes] = set()
2853 for b in self.bases:
2854 # Create a copy of todo for each base to avoid modifying
2855 # the set while iterating through it
2856 current_todo = todo - found
2857 for o in b.iterobjects_subset(current_todo, allow_missing=True):
2858 yield o
2859 found.add(o.id)
2861 # Check for any remaining objects not found
2862 missing = todo - found
2863 if missing and not allow_missing:
2864 raise KeyError(next(iter(missing)))
2866 def iter_unpacked_subset(
2867 self,
2868 shas: Iterable[bytes],
2869 include_comp: bool = False,
2870 allow_missing: bool = False,
2871 convert_ofs_delta: bool = True,
2872 ) -> Iterator[UnpackedObject]:
2873 """Iterate over unpacked objects from the overlaid stores.
2875 Args:
2876 shas: Iterable of object SHAs to retrieve
2877 include_comp: Whether to include compressed data
2878 allow_missing: If True, skip missing objects; if False, raise KeyError
2879 convert_ofs_delta: Whether to convert OFS_DELTA objects
2881 Returns:
2882 Iterator of unpacked objects
2884 Raises:
2885 KeyError: If an object is missing and allow_missing is False
2886 """
2887 todo = set(shas)
2888 for b in self.bases:
2889 for o in b.iter_unpacked_subset(
2890 todo,
2891 include_comp=include_comp,
2892 allow_missing=True,
2893 convert_ofs_delta=convert_ofs_delta,
2894 ):
2895 yield o
2896 todo.remove(o.sha())
2897 if todo and not allow_missing:
2898 raise KeyError(next(iter(todo)))
2900 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]:
2901 """Get the raw object data from the overlaid stores.
2903 Args:
2904 sha_id: SHA of the object
2906 Returns:
2907 Tuple of (type_num, raw_data)
2909 Raises:
2910 KeyError: If object not found in any base store
2911 """
2912 for b in self.bases:
2913 try:
2914 return b.get_raw(sha_id)
2915 except KeyError:
2916 pass
2917 raise KeyError(sha_id)
2919 def contains_packed(self, sha: bytes) -> bool:
2920 """Check if an object is packed in any base store.
2922 Args:
2923 sha: SHA of the object
2925 Returns:
2926 True if object is packed in any base store
2927 """
2928 for b in self.bases:
2929 if b.contains_packed(sha):
2930 return True
2931 return False
2933 def contains_loose(self, sha: bytes) -> bool:
2934 """Check if an object is loose in any base store.
2936 Args:
2937 sha: SHA of the object
2939 Returns:
2940 True if object is loose in any base store
2941 """
2942 for b in self.bases:
2943 if b.contains_loose(sha):
2944 return True
2945 return False
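# Editorial usage sketch (not part of dulwich): OverlayObjectStore presents
# several stores as one read view and can direct writes at a single store.
# Both stores below are in-memory purely for brevity.
def _example_overlay_object_store() -> None:
    from dulwich.object_store import MemoryObjectStore, OverlayObjectStore
    from dulwich.objects import Blob

    base = MemoryObjectStore()
    scratch = MemoryObjectStore()
    existing = Blob.from_string(b"already in the base store")
    base.add_object(existing)

    overlay = OverlayObjectStore([scratch, base], add_store=scratch)
    assert overlay.get_raw(existing.id)[1] == b"already in the base store"

    new = Blob.from_string(b"written through the overlay")
    overlay.add_object(new)                 # lands in `scratch` only
    assert new.id in scratch and new.id not in base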
2948def read_packs_file(f: BinaryIO) -> Iterator[str]:
2949 """Yield the packs listed in a packs file."""
2950 for line in f.read().splitlines():
2951 if not line:
2952 continue
2953 (kind, name) = line.split(b" ", 1)
2954 if kind != b"P":
2955 continue
2956 yield os.fsdecode(name)
2959class BucketBasedObjectStore(PackBasedObjectStore):
2960 """Object store implementation that uses a bucket store like S3 as backend."""
2962 def _iter_loose_objects(self) -> Iterator[bytes]:
2963 """Iterate over the SHAs of all loose objects."""
2964 return iter([])
2966 def _get_loose_object(self, sha: bytes) -> None:
2967 return None
2969 def delete_loose_object(self, sha: bytes) -> None:
2970 """Delete a loose object (no-op for bucket stores).
2972 Bucket-based stores don't have loose objects, so this is a no-op.
2974 Args:
2975 sha: SHA of the object to delete
2976 """
2977 # Doesn't exist..
2979 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
2980 """Pack loose objects. Returns number of objects packed.
2982 BucketBasedObjectStore doesn't support loose objects, so this is a no-op.
2984 Args:
2985 progress: Optional progress reporting callback (ignored)
2986 """
2987 return 0
2989 def _remove_pack_by_name(self, name: str) -> None:
2990 """Remove a pack by name. Subclasses should implement this."""
2991 raise NotImplementedError(self._remove_pack_by_name)
2993 def _iter_pack_names(self) -> Iterator[str]:
2994 raise NotImplementedError(self._iter_pack_names)
2996 def _get_pack(self, name: str) -> Pack:
2997 raise NotImplementedError(self._get_pack)
2999 def _update_pack_cache(self) -> list[Pack]:
3000 pack_files = set(self._iter_pack_names())
3002 # Open newly appeared pack files
3003 new_packs = []
3004 for f in pack_files:
3005 if f not in self._pack_cache:
3006 pack = self._get_pack(f)
3007 new_packs.append(pack)
3008 self._pack_cache[f] = pack
3009 # Remove disappeared pack files
3010 for f in set(self._pack_cache) - pack_files:
3011 self._pack_cache.pop(f).close()
3012 return new_packs
3014 def _upload_pack(
3015 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
3016 ) -> None:
3017 raise NotImplementedError
3019 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
3020 """Add a new pack to this object store.
3022 Returns: Fileobject to write to, a commit function to
3023 call when the pack is finished and an abort
3024 function.
3025 """
3026 import tempfile
3028 pf = tempfile.SpooledTemporaryFile(
3029 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
3030 )
3032 def commit() -> Pack | None:
3033 if pf.tell() == 0:
3034 pf.close()
3035 return None
3037 pf.seek(0)
3039 p = PackData(pf.name, pf)
3040 entries = p.sorted_entries()
3041 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
3042 idxf = tempfile.SpooledTemporaryFile(
3043 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
3044 )
3045 checksum = p.get_stored_checksum()
3046 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
3047 idxf.seek(0)
3048 idx = load_pack_index_file(basename + ".idx", idxf)
3049 for pack in self.packs:
3050 if pack.get_stored_checksum() == p.get_stored_checksum():
3051 p.close()
3052 idx.close()
3053 pf.close()
3054 idxf.close()
3055 return pack
3056 pf.seek(0)
3057 idxf.seek(0)
3058 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type]
3059 final_pack = Pack.from_objects(p, idx)
3060 self._add_cached_pack(basename, final_pack)
3061 pf.close()
3062 idxf.close()
3063 return final_pack
3065 return pf, commit, pf.close # type: ignore[return-value]
3068def _collect_ancestors(
3069 store: ObjectContainer,
3070 heads: Iterable[ObjectID],
3071 common: frozenset[ObjectID] = frozenset(),
3072 shallow: frozenset[ObjectID] = frozenset(),
3073 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
3074) -> tuple[set[ObjectID], set[ObjectID]]:
3075 """Collect all ancestors of heads up to (excluding) those in common.
3077 Args:
3078 store: Object store to get commits from
3079 heads: commits to start from
3080 common: commits to end at, or empty set to walk repository
3081 completely
3082 shallow: Set of shallow commits
3083 get_parents: Optional function for getting the parents of a
3084 commit.
3085 Returns: a tuple (A, B) where A - all commits reachable
3086 from heads but not present in common, B - common (shared) elements
3087 that are directly reachable from heads
3088 """
3089 bases = set()
3090 commits = set()
3091 queue: list[ObjectID] = []
3092 queue.extend(heads)
3094 # Try to use commit graph if available
3095 commit_graph = store.get_commit_graph()
3097 while queue:
3098 e = queue.pop(0)
3099 if e in common:
3100 bases.add(e)
3101 elif e not in commits:
3102 commits.add(e)
3103 if e in shallow:
3104 continue
3106 # Try to use commit graph for parent lookup
3107 parents = None
3108 if commit_graph:
3109 parents = commit_graph.get_parents(e)
3111 if parents is None:
3112 # Fall back to loading the object
3113 cmt = store[e]
3114 assert isinstance(cmt, Commit)
3115 parents = get_parents(cmt)
3117 queue.extend(parents)
3118 return (commits, bases)
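# Editorial usage sketch (not part of dulwich): _collect_ancestors() is the
# private helper behind MissingObjectFinder's boundary computation. Given some
# `wants` heads and a frozenset of commits the other side already has, it
# returns (missing_commits, boundary_commits). The arguments below are
# placeholders for real commit SHAs in `store`.
def _example_collect_ancestors(store, want_sha: bytes, have_sha: bytes):
    from dulwich.object_store import _collect_ancestors

    missing, boundary = _collect_ancestors(store, [want_sha], frozenset([have_sha]))
    # `missing` holds commits reachable from want_sha but not from have_sha;
    # `boundary` holds the members of the common set that were hit directly.
    return missing, boundary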
3121def iter_tree_contents(
3122 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
3123) -> Iterator[TreeEntry]:
3124 """Iterate the contents of a tree and all subtrees.
3126 Iteration is depth-first pre-order, as in e.g. os.walk.
3128 Args:
3129 store: Object store to get trees from
3130 tree_id: SHA1 of the tree.
3131 include_trees: If True, include tree objects in the iteration.
3133 Yields: TreeEntry namedtuples for all the objects in a tree.
3134 """
3135 if tree_id is None:
3136 return
3137 # This could be fairly easily generalized to >2 trees if we find a use
3138 # case.
3139 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
3140 while todo:
3141 entry = todo.pop()
3142 assert entry.mode is not None
3143 if stat.S_ISDIR(entry.mode):
3144 extra = []
3145 assert entry.sha is not None
3146 tree = store[entry.sha]
3147 assert isinstance(tree, Tree)
3148 for subentry in tree.iteritems(name_order=True):
3149 assert entry.path is not None
3150 extra.append(subentry.in_path(entry.path))
3151 todo.extend(reversed(extra))
3152 if not stat.S_ISDIR(entry.mode) or include_trees:
3153 yield entry
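# Editorial usage sketch (not part of dulwich): iter_tree_contents() walks a
# tree depth-first and yields TreeEntry tuples with full slash-separated
# paths. The small in-memory store built here is illustrative only.
def _example_iter_tree_contents() -> None:
    from dulwich.object_store import MemoryObjectStore, iter_tree_contents
    from dulwich.objects import Blob, Tree

    store = MemoryObjectStore()
    blob = Blob.from_string(b"content\n")
    sub = Tree()
    sub.add(b"file.txt", 0o100644, blob.id)
    root = Tree()
    root.add(b"dir", 0o040000, sub.id)
    store.add_objects([(blob, None), (sub, None), (root, None)])

    paths = [entry.path for entry in iter_tree_contents(store, root.id)]
    assert paths == [b"dir/file.txt"]  # tree entries are omitted unless include_trees=True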
3156def iter_commit_contents(
3157 store: ObjectContainer,
3158 commit: Commit | bytes,
3159 *,
3160 include: Sequence[str | bytes | Path] | None = None,
3161) -> Iterator[TreeEntry]:
3162 """Iterate the contents of the repository at the specified commit.
3164 This is a wrapper around iter_tree_contents() and
3165 tree_lookup_path() to simplify the common task of getting the
3166 contents of a repo at a particular commit. See also
3167 dulwich.index.build_file_from_blob() for writing individual files
3168 to disk.
3170 Args:
3171 store: Object store to get trees from
3172 commit: Commit object, or SHA1 of a commit
3173 include: if provided, only the entries whose paths are in the
3174 list, or whose parent tree is in the list, will be
3175 included. Note that duplicate or overlapping paths
3176 (e.g. ["foo", "foo/bar"]) may result in duplicate entries
3178 Yields: TreeEntry namedtuples for all matching files in a commit.
3179 """
3180 sha = commit.id if isinstance(commit, Commit) else commit
3181 if not isinstance(obj := store[sha], Commit):
3182 raise TypeError(
3183 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
3184 )
3185 commit = obj
3186 encoding = commit.encoding or "utf-8"
3187 include_bytes: list[bytes] = (
3188 [
3189 path if isinstance(path, bytes) else str(path).encode(encoding)
3190 for path in include
3191 ]
3192 if include is not None
3193 else [b""]
3194 )
3196 for path in include_bytes:
3197 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
3198 # Iterate all contained files if path points to a dir, otherwise just get that
3199 # single file
3200 if isinstance(store[obj_id], Tree):
3201 for entry in iter_tree_contents(store, obj_id):
3202 yield entry.in_path(path)
3203 else:
3204 yield TreeEntry(path, mode, obj_id)
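# Editorial usage sketch (not part of dulwich): iter_commit_contents() lists
# the files recorded in a commit, optionally restricted to a few paths. The
# store, commit SHA and include paths below are placeholders for an existing
# repository; unknown include paths would raise KeyError.
def _example_iter_commit_contents(store, commit_sha: bytes):
    from dulwich.object_store import iter_commit_contents

    # Only entries under "docs/" (and the file "README") are yielded.
    return [
        (entry.path, entry.sha)
        for entry in iter_commit_contents(
            store, commit_sha, include=[b"docs", b"README"]
        )
    ]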
3207def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
3208 """Peel all tags from a SHA.
3210 Args:
3211 store: Object store to get objects from
3212 sha: The object SHA to peel.
3213 Returns: A tuple of (unpeeled, peeled) objects, where peeled is the
3214 object reached after following all intermediate tags; if the
3215 original SHA does not point to a tag, both are the same object.
3216 """
3217 unpeeled = obj = store[sha]
3218 obj_class = object_class(obj.type_name)
3219 while obj_class is Tag:
3220 assert isinstance(obj, Tag)
3221 obj_class, sha = obj.object
3222 obj = store[sha]
3223 return unpeeled, obj
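# Editorial usage sketch (not part of dulwich): peel_sha() follows tag objects
# until it reaches whatever they ultimately point at. `store` and `ref_sha`
# are placeholders; for a non-tag SHA both returned objects are the same.
def _example_peel_sha(store, ref_sha: bytes):
    from dulwich.object_store import peel_sha

    unpeeled, peeled = peel_sha(store, ref_sha)
    # For an annotated tag pointing at a commit, `unpeeled` is the Tag object
    # and `peeled` is the Commit it points to.
    return unpeeled.type_name, peeled.type_name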
3226class GraphTraversalReachability:
3227 """Naive graph traversal implementation of ObjectReachabilityProvider.
3229 This implementation wraps existing graph traversal functions
3230 (_collect_ancestors, _collect_filetree_revs) to provide the standard
3231 reachability interface without any performance optimizations.
3232 """
3234 def __init__(self, object_store: BaseObjectStore) -> None:
3235 """Initialize the graph traversal provider.
3237 Args:
3238 object_store: Object store to query
3239 """
3240 self.store = object_store
3242 def get_reachable_commits(
3243 self,
3244 heads: Iterable[bytes],
3245 exclude: Iterable[bytes] | None = None,
3246 shallow: Set[bytes] | None = None,
3247 ) -> set[bytes]:
3248 """Get all commits reachable from heads, excluding those in exclude.
3250 Uses _collect_ancestors for commit traversal.
3252 Args:
3253 heads: Starting commit SHAs
3254 exclude: Commit SHAs to exclude (and their ancestors)
3255 shallow: Set of shallow commit boundaries
3257 Returns:
3258 Set of commit SHAs reachable from heads but not from exclude
3259 """
3260 exclude_set = frozenset(exclude) if exclude else frozenset()
3261 shallow_set = frozenset(shallow) if shallow else frozenset()
3262 commits, _bases = _collect_ancestors(
3263 self.store, heads, exclude_set, shallow_set
3264 )
3265 return commits
3267 def get_tree_objects(
3268 self,
3269 tree_shas: Iterable[bytes],
3270 ) -> set[bytes]:
3271 """Get all trees and blobs reachable from the given trees.
3273 Uses _collect_filetree_revs for tree traversal.
3275 Args:
3276 tree_shas: Starting tree SHAs
3278 Returns:
3279 Set of tree and blob SHAs
3280 """
3281 result: set[bytes] = set()
3282 for tree_sha in tree_shas:
3283 _collect_filetree_revs(self.store, tree_sha, result)
3284 return result
3286 def get_reachable_objects(
3287 self,
3288 commits: Iterable[bytes],
3289 exclude_commits: Iterable[bytes] | None = None,
3290 ) -> set[bytes]:
3291 """Get all objects (commits + trees + blobs) reachable from commits.
3293 Args:
3294 commits: Starting commit SHAs
3295 exclude_commits: Commits whose objects should be excluded
3297 Returns:
3298 Set of all object SHAs (commits, trees, blobs)
3299 """
3300 commits_set = set(commits)
3301 result = set(commits_set)
3303 # Get trees for all commits
3304 tree_shas = []
3305 for commit_sha in commits_set:
3306 try:
3307 commit = self.store[commit_sha]
3308 if isinstance(commit, Commit):
3309 tree_shas.append(commit.tree)
3310 except KeyError:
3311 # Commit not in store, skip
3312 continue
3314 # Collect all tree/blob objects
3315 result.update(self.get_tree_objects(tree_shas))
3317 # Exclude objects from exclude_commits if needed
3318 if exclude_commits:
3319 exclude_objects = self.get_reachable_objects(exclude_commits, None)
3320 result -= exclude_objects
3322 return result
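# Editorial usage sketch (not part of dulwich): the naive provider answers the
# same queries as the bitmap-backed implementation below, just by walking the
# object graph. `store`, `new_head` and `old_head` are placeholders for a real
# object store and two existing commit SHAs.
def _example_reachability(store, new_head: bytes, old_head: bytes):
    from dulwich.object_store import GraphTraversalReachability

    provider = GraphTraversalReachability(store)
    # Commits reachable from new_head that are not reachable from old_head.
    return provider.get_reachable_commits([new_head], exclude=[old_head])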
3325class BitmapReachability:
3326 """Bitmap-accelerated implementation of ObjectReachabilityProvider.
3328 This implementation uses packfile bitmap indexes where available to
3329 accelerate reachability queries. Falls back to graph traversal when
3330 bitmaps don't cover the requested commits.
3331 """
3333 def __init__(self, object_store: "PackBasedObjectStore") -> None:
3334 """Initialize the bitmap provider.
3336 Args:
3337 object_store: Pack-based object store with bitmap support
3338 """
3339 self.store = object_store
3340 # Fallback to graph traversal for operations not yet optimized
3341 self._fallback = GraphTraversalReachability(object_store)
3343 def _combine_commit_bitmaps(
3344 self,
3345 commit_shas: set[bytes],
3346 exclude_shas: set[bytes] | None = None,
3347 ) -> tuple["EWAHBitmap", "Pack"] | None:
3348 """Combine bitmaps for multiple commits using OR, with optional exclusion.
3350 Args:
3351 commit_shas: Set of commit SHAs to combine
3352 exclude_shas: Optional set of commit SHAs to exclude
3354 Returns:
3355 Tuple of (combined_bitmap, pack) or None if bitmaps unavailable
3356 """
3357 from .bitmap import find_commit_bitmaps
3359 # Find bitmaps for the commits
3360 commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)
3362 # If we can't find bitmaps for all commits, return None
3363 if len(commit_bitmaps) < len(commit_shas):
3364 return None
3366 # Combine bitmaps using OR
3367 combined_bitmap = None
3368 result_pack = None
3370 for commit_sha in commit_shas:
3371 pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
3372 commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
3374 if commit_bitmap is None:
3375 return None
3377 if combined_bitmap is None:
3378 combined_bitmap = commit_bitmap
3379 result_pack = pack
3380 elif pack == result_pack:
3381 # Same pack, can OR directly
3382 combined_bitmap = combined_bitmap | commit_bitmap
3383 else:
3384 # Different packs, can't combine
3385 return None
3387 # Handle exclusions if provided
3388 if exclude_shas and result_pack and combined_bitmap:
3389 exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])
3391 if len(exclude_bitmaps) == len(exclude_shas):
3392 # All excludes have bitmaps, compute exclusion
3393 exclude_combined = None
3395 for commit_sha in exclude_shas:
3396 _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
3397 exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
3399 if exclude_bitmap is None:
3400 break
3402 if exclude_combined is None:
3403 exclude_combined = exclude_bitmap
3404 else:
3405 exclude_combined = exclude_combined | exclude_bitmap
3407 # Subtract excludes using set difference
3408 if exclude_combined:
3409 combined_bitmap = combined_bitmap - exclude_combined
3411 if combined_bitmap and result_pack:
3412 return (combined_bitmap, result_pack)
3413 return None
3415 def get_reachable_commits(
3416 self,
3417 heads: Iterable[bytes],
3418 exclude: Iterable[bytes] | None = None,
3419 shallow: Set[bytes] | None = None,
3420 ) -> set[bytes]:
3421 """Get all commits reachable from heads using bitmaps where possible.
3423 Args:
3424 heads: Starting commit SHAs
3425 exclude: Commit SHAs to exclude (and their ancestors)
3426 shallow: Set of shallow commit boundaries
3428 Returns:
3429 Set of commit SHAs reachable from heads but not from exclude
3430 """
3431 from .bitmap import bitmap_to_object_shas
3433 # If shallow is specified, fall back to graph traversal
3434 # (bitmaps don't support shallow boundaries well)
3435 if shallow:
3436 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3438 heads_set = set(heads)
3439 exclude_set = set(exclude) if exclude else None
3441 # Try to combine bitmaps
3442 result = self._combine_commit_bitmaps(heads_set, exclude_set)
3443 if result is None:
3444 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3446 combined_bitmap, result_pack = result
3448 # Convert bitmap to commit SHAs, filtering for commits only
3449 pack_bitmap = result_pack.bitmap
3450 if pack_bitmap is None:
3451 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3452 commit_type_filter = pack_bitmap.commit_bitmap
3453 return bitmap_to_object_shas(
3454 combined_bitmap, result_pack.index, commit_type_filter
3455 )
3457 def get_tree_objects(
3458 self,
3459 tree_shas: Iterable[bytes],
3460 ) -> set[bytes]:
3461 """Get all trees and blobs reachable from the given trees.
3463 Args:
3464 tree_shas: Starting tree SHAs
3466 Returns:
3467 Set of tree and blob SHAs
3468 """
3469 # Tree traversal doesn't benefit much from bitmaps, use fallback
3470 return self._fallback.get_tree_objects(tree_shas)
3472 def get_reachable_objects(
3473 self,
3474 commits: Iterable[bytes],
3475 exclude_commits: Iterable[bytes] | None = None,
3476 ) -> set[bytes]:
3477 """Get all objects reachable from commits using bitmaps.
3479 Args:
3480 commits: Starting commit SHAs
3481 exclude_commits: Commits whose objects should be excluded
3483 Returns:
3484 Set of all object SHAs (commits, trees, blobs)
3485 """
3486 from .bitmap import bitmap_to_object_shas
3488 commits_set = set(commits)
3489 exclude_set = set(exclude_commits) if exclude_commits else None
3491 # Try to combine bitmaps
3492 result = self._combine_commit_bitmaps(commits_set, exclude_set)
3493 if result is None:
3494 return self._fallback.get_reachable_objects(commits, exclude_commits)
3496 combined_bitmap, result_pack = result
3498 # Convert bitmap to all object SHAs (no type filter)
3499 return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)