# object_store.py -- Object store for git objects
# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
#                         and others
#
# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
# or (at your option) any later version. You can redistribute it and/or
# modify it under the terms of either of these two licenses.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# You should have received a copy of the licenses; if not, see
# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
# License, Version 2.0.
#

"""Git object store interfaces and implementation."""

import binascii
import os
import stat
import sys
import time
import warnings
from collections.abc import Iterable, Iterator, Mapping, Sequence, Set
from contextlib import suppress
from io import BytesIO
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    BinaryIO,
    Callable,
    Optional,
    Protocol,
    Union,
)

from .errors import NotTreeError
from .file import GitFile, _GitFile
from .objects import (
    S_ISGITLINK,
    ZERO_SHA,
    Blob,
    Commit,
    ObjectID,
    ShaFile,
    Tag,
    Tree,
    TreeEntry,
    hex_to_filename,
    hex_to_sha,
    object_class,
    sha_to_hex,
    valid_hexsha,
)
from .pack import (
    PACK_SPOOL_FILE_MAX_SIZE,
    ObjectContainer,
    Pack,
    PackData,
    PackedObjectContainer,
    PackFileDisappeared,
    PackHint,
    PackIndexer,
    PackInflater,
    PackStreamCopier,
    UnpackedObject,
    extend_pack,
    full_unpacked_object,
    generate_unpacked_objects,
    iter_sha1,
    load_pack_index_file,
    pack_objects_to_data,
    write_pack_data,
    write_pack_index,
)
from .protocol import DEPTH_INFINITE
from .refs import PEELED_TAG_SUFFIX, Ref

if TYPE_CHECKING:
    from .commit_graph import CommitGraph
    from .config import Config
    from .diff_tree import RenameDetector


class GraphWalker(Protocol):
    """Protocol for graph walker objects."""

    def __next__(self) -> Optional[bytes]:
        """Return the next object SHA to visit."""
        ...

    def ack(self, sha: bytes) -> None:
        """Acknowledge that an object has been received."""
        ...

    def nak(self) -> None:
        """Nothing in common was found."""
        ...
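
# Illustrative sketch (not part of the module): a minimal in-memory object
# satisfying the GraphWalker protocol above by walking a fixed list of
# candidate SHAs. ``_TrivialGraphWalker`` and its attributes are hypothetical
# names used only for this example.
#
#     class _TrivialGraphWalker:
#         def __init__(self, candidates: list[bytes]) -> None:
#             self._candidates = list(candidates)
#             self.acked: list[bytes] = []
#
#         def __next__(self) -> Optional[bytes]:
#             # Returning None signals that the walk is exhausted.
#             return self._candidates.pop() if self._candidates else None
#
#         def ack(self, sha: bytes) -> None:
#             self.acked.append(sha)
#
#         def nak(self) -> None:
#             pass  # nothing in common was found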


INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks


def find_shallow(
    store: ObjectContainer, heads: Iterable[bytes], depth: int
) -> tuple[set[bytes], set[bytes]]:
    """Find shallow commits according to a given depth.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
      considered shallow and unshallow according to the arguments. Note that
      these sets may overlap if a commit is reachable along multiple paths.
    """
    parents: dict[bytes, list[bytes]] = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha: bytes) -> list[bytes]:
        result = parents.get(sha, None)
        if not result:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            commit = store[sha]
            assert isinstance(commit, Commit)
            result = commit.parents
            parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel tags if necessary
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            shallow.add(sha)

    return shallow, not_shallow
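
# Illustrative sketch (not part of the module): computing the shallow
# boundary two levels below a branch head. ``store`` and ``head_sha`` are
# assumed to exist; ``head_sha`` is a hex commit SHA.
#
#     shallow, not_shallow = find_shallow(store, [head_sha], depth=2)
#     # ``not_shallow`` holds commits strictly closer to the head than the
#     # cutoff; ``shallow`` holds the commits sitting at the boundary.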


def get_depth(
    store: ObjectContainer,
    head: bytes,
    get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
    max_depth: Optional[int] = None,
) -> int:
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to search in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    """
    if head not in store:
        return 0
    current_depth = 1
    queue = [(head, current_depth)]
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.pop(0)
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            if isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth
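
# Illustrative sketch (not part of the module): measuring how much history
# is locally available below a head, capped at 50 commits. ``store`` and
# ``head_sha`` are assumed to exist.
#
#     depth = get_depth(store, head_sha, max_depth=50)
#     # depth == 0 means the head is absent; otherwise it is the length of
#     # the longest locally available parent chain (up to the cap).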


class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack."""


class BaseObjectStore:
    """Object store interface."""

    def determine_wants_all(
        self, refs: Mapping[Ref, ObjectID], depth: Optional[int] = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs."""

        def _want_deepen(sha: bytes) -> bool:
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
            and not sha == ZERO_SHA
        ]

    def contains_loose(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        return False  # Default implementation for stores that don't support packing

    def __contains__(self, sha1: bytes) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: bytes) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable[..., None]] = None,
    ) -> Optional["Pack"]:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(
        self,
        source: Optional[bytes],
        target: Optional[bytes],
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: Optional["RenameDetector"] = None,
        paths: Optional[Sequence[bytes]] = None,
    ) -> Iterator[
        tuple[
            tuple[Optional[bytes], Optional[bytes]],
            tuple[Optional[int], Optional[int]],
            tuple[Optional[bytes], Optional[bytes]],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
          (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            old_path = change.old.path if change.old is not None else None
            new_path = change.new.path if change.new is not None else None
            old_mode = change.old.mode if change.old is not None else None
            new_mode = change.new.mode if change.new is not None else None
            old_sha = change.old.sha if change.old is not None else None
            new_sha = change.new.sha if change.new is not None else None
            yield (
                (old_path, new_path),
                (old_mode, new_mode),
                (old_sha, new_sha),
            )
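
    # Illustrative sketch (not part of the module): diffing two tree SHAs
    # using the tuple layout documented above. ``store``, ``old_tree`` and
    # ``new_tree`` are assumed to exist.
    #
    #     for (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) in \
    #             store.tree_changes(old_tree, new_tree):
    #         print(oldpath, b"->", newpath)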

    def iter_tree_contents(
        self, tree_id: bytes, include_trees: bool = False
    ) -> Iterator[TreeEntry]:
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
          tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def iter_unpacked_subset(
        self,
        shas: Iterable[bytes],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> "Iterator[UnpackedObject]":
        """Iterate over unpacked objects for a subset of SHAs.

        Default implementation that converts ShaFile objects to UnpackedObject.
        Subclasses may override for more efficient unpacked access.

        Args:
          shas: Iterable of object SHAs to retrieve
          include_comp: Whether to include compressed data (ignored in base implementation)
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation)

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        from .pack import UnpackedObject

        for sha in shas:
            try:
                obj = self[sha]
                # Convert ShaFile to UnpackedObject
                unpacked = UnpackedObject(
                    obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
                )
                yield unpacked
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves: Iterable[bytes],
        wants: Iterable[bytes],
        shallow: Optional[Set[bytes]] = None,
        progress: Optional[Callable[..., None]] = None,
        get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
        get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
    ) -> Iterator[tuple[bytes, Optional[PackHint]]]:
        """Find the missing objects required for a set of revisions.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]:
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves
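
    # Illustrative sketch (not part of the module): probing which of a
    # peer's candidate SHAs we already have, using the hypothetical
    # ``_TrivialGraphWalker`` sketched above the GraphWalker protocol.
    # ``store`` and ``candidates`` are assumed to exist.
    #
    #     walker = _TrivialGraphWalker(candidates)
    #     common = store.find_common_revisions(walker)
    #     # ``common`` lists the candidates present in this store; each one
    #     # was also acknowledged via walker.ack().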

    def generate_pack_data(
        self,
        have: Iterable[bytes],
        want: Iterable[bytes],
        *,
        shallow: Optional[Set[bytes]] = None,
        progress: Optional[Callable[..., None]] = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )
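
    # Illustrative sketch (not part of the module): producing pack data for
    # everything reachable from ``want_sha`` that the other side does not
    # have. ``store``, ``have_shas`` and ``want_sha`` are assumed to exist.
    #
    #     count, records = store.generate_pack_data(have_shas, [want_sha])
    #     # ``count`` is the number of objects; ``records`` is an iterator
    #     # of UnpackedObject suitable for write_pack_data().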

    def peel_sha(self, sha: bytes) -> bytes:
        """Peel all tags from a SHA.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
          intermediate tags; if the original ref does not point to a tag,
          this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: bytes,
        get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
        max_depth: Optional[int] = None,
    ) -> int:
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> Optional["CommitGraph"]:
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(
        self, refs: Optional[Sequence[bytes]] = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Note:
          The default implementation raises NotImplementedError. Subclasses
          should override this method to provide commit graph writing
          functionality.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: bytes) -> float:
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)


class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that supports pack operations.

    This is a base class for object stores that can handle pack files,
    including both disk-based and memory-based stores.
    """

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator["UnpackedObject"],
        progress: Optional[Callable[..., None]] = None,
    ) -> Optional["Pack"]:
        """Add pack data to this object store.

        Args:
          count: Number of objects
          unpacked_objects: Iterator over unpacked objects
          progress: Optional progress callback
        """
        raise NotImplementedError(self.add_pack_data)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> "UnpackedObject":
        """Get a raw unresolved object.

        Args:
          sha1: SHA-1 hash of the object
          include_comp: Whether to include compressed data

        Returns:
          UnpackedObject instance
        """
        from .pack import UnpackedObject

        obj = self[sha1]
        return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects

        Returns:
          Iterator of ShaFile objects
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise


class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level: int = -1,
        pack_index_version: Optional[int] = None,
        pack_delta_window_size: Optional[int] = None,
        pack_window_memory: Optional[int] = None,
        pack_delta_cache_size: Optional[int] = None,
        pack_depth: Optional[int] = None,
        pack_threads: Optional[int] = None,
        pack_big_file_threshold: Optional[int] = None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
        """
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store."""
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Optional[Callable[..., None]] = None,
    ) -> Optional["Pack"]:
        """Add pack data to this object store.

        Args:
          count: Number of items to add
          unpacked_objects: Iterator of UnpackedObject instances
          progress: Optional progress callback
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return None
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                compression_level=self.pack_compression_level,
            )
        except BaseException:
            abort()
            raise
        else:
            return commit()

    @property
    def alternates(self) -> list["BaseObjectStore"]:
        """Return list of alternate object stores."""
        return []

    def contains_packed(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                pass
        return False

    def __contains__(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            if prev_pack:
                prev_pack.close()

    def generate_pack_data(
        self,
        have: Iterable[bytes],
        want: Iterable[bytes],
        *,
        shallow: Optional[Set[bytes]] = None,
        progress: Optional[Callable[..., None]] = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            self,
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

    def _clear_cached_packs(self) -> None:
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (_name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self) -> Iterator[Pack]:
        return iter(self._pack_cache.values())

    def _update_pack_cache(self) -> list[Pack]:
        raise NotImplementedError(self._update_pack_cache)

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        """
        self._clear_cached_packs()

    @property
    def packs(self) -> list[Pack]:
        """List with pack objects."""
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())

    def count_pack_files(self) -> int:
        """Count the number of pack files.

        Returns:
          Number of pack files (excluding those with .keep files)
        """
        count = 0
        for pack in self.packs:
            # Check if there's a .keep file for this pack
            keep_path = pack._basename + ".keep"
            if not os.path.exists(keep_path):
                count += 1
        return count

    def _iter_alternate_objects(self) -> Iterator[bytes]:
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate

    def _iter_loose_objects(self) -> Iterator[bytes]:
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]:
        raise NotImplementedError(self._get_loose_object)

    def delete_loose_object(self, sha: bytes) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.
        """
        raise NotImplementedError(self.delete_loose_object)

    def _remove_pack(self, pack: "Pack") -> None:
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(
        self, progress: Optional[Callable[[str], None]] = None
    ) -> int:
        """Pack loose objects.

        Args:
          progress: Optional progress reporting callback

        Returns: Number of objects packed
        """
        objects: list[tuple[ShaFile, None]] = []
        for sha in self._iter_loose_objects():
            obj = self._get_loose_object(sha)
            if obj is not None:
                objects.append((obj, None))
        self.add_objects(objects, progress=progress)
        for obj, path in objects:
            self.delete_loose_object(obj.id)
        return len(objects)
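
    # Illustrative sketch (not part of the module): consolidating loose
    # objects into a single new pack. ``store`` is assumed to be a concrete
    # PackBasedObjectStore subclass instance, e.g. a DiskObjectStore.
    #
    #     packed = store.pack_loose_objects()
    #     print(f"packed {packed} loose objects")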

    def repack(
        self,
        exclude: Optional[Set[bytes]] = None,
        progress: Optional[Callable[[str], None]] = None,
    ) -> int:
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.

        Args:
          exclude: Optional set of object SHAs to exclude from repacking
          progress: Optional progress reporting callback
        """
        if exclude is None:
            exclude = set()

        loose_objects = set()
        excluded_loose_objects = set()
        for sha in self._iter_loose_objects():
            if sha not in exclude:
                obj = self._get_loose_object(sha)
                if obj is not None:
                    loose_objects.add(obj)
            else:
                excluded_loose_objects.add(sha)

        objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update(
                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
            )

        # Only create a new pack if there are objects to pack
        if objects:
            # The name of the consolidated pack might match the name of a
            # pre-existing pack. Take care not to remove the newly created
            # consolidated pack.
            consolidated = self.add_objects(list(objects), progress=progress)
            if consolidated is not None:
                old_packs.pop(consolidated.name(), None)

        # Delete loose objects that were packed
        for obj in loose_objects:
            if obj is not None:
                self.delete_loose_object(obj.id)
        # Delete excluded loose objects
        for sha in excluded_loose_objects:
            self.delete_loose_object(sha)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)
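
    # Illustrative sketch (not part of the module): a full repack that
    # drops a known-unwanted object; ``store`` and ``bad_sha`` are assumed
    # to exist.
    #
    #     kept = store.repack(exclude={bad_sha})
    #     # All remaining objects now live in one consolidated pack.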

    def __iter__(self) -> Iterator[bytes]:
        """Iterate over the SHAs that are present in this store."""
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                yield from pack
            except PackFileDisappeared:
                pass
        yield from self._iter_loose_objects()
        yield from self._iter_alternate_objects()

    def contains_loose(self, sha: bytes) -> bool:
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name: bytes) -> tuple[int, bytes]:
        """Obtain the raw fulltext for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def iter_unpacked_subset(
        self,
        shas: Iterable[bytes],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
          shas: Set of object SHAs to retrieve
          include_comp: Whether to include compressed data
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            assert isinstance(alternate, PackBasedObjectStore)
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        This method searches for objects in pack files, alternates, and loose storage.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for alternate in self.alternates:
            for o in alternate.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for oid in todo:
            loose_obj: Optional[ShaFile] = self._get_loose_object(oid)
            if loose_obj is not None:
                yield loose_obj
            elif not allow_missing:
                raise KeyError(oid)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
          sha1: sha for the object.
          include_comp: Whether to include compression metadata.
        """
        if sha1 == ZERO_SHA:
            raise KeyError(sha1)
        if len(sha1) == 40:
            sha = hex_to_sha(sha1)
            hexsha = sha1
        elif len(sha1) == 20:
            sha = sha1
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(sha1)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            assert isinstance(alternate, PackBasedObjectStore)
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable[[str], None]] = None,
    ) -> Optional["Pack"]:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over (object, path) tuples, should support
            __len__.
          progress: Optional progress reporting function.
        Returns: Pack object of the objects written.
        """
        count = len(objects)
        record_iter = (full_unpacked_object(o) for (o, p) in objects)
        return self.add_pack_data(count, record_iter, progress=progress)
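
    # Illustrative sketch (not part of the module): writing two blobs into
    # a new pack in one call; ``store`` is assumed to exist, and the path
    # hints may be None.
    #
    #     from dulwich.objects import Blob
    #
    #     b1, b2 = Blob.from_string(b"one"), Blob.from_string(b"two")
    #     pack = store.add_objects([(b1, None), (b2, None)])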


class DiskObjectStore(PackBasedObjectStore):
    """Git-style object store that exists on disk."""

    path: Union[str, os.PathLike[str]]
    pack_dir: Union[str, os.PathLike[str]]
    _alternates: Optional[list["BaseObjectStore"]]
    _commit_graph: Optional["CommitGraph"]

    def __init__(
        self,
        path: Union[str, os.PathLike[str]],
        loose_compression_level: int = -1,
        pack_compression_level: int = -1,
        pack_index_version: Optional[int] = None,
        pack_delta_window_size: Optional[int] = None,
        pack_window_memory: Optional[int] = None,
        pack_delta_cache_size: Optional[int] = None,
        pack_depth: Optional[int] = None,
        pack_threads: Optional[int] = None,
        pack_big_file_threshold: Optional[int] = None,
        fsync_object_files: bool = False,
    ) -> None:
        """Open an object store.

        Args:
          path: Path of the object store.
          loose_compression_level: zlib compression level for loose objects
          pack_compression_level: zlib compression level for pack objects
          pack_index_version: pack index version to use (1, 2, or 3)
          pack_delta_window_size: sliding window size for delta compression
          pack_window_memory: memory limit for delta window operations
          pack_delta_cache_size: size of cache for delta operations
          pack_depth: maximum delta chain depth
          pack_threads: number of threads for pack operations
          pack_big_file_threshold: threshold for treating files as big
          fsync_object_files: whether to fsync object files for durability
        """
        super().__init__(
            pack_compression_level=pack_compression_level,
            pack_index_version=pack_index_version,
            pack_delta_window_size=pack_delta_window_size,
            pack_window_memory=pack_window_memory,
            pack_delta_cache_size=pack_delta_cache_size,
            pack_depth=pack_depth,
            pack_threads=pack_threads,
            pack_big_file_threshold=pack_big_file_threshold,
        )
        self.path = path
        self.pack_dir = os.path.join(self.path, PACKDIR)
        self._alternates = None
        self.loose_compression_level = loose_compression_level
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.fsync_object_files = fsync_object_files

        # Commit graph support - lazy loaded
        self._commit_graph = None
        self._use_commit_graph = True  # Default to true

    def __repr__(self) -> str:
        """Return string representation of DiskObjectStore.

        Returns:
          String representation including the store path
        """
        return f"<{self.__class__.__name__}({self.path!r})>"

    @classmethod
    def from_config(
        cls, path: Union[str, os.PathLike[str]], config: "Config"
    ) -> "DiskObjectStore":
        """Create a DiskObjectStore from a configuration object.

        Args:
          path: Path to the object store directory
          config: Configuration object to read settings from

        Returns:
          New DiskObjectStore instance configured according to config
        """
        try:
            default_compression_level = int(
                config.get((b"core",), b"compression").decode()
            )
        except KeyError:
            default_compression_level = -1
        try:
            loose_compression_level = int(
                config.get((b"core",), b"looseCompression").decode()
            )
        except KeyError:
            loose_compression_level = default_compression_level
        try:
            pack_compression_level = int(
                config.get((b"core",), b"packCompression").decode()
            )
        except KeyError:
            pack_compression_level = default_compression_level
        try:
            pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
        except KeyError:
            pack_index_version = None

        # Read pack configuration options
        try:
            pack_delta_window_size = int(
                config.get((b"pack",), b"deltaWindowSize").decode()
            )
        except KeyError:
            pack_delta_window_size = None
        try:
            pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
        except KeyError:
            pack_window_memory = None
        try:
            pack_delta_cache_size = int(
                config.get((b"pack",), b"deltaCacheSize").decode()
            )
        except KeyError:
            pack_delta_cache_size = None
        try:
            pack_depth = int(config.get((b"pack",), b"depth").decode())
        except KeyError:
            pack_depth = None
        try:
            pack_threads = int(config.get((b"pack",), b"threads").decode())
        except KeyError:
            pack_threads = None
        try:
            pack_big_file_threshold = int(
                config.get((b"pack",), b"bigFileThreshold").decode()
            )
        except KeyError:
            pack_big_file_threshold = None

        # Read core.commitGraph setting
        use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

        # Read core.fsyncObjectFiles setting
        fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)

        instance = cls(
            path,
            loose_compression_level,
            pack_compression_level,
            pack_index_version,
            pack_delta_window_size,
            pack_window_memory,
            pack_delta_cache_size,
            pack_depth,
            pack_threads,
            pack_big_file_threshold,
            fsync_object_files,
        )
        instance._use_commit_graph = use_commit_graph
        return instance

    @property
    def alternates(self) -> list["BaseObjectStore"]:
        """Get the list of alternate object stores.

        Reads from .git/objects/info/alternates if not already cached.

        Returns:
          List of DiskObjectStore instances for alternate object directories
        """
        if self._alternates is not None:
            return self._alternates
        self._alternates = []
        for path in self._read_alternate_paths():
            self._alternates.append(DiskObjectStore(path))
        return self._alternates

    def _read_alternate_paths(self) -> Iterator[str]:
        try:
            f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
        except FileNotFoundError:
            return
        with f:
            for line in f.readlines():
                line = line.rstrip(b"\n")
                if line.startswith(b"#"):
                    continue
                if os.path.isabs(line):
                    yield os.fsdecode(line)
                else:
                    yield os.fsdecode(os.path.join(os.fsencode(self.path), line))

    def add_alternate_path(self, path: Union[str, os.PathLike[str]]) -> None:
        """Add an alternate path to this object store."""
        try:
            os.mkdir(os.path.join(self.path, INFODIR))
        except FileExistsError:
            pass
        alternates_path = os.path.join(self.path, INFODIR, "alternates")
        with GitFile(alternates_path, "wb") as f:
            try:
                orig_f = open(alternates_path, "rb")
            except FileNotFoundError:
                pass
            else:
                with orig_f:
                    f.write(orig_f.read())
            f.write(os.fsencode(path) + b"\n")

        if not os.path.isabs(path):
            path = os.path.join(self.path, path)
        self.alternates.append(DiskObjectStore(path))

    def _update_pack_cache(self) -> list[Pack]:
        """Read and iterate over new pack files and cache them."""
        try:
            pack_dir_contents = os.listdir(self.pack_dir)
        except FileNotFoundError:
            self.close()
            return []
        pack_files = set()
        for name in pack_dir_contents:
            if name.startswith("pack-") and name.endswith(".pack"):
                # verify that idx exists first (otherwise the pack was not yet
                # fully written)
                idx_name = os.path.splitext(name)[0] + ".idx"
                if idx_name in pack_dir_contents:
                    pack_name = name[: -len(".pack")]
                    pack_files.add(pack_name)

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = Pack(
                    os.path.join(self.pack_dir, f),
                    delta_window_size=self.pack_delta_window_size,
                    window_memory=self.pack_window_memory,
                    delta_cache_size=self.pack_delta_cache_size,
                    depth=self.pack_depth,
                    threads=self.pack_threads,
                    big_file_threshold=self.pack_big_file_threshold,
                )
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _get_shafile_path(self, sha: bytes) -> str:
        # Check from object dir
        return hex_to_filename(os.fspath(self.path), sha)

    def _iter_loose_objects(self) -> Iterator[bytes]:
        for base in os.listdir(self.path):
            if len(base) != 2:
                continue
            for rest in os.listdir(os.path.join(self.path, base)):
                sha = os.fsencode(base + rest)
                if not valid_hexsha(sha):
                    continue
                yield sha

    def count_loose_objects(self) -> int:
        """Count the number of loose objects in the object store.

        Returns:
          Number of loose objects
        """
        count = 0
        if not os.path.exists(self.path):
            return 0

        for i in range(256):
            subdir = os.path.join(self.path, f"{i:02x}")
            try:
                count += len(
                    [
                        name
                        for name in os.listdir(subdir)
                        if len(name) == 38  # 40 - 2 for the prefix
                    ]
                )
            except FileNotFoundError:
                # Directory may have been removed or is inaccessible
                continue

        return count

    def _get_loose_object(self, sha: bytes) -> Optional[ShaFile]:
        path = self._get_shafile_path(sha)
        try:
            return ShaFile.from_path(path)
        except FileNotFoundError:
            return None

    def delete_loose_object(self, sha: bytes) -> None:
        """Delete a loose object from disk.

        Args:
          sha: SHA1 of the object to delete

        Raises:
          FileNotFoundError: If the object file doesn't exist
        """
        os.remove(self._get_shafile_path(sha))

    def get_object_mtime(self, sha: bytes) -> float:
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # First check if it's a loose object
        if self.contains_loose(sha):
            path = self._get_shafile_path(sha)
            try:
                return os.path.getmtime(path)
            except FileNotFoundError:
                pass

        # Check if it's in a pack file
        for pack in self.packs:
            try:
                if sha in pack:
                    # Use the pack file's mtime for packed objects
                    pack_path = pack._data_path
                    try:
                        return os.path.getmtime(pack_path)
                    except (FileNotFoundError, AttributeError):
                        pass
            except PackFileDisappeared:
                pass

        raise KeyError(sha)

    def _remove_pack(self, pack: Pack) -> None:
        try:
            del self._pack_cache[os.path.basename(pack._basename)]
        except KeyError:
            pass
        pack.close()
        os.remove(pack.data.path)
        if hasattr(pack.index, "path"):
            os.remove(pack.index.path)

    def _get_pack_basepath(
        self, entries: Iterable[tuple[bytes, int, Union[int, None]]]
    ) -> str:
        suffix_bytes = iter_sha1(entry[0] for entry in entries)
        # TODO: Handle self.pack_dir being bytes
        suffix = suffix_bytes.decode("ascii")
        return os.path.join(self.pack_dir, "pack-" + suffix)

    def _complete_pack(
        self,
        f: BinaryIO,
        path: str,
        num_objects: int,
        indexer: PackIndexer,
        progress: Optional[Callable[..., None]] = None,
    ) -> Pack:
        """Move a specific file containing a pack into the pack directory.

        Note: The file should be on the same file system as the
          packs directory.

        Args:
          f: Open file object for the pack.
          path: Path to the pack file.
          num_objects: Number of objects in the pack.
          indexer: A PackIndexer for indexing the pack.
          progress: Optional progress reporting function.
        """
        entries = []
        for i, entry in enumerate(indexer):
            if progress is not None:
                progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
            entries.append(entry)

        pack_sha, extra_entries = extend_pack(
            f,
            set(indexer.ext_refs()),
            get_raw=self.get_raw,
            compression_level=self.pack_compression_level,
            progress=progress,
        )
        f.flush()
        if self.fsync_object_files:
            try:
                fileno = f.fileno()
            except AttributeError as e:
                raise OSError("fsync requested but file has no fileno()") from e
            else:
                os.fsync(fileno)
        f.close()

        entries.extend(extra_entries)

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)

        for pack in self.packs:
            if pack._basename == pack_base_name:
                return pack

        target_pack_path = pack_base_name + ".pack"
        target_index_path = pack_base_name + ".idx"
        if sys.platform == "win32":
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            with suppress(FileNotFoundError):
                os.remove(target_pack_path)
        os.rename(path, target_pack_path)

        # Write the index.
        with GitFile(
            target_index_path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files
        ) as index_file:
            write_pack_index(
                index_file, entries, pack_sha, version=self.pack_index_version
            )

        # Add the pack to the store and return it.
        final_pack = Pack(
            pack_base_name,
            delta_window_size=self.pack_delta_window_size,
            window_memory=self.pack_window_memory,
            delta_cache_size=self.pack_delta_cache_size,
            depth=self.pack_depth,
            threads=self.pack_threads,
            big_file_threshold=self.pack_big_file_threshold,
        )
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack

    def add_thin_pack(
        self,
        read_all: Callable[[int], bytes],
        read_some: Optional[Callable[[int], bytes]],
        progress: Optional[Callable[..., None]] = None,
    ) -> "Pack":
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. They should never be placed in the object store
        directly, and always indexed and completed as they are copied.

        Args:
          read_all: Read function that blocks until the number of
            requested bytes are read.
          read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
          progress: Optional progress reporting function.
        Returns: A Pack object pointing at the now-completed thin pack in the
          objects/pack directory.
        """
        import tempfile

        fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
        with os.fdopen(fd, "w+b") as f:
            os.chmod(path, PACK_MODE)
            indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)  # type: ignore[arg-type]
            copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)  # type: ignore[arg-type]
            copier.verify(progress=progress)
            return self._complete_pack(f, path, len(copier), indexer, progress=progress)

    def add_pack(
        self,
    ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
          call when the pack is finished and an abort
          function.
        """
        import tempfile

        fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
        f = os.fdopen(fd, "w+b")
        os.chmod(path, PACK_MODE)

        def commit() -> Optional["Pack"]:
            if f.tell() > 0:
                f.seek(0)

                with PackData(path, f) as pd:
                    indexer = PackIndexer.for_pack_data(
                        pd,
                        resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
                    )
                    return self._complete_pack(f, path, len(pd), indexer)  # type: ignore[arg-type]
            else:
                f.close()
                os.remove(path)
                return None

        def abort() -> None:
            f.close()
            os.remove(path)

        return f, commit, abort  # type: ignore[return-value]
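
    # Illustrative sketch (not part of the module): the add_pack()
    # write-then-commit protocol, mirroring how add_pack_data() drives it.
    # ``store`` and ``pack_bytes`` (a complete pack stream) are assumed to
    # exist.
    #
    #     f, commit, abort = store.add_pack()
    #     try:
    #         f.write(pack_bytes)
    #     except BaseException:
    #         abort()
    #         raise
    #     else:
    #         pack = commit()  # indexes and installs the pack, or None if empty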

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store.

        Args:
          obj: Object to add
        """
        path = self._get_shafile_path(obj.id)
        dir = os.path.dirname(path)
        try:
            os.mkdir(dir)
        except FileExistsError:
            pass
        if os.path.exists(path):
            return  # Already there, no need to write again
        with GitFile(path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files) as f:
            f.write(
                obj.as_legacy_object(compression_level=self.loose_compression_level)
            )

    @classmethod
    def init(cls, path: Union[str, os.PathLike[str]]) -> "DiskObjectStore":
        """Initialize a new disk object store.

        Creates the necessary directory structure for a Git object store.

        Args:
          path: Path where the object store should be created

        Returns:
          New DiskObjectStore instance
        """
        try:
            os.mkdir(path)
        except FileExistsError:
            pass
        os.mkdir(os.path.join(path, "info"))
        os.mkdir(os.path.join(path, PACKDIR))
        return cls(path)

    def iter_prefix(self, prefix: bytes) -> Iterator[bytes]:
        """Iterate over all object SHAs with the given prefix.

        Args:
          prefix: Hex prefix to search for (as bytes)

        Returns:
          Iterator of object SHAs (as bytes) matching the prefix
        """
        if len(prefix) < 2:
            yield from super().iter_prefix(prefix)
            return
        seen = set()
        dir = prefix[:2].decode()
        rest = prefix[2:].decode()
        try:
            for name in os.listdir(os.path.join(self.path, dir)):
                if name.startswith(rest):
                    sha = os.fsencode(dir + name)
                    if sha not in seen:
                        seen.add(sha)
                        yield sha
        except FileNotFoundError:
            pass

        for p in self.packs:
            bin_prefix = (
                binascii.unhexlify(prefix)
                if len(prefix) % 2 == 0
                else binascii.unhexlify(prefix[:-1])
            )
            for sha in p.index.iter_prefix(bin_prefix):
                sha = sha_to_hex(sha)
                if sha.startswith(prefix) and sha not in seen:
                    seen.add(sha)
                    yield sha
        for alternate in self.alternates:
            for sha in alternate.iter_prefix(prefix):
                if sha not in seen:
                    seen.add(sha)
                    yield sha
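
    # Illustrative sketch (not part of the module): resolving an
    # abbreviated SHA by treating a unique prefix match as the resolution.
    # ``store`` is assumed to exist; the prefix is an example value.
    #
    #     matches = list(store.iter_prefix(b"deadbee"))
    #     if len(matches) == 1:
    #         full_sha = matches[0]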

    def get_commit_graph(self) -> Optional["CommitGraph"]:
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        if not self._use_commit_graph:
            return None

        if self._commit_graph is None:
            from .commit_graph import read_commit_graph

            # Look for commit graph in our objects directory
            graph_file = os.path.join(self.path, "info", "commit-graph")
            if os.path.exists(graph_file):
                self._commit_graph = read_commit_graph(graph_file)
        return self._commit_graph

    def write_commit_graph(
        self, refs: Optional[Iterable[bytes]] = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.
        """
        from .commit_graph import get_reachable_commits

        if refs is None:
            # Get all commit objects from the object store
            all_refs = []
            # Iterate through all objects to find commits
            for sha in self:
                try:
                    obj = self[sha]
                    if obj.type_name == b"commit":
                        all_refs.append(sha)
                except KeyError:
                    continue
        else:
            # Use provided refs
            all_refs = list(refs)

        if not all_refs:
            return  # No commits to include

        if reachable:
            # Get all reachable commits
            commit_ids = get_reachable_commits(self, all_refs)
        else:
            # Just use the direct ref targets - ensure they're hex ObjectIDs
            commit_ids = []
            for ref in all_refs:
                if isinstance(ref, bytes) and len(ref) == 40:
                    # Already hex ObjectID
                    commit_ids.append(ref)
                elif isinstance(ref, bytes) and len(ref) == 20:
                    # Binary SHA, convert to hex ObjectID
                    from .objects import sha_to_hex

                    commit_ids.append(sha_to_hex(ref))
                else:
                    # Assume it's already correct format
                    commit_ids.append(ref)

        if commit_ids:
            # Write commit graph directly to our object store path
            # Generate the commit graph
            from .commit_graph import generate_commit_graph

            graph = generate_commit_graph(self, commit_ids)

            if graph.entries:
                # Ensure the info directory exists
                info_dir = os.path.join(self.path, "info")
                os.makedirs(info_dir, exist_ok=True)

                # Write using GitFile for atomic operation
                graph_path = os.path.join(info_dir, "commit-graph")
                with GitFile(graph_path, "wb") as f:
                    assert isinstance(
                        f, _GitFile
                    )  # GitFile in write mode always returns _GitFile
                    graph.write_to_file(f)

        # Clear cached commit graph so it gets reloaded
        self._commit_graph = None
1866 def prune(self, grace_period: Optional[int] = None) -> None:
1867 """Prune/clean up this object store.
1869 This removes temporary files that were left behind by interrupted
1870 pack operations. These are files that start with ``tmp_pack_`` in the
1871 repository directory or files with .pack extension but no corresponding
1872 .idx file in the pack directory.
1874 Args:
1875 grace_period: Grace period in seconds for removing temporary files.
1876 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
1877 """
1878 import glob
1880 if grace_period is None:
1881 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1883 # Clean up tmp_pack_* files in the repository directory
1884 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1885 # Check if file is old enough (more than grace period)
1886 mtime = os.path.getmtime(tmp_file)
1887 if time.time() - mtime > grace_period:
1888 os.remove(tmp_file)
1890 # Clean up orphaned .pack files without corresponding .idx files
1891 try:
1892 pack_dir_contents = os.listdir(self.pack_dir)
1893 except FileNotFoundError:
1894 return
1896 pack_files = {}
1897 idx_files = set()
1899 for name in pack_dir_contents:
1900 if name.endswith(".pack"):
1901 base_name = name[:-5] # Remove .pack extension
1902 pack_files[base_name] = name
1903 elif name.endswith(".idx"):
1904 base_name = name[:-4] # Remove .idx extension
1905 idx_files.add(base_name)
1907 # Remove .pack files without corresponding .idx files
1908 for base_name, pack_name in pack_files.items():
1909 if base_name not in idx_files:
1910 pack_path = os.path.join(self.pack_dir, pack_name)
1911 # Check if file is old enough (more than grace period)
1912 mtime = os.path.getmtime(pack_path)
1913 if time.time() - mtime > grace_period:
1914 os.remove(pack_path)
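# Editor's sketch: exercising the commit-graph and maintenance APIs defined
# above. `store` is assumed to be a disk-backed store exposing these methods
# and `head` the hex SHA of an existing commit; illustrative only, not part
# of the module.
def _example_commit_graph_and_prune(store, head):
    # Generate objects/info/commit-graph for all commits reachable from head.
    store.write_commit_graph(refs=[head], reachable=True)
    graph = store.get_commit_graph()
    # Parent lookups can now be served without parsing commit objects.
    parents = graph.get_parents(head) if graph is not None else None
    # Separately, drop stale tmp_pack_* files and orphaned .pack files
    # older than one day (the default grace period is two weeks).
    store.prune(grace_period=24 * 60 * 60)
    return parents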
1917class MemoryObjectStore(PackCapableObjectStore):
1918 """Object store that keeps all objects in memory."""
1920 def __init__(self) -> None:
1921 """Initialize a MemoryObjectStore.
1923 Creates an empty in-memory object store.
1924 """
1925 super().__init__()
1926 self._data: dict[bytes, ShaFile] = {}
1927 self.pack_compression_level = -1
1929 def _to_hexsha(self, sha: bytes) -> bytes:
1930 if len(sha) == 40:
1931 return sha
1932 elif len(sha) == 20:
1933 return sha_to_hex(sha)
1934 else:
1935 raise ValueError(f"Invalid sha {sha!r}")
1937 def contains_loose(self, sha: bytes) -> bool:
1938 """Check if a particular object is present by SHA1 and is loose."""
1939 return self._to_hexsha(sha) in self._data
1941 def contains_packed(self, sha: bytes) -> bool:
1942 """Check if a particular object is present by SHA1 and is packed."""
1943 return False
1945 def __iter__(self) -> Iterator[bytes]:
1946 """Iterate over the SHAs that are present in this store."""
1947 return iter(self._data.keys())
1949 @property
1950 def packs(self) -> list[Pack]:
1951 """List with pack objects."""
1952 return []
1954 def get_raw(self, name: ObjectID) -> tuple[int, bytes]:
1955 """Obtain the raw text for an object.
1957 Args:
1958 name: sha for the object.
1959 Returns: tuple with numeric type and object contents.
1960 """
1961 obj = self[self._to_hexsha(name)]
1962 return obj.type_num, obj.as_raw_string()
1964 def __getitem__(self, name: ObjectID) -> ShaFile:
1965 """Retrieve an object by SHA.
1967 Args:
1968 name: SHA of the object (as hex string or bytes)
1970 Returns:
1971 Copy of the ShaFile object
1973 Raises:
1974 KeyError: If the object is not found
1975 """
1976 return self._data[self._to_hexsha(name)].copy()
1978 def __delitem__(self, name: ObjectID) -> None:
1979 """Delete an object from this store, for testing only."""
1980 del self._data[self._to_hexsha(name)]
1982 def add_object(self, obj: ShaFile) -> None:
1983 """Add a single object to this object store."""
1984 self._data[obj.id] = obj.copy()
1986 def add_objects(
1987 self,
1988 objects: Iterable[tuple[ShaFile, Optional[str]]],
1989 progress: Optional[Callable[[str], None]] = None,
1990 ) -> None:
1991 """Add a set of objects to this object store.
1993 Args:
1994 objects: Iterable over a list of (object, path) tuples
1995 progress: Optional progress reporting function.
1996 """
1997 for obj, path in objects:
1998 self.add_object(obj)
2000 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2001 """Add a new pack to this object store.
2003 Because this object store doesn't support packs, we extract and add the
2004 individual objects.
2006 Returns: Fileobject to write to, a commit function to
2007 call when the pack is finished, and an abort function.
2008 """
2009 from tempfile import SpooledTemporaryFile
2011 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
2013 def commit() -> None:
2014 size = f.tell()
2015 if size > 0:
2016 f.seek(0)
2018 p = PackData.from_file(f, size)
2019 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type]
2020 self.add_object(obj)
2021 p.close()
2022 f.close()
2023 else:
2024 f.close()
2026 def abort() -> None:
2027 f.close()
2029 return f, commit, abort # type: ignore[return-value]
2031 def add_pack_data(
2032 self,
2033 count: int,
2034 unpacked_objects: Iterator[UnpackedObject],
2035 progress: Optional[Callable[[str], None]] = None,
2036 ) -> None:
2037 """Add pack data to this object store.
2039 Args:
2040 count: Number of items to add
2041 unpacked_objects: Iterator of UnpackedObject instances
2042 progress: Optional progress reporting function.
2043 """
2044 if count == 0:
2045 return
2047 # Since MemoryObjectStore doesn't support pack files, we need to
2048 # extract individual objects. To handle deltas properly, we write
2049 # to a temporary pack and then use PackInflater to resolve them.
2050 f, commit, abort = self.add_pack()
2051 try:
2052 write_pack_data(
2053 f.write,
2054 unpacked_objects,
2055 num_records=count,
2056 progress=progress,
2057 )
2058 except BaseException:
2059 abort()
2060 raise
2061 else:
2062 commit()
2064 def add_thin_pack(
2065 self,
2066 read_all: Callable[[], bytes],
2067 read_some: Callable[[int], bytes],
2068 progress: Optional[Callable[[str], None]] = None,
2069 ) -> None:
2070 """Add a new thin pack to this object store.
2072 Thin packs are packs that contain deltas with parents that exist
2073 outside the pack. Because this object store doesn't support packs, we
2074 extract and add the individual objects.
2076 Args:
2077 read_all: Read function that blocks until the number of
2078 requested bytes are read.
2079 read_some: Read function that returns at least one byte, but may
2080 not return the number of bytes requested.
2081 progress: Optional progress reporting function.
2082 """
2083 f, commit, abort = self.add_pack()
2084 try:
2085 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type]
2086 copier.verify()
2087 except BaseException:
2088 abort()
2089 raise
2090 else:
2091 commit()
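# Editor's sketch: basic MemoryObjectStore usage, assuming only the API
# defined in this class plus dulwich.objects.Blob (imported above).
def _example_memory_store():
    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello world")
    store.add_object(blob)
    # Membership and raw retrieval both work on the hex SHA.
    assert blob.id in store
    type_num, raw = store.get_raw(blob.id)
    return type_num, raw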
2094class ObjectIterator(Protocol):
2095 """Interface for iterating over objects."""
2097 def iterobjects(self) -> Iterator[ShaFile]:
2098 """Iterate over all objects.
2100 Returns:
2101 Iterator of ShaFile objects
2102 """
2103 raise NotImplementedError(self.iterobjects)
2106def tree_lookup_path(
2107 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes
2108) -> tuple[int, bytes]:
2109 """Look up an object in a Git tree.
2111 Args:
2112 lookup_obj: Callback for retrieving object by SHA1
2113 root_sha: SHA1 of the root tree
2114 path: Path to lookup
2115 Returns: A tuple of (mode, SHA) of the resulting path.
2116 """
2117 tree = lookup_obj(root_sha)
2118 if not isinstance(tree, Tree):
2119 raise NotTreeError(root_sha)
2120 return tree.lookup_path(lookup_obj, path)
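# Editor's sketch: resolving a nested path inside a tree. `store` and
# `root_tree` are hypothetical; paths are bytes, as elsewhere in dulwich.
def _example_tree_lookup(store, root_tree):
    # mode distinguishes blobs from subtrees; sha identifies the object.
    mode, sha = tree_lookup_path(store.__getitem__, root_tree, b"docs/index.txt")
    return mode, store[sha]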
2123def _collect_filetree_revs(
2124 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
2125) -> None:
2126 """Collect SHA1s of files and directories for specified tree.
2128 Args:
2129 obj_store: Object store to get objects by SHA from
2130 tree_sha: tree reference to walk
2131 kset: set to fill with references to files and directories
2132 """
2133 filetree = obj_store[tree_sha]
2134 assert isinstance(filetree, Tree)
2135 for name, mode, sha in filetree.iteritems():
2136 assert mode is not None
2137 assert sha is not None
2138 if not S_ISGITLINK(mode) and sha not in kset:
2139 kset.add(sha)
2140 if stat.S_ISDIR(mode):
2141 _collect_filetree_revs(obj_store, sha, kset)
2144def _split_commits_and_tags(
2145 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False
2146) -> tuple[set[bytes], set[bytes], set[bytes]]:
2147 """Split object id list into three lists with commit, tag, and other SHAs.
2149 Commits referenced by tags are included into commits
2150 list as well. Only SHA1s known in this repository will get
2151 through, and unless ignore_unknown argument is True, KeyError
2152 is thrown for SHA1 missing in the repository
2154 Args:
2155 obj_store: Object store to get objects by SHA1 from
2156 lst: Collection of commit and tag SHAs
2157 ignore_unknown: True to skip SHA1 missing in the repository
2158 silently.
2159 Returns: A tuple of (commits, tags, others) SHA1s
2160 """
2161 commits: set[bytes] = set()
2162 tags: set[bytes] = set()
2163 others: set[bytes] = set()
2164 for e in lst:
2165 try:
2166 o = obj_store[e]
2167 except KeyError:
2168 if not ignore_unknown:
2169 raise
2170 else:
2171 if isinstance(o, Commit):
2172 commits.add(e)
2173 elif isinstance(o, Tag):
2174 tags.add(e)
2175 tagged = o.object[1]
2176 c, t, os = _split_commits_and_tags(
2177 obj_store, [tagged], ignore_unknown=ignore_unknown
2178 )
2179 commits |= c
2180 tags |= t
2181 others |= os
2182 else:
2183 others.add(e)
2184 return (commits, tags, others)
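# Editor's sketch: classifying a mixed list of SHAs, silently skipping any
# that are unknown locally. `store` and `shas` are hypothetical.
def _example_split(store, shas):
    commits, tags, others = _split_commits_and_tags(
        store, shas, ignore_unknown=True
    )
    return commits, tags, others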
2187class MissingObjectFinder:
2188 """Find the objects missing from another object store.
2190 Args:
2191 object_store: Object store containing at least all objects to be
2192 sent
2193 haves: SHA1s of commits not to send (already present in target)
2194 wants: SHA1s of commits to send
2195 progress: Optional function to report progress to.
2196 get_tagged: Function that returns a dict of pointed-to sha -> tag
2197 sha for including tags.
2198 get_parents: Optional function for getting the parents of a commit.
2199 """
2201 def __init__(
2202 self,
2203 object_store: BaseObjectStore,
2204 haves: Iterable[bytes],
2205 wants: Iterable[bytes],
2206 *,
2207 shallow: Optional[Set[bytes]] = None,
2208 progress: Optional[Callable[[bytes], None]] = None,
2209 get_tagged: Optional[Callable[[], dict[bytes, bytes]]] = None,
2210 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2211 ) -> None:
2212 """Initialize a MissingObjectFinder.
2214 Args:
2215 object_store: Object store containing objects
2216 haves: SHA1s of objects already present in target
2217 wants: SHA1s of objects to send
2218 shallow: Set of shallow commit SHA1s
2219 progress: Optional progress reporting callback
2220 get_tagged: Function returning dict of pointed-to sha -> tag sha
2221 get_parents: Function for getting commit parents
2222 """
2223 self.object_store = object_store
2224 if shallow is None:
2225 shallow = set()
2226 self._get_parents = get_parents
2227 # process Commits and Tags differently
2228 # Note: while haves may list commits/tags not available locally
2229 # (such SHAs are filtered out by _split_commits_and_tags),
2230 # wants must list only known SHAs, otherwise
2231 # _split_commits_and_tags raises KeyError
2232 have_commits, have_tags, have_others = _split_commits_and_tags(
2233 object_store, haves, ignore_unknown=True
2234 )
2235 want_commits, want_tags, want_others = _split_commits_and_tags(
2236 object_store, wants, ignore_unknown=False
2237 )
2238 # all_ancestors is a set of commits that shall not be sent
2239 # (complete repository up to 'haves')
2240 all_ancestors = _collect_ancestors(
2241 object_store,
2242 have_commits,
2243 shallow=frozenset(shallow),
2244 get_parents=self._get_parents,
2245 )[0]
2246 # all_missing - complete set of commits between haves and wants
2247 # common - commits from all_ancestors we hit while
2248 # traversing the parent hierarchy of wants
2249 missing_commits, common_commits = _collect_ancestors(
2250 object_store,
2251 want_commits,
2252 frozenset(all_ancestors),
2253 shallow=frozenset(shallow),
2254 get_parents=self._get_parents,
2255 )
2256 self.remote_has: set[bytes] = set()
2257 # Now, fill sha_done with commits and revisions of
2258 # files and directories known to exist both locally
2259 # and on the target. Thus these commits and files
2260 # won't get selected for fetch
2261 for h in common_commits:
2262 self.remote_has.add(h)
2263 cmt = object_store[h]
2264 assert isinstance(cmt, Commit)
2265 _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
2266 # record tags we have as visited, too
2267 for t in have_tags:
2268 self.remote_has.add(t)
2269 self.sha_done = set(self.remote_has)
2271 # in fact, what we 'want' is commits, tags, and others
2272 # we've found missing
2273 self.objects_to_send: set[
2274 tuple[ObjectID, Optional[bytes], Optional[int], bool]
2275 ] = {(w, None, Commit.type_num, False) for w in missing_commits}
2276 missing_tags = want_tags.difference(have_tags)
2277 self.objects_to_send.update(
2278 {(w, None, Tag.type_num, False) for w in missing_tags}
2279 )
2280 missing_others = want_others.difference(have_others)
2281 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
2283 if progress is None:
2284 self.progress: Callable[[bytes], None] = lambda x: None
2285 else:
2286 self.progress = progress
2287 self._tagged = (get_tagged and get_tagged()) or {}
2289 def get_remote_has(self) -> set[bytes]:
2290 """Get the set of SHAs the remote has.
2292 Returns:
2293 Set of SHA1s that the remote side already has
2294 """
2295 return self.remote_has
2297 def add_todo(
2298 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
2299 ) -> None:
2300 """Add objects to the todo list.
2302 Args:
2303 entries: Iterable of tuples (sha, name, type_num, is_leaf)
2304 """
2305 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
2307 def __next__(self) -> tuple[bytes, Optional[PackHint]]:
2308 """Get the next object to send.
2310 Returns:
2311 Tuple of (sha, pack_hint)
2313 Raises:
2314 StopIteration: When no more objects to send
2315 """
2316 while True:
2317 if not self.objects_to_send:
2318 self.progress(
2319 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
2320 )
2321 raise StopIteration
2322 (sha, name, type_num, leaf) = self.objects_to_send.pop()
2323 if sha not in self.sha_done:
2324 break
2325 if not leaf:
2326 o = self.object_store[sha]
2327 if isinstance(o, Commit):
2328 self.add_todo([(o.tree, b"", Tree.type_num, False)])
2329 elif isinstance(o, Tree):
2330 todos = []
2331 for n, m, s in o.iteritems():
2332 assert m is not None
2333 assert n is not None
2334 assert s is not None
2335 if not S_ISGITLINK(m):
2336 todos.append(
2337 (
2338 s,
2339 n,
2340 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
2341 not stat.S_ISDIR(m),
2342 )
2343 )
2344 self.add_todo(todos)
2345 elif isinstance(o, Tag):
2346 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
2347 if sha in self._tagged:
2348 self.add_todo([(self._tagged[sha], None, None, True)])
2349 self.sha_done.add(sha)
2350 if len(self.sha_done) % 1000 == 0:
2351 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
2352 if type_num is None:
2353 pack_hint = None
2354 else:
2355 pack_hint = (type_num, name)
2356 return (sha, pack_hint)
2358 def __iter__(self) -> Iterator[tuple[bytes, Optional[PackHint]]]:
2359 """Return iterator over objects to send.
2361 Returns:
2362 Self (this class implements the iterator protocol)
2363 """
2364 return self
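# Editor's sketch: computing the objects a client still needs, in the shape
# pack generation expects. `store`, `haves` and `wants` are hypothetical
# collections of hex commit SHAs.
def _example_missing_objects(store, haves, wants):
    finder = MissingObjectFinder(store, haves=haves, wants=wants)
    # Iteration yields (sha, pack_hint) tuples; pack_hint is (type_num, name)
    # when known, else None.
    return [sha for sha, _hint in finder]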
2367class ObjectStoreGraphWalker:
2368 """Graph walker that finds what commits are missing from an object store."""
2370 heads: set[ObjectID]
2371 """Revisions without descendants in the local repo."""
2373 get_parents: Callable[[ObjectID], list[ObjectID]]
2374 """Function to retrieve parents in the local repo."""
2376 shallow: set[ObjectID]
2378 def __init__(
2379 self,
2380 local_heads: Iterable[ObjectID],
2381 get_parents: Callable[[ObjectID], list[ObjectID]],
2382 shallow: Optional[set[ObjectID]] = None,
2383 update_shallow: Optional[
2384 Callable[[Optional[set[ObjectID]], Optional[set[ObjectID]]], None]
2385 ] = None,
2386 ) -> None:
2387 """Create a new instance.
2389 Args:
2390 local_heads: Heads to start search with
2391 get_parents: Function for finding the parents of a SHA1.
2392 shallow: Set of shallow commits.
2393 update_shallow: Function to update shallow commits.
2394 """
2395 self.heads = set(local_heads)
2396 self.get_parents = get_parents
2397 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
2398 if shallow is None:
2399 shallow = set()
2400 self.shallow = shallow
2401 self.update_shallow = update_shallow
2403 def nak(self) -> None:
2404 """Nothing in common was found."""
2406 def ack(self, sha: ObjectID) -> None:
2407 """Ack that a revision and its ancestors are present in the source."""
2408 if len(sha) != 40:
2409 raise ValueError(f"unexpected sha {sha!r} received")
2410 ancestors = {sha}
2412 # stop if we run out of heads to remove
2413 while self.heads:
2414 for a in ancestors:
2415 if a in self.heads:
2416 self.heads.remove(a)
2418 # collect all ancestors
2419 new_ancestors = set()
2420 for a in ancestors:
2421 ps = self.parents.get(a)
2422 if ps is not None:
2423 new_ancestors.update(ps)
2424 self.parents[a] = None
2426 # no more ancestors; stop
2427 if not new_ancestors:
2428 break
2430 ancestors = new_ancestors
2432 def next(self) -> Optional[ObjectID]:
2433 """Iterate over ancestors of heads in the target."""
2434 if self.heads:
2435 ret = self.heads.pop()
2436 try:
2437 ps = self.get_parents(ret)
2438 except KeyError:
2439 return None
2440 self.parents[ret] = ps
2441 self.heads.update([p for p in ps if p not in self.parents])
2442 return ret
2443 return None
2445 __next__ = next
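# Editor's sketch: walking ancestors of the local heads the way a fetch
# negotiation does. `store` and `local_heads` are hypothetical.
def _example_graph_walk(store, local_heads):
    def get_parents(sha):
        commit = store[sha]
        assert isinstance(commit, Commit)
        return commit.parents

    walker = ObjectStoreGraphWalker(local_heads, get_parents)
    visited = []
    for sha in iter(walker.next, None):
        visited.append(sha)
        # A real negotiation would call walker.ack(sha) once the remote
        # confirms it has this revision, pruning its ancestors.
    return visited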
2448def commit_tree_changes(
2449 object_store: BaseObjectStore,
2450 tree: Union[ObjectID, Tree],
2451 changes: Sequence[tuple[bytes, Optional[int], Optional[bytes]]],
2452) -> ObjectID:
2453 """Commit a specified set of changes to a tree structure.
2455 This will apply a set of changes on top of an existing tree, storing new
2456 objects in object_store.
2458 changes are a list of tuples with (path, mode, object_sha).
2459 Paths can refer to both blobs and trees. Setting the mode and
2460 object sha to None deletes the path.
2462 This method works especially well if there are only a small
2463 number of changes to a big tree. For a large number of changes
2464 to a large tree, use e.g. commit_tree.
2466 Args:
2467 object_store: Object store to store new objects in
2468 and retrieve old ones from.
2469 tree: Original tree root (SHA or Tree object)
2470 changes: changes to apply
2471 Returns: SHA1 of the new tree root
2472 """
2473 # TODO(jelmer): Save up the objects and add them using .add_objects
2474 # rather than with individual calls to .add_object.
2475 # Handle both Tree object and SHA
2476 if isinstance(tree, Tree):
2477 tree_obj: Tree = tree
2478 else:
2479 sha_obj = object_store[tree]
2480 assert isinstance(sha_obj, Tree)
2481 tree_obj = sha_obj
2482 nested_changes: dict[bytes, list[tuple[bytes, Optional[int], Optional[bytes]]]] = {}
2483 for path, new_mode, new_sha in changes:
2484 try:
2485 (dirname, subpath) = path.split(b"/", 1)
2486 except ValueError:
2487 if new_sha is None:
2488 del tree_obj[path]
2489 else:
2490 assert new_mode is not None
2491 tree_obj[path] = (new_mode, new_sha)
2492 else:
2493 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
2494 for name, subchanges in nested_changes.items():
2495 try:
2496 orig_subtree_id: Union[bytes, Tree] = tree_obj[name][1]
2497 except KeyError:
2498 # For new directories, pass an empty Tree object
2499 orig_subtree_id = Tree()
2500 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
2501 subtree = object_store[subtree_id]
2502 assert isinstance(subtree, Tree)
2503 if len(subtree) == 0:
2504 del tree_obj[name]
2505 else:
2506 tree_obj[name] = (stat.S_IFDIR, subtree.id)
2507 object_store.add_object(tree_obj)
2508 return tree_obj.id
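# Editor's sketch: applying a small change set on top of an existing tree.
# `store` and `root_tree` are hypothetical; the blob is stored first so its
# SHA is valid when referenced.
def _example_tree_changes(store, root_tree):
    blob = Blob.from_string(b"new contents")
    store.add_object(blob)
    return commit_tree_changes(
        store,
        root_tree,
        [
            (b"docs/new.txt", 0o100644, blob.id),  # add or replace a file
            (b"obsolete.txt", None, None),         # delete an existing path
        ],
    )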
2511class OverlayObjectStore(BaseObjectStore):
2512 """Object store that can overlay multiple object stores."""
2514 def __init__(
2515 self,
2516 bases: list[BaseObjectStore],
2517 add_store: Optional[BaseObjectStore] = None,
2518 ) -> None:
2519 """Initialize an OverlayObjectStore.
2521 Args:
2522 bases: List of base object stores to overlay
2523 add_store: Optional store to write new objects to
2524 """
2525 self.bases = bases
2526 self.add_store = add_store
2528 def add_object(self, object: ShaFile) -> None:
2529 """Add a single object to the store.
2531 Args:
2532 object: Object to add
2534 Raises:
2535 NotImplementedError: If no add_store was provided
2536 """
2537 if self.add_store is None:
2538 raise NotImplementedError(self.add_object)
2539 return self.add_store.add_object(object)
2541 def add_objects(
2542 self,
2543 objects: Sequence[tuple[ShaFile, Optional[str]]],
2544 progress: Optional[Callable[[str], None]] = None,
2545 ) -> Optional[Pack]:
2546 """Add multiple objects to the store.
2548 Args:
2549 objects: Iterator of objects to add
2550 progress: Optional progress reporting callback
2552 Raises:
2553 NotImplementedError: If no add_store was provided
2554 """
2555 if self.add_store is None:
2556 raise NotImplementedError(self.add_object)
2557 return self.add_store.add_objects(objects, progress)
2559 @property
2560 def packs(self) -> list[Pack]:
2561 """Get the list of packs from all overlaid stores.
2563 Returns:
2564 Combined list of packs from all base stores
2565 """
2566 ret = []
2567 for b in self.bases:
2568 ret.extend(b.packs)
2569 return ret
2571 def __iter__(self) -> Iterator[ObjectID]:
2572 """Iterate over all object SHAs in the overlaid stores.
2574 Returns:
2575 Iterator of object SHAs (deduped across stores)
2576 """
2577 done = set()
2578 for b in self.bases:
2579 for o_id in b:
2580 if o_id not in done:
2581 yield o_id
2582 done.add(o_id)
2584 def iterobjects_subset(
2585 self, shas: Iterable[bytes], *, allow_missing: bool = False
2586 ) -> Iterator[ShaFile]:
2587 """Iterate over a subset of objects from the overlaid stores.
2589 Args:
2590 shas: Iterable of object SHAs to retrieve
2591 allow_missing: If True, skip missing objects; if False, raise KeyError
2593 Returns:
2594 Iterator of ShaFile objects
2596 Raises:
2597 KeyError: If an object is missing and allow_missing is False
2598 """
2599 todo = set(shas)
2600 found: set[bytes] = set()
2602 for b in self.bases:
2603 # Create a copy of todo for each base to avoid modifying
2604 # the set while iterating through it
2605 current_todo = todo - found
2606 for o in b.iterobjects_subset(current_todo, allow_missing=True):
2607 yield o
2608 found.add(o.id)
2610 # Check for any remaining objects not found
2611 missing = todo - found
2612 if missing and not allow_missing:
2613 raise KeyError(next(iter(missing)))
2615 def iter_unpacked_subset(
2616 self,
2617 shas: Iterable[bytes],
2618 include_comp: bool = False,
2619 allow_missing: bool = False,
2620 convert_ofs_delta: bool = True,
2621 ) -> Iterator[UnpackedObject]:
2622 """Iterate over unpacked objects from the overlaid stores.
2624 Args:
2625 shas: Iterable of object SHAs to retrieve
2626 include_comp: Whether to include compressed data
2627 allow_missing: If True, skip missing objects; if False, raise KeyError
2628 convert_ofs_delta: Whether to convert OFS_DELTA objects
2630 Returns:
2631 Iterator of unpacked objects
2633 Raises:
2634 KeyError: If an object is missing and allow_missing is False
2635 """
2636 todo = set(shas)
2637 for b in self.bases:
2638 for o in b.iter_unpacked_subset(
2639 todo,
2640 include_comp=include_comp,
2641 allow_missing=True,
2642 convert_ofs_delta=convert_ofs_delta,
2643 ):
2644 yield o
2645 todo.remove(o.sha())
2646 if todo and not allow_missing:
2647 raise KeyError(next(iter(todo)))
2649 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]:
2650 """Get the raw object data from the overlaid stores.
2652 Args:
2653 sha_id: SHA of the object
2655 Returns:
2656 Tuple of (type_num, raw_data)
2658 Raises:
2659 KeyError: If object not found in any base store
2660 """
2661 for b in self.bases:
2662 try:
2663 return b.get_raw(sha_id)
2664 except KeyError:
2665 pass
2666 raise KeyError(sha_id)
2668 def contains_packed(self, sha: bytes) -> bool:
2669 """Check if an object is packed in any base store.
2671 Args:
2672 sha: SHA of the object
2674 Returns:
2675 True if object is packed in any base store
2676 """
2677 for b in self.bases:
2678 if b.contains_packed(sha):
2679 return True
2680 return False
2682 def contains_loose(self, sha: bytes) -> bool:
2683 """Check if an object is loose in any base store.
2685 Args:
2686 sha: SHA of the object
2688 Returns:
2689 True if object is loose in any base store
2690 """
2691 for b in self.bases:
2692 if b.contains_loose(sha):
2693 return True
2694 return False
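# Editor's sketch: layering a writable scratch store over a read-only base.
# `base_store` is hypothetical; new objects land in the memory store while
# lookups fall through the bases in order.
def _example_overlay(base_store):
    scratch = MemoryObjectStore()
    overlay = OverlayObjectStore([scratch, base_store], add_store=scratch)
    blob = Blob.from_string(b"overlay-only data")
    overlay.add_object(blob)
    return overlay.get_raw(blob.id)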
2697def read_packs_file(f: BinaryIO) -> Iterator[str]:
2698 """Yield the packs listed in a packs file."""
2699 for line in f.read().splitlines():
2700 if not line:
2701 continue
2702 (kind, name) = line.split(b" ", 1)
2703 if kind != b"P":
2704 continue
2705 yield os.fsdecode(name)
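# Editor's sketch: the objects/info/packs format this parser accepts, one
# "P <pack name>" record per line; other record kinds are ignored.
def _example_read_packs():
    f = BytesIO(b"P pack-1234abcd.pack\nP pack-5678ef90.pack\n")
    return list(read_packs_file(f))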
2708class BucketBasedObjectStore(PackBasedObjectStore):
2709 """Object store implementation that uses a bucket store like S3 as backend."""
2711 def _iter_loose_objects(self) -> Iterator[bytes]:
2712 """Iterate over the SHAs of all loose objects."""
2713 return iter([])
2715 def _get_loose_object(self, sha: bytes) -> None:
2716 return None
2718 def delete_loose_object(self, sha: bytes) -> None:
2719 """Delete a loose object (no-op for bucket stores).
2721 Bucket-based stores don't have loose objects, so this is a no-op.
2723 Args:
2724 sha: SHA of the object to delete
2725 """
2726 # Bucket-based stores have no loose objects, so there is nothing to do
2728 def pack_loose_objects(
2729 self, progress: Optional[Callable[[str], None]] = None
2730 ) -> int:
2731 """Pack loose objects. Returns number of objects packed.
2733 BucketBasedObjectStore doesn't support loose objects, so this is a no-op.
2735 Args:
2736 progress: Optional progress reporting callback (ignored)
2737 """
2738 return 0
2740 def _remove_pack_by_name(self, name: str) -> None:
2741 """Remove a pack by name. Subclasses should implement this."""
2742 raise NotImplementedError(self._remove_pack_by_name)
2744 def _iter_pack_names(self) -> Iterator[str]:
2745 raise NotImplementedError(self._iter_pack_names)
2747 def _get_pack(self, name: str) -> Pack:
2748 raise NotImplementedError(self._get_pack)
2750 def _update_pack_cache(self) -> list[Pack]:
2751 pack_files = set(self._iter_pack_names())
2753 # Open newly appeared pack files
2754 new_packs = []
2755 for f in pack_files:
2756 if f not in self._pack_cache:
2757 pack = self._get_pack(f)
2758 new_packs.append(pack)
2759 self._pack_cache[f] = pack
2760 # Remove disappeared pack files
2761 for f in set(self._pack_cache) - pack_files:
2762 self._pack_cache.pop(f).close()
2763 return new_packs
2765 def _upload_pack(
2766 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
2767 ) -> None:
2768 raise NotImplementedError
2770 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2771 """Add a new pack to this object store.
2773 Returns: Fileobject to write to, a commit function to
2774 call when the pack is finished and an abort
2775 function.
2776 """
2777 import tempfile
2779 pf = tempfile.SpooledTemporaryFile(
2780 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2781 )
2783 def commit() -> Optional[Pack]:
2784 if pf.tell() == 0:
2785 pf.close()
2786 return None
2788 pf.seek(0)
2790 p = PackData(pf.name, pf)
2791 entries = p.sorted_entries()
2792 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
2793 idxf = tempfile.SpooledTemporaryFile(
2794 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2795 )
2796 checksum = p.get_stored_checksum()
2797 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
2798 idxf.seek(0)
2799 idx = load_pack_index_file(basename + ".idx", idxf)
2800 for pack in self.packs:
2801 if pack.get_stored_checksum() == p.get_stored_checksum():
2802 p.close()
2803 idx.close()
2804 pf.close()
2805 idxf.close()
2806 return pack
2807 pf.seek(0)
2808 idxf.seek(0)
2809 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type]
2810 final_pack = Pack.from_objects(p, idx)
2811 self._add_cached_pack(basename, final_pack)
2812 pf.close()
2813 idxf.close()
2814 return final_pack
2816 return pf, commit, pf.close # type: ignore[return-value]
2819def _collect_ancestors(
2820 store: ObjectContainer,
2821 heads: Iterable[ObjectID],
2822 common: frozenset[ObjectID] = frozenset(),
2823 shallow: frozenset[ObjectID] = frozenset(),
2824 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2825) -> tuple[set[ObjectID], set[ObjectID]]:
2826 """Collect all ancestors of heads up to (excluding) those in common.
2828 Args:
2829 store: Object store to get commits from
2830 heads: commits to start from
2831 common: commits to end at, or empty set to walk repository
2832 completely
2833 shallow: Set of shallow commits
2834 get_parents: Optional function for getting the parents of a
2835 commit.
2836 Returns: a tuple (A, B), where A is the set of all commits reachable
2837 from heads but not present in common, and B is the set of common
2838 (shared) elements that are directly reachable from heads.
2839 """
2840 bases = set()
2841 commits = set()
2842 queue: list[ObjectID] = []
2843 queue.extend(heads)
2845 # Try to use commit graph if available
2846 commit_graph = store.get_commit_graph()
2848 while queue:
2849 e = queue.pop(0)
2850 if e in common:
2851 bases.add(e)
2852 elif e not in commits:
2853 commits.add(e)
2854 if e in shallow:
2855 continue
2857 # Try to use commit graph for parent lookup
2858 parents = None
2859 if commit_graph:
2860 parents = commit_graph.get_parents(e)
2862 if parents is None:
2863 # Fall back to loading the object
2864 cmt = store[e]
2865 assert isinstance(cmt, Commit)
2866 parents = get_parents(cmt)
2868 queue.extend(parents)
2869 return (commits, bases)
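# Editor's sketch: collecting commits reachable from a head but not from a
# known base, as MissingObjectFinder does above. `store`, `head` and `base`
# are hypothetical; the SHAs are hex ObjectIDs.
def _example_collect(store, head, base):
    missing, common = _collect_ancestors(store, [head], frozenset([base]))
    return missing, common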
2872def iter_tree_contents(
2873 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
2874) -> Iterator[TreeEntry]:
2875 """Iterate the contents of a tree and all subtrees.
2877 Iteration is depth-first pre-order, as in e.g. os.walk.
2879 Args:
2880 store: Object store to get trees from
2881 tree_id: SHA1 of the tree.
2882 include_trees: If True, include tree objects in the iteration.
2884 Yields: TreeEntry namedtuples for all the objects in a tree.
2885 """
2886 if tree_id is None:
2887 return
2888 # This could be fairly easily generalized to >2 trees if we find a use
2889 # case.
2890 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
2891 while todo:
2892 entry = todo.pop()
2893 assert entry.mode is not None
2894 if stat.S_ISDIR(entry.mode):
2895 extra = []
2896 assert entry.sha is not None
2897 tree = store[entry.sha]
2898 assert isinstance(tree, Tree)
2899 for subentry in tree.iteritems(name_order=True):
2900 assert entry.path is not None
2901 extra.append(subentry.in_path(entry.path))
2902 todo.extend(reversed(extra))
2903 if not stat.S_ISDIR(entry.mode) or include_trees:
2904 yield entry
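# Editor's sketch: enumerating every blob under a tree, depth-first.
# `store` and `tree_id` are hypothetical.
def _example_list_tree(store, tree_id):
    return [
        (entry.path, entry.mode, entry.sha)
        for entry in iter_tree_contents(store, tree_id)
    ]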
2907def iter_commit_contents(
2908 store: ObjectContainer,
2909 commit: Union[Commit, bytes],
2910 *,
2911 include: Optional[Sequence[Union[str, bytes, Path]]] = None,
2912) -> Iterator[TreeEntry]:
2913 """Iterate the contents of the repository at the specified commit.
2915 This is a wrapper around iter_tree_contents() and
2916 tree_lookup_path() to simplify the common task of getting the
2917 contents of a repo at a particular commit. See also
2918 dulwich.index.build_file_from_blob() for writing individual files
2919 to disk.
2921 Args:
2922 store: Object store to get trees from
2923 commit: Commit object, or SHA1 of a commit
2924 include: if provided, only the entries whose paths are in the
2925 list, or whose parent tree is in the list, will be
2926 included. Note that duplicate or overlapping paths
2927 (e.g. ["foo", "foo/bar"]) may result in duplicate entries.
2929 Yields: TreeEntry namedtuples for all matching files in a commit.
2930 """
2931 sha = commit.id if isinstance(commit, Commit) else commit
2932 if not isinstance(obj := store[sha], Commit):
2933 raise TypeError(
2934 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
2935 )
2936 commit = obj
2937 encoding = commit.encoding or "utf-8"
2938 include_bytes: list[bytes] = (
2939 [
2940 path if isinstance(path, bytes) else str(path).encode(encoding)
2941 for path in include
2942 ]
2943 if include is not None
2944 else [b""]
2945 )
2947 for path in include_bytes:
2948 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
2949 # Iterate all contained files if path points to a dir, otherwise just get that
2950 # single file
2951 if isinstance(store[obj_id], Tree):
2952 for entry in iter_tree_contents(store, obj_id):
2953 yield entry.in_path(path)
2954 else:
2955 yield TreeEntry(path, mode, obj_id)
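# Editor's sketch: restricting iteration to one subdirectory of a commit.
# `store` and `commit_sha` are hypothetical.
def _example_commit_subdir(store, commit_sha):
    return [
        entry.path
        for entry in iter_commit_contents(store, commit_sha, include=[b"docs"])
    ]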
2958def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
2959 """Peel all tags from a SHA.
2961 Args:
2962 store: Object store to get objects from
2963 sha: The object SHA to peel.
2964 Returns: Tuple of (unpeeled, peeled) objects, where peeled is the
2965 object reached after following all intermediate tags; if the
2966 original SHA does not point to a tag, both are the same object.
2967 """
2968 unpeeled = obj = store[sha]
2969 obj_class = object_class(obj.type_name)
2970 while obj_class is Tag:
2971 assert isinstance(obj, Tag)
2972 obj_class, sha = obj.object
2973 obj = store[sha]
2974 return unpeeled, obj
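# Editor's sketch: following an annotated tag chain down to the object it
# ultimately points at. `store` and `ref_sha` are hypothetical.
def _example_peel(store, ref_sha):
    unpeeled, peeled = peel_sha(store, ref_sha)
    # For a plain commit, unpeeled and peeled are the same object.
    return peeled.id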