Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 21%
1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
24"""Git object store interfaces and implementation."""
26import binascii
27import os
28import stat
29import sys
30import time
31import warnings
32from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
33from contextlib import suppress
34from io import BytesIO
35from pathlib import Path
36from typing import (
37 TYPE_CHECKING,
38 BinaryIO,
39 Optional,
40 Protocol,
41)
43from .errors import NotTreeError
44from .file import GitFile, _GitFile
45from .objects import (
46 S_ISGITLINK,
47 ZERO_SHA,
48 Blob,
49 Commit,
50 ObjectID,
51 ShaFile,
52 Tag,
53 Tree,
54 TreeEntry,
55 hex_to_filename,
56 hex_to_sha,
57 object_class,
58 sha_to_hex,
59 valid_hexsha,
60)
61from .pack import (
62 PACK_SPOOL_FILE_MAX_SIZE,
63 ObjectContainer,
64 Pack,
65 PackData,
66 PackedObjectContainer,
67 PackFileDisappeared,
68 PackHint,
69 PackIndexer,
70 PackInflater,
71 PackStreamCopier,
72 UnpackedObject,
73 extend_pack,
74 full_unpacked_object,
75 generate_unpacked_objects,
76 iter_sha1,
77 load_pack_index_file,
78 pack_objects_to_data,
79 write_pack_data,
80 write_pack_index,
81)
82from .protocol import DEPTH_INFINITE
83from .refs import PEELED_TAG_SUFFIX, Ref
85if TYPE_CHECKING:
86 from .commit_graph import CommitGraph
87 from .config import Config
88 from .diff_tree import RenameDetector
91class GraphWalker(Protocol):
92 """Protocol for graph walker objects."""
94 def __next__(self) -> bytes | None:
95 """Return the next object SHA to visit."""
96 ...
98 def ack(self, sha: bytes) -> None:
99 """Acknowledge that an object has been received."""
100 ...
102 def nak(self) -> None:
103 """Nothing in common was found."""
104 ...
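# Illustrative sketch (not part of dulwich): a minimal in-memory implementation
# of the GraphWalker protocol above. It walks a fixed list of candidate SHAs
# and records acknowledgements; a real walker negotiates with a remote peer.
class _ExampleGraphWalker:
    def __init__(self, candidates: list[bytes]) -> None:
        self._candidates = iter(candidates)
        self.acked: list[bytes] = []

    def __next__(self) -> bytes | None:
        # Return the next candidate SHA, or None when exhausted.
        return next(self._candidates, None)

    def ack(self, sha: bytes) -> None:
        # Remember which SHAs the other side confirmed it has.
        self.acked.append(sha)

    def nak(self) -> None:
        # Nothing in common was found; no state to keep in this sketch.
        pass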
107INFODIR = "info"
108PACKDIR = "pack"
110# use permissions consistent with Git; just readable by everyone
111# TODO: should packs also be non-writable on Windows? if so, that
112# would require some rather significant adjustments to the test suite
113PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
115# Grace period for cleaning up temporary pack files (in seconds)
116# Matches git's default of 2 weeks
117DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
120def find_shallow(
121 store: ObjectContainer, heads: Iterable[bytes], depth: int
122) -> tuple[set[bytes], set[bytes]]:
123 """Find shallow commits according to a given depth.
125 Args:
126 store: An ObjectStore for looking up objects.
127 heads: Iterable of head SHAs to start walking from.
128 depth: The depth of ancestors to include. A depth of one includes
129 only the heads themselves.
130 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
131 considered shallow and unshallow according to the arguments. Note that
132 these sets may overlap if a commit is reachable along multiple paths.
133 """
134 parents: dict[bytes, list[bytes]] = {}
135 commit_graph = store.get_commit_graph()
137 def get_parents(sha: bytes) -> list[bytes]:
138 result = parents.get(sha, None)
139 if not result:
140 # Try to use commit graph first if available
141 if commit_graph:
142 graph_parents = commit_graph.get_parents(sha)
143 if graph_parents is not None:
144 result = graph_parents
145 parents[sha] = result
146 return result
147 # Fall back to loading the object
148 commit = store[sha]
149 assert isinstance(commit, Commit)
150 result = commit.parents
151 parents[sha] = result
152 return result
154 todo = [] # stack of (sha, depth)
155 for head_sha in heads:
156 obj = store[head_sha]
157 # Peel tags if necessary
158 while isinstance(obj, Tag):
159 _, sha = obj.object
160 obj = store[sha]
161 if isinstance(obj, Commit):
162 todo.append((obj.id, 1))
164 not_shallow = set()
165 shallow = set()
166 while todo:
167 sha, cur_depth = todo.pop()
168 if cur_depth < depth:
169 not_shallow.add(sha)
170 new_depth = cur_depth + 1
171 todo.extend((p, new_depth) for p in get_parents(sha))
172 else:
173 shallow.add(sha)
175 return shallow, not_shallow
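# Illustrative sketch (not part of dulwich): computing the shallow boundary for
# a depth-limited fetch with find_shallow(). The repository path and branch
# name below are hypothetical placeholders.
def _example_shallow_boundary() -> tuple[set[bytes], set[bytes]]:
    from dulwich.repo import Repo

    repo = Repo("/path/to/repo")  # hypothetical repository
    head = repo.refs[b"refs/heads/master"]  # hypothetical branch
    # Commits more than two generations below the head become shallow.
    return find_shallow(repo.object_store, [head], depth=2)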
178def get_depth(
179 store: ObjectContainer,
180 head: bytes,
181 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
182 max_depth: int | None = None,
183) -> int:
184 """Return the current available depth for the given head.
186 For commits with multiple parents, the largest possible depth will be
187 returned.
189 Args:
190 store: Object store to search in
191 head: commit to start from
192 get_parents: optional function for getting the parents of a commit
193 max_depth: maximum depth to search
194 """
195 if head not in store:
196 return 0
197 current_depth = 1
198 queue = [(head, current_depth)]
199 commit_graph = store.get_commit_graph()
201 while queue and (max_depth is None or current_depth < max_depth):
202 e, depth = queue.pop(0)
203 current_depth = max(current_depth, depth)
205 # Try to use commit graph for parent lookup if available
206 parents = None
207 if commit_graph:
208 parents = commit_graph.get_parents(e)
210 if parents is None:
211 # Fall back to loading the object
212 cmt = store[e]
213 if isinstance(cmt, Tag):
214 _cls, sha = cmt.object
215 cmt = store[sha]
216 parents = get_parents(cmt)
218 queue.extend((parent, depth + 1) for parent in parents if parent in store)
219 return current_depth
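# Illustrative sketch (not part of dulwich): using get_depth() to decide
# whether a "deepen" request actually needs more history. The store and head
# SHA are supplied by the caller; names here are placeholders.
def _example_needs_deepening(
    store: ObjectContainer, head: bytes, requested: int
) -> bool:
    # Stop walking as soon as the requested depth is known to be available.
    available = get_depth(store, head, max_depth=requested)
    return available < requested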
222class PackContainer(Protocol):
223 """Protocol for containers that can accept pack files."""
225 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
226 """Add a new pack."""
229class BaseObjectStore:
230 """Object store interface."""
232 def determine_wants_all(
233 self, refs: Mapping[Ref, ObjectID], depth: int | None = None
234 ) -> list[ObjectID]:
235 """Determine which objects are wanted based on refs."""
237 def _want_deepen(sha: bytes) -> bool:
238 if not depth:
239 return False
240 if depth == DEPTH_INFINITE:
241 return True
242 return depth > self._get_depth(sha)
244 return [
245 sha
246 for (ref, sha) in refs.items()
247 if (sha not in self or _want_deepen(sha))
248 and not ref.endswith(PEELED_TAG_SUFFIX)
249 and not sha == ZERO_SHA
250 ]
252 def contains_loose(self, sha: bytes) -> bool:
253 """Check if a particular object is present by SHA1 and is loose."""
254 raise NotImplementedError(self.contains_loose)
256 def contains_packed(self, sha: bytes) -> bool:
257 """Check if a particular object is present by SHA1 and is packed."""
258 return False # Default implementation for stores that don't support packing
260 def __contains__(self, sha1: bytes) -> bool:
261 """Check if a particular object is present by SHA1.
263 This method makes no distinction between loose and packed objects.
264 """
265 return self.contains_loose(sha1)
267 @property
268 def packs(self) -> list[Pack]:
269 """Iterable of pack objects."""
270 raise NotImplementedError
272 def get_raw(self, name: bytes) -> tuple[int, bytes]:
273 """Obtain the raw text for an object.
275 Args:
276 name: sha for the object.
277 Returns: tuple with numeric type and object contents.
278 """
279 raise NotImplementedError(self.get_raw)
281 def __getitem__(self, sha1: ObjectID) -> ShaFile:
282 """Obtain an object by SHA1."""
283 type_num, uncomp = self.get_raw(sha1)
284 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
286 def __iter__(self) -> Iterator[bytes]:
287 """Iterate over the SHAs that are present in this store."""
288 raise NotImplementedError(self.__iter__)
290 def add_object(self, obj: ShaFile) -> None:
291 """Add a single object to this object store."""
292 raise NotImplementedError(self.add_object)
294 def add_objects(
295 self,
296 objects: Sequence[tuple[ShaFile, str | None]],
297 progress: Callable[..., None] | None = None,
298 ) -> Optional["Pack"]:
299 """Add a set of objects to this object store.
301 Args:
302 objects: Iterable over a list of (object, path) tuples
303 progress: Optional progress callback
304 """
305 raise NotImplementedError(self.add_objects)
307 def tree_changes(
308 self,
309 source: bytes | None,
310 target: bytes | None,
311 want_unchanged: bool = False,
312 include_trees: bool = False,
313 change_type_same: bool = False,
314 rename_detector: Optional["RenameDetector"] = None,
315 paths: Sequence[bytes] | None = None,
316 ) -> Iterator[
317 tuple[
318 tuple[bytes | None, bytes | None],
319 tuple[int | None, int | None],
320 tuple[bytes | None, bytes | None],
321 ]
322 ]:
323 """Find the differences between the contents of two trees.
325 Args:
326 source: SHA1 of the source tree
327 target: SHA1 of the target tree
328 want_unchanged: Whether unchanged files should be reported
329 include_trees: Whether to include trees
330 change_type_same: Whether to report files changing
331 type in the same entry.
332 rename_detector: RenameDetector object for detecting renames.
333 paths: Optional list of paths to filter to (as bytes).
334 Returns: Iterator over tuples with
335 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
336 """
337 from .diff_tree import tree_changes
339 for change in tree_changes(
340 self,
341 source,
342 target,
343 want_unchanged=want_unchanged,
344 include_trees=include_trees,
345 change_type_same=change_type_same,
346 rename_detector=rename_detector,
347 paths=paths,
348 ):
349 old_path = change.old.path if change.old is not None else None
350 new_path = change.new.path if change.new is not None else None
351 old_mode = change.old.mode if change.old is not None else None
352 new_mode = change.new.mode if change.new is not None else None
353 old_sha = change.old.sha if change.old is not None else None
354 new_sha = change.new.sha if change.new is not None else None
355 yield (
356 (old_path, new_path),
357 (old_mode, new_mode),
358 (old_sha, new_sha),
359 )
361 def iter_tree_contents(
362 self, tree_id: bytes, include_trees: bool = False
363 ) -> Iterator[TreeEntry]:
364 """Iterate the contents of a tree and all subtrees.
366 Iteration is depth-first pre-order, as in e.g. os.walk.
368 Args:
369 tree_id: SHA1 of the tree.
370 include_trees: If True, include tree objects in the iteration.
371 Returns: Iterator over TreeEntry namedtuples for all the objects in a
372 tree.
373 """
374 warnings.warn(
375 "Please use dulwich.object_store.iter_tree_contents",
376 DeprecationWarning,
377 stacklevel=2,
378 )
379 return iter_tree_contents(self, tree_id, include_trees=include_trees)
381 def iterobjects_subset(
382 self, shas: Iterable[bytes], *, allow_missing: bool = False
383 ) -> Iterator[ShaFile]:
384 """Iterate over a subset of objects in the store.
386 Args:
387 shas: Iterable of object SHAs to retrieve
388 allow_missing: If True, skip missing objects; if False, raise KeyError
390 Returns:
391 Iterator of ShaFile objects
393 Raises:
394 KeyError: If an object is missing and allow_missing is False
395 """
396 for sha in shas:
397 try:
398 yield self[sha]
399 except KeyError:
400 if not allow_missing:
401 raise
403 def iter_unpacked_subset(
404 self,
405 shas: Iterable[bytes],
406 include_comp: bool = False,
407 allow_missing: bool = False,
408 convert_ofs_delta: bool = True,
409 ) -> "Iterator[UnpackedObject]":
410 """Iterate over unpacked objects for a subset of SHAs.
412 Default implementation that converts ShaFile objects to UnpackedObject.
413 Subclasses may override for more efficient unpacked access.
415 Args:
416 shas: Iterable of object SHAs to retrieve
417 include_comp: Whether to include compressed data (ignored in base implementation)
418 allow_missing: If True, skip missing objects; if False, raise KeyError
419 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in base implementation)
421 Returns:
422 Iterator of UnpackedObject instances
424 Raises:
425 KeyError: If an object is missing and allow_missing is False
426 """
427 from .pack import UnpackedObject
429 for sha in shas:
430 try:
431 obj = self[sha]
432 # Convert ShaFile to UnpackedObject
433 unpacked = UnpackedObject(
434 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
435 )
436 yield unpacked
437 except KeyError:
438 if not allow_missing:
439 raise
441 def find_missing_objects(
442 self,
443 haves: Iterable[bytes],
444 wants: Iterable[bytes],
445 shallow: Set[bytes] | None = None,
446 progress: Callable[..., None] | None = None,
447 get_tagged: Callable[[], dict[bytes, bytes]] | None = None,
448 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
449 ) -> Iterator[tuple[bytes, PackHint | None]]:
450 """Find the missing objects required for a set of revisions.
452 Args:
453 haves: Iterable over SHAs already in common.
454 wants: Iterable over SHAs of objects to fetch.
455 shallow: Set of shallow commit SHA1s to skip
456 progress: Simple progress function that will be called with
457 updated progress strings.
458 get_tagged: Function that returns a dict of pointed-to sha ->
459 tag sha for including tags.
460 get_parents: Optional function for getting the parents of a
461 commit.
 462 Returns: Iterator over (sha, pack hint) pairs.
463 """
464 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
465 finder = MissingObjectFinder(
466 self,
467 haves=haves,
468 wants=wants,
469 shallow=shallow,
470 progress=progress,
471 get_tagged=get_tagged,
472 get_parents=get_parents,
473 )
474 return iter(finder)
476 def find_common_revisions(self, graphwalker: GraphWalker) -> list[bytes]:
477 """Find which revisions this store has in common using graphwalker.
479 Args:
480 graphwalker: A graphwalker object.
481 Returns: List of SHAs that are in common
482 """
483 haves = []
484 sha = next(graphwalker)
485 while sha:
486 if sha in self:
487 haves.append(sha)
488 graphwalker.ack(sha)
489 sha = next(graphwalker)
490 return haves
492 def generate_pack_data(
493 self,
494 have: Iterable[bytes],
495 want: Iterable[bytes],
496 *,
497 shallow: Set[bytes] | None = None,
498 progress: Callable[..., None] | None = None,
499 ofs_delta: bool = True,
500 ) -> tuple[int, Iterator[UnpackedObject]]:
501 """Generate pack data objects for a set of wants/haves.
503 Args:
504 have: List of SHA1s of objects that should not be sent
505 want: List of SHA1s of objects that should be sent
506 shallow: Set of shallow commit SHA1s to skip
507 ofs_delta: Whether OFS deltas can be included
508 progress: Optional progress reporting method
509 """
510 # Note that the pack-specific implementation below is more efficient,
511 # as it reuses deltas
512 missing_objects = MissingObjectFinder(
513 self, haves=have, wants=want, shallow=shallow, progress=progress
514 )
515 object_ids = list(missing_objects)
516 return pack_objects_to_data(
517 [(self[oid], path) for oid, path in object_ids],
518 ofs_delta=ofs_delta,
519 progress=progress,
520 )
522 def peel_sha(self, sha: bytes) -> bytes:
523 """Peel all tags from a SHA.
525 Args:
526 sha: The object SHA to peel.
527 Returns: The fully-peeled SHA1 of a tag object, after peeling all
528 intermediate tags; if the original ref does not point to a tag,
529 this will equal the original SHA1.
530 """
531 warnings.warn(
532 "Please use dulwich.object_store.peel_sha()",
533 DeprecationWarning,
534 stacklevel=2,
535 )
536 return peel_sha(self, sha)[1].id
538 def _get_depth(
539 self,
540 head: bytes,
541 get_parents: Callable[..., list[bytes]] = lambda commit: commit.parents,
542 max_depth: int | None = None,
543 ) -> int:
544 """Return the current available depth for the given head.
546 For commits with multiple parents, the largest possible depth will be
547 returned.
549 Args:
550 head: commit to start from
551 get_parents: optional function for getting the parents of a commit
552 max_depth: maximum depth to search
553 """
554 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
556 def close(self) -> None:
557 """Close any files opened by this object store."""
558 # Default implementation is a NO-OP
560 def prune(self, grace_period: int | None = None) -> None:
561 """Prune/clean up this object store.
563 This includes removing orphaned temporary files and other
564 housekeeping tasks. Default implementation is a NO-OP.
566 Args:
567 grace_period: Grace period in seconds for removing temporary files.
568 If None, uses the default grace period.
569 """
570 # Default implementation is a NO-OP
572 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
573 """Iterate over all SHA1s that start with a given prefix.
575 The default implementation is a naive iteration over all objects.
576 However, subclasses may override this method with more efficient
577 implementations.
578 """
579 for sha in self:
580 if sha.startswith(prefix):
581 yield sha
583 def get_commit_graph(self) -> Optional["CommitGraph"]:
584 """Get the commit graph for this object store.
586 Returns:
587 CommitGraph object if available, None otherwise
588 """
589 return None
591 def write_commit_graph(
592 self, refs: Sequence[bytes] | None = None, reachable: bool = True
593 ) -> None:
594 """Write a commit graph file for this object store.
596 Args:
597 refs: List of refs to include. If None, includes all refs from object store.
598 reachable: If True, includes all commits reachable from refs.
599 If False, only includes the direct ref targets.
601 Note:
 602 The default implementation raises NotImplementedError; subclasses should
 603 override this method to provide commit graph writing functionality.
604 """
605 raise NotImplementedError(self.write_commit_graph)
607 def get_object_mtime(self, sha: bytes) -> float:
608 """Get the modification time of an object.
610 Args:
611 sha: SHA1 of the object
613 Returns:
614 Modification time as seconds since epoch
616 Raises:
617 KeyError: if the object is not found
618 """
619 # Default implementation raises KeyError
620 # Subclasses should override to provide actual mtime
621 raise KeyError(sha)
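# Illustrative sketch (not part of dulwich): consuming the tuples yielded by
# BaseObjectStore.tree_changes() above. The two tree SHAs are placeholders.
def _example_print_tree_diff(
    store: BaseObjectStore, old_tree: bytes, new_tree: bytes
) -> None:
    for (old_path, new_path), (_old_mode, _new_mode), (old_sha, new_sha) in store.tree_changes(
        old_tree, new_tree
    ):
        # A path of None on one side means the entry was added or deleted.
        print(old_path, "->", new_path, old_sha, "->", new_sha)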
624class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
625 """Object store that supports pack operations.
627 This is a base class for object stores that can handle pack files,
628 including both disk-based and memory-based stores.
629 """
631 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
632 """Add a new pack to this object store.
634 Returns: Tuple of (file, commit_func, abort_func)
635 """
636 raise NotImplementedError(self.add_pack)
638 def add_pack_data(
639 self,
640 count: int,
641 unpacked_objects: Iterator["UnpackedObject"],
642 progress: Callable[..., None] | None = None,
643 ) -> Optional["Pack"]:
644 """Add pack data to this object store.
646 Args:
647 count: Number of objects
648 unpacked_objects: Iterator over unpacked objects
649 progress: Optional progress callback
650 """
651 raise NotImplementedError(self.add_pack_data)
653 def get_unpacked_object(
654 self, sha1: bytes, *, include_comp: bool = False
655 ) -> "UnpackedObject":
656 """Get a raw unresolved object.
658 Args:
659 sha1: SHA-1 hash of the object
660 include_comp: Whether to include compressed data
662 Returns:
663 UnpackedObject instance
664 """
665 from .pack import UnpackedObject
667 obj = self[sha1]
668 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())
670 def iterobjects_subset(
671 self, shas: Iterable[bytes], *, allow_missing: bool = False
672 ) -> Iterator[ShaFile]:
673 """Iterate over a subset of objects.
675 Args:
676 shas: Iterable of object SHAs to retrieve
677 allow_missing: If True, skip missing objects
679 Returns:
680 Iterator of ShaFile objects
681 """
682 for sha in shas:
683 try:
684 yield self[sha]
685 except KeyError:
686 if not allow_missing:
687 raise
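# Illustrative sketch (not part of dulwich): obtaining the raw, undeltified
# body of an object through the PackCapableObjectStore API above. `sha` is a
# placeholder hex SHA-1.
def _example_raw_body(store: PackCapableObjectStore, sha: bytes) -> bytes:
    unpacked = store.get_unpacked_object(sha)
    # UnpackedObject keeps the decompressed body as a list of chunks.
    return b"".join(unpacked.decomp_chunks)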
690class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
691 """Object store that uses pack files for storage.
693 This class provides a base implementation for object stores that use
694 Git pack files as their primary storage mechanism. It handles caching
695 of open pack files and provides configuration for pack file operations.
696 """
698 def __init__(
699 self,
700 pack_compression_level: int = -1,
701 pack_index_version: int | None = None,
702 pack_delta_window_size: int | None = None,
703 pack_window_memory: int | None = None,
704 pack_delta_cache_size: int | None = None,
705 pack_depth: int | None = None,
706 pack_threads: int | None = None,
707 pack_big_file_threshold: int | None = None,
708 ) -> None:
709 """Initialize a PackBasedObjectStore.
711 Args:
712 pack_compression_level: Compression level for pack files (-1 to 9)
713 pack_index_version: Pack index version to use
714 pack_delta_window_size: Window size for delta compression
715 pack_window_memory: Maximum memory to use for delta window
716 pack_delta_cache_size: Cache size for delta operations
717 pack_depth: Maximum depth for pack deltas
718 pack_threads: Number of threads to use for packing
719 pack_big_file_threshold: Threshold for treating files as "big"
720 """
721 self._pack_cache: dict[str, Pack] = {}
722 self.pack_compression_level = pack_compression_level
723 self.pack_index_version = pack_index_version
724 self.pack_delta_window_size = pack_delta_window_size
725 self.pack_window_memory = pack_window_memory
726 self.pack_delta_cache_size = pack_delta_cache_size
727 self.pack_depth = pack_depth
728 self.pack_threads = pack_threads
729 self.pack_big_file_threshold = pack_big_file_threshold
731 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
732 """Add a new pack to this object store."""
733 raise NotImplementedError(self.add_pack)
735 def add_pack_data(
736 self,
737 count: int,
738 unpacked_objects: Iterator[UnpackedObject],
739 progress: Callable[..., None] | None = None,
740 ) -> Optional["Pack"]:
741 """Add pack data to this object store.
743 Args:
744 count: Number of items to add
745 unpacked_objects: Iterator of UnpackedObject instances
746 progress: Optional progress callback
747 """
748 if count == 0:
749 # Don't bother writing an empty pack file
750 return None
751 f, commit, abort = self.add_pack()
752 try:
753 write_pack_data(
754 f.write,
755 unpacked_objects,
756 num_records=count,
757 progress=progress,
758 compression_level=self.pack_compression_level,
759 )
760 except BaseException:
761 abort()
762 raise
763 else:
764 return commit()
766 @property
767 def alternates(self) -> list["BaseObjectStore"]:
768 """Return list of alternate object stores."""
769 return []
771 def contains_packed(self, sha: bytes) -> bool:
772 """Check if a particular object is present by SHA1 and is packed.
774 This does not check alternates.
775 """
776 for pack in self.packs:
777 try:
778 if sha in pack:
779 return True
780 except PackFileDisappeared:
781 pass
782 return False
784 def __contains__(self, sha: bytes) -> bool:
785 """Check if a particular object is present by SHA1.
787 This method makes no distinction between loose and packed objects.
788 """
789 if self.contains_packed(sha) or self.contains_loose(sha):
790 return True
791 for alternate in self.alternates:
792 if sha in alternate:
793 return True
794 return False
796 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
797 """Add a newly appeared pack to the cache by path."""
798 prev_pack = self._pack_cache.get(base_name)
799 if prev_pack is not pack:
800 self._pack_cache[base_name] = pack
801 if prev_pack:
802 prev_pack.close()
804 def generate_pack_data(
805 self,
806 have: Iterable[bytes],
807 want: Iterable[bytes],
808 *,
809 shallow: Set[bytes] | None = None,
810 progress: Callable[..., None] | None = None,
811 ofs_delta: bool = True,
812 ) -> tuple[int, Iterator[UnpackedObject]]:
813 """Generate pack data objects for a set of wants/haves.
815 Args:
816 have: List of SHA1s of objects that should not be sent
817 want: List of SHA1s of objects that should be sent
818 shallow: Set of shallow commit SHA1s to skip
819 ofs_delta: Whether OFS deltas can be included
820 progress: Optional progress reporting method
821 """
822 missing_objects = MissingObjectFinder(
823 self, haves=have, wants=want, shallow=shallow, progress=progress
824 )
825 remote_has = missing_objects.get_remote_has()
826 object_ids = list(missing_objects)
827 return len(object_ids), generate_unpacked_objects(
828 self,
829 object_ids,
830 progress=progress,
831 ofs_delta=ofs_delta,
832 other_haves=remote_has,
833 )
835 def _clear_cached_packs(self) -> None:
836 pack_cache = self._pack_cache
837 self._pack_cache = {}
838 while pack_cache:
839 (_name, pack) = pack_cache.popitem()
840 pack.close()
842 def _iter_cached_packs(self) -> Iterator[Pack]:
843 return iter(self._pack_cache.values())
845 def _update_pack_cache(self) -> list[Pack]:
846 raise NotImplementedError(self._update_pack_cache)
848 def close(self) -> None:
849 """Close the object store and release resources.
851 This method closes all cached pack files and frees associated resources.
852 """
853 self._clear_cached_packs()
855 @property
856 def packs(self) -> list[Pack]:
857 """List with pack objects."""
858 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
860 def count_pack_files(self) -> int:
861 """Count the number of pack files.
863 Returns:
864 Number of pack files (excluding those with .keep files)
865 """
866 count = 0
867 for pack in self.packs:
868 # Check if there's a .keep file for this pack
869 keep_path = pack._basename + ".keep"
870 if not os.path.exists(keep_path):
871 count += 1
872 return count
874 def _iter_alternate_objects(self) -> Iterator[bytes]:
875 """Iterate over the SHAs of all the objects in alternate stores."""
876 for alternate in self.alternates:
877 yield from alternate
879 def _iter_loose_objects(self) -> Iterator[bytes]:
880 """Iterate over the SHAs of all loose objects."""
881 raise NotImplementedError(self._iter_loose_objects)
883 def _get_loose_object(self, sha: bytes) -> ShaFile | None:
884 raise NotImplementedError(self._get_loose_object)
886 def delete_loose_object(self, sha: bytes) -> None:
887 """Delete a loose object.
889 This method only handles loose objects. For packed objects,
890 use repack(exclude=...) to exclude them during repacking.
891 """
892 raise NotImplementedError(self.delete_loose_object)
894 def _remove_pack(self, pack: "Pack") -> None:
895 raise NotImplementedError(self._remove_pack)
897 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
898 """Pack loose objects.
900 Args:
901 progress: Optional progress reporting callback
903 Returns: Number of objects packed
904 """
905 objects: list[tuple[ShaFile, None]] = []
906 for sha in self._iter_loose_objects():
907 obj = self._get_loose_object(sha)
908 if obj is not None:
909 objects.append((obj, None))
910 self.add_objects(objects, progress=progress)
911 for obj, path in objects:
912 self.delete_loose_object(obj.id)
913 return len(objects)
915 def repack(
916 self,
917 exclude: Set[bytes] | None = None,
918 progress: Callable[[str], None] | None = None,
919 ) -> int:
920 """Repack the packs in this repository.
922 Note that this implementation is fairly naive and currently keeps all
923 objects in memory while it repacks.
925 Args:
926 exclude: Optional set of object SHAs to exclude from repacking
927 progress: Optional progress reporting callback
928 """
929 if exclude is None:
930 exclude = set()
932 loose_objects = set()
933 excluded_loose_objects = set()
934 for sha in self._iter_loose_objects():
935 if sha not in exclude:
936 obj = self._get_loose_object(sha)
937 if obj is not None:
938 loose_objects.add(obj)
939 else:
940 excluded_loose_objects.add(sha)
942 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
943 old_packs = {p.name(): p for p in self.packs}
944 for name, pack in old_packs.items():
945 objects.update(
946 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
947 )
949 # Only create a new pack if there are objects to pack
950 if objects:
951 # The name of the consolidated pack might match the name of a
952 # pre-existing pack. Take care not to remove the newly created
953 # consolidated pack.
954 consolidated = self.add_objects(list(objects), progress=progress)
955 if consolidated is not None:
956 old_packs.pop(consolidated.name(), None)
958 # Delete loose objects that were packed
959 for obj in loose_objects:
960 if obj is not None:
961 self.delete_loose_object(obj.id)
962 # Delete excluded loose objects
963 for sha in excluded_loose_objects:
964 self.delete_loose_object(sha)
965 for name, pack in old_packs.items():
966 self._remove_pack(pack)
967 self._update_pack_cache()
968 return len(objects)
970 def __iter__(self) -> Iterator[bytes]:
971 """Iterate over the SHAs that are present in this store."""
972 self._update_pack_cache()
973 for pack in self._iter_cached_packs():
974 try:
975 yield from pack
976 except PackFileDisappeared:
977 pass
978 yield from self._iter_loose_objects()
979 yield from self._iter_alternate_objects()
981 def contains_loose(self, sha: bytes) -> bool:
982 """Check if a particular object is present by SHA1 and is loose.
984 This does not check alternates.
985 """
986 return self._get_loose_object(sha) is not None
988 def get_raw(self, name: bytes) -> tuple[int, bytes]:
989 """Obtain the raw fulltext for an object.
991 Args:
992 name: sha for the object.
993 Returns: tuple with numeric type and object contents.
994 """
995 if name == ZERO_SHA:
996 raise KeyError(name)
997 if len(name) == 40:
998 sha = hex_to_sha(name)
999 hexsha = name
1000 elif len(name) == 20:
1001 sha = name
1002 hexsha = None
1003 else:
1004 raise AssertionError(f"Invalid object name {name!r}")
1005 for pack in self._iter_cached_packs():
1006 try:
1007 return pack.get_raw(sha)
1008 except (KeyError, PackFileDisappeared):
1009 pass
1010 if hexsha is None:
1011 hexsha = sha_to_hex(name)
1012 ret = self._get_loose_object(hexsha)
1013 if ret is not None:
1014 return ret.type_num, ret.as_raw_string()
1015 # Maybe something else has added a pack with the object
 1016 # in the meantime?
1017 for pack in self._update_pack_cache():
1018 try:
1019 return pack.get_raw(sha)
1020 except KeyError:
1021 pass
1022 for alternate in self.alternates:
1023 try:
1024 return alternate.get_raw(hexsha)
1025 except KeyError:
1026 pass
1027 raise KeyError(hexsha)
1029 def iter_unpacked_subset(
1030 self,
1031 shas: Iterable[bytes],
1032 include_comp: bool = False,
1033 allow_missing: bool = False,
1034 convert_ofs_delta: bool = True,
1035 ) -> Iterator[UnpackedObject]:
1036 """Iterate over a subset of objects, yielding UnpackedObject instances.
1038 Args:
1039 shas: Set of object SHAs to retrieve
1040 include_comp: Whether to include compressed data
1041 allow_missing: If True, skip missing objects; if False, raise KeyError
1042 convert_ofs_delta: Whether to convert OFS_DELTA objects
1044 Returns:
1045 Iterator of UnpackedObject instances
1047 Raises:
1048 KeyError: If an object is missing and allow_missing is False
1049 """
1050 todo: set[bytes] = set(shas)
1051 for p in self._iter_cached_packs():
1052 for unpacked in p.iter_unpacked_subset(
1053 todo,
1054 include_comp=include_comp,
1055 allow_missing=True,
1056 convert_ofs_delta=convert_ofs_delta,
1057 ):
1058 yield unpacked
1059 hexsha = sha_to_hex(unpacked.sha())
1060 todo.remove(hexsha)
1061 # Maybe something else has added a pack with the object
 1062 # in the meantime?
1063 for p in self._update_pack_cache():
1064 for unpacked in p.iter_unpacked_subset(
1065 todo,
1066 include_comp=include_comp,
1067 allow_missing=True,
1068 convert_ofs_delta=convert_ofs_delta,
1069 ):
1070 yield unpacked
1071 hexsha = sha_to_hex(unpacked.sha())
1072 todo.remove(hexsha)
1073 for alternate in self.alternates:
1074 assert isinstance(alternate, PackBasedObjectStore)
1075 for unpacked in alternate.iter_unpacked_subset(
1076 todo,
1077 include_comp=include_comp,
1078 allow_missing=True,
1079 convert_ofs_delta=convert_ofs_delta,
1080 ):
1081 yield unpacked
1082 hexsha = sha_to_hex(unpacked.sha())
1083 todo.remove(hexsha)
1085 def iterobjects_subset(
1086 self, shas: Iterable[bytes], *, allow_missing: bool = False
1087 ) -> Iterator[ShaFile]:
1088 """Iterate over a subset of objects in the store.
1090 This method searches for objects in pack files, alternates, and loose storage.
1092 Args:
1093 shas: Iterable of object SHAs to retrieve
1094 allow_missing: If True, skip missing objects; if False, raise KeyError
1096 Returns:
1097 Iterator of ShaFile objects
1099 Raises:
1100 KeyError: If an object is missing and allow_missing is False
1101 """
1102 todo: set[bytes] = set(shas)
1103 for p in self._iter_cached_packs():
1104 for o in p.iterobjects_subset(todo, allow_missing=True):
1105 yield o
1106 todo.remove(o.id)
1107 # Maybe something else has added a pack with the object
 1108 # in the meantime?
1109 for p in self._update_pack_cache():
1110 for o in p.iterobjects_subset(todo, allow_missing=True):
1111 yield o
1112 todo.remove(o.id)
1113 for alternate in self.alternates:
1114 for o in alternate.iterobjects_subset(todo, allow_missing=True):
1115 yield o
1116 todo.remove(o.id)
1117 for oid in todo:
1118 loose_obj: ShaFile | None = self._get_loose_object(oid)
1119 if loose_obj is not None:
1120 yield loose_obj
1121 elif not allow_missing:
1122 raise KeyError(oid)
1124 def get_unpacked_object(
1125 self, sha1: bytes, *, include_comp: bool = False
1126 ) -> UnpackedObject:
1127 """Obtain the unpacked object.
1129 Args:
1130 sha1: sha for the object.
1131 include_comp: Whether to include compression metadata.
1132 """
1133 if sha1 == ZERO_SHA:
1134 raise KeyError(sha1)
1135 if len(sha1) == 40:
1136 sha = hex_to_sha(sha1)
1137 hexsha = sha1
1138 elif len(sha1) == 20:
1139 sha = sha1
1140 hexsha = None
1141 else:
1142 raise AssertionError(f"Invalid object sha1 {sha1!r}")
1143 for pack in self._iter_cached_packs():
1144 try:
1145 return pack.get_unpacked_object(sha, include_comp=include_comp)
1146 except (KeyError, PackFileDisappeared):
1147 pass
1148 if hexsha is None:
1149 hexsha = sha_to_hex(sha1)
1150 # Maybe something else has added a pack with the object
 1151 # in the meantime?
1152 for pack in self._update_pack_cache():
1153 try:
1154 return pack.get_unpacked_object(sha, include_comp=include_comp)
1155 except KeyError:
1156 pass
1157 for alternate in self.alternates:
1158 assert isinstance(alternate, PackBasedObjectStore)
1159 try:
1160 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
1161 except KeyError:
1162 pass
1163 raise KeyError(hexsha)
1165 def add_objects(
1166 self,
1167 objects: Sequence[tuple[ShaFile, str | None]],
1168 progress: Callable[[str], None] | None = None,
1169 ) -> Optional["Pack"]:
1170 """Add a set of objects to this object store.
1172 Args:
1173 objects: Iterable over (object, path) tuples, should support
1174 __len__.
1175 progress: Optional progress reporting function.
1176 Returns: Pack object of the objects written.
1177 """
1178 count = len(objects)
1179 record_iter = (full_unpacked_object(o) for (o, p) in objects)
1180 return self.add_pack_data(count, record_iter, progress=progress)
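# Illustrative sketch (not part of dulwich): routine maintenance on a
# pack-based store built from the methods above. The pack-count threshold is
# an arbitrary example value.
def _example_maintenance(store: PackBasedObjectStore) -> None:
    if store.count_pack_files() > 10:
        # Consolidate all packs (and loose objects) into a single new pack.
        repacked = store.repack()
        print(f"repacked {repacked} objects")
    else:
        # Cheaper: only sweep loose objects into a new pack.
        packed = store.pack_loose_objects()
        print(f"packed {packed} loose objects")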
1183class DiskObjectStore(PackBasedObjectStore):
1184 """Git-style object store that exists on disk."""
1186 path: str | os.PathLike[str]
1187 pack_dir: str | os.PathLike[str]
1188 _alternates: list["BaseObjectStore"] | None
1189 _commit_graph: Optional["CommitGraph"]
1191 def __init__(
1192 self,
1193 path: str | os.PathLike[str],
1194 loose_compression_level: int = -1,
1195 pack_compression_level: int = -1,
1196 pack_index_version: int | None = None,
1197 pack_delta_window_size: int | None = None,
1198 pack_window_memory: int | None = None,
1199 pack_delta_cache_size: int | None = None,
1200 pack_depth: int | None = None,
1201 pack_threads: int | None = None,
1202 pack_big_file_threshold: int | None = None,
1203 fsync_object_files: bool = False,
1204 ) -> None:
1205 """Open an object store.
1207 Args:
1208 path: Path of the object store.
1209 loose_compression_level: zlib compression level for loose objects
1210 pack_compression_level: zlib compression level for pack objects
1211 pack_index_version: pack index version to use (1, 2, or 3)
1212 pack_delta_window_size: sliding window size for delta compression
1213 pack_window_memory: memory limit for delta window operations
1214 pack_delta_cache_size: size of cache for delta operations
1215 pack_depth: maximum delta chain depth
1216 pack_threads: number of threads for pack operations
1217 pack_big_file_threshold: threshold for treating files as big
1218 fsync_object_files: whether to fsync object files for durability
1219 """
1220 super().__init__(
1221 pack_compression_level=pack_compression_level,
1222 pack_index_version=pack_index_version,
1223 pack_delta_window_size=pack_delta_window_size,
1224 pack_window_memory=pack_window_memory,
1225 pack_delta_cache_size=pack_delta_cache_size,
1226 pack_depth=pack_depth,
1227 pack_threads=pack_threads,
1228 pack_big_file_threshold=pack_big_file_threshold,
1229 )
1230 self.path = path
1231 self.pack_dir = os.path.join(self.path, PACKDIR)
1232 self._alternates = None
1233 self.loose_compression_level = loose_compression_level
1234 self.pack_compression_level = pack_compression_level
1235 self.pack_index_version = pack_index_version
1236 self.fsync_object_files = fsync_object_files
1238 # Commit graph support - lazy loaded
1239 self._commit_graph = None
1240 self._use_commit_graph = True # Default to true
1242 def __repr__(self) -> str:
1243 """Return string representation of DiskObjectStore.
1245 Returns:
1246 String representation including the store path
1247 """
1248 return f"<{self.__class__.__name__}({self.path!r})>"
1250 @classmethod
1251 def from_config(
1252 cls, path: str | os.PathLike[str], config: "Config"
1253 ) -> "DiskObjectStore":
1254 """Create a DiskObjectStore from a configuration object.
1256 Args:
1257 path: Path to the object store directory
1258 config: Configuration object to read settings from
1260 Returns:
1261 New DiskObjectStore instance configured according to config
1262 """
1263 try:
1264 default_compression_level = int(
1265 config.get((b"core",), b"compression").decode()
1266 )
1267 except KeyError:
1268 default_compression_level = -1
1269 try:
1270 loose_compression_level = int(
1271 config.get((b"core",), b"looseCompression").decode()
1272 )
1273 except KeyError:
1274 loose_compression_level = default_compression_level
1275 try:
1276 pack_compression_level = int(
 1277 config.get((b"core",), b"packCompression").decode()
1278 )
1279 except KeyError:
1280 pack_compression_level = default_compression_level
1281 try:
1282 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1283 except KeyError:
1284 pack_index_version = None
1286 # Read pack configuration options
1287 try:
1288 pack_delta_window_size = int(
1289 config.get((b"pack",), b"deltaWindowSize").decode()
1290 )
1291 except KeyError:
1292 pack_delta_window_size = None
1293 try:
1294 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1295 except KeyError:
1296 pack_window_memory = None
1297 try:
1298 pack_delta_cache_size = int(
1299 config.get((b"pack",), b"deltaCacheSize").decode()
1300 )
1301 except KeyError:
1302 pack_delta_cache_size = None
1303 try:
1304 pack_depth = int(config.get((b"pack",), b"depth").decode())
1305 except KeyError:
1306 pack_depth = None
1307 try:
1308 pack_threads = int(config.get((b"pack",), b"threads").decode())
1309 except KeyError:
1310 pack_threads = None
1311 try:
1312 pack_big_file_threshold = int(
1313 config.get((b"pack",), b"bigFileThreshold").decode()
1314 )
1315 except KeyError:
1316 pack_big_file_threshold = None
1318 # Read core.commitGraph setting
1319 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1321 # Read core.fsyncObjectFiles setting
1322 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
1324 instance = cls(
1325 path,
1326 loose_compression_level,
1327 pack_compression_level,
1328 pack_index_version,
1329 pack_delta_window_size,
1330 pack_window_memory,
1331 pack_delta_cache_size,
1332 pack_depth,
1333 pack_threads,
1334 pack_big_file_threshold,
1335 fsync_object_files,
1336 )
1337 instance._use_commit_graph = use_commit_graph
1338 return instance
1340 @property
1341 def alternates(self) -> list["BaseObjectStore"]:
1342 """Get the list of alternate object stores.
1344 Reads from .git/objects/info/alternates if not already cached.
1346 Returns:
1347 List of DiskObjectStore instances for alternate object directories
1348 """
1349 if self._alternates is not None:
1350 return self._alternates
1351 self._alternates = []
1352 for path in self._read_alternate_paths():
1353 self._alternates.append(DiskObjectStore(path))
1354 return self._alternates
1356 def _read_alternate_paths(self) -> Iterator[str]:
1357 try:
1358 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1359 except FileNotFoundError:
1360 return
1361 with f:
1362 for line in f.readlines():
1363 line = line.rstrip(b"\n")
1364 if line.startswith(b"#"):
1365 continue
1366 if os.path.isabs(line):
1367 yield os.fsdecode(line)
1368 else:
1369 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1371 def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
1372 """Add an alternate path to this object store."""
1373 try:
1374 os.mkdir(os.path.join(self.path, INFODIR))
1375 except FileExistsError:
1376 pass
1377 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1378 with GitFile(alternates_path, "wb") as f:
1379 try:
1380 orig_f = open(alternates_path, "rb")
1381 except FileNotFoundError:
1382 pass
1383 else:
1384 with orig_f:
1385 f.write(orig_f.read())
1386 f.write(os.fsencode(path) + b"\n")
1388 if not os.path.isabs(path):
1389 path = os.path.join(self.path, path)
1390 self.alternates.append(DiskObjectStore(path))
1392 def _update_pack_cache(self) -> list[Pack]:
1393 """Read and iterate over new pack files and cache them."""
1394 try:
1395 pack_dir_contents = os.listdir(self.pack_dir)
1396 except FileNotFoundError:
1397 self.close()
1398 return []
1399 pack_files = set()
1400 for name in pack_dir_contents:
1401 if name.startswith("pack-") and name.endswith(".pack"):
1402 # verify that idx exists first (otherwise the pack was not yet
1403 # fully written)
1404 idx_name = os.path.splitext(name)[0] + ".idx"
1405 if idx_name in pack_dir_contents:
1406 pack_name = name[: -len(".pack")]
1407 pack_files.add(pack_name)
1409 # Open newly appeared pack files
1410 new_packs = []
1411 for f in pack_files:
1412 if f not in self._pack_cache:
1413 pack = Pack(
1414 os.path.join(self.pack_dir, f),
1415 delta_window_size=self.pack_delta_window_size,
1416 window_memory=self.pack_window_memory,
1417 delta_cache_size=self.pack_delta_cache_size,
1418 depth=self.pack_depth,
1419 threads=self.pack_threads,
1420 big_file_threshold=self.pack_big_file_threshold,
1421 )
1422 new_packs.append(pack)
1423 self._pack_cache[f] = pack
1424 # Remove disappeared pack files
1425 for f in set(self._pack_cache) - pack_files:
1426 self._pack_cache.pop(f).close()
1427 return new_packs
1429 def _get_shafile_path(self, sha: bytes) -> str:
1430 # Check from object dir
1431 return hex_to_filename(os.fspath(self.path), sha)
1433 def _iter_loose_objects(self) -> Iterator[bytes]:
1434 for base in os.listdir(self.path):
1435 if len(base) != 2:
1436 continue
1437 for rest in os.listdir(os.path.join(self.path, base)):
1438 sha = os.fsencode(base + rest)
1439 if not valid_hexsha(sha):
1440 continue
1441 yield sha
1443 def count_loose_objects(self) -> int:
1444 """Count the number of loose objects in the object store.
1446 Returns:
1447 Number of loose objects
1448 """
1449 count = 0
1450 if not os.path.exists(self.path):
1451 return 0
1453 for i in range(256):
1454 subdir = os.path.join(self.path, f"{i:02x}")
1455 try:
1456 count += len(
1457 [
1458 name
1459 for name in os.listdir(subdir)
1460 if len(name) == 38 # 40 - 2 for the prefix
1461 ]
1462 )
1463 except FileNotFoundError:
1464 # Directory may have been removed or is inaccessible
1465 continue
1467 return count
1469 def _get_loose_object(self, sha: bytes) -> ShaFile | None:
1470 path = self._get_shafile_path(sha)
1471 try:
1472 return ShaFile.from_path(path)
1473 except FileNotFoundError:
1474 return None
1476 def delete_loose_object(self, sha: bytes) -> None:
1477 """Delete a loose object from disk.
1479 Args:
1480 sha: SHA1 of the object to delete
1482 Raises:
1483 FileNotFoundError: If the object file doesn't exist
1484 """
1485 os.remove(self._get_shafile_path(sha))
1487 def get_object_mtime(self, sha: bytes) -> float:
1488 """Get the modification time of an object.
1490 Args:
1491 sha: SHA1 of the object
1493 Returns:
1494 Modification time as seconds since epoch
1496 Raises:
1497 KeyError: if the object is not found
1498 """
1499 # First check if it's a loose object
1500 if self.contains_loose(sha):
1501 path = self._get_shafile_path(sha)
1502 try:
1503 return os.path.getmtime(path)
1504 except FileNotFoundError:
1505 pass
1507 # Check if it's in a pack file
1508 for pack in self.packs:
1509 try:
1510 if sha in pack:
1511 # Use the pack file's mtime for packed objects
1512 pack_path = pack._data_path
1513 try:
1514 return os.path.getmtime(pack_path)
1515 except (FileNotFoundError, AttributeError):
1516 pass
1517 except PackFileDisappeared:
1518 pass
1520 raise KeyError(sha)
1522 def _remove_pack(self, pack: Pack) -> None:
1523 try:
1524 del self._pack_cache[os.path.basename(pack._basename)]
1525 except KeyError:
1526 pass
1527 pack.close()
1528 os.remove(pack.data.path)
1529 if hasattr(pack.index, "path"):
1530 os.remove(pack.index.path)
1532 def _get_pack_basepath(
1533 self, entries: Iterable[tuple[bytes, int, int | None]]
1534 ) -> str:
1535 suffix_bytes = iter_sha1(entry[0] for entry in entries)
1536 # TODO: Handle self.pack_dir being bytes
1537 suffix = suffix_bytes.decode("ascii")
1538 return os.path.join(self.pack_dir, "pack-" + suffix)
1540 def _complete_pack(
1541 self,
1542 f: BinaryIO,
1543 path: str,
1544 num_objects: int,
1545 indexer: PackIndexer,
1546 progress: Callable[..., None] | None = None,
1547 ) -> Pack:
1548 """Move a specific file containing a pack into the pack directory.
1550 Note: The file should be on the same file system as the
1551 packs directory.
1553 Args:
1554 f: Open file object for the pack.
1555 path: Path to the pack file.
1556 num_objects: Number of objects in the pack.
1557 indexer: A PackIndexer for indexing the pack.
1558 progress: Optional progress reporting function.
1559 """
1560 entries = []
1561 for i, entry in enumerate(indexer):
1562 if progress is not None:
1563 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1564 entries.append(entry)
1566 pack_sha, extra_entries = extend_pack(
1567 f,
1568 set(indexer.ext_refs()),
1569 get_raw=self.get_raw,
1570 compression_level=self.pack_compression_level,
1571 progress=progress,
1572 )
1573 f.flush()
1574 if self.fsync_object_files:
1575 try:
1576 fileno = f.fileno()
1577 except AttributeError as e:
1578 raise OSError("fsync requested but file has no fileno()") from e
1579 else:
1580 os.fsync(fileno)
1581 f.close()
1583 entries.extend(extra_entries)
1585 # Move the pack in.
1586 entries.sort()
1587 pack_base_name = self._get_pack_basepath(entries)
1589 for pack in self.packs:
1590 if pack._basename == pack_base_name:
1591 return pack
1593 target_pack_path = pack_base_name + ".pack"
1594 target_index_path = pack_base_name + ".idx"
1595 if sys.platform == "win32":
1596 # Windows might have the target pack file lingering. Attempt
1597 # removal, silently passing if the target does not exist.
1598 with suppress(FileNotFoundError):
1599 os.remove(target_pack_path)
1600 os.rename(path, target_pack_path)
1602 # Write the index.
1603 with GitFile(
1604 target_index_path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files
1605 ) as index_file:
1606 write_pack_index(
1607 index_file, entries, pack_sha, version=self.pack_index_version
1608 )
1610 # Add the pack to the store and return it.
1611 final_pack = Pack(
1612 pack_base_name,
1613 delta_window_size=self.pack_delta_window_size,
1614 window_memory=self.pack_window_memory,
1615 delta_cache_size=self.pack_delta_cache_size,
1616 depth=self.pack_depth,
1617 threads=self.pack_threads,
1618 big_file_threshold=self.pack_big_file_threshold,
1619 )
1620 final_pack.check_length_and_checksum()
1621 self._add_cached_pack(pack_base_name, final_pack)
1622 return final_pack
1624 def add_thin_pack(
1625 self,
1626 read_all: Callable[[int], bytes],
1627 read_some: Callable[[int], bytes] | None,
1628 progress: Callable[..., None] | None = None,
1629 ) -> "Pack":
1630 """Add a new thin pack to this object store.
1632 Thin packs are packs that contain deltas with parents that exist
1633 outside the pack. They should never be placed in the object store
 1634 directly; they should always be indexed and completed as they are copied.
1636 Args:
1637 read_all: Read function that blocks until the number of
1638 requested bytes are read.
1639 read_some: Read function that returns at least one byte, but may
1640 not return the number of bytes requested.
1641 progress: Optional progress reporting function.
1642 Returns: A Pack object pointing at the now-completed thin pack in the
1643 objects/pack directory.
1644 """
1645 import tempfile
1647 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1648 with os.fdopen(fd, "w+b") as f:
1649 os.chmod(path, PACK_MODE)
1650 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type]
1651 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type]
1652 copier.verify(progress=progress)
1653 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1655 def add_pack(
1656 self,
1657 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1658 """Add a new pack to this object store.
1660 Returns: Fileobject to write to, a commit function to
1661 call when the pack is finished and an abort
1662 function.
1663 """
1664 import tempfile
1666 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1667 f = os.fdopen(fd, "w+b")
1668 os.chmod(path, PACK_MODE)
1670 def commit() -> Optional["Pack"]:
1671 if f.tell() > 0:
1672 f.seek(0)
1674 with PackData(path, f) as pd:
1675 indexer = PackIndexer.for_pack_data(
1676 pd,
1677 resolve_ext_ref=self.get_raw, # type: ignore[arg-type]
1678 )
1679 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type]
1680 else:
1681 f.close()
1682 os.remove(path)
1683 return None
1685 def abort() -> None:
1686 f.close()
1687 os.remove(path)
1689 return f, commit, abort # type: ignore[return-value]
1691 def add_object(self, obj: ShaFile) -> None:
1692 """Add a single object to this object store.
1694 Args:
1695 obj: Object to add
1696 """
1697 path = self._get_shafile_path(obj.id)
1698 dir = os.path.dirname(path)
1699 try:
1700 os.mkdir(dir)
1701 except FileExistsError:
1702 pass
1703 if os.path.exists(path):
1704 return # Already there, no need to write again
1705 with GitFile(path, "wb", mask=PACK_MODE, fsync=self.fsync_object_files) as f:
1706 f.write(
1707 obj.as_legacy_object(compression_level=self.loose_compression_level)
1708 )
1710 @classmethod
1711 def init(cls, path: str | os.PathLike[str]) -> "DiskObjectStore":
1712 """Initialize a new disk object store.
1714 Creates the necessary directory structure for a Git object store.
1716 Args:
1717 path: Path where the object store should be created
1719 Returns:
1720 New DiskObjectStore instance
1721 """
1722 try:
1723 os.mkdir(path)
1724 except FileExistsError:
1725 pass
1726 os.mkdir(os.path.join(path, "info"))
1727 os.mkdir(os.path.join(path, PACKDIR))
1728 return cls(path)
1730 def iter_prefix(self, prefix: bytes) -> Iterator[bytes]:
1731 """Iterate over all object SHAs with the given prefix.
1733 Args:
1734 prefix: Hex prefix to search for (as bytes)
1736 Returns:
1737 Iterator of object SHAs (as bytes) matching the prefix
1738 """
1739 if len(prefix) < 2:
1740 yield from super().iter_prefix(prefix)
1741 return
1742 seen = set()
1743 dir = prefix[:2].decode()
1744 rest = prefix[2:].decode()
1745 try:
1746 for name in os.listdir(os.path.join(self.path, dir)):
1747 if name.startswith(rest):
1748 sha = os.fsencode(dir + name)
1749 if sha not in seen:
1750 seen.add(sha)
1751 yield sha
1752 except FileNotFoundError:
1753 pass
1755 for p in self.packs:
1756 bin_prefix = (
1757 binascii.unhexlify(prefix)
1758 if len(prefix) % 2 == 0
1759 else binascii.unhexlify(prefix[:-1])
1760 )
1761 for sha in p.index.iter_prefix(bin_prefix):
1762 sha = sha_to_hex(sha)
1763 if sha.startswith(prefix) and sha not in seen:
1764 seen.add(sha)
1765 yield sha
1766 for alternate in self.alternates:
1767 for sha in alternate.iter_prefix(prefix):
1768 if sha not in seen:
1769 seen.add(sha)
1770 yield sha
1772 def get_commit_graph(self) -> Optional["CommitGraph"]:
1773 """Get the commit graph for this object store.
1775 Returns:
1776 CommitGraph object if available, None otherwise
1777 """
1778 if not self._use_commit_graph:
1779 return None
1781 if self._commit_graph is None:
1782 from .commit_graph import read_commit_graph
1784 # Look for commit graph in our objects directory
1785 graph_file = os.path.join(self.path, "info", "commit-graph")
1786 if os.path.exists(graph_file):
1787 self._commit_graph = read_commit_graph(graph_file)
1788 return self._commit_graph
1790 def write_commit_graph(
1791 self, refs: Iterable[bytes] | None = None, reachable: bool = True
1792 ) -> None:
1793 """Write a commit graph file for this object store.
1795 Args:
1796 refs: List of refs to include. If None, includes all refs from object store.
1797 reachable: If True, includes all commits reachable from refs.
1798 If False, only includes the direct ref targets.
1799 """
1800 from .commit_graph import get_reachable_commits
1802 if refs is None:
1803 # Get all commit objects from the object store
1804 all_refs = []
1805 # Iterate through all objects to find commits
1806 for sha in self:
1807 try:
1808 obj = self[sha]
1809 if obj.type_name == b"commit":
1810 all_refs.append(sha)
1811 except KeyError:
1812 continue
1813 else:
1814 # Use provided refs
1815 all_refs = list(refs)
1817 if not all_refs:
1818 return # No commits to include
1820 if reachable:
1821 # Get all reachable commits
1822 commit_ids = get_reachable_commits(self, all_refs)
1823 else:
1824 # Just use the direct ref targets - ensure they're hex ObjectIDs
1825 commit_ids = []
1826 for ref in all_refs:
1827 if isinstance(ref, bytes) and len(ref) == 40:
1828 # Already hex ObjectID
1829 commit_ids.append(ref)
1830 elif isinstance(ref, bytes) and len(ref) == 20:
1831 # Binary SHA, convert to hex ObjectID
1832 from .objects import sha_to_hex
1834 commit_ids.append(sha_to_hex(ref))
1835 else:
1836 # Assume it's already correct format
1837 commit_ids.append(ref)
1839 if commit_ids:
1840 # Write commit graph directly to our object store path
1841 # Generate the commit graph
1842 from .commit_graph import generate_commit_graph
1844 graph = generate_commit_graph(self, commit_ids)
1846 if graph.entries:
1847 # Ensure the info directory exists
1848 info_dir = os.path.join(self.path, "info")
1849 os.makedirs(info_dir, exist_ok=True)
1851 # Write using GitFile for atomic operation
1852 graph_path = os.path.join(info_dir, "commit-graph")
1853 with GitFile(graph_path, "wb") as f:
1854 assert isinstance(
1855 f, _GitFile
1856 ) # GitFile in write mode always returns _GitFile
1857 graph.write_to_file(f)
1859 # Clear cached commit graph so it gets reloaded
1860 self._commit_graph = None
1862 def prune(self, grace_period: int | None = None) -> None:
1863 """Prune/clean up this object store.
1865 This removes temporary files that were left behind by interrupted
1866 pack operations. These are files that start with ``tmp_pack_`` in the
1867 repository directory or files with .pack extension but no corresponding
1868 .idx file in the pack directory.
1870 Args:
1871 grace_period: Grace period in seconds for removing temporary files.
1872 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
1873 """
1874 import glob
1876 if grace_period is None:
1877 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1879 # Clean up tmp_pack_* files in the repository directory
1880 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1881 # Check if file is old enough (more than grace period)
1882 mtime = os.path.getmtime(tmp_file)
1883 if time.time() - mtime > grace_period:
1884 os.remove(tmp_file)
1886 # Clean up orphaned .pack files without corresponding .idx files
1887 try:
1888 pack_dir_contents = os.listdir(self.pack_dir)
1889 except FileNotFoundError:
1890 return
1892 pack_files = {}
1893 idx_files = set()
1895 for name in pack_dir_contents:
1896 if name.endswith(".pack"):
1897 base_name = name[:-5] # Remove .pack extension
1898 pack_files[base_name] = name
1899 elif name.endswith(".idx"):
1900 base_name = name[:-4] # Remove .idx extension
1901 idx_files.add(base_name)
1903 # Remove .pack files without corresponding .idx files
1904 for base_name, pack_name in pack_files.items():
1905 if base_name not in idx_files:
1906 pack_path = os.path.join(self.pack_dir, pack_name)
1907 # Check if file is old enough (more than grace period)
1908 mtime = os.path.getmtime(pack_path)
1909 if time.time() - mtime > grace_period:
1910 os.remove(pack_path)
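# A minimal sketch (illustrative only, not part of the original module):
# remove leftover temporary pack files immediately instead of waiting out the
# default two-week grace period. The store is assumed to be a DiskObjectStore
# for an existing repository.
def _example_prune_now(store: "DiskObjectStore") -> None:
    """Delete tmp_pack_* files and orphaned .pack files right away."""
    store.prune(grace_period=0)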
1913class MemoryObjectStore(PackCapableObjectStore):
1914 """Object store that keeps all objects in memory."""
1916 def __init__(self) -> None:
1917 """Initialize a MemoryObjectStore.
1919 Creates an empty in-memory object store.
1920 """
1921 super().__init__()
1922 self._data: dict[bytes, ShaFile] = {}
1923 self.pack_compression_level = -1
1925 def _to_hexsha(self, sha: bytes) -> bytes:
1926 if len(sha) == 40:
1927 return sha
1928 elif len(sha) == 20:
1929 return sha_to_hex(sha)
1930 else:
1931 raise ValueError(f"Invalid sha {sha!r}")
1933 def contains_loose(self, sha: bytes) -> bool:
1934 """Check if a particular object is present by SHA1 and is loose."""
1935 return self._to_hexsha(sha) in self._data
1937 def contains_packed(self, sha: bytes) -> bool:
1938 """Check if a particular object is present by SHA1 and is packed."""
1939 return False
1941 def __iter__(self) -> Iterator[bytes]:
1942 """Iterate over the SHAs that are present in this store."""
1943 return iter(self._data.keys())
1945 @property
1946 def packs(self) -> list[Pack]:
1947 """List with pack objects."""
1948 return []
1950 def get_raw(self, name: ObjectID) -> tuple[int, bytes]:
1951 """Obtain the raw text for an object.
1953 Args:
1954 name: sha for the object.
1955 Returns: tuple with numeric type and object contents.
1956 """
1957 obj = self[self._to_hexsha(name)]
1958 return obj.type_num, obj.as_raw_string()
1960 def __getitem__(self, name: ObjectID) -> ShaFile:
1961 """Retrieve an object by SHA.
1963 Args:
1964 name: SHA of the object (as hex string or bytes)
1966 Returns:
1967 Copy of the ShaFile object
1969 Raises:
1970 KeyError: If the object is not found
1971 """
1972 return self._data[self._to_hexsha(name)].copy()
1974 def __delitem__(self, name: ObjectID) -> None:
1975 """Delete an object from this store, for testing only."""
1976 del self._data[self._to_hexsha(name)]
1978 def add_object(self, obj: ShaFile) -> None:
1979 """Add a single object to this object store."""
1980 self._data[obj.id] = obj.copy()
1982 def add_objects(
1983 self,
1984 objects: Iterable[tuple[ShaFile, str | None]],
1985 progress: Callable[[str], None] | None = None,
1986 ) -> None:
1987 """Add a set of objects to this object store.
1989 Args:
1990 objects: Iterable over a list of (object, path) tuples
1991 progress: Optional progress reporting function.
1992 """
1993 for obj, path in objects:
1994 self.add_object(obj)
1996 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1997 """Add a new pack to this object store.
1999 Because this object store doesn't support packs, we extract and add the
2000 individual objects.
2002 Returns: Fileobject to write to, a commit function to call when
2003 the pack is finished, and an abort function.
2004 """
2005 from tempfile import SpooledTemporaryFile
2007 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
2009 def commit() -> None:
2010 size = f.tell()
2011 if size > 0:
2012 f.seek(0)
2014 p = PackData.from_file(f, size)
2015 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type]
2016 self.add_object(obj)
2017 p.close()
2018 f.close()
2019 else:
2020 f.close()
2022 def abort() -> None:
2023 f.close()
2025 return f, commit, abort # type: ignore[return-value]
2027 def add_pack_data(
2028 self,
2029 count: int,
2030 unpacked_objects: Iterator[UnpackedObject],
2031 progress: Callable[[str], None] | None = None,
2032 ) -> None:
2033 """Add pack data to this object store.
2035 Args:
2036 count: Number of items to add
2037 unpacked_objects: Iterator of UnpackedObject instances
2038 progress: Optional progress reporting function.
2039 """
2040 if count == 0:
2041 return
2043 # Since MemoryObjectStore doesn't support pack files, we need to
2044 # extract individual objects. To handle deltas properly, we write
2045 # to a temporary pack and then use PackInflater to resolve them.
2046 f, commit, abort = self.add_pack()
2047 try:
2048 write_pack_data(
2049 f.write,
2050 unpacked_objects,
2051 num_records=count,
2052 progress=progress,
2053 )
2054 except BaseException:
2055 abort()
2056 raise
2057 else:
2058 commit()
2060 def add_thin_pack(
2061 self,
2062 read_all: Callable[[], bytes],
2063 read_some: Callable[[int], bytes],
2064 progress: Callable[[str], None] | None = None,
2065 ) -> None:
2066 """Add a new thin pack to this object store.
2068 Thin packs are packs that contain deltas with parents that exist
2069 outside the pack. Because this object store doesn't support packs, we
2070 extract and add the individual objects.
2072 Args:
2073 read_all: Read function that blocks until the number of
2074 requested bytes are read.
2075 read_some: Read function that returns at least one byte, but may
2076 not return the number of bytes requested.
2077 progress: Optional progress reporting function.
2078 """
2079 f, commit, abort = self.add_pack()
2080 try:
2081 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type]
2082 copier.verify()
2083 except BaseException:
2084 abort()
2085 raise
2086 else:
2087 commit()
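# A minimal sketch (illustrative only, not part of the original module) of the
# MemoryObjectStore round trip: store a blob, look it up by SHA, and check
# loose/packed membership.
def _example_memory_store_roundtrip() -> None:
    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello world\n")
    store.add_object(blob)
    assert store.contains_loose(blob.id)
    assert not store.contains_packed(blob.id)  # memory stores never pack
    fetched = store[blob.id]  # __getitem__ returns a copy of the stored object
    assert fetched.as_raw_string() == b"hello world\n"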
2090class ObjectIterator(Protocol):
2091 """Interface for iterating over objects."""
2093 def iterobjects(self) -> Iterator[ShaFile]:
2094 """Iterate over all objects.
2096 Returns:
2097 Iterator of ShaFile objects
2098 """
2099 raise NotImplementedError(self.iterobjects)
2102def tree_lookup_path(
2103 lookup_obj: Callable[[bytes], ShaFile], root_sha: bytes, path: bytes
2104) -> tuple[int, bytes]:
2105 """Look up an object in a Git tree.
2107 Args:
2108 lookup_obj: Callback for retrieving object by SHA1
2109 root_sha: SHA1 of the root tree
2110 path: Path to lookup
2111 Returns: A tuple of (mode, SHA) of the resulting path.
2112 """
2113 tree = lookup_obj(root_sha)
2114 if not isinstance(tree, Tree):
2115 raise NotTreeError(root_sha)
2116 return tree.lookup_path(lookup_obj, path)
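# A minimal sketch (illustrative only, not part of the original module) of
# tree_lookup_path(): build a tiny tree in memory and resolve a nested path
# to its (mode, sha) pair.
def _example_tree_lookup() -> None:
    store = MemoryObjectStore()
    blob = Blob.from_string(b"contents\n")
    subtree = Tree()
    subtree.add(b"file.txt", stat.S_IFREG | 0o644, blob.id)
    root = Tree()
    root.add(b"dir", stat.S_IFDIR, subtree.id)
    for obj in (blob, subtree, root):
        store.add_object(obj)
    mode, sha = tree_lookup_path(store.__getitem__, root.id, b"dir/file.txt")
    assert sha == blob.id and stat.S_ISREG(mode)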
2119def _collect_filetree_revs(
2120 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
2121) -> None:
2122 """Collect SHA1s of files and directories for specified tree.
2124 Args:
2125 obj_store: Object store to get objects by SHA from
2126 tree_sha: tree reference to walk
2127 kset: set to fill with references to files and directories
2128 """
2129 filetree = obj_store[tree_sha]
2130 assert isinstance(filetree, Tree)
2131 for name, mode, sha in filetree.iteritems():
2132 assert mode is not None
2133 assert sha is not None
2134 if not S_ISGITLINK(mode) and sha not in kset:
2135 kset.add(sha)
2136 if stat.S_ISDIR(mode):
2137 _collect_filetree_revs(obj_store, sha, kset)
2140def _split_commits_and_tags(
2141 obj_store: ObjectContainer, lst: Iterable[bytes], *, ignore_unknown: bool = False
2142) -> tuple[set[bytes], set[bytes], set[bytes]]:
2143 """Split object id list into three lists with commit, tag, and other SHAs.
2145 Commits referenced by tags are included into commits
2146 list as well. Only SHA1s known in this repository will get
2147 through, and unless the ignore_unknown argument is True, a KeyError
2148 is raised for any SHA1 missing from the repository.
2150 Args:
2151 obj_store: Object store to get objects by SHA1 from
2152 lst: Collection of commit and tag SHAs
2153 ignore_unknown: True to skip SHA1 missing in the repository
2154 silently.
2155 Returns: A tuple of (commits, tags, others) SHA1s
2156 """
2157 commits: set[bytes] = set()
2158 tags: set[bytes] = set()
2159 others: set[bytes] = set()
2160 for e in lst:
2161 try:
2162 o = obj_store[e]
2163 except KeyError:
2164 if not ignore_unknown:
2165 raise
2166 else:
2167 if isinstance(o, Commit):
2168 commits.add(e)
2169 elif isinstance(o, Tag):
2170 tags.add(e)
2171 tagged = o.object[1]
2172 c, t, os = _split_commits_and_tags(
2173 obj_store, [tagged], ignore_unknown=ignore_unknown
2174 )
2175 commits |= c
2176 tags |= t
2177 others |= os
2178 else:
2179 others.add(e)
2180 return (commits, tags, others)
2183class MissingObjectFinder:
2184 """Find the objects missing from another object store.
2186 Args:
2187 object_store: Object store containing at least all objects to be
2188 sent
2189 haves: SHA1s of commits not to send (already present in target)
2190 wants: SHA1s of commits to send
2191 progress: Optional function to report progress to.
2192 get_tagged: Function that returns a dict of pointed-to sha -> tag
2193 sha for including tags.
2194 get_parents: Optional function for getting the parents of a commit.
2195 """
2197 def __init__(
2198 self,
2199 object_store: BaseObjectStore,
2200 haves: Iterable[bytes],
2201 wants: Iterable[bytes],
2202 *,
2203 shallow: Set[bytes] | None = None,
2204 progress: Callable[[bytes], None] | None = None,
2205 get_tagged: Callable[[], dict[bytes, bytes]] | None = None,
2206 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2207 ) -> None:
2208 """Initialize a MissingObjectFinder.
2210 Args:
2211 object_store: Object store containing objects
2212 haves: SHA1s of objects already present in target
2213 wants: SHA1s of objects to send
2214 shallow: Set of shallow commit SHA1s
2215 progress: Optional progress reporting callback
2216 get_tagged: Function returning dict of pointed-to sha -> tag sha
2217 get_parents: Function for getting commit parents
2218 """
2219 self.object_store = object_store
2220 if shallow is None:
2221 shallow = set()
2222 self._get_parents = get_parents
2223 # process Commits and Tags differently
2224 # Note: while haves may list commits/tags not available locally
2225 # (such SHAs are filtered out by _split_commits_and_tags),
2226 # wants must list only known SHAs, otherwise
2227 # _split_commits_and_tags raises KeyError
2228 have_commits, have_tags, have_others = _split_commits_and_tags(
2229 object_store, haves, ignore_unknown=True
2230 )
2231 want_commits, want_tags, want_others = _split_commits_and_tags(
2232 object_store, wants, ignore_unknown=False
2233 )
2234 # all_ancestors is a set of commits that shall not be sent
2235 # (complete repository up to 'haves')
2236 all_ancestors = _collect_ancestors(
2237 object_store,
2238 have_commits,
2239 shallow=frozenset(shallow),
2240 get_parents=self._get_parents,
2241 )[0]
2242 # all_missing - complete set of commits between haves and wants
2243 # common - commits from all_ancestors we hit into while
2244 # traversing parent hierarchy of wants
2245 missing_commits, common_commits = _collect_ancestors(
2246 object_store,
2247 want_commits,
2248 frozenset(all_ancestors),
2249 shallow=frozenset(shallow),
2250 get_parents=self._get_parents,
2251 )
2252 self.remote_has: set[bytes] = set()
2253 # Now, fill sha_done with commits and revisions of
2254 # files and directories known to be both locally
2255 # and on target. Thus these commits and files
2256 # won't get selected for fetch
2257 for h in common_commits:
2258 self.remote_has.add(h)
2259 cmt = object_store[h]
2260 assert isinstance(cmt, Commit)
2261 _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
2262 # record tags we have as visited, too
2263 for t in have_tags:
2264 self.remote_has.add(t)
2265 self.sha_done = set(self.remote_has)
2267 # in fact, what we 'want' is commits, tags, and others
2268 # we've found missing
2269 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
2270 (w, None, Commit.type_num, False) for w in missing_commits
2271 }
2272 missing_tags = want_tags.difference(have_tags)
2273 self.objects_to_send.update(
2274 {(w, None, Tag.type_num, False) for w in missing_tags}
2275 )
2276 missing_others = want_others.difference(have_others)
2277 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
2279 if progress is None:
2280 self.progress: Callable[[bytes], None] = lambda x: None
2281 else:
2282 self.progress = progress
2283 self._tagged = (get_tagged and get_tagged()) or {}
2285 def get_remote_has(self) -> set[bytes]:
2286 """Get the set of SHAs the remote has.
2288 Returns:
2289 Set of SHA1s that the remote side already has
2290 """
2291 return self.remote_has
2293 def add_todo(
2294 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
2295 ) -> None:
2296 """Add objects to the todo list.
2298 Args:
2299 entries: Iterable of tuples (sha, name, type_num, is_leaf)
2300 """
2301 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
2303 def __next__(self) -> tuple[bytes, PackHint | None]:
2304 """Get the next object to send.
2306 Returns:
2307 Tuple of (sha, pack_hint)
2309 Raises:
2310 StopIteration: When no more objects to send
2311 """
2312 while True:
2313 if not self.objects_to_send:
2314 self.progress(
2315 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
2316 )
2317 raise StopIteration
2318 (sha, name, type_num, leaf) = self.objects_to_send.pop()
2319 if sha not in self.sha_done:
2320 break
2321 if not leaf:
2322 o = self.object_store[sha]
2323 if isinstance(o, Commit):
2324 self.add_todo([(o.tree, b"", Tree.type_num, False)])
2325 elif isinstance(o, Tree):
2326 todos = []
2327 for n, m, s in o.iteritems():
2328 assert m is not None
2329 assert n is not None
2330 assert s is not None
2331 if not S_ISGITLINK(m):
2332 todos.append(
2333 (
2334 s,
2335 n,
2336 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
2337 not stat.S_ISDIR(m),
2338 )
2339 )
2340 self.add_todo(todos)
2341 elif isinstance(o, Tag):
2342 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
2343 if sha in self._tagged:
2344 self.add_todo([(self._tagged[sha], None, None, True)])
2345 self.sha_done.add(sha)
2346 if len(self.sha_done) % 1000 == 0:
2347 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
2348 if type_num is None:
2349 pack_hint = None
2350 else:
2351 pack_hint = (type_num, name)
2352 return (sha, pack_hint)
2354 def __iter__(self) -> Iterator[tuple[bytes, PackHint | None]]:
2355 """Return iterator over objects to send.
2357 Returns:
2358 Self (this class implements the iterator protocol)
2359 """
2360 return self
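# A minimal sketch (illustrative only, not part of the original module) of
# driving MissingObjectFinder: given the SHAs the receiver already has and the
# SHAs it wants (which must exist in the store), list the object IDs that
# would have to be sent.
def _example_missing_objects(
    store: BaseObjectStore, haves: Iterable[bytes], wants: Iterable[bytes]
) -> list[bytes]:
    finder = MissingObjectFinder(store, haves=haves, wants=wants)
    return [sha for sha, _pack_hint in finder]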
2363class ObjectStoreGraphWalker:
2364 """Graph walker that finds what commits are missing from an object store."""
2366 heads: set[ObjectID]
2367 """Revisions without descendants in the local repo."""
2369 get_parents: Callable[[ObjectID], list[ObjectID]]
2370 """Function to retrieve parents in the local repo."""
2372 shallow: set[ObjectID]
2374 def __init__(
2375 self,
2376 local_heads: Iterable[ObjectID],
2377 get_parents: Callable[[ObjectID], list[ObjectID]],
2378 shallow: set[ObjectID] | None = None,
2379 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
2380 | None = None,
2381 ) -> None:
2382 """Create a new instance.
2384 Args:
2385 local_heads: Heads to start search with
2386 get_parents: Function for finding the parents of a SHA1.
2387 shallow: Set of shallow commits.
2388 update_shallow: Function to update shallow commits.
2389 """
2390 self.heads = set(local_heads)
2391 self.get_parents = get_parents
2392 self.parents: dict[ObjectID, list[ObjectID] | None] = {}
2393 if shallow is None:
2394 shallow = set()
2395 self.shallow = shallow
2396 self.update_shallow = update_shallow
2398 def nak(self) -> None:
2399 """Nothing in common was found."""
2401 def ack(self, sha: ObjectID) -> None:
2402 """Ack that a revision and its ancestors are present in the source."""
2403 if len(sha) != 40:
2404 raise ValueError(f"unexpected sha {sha!r} received")
2405 ancestors = {sha}
2407 # stop if we run out of heads to remove
2408 while self.heads:
2409 for a in ancestors:
2410 if a in self.heads:
2411 self.heads.remove(a)
2413 # collect all ancestors
2414 new_ancestors = set()
2415 for a in ancestors:
2416 ps = self.parents.get(a)
2417 if ps is not None:
2418 new_ancestors.update(ps)
2419 self.parents[a] = None
2421 # no more ancestors; stop
2422 if not new_ancestors:
2423 break
2425 ancestors = new_ancestors
2427 def next(self) -> ObjectID | None:
2428 """Iterate over ancestors of heads in the target."""
2429 if self.heads:
2430 ret = self.heads.pop()
2431 try:
2432 ps = self.get_parents(ret)
2433 except KeyError:
2434 return None
2435 self.parents[ret] = ps
2436 self.heads.update([p for p in ps if p not in self.parents])
2437 return ret
2438 return None
2440 __next__ = next
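# A minimal sketch (illustrative only, not part of the original module) of the
# graph-walker negotiation loop: offer candidate SHAs with next() and ack()
# the ones the other side reports having; the walker stops once every local
# head has been accounted for. `remote_has` is a hypothetical set of 40-byte
# hex SHAs known to the remote.
def _example_negotiate(
    walker: ObjectStoreGraphWalker, remote_has: set[ObjectID]
) -> set[ObjectID]:
    common = set()
    while (sha := walker.next()) is not None:
        if sha in remote_has:
            walker.ack(sha)
            common.add(sha)
    return common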
2443def commit_tree_changes(
2444 object_store: BaseObjectStore,
2445 tree: ObjectID | Tree,
2446 changes: Sequence[tuple[bytes, int | None, bytes | None]],
2447) -> ObjectID:
2448 """Commit a specified set of changes to a tree structure.
2450 This will apply a set of changes on top of an existing tree, storing new
2451 objects in object_store.
2453 changes are a list of tuples with (path, mode, object_sha).
2454 Paths can be both blobs and trees. Setting the mode and
2455 object sha to None deletes the path.
2457 This method works especially well if there are only a small
2458 number of changes to a big tree. For a large number of changes
2459 to a large tree, use e.g. commit_tree.
2461 Args:
2462 object_store: Object store to store new objects in
2463 and retrieve old ones from.
2464 tree: Original tree root (SHA or Tree object)
2465 changes: changes to apply
2466 Returns: SHA1 of the new tree root
2467 """
2468 # TODO(jelmer): Save up the objects and add them using .add_objects
2469 # rather than with individual calls to .add_object.
2470 # Handle both Tree object and SHA
2471 if isinstance(tree, Tree):
2472 tree_obj: Tree = tree
2473 else:
2474 sha_obj = object_store[tree]
2475 assert isinstance(sha_obj, Tree)
2476 tree_obj = sha_obj
2477 nested_changes: dict[bytes, list[tuple[bytes, int | None, bytes | None]]] = {}
2478 for path, new_mode, new_sha in changes:
2479 try:
2480 (dirname, subpath) = path.split(b"/", 1)
2481 except ValueError:
2482 if new_sha is None:
2483 del tree_obj[path]
2484 else:
2485 assert new_mode is not None
2486 tree_obj[path] = (new_mode, new_sha)
2487 else:
2488 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
2489 for name, subchanges in nested_changes.items():
2490 try:
2491 orig_subtree_id: bytes | Tree = tree_obj[name][1]
2492 except KeyError:
2493 # For new directories, pass an empty Tree object
2494 orig_subtree_id = Tree()
2495 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
2496 subtree = object_store[subtree_id]
2497 assert isinstance(subtree, Tree)
2498 if len(subtree) == 0:
2499 del tree_obj[name]
2500 else:
2501 tree_obj[name] = (stat.S_IFDIR, subtree.id)
2502 object_store.add_object(tree_obj)
2503 return tree_obj.id
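# A minimal sketch (illustrative only, not part of the original module) of
# commit_tree_changes(): starting from an empty tree, add one blob under a new
# directory and return the new root tree SHA.
def _example_commit_tree_changes() -> ObjectID:
    store = MemoryObjectStore()
    root = Tree()
    blob = Blob.from_string(b"data\n")
    for obj in (root, blob):
        store.add_object(obj)
    new_root_id = commit_tree_changes(
        store,
        root.id,
        [(b"docs/readme.txt", stat.S_IFREG | 0o644, blob.id)],
    )
    mode, sha = tree_lookup_path(store.__getitem__, new_root_id, b"docs/readme.txt")
    assert sha == blob.id
    return new_root_id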
2506class OverlayObjectStore(BaseObjectStore):
2507 """Object store that can overlay multiple object stores."""
2509 def __init__(
2510 self,
2511 bases: list[BaseObjectStore],
2512 add_store: BaseObjectStore | None = None,
2513 ) -> None:
2514 """Initialize an OverlayObjectStore.
2516 Args:
2517 bases: List of base object stores to overlay
2518 add_store: Optional store to write new objects to
2519 """
2520 self.bases = bases
2521 self.add_store = add_store
2523 def add_object(self, object: ShaFile) -> None:
2524 """Add a single object to the store.
2526 Args:
2527 object: Object to add
2529 Raises:
2530 NotImplementedError: If no add_store was provided
2531 """
2532 if self.add_store is None:
2533 raise NotImplementedError(self.add_object)
2534 return self.add_store.add_object(object)
2536 def add_objects(
2537 self,
2538 objects: Sequence[tuple[ShaFile, str | None]],
2539 progress: Callable[[str], None] | None = None,
2540 ) -> Pack | None:
2541 """Add multiple objects to the store.
2543 Args:
2544 objects: Iterator of objects to add
2545 progress: Optional progress reporting callback
2547 Raises:
2548 NotImplementedError: If no add_store was provided
2549 """
2550 if self.add_store is None:
2551 raise NotImplementedError(self.add_object)
2552 return self.add_store.add_objects(objects, progress)
2554 @property
2555 def packs(self) -> list[Pack]:
2556 """Get the list of packs from all overlaid stores.
2558 Returns:
2559 Combined list of packs from all base stores
2560 """
2561 ret = []
2562 for b in self.bases:
2563 ret.extend(b.packs)
2564 return ret
2566 def __iter__(self) -> Iterator[ObjectID]:
2567 """Iterate over all object SHAs in the overlaid stores.
2569 Returns:
2570 Iterator of object SHAs (deduped across stores)
2571 """
2572 done = set()
2573 for b in self.bases:
2574 for o_id in b:
2575 if o_id not in done:
2576 yield o_id
2577 done.add(o_id)
2579 def iterobjects_subset(
2580 self, shas: Iterable[bytes], *, allow_missing: bool = False
2581 ) -> Iterator[ShaFile]:
2582 """Iterate over a subset of objects from the overlaid stores.
2584 Args:
2585 shas: Iterable of object SHAs to retrieve
2586 allow_missing: If True, skip missing objects; if False, raise KeyError
2588 Returns:
2589 Iterator of ShaFile objects
2591 Raises:
2592 KeyError: If an object is missing and allow_missing is False
2593 """
2594 todo = set(shas)
2595 found: set[bytes] = set()
2597 for b in self.bases:
2598 # Create a copy of todo for each base to avoid modifying
2599 # the set while iterating through it
2600 current_todo = todo - found
2601 for o in b.iterobjects_subset(current_todo, allow_missing=True):
2602 yield o
2603 found.add(o.id)
2605 # Check for any remaining objects not found
2606 missing = todo - found
2607 if missing and not allow_missing:
2608 raise KeyError(next(iter(missing)))
2610 def iter_unpacked_subset(
2611 self,
2612 shas: Iterable[bytes],
2613 include_comp: bool = False,
2614 allow_missing: bool = False,
2615 convert_ofs_delta: bool = True,
2616 ) -> Iterator[UnpackedObject]:
2617 """Iterate over unpacked objects from the overlaid stores.
2619 Args:
2620 shas: Iterable of object SHAs to retrieve
2621 include_comp: Whether to include compressed data
2622 allow_missing: If True, skip missing objects; if False, raise KeyError
2623 convert_ofs_delta: Whether to convert OFS_DELTA objects
2625 Returns:
2626 Iterator of unpacked objects
2628 Raises:
2629 KeyError: If an object is missing and allow_missing is False
2630 """
2631 todo = set(shas)
2632 for b in self.bases:
2633 for o in b.iter_unpacked_subset(
2634 todo,
2635 include_comp=include_comp,
2636 allow_missing=True,
2637 convert_ofs_delta=convert_ofs_delta,
2638 ):
2639 yield o
2640 todo.remove(o.sha())
2641 if todo and not allow_missing:
2642 raise KeyError(next(iter(todo)))
2644 def get_raw(self, sha_id: ObjectID) -> tuple[int, bytes]:
2645 """Get the raw object data from the overlaid stores.
2647 Args:
2648 sha_id: SHA of the object
2650 Returns:
2651 Tuple of (type_num, raw_data)
2653 Raises:
2654 KeyError: If object not found in any base store
2655 """
2656 for b in self.bases:
2657 try:
2658 return b.get_raw(sha_id)
2659 except KeyError:
2660 pass
2661 raise KeyError(sha_id)
2663 def contains_packed(self, sha: bytes) -> bool:
2664 """Check if an object is packed in any base store.
2666 Args:
2667 sha: SHA of the object
2669 Returns:
2670 True if object is packed in any base store
2671 """
2672 for b in self.bases:
2673 if b.contains_packed(sha):
2674 return True
2675 return False
2677 def contains_loose(self, sha: bytes) -> bool:
2678 """Check if an object is loose in any base store.
2680 Args:
2681 sha: SHA of the object
2683 Returns:
2684 True if object is loose in any base store
2685 """
2686 for b in self.bases:
2687 if b.contains_loose(sha):
2688 return True
2689 return False
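# A minimal sketch (illustrative only, not part of the original module) of
# OverlayObjectStore: read through two in-memory stores while directing new
# writes to the first one.
def _example_overlay() -> None:
    upper = MemoryObjectStore()
    lower = MemoryObjectStore()
    lower.add_object(Blob.from_string(b"shared\n"))
    overlay = OverlayObjectStore([upper, lower], add_store=upper)
    new_blob = Blob.from_string(b"new\n")
    overlay.add_object(new_blob)  # stored in `upper` via add_store
    assert upper.contains_loose(new_blob.id)
    assert new_blob.id in set(overlay)  # both bases are visible when iterating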
2692def read_packs_file(f: BinaryIO) -> Iterator[str]:
2693 """Yield the packs listed in a packs file."""
2694 for line in f.read().splitlines():
2695 if not line:
2696 continue
2697 (kind, name) = line.split(b" ", 1)
2698 if kind != b"P":
2699 continue
2700 yield os.fsdecode(name)
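# A minimal sketch (illustrative only, not part of the original module) of
# read_packs_file() on an in-memory objects/info/packs listing; only lines
# starting with "P " contribute pack names.
def _example_read_packs() -> list[str]:
    listing = BytesIO(b"P pack-1234abcd.pack\nP pack-5678ef01.pack\n")
    return list(read_packs_file(listing))  # ['pack-1234abcd.pack', 'pack-5678ef01.pack']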
2703class BucketBasedObjectStore(PackBasedObjectStore):
2704 """Object store implementation that uses a bucket store like S3 as backend."""
2706 def _iter_loose_objects(self) -> Iterator[bytes]:
2707 """Iterate over the SHAs of all loose objects."""
2708 return iter([])
2710 def _get_loose_object(self, sha: bytes) -> None:
2711 return None
2713 def delete_loose_object(self, sha: bytes) -> None:
2714 """Delete a loose object (no-op for bucket stores).
2716 Bucket-based stores don't have loose objects, so this is a no-op.
2718 Args:
2719 sha: SHA of the object to delete
2720 """
2721 # Doesn't exist..
2723 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
2724 """Pack loose objects. Returns number of objects packed.
2726 BucketBasedObjectStore doesn't support loose objects, so this is a no-op.
2728 Args:
2729 progress: Optional progress reporting callback (ignored)
2730 """
2731 return 0
2733 def _remove_pack_by_name(self, name: str) -> None:
2734 """Remove a pack by name. Subclasses should implement this."""
2735 raise NotImplementedError(self._remove_pack_by_name)
2737 def _iter_pack_names(self) -> Iterator[str]:
2738 raise NotImplementedError(self._iter_pack_names)
2740 def _get_pack(self, name: str) -> Pack:
2741 raise NotImplementedError(self._get_pack)
2743 def _update_pack_cache(self) -> list[Pack]:
2744 pack_files = set(self._iter_pack_names())
2746 # Open newly appeared pack files
2747 new_packs = []
2748 for f in pack_files:
2749 if f not in self._pack_cache:
2750 pack = self._get_pack(f)
2751 new_packs.append(pack)
2752 self._pack_cache[f] = pack
2753 # Remove disappeared pack files
2754 for f in set(self._pack_cache) - pack_files:
2755 self._pack_cache.pop(f).close()
2756 return new_packs
2758 def _upload_pack(
2759 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
2760 ) -> None:
2761 raise NotImplementedError
2763 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2764 """Add a new pack to this object store.
2766 Returns: Fileobject to write to, a commit function to
2767 call when the pack is finished and an abort
2768 function.
2769 """
2770 import tempfile
2772 pf = tempfile.SpooledTemporaryFile(
2773 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2774 )
2776 def commit() -> Pack | None:
2777 if pf.tell() == 0:
2778 pf.close()
2779 return None
2781 pf.seek(0)
2783 p = PackData(pf.name, pf)
2784 entries = p.sorted_entries()
2785 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
2786 idxf = tempfile.SpooledTemporaryFile(
2787 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2788 )
2789 checksum = p.get_stored_checksum()
2790 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
2791 idxf.seek(0)
2792 idx = load_pack_index_file(basename + ".idx", idxf)
2793 for pack in self.packs:
2794 if pack.get_stored_checksum() == p.get_stored_checksum():
2795 p.close()
2796 idx.close()
2797 pf.close()
2798 idxf.close()
2799 return pack
2800 pf.seek(0)
2801 idxf.seek(0)
2802 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type]
2803 final_pack = Pack.from_objects(p, idx)
2804 self._add_cached_pack(basename, final_pack)
2805 pf.close()
2806 idxf.close()
2807 return final_pack
2809 return pf, commit, pf.close # type: ignore[return-value]
2812def _collect_ancestors(
2813 store: ObjectContainer,
2814 heads: Iterable[ObjectID],
2815 common: frozenset[ObjectID] = frozenset(),
2816 shallow: frozenset[ObjectID] = frozenset(),
2817 get_parents: Callable[[Commit], list[bytes]] = lambda commit: commit.parents,
2818) -> tuple[set[ObjectID], set[ObjectID]]:
2819 """Collect all ancestors of heads up to (excluding) those in common.
2821 Args:
2822 store: Object store to get commits from
2823 heads: commits to start from
2824 common: commits to end at, or empty set to walk repository
2825 completely
2826 shallow: Set of shallow commits
2827 get_parents: Optional function for getting the parents of a
2828 commit.
2829 Returns: a tuple (A, B) where A is the set of all commits reachable
2830 from heads but not present in common, and B is the set of common
2831 (shared) elements that are directly reachable from heads.
2832 """
2833 bases = set()
2834 commits = set()
2835 queue: list[ObjectID] = []
2836 queue.extend(heads)
2838 # Try to use commit graph if available
2839 commit_graph = store.get_commit_graph()
2841 while queue:
2842 e = queue.pop(0)
2843 if e in common:
2844 bases.add(e)
2845 elif e not in commits:
2846 commits.add(e)
2847 if e in shallow:
2848 continue
2850 # Try to use commit graph for parent lookup
2851 parents = None
2852 if commit_graph:
2853 parents = commit_graph.get_parents(e)
2855 if parents is None:
2856 # Fall back to loading the object
2857 cmt = store[e]
2858 assert isinstance(cmt, Commit)
2859 parents = get_parents(cmt)
2861 queue.extend(parents)
2862 return (commits, bases)
2865def iter_tree_contents(
2866 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
2867) -> Iterator[TreeEntry]:
2868 """Iterate the contents of a tree and all subtrees.
2870 Iteration is depth-first pre-order, as in e.g. os.walk.
2872 Args:
2873 store: Object store to get trees from
2874 tree_id: SHA1 of the tree.
2875 include_trees: If True, include tree objects in the iteration.
2877 Yields: TreeEntry namedtuples for all the objects in a tree.
2878 """
2879 if tree_id is None:
2880 return
2881 # This could be fairly easily generalized to >2 trees if we find a use
2882 # case.
2883 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
2884 while todo:
2885 entry = todo.pop()
2886 assert entry.mode is not None
2887 if stat.S_ISDIR(entry.mode):
2888 extra = []
2889 assert entry.sha is not None
2890 tree = store[entry.sha]
2891 assert isinstance(tree, Tree)
2892 for subentry in tree.iteritems(name_order=True):
2893 assert entry.path is not None
2894 extra.append(subentry.in_path(entry.path))
2895 todo.extend(reversed(extra))
2896 if not stat.S_ISDIR(entry.mode) or include_trees:
2897 yield entry
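# A minimal sketch (illustrative only, not part of the original module) of
# iter_tree_contents(): walk a small nested tree and collect the full path of
# every blob entry (depth-first, name order: [b'a.txt', b'sub/b.txt']).
def _example_list_tree_paths() -> list[bytes]:
    store = MemoryObjectStore()
    blob = Blob.from_string(b"x")
    inner = Tree()
    inner.add(b"b.txt", stat.S_IFREG | 0o644, blob.id)
    outer = Tree()
    outer.add(b"a.txt", stat.S_IFREG | 0o644, blob.id)
    outer.add(b"sub", stat.S_IFDIR, inner.id)
    for obj in (blob, inner, outer):
        store.add_object(obj)
    return [entry.path for entry in iter_tree_contents(store, outer.id)]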
2900def iter_commit_contents(
2901 store: ObjectContainer,
2902 commit: Commit | bytes,
2903 *,
2904 include: Sequence[str | bytes | Path] | None = None,
2905) -> Iterator[TreeEntry]:
2906 """Iterate the contents of the repository at the specified commit.
2908 This is a wrapper around iter_tree_contents() and
2909 tree_lookup_path() to simplify the common task of getting the
2910 contents of a repository at a particular commit. See also
2911 dulwich.index.build_file_from_blob() for writing individual files
2912 to disk.
2914 Args:
2915 store: Object store to get trees from
2916 commit: Commit object, or SHA1 of a commit
2917 include: if provided, only the entries whose paths are in the
2918 list, or whose parent tree is in the list, will be
2919 included. Note that duplicate or overlapping paths
2920 (e.g. ["foo", "foo/bar"]) may result in duplicate entries
2922 Yields: TreeEntry namedtuples for all matching files in a commit.
2923 """
2924 sha = commit.id if isinstance(commit, Commit) else commit
2925 if not isinstance(obj := store[sha], Commit):
2926 raise TypeError(
2927 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
2928 )
2929 commit = obj
2930 encoding = commit.encoding or "utf-8"
2931 include_bytes: list[bytes] = (
2932 [
2933 path if isinstance(path, bytes) else str(path).encode(encoding)
2934 for path in include
2935 ]
2936 if include is not None
2937 else [b""]
2938 )
2940 for path in include_bytes:
2941 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
2942 # Iterate all contained files if path points to a dir, otherwise just get that
2943 # single file
2944 if isinstance(store[obj_id], Tree):
2945 for entry in iter_tree_contents(store, obj_id):
2946 yield entry.in_path(path)
2947 else:
2948 yield TreeEntry(path, mode, obj_id)
2951def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
2952 """Peel all tags from a SHA.
2954 Args:
2955 store: Object store to get objects from
2956 sha: The object SHA to peel.
2957 Returns: A tuple (unpeeled, peeled) of ShaFile objects, where peeled is
2958 the object reached after following all intermediate tags; if the original
2959 SHA does not point to a tag, both elements are the same object.
2960 """
2961 unpeeled = obj = store[sha]
2962 obj_class = object_class(obj.type_name)
2963 while obj_class is Tag:
2964 assert isinstance(obj, Tag)
2965 obj_class, sha = obj.object
2966 obj = store[sha]
2967 return unpeeled, obj
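# A minimal sketch (illustrative only, not part of the original module) of
# peel_sha(): follow a tag to the object it ultimately points at.
def _example_peel() -> None:
    store = MemoryObjectStore()
    blob = Blob.from_string(b"payload\n")
    store.add_object(blob)
    tag = Tag()
    tag.name = b"v1.0"
    tag.tagger = b"Tagger <tagger@example.com>"
    tag.tag_time = 0
    tag.tag_timezone = 0
    tag.message = b"release\n"
    tag.object = (Blob, blob.id)
    store.add_object(tag)
    unpeeled, peeled = peel_sha(store, tag.id)
    assert unpeeled.id == tag.id
    assert peeled.id == blob.id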