1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
22
23
24"""Git object store interfaces and implementation."""
25
import binascii
import os
import stat
import sys
import time
import warnings
from collections import deque
from collections.abc import Iterable, Iterator, Sequence
from contextlib import suppress
from io import BytesIO
from typing import (
    Callable,
    Optional,
    Protocol,
    Union,
    cast,
)
42
43from .errors import NotTreeError
44from .file import GitFile, _GitFile
45from .objects import (
46 S_ISGITLINK,
47 ZERO_SHA,
48 Blob,
49 Commit,
50 ObjectID,
51 ShaFile,
52 Tag,
53 Tree,
54 TreeEntry,
55 hex_to_filename,
56 hex_to_sha,
57 object_class,
58 sha_to_hex,
59 valid_hexsha,
60)
61from .pack import (
62 PACK_SPOOL_FILE_MAX_SIZE,
63 ObjectContainer,
64 Pack,
65 PackData,
66 PackedObjectContainer,
67 PackFileDisappeared,
68 PackHint,
69 PackIndexer,
70 PackInflater,
71 PackStreamCopier,
72 UnpackedObject,
73 extend_pack,
74 full_unpacked_object,
75 generate_unpacked_objects,
76 iter_sha1,
77 load_pack_index_file,
78 pack_objects_to_data,
79 write_pack_data,
80 write_pack_index,
81)
82from .protocol import DEPTH_INFINITE
83from .refs import PEELED_TAG_SUFFIX, Ref
84
# Subdirectory names within a git object store directory.
INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks
96
97
def find_shallow(store, heads, depth):
    """Find shallow commits according to a given depth.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
      considered shallow and unshallow according to the arguments. Note that
      these sets may overlap if a commit is reachable along multiple paths.
    """
    parents = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha):
        # Check for a cached entry with "is None" rather than truthiness:
        # an empty parent list (a root commit) is a valid cached value and
        # must not trigger a re-fetch on every visit.
        result = parents.get(sha)
        if result is None:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            result = store[sha].parents
            parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel (possibly nested) tags until we reach the tagged object.
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        # Only commits participate in the shallow computation.
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            shallow.add(sha)

    return shallow, not_shallow
150
151
def get_depth(
    store,
    head,
    get_parents=lambda commit: commit.parents,
    max_depth=None,
):
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to look up commits in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    Returns: Depth of the longest known ancestry chain starting at head,
      or 0 if head is not present in the store.
    """
    if head not in store:
        return 0
    current_depth = 1
    # Breadth-first walk. A deque gives O(1) pops from the left; the
    # previous list.pop(0) was O(n) per pop, i.e. O(n^2) overall.
    queue = deque([(head, current_depth)])
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.popleft()
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            if isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        # Only follow parents that are actually present in the store.
        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth
192
193
class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack.

        Returns: Tuple of (file-like object to write pack data to,
            commit callback to finalize the pack,
            abort callback to discard a partially written pack).
        """
200
class BaseObjectStore:
    """Object store interface."""

    def determine_wants_all(
        self, refs: dict[Ref, ObjectID], depth: Optional[int] = None
    ) -> list[ObjectID]:
        """Determine all objects that are wanted by the client.

        Args:
          refs: Dictionary mapping ref names to object IDs
          depth: Shallow fetch depth (None for full fetch)

        Returns:
          List of object IDs that are wanted
        """

        def _want_deepen(sha):
            # A falsy depth (None/0) never deepens; DEPTH_INFINITE always
            # deepens; otherwise deepen only when the requested depth
            # exceeds what is currently available for this sha.
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        # Want every ref target not yet present (or due for deepening),
        # excluding peeled-tag refs (ending in ^{}) and the all-zeros sha.
        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
            and not sha == ZERO_SHA
        ]

    def contains_loose(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def __contains__(self, sha1: bytes) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        # Base implementation only knows about loose objects; subclasses
        # with pack support override this.
        return self.contains_loose(sha1)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects, progress=None) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress reporting callback
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(
        self,
        source,
        target,
        want_unchanged=False,
        include_trees=False,
        change_type_same=False,
        rename_detector=None,
        paths=None,
    ):
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Imported lazily to avoid a module-level import cycle with
        # dulwich.diff_tree.
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            yield (
                (change.old.path, change.new.path),
                (change.old.mode, change.new.mode),
                (change.old.sha, change.new.sha),
            )

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
            tree.

        .. deprecated:: use :func:`dulwich.object_store.iter_tree_contents`
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # Naive implementation: one lookup per sha. Subclasses with pack
        # support provide a batched version.
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves,
        wants,
        shallow=None,
        progress=None,
        get_tagged=None,
        get_parents=lambda commit: commit.parents,
    ):
        """Find the missing objects required for a set of revisions.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.

        .. deprecated:: use :class:`MissingObjectFinder` directly
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        # The graphwalker yields a falsy value (e.g. None) once exhausted.
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        Returns: Tuple with the number of objects and an iterator over
            UnpackedObject instances.
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.

        .. deprecated:: use :func:`dulwich.object_store.peel_sha`
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1]

    def _get_depth(
        self,
        head,
        get_parents=lambda commit: commit.parents,
        max_depth=None,
    ):
        """Return the current available depth for the given head.
        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self):
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(self, refs=None, reachable=True) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Raises:
          NotImplementedError: The base implementation does not support
            writing commit graphs; subclasses should override this method
            to provide commit graph writing functionality.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha):
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)
532
533
class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level=-1,
        pack_index_version=None,
        pack_delta_window_size=None,
        pack_window_memory=None,
        pack_delta_cache_size=None,
        pack_depth=None,
        pack_threads=None,
        pack_big_file_threshold=None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
        """
        # Maps pack base name -> open Pack object.
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store."""
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> None:
        """Add pack data to this object store.

        Args:
          count: Number of items to add
          unpacked_objects: Iterator over UnpackedObject instances to write
          progress: Optional progress reporting callback
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                compression_level=self.pack_compression_level,
            )
        except BaseException:
            # Discard the partially written pack on any failure (including
            # KeyboardInterrupt), then re-raise.
            abort()
            raise
        else:
            return commit()

    @property
    def alternates(self):
        """Get the list of alternate object stores.

        Returns:
          List of alternate BaseObjectStore instances
        """
        return []

    def contains_packed(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                # Pack was removed (e.g. concurrent repack); keep checking
                # the remaining packs.
                pass
        return False

    def __contains__(self, sha) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        # Unlike contains_packed/contains_loose, also consult alternates.
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name, pack) -> None:
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            # Close the pack object this one replaced, if any.
            if prev_pack:
                prev_pack.close()

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        Returns: Tuple with the number of objects and an iterator over
            UnpackedObject instances.
        """
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            cast(PackedObjectContainer, self),
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

    def _clear_cached_packs(self) -> None:
        """Close and forget all packs currently held in the cache."""
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self):
        """Iterate over the Pack objects currently in the cache."""
        return self._pack_cache.values()

    def _update_pack_cache(self) -> list[Pack]:
        """Refresh the pack cache; return newly discovered packs.

        Subclass responsibility.
        """
        raise NotImplementedError(self._update_pack_cache)

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        """
        self._clear_cached_packs()

    @property
    def packs(self):
        """List with pack objects."""
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())

    def count_pack_files(self) -> int:
        """Count the number of pack files.

        Returns:
          Number of pack files (excluding those with .keep files)
        """
        count = 0
        for pack in self.packs:
            # Check if there's a .keep file for this pack
            keep_path = pack._basename + ".keep"
            if not os.path.exists(keep_path):
                count += 1
        return count

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha) -> Optional[ShaFile]:
        # Return the loose object for sha, or None if not present loose.
        # Subclass responsibility.
        raise NotImplementedError(self._get_loose_object)

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.
        """
        raise NotImplementedError(self.delete_loose_object)

    def _remove_pack(self, name) -> None:
        # Remove the given pack from disk. Subclass responsibility.
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self):
        """Pack loose objects.

        Returns: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        # Now that they are packed, the loose copies are redundant.
        for obj, path in objects:
            self.delete_loose_object(obj.id)
        return len(objects)

    def repack(self, exclude=None):
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.

        Args:
          exclude: Optional set of object SHAs to exclude from repacking
        """
        if exclude is None:
            exclude = set()

        loose_objects = set()
        excluded_loose_objects = set()
        for sha in self._iter_loose_objects():
            if sha not in exclude:
                loose_objects.add(self._get_loose_object(sha))
            else:
                excluded_loose_objects.add(sha)

        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update(
                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
            )

        # Only create a new pack if there are objects to pack
        if objects:
            # The name of the consolidated pack might match the name of a
            # pre-existing pack. Take care not to remove the newly created
            # consolidated pack.
            # NOTE(review): add_objects/add_pack_data are annotated "-> None"
            # but this code relies on the new Pack being returned here; the
            # annotations look stale - confirm against subclasses.
            consolidated = self.add_objects(objects)
            old_packs.pop(consolidated.name(), None)

        # Delete loose objects that were packed
        for obj in loose_objects:
            self.delete_loose_object(obj.id)
        # Delete excluded loose objects
        for sha in excluded_loose_objects:
            self.delete_loose_object(sha)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store.

        Note: no deduplication is performed, so an object stored in more
        than one location (e.g. both loose and packed) may be yielded more
        than once.
        """
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                yield from pack
            except PackFileDisappeared:
                pass
        yield from self._iter_loose_objects()
        yield from self._iter_alternate_objects()

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        # Accept both hex (40-byte) and binary (20-byte) sha forms.
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        # Search order: cached packs, loose objects, freshly discovered
        # packs, then alternates.
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def iter_unpacked_subset(
        self,
        shas: set[bytes],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
          shas: Set of object SHAs to retrieve
          include_comp: Whether to include compressed data
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # todo shrinks as objects are found, so each later source is only
        # asked for the shas still outstanding.
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        This method searches for objects in pack files, alternates, and loose storage.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # todo shrinks as objects are found; remaining shas are tried as
        # loose objects at the end.
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for alternate in self.alternates:
            for o in alternate.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for oid in todo:
            o = self._get_loose_object(oid)
            if o is not None:
                yield o
            elif not allow_missing:
                raise KeyError(oid)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
          sha1: sha for the object.
          include_comp: Whether to include compressed data
        """
        if sha1 == ZERO_SHA:
            raise KeyError(sha1)
        # Accept both hex (40-byte) and binary (20-byte) sha forms.
        if len(sha1) == 40:
            sha = hex_to_sha(sha1)
            hexsha = sha1
        elif len(sha1) == 20:
            sha = sha1
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(sha1)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable[[str], None]] = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over (object, path) tuples, should support
            __len__.
        Returns: Pack object of the objects written.
        """
        count = len(objects)
        record_iter = (full_unpacked_object(o) for (o, p) in objects)
        return self.add_pack_data(count, record_iter, progress=progress)
1000
1001
1002class DiskObjectStore(PackBasedObjectStore):
1003 """Git-style object store that exists on disk."""
1004
1005 path: Union[str, os.PathLike]
1006 pack_dir: Union[str, os.PathLike]
1007
1008 def __init__(
1009 self,
1010 path: Union[str, os.PathLike],
1011 loose_compression_level=-1,
1012 pack_compression_level=-1,
1013 pack_index_version=None,
1014 pack_delta_window_size=None,
1015 pack_window_memory=None,
1016 pack_delta_cache_size=None,
1017 pack_depth=None,
1018 pack_threads=None,
1019 pack_big_file_threshold=None,
1020 ) -> None:
1021 """Open an object store.
1022
1023 Args:
1024 path: Path of the object store.
1025 loose_compression_level: zlib compression level for loose objects
1026 pack_compression_level: zlib compression level for pack objects
1027 pack_index_version: pack index version to use (1, 2, or 3)
1028 pack_delta_window_size: sliding window size for delta compression
1029 pack_window_memory: memory limit for delta window operations
1030 pack_delta_cache_size: size of cache for delta operations
1031 pack_depth: maximum delta chain depth
1032 pack_threads: number of threads for pack operations
1033 pack_big_file_threshold: threshold for treating files as big
1034 """
1035 super().__init__(
1036 pack_compression_level=pack_compression_level,
1037 pack_index_version=pack_index_version,
1038 pack_delta_window_size=pack_delta_window_size,
1039 pack_window_memory=pack_window_memory,
1040 pack_delta_cache_size=pack_delta_cache_size,
1041 pack_depth=pack_depth,
1042 pack_threads=pack_threads,
1043 pack_big_file_threshold=pack_big_file_threshold,
1044 )
1045 self.path = path
1046 self.pack_dir = os.path.join(self.path, PACKDIR)
1047 self._alternates = None
1048 self.loose_compression_level = loose_compression_level
1049 self.pack_compression_level = pack_compression_level
1050 self.pack_index_version = pack_index_version
1051
1052 # Commit graph support - lazy loaded
1053 self._commit_graph = None
1054 self._use_commit_graph = True # Default to true
1055
1056 def __repr__(self) -> str:
1057 """Return string representation of DiskObjectStore.
1058
1059 Returns:
1060 String representation including the store path
1061 """
1062 return f"<{self.__class__.__name__}({self.path!r})>"
1063
1064 @classmethod
1065 def from_config(cls, path: Union[str, os.PathLike], config):
1066 """Create a DiskObjectStore from a configuration object.
1067
1068 Args:
1069 path: Path to the object store directory
1070 config: Configuration object to read settings from
1071
1072 Returns:
1073 New DiskObjectStore instance configured according to config
1074 """
1075 try:
1076 default_compression_level = int(
1077 config.get((b"core",), b"compression").decode()
1078 )
1079 except KeyError:
1080 default_compression_level = -1
1081 try:
1082 loose_compression_level = int(
1083 config.get((b"core",), b"looseCompression").decode()
1084 )
1085 except KeyError:
1086 loose_compression_level = default_compression_level
1087 try:
1088 pack_compression_level = int(
1089 config.get((b"core",), "packCompression").decode()
1090 )
1091 except KeyError:
1092 pack_compression_level = default_compression_level
1093 try:
1094 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1095 except KeyError:
1096 pack_index_version = None
1097
1098 # Read pack configuration options
1099 try:
1100 pack_delta_window_size = int(
1101 config.get((b"pack",), b"deltaWindowSize").decode()
1102 )
1103 except KeyError:
1104 pack_delta_window_size = None
1105 try:
1106 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1107 except KeyError:
1108 pack_window_memory = None
1109 try:
1110 pack_delta_cache_size = int(
1111 config.get((b"pack",), b"deltaCacheSize").decode()
1112 )
1113 except KeyError:
1114 pack_delta_cache_size = None
1115 try:
1116 pack_depth = int(config.get((b"pack",), b"depth").decode())
1117 except KeyError:
1118 pack_depth = None
1119 try:
1120 pack_threads = int(config.get((b"pack",), b"threads").decode())
1121 except KeyError:
1122 pack_threads = None
1123 try:
1124 pack_big_file_threshold = int(
1125 config.get((b"pack",), b"bigFileThreshold").decode()
1126 )
1127 except KeyError:
1128 pack_big_file_threshold = None
1129
1130 # Read core.commitGraph setting
1131 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1132
1133 instance = cls(
1134 path,
1135 loose_compression_level,
1136 pack_compression_level,
1137 pack_index_version,
1138 pack_delta_window_size,
1139 pack_window_memory,
1140 pack_delta_cache_size,
1141 pack_depth,
1142 pack_threads,
1143 pack_big_file_threshold,
1144 )
1145 instance._use_commit_graph = use_commit_graph
1146 return instance
1147
1148 @property
1149 def alternates(self):
1150 """Get the list of alternate object stores.
1151
1152 Reads from .git/objects/info/alternates if not already cached.
1153
1154 Returns:
1155 List of DiskObjectStore instances for alternate object directories
1156 """
1157 if self._alternates is not None:
1158 return self._alternates
1159 self._alternates = []
1160 for path in self._read_alternate_paths():
1161 self._alternates.append(DiskObjectStore(path))
1162 return self._alternates
1163
1164 def _read_alternate_paths(self):
1165 try:
1166 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1167 except FileNotFoundError:
1168 return
1169 with f:
1170 for line in f.readlines():
1171 line = line.rstrip(b"\n")
1172 if line.startswith(b"#"):
1173 continue
1174 if os.path.isabs(line):
1175 yield os.fsdecode(line)
1176 else:
1177 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1178
1179 def add_alternate_path(self, path) -> None:
1180 """Add an alternate path to this object store."""
1181 try:
1182 os.mkdir(os.path.join(self.path, INFODIR))
1183 except FileExistsError:
1184 pass
1185 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1186 with GitFile(alternates_path, "wb") as f:
1187 try:
1188 orig_f = open(alternates_path, "rb")
1189 except FileNotFoundError:
1190 pass
1191 else:
1192 with orig_f:
1193 f.write(orig_f.read())
1194 f.write(os.fsencode(path) + b"\n")
1195
1196 if not os.path.isabs(path):
1197 path = os.path.join(self.path, path)
1198 self.alternates.append(DiskObjectStore(path))
1199
1200 def _update_pack_cache(self):
1201 """Read and iterate over new pack files and cache them."""
1202 try:
1203 pack_dir_contents = os.listdir(self.pack_dir)
1204 except FileNotFoundError:
1205 self.close()
1206 return []
1207 pack_files = set()
1208 for name in pack_dir_contents:
1209 if name.startswith("pack-") and name.endswith(".pack"):
1210 # verify that idx exists first (otherwise the pack was not yet
1211 # fully written)
1212 idx_name = os.path.splitext(name)[0] + ".idx"
1213 if idx_name in pack_dir_contents:
1214 pack_name = name[: -len(".pack")]
1215 pack_files.add(pack_name)
1216
1217 # Open newly appeared pack files
1218 new_packs = []
1219 for f in pack_files:
1220 if f not in self._pack_cache:
1221 pack = Pack(
1222 os.path.join(self.pack_dir, f),
1223 delta_window_size=self.pack_delta_window_size,
1224 window_memory=self.pack_window_memory,
1225 delta_cache_size=self.pack_delta_cache_size,
1226 depth=self.pack_depth,
1227 threads=self.pack_threads,
1228 big_file_threshold=self.pack_big_file_threshold,
1229 )
1230 new_packs.append(pack)
1231 self._pack_cache[f] = pack
1232 # Remove disappeared pack files
1233 for f in set(self._pack_cache) - pack_files:
1234 self._pack_cache.pop(f).close()
1235 return new_packs
1236
    def _get_shafile_path(self, sha):
        """Return the on-disk path for the loose object with hex *sha*."""
        # Check from object dir; loose objects live at
        # <objdir>/<first two hex chars>/<remaining 38 hex chars>.
        return hex_to_filename(self.path, sha)
1240
1241 def _iter_loose_objects(self):
1242 for base in os.listdir(self.path):
1243 if len(base) != 2:
1244 continue
1245 for rest in os.listdir(os.path.join(self.path, base)):
1246 sha = os.fsencode(base + rest)
1247 if not valid_hexsha(sha):
1248 continue
1249 yield sha
1250
1251 def count_loose_objects(self) -> int:
1252 """Count the number of loose objects in the object store.
1253
1254 Returns:
1255 Number of loose objects
1256 """
1257 count = 0
1258 if not os.path.exists(self.path):
1259 return 0
1260
1261 for i in range(256):
1262 subdir = os.path.join(self.path, f"{i:02x}")
1263 try:
1264 count += len(
1265 [
1266 name
1267 for name in os.listdir(subdir)
1268 if len(name) == 38 # 40 - 2 for the prefix
1269 ]
1270 )
1271 except FileNotFoundError:
1272 # Directory may have been removed or is inaccessible
1273 continue
1274
1275 return count
1276
1277 def _get_loose_object(self, sha):
1278 path = self._get_shafile_path(sha)
1279 try:
1280 return ShaFile.from_path(path)
1281 except FileNotFoundError:
1282 return None
1283
1284 def delete_loose_object(self, sha) -> None:
1285 """Delete a loose object from disk.
1286
1287 Args:
1288 sha: SHA1 of the object to delete
1289
1290 Raises:
1291 FileNotFoundError: If the object file doesn't exist
1292 """
1293 os.remove(self._get_shafile_path(sha))
1294
1295 def get_object_mtime(self, sha):
1296 """Get the modification time of an object.
1297
1298 Args:
1299 sha: SHA1 of the object
1300
1301 Returns:
1302 Modification time as seconds since epoch
1303
1304 Raises:
1305 KeyError: if the object is not found
1306 """
1307 # First check if it's a loose object
1308 if self.contains_loose(sha):
1309 path = self._get_shafile_path(sha)
1310 try:
1311 return os.path.getmtime(path)
1312 except FileNotFoundError:
1313 pass
1314
1315 # Check if it's in a pack file
1316 for pack in self.packs:
1317 try:
1318 if sha in pack:
1319 # Use the pack file's mtime for packed objects
1320 pack_path = pack._data_path
1321 try:
1322 return os.path.getmtime(pack_path)
1323 except (FileNotFoundError, AttributeError):
1324 pass
1325 except PackFileDisappeared:
1326 pass
1327
1328 raise KeyError(sha)
1329
1330 def _remove_pack(self, pack) -> None:
1331 try:
1332 del self._pack_cache[os.path.basename(pack._basename)]
1333 except KeyError:
1334 pass
1335 pack.close()
1336 os.remove(pack.data.path)
1337 os.remove(pack.index.path)
1338
1339 def _get_pack_basepath(self, entries):
1340 suffix = iter_sha1(entry[0] for entry in entries)
1341 # TODO: Handle self.pack_dir being bytes
1342 suffix = suffix.decode("ascii")
1343 return os.path.join(self.pack_dir, "pack-" + suffix)
1344
    def _complete_pack(self, f, path, num_objects, indexer, progress=None):
        """Move a specific file containing a pack into the pack directory.

        Note: The file should be on the same file system as the
        packs directory.

        Args:
            f: Open file object for the pack.
            path: Path to the pack file.
            num_objects: Number of objects in the pack, used only for
                progress reporting.
            indexer: A PackIndexer for indexing the pack.
            progress: Optional progress reporting callback.

        Returns:
            The completed Pack, now resident in the pack directory.
        """
        entries = []
        # Drain the indexer to collect the pack index entries.
        for i, entry in enumerate(indexer):
            if progress is not None:
                progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
            entries.append(entry)

        # Append any externally-referenced base objects (thin pack case)
        # so the pack becomes self-contained; this rewrites the trailing
        # pack checksum, returned here as pack_sha.
        pack_sha, extra_entries = extend_pack(
            f,
            indexer.ext_refs(),
            get_raw=self.get_raw,
            compression_level=self.pack_compression_level,
            progress=progress,
        )
        f.flush()
        try:
            fileno = f.fileno()
        except AttributeError:
            pass
        else:
            # Make sure the pack data is on disk before renaming it into
            # its final location below.
            os.fsync(fileno)
        f.close()

        entries.extend(extra_entries)

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)

        # The pack name is derived from its contents, so an existing pack
        # with the same base name is identical; reuse it.
        for pack in self.packs:
            if pack._basename == pack_base_name:
                return pack

        target_pack_path = pack_base_name + ".pack"
        target_index_path = pack_base_name + ".idx"
        if sys.platform == "win32":
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            with suppress(FileNotFoundError):
                os.remove(target_pack_path)
        os.rename(path, target_pack_path)

        # Write the index.
        with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
            write_pack_index(
                index_file, entries, pack_sha, version=self.pack_index_version
            )

        # Add the pack to the store and return it.
        final_pack = Pack(
            pack_base_name,
            delta_window_size=self.pack_delta_window_size,
            window_memory=self.pack_window_memory,
            delta_cache_size=self.pack_delta_cache_size,
            depth=self.pack_depth,
            threads=self.pack_threads,
            big_file_threshold=self.pack_big_file_threshold,
        )
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack
1416
1417 def add_thin_pack(self, read_all, read_some, progress=None):
1418 """Add a new thin pack to this object store.
1419
1420 Thin packs are packs that contain deltas with parents that exist
1421 outside the pack. They should never be placed in the object store
1422 directly, and always indexed and completed as they are copied.
1423
1424 Args:
1425 read_all: Read function that blocks until the number of
1426 requested bytes are read.
1427 read_some: Read function that returns at least one byte, but may
1428 not return the number of bytes requested.
1429 Returns: A Pack object pointing at the now-completed thin pack in the
1430 objects/pack directory.
1431 """
1432 import tempfile
1433
1434 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1435 with os.fdopen(fd, "w+b") as f:
1436 os.chmod(path, PACK_MODE)
1437 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
1438 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
1439 copier.verify(progress=progress)
1440 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1441
1442 def add_pack(self):
1443 """Add a new pack to this object store.
1444
1445 Returns: Fileobject to write to, a commit function to
1446 call when the pack is finished and an abort
1447 function.
1448 """
1449 import tempfile
1450
1451 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1452 f = os.fdopen(fd, "w+b")
1453 os.chmod(path, PACK_MODE)
1454
1455 def commit():
1456 if f.tell() > 0:
1457 f.seek(0)
1458 with PackData(path, f) as pd:
1459 indexer = PackIndexer.for_pack_data(
1460 pd, resolve_ext_ref=self.get_raw
1461 )
1462 return self._complete_pack(f, path, len(pd), indexer)
1463 else:
1464 f.close()
1465 os.remove(path)
1466 return None
1467
1468 def abort() -> None:
1469 f.close()
1470 os.remove(path)
1471
1472 return f, commit, abort
1473
1474 def add_object(self, obj) -> None:
1475 """Add a single object to this object store.
1476
1477 Args:
1478 obj: Object to add
1479 """
1480 path = self._get_shafile_path(obj.id)
1481 dir = os.path.dirname(path)
1482 try:
1483 os.mkdir(dir)
1484 except FileExistsError:
1485 pass
1486 if os.path.exists(path):
1487 return # Already there, no need to write again
1488 with GitFile(path, "wb", mask=PACK_MODE) as f:
1489 f.write(
1490 obj.as_legacy_object(compression_level=self.loose_compression_level)
1491 )
1492
1493 @classmethod
1494 def init(cls, path: Union[str, os.PathLike]):
1495 """Initialize a new disk object store.
1496
1497 Creates the necessary directory structure for a Git object store.
1498
1499 Args:
1500 path: Path where the object store should be created
1501
1502 Returns:
1503 New DiskObjectStore instance
1504 """
1505 try:
1506 os.mkdir(path)
1507 except FileExistsError:
1508 pass
1509 os.mkdir(os.path.join(path, "info"))
1510 os.mkdir(os.path.join(path, PACKDIR))
1511 return cls(path)
1512
1513 def iter_prefix(self, prefix):
1514 """Iterate over all object SHAs with the given prefix.
1515
1516 Args:
1517 prefix: Hex prefix to search for (as bytes)
1518
1519 Returns:
1520 Iterator of object SHAs (as bytes) matching the prefix
1521 """
1522 if len(prefix) < 2:
1523 yield from super().iter_prefix(prefix)
1524 return
1525 seen = set()
1526 dir = prefix[:2].decode()
1527 rest = prefix[2:].decode()
1528 try:
1529 for name in os.listdir(os.path.join(self.path, dir)):
1530 if name.startswith(rest):
1531 sha = os.fsencode(dir + name)
1532 if sha not in seen:
1533 seen.add(sha)
1534 yield sha
1535 except FileNotFoundError:
1536 pass
1537
1538 for p in self.packs:
1539 bin_prefix = (
1540 binascii.unhexlify(prefix)
1541 if len(prefix) % 2 == 0
1542 else binascii.unhexlify(prefix[:-1])
1543 )
1544 for sha in p.index.iter_prefix(bin_prefix):
1545 sha = sha_to_hex(sha)
1546 if sha.startswith(prefix) and sha not in seen:
1547 seen.add(sha)
1548 yield sha
1549 for alternate in self.alternates:
1550 for sha in alternate.iter_prefix(prefix):
1551 if sha not in seen:
1552 seen.add(sha)
1553 yield sha
1554
1555 def get_commit_graph(self):
1556 """Get the commit graph for this object store.
1557
1558 Returns:
1559 CommitGraph object if available, None otherwise
1560 """
1561 if not self._use_commit_graph:
1562 return None
1563
1564 if self._commit_graph is None:
1565 from .commit_graph import read_commit_graph
1566
1567 # Look for commit graph in our objects directory
1568 graph_file = os.path.join(self.path, "info", "commit-graph")
1569 if os.path.exists(graph_file):
1570 self._commit_graph = read_commit_graph(graph_file)
1571 return self._commit_graph
1572
1573 def write_commit_graph(self, refs=None, reachable=True) -> None:
1574 """Write a commit graph file for this object store.
1575
1576 Args:
1577 refs: List of refs to include. If None, includes all refs from object store.
1578 reachable: If True, includes all commits reachable from refs.
1579 If False, only includes the direct ref targets.
1580 """
1581 from .commit_graph import get_reachable_commits
1582
1583 if refs is None:
1584 # Get all commit objects from the object store
1585 all_refs = []
1586 # Iterate through all objects to find commits
1587 for sha in self:
1588 try:
1589 obj = self[sha]
1590 if obj.type_name == b"commit":
1591 all_refs.append(sha)
1592 except KeyError:
1593 continue
1594 else:
1595 # Use provided refs
1596 all_refs = refs
1597
1598 if not all_refs:
1599 return # No commits to include
1600
1601 if reachable:
1602 # Get all reachable commits
1603 commit_ids = get_reachable_commits(self, all_refs)
1604 else:
1605 # Just use the direct ref targets - ensure they're hex ObjectIDs
1606 commit_ids = []
1607 for ref in all_refs:
1608 if isinstance(ref, bytes) and len(ref) == 40:
1609 # Already hex ObjectID
1610 commit_ids.append(ref)
1611 elif isinstance(ref, bytes) and len(ref) == 20:
1612 # Binary SHA, convert to hex ObjectID
1613 from .objects import sha_to_hex
1614
1615 commit_ids.append(sha_to_hex(ref))
1616 else:
1617 # Assume it's already correct format
1618 commit_ids.append(ref)
1619
1620 if commit_ids:
1621 # Write commit graph directly to our object store path
1622 # Generate the commit graph
1623 from .commit_graph import generate_commit_graph
1624
1625 graph = generate_commit_graph(self, commit_ids)
1626
1627 if graph.entries:
1628 # Ensure the info directory exists
1629 info_dir = os.path.join(self.path, "info")
1630 os.makedirs(info_dir, exist_ok=True)
1631
1632 # Write using GitFile for atomic operation
1633 graph_path = os.path.join(info_dir, "commit-graph")
1634 with GitFile(graph_path, "wb") as f:
1635 assert isinstance(
1636 f, _GitFile
1637 ) # GitFile in write mode always returns _GitFile
1638 graph.write_to_file(f)
1639
1640 # Clear cached commit graph so it gets reloaded
1641 self._commit_graph = None
1642
1643 def prune(self, grace_period: Optional[int] = None) -> None:
1644 """Prune/clean up this object store.
1645
1646 This removes temporary files that were left behind by interrupted
1647 pack operations. These are files that start with ``tmp_pack_`` in the
1648 repository directory or files with .pack extension but no corresponding
1649 .idx file in the pack directory.
1650
1651 Args:
1652 grace_period: Grace period in seconds for removing temporary files.
1653 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
1654 """
1655 import glob
1656
1657 if grace_period is None:
1658 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1659
1660 # Clean up tmp_pack_* files in the repository directory
1661 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1662 # Check if file is old enough (more than grace period)
1663 mtime = os.path.getmtime(tmp_file)
1664 if time.time() - mtime > grace_period:
1665 os.remove(tmp_file)
1666
1667 # Clean up orphaned .pack files without corresponding .idx files
1668 try:
1669 pack_dir_contents = os.listdir(self.pack_dir)
1670 except FileNotFoundError:
1671 return
1672
1673 pack_files = {}
1674 idx_files = set()
1675
1676 for name in pack_dir_contents:
1677 if name.endswith(".pack"):
1678 base_name = name[:-5] # Remove .pack extension
1679 pack_files[base_name] = name
1680 elif name.endswith(".idx"):
1681 base_name = name[:-4] # Remove .idx extension
1682 idx_files.add(base_name)
1683
1684 # Remove .pack files without corresponding .idx files
1685 for base_name, pack_name in pack_files.items():
1686 if base_name not in idx_files:
1687 pack_path = os.path.join(self.pack_dir, pack_name)
1688 # Check if file is old enough (more than grace period)
1689 mtime = os.path.getmtime(pack_path)
1690 if time.time() - mtime > grace_period:
1691 os.remove(pack_path)
1692
1693
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self) -> None:
        """Initialize a MemoryObjectStore.

        Creates an empty in-memory object store.
        """
        super().__init__()
        # Maps 40-byte hex object IDs to the stored ShaFile objects.
        self._data: dict[ObjectID, ShaFile] = {}
        # NOTE(review): this store never writes packs; presumably kept for
        # interface parity with disk-backed stores — confirm.
        self.pack_compression_level = -1

    def _to_hexsha(self, sha):
        """Normalize *sha* (40-byte hex or 20-byte binary) to hex form.

        Raises:
            ValueError: if *sha* has neither hex nor binary SHA1 length.
        """
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        # A memory store never holds packed objects.
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name: ObjectID):
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID):
        """Retrieve an object by SHA.

        Args:
            name: SHA of the object (as hex string or bytes)

        Returns:
            Copy of the ShaFile object

        Raises:
            KeyError: If the object is not found
        """
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj) -> None:
        """Add a single object to this object store."""
        # Store a copy so later mutation of *obj* does not affect the store.
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects, progress=None) -> None:
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress callback (unused here).
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        from tempfile import SpooledTemporaryFile

        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            # Inflate every object from the received pack and store each
            # one individually; an empty stream is simply discarded.
            size = f.tell()
            if size > 0:
                f.seek(0)
                p = PackData.from_file(f, size)
                for obj in PackInflater.for_pack_data(p, self.get_raw):
                    self.add_object(obj)
                p.close()
                f.close()
            else:
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort

    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> None:
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator over the objects to add.
            progress: Optional progress callback.
        """
        if count == 0:
            return

        # Since MemoryObjectStore doesn't support pack files, we need to
        # extract individual objects. To handle deltas properly, we write
        # to a temporary pack and then use PackInflater to resolve them.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
            )
        except BaseException:
            abort()
            raise
        else:
            commit()

    def add_thin_pack(self, read_all, read_some, progress=None) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
            progress: Optional progress callback.
        """
        f, commit, abort = self.add_pack()
        try:
            # verify() copies the stream into f while checking it; commit()
            # then inflates and stores the individual objects.
            copier = PackStreamCopier(read_all, read_some, f)
            copier.verify()
        except BaseException:
            abort()
            raise
        else:
            commit()
1852
1853
class ObjectIterator(Protocol):
    """Structural interface for anything that can iterate over objects."""

    def iterobjects(self) -> Iterator[ShaFile]:
        """Iterate over all objects.

        Returns:
            Iterator of ShaFile objects
        """
        # Protocol stub: implementers provide the actual iteration.
        raise NotImplementedError(self.iterobjects)
1864
1865
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup
    Returns: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        # The starting point must be a tree object.
        raise NotTreeError(root_sha)
    return root.lookup_path(lookup_obj, path)
1879
1880
def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Collect SHA1s of files and directories for specified tree.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories
    """
    tree = obj_store[tree_sha]
    assert isinstance(tree, Tree)
    for _name, mode, sha in tree.iteritems():
        # Submodule (gitlink) entries point outside this repository;
        # already-seen SHAs have been fully walked.
        if S_ISGITLINK(mode) or sha in kset:
            continue
        kset.add(sha)
        if stat.S_ISDIR(mode):
            # Recurse into subtrees.
            _collect_filetree_revs(obj_store, sha, kset)
1898
1899
def _split_commits_and_tags(
    obj_store: ObjectContainer, lst, *, ignore_unknown=False
) -> tuple[set[bytes], set[bytes], set[bytes]]:
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    Args:
        obj_store: Object store to get objects by SHA1 from
        lst: Collection of commit and tag SHAs
        ignore_unknown: True to skip SHA1 missing in the repository
            silently.
    Returns: A tuple of (commits, tags, others) SHA1s
    """
    commits: set[bytes] = set()
    tags: set[bytes] = set()
    others: set[bytes] = set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if not ignore_unknown:
                raise
            continue
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # Resolve the tag target recursively so that tagged commits
            # (and chained tags) also land in the right buckets.
            tagged = obj.object[1]
            sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                obj_store, [tagged], ignore_unknown=ignore_unknown
            )
            commits |= sub_commits
            tags |= sub_tags
            others |= sub_others
        else:
            others.add(sha)
    return (commits, tags, others)
1941
1942
class MissingObjectFinder:
    """Find the objects missing from another object store.

    Args:
        object_store: Object store containing at least all objects to be
            sent
        haves: SHA1s of commits not to send (already present in target)
        wants: SHA1s of commits to send
        progress: Optional function to report progress to.
        get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(
        self,
        object_store,
        haves,
        wants,
        *,
        shallow=None,
        progress=None,
        get_tagged=None,
        get_parents=lambda commit: commit.parents,
    ) -> None:
        """Initialize a MissingObjectFinder.

        Args:
            object_store: Object store containing objects
            haves: SHA1s of objects already present in target
            wants: SHA1s of objects to send
            shallow: Set of shallow commit SHA1s
            progress: Optional progress reporting callback
            get_tagged: Function returning dict of pointed-to sha -> tag sha
            get_parents: Function for getting commit parents
        """
        self.object_store = object_store
        if shallow is None:
            shallow = set()
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, ignore_unknown=True
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, ignore_unknown=False
        )
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = _collect_ancestors(
            object_store, have_commits, shallow=shallow, get_parents=self._get_parents
        )[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            all_ancestors,
            shallow=shallow,
            get_parents=self._get_parents,
        )
        self.remote_has: set[bytes] = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.remote_has.add(h)
            cmt = object_store[h]
            # Every blob/tree reachable from a common commit's tree is
            # also already present remotely.
            _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
        # record tags we have as visited, too
        for t in have_tags:
            self.remote_has.add(t)
        # sha_done tracks everything already handled (or known remote).
        self.sha_done = set(self.remote_has)

        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        # Entries are (sha, name hint, type hint, is_leaf) tuples.
        self.objects_to_send: set[
            tuple[ObjectID, Optional[bytes], Optional[int], bool]
        ] = {(w, None, Commit.type_num, False) for w in missing_commits}
        missing_tags = want_tags.difference(have_tags)
        self.objects_to_send.update(
            {(w, None, Tag.type_num, False) for w in missing_tags}
        )
        missing_others = want_others.difference(have_others)
        self.objects_to_send.update({(w, None, None, False) for w in missing_others})

        if progress is None:
            # Default to a no-op progress callback.
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = (get_tagged and get_tagged()) or {}

    def get_remote_has(self):
        """Get the set of SHAs the remote has.

        Returns:
            Set of SHA1s that the remote side already has
        """
        return self.remote_has

    def add_todo(
        self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
    ) -> None:
        """Add objects to the todo list.

        Args:
            entries: Iterable of tuples (sha, name, type_num, is_leaf)
        """
        # Skip entries that have already been processed.
        self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])

    def __next__(self) -> tuple[bytes, Optional[PackHint]]:
        """Get the next object to send.

        Returns:
            Tuple of (sha, pack_hint)

        Raises:
            StopIteration: When no more objects to send
        """
        while True:
            if not self.objects_to_send:
                self.progress(
                    f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
                )
                raise StopIteration
            (sha, name, type_num, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            # Non-leaf objects may reference further objects; queue them.
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, b"", Tree.type_num, False)])
            elif isinstance(o, Tree):
                self.add_todo(
                    [
                        (
                            s,
                            n,
                            (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
                            not stat.S_ISDIR(m),
                        )
                        for n, m, s in o.iteritems()
                        if not S_ISGITLINK(m)
                    ]
                )
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
        if sha in self._tagged:
            # Also send the tag object pointing at this sha.
            self.add_todo([(self._tagged[sha], None, None, True)])
        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
        if type_num is None:
            pack_hint = None
        else:
            pack_hint = (type_num, name)
        return (sha, pack_hint)

    def __iter__(self):
        """Return iterator over objects to send.

        Returns:
            Self (this class implements the iterator protocol)
        """
        return self
2114
2115
class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    heads: set[ObjectID]
    """Revisions without descendants in the local repo."""

    get_parents: Callable[[ObjectID], ObjectID]
    """Function to retrieve parents in the local repo."""

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents,
        shallow: Optional[set[ObjectID]] = None,
        update_shallow=None,
    ) -> None:
        """Create a new instance.

        Args:
            local_heads: Heads to start search with
            get_parents: Function for finding the parents of a SHA1.
            shallow: Optional set of shallow commit SHA1s.
            update_shallow: Optional callback for shallow updates.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps a visited SHA to its parent list; None once acked/processed.
        self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
        self.shallow = set() if shallow is None else shallow
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError(f"unexpected sha {sha!r} received")
        frontier = {sha}

        # Walk known ancestry breadth-first, pruning acked heads as we go;
        # stop once no heads remain to remove.
        while self.heads:
            # Drop any frontier nodes that are still listed as heads.
            self.heads.difference_update(frontier)

            # Expand the frontier to the recorded parents, marking each
            # node as fully processed by nulling its parents entry.
            next_frontier: set = set()
            for node in frontier:
                recorded = self.parents.get(node)
                if recorded is not None:
                    next_frontier.update(recorded)
                self.parents[node] = None

            if not next_frontier:
                # No further ancestors are known locally; stop.
                break

            frontier = next_frontier

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        ret = self.heads.pop()
        try:
            ps = self.get_parents(ret)
        except KeyError:
            # Unknown object locally; nothing more to report for it.
            return None
        self.parents[ret] = ps
        # Queue parents we have not visited yet.
        self.heads.update(p for p in ps if p not in self.parents)
        return ret

    __next__ = next
2191
2192
def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
        object_store: Object store to store new objects in
            and retrieve old ones from.
        tree: Original tree root
        changes: changes to apply
    Returns: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    deferred = {}
    for path, mode, sha in changes:
        if b"/" in path:
            # Change lives in a subtree: defer it, grouped by directory.
            dirname, remainder = path.split(b"/", 1)
            deferred.setdefault(dirname, []).append((remainder, mode, sha))
        elif sha is None:
            del tree[path]
        else:
            tree[path] = (mode, sha)
    for dirname, subchanges in deferred.items():
        try:
            existing = object_store[tree[dirname][1]]
        except KeyError:
            existing = Tree()
        new_subtree = commit_tree_changes(object_store, existing, subchanges)
        if len(new_subtree) == 0:
            # Subtree became empty; drop its entry entirely.
            del tree[dirname]
        else:
            tree[dirname] = (stat.S_IFDIR, new_subtree.id)
    object_store.add_object(tree)
    return tree
2239
2240
class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores."""

    def __init__(self, bases, add_store=None) -> None:
        """Initialize an OverlayObjectStore.

        Args:
            bases: List of base object stores to overlay
            add_store: Optional store to write new objects to
        """
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object):
        """Add a single object to the store.

        Args:
            object: Object to add

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects, progress=None):
        """Add multiple objects to the store.

        Args:
            objects: Iterator of objects to add
            progress: Optional progress reporting callback

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Fixed: previously raised NotImplementedError(self.add_object),
            # misreporting which method is unimplemented.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self):
        """Get the list of packs from all overlaid stores.

        Returns:
            Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
            Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[bytes] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[bytes],
        *,
        include_comp=False,
        allow_missing: bool = False,
        convert_ofs_delta=True,
    ) -> Iterator[ShaFile]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of unpacked objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[bytes] = set()

        for b in self.bases:
            # Hand each base a snapshot so the shared set is never mutated
            # while the base store is iterating over it (same approach as
            # iterobjects_subset).
            current_todo = todo - found
            for o in b.iter_unpacked_subset(
                current_todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                found.add(o.id)

        missing = todo - found
        if missing and not allow_missing:
            # Fixed: previously raised KeyError(o.id) -- the id of an object
            # that *was* found -- and raised NameError when no object had
            # been yielded at all. Report an actually-missing SHA instead.
            raise KeyError(next(iter(missing)))

    def get_raw(self, sha_id):
        """Get the raw object data from the overlaid stores.

        Args:
            sha_id: SHA of the object

        Returns:
            Tuple of (type_num, raw_data)

        Raises:
            KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha) -> bool:
        """Check if an object is packed in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha) -> bool:
        """Check if an object is loose in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False
2418
2419
def read_packs_file(f):
    """Yield the names of the packs listed in a packs file.

    Args:
        f: File-like object to read the packs list from.
    Returns: Iterator over pack names (as str)
    """
    nonblank = (entry for entry in f.read().splitlines() if entry)
    for entry in nonblank:
        # Each line is "<kind> <name>"; only "P" (pack) entries matter.
        kind, pack_name = entry.split(b" ", 1)
        if kind == b"P":
            yield os.fsdecode(pack_name)
2429
2430
class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend.

    Bucket stores hold only packs; every loose-object operation is a no-op.
    Concrete subclasses must implement _remove_pack, _iter_pack_names,
    _get_pack and _upload_pack.
    """

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects.

        Always empty: bucket-based stores keep everything in packs.
        """
        return iter([])

    def _get_loose_object(self, sha) -> None:
        """Return the loose object for sha; always None for bucket stores.

        Args:
            sha: SHA of the object
        """
        return None

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
            sha: SHA of the object to delete
        """
        # Doesn't exist..

    def _remove_pack(self, name) -> None:
        # Must be implemented by concrete bucket store subclasses.
        raise NotImplementedError(self._remove_pack)

    def _iter_pack_names(self) -> Iterator[str]:
        # Must be implemented by concrete bucket store subclasses.
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name) -> Pack:
        # Must be implemented by concrete bucket store subclasses.
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self):
        """Synchronize the in-memory pack cache with the bucket contents.

        Returns:
            List of newly discovered Pack objects
        """
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        # Upload the pack and its index under basename; must be implemented
        # by concrete bucket store subclasses.
        raise NotImplementedError

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        # Spool to memory first, overflowing to disk for large packs.
        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit():
            # Nothing was written: abort quietly rather than create an
            # empty pack.
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)
            p = PackData(pf.name, pf)
            entries = p.sorted_entries()
            # Pack name is derived from the sorted object SHAs.
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf)
            # If an identical pack already exists, reuse it instead of
            # uploading a duplicate.
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        return pf, commit, pf.close
2524
2525
def _collect_ancestors(
    store: ObjectContainer,
    heads,
    common: frozenset[ObjectID] = frozenset(),
    shallow: frozenset[ObjectID] = frozenset(),
    get_parents=lambda commit: commit.parents,
):
    """Collect all ancestors of heads up to (excluding) those in common.

    Args:
      store: Object store to retrieve commits (and the commit graph,
        if available) from.
      heads: commits to start from
      common: commits to end at, or empty set to walk repository
        completely
      shallow: set of shallow commits whose parents are not walked
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: a tuple (A, B) where A - all commits reachable
        from heads but not present in common, B - common (shared) elements
        that are directly reachable from heads
    """
    from collections import deque

    bases = set()
    commits = set()
    # BFS queue; deque gives O(1) popleft, where list.pop(0) was O(n)
    # per pop (quadratic over long histories).
    queue = deque(heads)

    # Try to use commit graph if available
    commit_graph = store.get_commit_graph()

    while queue:
        e = queue.popleft()
        if e in common:
            bases.add(e)
        elif e not in commits:
            commits.add(e)
            if e in shallow:
                # Shallow boundary: do not walk past this commit.
                continue

            # Try to use commit graph for parent lookup
            parents = None
            if commit_graph:
                parents = commit_graph.get_parents(e)

            if parents is None:
                # Fall back to loading the object
                cmt = store[e]
                parents = get_parents(cmt)

            queue.extend(parents)
    return (commits, bases)
2574
2575
def iter_tree_contents(
    store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
):
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
      store: Object store to load tree objects from.
      tree_id: SHA1 of the tree.
      include_trees: If True, include tree objects in the iteration.
    Returns: Iterator over TreeEntry namedtuples for all the objects in a
        tree.
    """
    if tree_id is None:
        return
    # Stack of pending entries, starting with a synthetic root entry.
    pending = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while pending:
        current = pending.pop()
        is_subtree = stat.S_ISDIR(current.mode)
        if is_subtree:
            subtree = store[current.sha]
            assert isinstance(subtree, Tree)
            children = [
                item.in_path(current.path)
                for item in subtree.iteritems(name_order=True)
            ]
            # Push reversed so the first child is popped (visited) first.
            pending.extend(reversed(children))
        if include_trees or not is_subtree:
            yield current
2605
2606
def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
      store: Object store to load objects from.
      sha: The object SHA to peel.
    Returns: Tuple of (original object, fully-peeled object). After
        peeling all intermediate tags; if the original ref does not
        point to a tag, both elements are the same object.
    """
    unpeeled = obj = store[sha]
    cls = object_class(obj.type_name)
    # Follow the tag chain until something other than a tag is reached.
    while cls is Tag:
        assert isinstance(obj, Tag)
        cls, sha = obj.object
        obj = store[sha]
    return unpeeled, obj