1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6# General Public License as published by the Free Software Foundation; version 2.0
7# or (at your option) any later version. You can redistribute it and/or
8# modify it under the terms of either of these two licenses.
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# You should have received a copy of the licenses; if not, see
17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19# License, Version 2.0.
20#
21
22
23"""Git object store interfaces and implementation."""
24
25import os
26import stat
27import sys
28import warnings
29from contextlib import suppress
30from io import BytesIO
31from typing import (
32 Callable,
33 Dict,
34 FrozenSet,
35 Iterable,
36 Iterator,
37 List,
38 Optional,
39 Protocol,
40 Sequence,
41 Set,
42 Tuple,
43 cast,
44)
45
46from .errors import NotTreeError
47from .file import GitFile
48from .objects import (
49 S_ISGITLINK,
50 ZERO_SHA,
51 Blob,
52 Commit,
53 ObjectID,
54 ShaFile,
55 Tag,
56 Tree,
57 TreeEntry,
58 hex_to_filename,
59 hex_to_sha,
60 object_class,
61 sha_to_hex,
62 valid_hexsha,
63)
64from .pack import (
65 PACK_SPOOL_FILE_MAX_SIZE,
66 ObjectContainer,
67 Pack,
68 PackData,
69 PackedObjectContainer,
70 PackFileDisappeared,
71 PackHint,
72 PackIndexer,
73 PackInflater,
74 PackStreamCopier,
75 UnpackedObject,
76 extend_pack,
77 full_unpacked_object,
78 generate_unpacked_objects,
79 iter_sha1,
80 load_pack_index_file,
81 pack_objects_to_data,
82 write_pack_data,
83 write_pack_index,
84)
85from .protocol import DEPTH_INFINITE
86from .refs import PEELED_TAG_SUFFIX, Ref
87
88INFODIR = "info"
89PACKDIR = "pack"
90
91# use permissions consistent with Git; just readable by everyone
92# TODO: should packs also be non-writable on Windows? if so, that
93# would require some rather significant adjustments to the test suite
94PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
95
96
97class PackContainer(Protocol):
98 def add_pack(self) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]:
99 """Add a new pack."""
100
101
102class BaseObjectStore:
103 """Object store interface."""
104
105 def determine_wants_all(
106 self, refs: Dict[Ref, ObjectID], depth: Optional[int] = None
107 ) -> List[ObjectID]:
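        """Determine which objects to fetch for the given remote refs.

        Returns the SHAs from ``refs`` that are not yet present in this
        store (or that need deepening), skipping peeled tag refs and
        ZERO_SHA.
        """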
108 def _want_deepen(sha):
109 if not depth:
110 return False
111 if depth == DEPTH_INFINITE:
112 return True
113 return depth > self._get_depth(sha)
114
115 return [
116 sha
117 for (ref, sha) in refs.items()
118 if (sha not in self or _want_deepen(sha))
119 and not ref.endswith(PEELED_TAG_SUFFIX)
120 and not sha == ZERO_SHA
121 ]
122
123 def contains_loose(self, sha):
124 """Check if a particular object is present by SHA1 and is loose."""
125 raise NotImplementedError(self.contains_loose)
126
127 def __contains__(self, sha1: bytes) -> bool:
128 """Check if a particular object is present by SHA1.
129
130 This method makes no distinction between loose and packed objects.
131 """
132 return self.contains_loose(sha1)
133
134 @property
135 def packs(self):
136 """Iterable of pack objects."""
137 raise NotImplementedError
138
139 def get_raw(self, name):
140 """Obtain the raw text for an object.
141
142 Args:
143 name: sha for the object.
144 Returns: tuple with numeric type and object contents.
145 """
146 raise NotImplementedError(self.get_raw)
147
148 def __getitem__(self, sha1: ObjectID) -> ShaFile:
149 """Obtain an object by SHA1."""
150 type_num, uncomp = self.get_raw(sha1)
151 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
152
153 def __iter__(self):
154 """Iterate over the SHAs that are present in this store."""
155 raise NotImplementedError(self.__iter__)
156
157 def add_object(self, obj):
158 """Add a single object to this object store."""
159 raise NotImplementedError(self.add_object)
160
161 def add_objects(self, objects, progress=None):
162 """Add a set of objects to this object store.
163
164 Args:
165 objects: Iterable of (object, path) tuples
166 """
167 raise NotImplementedError(self.add_objects)
168
169 def tree_changes(
170 self,
171 source,
172 target,
173 want_unchanged=False,
174 include_trees=False,
175 change_type_same=False,
176 rename_detector=None,
177 ):
178 """Find the differences between the contents of two trees.
179
180 Args:
181 source: SHA1 of the source tree
182 target: SHA1 of the target tree
183 want_unchanged: Whether unchanged files should be reported
184 include_trees: Whether to include trees
185 change_type_same: Whether to report files changing
186 type in the same entry.
187 Returns: Iterator over tuples with
188 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
189 """
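        # Illustrative sketch (hedged; `store`, `old_tree` and `new_tree` are
        # hypothetical names for an object store and two tree SHAs):
        #
        #   for (paths, modes, shas) in store.tree_changes(old_tree, new_tree):
        #       (oldpath, newpath) = paths
        #       (oldmode, newmode) = modes
        #       (oldsha, newsha) = shas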
190 from .diff_tree import tree_changes
191
192 for change in tree_changes(
193 self,
194 source,
195 target,
196 want_unchanged=want_unchanged,
197 include_trees=include_trees,
198 change_type_same=change_type_same,
199 rename_detector=rename_detector,
200 ):
201 yield (
202 (change.old.path, change.new.path),
203 (change.old.mode, change.new.mode),
204 (change.old.sha, change.new.sha),
205 )
206
207 def iter_tree_contents(self, tree_id, include_trees=False):
208 """Iterate the contents of a tree and all subtrees.
209
210 Iteration is depth-first pre-order, as in e.g. os.walk.
211
212 Args:
213 tree_id: SHA1 of the tree.
214 include_trees: If True, include tree objects in the iteration.
215 Returns: Iterator over TreeEntry namedtuples for all the objects in a
216 tree.
217 """
218 warnings.warn(
219 "Please use dulwich.object_store.iter_tree_contents",
220 DeprecationWarning,
221 stacklevel=2,
222 )
223 return iter_tree_contents(self, tree_id, include_trees=include_trees)
224
225 def iterobjects_subset(
226 self, shas: Iterable[bytes], *, allow_missing: bool = False
227 ) -> Iterator[ShaFile]:
228 for sha in shas:
229 try:
230 yield self[sha]
231 except KeyError:
232 if not allow_missing:
233 raise
234
235 def find_missing_objects(
236 self,
237 haves,
238 wants,
239 shallow=None,
240 progress=None,
241 get_tagged=None,
242 get_parents=lambda commit: commit.parents,
243 ):
244 """Find the missing objects required for a set of revisions.
245
246 Args:
247 haves: Iterable over SHAs already in common.
248 wants: Iterable over SHAs of objects to fetch.
249 shallow: Set of shallow commit SHA1s to skip
250 progress: Simple progress function that will be called with
251 updated progress strings.
252 get_tagged: Function that returns a dict of pointed-to sha ->
253 tag sha for including tags.
254 get_parents: Optional function for getting the parents of a
255 commit.
256 Returns: Iterator over (sha, path) pairs.
257 """
258 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
259 finder = MissingObjectFinder(
260 self,
261 haves=haves,
262 wants=wants,
263 shallow=shallow,
264 progress=progress,
265 get_tagged=get_tagged,
266 get_parents=get_parents,
267 )
268 return iter(finder)
269
270 def find_common_revisions(self, graphwalker):
271 """Find which revisions this store has in common using graphwalker.
272
273 Args:
274 graphwalker: A graphwalker object.
275 Returns: List of SHAs that are in common
276 """
277 haves = []
278 sha = next(graphwalker)
279 while sha:
280 if sha in self:
281 haves.append(sha)
282 graphwalker.ack(sha)
283 sha = next(graphwalker)
284 return haves
285
286 def generate_pack_data(
287 self, have, want, shallow=None, progress=None, ofs_delta=True
288 ) -> Tuple[int, Iterator[UnpackedObject]]:
289 """Generate pack data objects for a set of wants/haves.
290
291 Args:
292 have: List of SHA1s of objects that should not be sent
293 want: List of SHA1s of objects that should be sent
294 shallow: Set of shallow commit SHA1s to skip
295 ofs_delta: Whether OFS deltas can be included
296 progress: Optional progress reporting method
297 """
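        # Illustrative sketch (hedged; `haves`/`wants` are hypothetical lists
        # of SHAs and `out` is any writable file object):
        #
        #   count, records = store.generate_pack_data(haves, wants)
        #   write_pack_data(out.write, records, num_records=count)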
298 # Note that the pack-specific implementation below is more efficient,
299 # as it reuses deltas
300 missing_objects = MissingObjectFinder(
301 self, haves=have, wants=want, shallow=shallow, progress=progress
302 )
303 object_ids = list(missing_objects)
304 return pack_objects_to_data(
305 [(self[oid], path) for oid, path in object_ids],
306 ofs_delta=ofs_delta,
307 progress=progress,
308 )
309
310 def peel_sha(self, sha):
311 """Peel all tags from a SHA.
312
313 Args:
314 sha: The object SHA to peel.
315 Returns: The fully-peeled SHA1 of a tag object, after peeling all
316 intermediate tags; if the original ref does not point to a tag,
317 this will equal the original SHA1.
318 """
319 warnings.warn(
320 "Please use dulwich.object_store.peel_sha()",
321 DeprecationWarning,
322 stacklevel=2,
323 )
324 return peel_sha(self, sha)[1]
325
326 def _get_depth(
327 self,
328 head,
329 get_parents=lambda commit: commit.parents,
330 max_depth=None,
331 ):
332 """Return the current available depth for the given head.
333 For commits with multiple parents, the largest possible depth will be
334 returned.
335
336 Args:
337 head: commit to start from
338 get_parents: optional function for getting the parents of a commit
339 max_depth: maximum depth to search
340 """
341 if head not in self:
342 return 0
343 current_depth = 1
344 queue = [(head, current_depth)]
345 while queue and (max_depth is None or current_depth < max_depth):
346 e, depth = queue.pop(0)
347 current_depth = max(current_depth, depth)
348 cmt = self[e]
349 if isinstance(cmt, Tag):
350 _cls, sha = cmt.object
351 cmt = self[sha]
352 queue.extend(
353 (parent, depth + 1) for parent in get_parents(cmt) if parent in self
354 )
355 return current_depth
356
357 def close(self):
358 """Close any files opened by this object store."""
359 # Default implementation is a NO-OP
360
361
362class PackBasedObjectStore(BaseObjectStore):
363 def __init__(self, pack_compression_level=-1) -> None:
364 self._pack_cache: Dict[str, Pack] = {}
365 self.pack_compression_level = pack_compression_level
366
367 def add_pack(self) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]:
368 """Add a new pack to this object store."""
369 raise NotImplementedError(self.add_pack)
370
371 def add_pack_data(
372 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
373 ) -> Optional[Pack]:
374 """Add pack data to this object store.
375
376 Args:
377 count: Number of items to add
378 unpacked_objects: Iterator over UnpackedObject instances to write
379 """
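        # Illustrative pairing with pack_objects_to_data (hedged sketch;
        # `objs` is a hypothetical iterable of ShaFile instances):
        #
        #   count, records = pack_objects_to_data([(o, None) for o in objs])
        #   store.add_pack_data(count, records)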
380 if count == 0:
381 # Don't bother writing an empty pack file
382 return
383 f, commit, abort = self.add_pack()
384 try:
385 write_pack_data(
386 f.write,
387 unpacked_objects,
388 num_records=count,
389 progress=progress,
390 compression_level=self.pack_compression_level,
391 )
392 except BaseException:
393 abort()
394 raise
395 else:
396 return commit()
397
398 @property
399 def alternates(self):
400 return []
401
402 def contains_packed(self, sha):
403 """Check if a particular object is present by SHA1 and is packed.
404
405 This does not check alternates.
406 """
407 for pack in self.packs:
408 try:
409 if sha in pack:
410 return True
411 except PackFileDisappeared:
412 pass
413 return False
414
415 def __contains__(self, sha) -> bool:
416 """Check if a particular object is present by SHA1.
417
418 This method makes no distinction between loose and packed objects.
419 """
420 if self.contains_packed(sha) or self.contains_loose(sha):
421 return True
422 for alternate in self.alternates:
423 if sha in alternate:
424 return True
425 return False
426
427 def _add_cached_pack(self, base_name, pack):
428 """Add a newly appeared pack to the cache by path."""
429 prev_pack = self._pack_cache.get(base_name)
430 if prev_pack is not pack:
431 self._pack_cache[base_name] = pack
432 if prev_pack:
433 prev_pack.close()
434
435 def generate_pack_data(
436 self, have, want, shallow=None, progress=None, ofs_delta=True
437 ) -> Tuple[int, Iterator[UnpackedObject]]:
438 """Generate pack data objects for a set of wants/haves.
439
440 Args:
441 have: List of SHA1s of objects that should not be sent
442 want: List of SHA1s of objects that should be sent
443 shallow: Set of shallow commit SHA1s to skip
444 ofs_delta: Whether OFS deltas can be included
445 progress: Optional progress reporting method
446 """
447 missing_objects = MissingObjectFinder(
448 self, haves=have, wants=want, shallow=shallow, progress=progress
449 )
450 remote_has = missing_objects.get_remote_has()
451 object_ids = list(missing_objects)
452 return len(object_ids), generate_unpacked_objects(
453 cast(PackedObjectContainer, self),
454 object_ids,
455 progress=progress,
456 ofs_delta=ofs_delta,
457 other_haves=remote_has,
458 )
459
460 def _clear_cached_packs(self):
461 pack_cache = self._pack_cache
462 self._pack_cache = {}
463 while pack_cache:
464 (name, pack) = pack_cache.popitem()
465 pack.close()
466
467 def _iter_cached_packs(self):
468 return self._pack_cache.values()
469
470 def _update_pack_cache(self):
471 raise NotImplementedError(self._update_pack_cache)
472
473 def close(self):
474 self._clear_cached_packs()
475
476 @property
477 def packs(self):
478 """List with pack objects."""
479 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
480
481 def _iter_alternate_objects(self):
482 """Iterate over the SHAs of all the objects in alternate stores."""
483 for alternate in self.alternates:
484 yield from alternate
485
486 def _iter_loose_objects(self):
487 """Iterate over the SHAs of all loose objects."""
488 raise NotImplementedError(self._iter_loose_objects)
489
490 def _get_loose_object(self, sha):
491 raise NotImplementedError(self._get_loose_object)
492
493 def _remove_loose_object(self, sha):
494 raise NotImplementedError(self._remove_loose_object)
495
496 def _remove_pack(self, name):
497 raise NotImplementedError(self._remove_pack)
498
499 def pack_loose_objects(self):
500 """Pack loose objects.
501
502 Returns: Number of objects packed
503 """
504 objects = set()
505 for sha in self._iter_loose_objects():
506 objects.add((self._get_loose_object(sha), None))
507 self.add_objects(list(objects))
508 for obj, path in objects:
509 self._remove_loose_object(obj.id)
510 return len(objects)
511
512 def repack(self):
513 """Repack the packs in this repository.
514
515 Note that this implementation is fairly naive and currently keeps all
516 objects in memory while it repacks.
517 """
518 loose_objects = set()
519 for sha in self._iter_loose_objects():
520 loose_objects.add(self._get_loose_object(sha))
521 objects = {(obj, None) for obj in loose_objects}
522 old_packs = {p.name(): p for p in self.packs}
523 for name, pack in old_packs.items():
524 objects.update((obj, None) for obj in pack.iterobjects())
525
526 # The name of the consolidated pack might match the name of a
527 # pre-existing pack. Take care not to remove the newly created
528 # consolidated pack.
529
530 consolidated = self.add_objects(objects)
531 old_packs.pop(consolidated.name(), None)
532
533 for obj in loose_objects:
534 self._remove_loose_object(obj.id)
535 for name, pack in old_packs.items():
536 self._remove_pack(pack)
537 self._update_pack_cache()
538 return len(objects)
539
540 def __iter__(self):
541 """Iterate over the SHAs that are present in this store."""
542 self._update_pack_cache()
543 for pack in self._iter_cached_packs():
544 try:
545 yield from pack
546 except PackFileDisappeared:
547 pass
548 yield from self._iter_loose_objects()
549 yield from self._iter_alternate_objects()
550
551 def contains_loose(self, sha):
552 """Check if a particular object is present by SHA1 and is loose.
553
554 This does not check alternates.
555 """
556 return self._get_loose_object(sha) is not None
557
558 def get_raw(self, name):
559 """Obtain the raw fulltext for an object.
560
561 Args:
562 name: sha for the object.
563 Returns: tuple with numeric type and object contents.
564 """
565 if name == ZERO_SHA:
566 raise KeyError(name)
567 if len(name) == 40:
568 sha = hex_to_sha(name)
569 hexsha = name
570 elif len(name) == 20:
571 sha = name
572 hexsha = None
573 else:
574 raise AssertionError(f"Invalid object name {name!r}")
575 for pack in self._iter_cached_packs():
576 try:
577 return pack.get_raw(sha)
578 except (KeyError, PackFileDisappeared):
579 pass
580 if hexsha is None:
581 hexsha = sha_to_hex(name)
582 ret = self._get_loose_object(hexsha)
583 if ret is not None:
584 return ret.type_num, ret.as_raw_string()
585 # Maybe something else has added a pack with the object
586 # in the mean time?
587 for pack in self._update_pack_cache():
588 try:
589 return pack.get_raw(sha)
590 except KeyError:
591 pass
592 for alternate in self.alternates:
593 try:
594 return alternate.get_raw(hexsha)
595 except KeyError:
596 pass
597 raise KeyError(hexsha)
598
599 def iter_unpacked_subset(
600 self,
601 shas,
602 *,
603 include_comp=False,
604 allow_missing: bool = False,
605 convert_ofs_delta: bool = True,
606 ) -> Iterator[UnpackedObject]:
607 todo: Set[bytes] = set(shas)
608 for p in self._iter_cached_packs():
609 for unpacked in p.iter_unpacked_subset(
610 todo,
611 include_comp=include_comp,
612 allow_missing=True,
613 convert_ofs_delta=convert_ofs_delta,
614 ):
615 yield unpacked
616 hexsha = sha_to_hex(unpacked.sha())
617 todo.remove(hexsha)
618 # Maybe something else has added a pack with the object
619 # in the mean time?
620 for p in self._update_pack_cache():
621 for unpacked in p.iter_unpacked_subset(
622 todo,
623 include_comp=include_comp,
624 allow_missing=True,
625 convert_ofs_delta=convert_ofs_delta,
626 ):
627 yield unpacked
628 hexsha = sha_to_hex(unpacked.sha())
629 todo.remove(hexsha)
630 for alternate in self.alternates:
631 for unpacked in alternate.iter_unpacked_subset(
632 todo,
633 include_comp=include_comp,
634 allow_missing=True,
635 convert_ofs_delta=convert_ofs_delta,
636 ):
637 yield unpacked
638 hexsha = sha_to_hex(unpacked.sha())
639 todo.remove(hexsha)
640
641 def iterobjects_subset(
642 self, shas: Iterable[bytes], *, allow_missing: bool = False
643 ) -> Iterator[ShaFile]:
644 todo: Set[bytes] = set(shas)
645 for p in self._iter_cached_packs():
646 for o in p.iterobjects_subset(todo, allow_missing=True):
647 yield o
648 todo.remove(o.id)
649 # Maybe something else has added a pack with the object
650 # in the mean time?
651 for p in self._update_pack_cache():
652 for o in p.iterobjects_subset(todo, allow_missing=True):
653 yield o
654 todo.remove(o.id)
655 for alternate in self.alternates:
656 for o in alternate.iterobjects_subset(todo, allow_missing=True):
657 yield o
658 todo.remove(o.id)
659 for oid in todo:
660 o = self._get_loose_object(oid)
661 if o is not None:
662 yield o
663 elif not allow_missing:
664 raise KeyError(oid)
665
666 def get_unpacked_object(
667 self, sha1: bytes, *, include_comp: bool = False
668 ) -> UnpackedObject:
669 """Obtain the unpacked object.
670
671 Args:
672 sha1: sha for the object.
673 """
674 if sha1 == ZERO_SHA:
675 raise KeyError(sha1)
676 if len(sha1) == 40:
677 sha = hex_to_sha(sha1)
678 hexsha = sha1
679 elif len(sha1) == 20:
680 sha = sha1
681 hexsha = None
682 else:
683 raise AssertionError(f"Invalid object sha1 {sha1!r}")
684 for pack in self._iter_cached_packs():
685 try:
686 return pack.get_unpacked_object(sha, include_comp=include_comp)
687 except (KeyError, PackFileDisappeared):
688 pass
689 if hexsha is None:
690 hexsha = sha_to_hex(sha1)
691 # Maybe something else has added a pack with the object
692 # in the mean time?
693 for pack in self._update_pack_cache():
694 try:
695 return pack.get_unpacked_object(sha, include_comp=include_comp)
696 except KeyError:
697 pass
698 for alternate in self.alternates:
699 try:
700 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
701 except KeyError:
702 pass
703 raise KeyError(hexsha)
704
705 def add_objects(
706 self,
707 objects: Sequence[Tuple[ShaFile, Optional[str]]],
708 progress: Optional[Callable[[str], None]] = None,
709 ) -> Optional[Pack]:
710 """Add a set of objects to this object store.
711
712 Args:
713 objects: Iterable over (object, path) tuples, should support
714 __len__.
715 Returns: Pack object of the objects written.
716 """
717 count = len(objects)
718 record_iter = (full_unpacked_object(o) for (o, p) in objects)
719 return self.add_pack_data(count, record_iter, progress=progress)
720
721
722class DiskObjectStore(PackBasedObjectStore):
723 """Git-style object store that exists on disk."""
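    # Illustrative sketch (hedged; the path below is an example only):
    #
    #   store = DiskObjectStore("/path/to/repo/.git/objects")
    #   blob = Blob.from_string(b"hello")
    #   store.add_object(blob)          # written as a loose object
    #   store.pack_loose_objects()      # later consolidated into a pack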
724
725 def __init__(
726 self, path, loose_compression_level=-1, pack_compression_level=-1
727 ) -> None:
728 """Open an object store.
729
730 Args:
731 path: Path of the object store.
732 loose_compression_level: zlib compression level for loose objects
733 pack_compression_level: zlib compression level for pack objects
734 """
735 super().__init__(pack_compression_level=pack_compression_level)
736 self.path = path
737 self.pack_dir = os.path.join(self.path, PACKDIR)
738 self._alternates = None
739 self.loose_compression_level = loose_compression_level
740 self.pack_compression_level = pack_compression_level
741
742 def __repr__(self) -> str:
743 return f"<{self.__class__.__name__}({self.path!r})>"
744
745 @classmethod
746 def from_config(cls, path, config):
747 try:
748 default_compression_level = int(
749 config.get((b"core",), b"compression").decode()
750 )
751 except KeyError:
752 default_compression_level = -1
753 try:
754 loose_compression_level = int(
755 config.get((b"core",), b"looseCompression").decode()
756 )
757 except KeyError:
758 loose_compression_level = default_compression_level
759 try:
760 pack_compression_level = int(
761 config.get((b"core",), b"packCompression").decode()
762 )
763 except KeyError:
764 pack_compression_level = default_compression_level
765 return cls(path, loose_compression_level, pack_compression_level)
766
767 @property
768 def alternates(self):
769 if self._alternates is not None:
770 return self._alternates
771 self._alternates = []
772 for path in self._read_alternate_paths():
773 self._alternates.append(DiskObjectStore(path))
774 return self._alternates
775
776 def _read_alternate_paths(self):
777 try:
778 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
779 except FileNotFoundError:
780 return
781 with f:
782 for line in f.readlines():
783 line = line.rstrip(b"\n")
784 if line.startswith(b"#"):
785 continue
786 if os.path.isabs(line):
787 yield os.fsdecode(line)
788 else:
789 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
790
791 def add_alternate_path(self, path):
792 """Add an alternate path to this object store."""
793 try:
794 os.mkdir(os.path.join(self.path, INFODIR))
795 except FileExistsError:
796 pass
797 alternates_path = os.path.join(self.path, INFODIR, "alternates")
798 with GitFile(alternates_path, "wb") as f:
799 try:
800 orig_f = open(alternates_path, "rb")
801 except FileNotFoundError:
802 pass
803 else:
804 with orig_f:
805 f.write(orig_f.read())
806 f.write(os.fsencode(path) + b"\n")
807
808 if not os.path.isabs(path):
809 path = os.path.join(self.path, path)
810 self.alternates.append(DiskObjectStore(path))
811
812 def _update_pack_cache(self):
813 """Read and iterate over new pack files and cache them."""
814 try:
815 pack_dir_contents = os.listdir(self.pack_dir)
816 except FileNotFoundError:
817 self.close()
818 return []
819 pack_files = set()
820 for name in pack_dir_contents:
821 if name.startswith("pack-") and name.endswith(".pack"):
822 # verify that idx exists first (otherwise the pack was not yet
823 # fully written)
824 idx_name = os.path.splitext(name)[0] + ".idx"
825 if idx_name in pack_dir_contents:
826 pack_name = name[: -len(".pack")]
827 pack_files.add(pack_name)
828
829 # Open newly appeared pack files
830 new_packs = []
831 for f in pack_files:
832 if f not in self._pack_cache:
833 pack = Pack(os.path.join(self.pack_dir, f))
834 new_packs.append(pack)
835 self._pack_cache[f] = pack
836 # Remove disappeared pack files
837 for f in set(self._pack_cache) - pack_files:
838 self._pack_cache.pop(f).close()
839 return new_packs
840
841 def _get_shafile_path(self, sha):
842 # Check from object dir
843 return hex_to_filename(self.path, sha)
844
845 def _iter_loose_objects(self):
846 for base in os.listdir(self.path):
847 if len(base) != 2:
848 continue
849 for rest in os.listdir(os.path.join(self.path, base)):
850 sha = os.fsencode(base + rest)
851 if not valid_hexsha(sha):
852 continue
853 yield sha
854
855 def _get_loose_object(self, sha):
856 path = self._get_shafile_path(sha)
857 try:
858 return ShaFile.from_path(path)
859 except FileNotFoundError:
860 return None
861
862 def _remove_loose_object(self, sha):
863 os.remove(self._get_shafile_path(sha))
864
865 def _remove_pack(self, pack):
866 try:
867 del self._pack_cache[os.path.basename(pack._basename)]
868 except KeyError:
869 pass
870 pack.close()
871 os.remove(pack.data.path)
872 os.remove(pack.index.path)
873
874 def _get_pack_basepath(self, entries):
875 suffix = iter_sha1(entry[0] for entry in entries)
876 # TODO: Handle self.pack_dir being bytes
877 suffix = suffix.decode("ascii")
878 return os.path.join(self.pack_dir, "pack-" + suffix)
879
880 def _complete_pack(self, f, path, num_objects, indexer, progress=None):
881 """Move a specific file containing a pack into the pack directory.
882
883 Note: The file should be on the same file system as the
884 packs directory.
885
886 Args:
887 f: Open file object for the pack.
888 path: Path to the pack file.
889 indexer: A PackIndexer for indexing the pack.
          num_objects: Number of objects in the pack, for progress reporting.
          progress: Optional progress reporting function.
890 """
891 entries = []
892 for i, entry in enumerate(indexer):
893 if progress is not None:
894 progress(
895 ("generating index: %d/%d\r" % (i, num_objects)).encode("ascii")
896 )
897 entries.append(entry)
898
899 pack_sha, extra_entries = extend_pack(
900 f,
901 indexer.ext_refs(),
902 get_raw=self.get_raw,
903 compression_level=self.pack_compression_level,
904 progress=progress,
905 )
906 f.flush()
907 try:
908 fileno = f.fileno()
909 except AttributeError:
910 pass
911 else:
912 os.fsync(fileno)
913 f.close()
914
915 entries.extend(extra_entries)
916
917 # Move the pack in.
918 entries.sort()
919 pack_base_name = self._get_pack_basepath(entries)
920
921 for pack in self.packs:
922 if pack._basename == pack_base_name:
923 return pack
924
925 target_pack_path = pack_base_name + ".pack"
926 target_index_path = pack_base_name + ".idx"
927 if sys.platform == "win32":
928 # Windows might have the target pack file lingering. Attempt
929 # removal, silently passing if the target does not exist.
930 with suppress(FileNotFoundError):
931 os.remove(target_pack_path)
932 os.rename(path, target_pack_path)
933
934 # Write the index.
935 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
936 write_pack_index(index_file, entries, pack_sha)
937
938 # Add the pack to the store and return it.
939 final_pack = Pack(pack_base_name)
940 final_pack.check_length_and_checksum()
941 self._add_cached_pack(pack_base_name, final_pack)
942 return final_pack
943
944 def add_thin_pack(self, read_all, read_some, progress=None):
945 """Add a new thin pack to this object store.
946
947 Thin packs are packs that contain deltas with parents that exist
948 outside the pack. They should never be placed in the object store
949 directly; they are always indexed and completed as they are copied.
950
951 Args:
952 read_all: Read function that blocks until the number of
953 requested bytes are read.
954 read_some: Read function that returns at least one byte, but may
955 not return the number of bytes requested.
956 Returns: A Pack object pointing at the now-completed thin pack in the
957 objects/pack directory.
958 """
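        # Illustrative sketch (hedged): read_all/read_some would typically wrap
        # a network stream, e.g. a hypothetical protocol object `proto`:
        #
        #   pack = store.add_thin_pack(proto.read, proto.recv)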
959 import tempfile
960
961 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
962 with os.fdopen(fd, "w+b") as f:
963 os.chmod(path, PACK_MODE)
964 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
965 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
966 copier.verify(progress=progress)
967 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
968
969 def add_pack(self):
970 """Add a new pack to this object store.
971
972 Returns: Fileobject to write to, a commit function to
973 call when the pack is finished and an abort
974 function.
975 """
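        # Illustrative use of the returned triple (hedged sketch; `pack_bytes`
        # stands for a fully formed pack stream obtained elsewhere):
        #
        #   f, commit, abort = store.add_pack()
        #   try:
        #       f.write(pack_bytes)
        #   except BaseException:
        #       abort()
        #       raise
        #   else:
        #       pack = commit()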
976 import tempfile
977
978 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
979 f = os.fdopen(fd, "w+b")
980 os.chmod(path, PACK_MODE)
981
982 def commit():
983 if f.tell() > 0:
984 f.seek(0)
985 with PackData(path, f) as pd:
986 indexer = PackIndexer.for_pack_data(
987 pd, resolve_ext_ref=self.get_raw
988 )
989 return self._complete_pack(f, path, len(pd), indexer)
990 else:
991 f.close()
992 os.remove(path)
993 return None
994
995 def abort():
996 f.close()
997 os.remove(path)
998
999 return f, commit, abort
1000
1001 def add_object(self, obj):
1002 """Add a single object to this object store.
1003
1004 Args:
1005 obj: Object to add
1006 """
1007 path = self._get_shafile_path(obj.id)
1008 dir = os.path.dirname(path)
1009 try:
1010 os.mkdir(dir)
1011 except FileExistsError:
1012 pass
1013 if os.path.exists(path):
1014 return # Already there, no need to write again
1015 with GitFile(path, "wb", mask=PACK_MODE) as f:
1016 f.write(
1017 obj.as_legacy_object(compression_level=self.loose_compression_level)
1018 )
1019
1020 @classmethod
1021 def init(cls, path):
1022 try:
1023 os.mkdir(path)
1024 except FileExistsError:
1025 pass
1026 os.mkdir(os.path.join(path, "info"))
1027 os.mkdir(os.path.join(path, PACKDIR))
1028 return cls(path)
1029
1030
1031class MemoryObjectStore(BaseObjectStore):
1032 """Object store that keeps all objects in memory."""
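    # Illustrative sketch (hedged): a simple round-trip through the store.
    #
    #   store = MemoryObjectStore()
    #   blob = Blob.from_string(b"spam")
    #   store.add_object(blob)
    #   assert store[blob.id].as_raw_string() == b"spam"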
1033
1034 def __init__(self) -> None:
1035 super().__init__()
1036 self._data: Dict[bytes, ShaFile] = {}
1037 self.pack_compression_level = -1
1038
1039 def _to_hexsha(self, sha):
1040 if len(sha) == 40:
1041 return sha
1042 elif len(sha) == 20:
1043 return sha_to_hex(sha)
1044 else:
1045 raise ValueError(f"Invalid sha {sha!r}")
1046
1047 def contains_loose(self, sha):
1048 """Check if a particular object is present by SHA1 and is loose."""
1049 return self._to_hexsha(sha) in self._data
1050
1051 def contains_packed(self, sha):
1052 """Check if a particular object is present by SHA1 and is packed."""
1053 return False
1054
1055 def __iter__(self):
1056 """Iterate over the SHAs that are present in this store."""
1057 return iter(self._data.keys())
1058
1059 @property
1060 def packs(self):
1061 """List with pack objects."""
1062 return []
1063
1064 def get_raw(self, name: ObjectID):
1065 """Obtain the raw text for an object.
1066
1067 Args:
1068 name: sha for the object.
1069 Returns: tuple with numeric type and object contents.
1070 """
1071 obj = self[self._to_hexsha(name)]
1072 return obj.type_num, obj.as_raw_string()
1073
1074 def __getitem__(self, name: ObjectID):
1075 return self._data[self._to_hexsha(name)].copy()
1076
1077 def __delitem__(self, name: ObjectID) -> None:
1078 """Delete an object from this store, for testing only."""
1079 del self._data[self._to_hexsha(name)]
1080
1081 def add_object(self, obj):
1082 """Add a single object to this object store."""
1083 self._data[obj.id] = obj.copy()
1084
1085 def add_objects(self, objects, progress=None):
1086 """Add a set of objects to this object store.
1087
1088 Args:
1089 objects: Iterable of (object, path) tuples
1090 """
1091 for obj, path in objects:
1092 self.add_object(obj)
1093
1094 def add_pack(self):
1095 """Add a new pack to this object store.
1096
1097 Because this object store doesn't support packs, we extract and add the
1098 individual objects.
1099
1100 Returns: Fileobject to write to, a commit function to
1101 call when the pack is finished, and an abort function.
1102 """
1103 from tempfile import SpooledTemporaryFile
1104
1105 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
1106
1107 def commit():
1108 size = f.tell()
1109 if size > 0:
1110 f.seek(0)
1111 p = PackData.from_file(f, size)
1112 for obj in PackInflater.for_pack_data(p, self.get_raw):
1113 self.add_object(obj)
1114 p.close()
1115 else:
1116 f.close()
1117
1118 def abort():
1119 f.close()
1120
1121 return f, commit, abort
1122
1123 def add_pack_data(
1124 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
1125 ) -> None:
1126 """Add pack data to this object store.
1127
1128 Args:
1129 count: Number of items to add
1130 unpacked_objects: Iterator over UnpackedObject instances to add
1131 """
1132 for unpacked_object in unpacked_objects:
1133 self.add_object(unpacked_object.sha_file())
1134
1135 def add_thin_pack(self, read_all, read_some, progress=None):
1136 """Add a new thin pack to this object store.
1137
1138 Thin packs are packs that contain deltas with parents that exist
1139 outside the pack. Because this object store doesn't support packs, we
1140 extract and add the individual objects.
1141
1142 Args:
1143 read_all: Read function that blocks until the number of
1144 requested bytes are read.
1145 read_some: Read function that returns at least one byte, but may
1146 not return the number of bytes requested.
1147 """
1148 f, commit, abort = self.add_pack()
1149 try:
1150 copier = PackStreamCopier(read_all, read_some, f)
1151 copier.verify()
1152 except BaseException:
1153 abort()
1154 raise
1155 else:
1156 commit()
1157
1158
1159class ObjectIterator(Protocol):
1160 """Interface for iterating over objects."""
1161
1162 def iterobjects(self) -> Iterator[ShaFile]:
1163 raise NotImplementedError(self.iterobjects)
1164
1165
1166def tree_lookup_path(lookup_obj, root_sha, path):
1167 """Look up an object in a Git tree.
1168
1169 Args:
1170 lookup_obj: Callback for retrieving object by SHA1
1171 root_sha: SHA1 of the root tree
1172 path: Path to lookup
1173 Returns: A tuple of (mode, SHA) of the resulting path.
1174 """
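    # Illustrative sketch (hedged; `store` and `tree_id` are hypothetical):
    #
    #   mode, sha = tree_lookup_path(store.__getitem__, tree_id, b"docs/README")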
1175 tree = lookup_obj(root_sha)
1176 if not isinstance(tree, Tree):
1177 raise NotTreeError(root_sha)
1178 return tree.lookup_path(lookup_obj, path)
1179
1180
1181def _collect_filetree_revs(
1182 obj_store: ObjectContainer, tree_sha: ObjectID, kset: Set[ObjectID]
1183) -> None:
1184 """Collect SHA1s of files and directories for specified tree.
1185
1186 Args:
1187 obj_store: Object store to get objects by SHA from
1188 tree_sha: tree reference to walk
1189 kset: set to fill with references to files and directories
1190 """
1191 filetree = obj_store[tree_sha]
1192 assert isinstance(filetree, Tree)
1193 for name, mode, sha in filetree.iteritems():
1194 if not S_ISGITLINK(mode) and sha not in kset:
1195 kset.add(sha)
1196 if stat.S_ISDIR(mode):
1197 _collect_filetree_revs(obj_store, sha, kset)
1198
1199
1200def _split_commits_and_tags(
1201 obj_store: ObjectContainer, lst, *, ignore_unknown=False
1202) -> Tuple[Set[bytes], Set[bytes], Set[bytes]]:
1203 """Split object id list into three lists with commit, tag, and other SHAs.
1204
1205 Commits referenced by tags are included in the commits
1206 list as well. Only SHA1s known in this repository will get
1207 through; unless the ignore_unknown argument is True, a KeyError
1208 is raised for any SHA1 missing from the repository.
1209
1210 Args:
1211 obj_store: Object store to get objects by SHA1 from
1212 lst: Collection of commit and tag SHAs
1213 ignore_unknown: True to skip SHA1 missing in the repository
1214 silently.
1215 Returns: A tuple of (commits, tags, others) SHA1s
1216 """
1217 commits: Set[bytes] = set()
1218 tags: Set[bytes] = set()
1219 others: Set[bytes] = set()
1220 for e in lst:
1221 try:
1222 o = obj_store[e]
1223 except KeyError:
1224 if not ignore_unknown:
1225 raise
1226 else:
1227 if isinstance(o, Commit):
1228 commits.add(e)
1229 elif isinstance(o, Tag):
1230 tags.add(e)
1231 tagged = o.object[1]
1232 c, t, os = _split_commits_and_tags(
1233 obj_store, [tagged], ignore_unknown=ignore_unknown
1234 )
1235 commits |= c
1236 tags |= t
1237 others |= os
1238 else:
1239 others.add(e)
1240 return (commits, tags, others)
1241
1242
1243class MissingObjectFinder:
1244 """Find the objects missing from another object store.
1245
1246 Args:
1247 object_store: Object store containing at least all objects to be
1248 sent
1249 haves: SHA1s of commits not to send (already present in target)
1250 wants: SHA1s of commits to send
1251 progress: Optional function to report progress to.
1252 get_tagged: Function that returns a dict of pointed-to sha -> tag
1253 sha for including tags.
1254 get_parents: Optional function for getting the parents of a commit.
1255 shallow: Set of shallow commit SHA1s to skip
1256 """
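    # Illustrative sketch (hedged; `store`, `haves` and `wants` are assumed to
    # come from the sending side of a fetch/push):
    #
    #   finder = MissingObjectFinder(store, haves=haves, wants=wants)
    #   for sha, pack_hint in finder:
    #       ...  # each sha still needs to be sent to the other side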
1257
1258 def __init__(
1259 self,
1260 object_store,
1261 haves,
1262 wants,
1263 *,
1264 shallow=None,
1265 progress=None,
1266 get_tagged=None,
1267 get_parents=lambda commit: commit.parents,
1268 ) -> None:
1269 self.object_store = object_store
1270 if shallow is None:
1271 shallow = set()
1272 self._get_parents = get_parents
1273 # process Commits and Tags differently
1274 # Note: while haves may list commits/tags not available locally
1275 # (such SHAs are filtered out by _split_commits_and_tags),
1276 # wants must list only known SHAs, otherwise
1277 # _split_commits_and_tags fails with a KeyError.
1278 have_commits, have_tags, have_others = _split_commits_and_tags(
1279 object_store, haves, ignore_unknown=True
1280 )
1281 want_commits, want_tags, want_others = _split_commits_and_tags(
1282 object_store, wants, ignore_unknown=False
1283 )
1284 # all_ancestors is a set of commits that shall not be sent
1285 # (complete repository up to 'haves')
1286 all_ancestors = _collect_ancestors(
1287 object_store, have_commits, shallow=shallow, get_parents=self._get_parents
1288 )[0]
1289 # missing_commits - complete set of commits between haves and wants
1290 # common_commits - commits from all_ancestors we hit while
1291 # traversing the parent hierarchy of wants
1292 missing_commits, common_commits = _collect_ancestors(
1293 object_store,
1294 want_commits,
1295 all_ancestors,
1296 shallow=shallow,
1297 get_parents=self._get_parents,
1298 )
1299 self.remote_has: Set[bytes] = set()
1300 # Now, fill sha_done with commits and revisions of
1301 # files and directories known to be present both locally
1302 # and on the target. Thus these commits and files
1303 # won't get selected for fetch.
1304 for h in common_commits:
1305 self.remote_has.add(h)
1306 cmt = object_store[h]
1307 _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
1308 # record tags we have as visited, too
1309 for t in have_tags:
1310 self.remote_has.add(t)
1311 self.sha_done = set(self.remote_has)
1312
1313 # in fact, what we 'want' is commits, tags, and others
1314 # we've found missing
1315 self.objects_to_send: Set[
1316 Tuple[ObjectID, Optional[bytes], Optional[int], bool]
1317 ] = {(w, None, Commit.type_num, False) for w in missing_commits}
1318 missing_tags = want_tags.difference(have_tags)
1319 self.objects_to_send.update(
1320 {(w, None, Tag.type_num, False) for w in missing_tags}
1321 )
1322 missing_others = want_others.difference(have_others)
1323 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
1324
1325 if progress is None:
1326 self.progress = lambda x: None
1327 else:
1328 self.progress = progress
1329 self._tagged = get_tagged and get_tagged() or {}
1330
1331 def get_remote_has(self):
1332 return self.remote_has
1333
1334 def add_todo(
1335 self, entries: Iterable[Tuple[ObjectID, Optional[bytes], Optional[int], bool]]
1336 ):
1337 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
1338
1339 def __next__(self) -> Tuple[bytes, Optional[PackHint]]:
1340 while True:
1341 if not self.objects_to_send:
1342 self.progress(
1343 ("counting objects: %d, done.\n" % len(self.sha_done)).encode(
1344 "ascii"
1345 )
1346 )
1347 raise StopIteration
1348 (sha, name, type_num, leaf) = self.objects_to_send.pop()
1349 if sha not in self.sha_done:
1350 break
1351 if not leaf:
1352 o = self.object_store[sha]
1353 if isinstance(o, Commit):
1354 self.add_todo([(o.tree, b"", Tree.type_num, False)])
1355 elif isinstance(o, Tree):
1356 self.add_todo(
1357 [
1358 (
1359 s,
1360 n,
1361 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
1362 not stat.S_ISDIR(m),
1363 )
1364 for n, m, s in o.iteritems()
1365 if not S_ISGITLINK(m)
1366 ]
1367 )
1368 elif isinstance(o, Tag):
1369 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
1370 if sha in self._tagged:
1371 self.add_todo([(self._tagged[sha], None, None, True)])
1372 self.sha_done.add(sha)
1373 if len(self.sha_done) % 1000 == 0:
1374 self.progress(
1375 ("counting objects: %d\r" % len(self.sha_done)).encode("ascii")
1376 )
1377 if type_num is None:
1378 pack_hint = None
1379 else:
1380 pack_hint = (type_num, name)
1381 return (sha, pack_hint)
1382
1383 def __iter__(self):
1384 return self
1385
1386
1387class ObjectStoreGraphWalker:
1388 """Graph walker that finds what commits are missing from an object store.
1389
1390 Attributes:
1391 heads: Revisions without descendants in the local repo
1392 get_parents: Function to retrieve parents in the local repo
1393 """
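    # Illustrative negotiation sketch (hedged; `remote_has` is a hypothetical
    # predicate for what the other side already has):
    #
    #   walker = ObjectStoreGraphWalker(heads, lambda sha: store[sha].parents)
    #   sha = next(walker)
    #   while sha is not None:
    #       if remote_has(sha):
    #           walker.ack(sha)
    #       sha = next(walker)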
1394
1395 def __init__(self, local_heads, get_parents, shallow=None) -> None:
1396 """Create a new instance.
1397
1398 Args:
1399 local_heads: Heads to start search with
1400 get_parents: Function for finding the parents of a SHA1.
1401 """
1402 self.heads = set(local_heads)
1403 self.get_parents = get_parents
1404 self.parents: Dict[ObjectID, Optional[List[ObjectID]]] = {}
1405 if shallow is None:
1406 shallow = set()
1407 self.shallow = shallow
1408
1409 def nak(self):
1410 """Nothing in common was found."""
1411
1412 def ack(self, sha):
1413 """Ack that a revision and its ancestors are present in the source."""
1414 if len(sha) != 40:
1415 raise ValueError(f"unexpected sha {sha!r} received")
1416 ancestors = {sha}
1417
1418 # stop if we run out of heads to remove
1419 while self.heads:
1420 for a in ancestors:
1421 if a in self.heads:
1422 self.heads.remove(a)
1423
1424 # collect all ancestors
1425 new_ancestors = set()
1426 for a in ancestors:
1427 ps = self.parents.get(a)
1428 if ps is not None:
1429 new_ancestors.update(ps)
1430 self.parents[a] = None
1431
1432 # no more ancestors; stop
1433 if not new_ancestors:
1434 break
1435
1436 ancestors = new_ancestors
1437
1438 def next(self):
1439 """Iterate over ancestors of heads in the target."""
1440 if self.heads:
1441 ret = self.heads.pop()
1442 try:
1443 ps = self.get_parents(ret)
1444 except KeyError:
1445 return None
1446 self.parents[ret] = ps
1447 self.heads.update([p for p in ps if p not in self.parents])
1448 return ret
1449 return None
1450
1451 __next__ = next
1452
1453
1454def commit_tree_changes(object_store, tree, changes):
1455 """Commit a specified set of changes to a tree structure.
1456
1457 This will apply a set of changes on top of an existing tree, storing new
1458 objects in object_store.
1459
1460 changes are a list of tuples with (path, mode, object_sha).
1461 Paths can be both blobs and trees. Setting the mode and
1462 object sha to None deletes the path.
1463
1464 This method works especially well if there are only a small
1465 number of changes to a big tree. For a large number of changes
1466 to a large tree, use e.g. commit_tree.
1467
1468 Args:
1469 object_store: Object store to store new objects in
1470 and retrieve old ones from.
1471 tree: Original tree root
1472 changes: changes to apply
1473 Returns: New tree root object
1474 """
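    # Illustrative sketch (hedged; the SHAs below are hypothetical): add one
    # blob and delete one existing path in a single call.
    #
    #   new_root = commit_tree_changes(store, store[old_root_id], [
    #       (b"docs/README", 0o100644, readme_blob_sha),
    #       (b"old/file.txt", None, None),
    #   ])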
1475 # TODO(jelmer): Save up the objects and add them using .add_objects
1476 # rather than with individual calls to .add_object.
1477 nested_changes = {}
1478 for path, new_mode, new_sha in changes:
1479 try:
1480 (dirname, subpath) = path.split(b"/", 1)
1481 except ValueError:
1482 if new_sha is None:
1483 del tree[path]
1484 else:
1485 tree[path] = (new_mode, new_sha)
1486 else:
1487 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
1488 for name, subchanges in nested_changes.items():
1489 try:
1490 orig_subtree = object_store[tree[name][1]]
1491 except KeyError:
1492 orig_subtree = Tree()
1493 subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
1494 if len(subtree) == 0:
1495 del tree[name]
1496 else:
1497 tree[name] = (stat.S_IFDIR, subtree.id)
1498 object_store.add_object(tree)
1499 return tree
1500
1501
1502class OverlayObjectStore(BaseObjectStore):
1503 """Object store that can overlay multiple object stores."""
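    # Illustrative sketch (hedged): reads fall through `bases` in order, while
    # writes go to `add_store`; the store names below are examples only.
    #
    #   overlay = OverlayObjectStore([fast_store, slow_store], add_store=fast_store)
    #   obj = overlay[some_sha]        # found in whichever base has it
    #   overlay.add_object(obj)        # always lands in fast_store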
1504
1505 def __init__(self, bases, add_store=None) -> None:
1506 self.bases = bases
1507 self.add_store = add_store
1508
1509 def add_object(self, object):
1510 if self.add_store is None:
1511 raise NotImplementedError(self.add_object)
1512 return self.add_store.add_object(object)
1513
1514 def add_objects(self, objects, progress=None):
1515 if self.add_store is None:
1516 raise NotImplementedError(self.add_object)
1517 return self.add_store.add_objects(objects, progress)
1518
1519 @property
1520 def packs(self):
1521 ret = []
1522 for b in self.bases:
1523 ret.extend(b.packs)
1524 return ret
1525
1526 def __iter__(self):
1527 done = set()
1528 for b in self.bases:
1529 for o_id in b:
1530 if o_id not in done:
1531 yield o_id
1532 done.add(o_id)
1533
1534 def iterobjects_subset(
1535 self, shas: Iterable[bytes], *, allow_missing: bool = False
1536 ) -> Iterator[ShaFile]:
1537 todo = set(shas)
1538 for b in self.bases:
1539 for o in b.iterobjects_subset(todo, allow_missing=True):
1540 yield o
1541 todo.remove(o.id)
1542 if todo and not allow_missing:
1543 raise KeyError(todo.pop())
1544
1545 def iter_unpacked_subset(
1546 self,
1547 shas: Iterable[bytes],
1548 *,
1549 include_comp=False,
1550 allow_missing: bool = False,
1551 convert_ofs_delta=True,
1552 ) -> Iterator[UnpackedObject]:
1553 todo = set(shas)
1554 for b in self.bases:
1555 for o in b.iter_unpacked_subset(
1556 todo,
1557 include_comp=include_comp,
1558 allow_missing=True,
1559 convert_ofs_delta=convert_ofs_delta,
1560 ):
1561 yield o
1562 todo.remove(o.id)
1563 if todo and not allow_missing:
1564 raise KeyError(todo.pop())
1565
1566 def get_raw(self, sha_id):
1567 for b in self.bases:
1568 try:
1569 return b.get_raw(sha_id)
1570 except KeyError:
1571 pass
1572 raise KeyError(sha_id)
1573
1574 def contains_packed(self, sha):
1575 for b in self.bases:
1576 if b.contains_packed(sha):
1577 return True
1578 return False
1579
1580 def contains_loose(self, sha):
1581 for b in self.bases:
1582 if b.contains_loose(sha):
1583 return True
1584 return False
1585
1586
1587def read_packs_file(f):
1588 """Yield the packs listed in a packs file."""
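    # The packs file (objects/info/packs) lists one pack per line; an
    # illustrative line looks like:
    #
    #   P pack-6c985566ab009785bd39c5376a58f8ce544e0e50.pack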
1589 for line in f.read().splitlines():
1590 if not line:
1591 continue
1592 (kind, name) = line.split(b" ", 1)
1593 if kind != b"P":
1594 continue
1595 yield os.fsdecode(name)
1596
1597
1598class BucketBasedObjectStore(PackBasedObjectStore):
1599 """Object store implementation that uses a bucket store like S3 as backend."""
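    # Illustrative subclass sketch (hedged; `bucket` and its methods are purely
    # hypothetical, not part of dulwich):
    #
    #   class BucketStore(BucketBasedObjectStore):
    #       def _iter_pack_names(self):
    #           return (key[: -len(".pack")] for key in bucket.keys()
    #                   if key.endswith(".pack"))
    #
    #       def _get_pack(self, name):
    #           return Pack(bucket.download_to_local(name))
    #
    #       def _upload_pack(self, basename, pack_file, index_file):
    #           bucket.put(basename + ".pack", pack_file)
    #           bucket.put(basename + ".idx", index_file)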
1600
1601 def _iter_loose_objects(self):
1602 """Iterate over the SHAs of all loose objects."""
1603 return iter([])
1604
1605 def _get_loose_object(self, sha):
1606 return None
1607
1608 def _remove_loose_object(self, sha):
1609 # Doesn't exist.
1610 pass
1611
1612 def _remove_pack(self, name):
1613 raise NotImplementedError(self._remove_pack)
1614
1615 def _iter_pack_names(self):
1616 raise NotImplementedError(self._iter_pack_names)
1617
1618 def _get_pack(self, name):
1619 raise NotImplementedError(self._get_pack)
1620
1621 def _update_pack_cache(self):
1622 pack_files = set(self._iter_pack_names())
1623
1624 # Open newly appeared pack files
1625 new_packs = []
1626 for f in pack_files:
1627 if f not in self._pack_cache:
1628 pack = self._get_pack(f)
1629 new_packs.append(pack)
1630 self._pack_cache[f] = pack
1631 # Remove disappeared pack files
1632 for f in set(self._pack_cache) - pack_files:
1633 self._pack_cache.pop(f).close()
1634 return new_packs
1635
1636 def _upload_pack(self, basename, pack_file, index_file):
1637 raise NotImplementedError
1638
1639 def add_pack(self):
1640 """Add a new pack to this object store.
1641
1642 Returns: Fileobject to write to, a commit function to
1643 call when the pack is finished and an abort
1644 function.
1645 """
1646 import tempfile
1647
1648 pf = tempfile.SpooledTemporaryFile(
1649 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
1650 )
1651
1652 def commit():
1653 if pf.tell() == 0:
1654 pf.close()
1655 return None
1656
1657 pf.seek(0)
1658 p = PackData(pf.name, pf)
1659 entries = p.sorted_entries()
1660 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
1661 idxf = tempfile.SpooledTemporaryFile(
1662 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
1663 )
1664 checksum = p.get_stored_checksum()
1665 write_pack_index(idxf, entries, checksum)
1666 idxf.seek(0)
1667 idx = load_pack_index_file(basename + ".idx", idxf)
1668 for pack in self.packs:
1669 if pack.get_stored_checksum() == p.get_stored_checksum():
1670 p.close()
1671 idx.close()
1672 return pack
1673 pf.seek(0)
1674 idxf.seek(0)
1675 self._upload_pack(basename, pf, idxf)
1676 final_pack = Pack.from_objects(p, idx)
1677 self._add_cached_pack(basename, final_pack)
1678 return final_pack
1679
1680 return pf, commit, pf.close
1681
1682
1683def _collect_ancestors(
1684 store: ObjectContainer,
1685 heads,
1686 common: FrozenSet[ObjectID] = frozenset(),
1687 shallow: FrozenSet[ObjectID] = frozenset(),
1688 get_parents=lambda commit: commit.parents,
1689):
1690 """Collect all ancestors of heads up to (excluding) those in common.
1691
1692 Args:
1693 heads: commits to start from
1694 common: commits to end at, or empty set to walk repository
1695 completely
1696 shallow: Set of shallow commit SHA1s whose parents are not traversed
1697 get_parents: Optional function for getting the parents of a commit.
1698 Returns: A tuple (A, B), where A is the set of all commits reachable
1699 from heads but not present in common, and B is the set of common
1700 (shared) elements that are directly reachable from heads.
1701 """
1702 bases = set()
1703 commits = set()
1704 queue = []
1705 queue.extend(heads)
1706 while queue:
1707 e = queue.pop(0)
1708 if e in common:
1709 bases.add(e)
1710 elif e not in commits:
1711 commits.add(e)
1712 if e in shallow:
1713 continue
1714 cmt = store[e]
1715 queue.extend(get_parents(cmt))
1716 return (commits, bases)
1717
1718
1719def iter_tree_contents(
1720 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
1721):
1722 """Iterate the contents of a tree and all subtrees.
1723
1724 Iteration is depth-first pre-order, as in e.g. os.walk.
1725
1726 Args:
1727 tree_id: SHA1 of the tree.
1728 include_trees: If True, include tree objects in the iteration.
1729 Returns: Iterator over TreeEntry namedtuples for all the objects in a
1730 tree.
1731 """
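    # Illustrative sketch (hedged; `store` and `tree_id` are hypothetical):
    #
    #   for entry in iter_tree_contents(store, tree_id):
    #       print(entry.path, entry.mode, entry.sha)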
1732 if tree_id is None:
1733 return
1734 # This could be fairly easily generalized to >2 trees if we find a use
1735 # case.
1736 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
1737 while todo:
1738 entry = todo.pop()
1739 if stat.S_ISDIR(entry.mode):
1740 extra = []
1741 tree = store[entry.sha]
1742 assert isinstance(tree, Tree)
1743 for subentry in tree.iteritems(name_order=True):
1744 extra.append(subentry.in_path(entry.path))
1745 todo.extend(reversed(extra))
1746 if not stat.S_ISDIR(entry.mode) or include_trees:
1747 yield entry
1748
1749
1750def peel_sha(store: ObjectContainer, sha: bytes) -> Tuple[ShaFile, ShaFile]:
1751 """Peel all tags from a SHA.
1752
1753 Args:
1754 sha: The object SHA to peel.
1755 Returns: The fully-peeled SHA1 of a tag object, after peeling all
1756 intermediate tags; if the original ref does not point to a tag,
1757 this will equal the original SHA1.
1758 """
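    # Illustrative sketch (hedged; `tag_sha` is a hypothetical tag object SHA):
    #
    #   unpeeled, peeled = peel_sha(store, tag_sha)
    #   # `peeled` is the object the (possibly nested) tag chain points to.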
1759 unpeeled = obj = store[sha]
1760 obj_class = object_class(obj.type_name)
1761 while obj_class is Tag:
1762 assert isinstance(obj, Tag)
1763 obj_class, sha = obj.object
1764 obj = store[sha]
1765 return unpeeled, obj