1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
22
23
24"""Git object store interfaces and implementation."""
25
import binascii
import os
import stat
import sys
import time
import warnings
from collections import deque
from collections.abc import Iterable, Iterator, Sequence
from contextlib import suppress
from io import BytesIO
from typing import (
    Callable,
    Optional,
    Protocol,
    Union,
    cast,
)
42
43from .errors import NotTreeError
44from .file import GitFile, _GitFile
45from .objects import (
46 S_ISGITLINK,
47 ZERO_SHA,
48 Blob,
49 Commit,
50 ObjectID,
51 ShaFile,
52 Tag,
53 Tree,
54 TreeEntry,
55 hex_to_filename,
56 hex_to_sha,
57 object_class,
58 sha_to_hex,
59 valid_hexsha,
60)
61from .pack import (
62 PACK_SPOOL_FILE_MAX_SIZE,
63 ObjectContainer,
64 Pack,
65 PackData,
66 PackedObjectContainer,
67 PackFileDisappeared,
68 PackHint,
69 PackIndexer,
70 PackInflater,
71 PackStreamCopier,
72 UnpackedObject,
73 extend_pack,
74 full_unpacked_object,
75 generate_unpacked_objects,
76 iter_sha1,
77 load_pack_index_file,
78 pack_objects_to_data,
79 write_pack_data,
80 write_pack_index,
81)
82from .protocol import DEPTH_INFINITE
83from .refs import PEELED_TAG_SUFFIX, Ref
84
# Subdirectory names within a git object store directory.
INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks
96
97
def find_shallow(store, heads, depth):
    """Find shallow commits according to a given depth.

    Args:
      store: An ObjectStore for looking up objects.
      heads: Iterable of head SHAs to start walking from.
      depth: The depth of ancestors to include. A depth of one includes
        only the heads themselves.
    Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
      considered shallow and unshallow according to the arguments. Note that
      these sets may overlap if a commit is reachable along multiple paths.
    """
    parents = {}
    commit_graph = store.get_commit_graph()

    def get_parents(sha):
        # Check for a cached entry with "is None" rather than truthiness:
        # an empty parent list (a root commit) is a valid cached value and
        # must not trigger a re-fetch on every visit.
        result = parents.get(sha)
        if result is None:
            # Try to use commit graph first if available
            if commit_graph:
                graph_parents = commit_graph.get_parents(sha)
                if graph_parents is not None:
                    result = graph_parents
                    parents[sha] = result
                    return result
            # Fall back to loading the object
            result = store[sha].parents
            parents[sha] = result
        return result

    todo = []  # stack of (sha, depth)
    for head_sha in heads:
        obj = store[head_sha]
        # Peel (possibly nested) tags until we reach the tagged object.
        while isinstance(obj, Tag):
            _, sha = obj.object
            obj = store[sha]
        # Only commits participate in the shallow computation.
        if isinstance(obj, Commit):
            todo.append((obj.id, 1))

    not_shallow = set()
    shallow = set()
    while todo:
        sha, cur_depth = todo.pop()
        if cur_depth < depth:
            not_shallow.add(sha)
            new_depth = cur_depth + 1
            todo.extend((p, new_depth) for p in get_parents(sha))
        else:
            shallow.add(sha)

    return shallow, not_shallow
150
151
def get_depth(
    store,
    head,
    get_parents=lambda commit: commit.parents,
    max_depth=None,
):
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to look up commits in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    Returns: Depth of the longest known ancestry chain starting at head,
      or 0 if head is not present in the store.
    """
    if head not in store:
        return 0
    current_depth = 1
    # Breadth-first walk. A deque gives O(1) pops from the left; the
    # previous list.pop(0) was O(n) per pop, i.e. O(n^2) overall.
    queue = deque([(head, current_depth)])
    commit_graph = store.get_commit_graph()

    while queue and (max_depth is None or current_depth < max_depth):
        e, depth = queue.popleft()
        current_depth = max(current_depth, depth)

        # Try to use commit graph for parent lookup if available
        parents = None
        if commit_graph:
            parents = commit_graph.get_parents(e)

        if parents is None:
            # Fall back to loading the object
            cmt = store[e]
            if isinstance(cmt, Tag):
                _cls, sha = cmt.object
                cmt = store[sha]
            parents = get_parents(cmt)

        # Only follow parents that are actually present in the store.
        queue.extend((parent, depth + 1) for parent in parents if parent in store)
    return current_depth
192
193
class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack.

        Returns: Tuple of (file-like object to write pack data to,
            commit callback to finalize the pack,
            abort callback to discard a partially written pack).
        """
200
class BaseObjectStore:
    """Object store interface."""

    def determine_wants_all(
        self, refs: dict[Ref, ObjectID], depth: Optional[int] = None
    ) -> list[ObjectID]:
        """Determine all objects that are wanted by the client.

        Args:
          refs: Dictionary mapping ref names to object IDs
          depth: Shallow fetch depth (None for full fetch)

        Returns:
          List of object IDs that are wanted
        """

        def _want_deepen(sha):
            # A falsy depth (None/0) never deepens; DEPTH_INFINITE always
            # deepens; otherwise deepen only when the requested depth
            # exceeds what is currently available for this sha.
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        # Want every ref target not yet present (or due for deepening),
        # excluding peeled-tag refs (ending in ^{}) and the all-zeros sha.
        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
            and not sha == ZERO_SHA
        ]

    def contains_loose(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def __contains__(self, sha1: bytes) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        # Base implementation only knows about loose objects; subclasses
        # with pack support override this.
        return self.contains_loose(sha1)

    @property
    def packs(self):
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID) -> ShaFile:
        """Obtain an object by SHA1."""
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(self, objects, progress=None) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress reporting callback
        """
        raise NotImplementedError(self.add_objects)

    def tree_changes(
        self,
        source,
        target,
        want_unchanged=False,
        include_trees=False,
        change_type_same=False,
        rename_detector=None,
        paths=None,
    ):
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
            (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        # Imported lazily to avoid a module-level import cycle with
        # dulwich.diff_tree.
        from .diff_tree import tree_changes

        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            yield (
                (change.old.path, change.new.path),
                (change.old.mode, change.new.mode),
                (change.old.sha, change.new.sha),
            )

    def iter_tree_contents(self, tree_id, include_trees=False):
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
            tree.

        .. deprecated:: use :func:`dulwich.object_store.iter_tree_contents`
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # Naive implementation: one lookup per sha. Subclasses with pack
        # support provide a batched version.
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves,
        wants,
        shallow=None,
        progress=None,
        get_tagged=None,
        get_parents=lambda commit: commit.parents,
    ):
        """Find the missing objects required for a set of revisions.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.

        .. deprecated:: use :class:`MissingObjectFinder` directly
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker):
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        # The graphwalker yields a falsy value (e.g. None) once exhausted.
        sha = next(graphwalker)
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        Returns: Tuple with the number of objects and an iterator over
            UnpackedObject instances.
        """
        # Note that the pack-specific implementation below is more efficient,
        # as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha):
        """Peel all tags from a SHA.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
            intermediate tags; if the original ref does not point to a tag,
            this will equal the original SHA1.

        .. deprecated:: use :func:`dulwich.object_store.peel_sha`
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1]

    def _get_depth(
        self,
        head,
        get_parents=lambda commit: commit.parents,
        max_depth=None,
    ):
        """Return the current available depth for the given head.
        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: Optional[int] = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self):
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(self, refs=None, reachable=True) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Raises:
          NotImplementedError: The base implementation does not support
            writing commit graphs; subclasses should override this method
            to provide commit graph writing functionality.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha):
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)
532
533
class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that uses pack files for storage.

    This class provides a base implementation for object stores that use
    Git pack files as their primary storage mechanism. It handles caching
    of open pack files and provides configuration for pack file operations.
    """

    def __init__(
        self,
        pack_compression_level=-1,
        pack_index_version=None,
        pack_delta_window_size=None,
        pack_window_memory=None,
        pack_delta_cache_size=None,
        pack_depth=None,
        pack_threads=None,
        pack_big_file_threshold=None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
        """
        # Maps pack base name -> open Pack object.
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store."""
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> None:
        """Add pack data to this object store.

        Args:
          count: Number of items to add
          unpacked_objects: Iterator over UnpackedObject instances to write
          progress: Optional progress reporting callback
        """
        if count == 0:
            # Don't bother writing an empty pack file
            return
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                compression_level=self.pack_compression_level,
            )
        except BaseException:
            # Discard the partially written pack on any failure (including
            # KeyboardInterrupt), then re-raise.
            abort()
            raise
        else:
            return commit()

    @property
    def alternates(self):
        """Get the list of alternate object stores.

        Returns:
          List of alternate BaseObjectStore instances
        """
        return []

    def contains_packed(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is packed.

        This does not check alternates.
        """
        for pack in self.packs:
            try:
                if sha in pack:
                    return True
            except PackFileDisappeared:
                # Pack was removed (e.g. concurrent repack); keep checking
                # the remaining packs.
                pass
        return False

    def __contains__(self, sha) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        if self.contains_packed(sha) or self.contains_loose(sha):
            return True
        # Unlike contains_packed/contains_loose, also consult alternates.
        for alternate in self.alternates:
            if sha in alternate:
                return True
        return False

    def _add_cached_pack(self, base_name, pack) -> None:
        """Add a newly appeared pack to the cache by path."""
        prev_pack = self._pack_cache.get(base_name)
        if prev_pack is not pack:
            self._pack_cache[base_name] = pack
            # Close the pack object this one replaced, if any.
            if prev_pack:
                prev_pack.close()

    def generate_pack_data(
        self, have, want, shallow=None, progress=None, ofs_delta=True
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        Returns: Tuple with the number of objects and an iterator over
            UnpackedObject instances.
        """
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        remote_has = missing_objects.get_remote_has()
        object_ids = list(missing_objects)
        return len(object_ids), generate_unpacked_objects(
            cast(PackedObjectContainer, self),
            object_ids,
            progress=progress,
            ofs_delta=ofs_delta,
            other_haves=remote_has,
        )

    def _clear_cached_packs(self) -> None:
        """Close and forget all packs currently held in the cache."""
        pack_cache = self._pack_cache
        self._pack_cache = {}
        while pack_cache:
            (name, pack) = pack_cache.popitem()
            pack.close()

    def _iter_cached_packs(self):
        """Iterate over the Pack objects currently in the cache."""
        return self._pack_cache.values()

    def _update_pack_cache(self) -> list[Pack]:
        """Refresh the pack cache; return newly discovered packs.

        Subclass responsibility.
        """
        raise NotImplementedError(self._update_pack_cache)

    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        """
        self._clear_cached_packs()

    @property
    def packs(self):
        """List with pack objects."""
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())

    def count_pack_files(self) -> int:
        """Count the number of pack files.

        Returns:
          Number of pack files (excluding those with .keep files)
        """
        count = 0
        for pack in self.packs:
            # Check if there's a .keep file for this pack
            keep_path = pack._basename + ".keep"
            if not os.path.exists(keep_path):
                count += 1
        return count

    def _iter_alternate_objects(self):
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)

    def _get_loose_object(self, sha) -> Optional[ShaFile]:
        # Return the loose object for sha, or None if not present loose.
        # Subclass responsibility.
        raise NotImplementedError(self._get_loose_object)

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object.

        This method only handles loose objects. For packed objects,
        use repack(exclude=...) to exclude them during repacking.
        """
        raise NotImplementedError(self.delete_loose_object)

    def _remove_pack(self, name) -> None:
        # Remove the given pack from disk. Subclass responsibility.
        raise NotImplementedError(self._remove_pack)

    def pack_loose_objects(self):
        """Pack loose objects.

        Returns: Number of objects packed
        """
        objects = set()
        for sha in self._iter_loose_objects():
            objects.add((self._get_loose_object(sha), None))
        self.add_objects(list(objects))
        # Now that they are packed, the loose copies are redundant.
        for obj, path in objects:
            self.delete_loose_object(obj.id)
        return len(objects)

    def repack(self, exclude=None):
        """Repack the packs in this repository.

        Note that this implementation is fairly naive and currently keeps all
        objects in memory while it repacks.

        Args:
          exclude: Optional set of object SHAs to exclude from repacking
        """
        if exclude is None:
            exclude = set()

        loose_objects = set()
        excluded_loose_objects = set()
        for sha in self._iter_loose_objects():
            if sha not in exclude:
                loose_objects.add(self._get_loose_object(sha))
            else:
                excluded_loose_objects.add(sha)

        objects = {(obj, None) for obj in loose_objects}
        old_packs = {p.name(): p for p in self.packs}
        for name, pack in old_packs.items():
            objects.update(
                (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
            )

        # Only create a new pack if there are objects to pack
        if objects:
            # The name of the consolidated pack might match the name of a
            # pre-existing pack. Take care not to remove the newly created
            # consolidated pack.
            # NOTE(review): add_objects/add_pack_data are annotated "-> None"
            # but this code relies on the new Pack being returned here; the
            # annotations look stale - confirm against subclasses.
            consolidated = self.add_objects(objects)
            old_packs.pop(consolidated.name(), None)

        # Delete loose objects that were packed
        for obj in loose_objects:
            self.delete_loose_object(obj.id)
        # Delete excluded loose objects
        for sha in excluded_loose_objects:
            self.delete_loose_object(sha)
        for name, pack in old_packs.items():
            self._remove_pack(pack)
        self._update_pack_cache()
        return len(objects)

    def __iter__(self):
        """Iterate over the SHAs that are present in this store.

        Note: no deduplication is performed, so an object stored in more
        than one location (e.g. both loose and packed) may be yielded more
        than once.
        """
        self._update_pack_cache()
        for pack in self._iter_cached_packs():
            try:
                yield from pack
            except PackFileDisappeared:
                pass
        yield from self._iter_loose_objects()
        yield from self._iter_alternate_objects()

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose.

        This does not check alternates.
        """
        return self._get_loose_object(sha) is not None

    def get_raw(self, name):
        """Obtain the raw fulltext for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        if name == ZERO_SHA:
            raise KeyError(name)
        # Accept both hex (40-byte) and binary (20-byte) sha forms.
        if len(name) == 40:
            sha = hex_to_sha(name)
            hexsha = name
        elif len(name) == 20:
            sha = name
            hexsha = None
        else:
            raise AssertionError(f"Invalid object name {name!r}")
        # Search order: cached packs, loose objects, freshly discovered
        # packs, then alternates.
        for pack in self._iter_cached_packs():
            try:
                return pack.get_raw(sha)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(name)
        ret = self._get_loose_object(hexsha)
        if ret is not None:
            return ret.type_num, ret.as_raw_string()
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_raw(sha)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_raw(hexsha)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def iter_unpacked_subset(
        self,
        shas: set[bytes],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over a subset of objects, yielding UnpackedObject instances.

        Args:
          shas: Set of object SHAs to retrieve
          include_comp: Whether to include compressed data
          allow_missing: If True, skip missing objects; if False, raise KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # todo shrinks as objects are found, so each later source is only
        # asked for the shas still outstanding.
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for unpacked in p.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)
        for alternate in self.alternates:
            for unpacked in alternate.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield unpacked
                hexsha = sha_to_hex(unpacked.sha())
                todo.remove(hexsha)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        This method searches for objects in pack files, alternates, and loose storage.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        # todo shrinks as objects are found; remaining shas are tried as
        # loose objects at the end.
        todo: set[bytes] = set(shas)
        for p in self._iter_cached_packs():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for p in self._update_pack_cache():
            for o in p.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for alternate in self.alternates:
            for o in alternate.iterobjects_subset(todo, allow_missing=True):
                yield o
                todo.remove(o.id)
        for oid in todo:
            o = self._get_loose_object(oid)
            if o is not None:
                yield o
            elif not allow_missing:
                raise KeyError(oid)

    def get_unpacked_object(
        self, sha1: bytes, *, include_comp: bool = False
    ) -> UnpackedObject:
        """Obtain the unpacked object.

        Args:
          sha1: sha for the object.
          include_comp: Whether to include compressed data
        """
        if sha1 == ZERO_SHA:
            raise KeyError(sha1)
        # Accept both hex (40-byte) and binary (20-byte) sha forms.
        if len(sha1) == 40:
            sha = hex_to_sha(sha1)
            hexsha = sha1
        elif len(sha1) == 20:
            sha = sha1
            hexsha = None
        else:
            raise AssertionError(f"Invalid object sha1 {sha1!r}")
        for pack in self._iter_cached_packs():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except (KeyError, PackFileDisappeared):
                pass
        if hexsha is None:
            hexsha = sha_to_hex(sha1)
        # Maybe something else has added a pack with the object
        # in the mean time?
        for pack in self._update_pack_cache():
            try:
                return pack.get_unpacked_object(sha, include_comp=include_comp)
            except KeyError:
                pass
        for alternate in self.alternates:
            try:
                return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
            except KeyError:
                pass
        raise KeyError(hexsha)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, Optional[str]]],
        progress: Optional[Callable[[str], None]] = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over (object, path) tuples, should support
            __len__.
        Returns: Pack object of the objects written.
        """
        count = len(objects)
        record_iter = (full_unpacked_object(o) for (o, p) in objects)
        return self.add_pack_data(count, record_iter, progress=progress)
1000
1001
1002class DiskObjectStore(PackBasedObjectStore):
1003 """Git-style object store that exists on disk."""
1004
1005 path: Union[str, os.PathLike]
1006 pack_dir: Union[str, os.PathLike]
1007
1008 def __init__(
1009 self,
1010 path: Union[str, os.PathLike],
1011 loose_compression_level=-1,
1012 pack_compression_level=-1,
1013 pack_index_version=None,
1014 pack_delta_window_size=None,
1015 pack_window_memory=None,
1016 pack_delta_cache_size=None,
1017 pack_depth=None,
1018 pack_threads=None,
1019 pack_big_file_threshold=None,
1020 ) -> None:
1021 """Open an object store.
1022
1023 Args:
1024 path: Path of the object store.
1025 loose_compression_level: zlib compression level for loose objects
1026 pack_compression_level: zlib compression level for pack objects
1027 pack_index_version: pack index version to use (1, 2, or 3)
1028 pack_delta_window_size: sliding window size for delta compression
1029 pack_window_memory: memory limit for delta window operations
1030 pack_delta_cache_size: size of cache for delta operations
1031 pack_depth: maximum delta chain depth
1032 pack_threads: number of threads for pack operations
1033 pack_big_file_threshold: threshold for treating files as big
1034 """
1035 super().__init__(
1036 pack_compression_level=pack_compression_level,
1037 pack_index_version=pack_index_version,
1038 pack_delta_window_size=pack_delta_window_size,
1039 pack_window_memory=pack_window_memory,
1040 pack_delta_cache_size=pack_delta_cache_size,
1041 pack_depth=pack_depth,
1042 pack_threads=pack_threads,
1043 pack_big_file_threshold=pack_big_file_threshold,
1044 )
1045 self.path = path
1046 self.pack_dir = os.path.join(self.path, PACKDIR)
1047 self._alternates = None
1048 self.loose_compression_level = loose_compression_level
1049 self.pack_compression_level = pack_compression_level
1050 self.pack_index_version = pack_index_version
1051
1052 # Commit graph support - lazy loaded
1053 self._commit_graph = None
1054 self._use_commit_graph = True # Default to true
1055
1056 def __repr__(self) -> str:
1057 """Return string representation of DiskObjectStore.
1058
1059 Returns:
1060 String representation including the store path
1061 """
1062 return f"<{self.__class__.__name__}({self.path!r})>"
1063
1064 @classmethod
1065 def from_config(cls, path: Union[str, os.PathLike], config):
1066 """Create a DiskObjectStore from a configuration object.
1067
1068 Args:
1069 path: Path to the object store directory
1070 config: Configuration object to read settings from
1071
1072 Returns:
1073 New DiskObjectStore instance configured according to config
1074 """
1075 try:
1076 default_compression_level = int(
1077 config.get((b"core",), b"compression").decode()
1078 )
1079 except KeyError:
1080 default_compression_level = -1
1081 try:
1082 loose_compression_level = int(
1083 config.get((b"core",), b"looseCompression").decode()
1084 )
1085 except KeyError:
1086 loose_compression_level = default_compression_level
1087 try:
1088 pack_compression_level = int(
1089 config.get((b"core",), "packCompression").decode()
1090 )
1091 except KeyError:
1092 pack_compression_level = default_compression_level
1093 try:
1094 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1095 except KeyError:
1096 pack_index_version = None
1097
1098 # Read pack configuration options
1099 try:
1100 pack_delta_window_size = int(
1101 config.get((b"pack",), b"deltaWindowSize").decode()
1102 )
1103 except KeyError:
1104 pack_delta_window_size = None
1105 try:
1106 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1107 except KeyError:
1108 pack_window_memory = None
1109 try:
1110 pack_delta_cache_size = int(
1111 config.get((b"pack",), b"deltaCacheSize").decode()
1112 )
1113 except KeyError:
1114 pack_delta_cache_size = None
1115 try:
1116 pack_depth = int(config.get((b"pack",), b"depth").decode())
1117 except KeyError:
1118 pack_depth = None
1119 try:
1120 pack_threads = int(config.get((b"pack",), b"threads").decode())
1121 except KeyError:
1122 pack_threads = None
1123 try:
1124 pack_big_file_threshold = int(
1125 config.get((b"pack",), b"bigFileThreshold").decode()
1126 )
1127 except KeyError:
1128 pack_big_file_threshold = None
1129
1130 # Read core.commitGraph setting
1131 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1132
1133 instance = cls(
1134 path,
1135 loose_compression_level,
1136 pack_compression_level,
1137 pack_index_version,
1138 pack_delta_window_size,
1139 pack_window_memory,
1140 pack_delta_cache_size,
1141 pack_depth,
1142 pack_threads,
1143 pack_big_file_threshold,
1144 )
1145 instance._use_commit_graph = use_commit_graph
1146 return instance
1147
1148 @property
1149 def alternates(self):
1150 """Get the list of alternate object stores.
1151
1152 Reads from .git/objects/info/alternates if not already cached.
1153
1154 Returns:
1155 List of DiskObjectStore instances for alternate object directories
1156 """
1157 if self._alternates is not None:
1158 return self._alternates
1159 self._alternates = []
1160 for path in self._read_alternate_paths():
1161 self._alternates.append(DiskObjectStore(path))
1162 return self._alternates
1163
1164 def _read_alternate_paths(self):
1165 try:
1166 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1167 except FileNotFoundError:
1168 return
1169 with f:
1170 for line in f.readlines():
1171 line = line.rstrip(b"\n")
1172 if line.startswith(b"#"):
1173 continue
1174 if os.path.isabs(line):
1175 yield os.fsdecode(line)
1176 else:
1177 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1178
1179 def add_alternate_path(self, path) -> None:
1180 """Add an alternate path to this object store."""
1181 try:
1182 os.mkdir(os.path.join(self.path, INFODIR))
1183 except FileExistsError:
1184 pass
1185 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1186 with GitFile(alternates_path, "wb") as f:
1187 try:
1188 orig_f = open(alternates_path, "rb")
1189 except FileNotFoundError:
1190 pass
1191 else:
1192 with orig_f:
1193 f.write(orig_f.read())
1194 f.write(os.fsencode(path) + b"\n")
1195
1196 if not os.path.isabs(path):
1197 path = os.path.join(self.path, path)
1198 self.alternates.append(DiskObjectStore(path))
1199
1200 def _update_pack_cache(self):
1201 """Read and iterate over new pack files and cache them."""
1202 try:
1203 pack_dir_contents = os.listdir(self.pack_dir)
1204 except FileNotFoundError:
1205 self.close()
1206 return []
1207 pack_files = set()
1208 for name in pack_dir_contents:
1209 if name.startswith("pack-") and name.endswith(".pack"):
1210 # verify that idx exists first (otherwise the pack was not yet
1211 # fully written)
1212 idx_name = os.path.splitext(name)[0] + ".idx"
1213 if idx_name in pack_dir_contents:
1214 pack_name = name[: -len(".pack")]
1215 pack_files.add(pack_name)
1216
1217 # Open newly appeared pack files
1218 new_packs = []
1219 for f in pack_files:
1220 if f not in self._pack_cache:
1221 pack = Pack(
1222 os.path.join(self.pack_dir, f),
1223 delta_window_size=self.pack_delta_window_size,
1224 window_memory=self.pack_window_memory,
1225 delta_cache_size=self.pack_delta_cache_size,
1226 depth=self.pack_depth,
1227 threads=self.pack_threads,
1228 big_file_threshold=self.pack_big_file_threshold,
1229 )
1230 new_packs.append(pack)
1231 self._pack_cache[f] = pack
1232 # Remove disappeared pack files
1233 for f in set(self._pack_cache) - pack_files:
1234 self._pack_cache.pop(f).close()
1235 return new_packs
1236
    def _get_shafile_path(self, sha):
        """Return the on-disk path for the loose object with hex *sha*."""
        # Check from object dir; loose objects live at
        # <objdir>/<first two hex chars>/<remaining 38 hex chars>.
        return hex_to_filename(self.path, sha)
1240
1241 def _iter_loose_objects(self):
1242 for base in os.listdir(self.path):
1243 if len(base) != 2:
1244 continue
1245 for rest in os.listdir(os.path.join(self.path, base)):
1246 sha = os.fsencode(base + rest)
1247 if not valid_hexsha(sha):
1248 continue
1249 yield sha
1250
1251 def count_loose_objects(self) -> int:
1252 """Count the number of loose objects in the object store.
1253
1254 Returns:
1255 Number of loose objects
1256 """
1257 count = 0
1258 if not os.path.exists(self.path):
1259 return 0
1260
1261 for i in range(256):
1262 subdir = os.path.join(self.path, f"{i:02x}")
1263 try:
1264 count += len(
1265 [
1266 name
1267 for name in os.listdir(subdir)
1268 if len(name) == 38 # 40 - 2 for the prefix
1269 ]
1270 )
1271 except FileNotFoundError:
1272 # Directory may have been removed or is inaccessible
1273 continue
1274
1275 return count
1276
1277 def _get_loose_object(self, sha):
1278 path = self._get_shafile_path(sha)
1279 try:
1280 return ShaFile.from_path(path)
1281 except FileNotFoundError:
1282 return None
1283
1284 def delete_loose_object(self, sha) -> None:
1285 """Delete a loose object from disk.
1286
1287 Args:
1288 sha: SHA1 of the object to delete
1289
1290 Raises:
1291 FileNotFoundError: If the object file doesn't exist
1292 """
1293 os.remove(self._get_shafile_path(sha))
1294
1295 def get_object_mtime(self, sha):
1296 """Get the modification time of an object.
1297
1298 Args:
1299 sha: SHA1 of the object
1300
1301 Returns:
1302 Modification time as seconds since epoch
1303
1304 Raises:
1305 KeyError: if the object is not found
1306 """
1307 # First check if it's a loose object
1308 if self.contains_loose(sha):
1309 path = self._get_shafile_path(sha)
1310 try:
1311 return os.path.getmtime(path)
1312 except FileNotFoundError:
1313 pass
1314
1315 # Check if it's in a pack file
1316 for pack in self.packs:
1317 try:
1318 if sha in pack:
1319 # Use the pack file's mtime for packed objects
1320 pack_path = pack._data_path
1321 try:
1322 return os.path.getmtime(pack_path)
1323 except (FileNotFoundError, AttributeError):
1324 pass
1325 except PackFileDisappeared:
1326 pass
1327
1328 raise KeyError(sha)
1329
1330 def _remove_pack(self, pack) -> None:
1331 try:
1332 del self._pack_cache[os.path.basename(pack._basename)]
1333 except KeyError:
1334 pass
1335 pack.close()
1336 os.remove(pack.data.path)
1337 os.remove(pack.index.path)
1338
1339 def _get_pack_basepath(self, entries):
1340 suffix = iter_sha1(entry[0] for entry in entries)
1341 # TODO: Handle self.pack_dir being bytes
1342 suffix = suffix.decode("ascii")
1343 return os.path.join(self.pack_dir, "pack-" + suffix)
1344
    def _complete_pack(self, f, path, num_objects, indexer, progress=None):
        """Move a specific file containing a pack into the pack directory.

        Note: The file should be on the same file system as the
        packs directory.

        Args:
            f: Open file object for the pack.
            path: Path to the pack file.
            num_objects: Number of objects in the pack, used only for
                progress reporting.
            indexer: A PackIndexer for indexing the pack.
            progress: Optional progress reporting callback.

        Returns:
            The completed Pack, now resident in the pack directory.
        """
        entries = []
        # Drain the indexer to collect the pack index entries.
        for i, entry in enumerate(indexer):
            if progress is not None:
                progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
            entries.append(entry)

        # Append any externally-referenced base objects (thin pack case)
        # so the pack becomes self-contained; this rewrites the trailing
        # pack checksum, returned here as pack_sha.
        pack_sha, extra_entries = extend_pack(
            f,
            indexer.ext_refs(),
            get_raw=self.get_raw,
            compression_level=self.pack_compression_level,
            progress=progress,
        )
        f.flush()
        try:
            fileno = f.fileno()
        except AttributeError:
            pass
        else:
            # Make sure the pack data is on disk before renaming it into
            # its final location below.
            os.fsync(fileno)
        f.close()

        entries.extend(extra_entries)

        # Move the pack in.
        entries.sort()
        pack_base_name = self._get_pack_basepath(entries)

        # The pack name is derived from its contents, so an existing pack
        # with the same base name is identical; reuse it.
        for pack in self.packs:
            if pack._basename == pack_base_name:
                return pack

        target_pack_path = pack_base_name + ".pack"
        target_index_path = pack_base_name + ".idx"
        if sys.platform == "win32":
            # Windows might have the target pack file lingering. Attempt
            # removal, silently passing if the target does not exist.
            with suppress(FileNotFoundError):
                os.remove(target_pack_path)
        os.rename(path, target_pack_path)

        # Write the index.
        with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
            write_pack_index(
                index_file, entries, pack_sha, version=self.pack_index_version
            )

        # Add the pack to the store and return it.
        final_pack = Pack(
            pack_base_name,
            delta_window_size=self.pack_delta_window_size,
            window_memory=self.pack_window_memory,
            delta_cache_size=self.pack_delta_cache_size,
            depth=self.pack_depth,
            threads=self.pack_threads,
            big_file_threshold=self.pack_big_file_threshold,
        )
        final_pack.check_length_and_checksum()
        self._add_cached_pack(pack_base_name, final_pack)
        return final_pack
1416
1417 def add_thin_pack(self, read_all, read_some, progress=None):
1418 """Add a new thin pack to this object store.
1419
1420 Thin packs are packs that contain deltas with parents that exist
1421 outside the pack. They should never be placed in the object store
1422 directly, and always indexed and completed as they are copied.
1423
1424 Args:
1425 read_all: Read function that blocks until the number of
1426 requested bytes are read.
1427 read_some: Read function that returns at least one byte, but may
1428 not return the number of bytes requested.
1429 Returns: A Pack object pointing at the now-completed thin pack in the
1430 objects/pack directory.
1431 """
1432 import tempfile
1433
1434 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1435 with os.fdopen(fd, "w+b") as f:
1436 os.chmod(path, PACK_MODE)
1437 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
1438 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
1439 copier.verify(progress=progress)
1440 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1441
1442 def add_pack(self):
1443 """Add a new pack to this object store.
1444
1445 Returns: Fileobject to write to, a commit function to
1446 call when the pack is finished and an abort
1447 function.
1448 """
1449 import tempfile
1450
1451 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1452 f = os.fdopen(fd, "w+b")
1453 os.chmod(path, PACK_MODE)
1454
1455 def commit():
1456 if f.tell() > 0:
1457 f.seek(0)
1458 with PackData(path, f) as pd:
1459 indexer = PackIndexer.for_pack_data(
1460 pd, resolve_ext_ref=self.get_raw
1461 )
1462 return self._complete_pack(f, path, len(pd), indexer)
1463 else:
1464 f.close()
1465 os.remove(path)
1466 return None
1467
1468 def abort() -> None:
1469 f.close()
1470 os.remove(path)
1471
1472 return f, commit, abort
1473
1474 def add_object(self, obj) -> None:
1475 """Add a single object to this object store.
1476
1477 Args:
1478 obj: Object to add
1479 """
1480 path = self._get_shafile_path(obj.id)
1481 dir = os.path.dirname(path)
1482 try:
1483 os.mkdir(dir)
1484 except FileExistsError:
1485 pass
1486 if os.path.exists(path):
1487 return # Already there, no need to write again
1488 with GitFile(path, "wb", mask=PACK_MODE) as f:
1489 f.write(
1490 obj.as_legacy_object(compression_level=self.loose_compression_level)
1491 )
1492
1493 @classmethod
1494 def init(cls, path: Union[str, os.PathLike]):
1495 """Initialize a new disk object store.
1496
1497 Creates the necessary directory structure for a Git object store.
1498
1499 Args:
1500 path: Path where the object store should be created
1501
1502 Returns:
1503 New DiskObjectStore instance
1504 """
1505 try:
1506 os.mkdir(path)
1507 except FileExistsError:
1508 pass
1509 os.mkdir(os.path.join(path, "info"))
1510 os.mkdir(os.path.join(path, PACKDIR))
1511 return cls(path)
1512
1513 def iter_prefix(self, prefix):
1514 """Iterate over all object SHAs with the given prefix.
1515
1516 Args:
1517 prefix: Hex prefix to search for (as bytes)
1518
1519 Returns:
1520 Iterator of object SHAs (as bytes) matching the prefix
1521 """
1522 if len(prefix) < 2:
1523 yield from super().iter_prefix(prefix)
1524 return
1525 seen = set()
1526 dir = prefix[:2].decode()
1527 rest = prefix[2:].decode()
1528 try:
1529 for name in os.listdir(os.path.join(self.path, dir)):
1530 if name.startswith(rest):
1531 sha = os.fsencode(dir + name)
1532 if sha not in seen:
1533 seen.add(sha)
1534 yield sha
1535 except FileNotFoundError:
1536 pass
1537
1538 for p in self.packs:
1539 bin_prefix = (
1540 binascii.unhexlify(prefix)
1541 if len(prefix) % 2 == 0
1542 else binascii.unhexlify(prefix[:-1])
1543 )
1544 for sha in p.index.iter_prefix(bin_prefix):
1545 sha = sha_to_hex(sha)
1546 if sha.startswith(prefix) and sha not in seen:
1547 seen.add(sha)
1548 yield sha
1549 for alternate in self.alternates:
1550 for sha in alternate.iter_prefix(prefix):
1551 if sha not in seen:
1552 seen.add(sha)
1553 yield sha
1554
1555 def get_commit_graph(self):
1556 """Get the commit graph for this object store.
1557
1558 Returns:
1559 CommitGraph object if available, None otherwise
1560 """
1561 if not self._use_commit_graph:
1562 return None
1563
1564 if self._commit_graph is None:
1565 from .commit_graph import read_commit_graph
1566
1567 # Look for commit graph in our objects directory
1568 graph_file = os.path.join(self.path, "info", "commit-graph")
1569 if os.path.exists(graph_file):
1570 self._commit_graph = read_commit_graph(graph_file)
1571 return self._commit_graph
1572
1573 def write_commit_graph(self, refs=None, reachable=True) -> None:
1574 """Write a commit graph file for this object store.
1575
1576 Args:
1577 refs: List of refs to include. If None, includes all refs from object store.
1578 reachable: If True, includes all commits reachable from refs.
1579 If False, only includes the direct ref targets.
1580 """
1581 from .commit_graph import get_reachable_commits
1582
1583 if refs is None:
1584 # Get all commit objects from the object store
1585 all_refs = []
1586 # Iterate through all objects to find commits
1587 for sha in self:
1588 try:
1589 obj = self[sha]
1590 if obj.type_name == b"commit":
1591 all_refs.append(sha)
1592 except KeyError:
1593 continue
1594 else:
1595 # Use provided refs
1596 all_refs = refs
1597
1598 if not all_refs:
1599 return # No commits to include
1600
1601 if reachable:
1602 # Get all reachable commits
1603 commit_ids = get_reachable_commits(self, all_refs)
1604 else:
1605 # Just use the direct ref targets - ensure they're hex ObjectIDs
1606 commit_ids = []
1607 for ref in all_refs:
1608 if isinstance(ref, bytes) and len(ref) == 40:
1609 # Already hex ObjectID
1610 commit_ids.append(ref)
1611 elif isinstance(ref, bytes) and len(ref) == 20:
1612 # Binary SHA, convert to hex ObjectID
1613 from .objects import sha_to_hex
1614
1615 commit_ids.append(sha_to_hex(ref))
1616 else:
1617 # Assume it's already correct format
1618 commit_ids.append(ref)
1619
1620 if commit_ids:
1621 # Write commit graph directly to our object store path
1622 # Generate the commit graph
1623 from .commit_graph import generate_commit_graph
1624
1625 graph = generate_commit_graph(self, commit_ids)
1626
1627 if graph.entries:
1628 # Ensure the info directory exists
1629 info_dir = os.path.join(self.path, "info")
1630 os.makedirs(info_dir, exist_ok=True)
1631
1632 # Write using GitFile for atomic operation
1633 graph_path = os.path.join(info_dir, "commit-graph")
1634 with GitFile(graph_path, "wb") as f:
1635 assert isinstance(
1636 f, _GitFile
1637 ) # GitFile in write mode always returns _GitFile
1638 graph.write_to_file(f)
1639
1640 # Clear cached commit graph so it gets reloaded
1641 self._commit_graph = None
1642
1643 def prune(self, grace_period: Optional[int] = None) -> None:
1644 """Prune/clean up this object store.
1645
1646 This removes temporary files that were left behind by interrupted
1647 pack operations. These are files that start with ``tmp_pack_`` in the
1648 repository directory or files with .pack extension but no corresponding
1649 .idx file in the pack directory.
1650
1651 Args:
1652 grace_period: Grace period in seconds for removing temporary files.
1653 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
1654 """
1655 import glob
1656
1657 if grace_period is None:
1658 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1659
1660 # Clean up tmp_pack_* files in the repository directory
1661 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1662 # Check if file is old enough (more than grace period)
1663 mtime = os.path.getmtime(tmp_file)
1664 if time.time() - mtime > grace_period:
1665 os.remove(tmp_file)
1666
1667 # Clean up orphaned .pack files without corresponding .idx files
1668 try:
1669 pack_dir_contents = os.listdir(self.pack_dir)
1670 except FileNotFoundError:
1671 return
1672
1673 pack_files = {}
1674 idx_files = set()
1675
1676 for name in pack_dir_contents:
1677 if name.endswith(".pack"):
1678 base_name = name[:-5] # Remove .pack extension
1679 pack_files[base_name] = name
1680 elif name.endswith(".idx"):
1681 base_name = name[:-4] # Remove .idx extension
1682 idx_files.add(base_name)
1683
1684 # Remove .pack files without corresponding .idx files
1685 for base_name, pack_name in pack_files.items():
1686 if base_name not in idx_files:
1687 pack_path = os.path.join(self.pack_dir, pack_name)
1688 # Check if file is old enough (more than grace period)
1689 mtime = os.path.getmtime(pack_path)
1690 if time.time() - mtime > grace_period:
1691 os.remove(pack_path)
1692
1693
class MemoryObjectStore(BaseObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self) -> None:
        """Initialize a MemoryObjectStore.

        Creates an empty in-memory object store.
        """
        super().__init__()
        # Maps 40-byte hex object IDs to the stored ShaFile objects.
        self._data: dict[ObjectID, ShaFile] = {}
        # NOTE(review): this store never writes packs; presumably kept for
        # interface parity with disk-backed stores — confirm.
        self.pack_compression_level = -1

    def _to_hexsha(self, sha):
        """Normalize *sha* (40-byte hex or 20-byte binary) to hex form.

        Raises:
            ValueError: if *sha* has neither hex nor binary SHA1 length.
        """
        if len(sha) == 40:
            return sha
        elif len(sha) == 20:
            return sha_to_hex(sha)
        else:
            raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha):
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        # A memory store never holds packed objects.
        return False

    def __iter__(self):
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self):
        """List with pack objects."""
        return []

    def get_raw(self, name: ObjectID):
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID):
        """Retrieve an object by SHA.

        Args:
            name: SHA of the object (as hex string or bytes)

        Returns:
            Copy of the ShaFile object

        Raises:
            KeyError: If the object is not found
        """
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj) -> None:
        """Add a single object to this object store."""
        # Store a copy so later mutation of *obj* does not affect the store.
        self._data[obj.id] = obj.copy()

    def add_objects(self, objects, progress=None) -> None:
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress callback (unused here).
        """
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self):
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        from tempfile import SpooledTemporaryFile

        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            # Inflate every object from the received pack and store each
            # one individually; an empty stream is simply discarded.
            size = f.tell()
            if size > 0:
                f.seek(0)
                p = PackData.from_file(f, size)
                for obj in PackInflater.for_pack_data(p, self.get_raw):
                    self.add_object(obj)
                p.close()
                f.close()
            else:
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort

    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> None:
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator over the objects to add.
            progress: Optional progress callback.
        """
        if count == 0:
            return

        # Since MemoryObjectStore doesn't support pack files, we need to
        # extract individual objects. To handle deltas properly, we write
        # to a temporary pack and then use PackInflater to resolve them.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
            )
        except BaseException:
            abort()
            raise
        else:
            commit()

    def add_thin_pack(self, read_all, read_some, progress=None) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
            progress: Optional progress callback.
        """
        f, commit, abort = self.add_pack()
        try:
            # verify() copies the stream into f while checking it; commit()
            # then inflates and stores the individual objects.
            copier = PackStreamCopier(read_all, read_some, f)
            copier.verify()
        except BaseException:
            abort()
            raise
        else:
            commit()
1852
1853
class ObjectIterator(Protocol):
    """Structural interface for anything that can iterate over objects."""

    def iterobjects(self) -> Iterator[ShaFile]:
        """Iterate over all objects.

        Returns:
            Iterator of ShaFile objects
        """
        # Protocol stub: implementers provide the actual iteration.
        raise NotImplementedError(self.iterobjects)
1864
1865
def tree_lookup_path(lookup_obj, root_sha, path):
    """Look up an object in a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup
    Returns: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        # The starting point must be a tree object.
        raise NotTreeError(root_sha)
    return root.lookup_path(lookup_obj, path)
1879
1880
def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Collect SHA1s of files and directories for specified tree.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories
    """
    tree = obj_store[tree_sha]
    assert isinstance(tree, Tree)
    for _name, mode, sha in tree.iteritems():
        # Submodule (gitlink) entries point outside this repository;
        # already-seen SHAs have been fully walked.
        if S_ISGITLINK(mode) or sha in kset:
            continue
        kset.add(sha)
        if stat.S_ISDIR(mode):
            # Recurse into subtrees.
            _collect_filetree_revs(obj_store, sha, kset)
1898
1899
def _split_commits_and_tags(
    obj_store: ObjectContainer, lst, *, ignore_unknown=False
) -> tuple[set[bytes], set[bytes], set[bytes]]:
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, and unless ignore_unknown argument is True, KeyError
    is thrown for SHA1 missing in the repository

    Args:
        obj_store: Object store to get objects by SHA1 from
        lst: Collection of commit and tag SHAs
        ignore_unknown: True to skip SHA1 missing in the repository
            silently.
    Returns: A tuple of (commits, tags, others) SHA1s
    """
    commits: set[bytes] = set()
    tags: set[bytes] = set()
    others: set[bytes] = set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if not ignore_unknown:
                raise
            continue
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # Resolve the tag target recursively so that tagged commits
            # (and chained tags) also land in the right buckets.
            tagged = obj.object[1]
            sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                obj_store, [tagged], ignore_unknown=ignore_unknown
            )
            commits |= sub_commits
            tags |= sub_tags
            others |= sub_others
        else:
            others.add(sha)
    return (commits, tags, others)
1941
1942
class MissingObjectFinder:
    """Find the objects missing from another object store.

    Args:
        object_store: Object store containing at least all objects to be
            sent
        haves: SHA1s of commits not to send (already present in target)
        wants: SHA1s of commits to send
        progress: Optional function to report progress to.
        get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(
        self,
        object_store,
        haves,
        wants,
        *,
        shallow=None,
        progress=None,
        get_tagged=None,
        get_parents=lambda commit: commit.parents,
    ) -> None:
        """Initialize a MissingObjectFinder.

        Args:
            object_store: Object store containing objects
            haves: SHA1s of objects already present in target
            wants: SHA1s of objects to send
            shallow: Set of shallow commit SHA1s
            progress: Optional progress reporting callback
            get_tagged: Function returning dict of pointed-to sha -> tag sha
            get_parents: Function for getting commit parents
        """
        self.object_store = object_store
        if shallow is None:
            shallow = set()
        self._get_parents = get_parents
        # process Commits and Tags differently
        # Note, while haves may list commits/tags not available locally,
        # and such SHAs would get filtered out by _split_commits_and_tags,
        # wants shall list only known SHAs, and otherwise
        # _split_commits_and_tags fails with KeyError
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, ignore_unknown=True
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, ignore_unknown=False
        )
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = _collect_ancestors(
            object_store, have_commits, shallow=shallow, get_parents=self._get_parents
        )[0]
        # all_missing - complete set of commits between haves and wants
        # common - commits from all_ancestors we hit into while
        # traversing parent hierarchy of wants
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            all_ancestors,
            shallow=shallow,
            get_parents=self._get_parents,
        )
        self.remote_has: set[bytes] = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.remote_has.add(h)
            cmt = object_store[h]
            # Every blob/tree reachable from a common commit's tree is
            # also already present remotely.
            _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
        # record tags we have as visited, too
        for t in have_tags:
            self.remote_has.add(t)
        # sha_done tracks everything already handled (or known remote).
        self.sha_done = set(self.remote_has)

        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        # Entries are (sha, name hint, type hint, is_leaf) tuples.
        self.objects_to_send: set[
            tuple[ObjectID, Optional[bytes], Optional[int], bool]
        ] = {(w, None, Commit.type_num, False) for w in missing_commits}
        missing_tags = want_tags.difference(have_tags)
        self.objects_to_send.update(
            {(w, None, Tag.type_num, False) for w in missing_tags}
        )
        missing_others = want_others.difference(have_others)
        self.objects_to_send.update({(w, None, None, False) for w in missing_others})

        if progress is None:
            # Default to a no-op progress callback.
            self.progress = lambda x: None
        else:
            self.progress = progress
        self._tagged = (get_tagged and get_tagged()) or {}

    def get_remote_has(self):
        """Get the set of SHAs the remote has.

        Returns:
            Set of SHA1s that the remote side already has
        """
        return self.remote_has

    def add_todo(
        self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
    ) -> None:
        """Add objects to the todo list.

        Args:
            entries: Iterable of tuples (sha, name, type_num, is_leaf)
        """
        # Skip entries that have already been processed.
        self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])

    def __next__(self) -> tuple[bytes, Optional[PackHint]]:
        """Get the next object to send.

        Returns:
            Tuple of (sha, pack_hint)

        Raises:
            StopIteration: When no more objects to send
        """
        while True:
            if not self.objects_to_send:
                self.progress(
                    f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
                )
                raise StopIteration
            (sha, name, type_num, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            # Non-leaf objects may reference further objects; queue them.
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, b"", Tree.type_num, False)])
            elif isinstance(o, Tree):
                self.add_todo(
                    [
                        (
                            s,
                            n,
                            (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
                            not stat.S_ISDIR(m),
                        )
                        for n, m, s in o.iteritems()
                        if not S_ISGITLINK(m)
                    ]
                )
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
        if sha in self._tagged:
            # Also send the tag object pointing at this sha.
            self.add_todo([(self._tagged[sha], None, None, True)])
        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
        if type_num is None:
            pack_hint = None
        else:
            pack_hint = (type_num, name)
        return (sha, pack_hint)

    def __iter__(self):
        """Return iterator over objects to send.

        Returns:
            Self (this class implements the iterator protocol)
        """
        return self
2114
2115
class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    heads: set[ObjectID]
    """Revisions without descendants in the local repo."""

    get_parents: Callable[[ObjectID], ObjectID]
    """Function to retrieve parents in the local repo."""

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents,
        shallow: Optional[set[ObjectID]] = None,
        update_shallow=None,
    ) -> None:
        """Create a new instance.

        Args:
            local_heads: Heads to start search with
            get_parents: Function for finding the parents of a SHA1.
            shallow: Optional set of shallow commit SHA1s.
            update_shallow: Optional callback for shallow updates.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps a visited SHA to its parent list; None once acked/processed.
        self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
        self.shallow = set() if shallow is None else shallow
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            raise ValueError(f"unexpected sha {sha!r} received")
        frontier = {sha}

        # Walk known ancestry breadth-first, pruning acked heads as we go;
        # stop once no heads remain to remove.
        while self.heads:
            # Drop any frontier nodes that are still listed as heads.
            self.heads.difference_update(frontier)

            # Expand the frontier to the recorded parents, marking each
            # node as fully processed by nulling its parents entry.
            next_frontier: set = set()
            for node in frontier:
                recorded = self.parents.get(node)
                if recorded is not None:
                    next_frontier.update(recorded)
                self.parents[node] = None

            if not next_frontier:
                # No further ancestors are known locally; stop.
                break

            frontier = next_frontier

    def next(self):
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        ret = self.heads.pop()
        try:
            ps = self.get_parents(ret)
        except KeyError:
            # Unknown object locally; nothing more to report for it.
            return None
        self.parents[ret] = ps
        # Queue parents we have not visited yet.
        self.heads.update(p for p in ps if p not in self.parents)
        return ret

    __next__ = next
2191
2192
def commit_tree_changes(object_store, tree, changes):
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
        object_store: Object store to store new objects in
            and retrieve old ones from.
        tree: Original tree root
        changes: changes to apply
    Returns: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    deferred = {}
    for path, mode, sha in changes:
        if b"/" in path:
            # Change lives in a subtree: defer it, grouped by directory.
            dirname, remainder = path.split(b"/", 1)
            deferred.setdefault(dirname, []).append((remainder, mode, sha))
        elif sha is None:
            del tree[path]
        else:
            tree[path] = (mode, sha)
    for dirname, subchanges in deferred.items():
        try:
            existing = object_store[tree[dirname][1]]
        except KeyError:
            existing = Tree()
        new_subtree = commit_tree_changes(object_store, existing, subchanges)
        if len(new_subtree) == 0:
            # Subtree became empty; drop its entry entirely.
            del tree[dirname]
        else:
            tree[dirname] = (stat.S_IFDIR, new_subtree.id)
    object_store.add_object(tree)
    return tree
2239
2240
class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores."""

    def __init__(self, bases, add_store=None) -> None:
        """Initialize an OverlayObjectStore.

        Args:
            bases: List of base object stores to overlay
            add_store: Optional store to write new objects to
        """
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object):
        """Add a single object to the store.

        Args:
            object: Object to add

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects, progress=None):
        """Add multiple objects to the store.

        Args:
            objects: Iterator of objects to add
            progress: Optional progress reporting callback

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Fixed: previously raised NotImplementedError(self.add_object),
            # misreporting which method is unimplemented.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self):
        """Get the list of packs from all overlaid stores.

        Returns:
            Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
            Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[bytes] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[bytes],
        *,
        include_comp=False,
        allow_missing: bool = False,
        convert_ofs_delta=True,
    ) -> Iterator[ShaFile]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of unpacked objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[bytes] = set()

        for b in self.bases:
            # Hand each base a snapshot so the shared set is never mutated
            # while the base store is iterating over it (same approach as
            # iterobjects_subset).
            current_todo = todo - found
            for o in b.iter_unpacked_subset(
                current_todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                found.add(o.id)

        missing = todo - found
        if missing and not allow_missing:
            # Fixed: previously raised KeyError(o.id) -- the id of an object
            # that *was* found -- and raised NameError when no object had
            # been yielded at all. Report an actually-missing SHA instead.
            raise KeyError(next(iter(missing)))

    def get_raw(self, sha_id):
        """Get the raw object data from the overlaid stores.

        Args:
            sha_id: SHA of the object

        Returns:
            Tuple of (type_num, raw_data)

        Raises:
            KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha) -> bool:
        """Check if an object is packed in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha) -> bool:
        """Check if an object is loose in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False
2418
2419
def read_packs_file(f):
    """Yield the names of the packs listed in a packs file.

    Args:
        f: File-like object to read the packs list from.
    Returns: Iterator over pack names (as str)
    """
    nonblank = (entry for entry in f.read().splitlines() if entry)
    for entry in nonblank:
        # Each line is "<kind> <name>"; only "P" (pack) entries matter.
        kind, pack_name = entry.split(b" ", 1)
        if kind == b"P":
            yield os.fsdecode(pack_name)
2429
2430
class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend.

    Bucket stores hold only packs; every loose-object operation is a no-op.
    Concrete subclasses must implement _remove_pack, _iter_pack_names,
    _get_pack and _upload_pack.
    """

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects.

        Always empty: bucket-based stores keep everything in packs.
        """
        return iter([])

    def _get_loose_object(self, sha) -> None:
        """Return the loose object for sha; always None for bucket stores.

        Args:
            sha: SHA of the object
        """
        return None

    def delete_loose_object(self, sha) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
            sha: SHA of the object to delete
        """
        # Doesn't exist..

    def _remove_pack(self, name) -> None:
        # Must be implemented by concrete bucket store subclasses.
        raise NotImplementedError(self._remove_pack)

    def _iter_pack_names(self) -> Iterator[str]:
        # Must be implemented by concrete bucket store subclasses.
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name) -> Pack:
        # Must be implemented by concrete bucket store subclasses.
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self):
        """Synchronize the in-memory pack cache with the bucket contents.

        Returns:
            List of newly discovered Pack objects
        """
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        # Upload the pack and its index under basename; must be implemented
        # by concrete bucket store subclasses.
        raise NotImplementedError

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        # Spool to memory first, overflowing to disk for large packs.
        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit():
            # Nothing was written: abort quietly rather than create an
            # empty pack.
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)
            p = PackData(pf.name, pf)
            entries = p.sorted_entries()
            # Pack name is derived from the sorted object SHAs.
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf)
            # If an identical pack already exists, reuse it instead of
            # uploading a duplicate.
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        return pf, commit, pf.close
2524
2525
def _collect_ancestors(
    store: ObjectContainer,
    heads,
    common: frozenset[ObjectID] = frozenset(),
    shallow: frozenset[ObjectID] = frozenset(),
    get_parents=lambda commit: commit.parents,
):
    """Collect all ancestors of heads up to (excluding) those in common.

    Args:
      store: Object store to retrieve commits (and the commit graph,
        if available) from.
      heads: commits to start from
      common: commits to end at, or empty set to walk repository
        completely
      shallow: set of shallow commits whose parents are not walked
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: a tuple (A, B) where A - all commits reachable
        from heads but not present in common, B - common (shared) elements
        that are directly reachable from heads
    """
    from collections import deque

    bases = set()
    commits = set()
    # BFS queue; deque gives O(1) popleft, where list.pop(0) was O(n)
    # per pop (quadratic over long histories).
    queue = deque(heads)

    # Try to use commit graph if available
    commit_graph = store.get_commit_graph()

    while queue:
        e = queue.popleft()
        if e in common:
            bases.add(e)
        elif e not in commits:
            commits.add(e)
            if e in shallow:
                # Shallow boundary: do not walk past this commit.
                continue

            # Try to use commit graph for parent lookup
            parents = None
            if commit_graph:
                parents = commit_graph.get_parents(e)

            if parents is None:
                # Fall back to loading the object
                cmt = store[e]
                parents = get_parents(cmt)

            queue.extend(parents)
    return (commits, bases)
2574
2575
def iter_tree_contents(
    store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
):
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
      store: Object store to load tree objects from.
      tree_id: SHA1 of the tree.
      include_trees: If True, include tree objects in the iteration.
    Returns: Iterator over TreeEntry namedtuples for all the objects in a
        tree.
    """
    if tree_id is None:
        return
    # Stack of pending entries, starting with a synthetic root entry.
    pending = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while pending:
        current = pending.pop()
        is_subtree = stat.S_ISDIR(current.mode)
        if is_subtree:
            subtree = store[current.sha]
            assert isinstance(subtree, Tree)
            children = [
                item.in_path(current.path)
                for item in subtree.iteritems(name_order=True)
            ]
            # Push reversed so the first child is popped (visited) first.
            pending.extend(reversed(children))
        if include_trees or not is_subtree:
            yield current
2605
2606
def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
      store: Object store to load objects from.
      sha: The object SHA to peel.
    Returns: Tuple of (original object, fully-peeled object). After
        peeling all intermediate tags; if the original ref does not
        point to a tag, both elements are the same object.
    """
    unpeeled = obj = store[sha]
    cls = object_class(obj.type_name)
    # Follow the tag chain until something other than a tag is reached.
    while cls is Tag:
        assert isinstance(obj, Tag)
        cls, sha = obj.object
        obj = store[sha]
    return unpeeled, obj