1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
24"""Git object store interfaces and implementation."""
26__all__ = [
27 "DEFAULT_TEMPFILE_GRACE_PERIOD",
28 "INFODIR",
29 "PACKDIR",
30 "PACK_MODE",
31 "BaseObjectStore",
32 "BitmapReachability",
33 "BucketBasedObjectStore",
34 "DiskObjectStore",
35 "GraphTraversalReachability",
36 "GraphWalker",
37 "MemoryObjectStore",
38 "MissingObjectFinder",
39 "ObjectIterator",
40 "ObjectReachabilityProvider",
41 "ObjectStoreGraphWalker",
42 "OverlayObjectStore",
43 "PackBasedObjectStore",
44 "PackCapableObjectStore",
45 "PackContainer",
46 "commit_tree_changes",
47 "find_shallow",
48 "get_depth",
49 "iter_commit_contents",
50 "iter_tree_contents",
51 "peel_sha",
52 "read_packs_file",
53 "tree_lookup_path",
54]
56import binascii
57import os
58import stat
59import sys
60import time
61import warnings
62from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
63from contextlib import suppress
64from io import BytesIO
65from pathlib import Path
66from typing import (
67 TYPE_CHECKING,
68 BinaryIO,
69 Protocol,
70 cast,
71)
73from .errors import NotTreeError
74from .file import GitFile, _GitFile
75from .midx import MultiPackIndex, load_midx
76from .objects import (
77 S_ISGITLINK,
78 ZERO_SHA,
79 Blob,
80 Commit,
81 ObjectID,
82 RawObjectID,
83 ShaFile,
84 Tag,
85 Tree,
86 TreeEntry,
87 hex_to_filename,
88 hex_to_sha,
89 object_class,
90 sha_to_hex,
91 valid_hexsha,
92)
93from .pack import (
94 PACK_SPOOL_FILE_MAX_SIZE,
95 ObjectContainer,
96 Pack,
97 PackData,
98 PackedObjectContainer,
99 PackFileDisappeared,
100 PackHint,
101 PackIndexer,
102 PackInflater,
103 PackStreamCopier,
104 UnpackedObject,
105 extend_pack,
106 full_unpacked_object,
107 generate_unpacked_objects,
108 iter_sha1,
109 load_pack_index_file,
110 pack_objects_to_data,
111 write_pack_data,
112 write_pack_index,
113)
114from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX
115from .refs import Ref
117if TYPE_CHECKING:
118 from .bitmap import EWAHBitmap
119 from .commit_graph import CommitGraph
120 from .config import Config
121 from .diff_tree import RenameDetector
122 from .pack import Pack
125class GraphWalker(Protocol):
126 """Protocol for graph walker objects."""
128 def __next__(self) -> ObjectID | None:
129 """Return the next object SHA to visit."""
130 ...
132 def ack(self, sha: ObjectID) -> None:
133 """Acknowledge that an object has been received."""
134 ...
136 def nak(self) -> None:
137 """Nothing in common was found."""
138 ...
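# Illustrative sketch (not part of dulwich's API): a minimal in-memory walker
# that satisfies the GraphWalker protocol above. The class name and behaviour
# are invented for demonstration; real callers normally use
# ObjectStoreGraphWalker, defined later in this module.
class _ExampleListGraphWalker:
    """Propose candidate SHAs from a fixed list and record acknowledgements."""

    def __init__(self, candidates: list[ObjectID]) -> None:
        self._candidates = iter(candidates)
        self.acked: list[ObjectID] = []
        self.nak_called = False

    def __next__(self) -> ObjectID | None:
        # Return None once there is nothing left to propose.
        return next(self._candidates, None)

    def ack(self, sha: ObjectID) -> None:
        self.acked.append(sha)

    def nak(self) -> None:
        self.nak_called = True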
141class ObjectReachabilityProvider(Protocol):
142 """Protocol for computing object reachability queries.
144 This abstraction allows reachability computations to be backed by either
145 naive graph traversal or optimized bitmap indexes, with a consistent interface.
146 """
148 def get_reachable_commits(
149 self,
150 heads: Iterable[ObjectID],
151 exclude: Iterable[ObjectID] | None = None,
152 shallow: Set[ObjectID] | None = None,
153 ) -> set[ObjectID]:
154 """Get all commits reachable from heads, excluding those in exclude.
156 Args:
157 heads: Starting commit SHAs
158 exclude: Commit SHAs to exclude (and their ancestors)
159 shallow: Set of shallow commit boundaries (traversal stops here)
161 Returns:
162 Set of commit SHAs reachable from heads but not from exclude
163 """
164 ...
166 def get_reachable_objects(
167 self,
168 commits: Iterable[ObjectID],
169 exclude_commits: Iterable[ObjectID] | None = None,
170 ) -> set[ObjectID]:
171 """Get all objects (commits + trees + blobs) reachable from commits.
173 Args:
174 commits: Starting commit SHAs
175 exclude_commits: Commits whose objects should be excluded
177 Returns:
178 Set of all object SHAs (commits, trees, blobs, tags)
179 """
180 ...
182 def get_tree_objects(
183 self,
184 tree_shas: Iterable[ObjectID],
185 ) -> set[ObjectID]:
186 """Get all trees and blobs reachable from the given trees.
188 Args:
189 tree_shas: Starting tree SHAs
191 Returns:
192 Set of tree and blob SHAs
193 """
194 ...
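# Illustrative sketch (not part of dulwich's API): how the protocol above is
# typically consumed. get_reachability_provider() is defined on BaseObjectStore
# below, and GraphTraversalReachability / BitmapReachability are the concrete
# providers later in this module; the helper name here is invented.
def _example_reachability(store: "BaseObjectStore", head: ObjectID) -> set[ObjectID]:
    """Return every object reachable from ``head`` using the best provider."""
    provider = store.get_reachability_provider()
    commits = provider.get_reachable_commits([head])
    return provider.get_reachable_objects(commits)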
197INFODIR = "info"
198PACKDIR = "pack"
200# use permissions consistent with Git; just readable by everyone
201# TODO: should packs also be non-writable on Windows? if so, that
202# would require some rather significant adjustments to the test suite
203PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
205# Grace period for cleaning up temporary pack files (in seconds)
206# Matches git's default of 2 weeks
207DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
210def find_shallow(
211 store: ObjectContainer, heads: Iterable[ObjectID], depth: int
212) -> tuple[set[ObjectID], set[ObjectID]]:
213 """Find shallow commits according to a given depth.
215 Args:
216 store: An ObjectStore for looking up objects.
217 heads: Iterable of head SHAs to start walking from.
218 depth: The depth of ancestors to include. A depth of one includes
219 only the heads themselves.
220 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
221 considered shallow and unshallow according to the arguments. Note that
222 these sets may overlap if a commit is reachable along multiple paths.
223 """
224 parents: dict[ObjectID, list[ObjectID]] = {}
225 commit_graph = store.get_commit_graph()
227 def get_parents(sha: ObjectID) -> list[ObjectID]:
228 result = parents.get(sha, None)
229 if not result:
230 # Try to use commit graph first if available
231 if commit_graph:
232 graph_parents = commit_graph.get_parents(sha)
233 if graph_parents is not None:
234 result = graph_parents
235 parents[sha] = result
236 return result
237 # Fall back to loading the object
238 commit = store[sha]
239 assert isinstance(commit, Commit)
240 result = commit.parents
241 parents[sha] = result
242 return result
244 todo = [] # stack of (sha, depth)
245 for head_sha in heads:
246 obj = store[head_sha]
247 # Peel tags if necessary
248 while isinstance(obj, Tag):
249 _, sha = obj.object
250 obj = store[sha]
251 if isinstance(obj, Commit):
252 todo.append((obj.id, 1))
254 not_shallow = set()
255 shallow = set()
256 while todo:
257 sha, cur_depth = todo.pop()
258 if cur_depth < depth:
259 not_shallow.add(sha)
260 new_depth = cur_depth + 1
261 todo.extend((p, new_depth) for p in get_parents(sha))
262 else:
263 shallow.add(sha)
265 return shallow, not_shallow
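# Illustrative sketch (not part of dulwich's API): exercising find_shallow() on
# a tiny linear history held in a MemoryObjectStore (defined later in this
# module, so the name resolves at call time). All contents and identities below
# are invented for the example.
def _example_find_shallow() -> tuple[set[ObjectID], set[ObjectID]]:
    """Build c1 <- c2 <- c3 and report the shallow boundary for depth=2."""
    store = MemoryObjectStore()
    blob = Blob.from_string(b"example contents\n")
    tree = Tree()
    tree.add(b"file", 0o100644, blob.id)
    store.add_object(blob)
    store.add_object(tree)
    parents: list[ObjectID] = []
    head = ZERO_SHA
    for i in range(3):
        commit = Commit()
        commit.tree = tree.id
        commit.parents = parents
        commit.author = commit.committer = b"Example <example@example.com>"
        commit.author_time = commit.commit_time = i
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"commit %d" % i
        store.add_object(commit)
        parents = [commit.id]
        head = commit.id
    # With depth=2 the head is "not shallow" and its parent is the shallow
    # boundary; the grandparent is never reached.
    return find_shallow(store, [head], depth=2)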
268def get_depth(
269 store: ObjectContainer,
270 head: ObjectID,
271 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
272 max_depth: int | None = None,
273) -> int:
274 """Return the current available depth for the given head.
276 For commits with multiple parents, the largest possible depth will be
277 returned.
279 Args:
280 store: Object store to search in
281 head: commit to start from
282 get_parents: optional function for getting the parents of a commit
283 max_depth: maximum depth to search
284 """
285 if head not in store:
286 return 0
287 current_depth = 1
288 queue = [(head, current_depth)]
289 commit_graph = store.get_commit_graph()
291 while queue and (max_depth is None or current_depth < max_depth):
292 e, depth = queue.pop(0)
293 current_depth = max(current_depth, depth)
295 # Try to use commit graph for parent lookup if available
296 parents = None
297 if commit_graph:
298 parents = commit_graph.get_parents(e)
300 if parents is None:
301 # Fall back to loading the object
302 cmt = store[e]
303 if isinstance(cmt, Tag):
304 _cls, sha = cmt.object
305 cmt = store[sha]
306 parents = get_parents(cmt)
308 queue.extend((parent, depth + 1) for parent in parents if parent in store)
309 return current_depth
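# Illustrative sketch (not part of dulwich's API): get_depth() on the same kind
# of three-commit chain as the previous example, this time against an empty
# tree. Expected result: 3. Identities are invented for the example.
def _example_get_depth() -> int:
    """Return the depth of a freshly built three-commit chain."""
    store = MemoryObjectStore()
    tree = Tree()
    store.add_object(tree)
    parents: list[ObjectID] = []
    head = ZERO_SHA
    for i in range(3):
        commit = Commit()
        commit.tree = tree.id
        commit.parents = parents
        commit.author = commit.committer = b"Example <example@example.com>"
        commit.author_time = commit.commit_time = i
        commit.author_timezone = commit.commit_timezone = 0
        commit.message = b"commit %d" % i
        store.add_object(commit)
        parents = [commit.id]
        head = commit.id
    # max_depth caps the walk: get_depth(store, head, max_depth=2) returns 2.
    return get_depth(store, head)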
312class PackContainer(Protocol):
313 """Protocol for containers that can accept pack files."""
315 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
316 """Add a new pack."""
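# Illustrative sketch (not part of dulwich's API): the commit/abort discipline
# behind the add_pack() contract that PackContainer describes. The helper name
# is invented; the pack bytes are whatever the caller already has (for example
# streamed from a client).
def _example_fill_pack(container: PackContainer, pack_bytes: bytes) -> None:
    """Copy ready-made pack data into a container, committing on success."""
    f, commit, abort = container.add_pack()
    try:
        f.write(pack_bytes)
    except BaseException:
        abort()
        raise
    else:
        commit()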
319class BaseObjectStore:
320 """Object store interface."""
322 def determine_wants_all(
323 self, refs: Mapping[Ref, ObjectID], depth: int | None = None
324 ) -> list[ObjectID]:
325 """Determine which objects are wanted based on refs."""
327 def _want_deepen(sha: ObjectID) -> bool:
328 if not depth:
329 return False
330 if depth == DEPTH_INFINITE:
331 return True
332 return depth > self._get_depth(sha)
334 return [
335 sha
336 for (ref, sha) in refs.items()
337 if (sha not in self or _want_deepen(sha))
338 and not ref.endswith(PEELED_TAG_SUFFIX)
339 and not sha == ZERO_SHA
340 ]
342 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
343 """Check if a particular object is present by SHA1 and is loose."""
344 raise NotImplementedError(self.contains_loose)
346 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
347 """Check if a particular object is present by SHA1 and is packed."""
348 return False # Default implementation for stores that don't support packing
350 def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
351 """Check if a particular object is present by SHA1.
353 This method makes no distinction between loose and packed objects.
354 """
355 return self.contains_loose(sha1)
357 @property
358 def packs(self) -> list[Pack]:
359 """Iterable of pack objects."""
360 raise NotImplementedError
362 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
363 """Obtain the raw text for an object.
365 Args:
366 name: sha for the object.
367 Returns: tuple with numeric type and object contents.
368 """
369 raise NotImplementedError(self.get_raw)
371 def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
372 """Obtain an object by SHA1."""
373 type_num, uncomp = self.get_raw(sha1)
374 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
376 def __iter__(self) -> Iterator[ObjectID]:
377 """Iterate over the SHAs that are present in this store."""
378 raise NotImplementedError(self.__iter__)
380 def add_object(self, obj: ShaFile) -> None:
381 """Add a single object to this object store."""
382 raise NotImplementedError(self.add_object)
384 def add_objects(
385 self,
386 objects: Sequence[tuple[ShaFile, str | None]],
387 progress: Callable[..., None] | None = None,
388 ) -> "Pack | None":
389 """Add a set of objects to this object store.
391 Args:
392 objects: Iterable over a list of (object, path) tuples
393 progress: Optional progress callback
394 """
395 raise NotImplementedError(self.add_objects)
397 def get_reachability_provider(
398 self, prefer_bitmap: bool = True
399 ) -> ObjectReachabilityProvider:
400 """Get a reachability provider for this object store.
402 Returns an ObjectReachabilityProvider that can efficiently compute
403 object reachability queries. Subclasses can override this to provide
404 optimized implementations (e.g., using bitmap indexes).
406 Args:
407 prefer_bitmap: Whether to prefer bitmap-based reachability if
408 available.
410 Returns:
411 ObjectReachabilityProvider instance
412 """
413 return GraphTraversalReachability(self)
415 def tree_changes(
416 self,
417 source: ObjectID | None,
418 target: ObjectID | None,
419 want_unchanged: bool = False,
420 include_trees: bool = False,
421 change_type_same: bool = False,
422 rename_detector: "RenameDetector | None" = None,
423 paths: Sequence[bytes] | None = None,
424 ) -> Iterator[
425 tuple[
426 tuple[bytes | None, bytes | None],
427 tuple[int | None, int | None],
428 tuple[ObjectID | None, ObjectID | None],
429 ]
430 ]:
431 """Find the differences between the contents of two trees.
433 Args:
434 source: SHA1 of the source tree
435 target: SHA1 of the target tree
436 want_unchanged: Whether unchanged files should be reported
437 include_trees: Whether to include trees
438 change_type_same: Whether to report files changing
439 type in the same entry.
440 rename_detector: RenameDetector object for detecting renames.
441 paths: Optional list of paths to filter to (as bytes).
442 Returns: Iterator over tuples with
443 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
444 """
445 from .diff_tree import tree_changes
447 for change in tree_changes(
448 self,
449 source,
450 target,
451 want_unchanged=want_unchanged,
452 include_trees=include_trees,
453 change_type_same=change_type_same,
454 rename_detector=rename_detector,
455 paths=paths,
456 ):
457 old_path = change.old.path if change.old is not None else None
458 new_path = change.new.path if change.new is not None else None
459 old_mode = change.old.mode if change.old is not None else None
460 new_mode = change.new.mode if change.new is not None else None
461 old_sha = change.old.sha if change.old is not None else None
462 new_sha = change.new.sha if change.new is not None else None
463 yield (
464 (old_path, new_path),
465 (old_mode, new_mode),
466 (old_sha, new_sha),
467 )
469 def iter_tree_contents(
470 self, tree_id: ObjectID, include_trees: bool = False
471 ) -> Iterator[TreeEntry]:
472 """Iterate the contents of a tree and all subtrees.
474 Iteration is depth-first pre-order, as in e.g. os.walk.
476 Args:
477 tree_id: SHA1 of the tree.
478 include_trees: If True, include tree objects in the iteration.
479 Returns: Iterator over TreeEntry namedtuples for all the objects in a
480 tree.
481 """
482 warnings.warn(
483 "Please use dulwich.object_store.iter_tree_contents",
484 DeprecationWarning,
485 stacklevel=2,
486 )
487 return iter_tree_contents(self, tree_id, include_trees=include_trees)
489 def iterobjects_subset(
490 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
491 ) -> Iterator[ShaFile]:
492 """Iterate over a subset of objects in the store.
494 Args:
495 shas: Iterable of object SHAs to retrieve
496 allow_missing: If True, skip missing objects; if False, raise KeyError
498 Returns:
499 Iterator of ShaFile objects
501 Raises:
502 KeyError: If an object is missing and allow_missing is False
503 """
504 for sha in shas:
505 try:
506 yield self[sha]
507 except KeyError:
508 if not allow_missing:
509 raise
511 def iter_unpacked_subset(
512 self,
513 shas: Iterable[ObjectID | RawObjectID],
514 include_comp: bool = False,
515 allow_missing: bool = False,
516 convert_ofs_delta: bool = True,
517 ) -> "Iterator[UnpackedObject]":
518 """Iterate over unpacked objects for a subset of SHAs.
520 Default implementation that converts ShaFile objects to UnpackedObject.
521 Subclasses may override for more efficient unpacked access.
523 Args:
524 shas: Iterable of object SHAs to retrieve
525 include_comp: Whether to include compressed data (ignored in base
526 implementation)
527 allow_missing: If True, skip missing objects; if False, raise
528 KeyError
529 convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
530 base implementation)
532 Returns:
533 Iterator of UnpackedObject instances
535 Raises:
536 KeyError: If an object is missing and allow_missing is False
537 """
538 from .pack import UnpackedObject
540 for sha in shas:
541 try:
542 obj = self[sha]
543 # Convert ShaFile to UnpackedObject
544 unpacked = UnpackedObject(
545 obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
546 )
547 yield unpacked
548 except KeyError:
549 if not allow_missing:
550 raise
552 def find_missing_objects(
553 self,
554 haves: Iterable[ObjectID],
555 wants: Iterable[ObjectID],
556 shallow: Set[ObjectID] | None = None,
557 progress: Callable[..., None] | None = None,
558 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
559 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
560 ) -> Iterator[tuple[ObjectID, PackHint | None]]:
561 """Find the missing objects required for a set of revisions.
563 Args:
564 haves: Iterable over SHAs already in common.
565 wants: Iterable over SHAs of objects to fetch.
566 shallow: Set of shallow commit SHA1s to skip
567 progress: Simple progress function that will be called with
568 updated progress strings.
569 get_tagged: Function that returns a dict of pointed-to sha ->
570 tag sha for including tags.
571 get_parents: Optional function for getting the parents of a
572 commit.
573 Returns: Iterator over (sha, pack hint) tuples.
574 """
575 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
576 finder = MissingObjectFinder(
577 self,
578 haves=haves,
579 wants=wants,
580 shallow=shallow,
581 progress=progress,
582 get_tagged=get_tagged,
583 get_parents=get_parents,
584 )
585 return iter(finder)
587 def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
588 """Find which revisions this store has in common using graphwalker.
590 Args:
591 graphwalker: A graphwalker object.
592 Returns: List of SHAs that are in common
593 """
594 haves = []
595 sha = next(graphwalker)
596 while sha:
597 if sha in self:
598 haves.append(sha)
599 graphwalker.ack(sha)
600 sha = next(graphwalker)
601 return haves
603 def generate_pack_data(
604 self,
605 have: Iterable[ObjectID],
606 want: Iterable[ObjectID],
607 *,
608 shallow: Set[ObjectID] | None = None,
609 progress: Callable[..., None] | None = None,
610 ofs_delta: bool = True,
611 ) -> tuple[int, Iterator[UnpackedObject]]:
612 """Generate pack data objects for a set of wants/haves.
614 Args:
615 have: List of SHA1s of objects that should not be sent
616 want: List of SHA1s of objects that should be sent
617 shallow: Set of shallow commit SHA1s to skip
618 ofs_delta: Whether OFS deltas can be included
619 progress: Optional progress reporting method
620 """
621 # Note that the pack-specific implementation below is more efficient,
622 # as it reuses deltas
623 missing_objects = MissingObjectFinder(
624 self, haves=have, wants=want, shallow=shallow, progress=progress
625 )
626 object_ids = list(missing_objects)
627 return pack_objects_to_data(
628 [(self[oid], path) for oid, path in object_ids],
629 ofs_delta=ofs_delta,
630 progress=progress,
631 )
633 def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
634 """Peel all tags from a SHA.
636 Args:
637 sha: The object SHA to peel.
638 Returns: The fully-peeled SHA1 of a tag object, after peeling all
639 intermediate tags; if the original ref does not point to a tag,
640 this will equal the original SHA1.
641 """
642 warnings.warn(
643 "Please use dulwich.object_store.peel_sha()",
644 DeprecationWarning,
645 stacklevel=2,
646 )
647 return peel_sha(self, sha)[1].id
649 def _get_depth(
650 self,
651 head: ObjectID,
652 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
653 max_depth: int | None = None,
654 ) -> int:
655 """Return the current available depth for the given head.
657 For commits with multiple parents, the largest possible depth will be
658 returned.
660 Args:
661 head: commit to start from
662 get_parents: optional function for getting the parents of a commit
663 max_depth: maximum depth to search
664 """
665 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
667 def close(self) -> None:
668 """Close any files opened by this object store."""
669 # Default implementation is a NO-OP
671 def prune(self, grace_period: int | None = None) -> None:
672 """Prune/clean up this object store.
674 This includes removing orphaned temporary files and other
675 housekeeping tasks. Default implementation is a NO-OP.
677 Args:
678 grace_period: Grace period in seconds for removing temporary files.
679 If None, uses the default grace period.
680 """
681 # Default implementation is a NO-OP
683 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
684 """Iterate over all SHA1s that start with a given prefix.
686 The default implementation is a naive iteration over all objects.
687 However, subclasses may override this method with more efficient
688 implementations.
689 """
690 for sha in self:
691 if sha.startswith(prefix):
692 yield sha
694 def get_commit_graph(self) -> "CommitGraph | None":
695 """Get the commit graph for this object store.
697 Returns:
698 CommitGraph object if available, None otherwise
699 """
700 return None
702 def write_commit_graph(
703 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
704 ) -> None:
705 """Write a commit graph file for this object store.
707 Args:
708 refs: List of refs to include. If None, includes all refs from object store.
709 reachable: If True, includes all commits reachable from refs.
710 If False, only includes the direct ref targets.
712 Note:
713 The base implementation raises NotImplementedError. Subclasses should
714 override this method to provide commit graph writing functionality.
715 """
716 raise NotImplementedError(self.write_commit_graph)
718 def get_object_mtime(self, sha: ObjectID) -> float:
719 """Get the modification time of an object.
721 Args:
722 sha: SHA1 of the object
724 Returns:
725 Modification time as seconds since epoch
727 Raises:
728 KeyError: if the object is not found
729 """
730 # Default implementation raises KeyError
731 # Subclasses should override to provide actual mtime
732 raise KeyError(sha)
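# Illustrative sketch (not part of dulwich's API): the dictionary-style surface
# that BaseObjectStore defines, exercised through MemoryObjectStore (declared
# later in this module). The blob contents and helper name are invented.
def _example_base_store_usage() -> None:
    """Store a blob and read it back through the BaseObjectStore interface."""
    store = MemoryObjectStore()
    blob = Blob.from_string(b"hello, object store\n")
    store.add_object(blob)
    assert blob.id in store
    type_num, raw = store.get_raw(blob.id)
    assert type_num == Blob.type_num and raw == blob.as_raw_string()
    # Prefix lookup and subset iteration use the naive default implementations.
    assert list(store.iter_prefix(blob.id[:4])) == [blob.id]
    assert [o.id for o in store.iterobjects_subset([blob.id])] == [blob.id]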
735class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
736 """Object store that supports pack operations.
738 This is a base class for object stores that can handle pack files,
739 including both disk-based and memory-based stores.
740 """
742 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
743 """Add a new pack to this object store.
745 Returns: Tuple of (file, commit_func, abort_func)
746 """
747 raise NotImplementedError(self.add_pack)
749 def add_pack_data(
750 self,
751 count: int,
752 unpacked_objects: Iterator["UnpackedObject"],
753 progress: Callable[..., None] | None = None,
754 ) -> "Pack | None":
755 """Add pack data to this object store.
757 Args:
758 count: Number of objects
759 unpacked_objects: Iterator over unpacked objects
760 progress: Optional progress callback
761 """
762 raise NotImplementedError(self.add_pack_data)
764 def get_unpacked_object(
765 self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
766 ) -> "UnpackedObject":
767 """Get a raw unresolved object.
769 Args:
770 sha1: SHA-1 hash of the object
771 include_comp: Whether to include compressed data
773 Returns:
774 UnpackedObject instance
775 """
776 from .pack import UnpackedObject
778 obj = self[sha1]
779 return UnpackedObject(obj.type_num, sha=sha1, decomp_chunks=obj.as_raw_chunks())
781 def iterobjects_subset(
782 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
783 ) -> Iterator[ShaFile]:
784 """Iterate over a subset of objects.
786 Args:
787 shas: Iterable of object SHAs to retrieve
788 allow_missing: If True, skip missing objects
790 Returns:
791 Iterator of ShaFile objects
792 """
793 for sha in shas:
794 try:
795 yield self[sha]
796 except KeyError:
797 if not allow_missing:
798 raise
801class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
802 """Object store that uses pack files for storage.
804 This class provides a base implementation for object stores that use
805 Git pack files as their primary storage mechanism. It handles caching
806 of open pack files and provides configuration for pack file operations.
807 """
809 def __init__(
810 self,
811 pack_compression_level: int = -1,
812 pack_index_version: int | None = None,
813 pack_delta_window_size: int | None = None,
814 pack_window_memory: int | None = None,
815 pack_delta_cache_size: int | None = None,
816 pack_depth: int | None = None,
817 pack_threads: int | None = None,
818 pack_big_file_threshold: int | None = None,
819 ) -> None:
820 """Initialize a PackBasedObjectStore.
822 Args:
823 pack_compression_level: Compression level for pack files (-1 to 9)
824 pack_index_version: Pack index version to use
825 pack_delta_window_size: Window size for delta compression
826 pack_window_memory: Maximum memory to use for delta window
827 pack_delta_cache_size: Cache size for delta operations
828 pack_depth: Maximum depth for pack deltas
829 pack_threads: Number of threads to use for packing
830 pack_big_file_threshold: Threshold for treating files as "big"
831 """
832 self._pack_cache: dict[str, Pack] = {}
833 self.pack_compression_level = pack_compression_level
834 self.pack_index_version = pack_index_version
835 self.pack_delta_window_size = pack_delta_window_size
836 self.pack_window_memory = pack_window_memory
837 self.pack_delta_cache_size = pack_delta_cache_size
838 self.pack_depth = pack_depth
839 self.pack_threads = pack_threads
840 self.pack_big_file_threshold = pack_big_file_threshold
842 def get_reachability_provider(
843 self,
844 prefer_bitmaps: bool = True,
845 ) -> ObjectReachabilityProvider:
846 """Get the best reachability provider for the object store.
848 Args:
849 prefer_bitmaps: Whether to use bitmaps if available
851 Returns:
852 ObjectReachabilityProvider implementation (either bitmap-accelerated
853 or graph traversal)
854 """
855 if prefer_bitmaps:
856 # Check if any packs have bitmaps
857 has_bitmap = False
858 for pack in self.packs:
859 try:
860 # Try to access bitmap property
861 if pack.bitmap is not None:
862 has_bitmap = True
863 break
864 except FileNotFoundError:
865 # Bitmap file doesn't exist for this pack
866 continue
868 if has_bitmap:
869 return BitmapReachability(self)
871 # Fall back to graph traversal
872 return GraphTraversalReachability(self)
874 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
875 """Add a new pack to this object store."""
876 raise NotImplementedError(self.add_pack)
878 def add_pack_data(
879 self,
880 count: int,
881 unpacked_objects: Iterator[UnpackedObject],
882 progress: Callable[..., None] | None = None,
883 ) -> "Pack | None":
884 """Add pack data to this object store.
886 Args:
887 count: Number of items to add
888 unpacked_objects: Iterator of UnpackedObject instances
889 progress: Optional progress callback
890 """
891 if count == 0:
892 # Don't bother writing an empty pack file
893 return None
894 f, commit, abort = self.add_pack()
895 try:
896 write_pack_data(
897 f.write,
898 unpacked_objects,
899 num_records=count,
900 progress=progress,
901 compression_level=self.pack_compression_level,
902 )
903 except BaseException:
904 abort()
905 raise
906 else:
907 return commit()
909 @property
910 def alternates(self) -> list["BaseObjectStore"]:
911 """Return list of alternate object stores."""
912 return []
914 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
915 """Check if a particular object is present by SHA1 and is packed.
917 This does not check alternates.
918 """
919 for pack in self.packs:
920 try:
921 if sha in pack:
922 return True
923 except PackFileDisappeared:
924 pass
925 return False
927 def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
928 """Check if a particular object is present by SHA1.
930 This method makes no distinction between loose and packed objects.
931 """
932 if self.contains_packed(sha) or self.contains_loose(sha):
933 return True
934 for alternate in self.alternates:
935 if sha in alternate:
936 return True
937 return False
939 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
940 """Add a newly appeared pack to the cache by path."""
941 prev_pack = self._pack_cache.get(base_name)
942 if prev_pack is not pack:
943 self._pack_cache[base_name] = pack
944 if prev_pack:
945 prev_pack.close()
947 def generate_pack_data(
948 self,
949 have: Iterable[ObjectID],
950 want: Iterable[ObjectID],
951 *,
952 shallow: Set[ObjectID] | None = None,
953 progress: Callable[..., None] | None = None,
954 ofs_delta: bool = True,
955 ) -> tuple[int, Iterator[UnpackedObject]]:
956 """Generate pack data objects for a set of wants/haves.
958 Args:
959 have: List of SHA1s of objects that should not be sent
960 want: List of SHA1s of objects that should be sent
961 shallow: Set of shallow commit SHA1s to skip
962 ofs_delta: Whether OFS deltas can be included
963 progress: Optional progress reporting method
964 """
965 missing_objects = MissingObjectFinder(
966 self, haves=have, wants=want, shallow=shallow, progress=progress
967 )
968 remote_has = missing_objects.get_remote_has()
969 object_ids = list(missing_objects)
970 return len(object_ids), generate_unpacked_objects(
971 self,
972 object_ids,
973 progress=progress,
974 ofs_delta=ofs_delta,
975 other_haves=remote_has,
976 )
978 def _clear_cached_packs(self) -> None:
979 pack_cache = self._pack_cache
980 self._pack_cache = {}
981 while pack_cache:
982 (_name, pack) = pack_cache.popitem()
983 pack.close()
985 def _iter_cached_packs(self) -> Iterator[Pack]:
986 return iter(self._pack_cache.values())
988 def _update_pack_cache(self) -> list[Pack]:
989 raise NotImplementedError(self._update_pack_cache)
991 def close(self) -> None:
992 """Close the object store and release resources.
994 This method closes all cached pack files and frees associated resources.
995 """
996 self._clear_cached_packs()
998 @property
999 def packs(self) -> list[Pack]:
1000 """List with pack objects."""
1001 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
1003 def count_pack_files(self) -> int:
1004 """Count the number of pack files.
1006 Returns:
1007 Number of pack files (excluding those with .keep files)
1008 """
1009 count = 0
1010 for pack in self.packs:
1011 # Check if there's a .keep file for this pack
1012 keep_path = pack._basename + ".keep"
1013 if not os.path.exists(keep_path):
1014 count += 1
1015 return count
1017 def _iter_alternate_objects(self) -> Iterator[ObjectID]:
1018 """Iterate over the SHAs of all the objects in alternate stores."""
1019 for alternate in self.alternates:
1020 yield from alternate
1022 def _iter_loose_objects(self) -> Iterator[ObjectID]:
1023 """Iterate over the SHAs of all loose objects."""
1024 raise NotImplementedError(self._iter_loose_objects)
1026 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
1027 raise NotImplementedError(self._get_loose_object)
1029 def delete_loose_object(self, sha: ObjectID) -> None:
1030 """Delete a loose object.
1032 This method only handles loose objects. For packed objects,
1033 use repack(exclude=...) to exclude them during repacking.
1034 """
1035 raise NotImplementedError(self.delete_loose_object)
1037 def _remove_pack(self, pack: "Pack") -> None:
1038 raise NotImplementedError(self._remove_pack)
1040 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
1041 """Pack loose objects.
1043 Args:
1044 progress: Optional progress reporting callback
1046 Returns: Number of objects packed
1047 """
1048 objects: list[tuple[ShaFile, None]] = []
1049 for sha in self._iter_loose_objects():
1050 obj = self._get_loose_object(sha)
1051 if obj is not None:
1052 objects.append((obj, None))
1053 self.add_objects(objects, progress=progress)
1054 for obj, path in objects:
1055 self.delete_loose_object(obj.id)
1056 return len(objects)
1058 def repack(
1059 self,
1060 exclude: Set[bytes] | None = None,
1061 progress: Callable[[str], None] | None = None,
1062 ) -> int:
1063 """Repack the packs in this repository.
1065 Note that this implementation is fairly naive and currently keeps all
1066 objects in memory while it repacks.
1068 Args:
1069 exclude: Optional set of object SHAs to exclude from repacking
1070 progress: Optional progress reporting callback
1071 """
1072 if exclude is None:
1073 exclude = set()
1075 loose_objects = set()
1076 excluded_loose_objects = set()
1077 for sha in self._iter_loose_objects():
1078 if sha not in exclude:
1079 obj = self._get_loose_object(sha)
1080 if obj is not None:
1081 loose_objects.add(obj)
1082 else:
1083 excluded_loose_objects.add(sha)
1085 objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
1086 old_packs = {p.name(): p for p in self.packs}
1087 for name, pack in old_packs.items():
1088 objects.update(
1089 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
1090 )
1092 # Only create a new pack if there are objects to pack
1093 if objects:
1094 # The name of the consolidated pack might match the name of a
1095 # pre-existing pack. Take care not to remove the newly created
1096 # consolidated pack.
1097 consolidated = self.add_objects(list(objects), progress=progress)
1098 if consolidated is not None:
1099 old_packs.pop(consolidated.name(), None)
1101 # Delete loose objects that were packed
1102 for obj in loose_objects:
1103 if obj is not None:
1104 self.delete_loose_object(obj.id)
1105 # Delete excluded loose objects
1106 for sha in excluded_loose_objects:
1107 self.delete_loose_object(sha)
1108 for name, pack in old_packs.items():
1109 self._remove_pack(pack)
1110 self._update_pack_cache()
1111 return len(objects)
1113 def generate_pack_bitmaps(
1114 self,
1115 refs: dict[Ref, ObjectID],
1116 *,
1117 commit_interval: int | None = None,
1118 progress: Callable[[str], None] | None = None,
1119 ) -> int:
1120 """Generate bitmap indexes for all packs that don't have them.
1122 This generates .bitmap files for packfiles, enabling fast reachability
1123 queries. Equivalent to the bitmap generation part of 'git repack -b'.
1125 Args:
1126 refs: Dictionary of ref names to commit SHAs
1127 commit_interval: Include every Nth commit in bitmap index (None for default)
1128 progress: Optional progress reporting callback
1130 Returns:
1131 Number of bitmaps generated
1132 """
1133 count = 0
1134 for pack in self.packs:
1135 pack.ensure_bitmap(
1136 self, refs, commit_interval=commit_interval, progress=progress
1137 )
1138 count += 1
1140 # Update cache to pick up new bitmaps
1141 self._update_pack_cache()
1143 return count
1145 def __iter__(self) -> Iterator[ObjectID]:
1146 """Iterate over the SHAs that are present in this store."""
1147 self._update_pack_cache()
1148 for pack in self._iter_cached_packs():
1149 try:
1150 yield from pack
1151 except PackFileDisappeared:
1152 pass
1153 yield from self._iter_loose_objects()
1154 yield from self._iter_alternate_objects()
1156 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
1157 """Check if a particular object is present by SHA1 and is loose.
1159 This does not check alternates.
1160 """
1161 return self._get_loose_object(sha) is not None
1163 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
1164 """Obtain the raw fulltext for an object.
1166 Args:
1167 name: sha for the object.
1168 Returns: tuple with numeric type and object contents.
1169 """
1170 if name == ZERO_SHA:
1171 raise KeyError(name)
1172 if len(name) == 40:
1173 sha = hex_to_sha(cast(ObjectID, name))
1174 hexsha = cast(ObjectID, name)
1175 elif len(name) == 20:
1176 sha = cast(RawObjectID, name)
1177 hexsha = None
1178 else:
1179 raise AssertionError(f"Invalid object name {name!r}")
1180 for pack in self._iter_cached_packs():
1181 try:
1182 return pack.get_raw(sha)
1183 except (KeyError, PackFileDisappeared):
1184 pass
1185 if hexsha is None:
1186 hexsha = sha_to_hex(sha)
1187 ret = self._get_loose_object(hexsha)
1188 if ret is not None:
1189 return ret.type_num, ret.as_raw_string()
1190 # Maybe something else has added a pack with the object
1191 # in the mean time?
1192 for pack in self._update_pack_cache():
1193 try:
1194 return pack.get_raw(sha)
1195 except KeyError:
1196 pass
1197 for alternate in self.alternates:
1198 try:
1199 return alternate.get_raw(hexsha)
1200 except KeyError:
1201 pass
1202 raise KeyError(hexsha)
1204 def iter_unpacked_subset(
1205 self,
1206 shas: Iterable[ObjectID | RawObjectID],
1207 include_comp: bool = False,
1208 allow_missing: bool = False,
1209 convert_ofs_delta: bool = True,
1210 ) -> Iterator[UnpackedObject]:
1211 """Iterate over a subset of objects, yielding UnpackedObject instances.
1213 Args:
1214 shas: Set of object SHAs to retrieve
1215 include_comp: Whether to include compressed data
1216 allow_missing: If True, skip missing objects; if False, raise KeyError
1217 convert_ofs_delta: Whether to convert OFS_DELTA objects
1219 Returns:
1220 Iterator of UnpackedObject instances
1222 Raises:
1223 KeyError: If an object is missing and allow_missing is False
1224 """
1225 todo: set[ObjectID | RawObjectID] = set(shas)
1226 for p in self._iter_cached_packs():
1227 for unpacked in p.iter_unpacked_subset(
1228 todo,
1229 include_comp=include_comp,
1230 allow_missing=True,
1231 convert_ofs_delta=convert_ofs_delta,
1232 ):
1233 yield unpacked
1234 hexsha = sha_to_hex(unpacked.sha())
1235 todo.remove(hexsha)
1236 # Maybe something else has added a pack with the object
1237 # in the mean time?
1238 for p in self._update_pack_cache():
1239 for unpacked in p.iter_unpacked_subset(
1240 todo,
1241 include_comp=include_comp,
1242 allow_missing=True,
1243 convert_ofs_delta=convert_ofs_delta,
1244 ):
1245 yield unpacked
1246 hexsha = sha_to_hex(unpacked.sha())
1247 todo.remove(hexsha)
1248 for alternate in self.alternates:
1249 assert isinstance(alternate, PackBasedObjectStore)
1250 for unpacked in alternate.iter_unpacked_subset(
1251 todo,
1252 include_comp=include_comp,
1253 allow_missing=True,
1254 convert_ofs_delta=convert_ofs_delta,
1255 ):
1256 yield unpacked
1257 hexsha = sha_to_hex(unpacked.sha())
1258 todo.remove(hexsha)
1260 def iterobjects_subset(
1261 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
1262 ) -> Iterator[ShaFile]:
1263 """Iterate over a subset of objects in the store.
1265 This method searches for objects in pack files, alternates, and loose storage.
1267 Args:
1268 shas: Iterable of object SHAs to retrieve
1269 allow_missing: If True, skip missing objects; if False, raise KeyError
1271 Returns:
1272 Iterator of ShaFile objects
1274 Raises:
1275 KeyError: If an object is missing and allow_missing is False
1276 """
1277 todo: set[ObjectID] = set(shas)
1278 for p in self._iter_cached_packs():
1279 for o in p.iterobjects_subset(todo, allow_missing=True):
1280 yield o
1281 todo.remove(o.id)
1282 # Maybe something else has added a pack with the object
1283 # in the mean time?
1284 for p in self._update_pack_cache():
1285 for o in p.iterobjects_subset(todo, allow_missing=True):
1286 yield o
1287 todo.remove(o.id)
1288 for alternate in self.alternates:
1289 for o in alternate.iterobjects_subset(todo, allow_missing=True):
1290 yield o
1291 todo.remove(o.id)
1292 for oid in todo:
1293 loose_obj: ShaFile | None = self._get_loose_object(oid)
1294 if loose_obj is not None:
1295 yield loose_obj
1296 elif not allow_missing:
1297 raise KeyError(oid)
1299 def get_unpacked_object(
1300 self, sha1: bytes, *, include_comp: bool = False
1301 ) -> UnpackedObject:
1302 """Obtain the unpacked object.
1304 Args:
1305 sha1: sha for the object.
1306 include_comp: Whether to include compression metadata.
1307 """
1308 if sha1 == ZERO_SHA:
1309 raise KeyError(sha1)
1310 if len(sha1) == 40:
1311 sha = hex_to_sha(cast(ObjectID, sha1))
1312 hexsha = cast(ObjectID, sha1)
1313 elif len(sha1) == 20:
1314 sha = cast(RawObjectID, sha1)
1315 hexsha = None
1316 else:
1317 raise AssertionError(f"Invalid object sha1 {sha1!r}")
1318 for pack in self._iter_cached_packs():
1319 try:
1320 return pack.get_unpacked_object(sha, include_comp=include_comp)
1321 except (KeyError, PackFileDisappeared):
1322 pass
1323 if hexsha is None:
1324 hexsha = sha_to_hex(sha)
1325 # Maybe something else has added a pack with the object
1326 # in the mean time?
1327 for pack in self._update_pack_cache():
1328 try:
1329 return pack.get_unpacked_object(sha, include_comp=include_comp)
1330 except KeyError:
1331 pass
1332 for alternate in self.alternates:
1333 assert isinstance(alternate, PackBasedObjectStore)
1334 try:
1335 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
1336 except KeyError:
1337 pass
1338 raise KeyError(hexsha)
1340 def add_objects(
1341 self,
1342 objects: Sequence[tuple[ShaFile, str | None]],
1343 progress: Callable[[str], None] | None = None,
1344 ) -> "Pack | None":
1345 """Add a set of objects to this object store.
1347 Args:
1348 objects: Iterable over (object, path) tuples, should support
1349 __len__.
1350 progress: Optional progress reporting function.
1351 Returns: Pack object of the objects written.
1352 """
1353 count = len(objects)
1354 record_iter = (full_unpacked_object(o) for (o, p) in objects)
1355 return self.add_pack_data(count, record_iter, progress=progress)
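# Illustrative sketch (not part of dulwich's API): a DiskObjectStore round-trip
# in a temporary directory, assuming the DiskObjectStore.init() classmethod
# (which creates the info/ and pack/ layout) further down in this module. The
# path and blob contents are invented for the example.
def _example_disk_store_roundtrip() -> None:
    """Write a loose object, pack it, then repack the on-disk store."""
    import tempfile

    with tempfile.TemporaryDirectory() as tmpdir:
        store = DiskObjectStore.init(os.path.join(tmpdir, "objects"))
        blob = Blob.from_string(b"packed eventually\n")
        store.add_object(blob)
        assert store.contains_loose(blob.id)
        # Move loose objects into a pack, then consolidate all packs.
        assert store.pack_loose_objects() == 1
        assert store.contains_packed(blob.id)
        store.repack()
        store.close()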
1358class DiskObjectStore(PackBasedObjectStore):
1359 """Git-style object store that exists on disk."""
1361 path: str | os.PathLike[str]
1362 pack_dir: str | os.PathLike[str]
1363 _alternates: "list[BaseObjectStore] | None"
1364 _commit_graph: "CommitGraph | None"
1366 def __init__(
1367 self,
1368 path: str | os.PathLike[str],
1369 *,
1370 loose_compression_level: int = -1,
1371 pack_compression_level: int = -1,
1372 pack_index_version: int | None = None,
1373 pack_delta_window_size: int | None = None,
1374 pack_window_memory: int | None = None,
1375 pack_delta_cache_size: int | None = None,
1376 pack_depth: int | None = None,
1377 pack_threads: int | None = None,
1378 pack_big_file_threshold: int | None = None,
1379 fsync_object_files: bool = False,
1380 pack_write_bitmaps: bool = False,
1381 pack_write_bitmap_hash_cache: bool = True,
1382 pack_write_bitmap_lookup_table: bool = True,
1383 file_mode: int | None = None,
1384 dir_mode: int | None = None,
1385 ) -> None:
1386 """Open an object store.
1388 Args:
1389 path: Path of the object store.
1390 loose_compression_level: zlib compression level for loose objects
1391 pack_compression_level: zlib compression level for pack objects
1392 pack_index_version: pack index version to use (1, 2, or 3)
1393 pack_delta_window_size: sliding window size for delta compression
1394 pack_window_memory: memory limit for delta window operations
1395 pack_delta_cache_size: size of cache for delta operations
1396 pack_depth: maximum delta chain depth
1397 pack_threads: number of threads for pack operations
1398 pack_big_file_threshold: threshold for treating files as big
1399 fsync_object_files: whether to fsync object files for durability
1400 pack_write_bitmaps: whether to write bitmap indexes for packs
1401 pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
1402 pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
1403 file_mode: File permission mask for shared repository
1404 dir_mode: Directory permission mask for shared repository
1405 """
1406 super().__init__(
1407 pack_compression_level=pack_compression_level,
1408 pack_index_version=pack_index_version,
1409 pack_delta_window_size=pack_delta_window_size,
1410 pack_window_memory=pack_window_memory,
1411 pack_delta_cache_size=pack_delta_cache_size,
1412 pack_depth=pack_depth,
1413 pack_threads=pack_threads,
1414 pack_big_file_threshold=pack_big_file_threshold,
1415 )
1416 self.path = path
1417 self.pack_dir = os.path.join(self.path, PACKDIR)
1418 self._alternates = None
1419 self.loose_compression_level = loose_compression_level
1420 self.pack_compression_level = pack_compression_level
1421 self.pack_index_version = pack_index_version
1422 self.fsync_object_files = fsync_object_files
1423 self.pack_write_bitmaps = pack_write_bitmaps
1424 self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
1425 self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
1426 self.file_mode = file_mode
1427 self.dir_mode = dir_mode
1429 # Commit graph support - lazy loaded
1430 self._commit_graph = None
1431 self._use_commit_graph = True # Default to true
1433 # Multi-pack-index support - lazy loaded
1434 self._midx: MultiPackIndex | None = None
1435 self._use_midx = True # Default to true
1437 def __repr__(self) -> str:
1438 """Return string representation of DiskObjectStore.
1440 Returns:
1441 String representation including the store path
1442 """
1443 return f"<{self.__class__.__name__}({self.path!r})>"
1445 @classmethod
1446 def from_config(
1447 cls,
1448 path: str | os.PathLike[str],
1449 config: "Config",
1450 *,
1451 file_mode: int | None = None,
1452 dir_mode: int | None = None,
1453 ) -> "DiskObjectStore":
1454 """Create a DiskObjectStore from a configuration object.
1456 Args:
1457 path: Path to the object store directory
1458 config: Configuration object to read settings from
1459 file_mode: Optional file permission mask for shared repository
1460 dir_mode: Optional directory permission mask for shared repository
1462 Returns:
1463 New DiskObjectStore instance configured according to config
1464 """
1465 try:
1466 default_compression_level = int(
1467 config.get((b"core",), b"compression").decode()
1468 )
1469 except KeyError:
1470 default_compression_level = -1
1471 try:
1472 loose_compression_level = int(
1473 config.get((b"core",), b"looseCompression").decode()
1474 )
1475 except KeyError:
1476 loose_compression_level = default_compression_level
1477 try:
1478 pack_compression_level = int(
1479 config.get((b"core",), b"packCompression").decode()
1480 )
1481 except KeyError:
1482 pack_compression_level = default_compression_level
1483 try:
1484 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
1485 except KeyError:
1486 pack_index_version = None
1488 # Read pack configuration options
1489 try:
1490 pack_delta_window_size = int(
1491 config.get((b"pack",), b"deltaWindowSize").decode()
1492 )
1493 except KeyError:
1494 pack_delta_window_size = None
1495 try:
1496 pack_window_memory = int(config.get((b"pack",), b"windowMemory").decode())
1497 except KeyError:
1498 pack_window_memory = None
1499 try:
1500 pack_delta_cache_size = int(
1501 config.get((b"pack",), b"deltaCacheSize").decode()
1502 )
1503 except KeyError:
1504 pack_delta_cache_size = None
1505 try:
1506 pack_depth = int(config.get((b"pack",), b"depth").decode())
1507 except KeyError:
1508 pack_depth = None
1509 try:
1510 pack_threads = int(config.get((b"pack",), b"threads").decode())
1511 except KeyError:
1512 pack_threads = None
1513 try:
1514 pack_big_file_threshold = int(
1515 config.get((b"pack",), b"bigFileThreshold").decode()
1516 )
1517 except KeyError:
1518 pack_big_file_threshold = None
1520 # Read core.commitGraph setting
1521 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
1523 # Read core.multiPackIndex setting
1524 use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)
1526 # Read core.fsyncObjectFiles setting
1527 fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)
1529 # Read bitmap settings
1530 pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
1531 pack_write_bitmap_hash_cache = config.get_boolean(
1532 (b"pack",), b"writeBitmapHashCache", True
1533 )
1534 pack_write_bitmap_lookup_table = config.get_boolean(
1535 (b"pack",), b"writeBitmapLookupTable", True
1536 )
1537 # Also check repack.writeBitmaps for backwards compatibility
1538 if not pack_write_bitmaps:
1539 pack_write_bitmaps = config.get_boolean(
1540 (b"repack",), b"writeBitmaps", False
1541 )
1543 instance = cls(
1544 path,
1545 loose_compression_level=loose_compression_level,
1546 pack_compression_level=pack_compression_level,
1547 pack_index_version=pack_index_version,
1548 pack_delta_window_size=pack_delta_window_size,
1549 pack_window_memory=pack_window_memory,
1550 pack_delta_cache_size=pack_delta_cache_size,
1551 pack_depth=pack_depth,
1552 pack_threads=pack_threads,
1553 pack_big_file_threshold=pack_big_file_threshold,
1554 fsync_object_files=fsync_object_files,
1555 pack_write_bitmaps=pack_write_bitmaps,
1556 pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
1557 pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
1558 file_mode=file_mode,
1559 dir_mode=dir_mode,
1560 )
1561 instance._use_commit_graph = use_commit_graph
1562 instance._use_midx = use_midx
1563 return instance
1565 @property
1566 def alternates(self) -> list["BaseObjectStore"]:
1567 """Get the list of alternate object stores.
1569 Reads from .git/objects/info/alternates if not already cached.
1571 Returns:
1572 List of DiskObjectStore instances for alternate object directories
1573 """
1574 if self._alternates is not None:
1575 return self._alternates
1576 self._alternates = []
1577 for path in self._read_alternate_paths():
1578 self._alternates.append(DiskObjectStore(path))
1579 return self._alternates
1581 def _read_alternate_paths(self) -> Iterator[str]:
1582 try:
1583 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
1584 except FileNotFoundError:
1585 return
1586 with f:
1587 for line in f.readlines():
1588 line = line.rstrip(b"\n")
1589 if line.startswith(b"#"):
1590 continue
1591 if os.path.isabs(line):
1592 yield os.fsdecode(line)
1593 else:
1594 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1596 def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
1597 """Add an alternate path to this object store."""
1598 info_dir = os.path.join(self.path, INFODIR)
1599 try:
1600 os.mkdir(info_dir)
1601 if self.dir_mode is not None:
1602 os.chmod(info_dir, self.dir_mode)
1603 except FileExistsError:
1604 pass
1605 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1606 mask = self.file_mode if self.file_mode is not None else 0o644
1607 with GitFile(alternates_path, "wb", mask=mask) as f:
1608 try:
1609 orig_f = open(alternates_path, "rb")
1610 except FileNotFoundError:
1611 pass
1612 else:
1613 with orig_f:
1614 f.write(orig_f.read())
1615 f.write(os.fsencode(path) + b"\n")
1617 if not os.path.isabs(path):
1618 path = os.path.join(self.path, path)
1619 self.alternates.append(DiskObjectStore(path))
1621 def _update_pack_cache(self) -> list[Pack]:
1622 """Read and iterate over new pack files and cache them."""
1623 try:
1624 pack_dir_contents = os.listdir(self.pack_dir)
1625 except FileNotFoundError:
1626 self.close()
1627 return []
1628 pack_files = set()
1629 for name in pack_dir_contents:
1630 if name.startswith("pack-") and name.endswith(".pack"):
1631 # verify that idx exists first (otherwise the pack was not yet
1632 # fully written)
1633 idx_name = os.path.splitext(name)[0] + ".idx"
1634 if idx_name in pack_dir_contents:
1635 pack_name = name[: -len(".pack")]
1636 pack_files.add(pack_name)
1638 # Open newly appeared pack files
1639 new_packs = []
1640 for f in pack_files:
1641 if f not in self._pack_cache:
1642 pack = Pack(
1643 os.path.join(self.pack_dir, f),
1644 delta_window_size=self.pack_delta_window_size,
1645 window_memory=self.pack_window_memory,
1646 delta_cache_size=self.pack_delta_cache_size,
1647 depth=self.pack_depth,
1648 threads=self.pack_threads,
1649 big_file_threshold=self.pack_big_file_threshold,
1650 )
1651 new_packs.append(pack)
1652 self._pack_cache[f] = pack
1653 # Remove disappeared pack files
1654 for f in set(self._pack_cache) - pack_files:
1655 self._pack_cache.pop(f).close()
1656 return new_packs
1658 def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str:
1659 # Check from object dir
1660 return hex_to_filename(os.fspath(self.path), sha)
1662 def _iter_loose_objects(self) -> Iterator[ObjectID]:
1663 for base in os.listdir(self.path):
1664 if len(base) != 2:
1665 continue
1666 for rest in os.listdir(os.path.join(self.path, base)):
1667 sha = os.fsencode(base + rest)
1668 if not valid_hexsha(sha):
1669 continue
1670 yield ObjectID(sha)
1672 def count_loose_objects(self) -> int:
1673 """Count the number of loose objects in the object store.
1675 Returns:
1676 Number of loose objects
1677 """
1678 count = 0
1679 if not os.path.exists(self.path):
1680 return 0
1682 for i in range(256):
1683 subdir = os.path.join(self.path, f"{i:02x}")
1684 try:
1685 count += len(
1686 [
1687 name
1688 for name in os.listdir(subdir)
1689 if len(name) == 38 # 40 - 2 for the prefix
1690 ]
1691 )
1692 except FileNotFoundError:
1693 # Directory may have been removed or is inaccessible
1694 continue
1696 return count
1698 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
1699 path = self._get_shafile_path(sha)
1700 try:
1701 return ShaFile.from_path(path)
1702 except FileNotFoundError:
1703 return None
1705 def delete_loose_object(self, sha: ObjectID) -> None:
1706 """Delete a loose object from disk.
1708 Args:
1709 sha: SHA1 of the object to delete
1711 Raises:
1712 FileNotFoundError: If the object file doesn't exist
1713 """
1714 os.remove(self._get_shafile_path(sha))
1716 def get_object_mtime(self, sha: ObjectID) -> float:
1717 """Get the modification time of an object.
1719 Args:
1720 sha: SHA1 of the object
1722 Returns:
1723 Modification time as seconds since epoch
1725 Raises:
1726 KeyError: if the object is not found
1727 """
1728 # First check if it's a loose object
1729 if self.contains_loose(sha):
1730 path = self._get_shafile_path(sha)
1731 try:
1732 return os.path.getmtime(path)
1733 except FileNotFoundError:
1734 pass
1736 # Check if it's in a pack file
1737 for pack in self.packs:
1738 try:
1739 if sha in pack:
1740 # Use the pack file's mtime for packed objects
1741 pack_path = pack._data_path
1742 try:
1743 return os.path.getmtime(pack_path)
1744 except (FileNotFoundError, AttributeError):
1745 pass
1746 except PackFileDisappeared:
1747 pass
1749 raise KeyError(sha)
1751 def _remove_pack(self, pack: Pack) -> None:
1752 try:
1753 del self._pack_cache[os.path.basename(pack._basename)]
1754 except KeyError:
1755 pass
1756 pack.close()
1757 os.remove(pack.data.path)
1758 if hasattr(pack.index, "path"):
1759 os.remove(pack.index.path)
1761 def _get_pack_basepath(
1762 self, entries: Iterable[tuple[bytes, int, int | None]]
1763 ) -> str:
1764 suffix_bytes = iter_sha1(entry[0] for entry in entries)
1765 # TODO: Handle self.pack_dir being bytes
1766 suffix = suffix_bytes.decode("ascii")
1767 return os.path.join(self.pack_dir, "pack-" + suffix)
1769 def _complete_pack(
1770 self,
1771 f: BinaryIO,
1772 path: str,
1773 num_objects: int,
1774 indexer: PackIndexer,
1775 progress: Callable[..., None] | None = None,
1776 refs: dict[Ref, ObjectID] | None = None,
1777 ) -> Pack:
1778 """Move a specific file containing a pack into the pack directory.
1780 Note: The file should be on the same file system as the
1781 packs directory.
1783 Args:
1784 f: Open file object for the pack.
1785 path: Path to the pack file.
1786 num_objects: Number of objects in the pack.
1787 indexer: A PackIndexer for indexing the pack.
1788 progress: Optional progress reporting function.
1789 refs: Optional dictionary of refs for bitmap generation.
1790 """
1791 entries = []
1792 for i, entry in enumerate(indexer):
1793 if progress is not None:
1794 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1795 entries.append(entry)
1797 pack_sha, extra_entries = extend_pack(
1798 f,
1799 set(indexer.ext_refs()),
1800 get_raw=self.get_raw,
1801 compression_level=self.pack_compression_level,
1802 progress=progress,
1803 )
1804 f.flush()
1805 if self.fsync_object_files:
1806 try:
1807 fileno = f.fileno()
1808 except AttributeError as e:
1809 raise OSError("fsync requested but file has no fileno()") from e
1810 else:
1811 os.fsync(fileno)
1812 f.close()
1814 entries.extend(extra_entries)
1816 # Move the pack in.
1817 entries.sort()
1818 pack_base_name = self._get_pack_basepath(entries)
1820 for pack in self.packs:
1821 if pack._basename == pack_base_name:
1822 return pack
1824 target_pack_path = pack_base_name + ".pack"
1825 target_index_path = pack_base_name + ".idx"
1826 if sys.platform == "win32":
1827 # Windows might have the target pack file lingering. Attempt
1828 # removal, silently passing if the target does not exist.
1829 with suppress(FileNotFoundError):
1830 os.remove(target_pack_path)
1831 os.rename(path, target_pack_path)
1833 # Write the index.
1834 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1835 with GitFile(
1836 target_index_path,
1837 "wb",
1838 mask=mask,
1839 fsync=self.fsync_object_files,
1840 ) as index_file:
1841 write_pack_index(
1842 index_file, entries, pack_sha, version=self.pack_index_version
1843 )
1845 # Generate bitmap if configured and refs are available
1846 if self.pack_write_bitmaps and refs:
1847 from .bitmap import generate_bitmap, write_bitmap
1848 from .pack import load_pack_index_file
1850 if progress:
1851 progress("Generating bitmap index\r".encode("ascii"))
1853 # Load the index we just wrote
1854 with open(target_index_path, "rb") as idx_file:
1855 pack_index = load_pack_index_file(
1856 os.path.basename(target_index_path), idx_file
1857 )
1859 # Generate the bitmap
1860 bitmap = generate_bitmap(
1861 pack_index=pack_index,
1862 object_store=self,
1863 refs=refs,
1864 pack_checksum=pack_sha,
1865 include_hash_cache=self.pack_write_bitmap_hash_cache,
1866 include_lookup_table=self.pack_write_bitmap_lookup_table,
1867 progress=lambda msg: progress(msg.encode("ascii"))
1868 if progress and isinstance(msg, str)
1869 else None,
1870 )
1872 # Write the bitmap
1873 target_bitmap_path = pack_base_name + ".bitmap"
1874 write_bitmap(target_bitmap_path, bitmap)
1876 if progress:
1877 progress("Bitmap index written\r".encode("ascii"))
1879 # Add the pack to the store and return it.
1880 final_pack = Pack(
1881 pack_base_name,
1882 delta_window_size=self.pack_delta_window_size,
1883 window_memory=self.pack_window_memory,
1884 delta_cache_size=self.pack_delta_cache_size,
1885 depth=self.pack_depth,
1886 threads=self.pack_threads,
1887 big_file_threshold=self.pack_big_file_threshold,
1888 )
1889 final_pack.check_length_and_checksum()
1890 self._add_cached_pack(pack_base_name, final_pack)
1891 return final_pack
1893 def add_thin_pack(
1894 self,
1895 read_all: Callable[[int], bytes],
1896 read_some: Callable[[int], bytes] | None,
1897 progress: Callable[..., None] | None = None,
1898 ) -> "Pack":
1899 """Add a new thin pack to this object store.
1901 Thin packs are packs that contain deltas with parents that exist
1902 outside the pack. They should never be placed in the object store
1903 directly, and should always be indexed and completed as they are copied.
1905 Args:
1906 read_all: Read function that blocks until the number of
1907 requested bytes are read.
1908 read_some: Read function that returns at least one byte, but may
1909 not return the number of bytes requested.
1910 progress: Optional progress reporting function.
1911 Returns: A Pack object pointing at the now-completed thin pack in the
1912 objects/pack directory.
1913 """
1914 import tempfile
1916 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1917 with os.fdopen(fd, "w+b") as f:
1918 os.chmod(path, PACK_MODE)
1919 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) # type: ignore[arg-type]
1920 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) # type: ignore[arg-type]
1921 copier.verify(progress=progress)
1922 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1924 def add_pack(
1925 self,
1926 ) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
1927 """Add a new pack to this object store.
1929 Returns: Fileobject to write to, a commit function to
1930 call when the pack is finished and an abort
1931 function.
1932 """
1933 import tempfile
1935 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1936 f = os.fdopen(fd, "w+b")
1937 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1938 os.chmod(path, mask)
1940 def commit() -> "Pack | None":
1941 if f.tell() > 0:
1942 f.seek(0)
1944 with PackData(path, f) as pd:
1945 indexer = PackIndexer.for_pack_data(
1946 pd,
1947 resolve_ext_ref=self.get_raw, # type: ignore[arg-type]
1948 )
1949 return self._complete_pack(f, path, len(pd), indexer) # type: ignore[arg-type]
1950 else:
1951 f.close()
1952 os.remove(path)
1953 return None
1955 def abort() -> None:
1956 f.close()
1957 os.remove(path)
1959 return f, commit, abort # type: ignore[return-value]
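# Illustrative sketch (not part of this module): the commit/abort contract
# returned by add_pack(), mirroring how add_pack_data() below drives it.
# `store`, `unpacked_objects` and `count` are assumed to come from elsewhere
# (e.g. pack_objects_to_data()).
#
#     f, commit, abort = store.add_pack()
#     try:
#         write_pack_data(f.write, unpacked_objects, num_records=count)
#     except BaseException:
#         abort()
#         raise
#     else:
#         commit()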
1961 def add_object(self, obj: ShaFile) -> None:
1962 """Add a single object to this object store.
1964 Args:
1965 obj: Object to add
1966 """
1967 path = self._get_shafile_path(obj.id)
1968 dir = os.path.dirname(path)
1969 try:
1970 os.mkdir(dir)
1971 if self.dir_mode is not None:
1972 os.chmod(dir, self.dir_mode)
1973 except FileExistsError:
1974 pass
1975 if os.path.exists(path):
1976 return # Already there, no need to write again
1977 mask = self.file_mode if self.file_mode is not None else PACK_MODE
1978 with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f:
1979 f.write(
1980 obj.as_legacy_object(compression_level=self.loose_compression_level)
1981 )
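# Illustrative sketch (not part of this module): adding a loose object and
# checking for it, assuming `store` is a DiskObjectStore.
#
#     from dulwich.objects import Blob
#     blob = Blob.from_string(b"hello world")
#     store.add_object(blob)
#     assert store.contains_loose(blob.id)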
1983 @classmethod
1984 def init(
1985 cls,
1986 path: str | os.PathLike[str],
1987 *,
1988 file_mode: int | None = None,
1989 dir_mode: int | None = None,
1990 ) -> "DiskObjectStore":
1991 """Initialize a new disk object store.
1993 Creates the necessary directory structure for a Git object store.
1995 Args:
1996 path: Path where the object store should be created
1997 file_mode: Optional file permission mask for shared repository
1998 dir_mode: Optional directory permission mask for shared repository
2000 Returns:
2001 New DiskObjectStore instance
2002 """
2003 try:
2004 os.mkdir(path)
2005 if dir_mode is not None:
2006 os.chmod(path, dir_mode)
2007 except FileExistsError:
2008 pass
2009 info_path = os.path.join(path, "info")
2010 pack_path = os.path.join(path, PACKDIR)
2011 os.mkdir(info_path)
2012 os.mkdir(pack_path)
2013 if dir_mode is not None:
2014 os.chmod(info_path, dir_mode)
2015 os.chmod(pack_path, dir_mode)
2016 return cls(path, file_mode=file_mode, dir_mode=dir_mode)
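# Illustrative sketch (not part of this module): creating a fresh object
# store on disk; the path is hypothetical.
#
#     store = DiskObjectStore.init("/tmp/example-repo/.git/objects")
#     # init() creates the "info" and "pack" subdirectories as shown above.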
2018 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
2019 """Iterate over all object SHAs with the given prefix.
2021 Args:
2022 prefix: Hex prefix to search for (as bytes)
2024 Returns:
2025 Iterator of object SHAs (as ObjectID) matching the prefix
2026 """
2027 if len(prefix) < 2:
2028 yield from super().iter_prefix(prefix)
2029 return
2030 seen = set()
2031 dir = prefix[:2].decode()
2032 rest = prefix[2:].decode()
2033 try:
2034 for name in os.listdir(os.path.join(self.path, dir)):
2035 if name.startswith(rest):
2036 sha = ObjectID(os.fsencode(dir + name))
2037 if sha not in seen:
2038 seen.add(sha)
2039 yield sha
2040 except FileNotFoundError:
2041 pass
2043 for p in self.packs:
2044 bin_prefix = (
2045 binascii.unhexlify(prefix)
2046 if len(prefix) % 2 == 0
2047 else binascii.unhexlify(prefix[:-1])
2048 )
2049 for bin_sha in p.index.iter_prefix(bin_prefix):
2050 sha = sha_to_hex(bin_sha)
2051 if sha.startswith(prefix) and sha not in seen:
2052 seen.add(sha)
2053 yield sha
2054 for alternate in self.alternates:
2055 for sha in alternate.iter_prefix(prefix):
2056 if sha not in seen:
2057 seen.add(sha)
2058 yield sha
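# Illustrative sketch (not part of this module): resolving an abbreviated
# SHA via iter_prefix(); the prefix value is hypothetical.
#
#     matches = list(store.iter_prefix(b"ab12"))
#     if len(matches) == 1:
#         obj = store[matches[0]]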
2060 def get_commit_graph(self) -> "CommitGraph | None":
2061 """Get the commit graph for this object store.
2063 Returns:
2064 CommitGraph object if available, None otherwise
2065 """
2066 if not self._use_commit_graph:
2067 return None
2069 if self._commit_graph is None:
2070 from .commit_graph import read_commit_graph
2072 # Look for commit graph in our objects directory
2073 graph_file = os.path.join(self.path, "info", "commit-graph")
2074 if os.path.exists(graph_file):
2075 self._commit_graph = read_commit_graph(graph_file)
2076 return self._commit_graph
2078 def get_midx(self) -> MultiPackIndex | None:
2079 """Get the multi-pack-index for this object store.
2081 Returns:
2082 MultiPackIndex object if available, None otherwise
2084 Raises:
2085 ValueError: If MIDX file is corrupt
2086 OSError: If MIDX file cannot be read
2087 """
2088 if not self._use_midx:
2089 return None
2091 if self._midx is None:
2092 # Look for MIDX in pack directory
2093 midx_file = os.path.join(self.pack_dir, "multi-pack-index")
2094 if os.path.exists(midx_file):
2095 self._midx = load_midx(midx_file)
2096 return self._midx
2098 def _get_pack_by_name(self, pack_name: str) -> Pack:
2099 """Get a pack by its base name.
2101 Args:
2102 pack_name: Name of the pack, with or without extension (e.g., 'pack-abc123', 'pack-abc123.pack', or 'pack-abc123.idx')
2104 Returns:
2105 Pack object
2107 Raises:
2108 KeyError: If pack doesn't exist
2109 """
2110 # Remove .pack or .idx extension if present
2111 if pack_name.endswith(".pack"):
2112 base_name = pack_name[:-5]
2113 elif pack_name.endswith(".idx"):
2114 base_name = pack_name[:-4]
2115 else:
2116 base_name = pack_name
2118 # Check if already in cache
2119 if base_name in self._pack_cache:
2120 return self._pack_cache[base_name]
2122 # Load the pack
2123 pack_path = os.path.join(self.pack_dir, base_name)
2124 if not os.path.exists(pack_path + ".pack"):
2125 raise KeyError(f"Pack {pack_name} not found")
2127 pack = Pack(
2128 pack_path,
2129 delta_window_size=self.pack_delta_window_size,
2130 window_memory=self.pack_window_memory,
2131 delta_cache_size=self.pack_delta_cache_size,
2132 depth=self.pack_depth,
2133 threads=self.pack_threads,
2134 big_file_threshold=self.pack_big_file_threshold,
2135 )
2136 self._pack_cache[base_name] = pack
2137 return pack
2139 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
2140 """Check if a particular object is present by SHA1 and is packed.
2142 This checks the MIDX first if available, then falls back to checking
2143 individual pack indexes.
2145 Args:
2146 sha: Binary SHA of the object
2148 Returns:
2149 True if the object is in a pack file
2150 """
2151 # Check MIDX first for faster lookup
2152 midx = self.get_midx()
2153 if midx is not None and sha in midx:
2154 return True
2156 # Fall back to checking individual packs
2157 return super().contains_packed(sha)
2159 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
2160 """Obtain the raw fulltext for an object.
2162 This uses the MIDX if available for faster lookups.
2164 Args:
2165 name: SHA for the object (20 bytes binary or 40 bytes hex)
2167 Returns:
2168 Tuple with numeric type and object contents
2170 Raises:
2171 KeyError: If object not found
2172 """
2173 if name == ZERO_SHA:
2174 raise KeyError(name)
2176 sha: RawObjectID
2177 if len(name) == 40:
2178 # name is ObjectID (hex), convert to RawObjectID
2179 sha = hex_to_sha(cast(ObjectID, name))
2180 elif len(name) == 20:
2181 # name is already RawObjectID (binary)
2182 sha = RawObjectID(name)
2183 else:
2184 raise AssertionError(f"Invalid object name {name!r}")
2186 # Try MIDX first for faster lookup
2187 midx = self.get_midx()
2188 if midx is not None:
2189 result = midx.object_offset(sha)
2190 if result is not None:
2191 pack_name, _offset = result
2192 try:
2193 pack = self._get_pack_by_name(pack_name)
2194 return pack.get_raw(sha)
2195 except (KeyError, PackFileDisappeared):
2196 # Pack disappeared or object not found, fall through to standard lookup
2197 pass
2199 # Fall back to the standard implementation
2200 return super().get_raw(name)
2202 def write_midx(self) -> bytes:
2203 """Write a multi-pack-index file for this object store.
2205 Creates a MIDX file that indexes all pack files in the pack directory.
2207 Returns:
2208 SHA-1 checksum of the written MIDX file
2210 Raises:
2211 OSError: If the pack directory doesn't exist or MIDX can't be written
2212 """
2213 from .midx import write_midx_file
2215 # Get all pack files
2216 packs = self.packs
2217 if not packs:
2218 # No packs to index
2219 return b"\x00" * 20
2221 # Collect entries from all packs
2222 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
2224 for pack in packs:
2225 # Git stores .idx extension in MIDX, not .pack
2226 pack_name = os.path.basename(pack._basename) + ".idx"
2227 entries = list(pack.index.iterentries())
2228 pack_entries.append((pack_name, entries))
2230 # Write MIDX file
2231 midx_path = os.path.join(self.pack_dir, "multi-pack-index")
2232 return write_midx_file(midx_path, pack_entries)
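# Illustrative sketch (not part of this module): writing a multi-pack-index
# and reading it back, assuming `store` is a DiskObjectStore with MIDX use
# enabled in its configuration.
#
#     checksum = store.write_midx()
#     midx = store.get_midx()  # returns None unless MIDX use is enabled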
2234 def write_commit_graph(
2235 self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
2236 ) -> None:
2237 """Write a commit graph file for this object store.
2239 Args:
2240 refs: Commit (ref target) SHAs to include. If None, all commits found in the object store are included.
2241 reachable: If True, includes all commits reachable from refs.
2242 If False, only includes the direct ref targets.
2243 """
2244 from .commit_graph import get_reachable_commits
2246 if refs is None:
2247 # Get all commit objects from the object store
2248 all_refs = []
2249 # Iterate through all objects to find commits
2250 for sha in self:
2251 try:
2252 obj = self[sha]
2253 if obj.type_name == b"commit":
2254 all_refs.append(sha)
2255 except KeyError:
2256 continue
2257 else:
2258 # Use provided refs
2259 all_refs = list(refs)
2261 if not all_refs:
2262 return # No commits to include
2264 if reachable:
2265 # Get all reachable commits
2266 commit_ids = get_reachable_commits(self, all_refs)
2267 else:
2268 # Just use the direct ref targets (already ObjectIDs)
2269 commit_ids = all_refs
2271 if commit_ids:
2272 # Write commit graph directly to our object store path
2273 # Generate the commit graph
2274 from .commit_graph import generate_commit_graph
2276 graph = generate_commit_graph(self, commit_ids)
2278 if graph.entries:
2279 # Ensure the info directory exists
2280 info_dir = os.path.join(self.path, "info")
2281 os.makedirs(info_dir, exist_ok=True)
2282 if self.dir_mode is not None:
2283 os.chmod(info_dir, self.dir_mode)
2285 # Write using GitFile for atomic operation
2286 graph_path = os.path.join(info_dir, "commit-graph")
2287 mask = self.file_mode if self.file_mode is not None else 0o644
2288 with GitFile(graph_path, "wb", mask=mask) as f:
2289 assert isinstance(
2290 f, _GitFile
2291 ) # GitFile in write mode always returns _GitFile
2292 graph.write_to_file(f)
2294 # Clear cached commit graph so it gets reloaded
2295 self._commit_graph = None
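# Illustrative sketch (not part of this module): generating a commit graph
# for a set of heads and reading it back; `head_sha` is a hypothetical
# commit SHA.
#
#     store.write_commit_graph(refs=[head_sha], reachable=True)
#     graph = store.get_commit_graph()
#     if graph is not None:
#         parents = graph.get_parents(head_sha)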
2297 def prune(self, grace_period: int | None = None) -> None:
2298 """Prune/clean up this object store.
2300 This removes temporary files that were left behind by interrupted
2301 pack operations. These are files that start with ``tmp_pack_`` in the
2302 repository directory or files with .pack extension but no corresponding
2303 .idx file in the pack directory.
2305 Args:
2306 grace_period: Grace period in seconds for removing temporary files.
2307 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
2308 """
2309 import glob
2311 if grace_period is None:
2312 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
2314 # Clean up tmp_pack_* files in the repository directory
2315 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
2316 # Check if file is old enough (more than grace period)
2317 mtime = os.path.getmtime(tmp_file)
2318 if time.time() - mtime > grace_period:
2319 os.remove(tmp_file)
2321 # Clean up orphaned .pack files without corresponding .idx files
2322 try:
2323 pack_dir_contents = os.listdir(self.pack_dir)
2324 except FileNotFoundError:
2325 return
2327 pack_files = {}
2328 idx_files = set()
2330 for name in pack_dir_contents:
2331 if name.endswith(".pack"):
2332 base_name = name[:-5] # Remove .pack extension
2333 pack_files[base_name] = name
2334 elif name.endswith(".idx"):
2335 base_name = name[:-4] # Remove .idx extension
2336 idx_files.add(base_name)
2338 # Remove .pack files without corresponding .idx files
2339 for base_name, pack_name in pack_files.items():
2340 if base_name not in idx_files:
2341 pack_path = os.path.join(self.pack_dir, pack_name)
2342 # Check if file is old enough (more than grace period)
2343 mtime = os.path.getmtime(pack_path)
2344 if time.time() - mtime > grace_period:
2345 os.remove(pack_path)
2347 def close(self) -> None:
2348 """Close the object store and release resources.
2350 This method closes all cached pack files, MIDX, and frees associated resources.
2351 """
2352 # Close MIDX if it's loaded
2353 if self._midx is not None:
2354 self._midx.close()
2355 self._midx = None
2357 # Close alternates
2358 if self._alternates is not None:
2359 for alt in self._alternates:
2360 alt.close()
2361 self._alternates = None
2363 # Call parent class close to handle pack files
2364 super().close()
2367class MemoryObjectStore(PackCapableObjectStore):
2368 """Object store that keeps all objects in memory."""
2370 def __init__(self) -> None:
2371 """Initialize a MemoryObjectStore.
2373 Creates an empty in-memory object store.
2374 """
2375 super().__init__()
2376 self._data: dict[ObjectID, ShaFile] = {}
2377 self.pack_compression_level = -1
2379 def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
2380 if len(sha) == 40:
2381 return cast(ObjectID, sha)
2382 elif len(sha) == 20:
2383 return sha_to_hex(cast(RawObjectID, sha))
2384 else:
2385 raise ValueError(f"Invalid sha {sha!r}")
2387 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
2388 """Check if a particular object is present by SHA1 and is loose."""
2389 return self._to_hexsha(sha) in self._data
2391 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
2392 """Check if a particular object is present by SHA1 and is packed."""
2393 return False
2395 def __iter__(self) -> Iterator[ObjectID]:
2396 """Iterate over the SHAs that are present in this store."""
2397 return iter(self._data.keys())
2399 @property
2400 def packs(self) -> list[Pack]:
2401 """List with pack objects."""
2402 return []
2404 def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
2405 """Obtain the raw text for an object.
2407 Args:
2408 name: sha for the object.
2409 Returns: tuple with numeric type and object contents.
2410 """
2411 obj = self[self._to_hexsha(name)]
2412 return obj.type_num, obj.as_raw_string()
2414 def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile:
2415 """Retrieve an object by SHA.
2417 Args:
2418 name: SHA of the object (as hex string or bytes)
2420 Returns:
2421 Copy of the ShaFile object
2423 Raises:
2424 KeyError: If the object is not found
2425 """
2426 return self._data[self._to_hexsha(name)].copy()
2428 def __delitem__(self, name: ObjectID) -> None:
2429 """Delete an object from this store, for testing only."""
2430 del self._data[self._to_hexsha(name)]
2432 def add_object(self, obj: ShaFile) -> None:
2433 """Add a single object to this object store."""
2434 self._data[obj.id] = obj.copy()
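# Illustrative sketch (not part of this module): a MemoryObjectStore
# round-trip with a single blob.
#
#     from dulwich.objects import Blob
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b"in-memory data")
#     store.add_object(blob)
#     assert store[blob.id].as_raw_string() == b"in-memory data"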
2436 def add_objects(
2437 self,
2438 objects: Iterable[tuple[ShaFile, str | None]],
2439 progress: Callable[[str], None] | None = None,
2440 ) -> None:
2441 """Add a set of objects to this object store.
2443 Args:
2444 objects: Iterable over a list of (object, path) tuples
2445 progress: Optional progress reporting function.
2446 """
2447 for obj, path in objects:
2448 self.add_object(obj)
2450 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
2451 """Add a new pack to this object store.
2453 Because this object store doesn't support packs, we extract and add the
2454 individual objects.
2456 Returns: Fileobject to write to, a commit function to
2457 call when the pack is finished, and an abort function.
2458 """
2459 from tempfile import SpooledTemporaryFile
2461 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
2463 def commit() -> None:
2464 size = f.tell()
2465 if size > 0:
2466 f.seek(0)
2468 p = PackData.from_file(f, size)
2469 for obj in PackInflater.for_pack_data(p, self.get_raw): # type: ignore[arg-type]
2470 self.add_object(obj)
2471 p.close()
2472 f.close()
2473 else:
2474 f.close()
2476 def abort() -> None:
2477 f.close()
2479 return f, commit, abort # type: ignore[return-value]
2481 def add_pack_data(
2482 self,
2483 count: int,
2484 unpacked_objects: Iterator[UnpackedObject],
2485 progress: Callable[[str], None] | None = None,
2486 ) -> None:
2487 """Add pack data to this object store.
2489 Args:
2490 count: Number of items to add
2491 unpacked_objects: Iterator of UnpackedObject instances
2492 progress: Optional progress reporting function.
2493 """
2494 if count == 0:
2495 return
2497 # Since MemoryObjectStore doesn't support pack files, we need to
2498 # extract individual objects. To handle deltas properly, we write
2499 # to a temporary pack and then use PackInflater to resolve them.
2500 f, commit, abort = self.add_pack()
2501 try:
2502 write_pack_data(
2503 f.write,
2504 unpacked_objects,
2505 num_records=count,
2506 progress=progress,
2507 )
2508 except BaseException:
2509 abort()
2510 raise
2511 else:
2512 commit()
2514 def add_thin_pack(
2515 self,
2516 read_all: Callable[[], bytes],
2517 read_some: Callable[[int], bytes],
2518 progress: Callable[[str], None] | None = None,
2519 ) -> None:
2520 """Add a new thin pack to this object store.
2522 Thin packs are packs that contain deltas with parents that exist
2523 outside the pack. Because this object store doesn't support packs, we
2524 extract and add the individual objects.
2526 Args:
2527 read_all: Read function that blocks until the number of
2528 requested bytes are read.
2529 read_some: Read function that returns at least one byte, but may
2530 not return the number of bytes requested.
2531 progress: Optional progress reporting function.
2532 """
2533 f, commit, abort = self.add_pack()
2534 try:
2535 copier = PackStreamCopier(read_all, read_some, f) # type: ignore[arg-type]
2536 copier.verify()
2537 except BaseException:
2538 abort()
2539 raise
2540 else:
2541 commit()
2544class ObjectIterator(Protocol):
2545 """Interface for iterating over objects."""
2547 def iterobjects(self) -> Iterator[ShaFile]:
2548 """Iterate over all objects.
2550 Returns:
2551 Iterator of ShaFile objects
2552 """
2553 raise NotImplementedError(self.iterobjects)
2556def tree_lookup_path(
2557 lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile],
2558 root_sha: ObjectID | RawObjectID,
2559 path: bytes,
2560) -> tuple[int, ObjectID]:
2561 """Look up an object in a Git tree.
2563 Args:
2564 lookup_obj: Callback for retrieving object by SHA1
2565 root_sha: SHA1 of the root tree
2566 path: Path to lookup
2567 Returns: A tuple of (mode, SHA) of the resulting path.
2568 """
2569 tree = lookup_obj(root_sha)
2570 if not isinstance(tree, Tree):
2571 raise NotTreeError(root_sha)
2572 return tree.lookup_path(lookup_obj, path)
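# Illustrative sketch (not part of this module): looking up a nested path in
# a tree; `tree_id` and the path are hypothetical.
#
#     mode, sha = tree_lookup_path(store.__getitem__, tree_id, b"docs/README")
#     blob = store[sha]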
2575def _collect_filetree_revs(
2576 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
2577) -> None:
2578 """Collect SHA1s of files and directories for specified tree.
2580 Args:
2581 obj_store: Object store to get objects by SHA from
2582 tree_sha: tree reference to walk
2583 kset: set to fill with references to files and directories
2584 """
2585 filetree = obj_store[tree_sha]
2586 assert isinstance(filetree, Tree)
2587 for name, mode, sha in filetree.iteritems():
2588 assert mode is not None
2589 assert sha is not None
2590 if not S_ISGITLINK(mode) and sha not in kset:
2591 kset.add(sha)
2592 if stat.S_ISDIR(mode):
2593 _collect_filetree_revs(obj_store, sha, kset)
2596def _split_commits_and_tags(
2597 obj_store: ObjectContainer,
2598 lst: Iterable[ObjectID],
2599 *,
2600 unknown: str = "error",
2601) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]:
2602 """Split object id list into three lists with commit, tag, and other SHAs.
2604 Commits referenced by tags are included in the commits
2605 list as well. Only SHA1s known in this repository will get
2606 through; how unknown SHAs are handled is controlled by the unknown parameter.
2608 Args:
2609 obj_store: Object store to get objects by SHA1 from
2610 lst: Collection of commit and tag SHAs
2611 unknown: How to handle unknown objects: "error", "warn", or "ignore"
2612 Returns: A tuple of (commits, tags, others) SHA1s
2613 """
2614 import logging
2616 if unknown not in ("error", "warn", "ignore"):
2617 raise ValueError(
2618 f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}"
2619 )
2621 commits: set[ObjectID] = set()
2622 tags: set[ObjectID] = set()
2623 others: set[ObjectID] = set()
2624 for e in lst:
2625 try:
2626 o = obj_store[e]
2627 except KeyError:
2628 if unknown == "error":
2629 raise
2630 elif unknown == "warn":
2631 logging.warning(
2632 "Object %s not found in object store", e.decode("ascii")
2633 )
2634 # else: ignore
2635 else:
2636 if isinstance(o, Commit):
2637 commits.add(e)
2638 elif isinstance(o, Tag):
2639 tags.add(e)
2640 tagged = o.object[1]
2641 c, t, os = _split_commits_and_tags(obj_store, [tagged], unknown=unknown)
2642 commits |= c
2643 tags |= t
2644 others |= os
2645 else:
2646 others.add(e)
2647 return (commits, tags, others)
2650class MissingObjectFinder:
2651 """Find the objects missing from another object store.
2653 Args:
2654 object_store: Object store containing at least all objects to be
2655 sent
2656 haves: SHA1s of commits not to send (already present in target)
2657 wants: SHA1s of commits to send
2658 progress: Optional function to report progress to.
2659 get_tagged: Function that returns a dict of pointed-to sha -> tag
2660 sha for including tags.
2661 get_parents: Optional function for getting the parents of a commit.
2662 """
2664 def __init__(
2665 self,
2666 object_store: BaseObjectStore,
2667 haves: Iterable[ObjectID],
2668 wants: Iterable[ObjectID],
2669 *,
2670 shallow: Set[ObjectID] | None = None,
2671 progress: Callable[[bytes], None] | None = None,
2672 get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
2673 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
2674 ) -> None:
2675 """Initialize a MissingObjectFinder.
2677 Args:
2678 object_store: Object store containing objects
2679 haves: SHA1s of objects already present in target
2680 wants: SHA1s of objects to send
2681 shallow: Set of shallow commit SHA1s
2682 progress: Optional progress reporting callback
2683 get_tagged: Function returning dict of pointed-to sha -> tag sha
2684 get_parents: Function for getting commit parents
2685 """
2686 self.object_store = object_store
2687 if shallow is None:
2688 shallow = set()
2689 self._get_parents = get_parents
2690 reachability = object_store.get_reachability_provider()
2691 # process Commits and Tags differently
2692 # haves may list commits/tags not available locally (silently ignore them).
2693 # wants should only contain valid SHAs (fail fast if not).
2694 have_commits, have_tags, have_others = _split_commits_and_tags(
2695 object_store, haves, unknown="ignore"
2696 )
2697 want_commits, want_tags, want_others = _split_commits_and_tags(
2698 object_store, wants, unknown="error"
2699 )
2700 # all_ancestors is a set of commits that shall not be sent
2701 # (complete repository up to 'haves')
2702 all_ancestors = reachability.get_reachable_commits(
2703 have_commits, exclude=None, shallow=shallow
2704 )
2705 # all_missing - complete set of commits between haves and wants
2706 # common_commits - boundary commits directly encountered when traversing wants
2707 # We use _collect_ancestors here because we need the exact boundary behavior:
2708 # commits that are in all_ancestors and directly reachable from wants,
2709 # but we don't traverse past them. This is hard to express with the
2710 # reachability abstraction alone.
2711 missing_commits, common_commits = _collect_ancestors(
2712 object_store,
2713 want_commits,
2714 frozenset(all_ancestors),
2715 shallow=frozenset(shallow),
2716 get_parents=self._get_parents,
2717 )
2719 self.remote_has: set[ObjectID] = set()
2720 # Now, fill sha_done with commits and revisions of
2721 # files and directories known to be present both locally
2722 # and on the target. Thus these commits and files
2723 # won't get selected for fetch.
2724 for h in common_commits:
2725 self.remote_has.add(h)
2726 cmt = object_store[h]
2727 assert isinstance(cmt, Commit)
2728 # Get tree objects for this commit
2729 tree_objects = reachability.get_tree_objects([cmt.tree])
2730 self.remote_has.update(tree_objects)
2732 # record tags we have as visited, too
2733 for t in have_tags:
2734 self.remote_has.add(t)
2735 self.sha_done = set(self.remote_has)
2737 # in fact, what we 'want' is commits, tags, and others
2738 # we've found missing
2739 self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
2740 (w, None, Commit.type_num, False) for w in missing_commits
2741 }
2742 missing_tags = want_tags.difference(have_tags)
2743 self.objects_to_send.update(
2744 {(w, None, Tag.type_num, False) for w in missing_tags}
2745 )
2746 missing_others = want_others.difference(have_others)
2747 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
2749 if progress is None:
2750 self.progress: Callable[[bytes], None] = lambda x: None
2751 else:
2752 self.progress = progress
2753 self._tagged = (get_tagged and get_tagged()) or {}
2755 def get_remote_has(self) -> set[ObjectID]:
2756 """Get the set of SHAs the remote has.
2758 Returns:
2759 Set of SHA1s that the remote side already has
2760 """
2761 return self.remote_has
2763 def add_todo(
2764 self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
2765 ) -> None:
2766 """Add objects to the todo list.
2768 Args:
2769 entries: Iterable of tuples (sha, name, type_num, is_leaf)
2770 """
2771 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
2773 def __next__(self) -> tuple[ObjectID, PackHint | None]:
2774 """Get the next object to send.
2776 Returns:
2777 Tuple of (sha, pack_hint)
2779 Raises:
2780 StopIteration: When no more objects to send
2781 """
2782 while True:
2783 if not self.objects_to_send:
2784 self.progress(
2785 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
2786 )
2787 raise StopIteration
2788 (sha, name, type_num, leaf) = self.objects_to_send.pop()
2789 if sha not in self.sha_done:
2790 break
2791 if not leaf:
2792 o = self.object_store[sha]
2793 if isinstance(o, Commit):
2794 self.add_todo([(o.tree, b"", Tree.type_num, False)])
2795 elif isinstance(o, Tree):
2796 todos = []
2797 for n, m, s in o.iteritems():
2798 assert m is not None
2799 assert n is not None
2800 assert s is not None
2801 if not S_ISGITLINK(m):
2802 todos.append(
2803 (
2804 s,
2805 n,
2806 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
2807 not stat.S_ISDIR(m),
2808 )
2809 )
2810 self.add_todo(todos)
2811 elif isinstance(o, Tag):
2812 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
2813 if sha in self._tagged:
2814 self.add_todo([(self._tagged[sha], None, None, True)])
2815 self.sha_done.add(sha)
2816 if len(self.sha_done) % 1000 == 0:
2817 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
2818 if type_num is None:
2819 pack_hint = None
2820 else:
2821 pack_hint = (type_num, name)
2822 return (sha, pack_hint)
2824 def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]:
2825 """Return iterator over objects to send.
2827 Returns:
2828 Self (this class implements the iterator protocol)
2829 """
2830 return self
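# Illustrative sketch (not part of this module): enumerating the objects a
# client is missing; `old_head` and `new_head` are hypothetical commit SHAs.
#
#     finder = MissingObjectFinder(store, haves=[old_head], wants=[new_head])
#     for sha, pack_hint in finder:
#         ...  # feed (sha, pack_hint) into pack generation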
2833class ObjectStoreGraphWalker:
2834 """Graph walker that finds what commits are missing from an object store."""
2836 heads: set[ObjectID]
2837 """Revisions without descendants in the local repo."""
2839 get_parents: Callable[[ObjectID], list[ObjectID]]
2840 """Function to retrieve parents in the local repo."""
2842 shallow: set[ObjectID]
2844 def __init__(
2845 self,
2846 local_heads: Iterable[ObjectID],
2847 get_parents: Callable[[ObjectID], list[ObjectID]],
2848 shallow: set[ObjectID] | None = None,
2849 update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
2850 | None = None,
2851 ) -> None:
2852 """Create a new instance.
2854 Args:
2855 local_heads: Heads to start search with
2856 get_parents: Function for finding the parents of a SHA1.
2857 shallow: Set of shallow commits.
2858 update_shallow: Function to update shallow commits.
2859 """
2860 self.heads = set(local_heads)
2861 self.get_parents = get_parents
2862 self.parents: dict[ObjectID, list[ObjectID] | None] = {}
2863 if shallow is None:
2864 shallow = set()
2865 self.shallow = shallow
2866 self.update_shallow = update_shallow
2868 def nak(self) -> None:
2869 """Nothing in common was found."""
2871 def ack(self, sha: ObjectID) -> None:
2872 """Ack that a revision and its ancestors are present in the source."""
2873 if len(sha) != 40:
2874 raise ValueError(f"unexpected sha {sha!r} received")
2875 ancestors = {sha}
2877 # stop if we run out of heads to remove
2878 while self.heads:
2879 for a in ancestors:
2880 if a in self.heads:
2881 self.heads.remove(a)
2883 # collect all ancestors
2884 new_ancestors = set()
2885 for a in ancestors:
2886 ps = self.parents.get(a)
2887 if ps is not None:
2888 new_ancestors.update(ps)
2889 self.parents[a] = None
2891 # no more ancestors; stop
2892 if not new_ancestors:
2893 break
2895 ancestors = new_ancestors
2897 def next(self) -> ObjectID | None:
2898 """Iterate over ancestors of heads in the target."""
2899 if self.heads:
2900 ret = self.heads.pop()
2901 try:
2902 ps = self.get_parents(ret)
2903 except KeyError:
2904 return None
2905 self.parents[ret] = ps
2906 self.heads.update([p for p in ps if p not in self.parents])
2907 return ret
2908 return None
2910 __next__ = next
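# Illustrative sketch (not part of this module): walking local history to
# negotiate with a remote; `head` is a hypothetical commit SHA and `store`
# an object store.
#
#     def get_parents(sha):
#         return store[sha].parents
#
#     walker = ObjectStoreGraphWalker([head], get_parents)
#     sha = walker.next()      # next candidate to offer the remote
#     if sha is not None:
#         walker.ack(sha)      # remote reported it already has this commit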
2913def commit_tree_changes(
2914 object_store: BaseObjectStore,
2915 tree: ObjectID | Tree,
2916 changes: Sequence[tuple[bytes, int | None, ObjectID | None]],
2917) -> ObjectID:
2918 """Commit a specified set of changes to a tree structure.
2920 This will apply a set of changes on top of an existing tree, storing new
2921 objects in object_store.
2923 changes are a list of tuples with (path, mode, object_sha).
2924 Paths can refer to both blobs and trees. Setting the mode and
2925 object sha to None deletes the path.
2927 This method works especially well if there are only a small
2928 number of changes to a big tree. For a large number of changes
2929 to a large tree, use e.g. commit_tree.
2931 Args:
2932 object_store: Object store to store new objects in
2933 and retrieve old ones from.
2934 tree: Original tree root (SHA or Tree object)
2935 changes: changes to apply
2936 Returns: SHA1 of the new tree root
2937 """
2938 # TODO(jelmer): Save up the objects and add them using .add_objects
2939 # rather than with individual calls to .add_object.
2940 # Handle both Tree object and SHA
2941 if isinstance(tree, Tree):
2942 tree_obj: Tree = tree
2943 else:
2944 sha_obj = object_store[tree]
2945 assert isinstance(sha_obj, Tree)
2946 tree_obj = sha_obj
2947 nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {}
2948 for path, new_mode, new_sha in changes:
2949 try:
2950 (dirname, subpath) = path.split(b"/", 1)
2951 except ValueError:
2952 if new_sha is None:
2953 del tree_obj[path]
2954 else:
2955 assert new_mode is not None
2956 tree_obj[path] = (new_mode, new_sha)
2957 else:
2958 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
2959 for name, subchanges in nested_changes.items():
2960 try:
2961 orig_subtree_id: ObjectID | Tree = tree_obj[name][1]
2962 except KeyError:
2963 # For new directories, pass an empty Tree object
2964 orig_subtree_id = Tree()
2965 subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
2966 subtree = object_store[subtree_id]
2967 assert isinstance(subtree, Tree)
2968 if len(subtree) == 0:
2969 del tree_obj[name]
2970 else:
2971 tree_obj[name] = (stat.S_IFDIR, subtree.id)
2972 object_store.add_object(tree_obj)
2973 return tree_obj.id
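# Illustrative sketch (not part of this module): applying a small change set
# on top of an existing tree; `root_tree_id` and `blob` are hypothetical.
#
#     new_root = commit_tree_changes(
#         store,
#         root_tree_id,
#         [
#             (b"docs/README", 0o100644, blob.id),  # add or update a file
#             (b"old/path.txt", None, None),        # delete a path
#         ],
#     )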
2976class OverlayObjectStore(BaseObjectStore):
2977 """Object store that can overlay multiple object stores."""
2979 def __init__(
2980 self,
2981 bases: list[BaseObjectStore],
2982 add_store: BaseObjectStore | None = None,
2983 ) -> None:
2984 """Initialize an OverlayObjectStore.
2986 Args:
2987 bases: List of base object stores to overlay
2988 add_store: Optional store to write new objects to
2989 """
2990 self.bases = bases
2991 self.add_store = add_store
2993 def add_object(self, object: ShaFile) -> None:
2994 """Add a single object to the store.
2996 Args:
2997 object: Object to add
2999 Raises:
3000 NotImplementedError: If no add_store was provided
3001 """
3002 if self.add_store is None:
3003 raise NotImplementedError(self.add_object)
3004 return self.add_store.add_object(object)
3006 def add_objects(
3007 self,
3008 objects: Sequence[tuple[ShaFile, str | None]],
3009 progress: Callable[[str], None] | None = None,
3010 ) -> Pack | None:
3011 """Add multiple objects to the store.
3013 Args:
3014 objects: Iterator of objects to add
3015 progress: Optional progress reporting callback
3017 Raises:
3018 NotImplementedError: If no add_store was provided
3019 """
3020 if self.add_store is None:
3021 raise NotImplementedError(self.add_object)
3022 return self.add_store.add_objects(objects, progress)
3024 @property
3025 def packs(self) -> list[Pack]:
3026 """Get the list of packs from all overlaid stores.
3028 Returns:
3029 Combined list of packs from all base stores
3030 """
3031 ret = []
3032 for b in self.bases:
3033 ret.extend(b.packs)
3034 return ret
3036 def __iter__(self) -> Iterator[ObjectID]:
3037 """Iterate over all object SHAs in the overlaid stores.
3039 Returns:
3040 Iterator of object SHAs (deduped across stores)
3041 """
3042 done = set()
3043 for b in self.bases:
3044 for o_id in b:
3045 if o_id not in done:
3046 yield o_id
3047 done.add(o_id)
3049 def iterobjects_subset(
3050 self, shas: Iterable[ObjectID], *, allow_missing: bool = False
3051 ) -> Iterator[ShaFile]:
3052 """Iterate over a subset of objects from the overlaid stores.
3054 Args:
3055 shas: Iterable of object SHAs to retrieve
3056 allow_missing: If True, skip missing objects; if False, raise KeyError
3058 Returns:
3059 Iterator of ShaFile objects
3061 Raises:
3062 KeyError: If an object is missing and allow_missing is False
3063 """
3064 todo = set(shas)
3065 found: set[ObjectID] = set()
3067 for b in self.bases:
3068 # Create a copy of todo for each base to avoid modifying
3069 # the set while iterating through it
3070 current_todo = todo - found
3071 for o in b.iterobjects_subset(current_todo, allow_missing=True):
3072 yield o
3073 found.add(o.id)
3075 # Check for any remaining objects not found
3076 missing = todo - found
3077 if missing and not allow_missing:
3078 raise KeyError(next(iter(missing)))
3080 def iter_unpacked_subset(
3081 self,
3082 shas: Iterable[ObjectID | RawObjectID],
3083 include_comp: bool = False,
3084 allow_missing: bool = False,
3085 convert_ofs_delta: bool = True,
3086 ) -> Iterator[UnpackedObject]:
3087 """Iterate over unpacked objects from the overlaid stores.
3089 Args:
3090 shas: Iterable of object SHAs to retrieve
3091 include_comp: Whether to include compressed data
3092 allow_missing: If True, skip missing objects; if False, raise KeyError
3093 convert_ofs_delta: Whether to convert OFS_DELTA objects
3095 Returns:
3096 Iterator of unpacked objects
3098 Raises:
3099 KeyError: If an object is missing and allow_missing is False
3100 """
3101 todo: set[ObjectID | RawObjectID] = set(shas)
3102 for b in self.bases:
3103 for o in b.iter_unpacked_subset(
3104 todo,
3105 include_comp=include_comp,
3106 allow_missing=True,
3107 convert_ofs_delta=convert_ofs_delta,
3108 ):
3109 yield o
3110 todo.remove(o.sha())
3111 if todo and not allow_missing:
3112 raise KeyError(next(iter(todo)))
3114 def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]:
3115 """Get the raw object data from the overlaid stores.
3117 Args:
3118 sha_id: SHA of the object
3120 Returns:
3121 Tuple of (type_num, raw_data)
3123 Raises:
3124 KeyError: If object not found in any base store
3125 """
3126 for b in self.bases:
3127 try:
3128 return b.get_raw(sha_id)
3129 except KeyError:
3130 pass
3131 raise KeyError(sha_id)
3133 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
3134 """Check if an object is packed in any base store.
3136 Args:
3137 sha: SHA of the object
3139 Returns:
3140 True if object is packed in any base store
3141 """
3142 for b in self.bases:
3143 if b.contains_packed(sha):
3144 return True
3145 return False
3147 def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
3148 """Check if an object is loose in any base store.
3150 Args:
3151 sha: SHA of the object
3153 Returns:
3154 True if object is loose in any base store
3155 """
3156 for b in self.bases:
3157 if b.contains_loose(sha):
3158 return True
3159 return False
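# Illustrative sketch (not part of this module): overlaying two stores so
# lookups fall through to either base, while writes go to the first;
# `some_sha` is hypothetical.
#
#     primary = MemoryObjectStore()
#     fallback = MemoryObjectStore()
#     overlay = OverlayObjectStore([primary, fallback], add_store=primary)
#     type_num, data = overlay.get_raw(some_sha)  # searched in both bases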
3162def read_packs_file(f: BinaryIO) -> Iterator[str]:
3163 """Yield the packs listed in a packs file."""
3164 for line in f.read().splitlines():
3165 if not line:
3166 continue
3167 (kind, name) = line.split(b" ", 1)
3168 if kind != b"P":
3169 continue
3170 yield os.fsdecode(name)
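# Illustrative sketch (not part of this module): parsing an objects/info/packs
# file, whose entries look like b"P pack-<name>.pack"; the name is hypothetical.
#
#     from io import BytesIO
#     names = list(read_packs_file(BytesIO(b"P pack-1234.pack\n")))
#     # names == ["pack-1234.pack"]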
3173class BucketBasedObjectStore(PackBasedObjectStore):
3174 """Object store implementation that uses a bucket store like S3 as backend."""
3176 def _iter_loose_objects(self) -> Iterator[ObjectID]:
3177 """Iterate over the SHAs of all loose objects."""
3178 return iter([])
3180 def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None:
3181 return None
3183 def delete_loose_object(self, sha: ObjectID) -> None:
3184 """Delete a loose object (no-op for bucket stores).
3186 Bucket-based stores don't have loose objects, so this is a no-op.
3188 Args:
3189 sha: SHA of the object to delete
3190 """
3191 # Doesn't exist.
3193 def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
3194 """Pack loose objects. Returns number of objects packed.
3196 BucketBasedObjectStore doesn't support loose objects, so this is a no-op.
3198 Args:
3199 progress: Optional progress reporting callback (ignored)
3200 """
3201 return 0
3203 def _remove_pack_by_name(self, name: str) -> None:
3204 """Remove a pack by name. Subclasses should implement this."""
3205 raise NotImplementedError(self._remove_pack_by_name)
3207 def _iter_pack_names(self) -> Iterator[str]:
3208 raise NotImplementedError(self._iter_pack_names)
3210 def _get_pack(self, name: str) -> Pack:
3211 raise NotImplementedError(self._get_pack)
3213 def _update_pack_cache(self) -> list[Pack]:
3214 pack_files = set(self._iter_pack_names())
3216 # Open newly appeared pack files
3217 new_packs = []
3218 for f in pack_files:
3219 if f not in self._pack_cache:
3220 pack = self._get_pack(f)
3221 new_packs.append(pack)
3222 self._pack_cache[f] = pack
3223 # Remove disappeared pack files
3224 for f in set(self._pack_cache) - pack_files:
3225 self._pack_cache.pop(f).close()
3226 return new_packs
3228 def _upload_pack(
3229 self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
3230 ) -> None:
3231 raise NotImplementedError
3233 def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
3234 """Add a new pack to this object store.
3236 Returns: Fileobject to write to, a commit function to
3237 call when the pack is finished and an abort
3238 function.
3239 """
3240 import tempfile
3242 pf = tempfile.SpooledTemporaryFile(
3243 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
3244 )
3246 def commit() -> Pack | None:
3247 if pf.tell() == 0:
3248 pf.close()
3249 return None
3251 pf.seek(0)
3253 p = PackData(pf.name, pf)
3254 entries = p.sorted_entries()
3255 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
3256 idxf = tempfile.SpooledTemporaryFile(
3257 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
3258 )
3259 checksum = p.get_stored_checksum()
3260 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
3261 idxf.seek(0)
3262 idx = load_pack_index_file(basename + ".idx", idxf)
3263 for pack in self.packs:
3264 if pack.get_stored_checksum() == p.get_stored_checksum():
3265 p.close()
3266 idx.close()
3267 pf.close()
3268 idxf.close()
3269 return pack
3270 pf.seek(0)
3271 idxf.seek(0)
3272 self._upload_pack(basename, pf, idxf) # type: ignore[arg-type]
3273 final_pack = Pack.from_objects(p, idx)
3274 self._add_cached_pack(basename, final_pack)
3275 pf.close()
3276 idxf.close()
3277 return final_pack
3279 return pf, commit, pf.close # type: ignore[return-value]
3282def _collect_ancestors(
3283 store: ObjectContainer,
3284 heads: Iterable[ObjectID],
3285 common: frozenset[ObjectID] = frozenset(),
3286 shallow: frozenset[ObjectID] = frozenset(),
3287 get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
3288) -> tuple[set[ObjectID], set[ObjectID]]:
3289 """Collect all ancestors of heads up to (excluding) those in common.
3291 Args:
3292 store: Object store to get commits from
3293 heads: commits to start from
3294 common: commits to end at, or empty set to walk repository
3295 completely
3296 shallow: Set of shallow commits
3297 get_parents: Optional function for getting the parents of a
3298 commit.
3299 Returns: A tuple (A, B), where A is the set of all commits reachable
3300 from heads but not present in common, and B is the set of common (shared)
3301 elements that are directly reachable from heads.
3302 """
3303 bases = set()
3304 commits = set()
3305 queue: list[ObjectID] = []
3306 queue.extend(heads)
3308 # Try to use commit graph if available
3309 commit_graph = store.get_commit_graph()
3311 while queue:
3312 e = queue.pop(0)
3313 if e in common:
3314 bases.add(e)
3315 elif e not in commits:
3316 commits.add(e)
3317 if e in shallow:
3318 continue
3320 # Try to use commit graph for parent lookup
3321 parents = None
3322 if commit_graph:
3323 parents = commit_graph.get_parents(e)
3325 if parents is None:
3326 # Fall back to loading the object
3327 cmt = store[e]
3328 assert isinstance(cmt, Commit)
3329 parents = get_parents(cmt)
3331 queue.extend(parents)
3332 return (commits, bases)
3335def iter_tree_contents(
3336 store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
3337) -> Iterator[TreeEntry]:
3338 """Iterate the contents of a tree and all subtrees.
3340 Iteration is depth-first pre-order, as in e.g. os.walk.
3342 Args:
3343 store: Object store to get trees from
3344 tree_id: SHA1 of the tree.
3345 include_trees: If True, include tree objects in the iteration.
3347 Yields: TreeEntry namedtuples for all the objects in a tree.
3348 """
3349 if tree_id is None:
3350 return
3351 # This could be fairly easily generalized to >2 trees if we find a use
3352 # case.
3353 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
3354 while todo:
3355 entry = todo.pop()
3356 assert entry.mode is not None
3357 if stat.S_ISDIR(entry.mode):
3358 extra = []
3359 assert entry.sha is not None
3360 tree = store[entry.sha]
3361 assert isinstance(tree, Tree)
3362 for subentry in tree.iteritems(name_order=True):
3363 assert entry.path is not None
3364 extra.append(subentry.in_path(entry.path))
3365 todo.extend(reversed(extra))
3366 if not stat.S_ISDIR(entry.mode) or include_trees:
3367 yield entry
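# Illustrative sketch (not part of this module): listing every blob under a
# tree; `tree_id` is hypothetical.
#
#     for entry in iter_tree_contents(store, tree_id):
#         print(entry.path, oct(entry.mode), entry.sha)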
3370def iter_commit_contents(
3371 store: ObjectContainer,
3372 commit: Commit | ObjectID | RawObjectID,
3373 *,
3374 include: Sequence[str | bytes | Path] | None = None,
3375) -> Iterator[TreeEntry]:
3376 """Iterate the contents of the repository at the specified commit.
3378 This is a wrapper around iter_tree_contents() and
3379 tree_lookup_path() to simplify the common task of getting the
3380 contents of a repo at a particular commit. See also
3381 dulwich.index.build_file_from_blob() for writing individual files
3382 to disk.
3384 Args:
3385 store: Object store to get trees from
3386 commit: Commit object, or SHA1 of a commit
3387 include: if provided, only the entries whose paths are in the
3388 list, or whose parent tree is in the list, will be
3389 included. Note that duplicate or overlapping paths
3390 (e.g. ["foo", "foo/bar"]) may result in duplicate entries
3392 Yields: TreeEntry namedtuples for all matching files in a commit.
3393 """
3394 sha = commit.id if isinstance(commit, Commit) else commit
3395 if not isinstance(obj := store[sha], Commit):
3396 raise TypeError(
3397 f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
3398 )
3399 commit = obj
3400 encoding = commit.encoding or "utf-8"
3401 include_bytes: list[bytes] = (
3402 [
3403 path if isinstance(path, bytes) else str(path).encode(encoding)
3404 for path in include
3405 ]
3406 if include is not None
3407 else [b""]
3408 )
3410 for path in include_bytes:
3411 mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
3412 # Iterate all contained files if path points to a dir, otherwise just get that
3413 # single file
3414 if isinstance(store[obj_id], Tree):
3415 for entry in iter_tree_contents(store, obj_id):
3416 yield entry.in_path(path)
3417 else:
3418 yield TreeEntry(path, mode, obj_id)
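# Illustrative sketch (not part of this module): restricting iteration to a
# subdirectory of a commit; `commit_sha` is hypothetical.
#
#     for entry in iter_commit_contents(store, commit_sha, include=[b"docs"]):
#         ...  # only entries under docs/ are yielded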
3421def peel_sha(
3422 store: ObjectContainer, sha: ObjectID | RawObjectID
3423) -> tuple[ShaFile, ShaFile]:
3424 """Peel all tags from a SHA.
3426 Args:
3427 store: Object store to get objects from
3428 sha: The object SHA to peel.
3429 Returns: A tuple of (unpeeled, peeled) objects: the object for the
3430 original SHA and the object reached after peeling all intermediate tags.
3431 If the original SHA does not point to a tag, both are the same object.
3432 """
3433 unpeeled = obj = store[sha]
3434 obj_class = object_class(obj.type_name)
3435 while obj_class is Tag:
3436 assert isinstance(obj, Tag)
3437 obj_class, sha = obj.object
3438 obj = store[sha]
3439 return unpeeled, obj
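# Illustrative sketch (not part of this module): resolving an annotated tag
# down to the commit it points at; `tag_sha` is hypothetical.
#
#     unpeeled, peeled = peel_sha(store, tag_sha)
#     # `unpeeled` is the Tag object itself, `peeled` the tagged commit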
3442class GraphTraversalReachability:
3443 """Naive graph traversal implementation of ObjectReachabilityProvider.
3445 This implementation wraps existing graph traversal functions
3446 (_collect_ancestors, _collect_filetree_revs) to provide the standard
3447 reachability interface without any performance optimizations.
3448 """
3450 def __init__(self, object_store: BaseObjectStore) -> None:
3451 """Initialize the graph traversal provider.
3453 Args:
3454 object_store: Object store to query
3455 """
3456 self.store = object_store
3458 def get_reachable_commits(
3459 self,
3460 heads: Iterable[ObjectID],
3461 exclude: Iterable[ObjectID] | None = None,
3462 shallow: Set[ObjectID] | None = None,
3463 ) -> set[ObjectID]:
3464 """Get all commits reachable from heads, excluding those in exclude.
3466 Uses _collect_ancestors for commit traversal.
3468 Args:
3469 heads: Starting commit SHAs
3470 exclude: Commit SHAs to exclude (and their ancestors)
3471 shallow: Set of shallow commit boundaries
3473 Returns:
3474 Set of commit SHAs reachable from heads but not from exclude
3475 """
3476 exclude_set = frozenset(exclude) if exclude else frozenset()
3477 shallow_set = frozenset(shallow) if shallow else frozenset()
3478 commits, _bases = _collect_ancestors(
3479 self.store, heads, exclude_set, shallow_set
3480 )
3481 return commits
3483 def get_tree_objects(
3484 self,
3485 tree_shas: Iterable[ObjectID],
3486 ) -> set[ObjectID]:
3487 """Get all trees and blobs reachable from the given trees.
3489 Uses _collect_filetree_revs for tree traversal.
3491 Args:
3492 tree_shas: Starting tree SHAs
3494 Returns:
3495 Set of tree and blob SHAs
3496 """
3497 result: set[ObjectID] = set()
3498 for tree_sha in tree_shas:
3499 _collect_filetree_revs(self.store, tree_sha, result)
3500 return result
3502 def get_reachable_objects(
3503 self,
3504 commits: Iterable[ObjectID],
3505 exclude_commits: Iterable[ObjectID] | None = None,
3506 ) -> set[ObjectID]:
3507 """Get all objects (commits + trees + blobs) reachable from commits.
3509 Args:
3510 commits: Starting commit SHAs
3511 exclude_commits: Commits whose objects should be excluded
3513 Returns:
3514 Set of all object SHAs (commits, trees, blobs)
3515 """
3516 commits_set = set(commits)
3517 result = set(commits_set)
3519 # Get trees for all commits
3520 tree_shas = []
3521 for commit_sha in commits_set:
3522 try:
3523 commit = self.store[commit_sha]
3524 if isinstance(commit, Commit):
3525 tree_shas.append(commit.tree)
3526 except KeyError:
3527 # Commit not in store, skip
3528 continue
3530 # Collect all tree/blob objects
3531 result.update(self.get_tree_objects(tree_shas))
3533 # Exclude objects from exclude_commits if needed
3534 if exclude_commits:
3535 exclude_objects = self.get_reachable_objects(exclude_commits, None)
3536 result -= exclude_objects
3538 return result
3541class BitmapReachability:
3542 """Bitmap-accelerated implementation of ObjectReachabilityProvider.
3544 This implementation uses packfile bitmap indexes where available to
3545 accelerate reachability queries. Falls back to graph traversal when
3546 bitmaps don't cover the requested commits.
3547 """
3549 def __init__(self, object_store: "PackBasedObjectStore") -> None:
3550 """Initialize the bitmap provider.
3552 Args:
3553 object_store: Pack-based object store with bitmap support
3554 """
3555 self.store = object_store
3556 # Fallback to graph traversal for operations not yet optimized
3557 self._fallback = GraphTraversalReachability(object_store)
3559 def _combine_commit_bitmaps(
3560 self,
3561 commit_shas: set[ObjectID],
3562 exclude_shas: set[ObjectID] | None = None,
3563 ) -> tuple["EWAHBitmap", "Pack"] | None:
3564 """Combine bitmaps for multiple commits using OR, with optional exclusion.
3566 Args:
3567 commit_shas: Set of commit SHAs to combine
3568 exclude_shas: Optional set of commit SHAs to exclude
3570 Returns:
3571 Tuple of (combined_bitmap, pack) or None if bitmaps unavailable
3572 """
3573 from .bitmap import find_commit_bitmaps
3575 # Find bitmaps for the commits
3576 commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)
3578 # If we can't find bitmaps for all commits, return None
3579 if len(commit_bitmaps) < len(commit_shas):
3580 return None
3582 # Combine bitmaps using OR
3583 combined_bitmap = None
3584 result_pack = None
3586 for commit_sha in commit_shas:
3587 pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
3588 commit_bitmap = pack_bitmap.get_bitmap(commit_sha)
3590 if commit_bitmap is None:
3591 return None
3593 if combined_bitmap is None:
3594 combined_bitmap = commit_bitmap
3595 result_pack = pack
3596 elif pack == result_pack:
3597 # Same pack, can OR directly
3598 combined_bitmap = combined_bitmap | commit_bitmap
3599 else:
3600 # Different packs, can't combine
3601 return None
3603 # Handle exclusions if provided
3604 if exclude_shas and result_pack and combined_bitmap:
3605 exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])
3607 if len(exclude_bitmaps) == len(exclude_shas):
3608 # All excludes have bitmaps, compute exclusion
3609 exclude_combined = None
3611 for commit_sha in exclude_shas:
3612 _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
3613 exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)
3615 if exclude_bitmap is None:
3616 break
3618 if exclude_combined is None:
3619 exclude_combined = exclude_bitmap
3620 else:
3621 exclude_combined = exclude_combined | exclude_bitmap
3623 # Subtract excludes using set difference
3624 if exclude_combined:
3625 combined_bitmap = combined_bitmap - exclude_combined
3627 if combined_bitmap and result_pack:
3628 return (combined_bitmap, result_pack)
3629 return None
3631 def get_reachable_commits(
3632 self,
3633 heads: Iterable[ObjectID],
3634 exclude: Iterable[ObjectID] | None = None,
3635 shallow: Set[ObjectID] | None = None,
3636 ) -> set[ObjectID]:
3637 """Get all commits reachable from heads using bitmaps where possible.
3639 Args:
3640 heads: Starting commit SHAs
3641 exclude: Commit SHAs to exclude (and their ancestors)
3642 shallow: Set of shallow commit boundaries
3644 Returns:
3645 Set of commit SHAs reachable from heads but not from exclude
3646 """
3647 from .bitmap import bitmap_to_object_shas
3649 # If shallow is specified, fall back to graph traversal
3650 # (bitmaps don't support shallow boundaries well)
3651 if shallow:
3652 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3654 heads_set = set(heads)
3655 exclude_set = set(exclude) if exclude else None
3657 # Try to combine bitmaps
3658 result = self._combine_commit_bitmaps(heads_set, exclude_set)
3659 if result is None:
3660 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3662 combined_bitmap, result_pack = result
3664 # Convert bitmap to commit SHAs, filtering for commits only
3665 pack_bitmap = result_pack.bitmap
3666 if pack_bitmap is None:
3667 return self._fallback.get_reachable_commits(heads, exclude, shallow)
3668 commit_type_filter = pack_bitmap.commit_bitmap
3669 return bitmap_to_object_shas(
3670 combined_bitmap, result_pack.index, commit_type_filter
3671 )
3673 def get_tree_objects(
3674 self,
3675 tree_shas: Iterable[ObjectID],
3676 ) -> set[ObjectID]:
3677 """Get all trees and blobs reachable from the given trees.
3679 Args:
3680 tree_shas: Starting tree SHAs
3682 Returns:
3683 Set of tree and blob SHAs
3684 """
3685 # Tree traversal doesn't benefit much from bitmaps, use fallback
3686 return self._fallback.get_tree_objects(tree_shas)
3688 def get_reachable_objects(
3689 self,
3690 commits: Iterable[ObjectID],
3691 exclude_commits: Iterable[ObjectID] | None = None,
3692 ) -> set[ObjectID]:
3693 """Get all objects reachable from commits using bitmaps.
3695 Args:
3696 commits: Starting commit SHAs
3697 exclude_commits: Commits whose objects should be excluded
3699 Returns:
3700 Set of all object SHAs (commits, trees, blobs)
3701 """
3702 from .bitmap import bitmap_to_object_shas
3704 commits_set = set(commits)
3705 exclude_set = set(exclude_commits) if exclude_commits else None
3707 # Try to combine bitmaps
3708 result = self._combine_commit_bitmaps(commits_set, exclude_set)
3709 if result is None:
3710 return self._fallback.get_reachable_objects(commits, exclude_commits)
3712 combined_bitmap, result_pack = result
3714 # Convert bitmap to all object SHAs (no type filter)
3715 return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)