Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/object_store.py: 20%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
"""Git object store interfaces and implementation."""

# Explicit public API of this module; anything not listed here is internal.
__all__ = [
    "DEFAULT_TEMPFILE_GRACE_PERIOD",
    "INFODIR",
    "PACKDIR",
    "PACK_MODE",
    "BaseObjectStore",
    "BitmapReachability",
    "BucketBasedObjectStore",
    "DiskObjectStore",
    "GraphTraversalReachability",
    "GraphWalker",
    "MemoryObjectStore",
    "MissingObjectFinder",
    "ObjectIterator",
    "ObjectReachabilityProvider",
    "ObjectStoreGraphWalker",
    "OverlayObjectStore",
    "PackBasedObjectStore",
    "PackCapableObjectStore",
    "PackContainer",
    "commit_tree_changes",
    "find_shallow",
    "get_depth",
    "iter_commit_contents",
    "iter_tree_contents",
    "peel_sha",
    "read_packs_file",
    "tree_lookup_path",
]
import binascii
import os
import stat
import sys
import time
import warnings
from collections import deque
from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence, Set
from contextlib import suppress
from io import BytesIO
from pathlib import Path
from typing import (
    TYPE_CHECKING,
    BinaryIO,
    Protocol,
    cast,
)
73if TYPE_CHECKING:
74 from .object_format import ObjectFormat
76from .errors import NotTreeError
77from .file import GitFile, _GitFile
78from .midx import MultiPackIndex, load_midx
79from .objects import (
80 S_ISGITLINK,
81 Blob,
82 Commit,
83 ObjectID,
84 RawObjectID,
85 ShaFile,
86 Tag,
87 Tree,
88 TreeEntry,
89 hex_to_filename,
90 hex_to_sha,
91 object_class,
92 sha_to_hex,
93 valid_hexsha,
94)
95from .pack import (
96 PACK_SPOOL_FILE_MAX_SIZE,
97 ObjectContainer,
98 Pack,
99 PackData,
100 PackedObjectContainer,
101 PackFileDisappeared,
102 PackHint,
103 PackIndexer,
104 PackInflater,
105 PackStreamCopier,
106 UnpackedObject,
107 extend_pack,
108 full_unpacked_object,
109 generate_unpacked_objects,
110 iter_sha1,
111 load_pack_index_file,
112 pack_objects_to_data,
113 write_pack_data,
114 write_pack_index,
115)
116from .protocol import DEPTH_INFINITE, PEELED_TAG_SUFFIX
117from .refs import Ref
119if TYPE_CHECKING:
120 from .bitmap import EWAHBitmap
121 from .commit_graph import CommitGraph
122 from .config import Config
123 from .diff_tree import RenameDetector
124 from .pack import Pack
class GraphWalker(Protocol):
    """Protocol for graph walker objects.

    A graph walker drives have/want negotiation: it yields candidate
    commit SHAs and is told which of them the other side already has.
    """

    def __next__(self) -> ObjectID | None:
        """Return the next object SHA to visit, or None when exhausted."""
        ...

    def ack(self, sha: ObjectID) -> None:
        """Acknowledge that an object has been received."""
        ...

    def nak(self) -> None:
        """Nothing in common was found."""
        ...
class ObjectReachabilityProvider(Protocol):
    """Protocol for computing object reachability queries.

    This abstraction allows reachability computations to be backed by either
    naive graph traversal or optimized bitmap indexes, with a consistent
    interface.  See GraphTraversalReachability and BitmapReachability for
    concrete implementations.
    """

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Args:
          heads: Starting commit SHAs
          exclude: Commit SHAs to exclude (and their ancestors)
          shallow: Set of shallow commit boundaries (traversal stops here)

        Returns:
          Set of commit SHAs reachable from heads but not from exclude
        """
        ...

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
          commits: Starting commit SHAs
          exclude_commits: Commits whose objects should be excluded

        Returns:
          Set of all object SHAs (commits, trees, blobs, tags)
        """
        ...

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
          tree_shas: Starting tree SHAs

        Returns:
          Set of tree and blob SHAs
        """
        ...
# Names of the subdirectories inside the object store directory.
INFODIR = "info"
PACKDIR = "pack"

# use permissions consistent with Git; just readable by everyone
# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
PACK_MODE = 0o444 if sys.platform != "win32" else 0o644

# Grace period for cleaning up temporary pack files (in seconds)
# Matches git's default of 2 weeks
DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60  # 2 weeks
212def find_shallow(
213 store: ObjectContainer, heads: Iterable[ObjectID], depth: int
214) -> tuple[set[ObjectID], set[ObjectID]]:
215 """Find shallow commits according to a given depth.
217 Args:
218 store: An ObjectStore for looking up objects.
219 heads: Iterable of head SHAs to start walking from.
220 depth: The depth of ancestors to include. A depth of one includes
221 only the heads themselves.
222 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
223 considered shallow and unshallow according to the arguments. Note that
224 these sets may overlap if a commit is reachable along multiple paths.
225 """
226 parents: dict[ObjectID, list[ObjectID]] = {}
227 commit_graph = store.get_commit_graph()
229 def get_parents(sha: ObjectID) -> list[ObjectID]:
230 result = parents.get(sha, None)
231 if not result:
232 # Try to use commit graph first if available
233 if commit_graph:
234 graph_parents = commit_graph.get_parents(sha)
235 if graph_parents is not None:
236 result = graph_parents
237 parents[sha] = result
238 return result
239 # Fall back to loading the object
240 commit = store[sha]
241 assert isinstance(commit, Commit)
242 result = commit.parents
243 parents[sha] = result
244 return result
246 todo = [] # stack of (sha, depth)
247 for head_sha in heads:
248 obj = store[head_sha]
249 # Peel tags if necessary
250 while isinstance(obj, Tag):
251 _, sha = obj.object
252 obj = store[sha]
253 if isinstance(obj, Commit):
254 todo.append((obj.id, 1))
256 not_shallow = set()
257 shallow = set()
258 while todo:
259 sha, cur_depth = todo.pop()
260 if cur_depth < depth:
261 not_shallow.add(sha)
262 new_depth = cur_depth + 1
263 todo.extend((p, new_depth) for p in get_parents(sha))
264 else:
265 shallow.add(sha)
267 return shallow, not_shallow
270def get_depth(
271 store: ObjectContainer,
272 head: ObjectID,
273 get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
274 max_depth: int | None = None,
275) -> int:
276 """Return the current available depth for the given head.
278 For commits with multiple parents, the largest possible depth will be
279 returned.
281 Args:
282 store: Object store to search in
283 head: commit to start from
284 get_parents: optional function for getting the parents of a commit
285 max_depth: maximum depth to search
286 """
287 if head not in store:
288 return 0
289 current_depth = 1
290 queue = [(head, current_depth)]
291 commit_graph = store.get_commit_graph()
293 while queue and (max_depth is None or current_depth < max_depth):
294 e, depth = queue.pop(0)
295 current_depth = max(current_depth, depth)
297 # Try to use commit graph for parent lookup if available
298 parents = None
299 if commit_graph:
300 parents = commit_graph.get_parents(e)
302 if parents is None:
303 # Fall back to loading the object
304 cmt = store[e]
305 if isinstance(cmt, Tag):
306 _cls, sha = cmt.object
307 cmt = store[sha]
308 parents = get_parents(cmt)
310 queue.extend((parent, depth + 1) for parent in parents if parent in store)
311 return current_depth
class PackContainer(Protocol):
    """Protocol for containers that can accept pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack.

        Returns: Tuple of (file-like to write pack data to, commit
          callable, abort callable).
        """
class BaseObjectStore:
    """Object store interface.

    Abstract base class defining the contract shared by all object stores
    (disk, memory, overlay, ...).  Subclasses must implement at least
    contains_loose, get_raw, __iter__, add_object and add_objects.
    """

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize object store.

        Args:
          object_format: Object format to use (defaults to DEFAULT_OBJECT_FORMAT)
        """
        # Local import; presumably avoids a module-level import cycle with
        # .object_format — TODO confirm.
        from .object_format import DEFAULT_OBJECT_FORMAT

        self.object_format = object_format if object_format else DEFAULT_OBJECT_FORMAT

    def determine_wants_all(
        self, refs: Mapping[Ref, ObjectID], depth: int | None = None
    ) -> list[ObjectID]:
        """Determine which objects are wanted based on refs.

        Args:
          refs: Mapping of ref name to object SHA.
          depth: Optional desired fetch depth; DEPTH_INFINITE means full
            history is wanted.
        Returns: SHAs that are missing locally or need deepening, excluding
          peeled-tag refs (those ending in PEELED_TAG_SUFFIX).
        """

        def _want_deepen(sha: ObjectID) -> bool:
            # An already-present sha is still wanted when the requested
            # depth exceeds the history currently available for it.
            if not depth:
                return False
            if depth == DEPTH_INFINITE:
                return True
            return depth > self._get_depth(sha)

        return [
            sha
            for (ref, sha) in refs.items()
            if (sha not in self or _want_deepen(sha))
            and not ref.endswith(PEELED_TAG_SUFFIX)
        ]

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        raise NotImplementedError(self.contains_loose)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        return False  # Default implementation for stores that don't support packing

    def __contains__(self, sha1: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1.

        This method makes no distinction between loose and packed objects.
        """
        return self.contains_loose(sha1)

    @property
    def packs(self) -> list[Pack]:
        """Iterable of pack objects."""
        raise NotImplementedError

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
          name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        raise NotImplementedError(self.get_raw)

    def __getitem__(self, sha1: ObjectID | RawObjectID) -> ShaFile:
        """Obtain an object by SHA1.

        Raises:
          KeyError: if the object is not present (propagated from get_raw).
        """
        type_num, uncomp = self.get_raw(sha1)
        return ShaFile.from_raw_string(
            type_num, uncomp, sha=sha1, object_format=self.object_format
        )

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        raise NotImplementedError(self.__iter__)

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        raise NotImplementedError(self.add_object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add a set of objects to this object store.

        Args:
          objects: Iterable over a list of (object, path) tuples
          progress: Optional progress callback
        """
        raise NotImplementedError(self.add_objects)

    def get_reachability_provider(
        self, prefer_bitmap: bool = True
    ) -> ObjectReachabilityProvider:
        """Get a reachability provider for this object store.

        Returns an ObjectReachabilityProvider that can efficiently compute
        object reachability queries. Subclasses can override this to provide
        optimized implementations (e.g., using bitmap indexes).

        Args:
          prefer_bitmap: Whether to prefer bitmap-based reachability if
            available.  Ignored here: the base implementation always uses
            plain graph traversal.

        Returns:
          ObjectReachabilityProvider instance
        """
        return GraphTraversalReachability(self)

    def tree_changes(
        self,
        source: ObjectID | None,
        target: ObjectID | None,
        want_unchanged: bool = False,
        include_trees: bool = False,
        change_type_same: bool = False,
        rename_detector: "RenameDetector | None" = None,
        paths: Sequence[bytes] | None = None,
    ) -> Iterator[
        tuple[
            tuple[bytes | None, bytes | None],
            tuple[int | None, int | None],
            tuple[ObjectID | None, ObjectID | None],
        ]
    ]:
        """Find the differences between the contents of two trees.

        Args:
          source: SHA1 of the source tree
          target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
          paths: Optional list of paths to filter to (as bytes).
        Returns: Iterator over tuples with
          (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
        """
        from .diff_tree import tree_changes

        # Flatten TreeChange objects into plain tuples; a missing side
        # (add/delete) is represented as None in each position.
        for change in tree_changes(
            self,
            source,
            target,
            want_unchanged=want_unchanged,
            include_trees=include_trees,
            change_type_same=change_type_same,
            rename_detector=rename_detector,
            paths=paths,
        ):
            old_path = change.old.path if change.old is not None else None
            new_path = change.new.path if change.new is not None else None
            old_mode = change.old.mode if change.old is not None else None
            new_mode = change.new.mode if change.new is not None else None
            old_sha = change.old.sha if change.old is not None else None
            new_sha = change.new.sha if change.new is not None else None
            yield (
                (old_path, new_path),
                (old_mode, new_mode),
                (old_sha, new_sha),
            )

    def iter_tree_contents(
        self, tree_id: ObjectID, include_trees: bool = False
    ) -> Iterator[TreeEntry]:
        """Iterate the contents of a tree and all subtrees.

        Iteration is depth-first pre-order, as in e.g. os.walk.

        Deprecated: use the module-level iter_tree_contents() instead.

        Args:
          tree_id: SHA1 of the tree.
          include_trees: If True, include tree objects in the iteration.
        Returns: Iterator over TreeEntry namedtuples for all the objects in a
          tree.
        """
        warnings.warn(
            "Please use dulwich.object_store.iter_tree_contents",
            DeprecationWarning,
            stacklevel=2,
        )
        return iter_tree_contents(self, tree_id, include_trees=include_trees)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects in the store.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        for sha in shas:
            try:
                yield self[sha]
            except KeyError:
                if not allow_missing:
                    raise

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> "Iterator[UnpackedObject]":
        """Iterate over unpacked objects for a subset of SHAs.

        Default implementation that converts ShaFile objects to UnpackedObject.
        Subclasses may override for more efficient unpacked access.

        Args:
          shas: Iterable of object SHAs to retrieve
          include_comp: Whether to include compressed data (ignored in base
            implementation)
          allow_missing: If True, skip missing objects; if False, raise
            KeyError
          convert_ofs_delta: Whether to convert OFS_DELTA objects (ignored in
            base implementation)

        Returns:
          Iterator of UnpackedObject instances

        Raises:
          KeyError: If an object is missing and allow_missing is False
        """
        from .pack import UnpackedObject

        for sha in shas:
            try:
                obj = self[sha]
                # Convert ShaFile to UnpackedObject
                unpacked = UnpackedObject(
                    obj.type_num, decomp_chunks=obj.as_raw_chunks(), sha=obj.id
                )
                yield unpacked
            except KeyError:
                if not allow_missing:
                    raise

    def find_missing_objects(
        self,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
    ) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Find the missing objects required for a set of revisions.

        Deprecated: use MissingObjectFinder(store) directly.

        Args:
          haves: Iterable over SHAs already in common.
          wants: Iterable over SHAs of objects to fetch.
          shallow: Set of shallow commit SHA1s to skip
          progress: Simple progress function that will be called with
            updated progress strings.
          get_tagged: Function that returns a dict of pointed-to sha ->
            tag sha for including tags.
          get_parents: Optional function for getting the parents of a
            commit.
        Returns: Iterator over (sha, path) pairs.
        """
        warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
        finder = MissingObjectFinder(
            self,
            haves=haves,
            wants=wants,
            shallow=shallow,
            progress=progress,
            get_tagged=get_tagged,
            get_parents=get_parents,
        )
        return iter(finder)

    def find_common_revisions(self, graphwalker: GraphWalker) -> list[ObjectID]:
        """Find which revisions this store has in common using graphwalker.

        Args:
          graphwalker: A graphwalker object.
        Returns: List of SHAs that are in common
        """
        haves = []
        sha = next(graphwalker)
        # The walker terminates the stream by yielding a falsy value (None).
        while sha:
            if sha in self:
                haves.append(sha)
                graphwalker.ack(sha)
            sha = next(graphwalker)
        return haves

    def generate_pack_data(
        self,
        have: Iterable[ObjectID],
        want: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[..., None] | None = None,
        ofs_delta: bool = True,
    ) -> tuple[int, Iterator[UnpackedObject]]:
        """Generate pack data objects for a set of wants/haves.

        Args:
          have: List of SHA1s of objects that should not be sent
          want: List of SHA1s of objects that should be sent
          shallow: Set of shallow commit SHA1s to skip
          ofs_delta: Whether OFS deltas can be included
          progress: Optional progress reporting method
        """
        # Note that the pack-specific implementation (in PackBasedObjectStore)
        # is more efficient, as it reuses deltas
        missing_objects = MissingObjectFinder(
            self, haves=have, wants=want, shallow=shallow, progress=progress
        )
        object_ids = list(missing_objects)
        return pack_objects_to_data(
            [(self[oid], path) for oid, path in object_ids],
            ofs_delta=ofs_delta,
            progress=progress,
        )

    def peel_sha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        """Peel all tags from a SHA.

        Deprecated: use the module-level peel_sha() instead.

        Args:
          sha: The object SHA to peel.
        Returns: The fully-peeled SHA1 of a tag object, after peeling all
          intermediate tags; if the original ref does not point to a tag,
          this will equal the original SHA1.
        """
        warnings.warn(
            "Please use dulwich.object_store.peel_sha()",
            DeprecationWarning,
            stacklevel=2,
        )
        return peel_sha(self, sha)[1].id

    def _get_depth(
        self,
        head: ObjectID,
        get_parents: Callable[..., list[ObjectID]] = lambda commit: commit.parents,
        max_depth: int | None = None,
    ) -> int:
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.

        Args:
          head: commit to start from
          get_parents: optional function for getting the parents of a commit
          max_depth: maximum depth to search
        """
        return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)

    def close(self) -> None:
        """Close any files opened by this object store."""
        # Default implementation is a NO-OP

    def prune(self, grace_period: int | None = None) -> None:
        """Prune/clean up this object store.

        This includes removing orphaned temporary files and other
        housekeeping tasks. Default implementation is a NO-OP.

        Args:
          grace_period: Grace period in seconds for removing temporary files.
            If None, uses the default grace period.
        """
        # Default implementation is a NO-OP

    def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
        """Iterate over all SHA1s that start with a given prefix.

        The default implementation is a naive iteration over all objects.
        However, subclasses may override this method with more efficient
        implementations.
        """
        for sha in self:
            if sha.startswith(prefix):
                yield sha

    def get_commit_graph(self) -> "CommitGraph | None":
        """Get the commit graph for this object store.

        Returns:
          CommitGraph object if available, None otherwise
        """
        return None

    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
          refs: List of refs to include. If None, includes all refs from object store.
          reachable: If True, includes all commits reachable from refs.
            If False, only includes the direct ref targets.

        Raises:
          NotImplementedError: always, in this base implementation.
            Subclasses that support commit graphs must override this method.
        """
        raise NotImplementedError(self.write_commit_graph)

    def get_object_mtime(self, sha: ObjectID) -> float:
        """Get the modification time of an object.

        Args:
          sha: SHA1 of the object

        Returns:
          Modification time as seconds since epoch

        Raises:
          KeyError: if the object is not found
        """
        # Default implementation raises KeyError
        # Subclasses should override to provide actual mtime
        raise KeyError(sha)
class PackCapableObjectStore(BaseObjectStore, PackedObjectContainer):
    """Object store that supports pack operations.

    Base class for stores able to deal with pack files, whether they keep
    their data on disk or in memory.  The pack-writing entry points here are
    abstract; the retrieval helpers have simple generic implementations.
    """

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)

        Raises:
          NotImplementedError: subclasses must provide pack creation.
        """
        raise NotImplementedError(self.add_pack)

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator["UnpackedObject"],
        progress: Callable[..., None] | None = None,
    ) -> "Pack | None":
        """Add pack data to this object store.

        Args:
          count: Number of objects
          unpacked_objects: Iterator over unpacked objects
          progress: Optional progress callback

        Raises:
          NotImplementedError: subclasses must provide pack writing.
        """
        raise NotImplementedError(self.add_pack_data)

    def get_unpacked_object(
        self, sha1: ObjectID | RawObjectID, *, include_comp: bool = False
    ) -> "UnpackedObject":
        """Get a raw unresolved object.

        Args:
          sha1: SHA-1 hash of the object
          include_comp: Whether to include compressed data

        Returns:
          UnpackedObject instance
        """
        from .pack import UnpackedObject

        resolved = self[sha1]
        raw_chunks = resolved.as_raw_chunks()
        return UnpackedObject(resolved.type_num, sha=sha1, decomp_chunks=raw_chunks)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects.

        Args:
          shas: Iterable of object SHAs to retrieve
          allow_missing: If True, skip missing objects

        Returns:
          Iterator of ShaFile objects

        Raises:
          KeyError: for a missing object when allow_missing is False
        """
        for object_id in shas:
            try:
                found = self[object_id]
            except KeyError:
                if allow_missing:
                    continue
                raise
            yield found
814class PackBasedObjectStore(PackCapableObjectStore, PackedObjectContainer):
815 """Object store that uses pack files for storage.
817 This class provides a base implementation for object stores that use
818 Git pack files as their primary storage mechanism. It handles caching
819 of open pack files and provides configuration for pack file operations.
820 """
    def __init__(
        self,
        pack_compression_level: int = -1,
        pack_index_version: int | None = None,
        pack_delta_window_size: int | None = None,
        pack_window_memory: int | None = None,
        pack_delta_cache_size: int | None = None,
        pack_depth: int | None = None,
        pack_threads: int | None = None,
        pack_big_file_threshold: int | None = None,
        *,
        object_format: "ObjectFormat | None" = None,
    ) -> None:
        """Initialize a PackBasedObjectStore.

        Args:
          pack_compression_level: Compression level for pack files (-1 to 9)
          pack_index_version: Pack index version to use
          pack_delta_window_size: Window size for delta compression
          pack_window_memory: Maximum memory to use for delta window
          pack_delta_cache_size: Cache size for delta operations
          pack_depth: Maximum depth for pack deltas
          pack_threads: Number of threads to use for packing
          pack_big_file_threshold: Threshold for treating files as "big"
          object_format: Hash algorithm to use
        """
        super().__init__(object_format=object_format)
        # Cache of open Pack objects, keyed by pack base name; see
        # _add_cached_pack / _clear_cached_packs / _update_pack_cache.
        self._pack_cache: dict[str, Pack] = {}
        self.pack_compression_level = pack_compression_level
        self.pack_index_version = pack_index_version
        self.pack_delta_window_size = pack_delta_window_size
        self.pack_window_memory = pack_window_memory
        self.pack_delta_cache_size = pack_delta_cache_size
        self.pack_depth = pack_depth
        self.pack_threads = pack_threads
        self.pack_big_file_threshold = pack_big_file_threshold
859 def get_reachability_provider(
860 self,
861 prefer_bitmaps: bool = True,
862 ) -> ObjectReachabilityProvider:
863 """Get the best reachability provider for the object store.
865 Args:
866 prefer_bitmaps: Whether to use bitmaps if available
868 Returns:
869 ObjectReachabilityProvider implementation (either bitmap-accelerated
870 or graph traversal)
871 """
872 if prefer_bitmaps:
873 # Check if any packs have bitmaps
874 has_bitmap = False
875 for pack in self.packs:
876 try:
877 # Try to access bitmap property
878 if pack.bitmap is not None:
879 has_bitmap = True
880 break
881 except FileNotFoundError:
882 # Bitmap file doesn't exist for this pack
883 continue
885 if has_bitmap:
886 return BitmapReachability(self)
888 # Fall back to graph traversal
889 return GraphTraversalReachability(self)
    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Tuple of (file, commit_func, abort_func)

        Raises:
          NotImplementedError: subclasses must provide pack creation.
        """
        raise NotImplementedError(self.add_pack)
895 def add_pack_data(
896 self,
897 count: int,
898 unpacked_objects: Iterator[UnpackedObject],
899 progress: Callable[..., None] | None = None,
900 ) -> "Pack | None":
901 """Add pack data to this object store.
903 Args:
904 count: Number of items to add
905 unpacked_objects: Iterator of UnpackedObject instances
906 progress: Optional progress callback
907 """
908 if count == 0:
909 # Don't bother writing an empty pack file
910 return None
911 f, commit, abort = self.add_pack()
912 try:
913 write_pack_data(
914 f.write,
915 unpacked_objects,
916 num_records=count,
917 progress=progress,
918 compression_level=self.pack_compression_level,
919 object_format=self.object_format,
920 )
921 except BaseException:
922 abort()
923 raise
924 else:
925 return commit()
    @property
    def alternates(self) -> list["BaseObjectStore"]:
        """Return list of alternate object stores.

        The base implementation has none; DiskObjectStore overrides this to
        honour objects/info/alternates.
        """
        return []
932 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
933 """Check if a particular object is present by SHA1 and is packed.
935 This does not check alternates.
936 """
937 for pack in self.packs:
938 try:
939 if sha in pack:
940 return True
941 except PackFileDisappeared:
942 pass
943 return False
945 def __contains__(self, sha: ObjectID | RawObjectID) -> bool:
946 """Check if a particular object is present by SHA1.
948 This method makes no distinction between loose and packed objects.
949 """
950 if self.contains_packed(sha) or self.contains_loose(sha):
951 return True
952 for alternate in self.alternates:
953 if sha in alternate:
954 return True
955 return False
957 def _add_cached_pack(self, base_name: str, pack: Pack) -> None:
958 """Add a newly appeared pack to the cache by path."""
959 prev_pack = self._pack_cache.get(base_name)
960 if prev_pack is not pack:
961 self._pack_cache[base_name] = pack
962 if prev_pack:
963 prev_pack.close()
965 def generate_pack_data(
966 self,
967 have: Iterable[ObjectID],
968 want: Iterable[ObjectID],
969 *,
970 shallow: Set[ObjectID] | None = None,
971 progress: Callable[..., None] | None = None,
972 ofs_delta: bool = True,
973 ) -> tuple[int, Iterator[UnpackedObject]]:
974 """Generate pack data objects for a set of wants/haves.
976 Args:
977 have: List of SHA1s of objects that should not be sent
978 want: List of SHA1s of objects that should be sent
979 shallow: Set of shallow commit SHA1s to skip
980 ofs_delta: Whether OFS deltas can be included
981 progress: Optional progress reporting method
982 """
983 missing_objects = MissingObjectFinder(
984 self, haves=have, wants=want, shallow=shallow, progress=progress
985 )
986 remote_has = missing_objects.get_remote_has()
987 object_ids = list(missing_objects)
988 return len(object_ids), generate_unpacked_objects(
989 self,
990 object_ids,
991 progress=progress,
992 ofs_delta=ofs_delta,
993 other_haves=remote_has,
994 )
996 def _clear_cached_packs(self) -> None:
997 pack_cache = self._pack_cache
998 self._pack_cache = {}
999 while pack_cache:
1000 (_name, pack) = pack_cache.popitem()
1001 pack.close()
    def _iter_cached_packs(self) -> Iterator[Pack]:
        """Iterate over the packs currently held in the in-memory cache."""
        return iter(self._pack_cache.values())
    def _update_pack_cache(self) -> list[Pack]:
        """Refresh the pack cache from backing storage.

        Subclasses must rescan storage and return the packs that were not
        already cached (the ``packs`` property concatenates this with the
        cached ones).
        """
        raise NotImplementedError(self._update_pack_cache)
    def close(self) -> None:
        """Close the object store and release resources.

        This method closes all cached pack files and frees associated resources.
        Can be called multiple times safely.
        """
        self._clear_cached_packs()
    def __del__(self) -> None:
        """Warn if the object store is being deleted with unclosed packs."""
        if self._pack_cache:
            # Local import even though warnings is imported at module level;
            # presumably guards against module globals being torn down at
            # interpreter shutdown — TODO confirm.
            import warnings

            warnings.warn(
                f"ObjectStore {self!r} was destroyed with {len(self._pack_cache)} "
                "unclosed pack(s). Please call close() explicitly.",
                ResourceWarning,
                stacklevel=2,
            )
            self.close()
    @property
    def packs(self) -> list[Pack]:
        """List with pack objects.

        Combines the already-cached packs with whatever _update_pack_cache
        discovers in backing storage.
        """
        return list(self._iter_cached_packs()) + list(self._update_pack_cache())
1035 def count_pack_files(self) -> int:
1036 """Count the number of pack files.
1038 Returns:
1039 Number of pack files (excluding those with .keep files)
1040 """
1041 count = 0
1042 for pack in self.packs:
1043 # Check if there's a .keep file for this pack
1044 keep_path = pack._basename + ".keep"
1045 if not os.path.exists(keep_path):
1046 count += 1
1047 return count
    def _iter_alternate_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all the objects in alternate stores."""
        for alternate in self.alternates:
            yield from alternate
    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects."""
        raise NotImplementedError(self._iter_loose_objects)
    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
        """Return the loose object for sha, or None if it is not stored loose."""
        raise NotImplementedError(self._get_loose_object)
def delete_loose_object(self, sha: ObjectID) -> None:
    """Delete a loose object.

    This method only handles loose objects. For packed objects,
    use repack(exclude=...) to exclude them during repacking.

    Args:
        sha: SHA of the loose object to delete.
    """
    raise NotImplementedError(self.delete_loose_object)
def _remove_pack(self, pack: "Pack") -> None:
    """Remove a pack from storage (close it and delete its files)."""
    raise NotImplementedError(self._remove_pack)
def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
    """Pack loose objects.

    Reads all loose objects into memory, writes them into a single new
    pack, and then removes the loose copies.

    Args:
        progress: Optional progress reporting callback

    Returns: Number of objects packed
    """
    objects: list[tuple[ShaFile, None]] = []
    for sha in self._iter_loose_objects():
        obj = self._get_loose_object(sha)
        # The object may have vanished between listing and reading; skip it.
        if obj is not None:
            objects.append((obj, None))
    self.add_objects(objects, progress=progress)
    # Only delete the loose copies once they are safely in the new pack.
    # (The second tuple element is always None and is deliberately unused.)
    for obj, _path in objects:
        self.delete_loose_object(obj.id)
    return len(objects)
def repack(
    self,
    exclude: Set[bytes] | None = None,
    progress: Callable[[str], None] | None = None,
) -> int:
    """Repack the packs in this repository.

    Note that this implementation is fairly naive and currently keeps all
    objects in memory while it repacks.

    Args:
        exclude: Optional set of object SHAs to exclude from repacking
        progress: Optional progress reporting callback

    Returns: Number of objects in the consolidated pack
    """
    if exclude is None:
        exclude = set()

    loose_objects = set()
    excluded_loose_objects = set()
    for sha in self._iter_loose_objects():
        if sha not in exclude:
            obj = self._get_loose_object(sha)
            # Skip objects that disappeared between listing and reading.
            if obj is not None:
                loose_objects.add(obj)
        else:
            excluded_loose_objects.add(sha)

    objects: set[tuple[ShaFile, None]] = {(obj, None) for obj in loose_objects}
    old_packs = {p.name(): p for p in self.packs}
    for pack in old_packs.values():
        objects.update(
            (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
        )

    # Only create a new pack if there are objects to pack
    if objects:
        # The name of the consolidated pack might match the name of a
        # pre-existing pack. Take care not to remove the newly created
        # consolidated pack.
        consolidated = self.add_objects(list(objects), progress=progress)
        if consolidated is not None:
            old_packs.pop(consolidated.name(), None)

    # Delete loose objects that were packed. loose_objects only ever
    # contains real ShaFile instances, so no None check is needed here.
    for obj in loose_objects:
        self.delete_loose_object(obj.id)
    # Delete excluded loose objects
    for sha in excluded_loose_objects:
        self.delete_loose_object(sha)
    for pack in old_packs.values():
        self._remove_pack(pack)
    self._update_pack_cache()
    return len(objects)
def generate_pack_bitmaps(
    self,
    refs: dict[Ref, ObjectID],
    *,
    commit_interval: int | None = None,
    progress: Callable[[str], None] | None = None,
) -> int:
    """Generate bitmap indexes for all packs that don't have them.

    This generates .bitmap files for packfiles, enabling fast reachability
    queries. Equivalent to the bitmap generation part of 'git repack -b'.

    Args:
        refs: Dictionary of ref names to commit SHAs
        commit_interval: Include every Nth commit in bitmap index (None for default)
        progress: Optional progress reporting callback

    Returns:
        Number of bitmaps generated
    """
    generated = 0
    for pack in self.packs:
        pack.ensure_bitmap(
            self, refs, commit_interval=commit_interval, progress=progress
        )
        generated += 1

    # Refresh the cache so newly written bitmaps become visible.
    self._update_pack_cache()

    return generated
def __iter__(self) -> Iterator[ObjectID]:
    """Iterate over the SHAs that are present in this store."""
    self._update_pack_cache()
    for pack in self._iter_cached_packs():
        # A pack may be deleted out from under us; skip it if so.
        try:
            for sha in pack:
                yield sha
        except PackFileDisappeared:
            pass
    yield from self._iter_loose_objects()
    yield from self._iter_alternate_objects()
def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
    """Check if a particular object is present by SHA1 and is loose.

    This does not check alternates.
    """
    obj = self._get_loose_object(sha)
    return obj is not None
def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
    """Obtain the raw fulltext for an object.

    Lookup order: cached packs, then loose storage, then any packs that
    appeared since the cache was last refreshed, then alternates.

    Args:
        name: sha for the object.
    Returns: tuple with numeric type and object contents.
    Raises:
        KeyError: if the object is not found anywhere.
    """
    sha: RawObjectID
    # Accept either a hex name or a raw binary OID.
    if len(name) == self.object_format.hex_length:
        sha = hex_to_sha(ObjectID(name))
        hexsha = name
    elif len(name) == self.object_format.oid_length:
        sha = RawObjectID(name)
        hexsha = None  # computed lazily below, only if actually needed
    else:
        raise AssertionError(f"Invalid object name {name!r}")
    for pack in self._iter_cached_packs():
        try:
            return pack.get_raw(sha)
        except (KeyError, PackFileDisappeared):
            pass
    if hexsha is None:
        hexsha = sha_to_hex(sha)
    ret = self._get_loose_object(hexsha)
    if ret is not None:
        return ret.type_num, ret.as_raw_string()
    # Maybe something else has added a pack with the object
    # in the mean time?
    for pack in self._update_pack_cache():
        try:
            return pack.get_raw(sha)
        except KeyError:
            pass
    for alternate in self.alternates:
        try:
            return alternate.get_raw(hexsha)
        except KeyError:
            pass
    raise KeyError(hexsha)
def iter_unpacked_subset(
    self,
    shas: Iterable[ObjectID | RawObjectID],
    include_comp: bool = False,
    allow_missing: bool = False,
    convert_ofs_delta: bool = True,
) -> Iterator[UnpackedObject]:
    """Iterate over a subset of objects, yielding UnpackedObject instances.

    Searches cached packs first, then packs that appeared since the cache
    was refreshed, then alternates. Each found SHA is removed from the
    working set so an object is yielded at most once.

    Args:
        shas: Set of object SHAs to retrieve
        include_comp: Whether to include compressed data
        allow_missing: If True, skip missing objects; if False, raise KeyError
        convert_ofs_delta: Whether to convert OFS_DELTA objects

    Returns:
        Iterator of UnpackedObject instances

    Raises:
        KeyError: If an object is missing and allow_missing is False
    """
    # NOTE(review): entries still in `todo` at the end are silently dropped
    # regardless of allow_missing — confirm whether a final KeyError (as in
    # iterobjects_subset) was intended. Also, removal uses
    # sha_to_hex(unpacked.sha()), which assumes callers pass hex SHAs.
    todo: set[ObjectID | RawObjectID] = set(shas)
    for p in self._iter_cached_packs():
        for unpacked in p.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            hexsha = sha_to_hex(unpacked.sha())
            todo.remove(hexsha)
    # Maybe something else has added a pack with the object
    # in the mean time?
    for p in self._update_pack_cache():
        for unpacked in p.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            hexsha = sha_to_hex(unpacked.sha())
            todo.remove(hexsha)
    for alternate in self.alternates:
        assert isinstance(alternate, PackBasedObjectStore)
        for unpacked in alternate.iter_unpacked_subset(
            todo,
            include_comp=include_comp,
            allow_missing=True,
            convert_ofs_delta=convert_ofs_delta,
        ):
            yield unpacked
            hexsha = sha_to_hex(unpacked.sha())
            todo.remove(hexsha)
def iterobjects_subset(
    self, shas: Iterable[ObjectID], *, allow_missing: bool = False
) -> Iterator[ShaFile]:
    """Iterate over a subset of objects in the store.

    This method searches for objects in pack files, alternates, and loose storage.

    Args:
        shas: Iterable of object SHAs to retrieve
        allow_missing: If True, skip missing objects; if False, raise KeyError

    Returns:
        Iterator of ShaFile objects

    Raises:
        KeyError: If an object is missing and allow_missing is False
    """
    # Found SHAs are removed from `todo` so each object is yielded once.
    todo: set[ObjectID] = set(shas)
    for p in self._iter_cached_packs():
        for o in p.iterobjects_subset(todo, allow_missing=True):
            yield o
            todo.remove(o.id)
    # Maybe something else has added a pack with the object
    # in the mean time?
    for p in self._update_pack_cache():
        for o in p.iterobjects_subset(todo, allow_missing=True):
            yield o
            todo.remove(o.id)
    for alternate in self.alternates:
        for o in alternate.iterobjects_subset(todo, allow_missing=True):
            yield o
            todo.remove(o.id)
    # Anything left must be loose (or is genuinely missing).
    for oid in todo:
        loose_obj: ShaFile | None = self._get_loose_object(oid)
        if loose_obj is not None:
            yield loose_obj
        elif not allow_missing:
            raise KeyError(oid)
def get_unpacked_object(
    self, sha1: bytes, *, include_comp: bool = False
) -> UnpackedObject:
    """Obtain the unpacked object.

    Lookup order: cached packs, then freshly discovered packs, then
    alternates (loose storage is not consulted here).

    Args:
        sha1: sha for the object.
        include_comp: Whether to include compression metadata.

    Raises:
        KeyError: if the object is not found in any pack or alternate.
    """
    # Accept either a hex name or a raw binary OID.
    if len(sha1) == self.object_format.hex_length:
        sha = hex_to_sha(cast(ObjectID, sha1))
        hexsha = cast(ObjectID, sha1)
    elif len(sha1) == self.object_format.oid_length:
        sha = cast(RawObjectID, sha1)
        hexsha = None  # computed lazily below, only if needed
    else:
        raise AssertionError(f"Invalid object sha1 {sha1!r}")
    for pack in self._iter_cached_packs():
        try:
            return pack.get_unpacked_object(sha, include_comp=include_comp)
        except (KeyError, PackFileDisappeared):
            pass
    if hexsha is None:
        hexsha = sha_to_hex(sha)
    # Maybe something else has added a pack with the object
    # in the mean time?
    for pack in self._update_pack_cache():
        try:
            return pack.get_unpacked_object(sha, include_comp=include_comp)
        except KeyError:
            pass
    for alternate in self.alternates:
        assert isinstance(alternate, PackBasedObjectStore)
        try:
            return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
        except KeyError:
            pass
    raise KeyError(hexsha)
def add_objects(
    self,
    objects: Sequence[tuple[ShaFile, str | None]],
    progress: Callable[[str], None] | None = None,
) -> "Pack | None":
    """Add a set of objects to this object store.

    Args:
        objects: Iterable over (object, path) tuples, should support
            __len__.
        progress: Optional progress reporting function.
    Returns: Pack object of the objects written.
    """
    num_objects = len(objects)
    # The path component of each tuple is not needed for packing.
    unpacked = (full_unpacked_object(obj) for (obj, _path) in objects)
    return self.add_pack_data(num_objects, unpacked, progress=progress)
1387class DiskObjectStore(PackBasedObjectStore):
1388 """Git-style object store that exists on disk."""
1390 path: str | os.PathLike[str]
1391 pack_dir: str | os.PathLike[str]
1392 _alternates: "list[BaseObjectStore] | None"
1393 _commit_graph: "CommitGraph | None"
def __init__(
    self,
    path: str | os.PathLike[str],
    *,
    loose_compression_level: int = -1,
    pack_compression_level: int = -1,
    pack_index_version: int | None = None,
    pack_delta_window_size: int | None = None,
    pack_window_memory: int | None = None,
    pack_delta_cache_size: int | None = None,
    pack_depth: int | None = None,
    pack_threads: int | None = None,
    pack_big_file_threshold: int | None = None,
    fsync_object_files: bool = False,
    pack_write_bitmaps: bool = False,
    pack_write_bitmap_hash_cache: bool = True,
    pack_write_bitmap_lookup_table: bool = True,
    file_mode: int | None = None,
    dir_mode: int | None = None,
    object_format: "ObjectFormat | None" = None,
) -> None:
    """Open an object store.

    Args:
        path: Path of the object store.
        loose_compression_level: zlib compression level for loose objects
        pack_compression_level: zlib compression level for pack objects
        pack_index_version: pack index version to use (1, 2, or 3)
        pack_delta_window_size: sliding window size for delta compression
        pack_window_memory: memory limit for delta window operations
        pack_delta_cache_size: size of cache for delta operations
        pack_depth: maximum delta chain depth
        pack_threads: number of threads for pack operations
        pack_big_file_threshold: threshold for treating files as big
        fsync_object_files: whether to fsync object files for durability
        pack_write_bitmaps: whether to write bitmap indexes for packs
        pack_write_bitmap_hash_cache: whether to include name-hash cache in bitmaps
        pack_write_bitmap_lookup_table: whether to include lookup table in bitmaps
        file_mode: File permission mask for shared repository
        dir_mode: Directory permission mask for shared repository
        object_format: Hash algorithm to use (SHA1 or SHA256)
    """
    # Import here to avoid circular dependency
    from .object_format import DEFAULT_OBJECT_FORMAT

    # Pack-related tuning knobs are handled by the base class.
    super().__init__(
        pack_compression_level=pack_compression_level,
        pack_index_version=pack_index_version,
        pack_delta_window_size=pack_delta_window_size,
        pack_window_memory=pack_window_memory,
        pack_delta_cache_size=pack_delta_cache_size,
        pack_depth=pack_depth,
        pack_threads=pack_threads,
        pack_big_file_threshold=pack_big_file_threshold,
        object_format=object_format if object_format else DEFAULT_OBJECT_FORMAT,
    )
    self.path = path
    self.pack_dir = os.path.join(self.path, PACKDIR)
    # Alternates are read lazily from info/alternates (see `alternates`).
    self._alternates = None
    self.loose_compression_level = loose_compression_level
    self.pack_compression_level = pack_compression_level
    self.pack_index_version = pack_index_version
    self.fsync_object_files = fsync_object_files
    self.pack_write_bitmaps = pack_write_bitmaps
    self.pack_write_bitmap_hash_cache = pack_write_bitmap_hash_cache
    self.pack_write_bitmap_lookup_table = pack_write_bitmap_lookup_table
    self.file_mode = file_mode
    self.dir_mode = dir_mode

    # Commit graph support - lazy loaded
    self._commit_graph = None
    self._use_commit_graph = True  # Default to true

    # Multi-pack-index support - lazy loaded
    self._midx: MultiPackIndex | None = None
    self._use_midx = True  # Default to true
def __repr__(self) -> str:
    """Return string representation of DiskObjectStore.

    Returns:
        String representation including the store path
    """
    return "<{}({!r})>".format(self.__class__.__name__, self.path)
@classmethod
def from_config(
    cls,
    path: str | os.PathLike[str],
    config: "Config",
    *,
    file_mode: int | None = None,
    dir_mode: int | None = None,
) -> "DiskObjectStore":
    """Create a DiskObjectStore from a configuration object.

    Args:
        path: Path to the object store directory
        config: Configuration object to read settings from
        file_mode: Optional file permission mask for shared repository
        dir_mode: Optional directory permission mask for shared repository

    Returns:
        New DiskObjectStore instance configured according to config
    """

    def get_int(
        section: tuple[bytes, ...], name: bytes, default: int | None
    ) -> int | None:
        # Missing keys fall back to the default; malformed values still
        # raise ValueError, preserving the previous behaviour.
        try:
            return int(config.get(section, name).decode())
        except KeyError:
            return default

    default_compression_level = get_int((b"core",), b"compression", -1)
    loose_compression_level = get_int(
        (b"core",), b"looseCompression", default_compression_level
    )
    # Key passed as bytes for consistency with the sibling lookups (it was
    # previously a str, relying on Config.get's key normalization).
    pack_compression_level = get_int(
        (b"core",), b"packCompression", default_compression_level
    )
    pack_index_version = get_int((b"pack",), b"indexVersion", None)

    # Read pack configuration options
    pack_delta_window_size = get_int((b"pack",), b"deltaWindowSize", None)
    pack_window_memory = get_int((b"pack",), b"windowMemory", None)
    pack_delta_cache_size = get_int((b"pack",), b"deltaCacheSize", None)
    pack_depth = get_int((b"pack",), b"depth", None)
    pack_threads = get_int((b"pack",), b"threads", None)
    pack_big_file_threshold = get_int((b"pack",), b"bigFileThreshold", None)

    # Read core.commitGraph setting
    use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)

    # Read core.multiPackIndex setting
    use_midx = config.get_boolean((b"core",), b"multiPackIndex", True)

    # Read core.fsyncObjectFiles setting
    fsync_object_files = config.get_boolean((b"core",), b"fsyncObjectFiles", False)

    # Read bitmap settings
    pack_write_bitmaps = config.get_boolean((b"pack",), b"writeBitmaps", False)
    pack_write_bitmap_hash_cache = config.get_boolean(
        (b"pack",), b"writeBitmapHashCache", True
    )
    pack_write_bitmap_lookup_table = config.get_boolean(
        (b"pack",), b"writeBitmapLookupTable", True
    )
    # Also check repack.writeBitmaps for backwards compatibility
    if not pack_write_bitmaps:
        pack_write_bitmaps = config.get_boolean(
            (b"repack",), b"writeBitmaps", False
        )

    # Get hash algorithm from config
    from .object_format import get_object_format

    object_format = None
    try:
        try:
            version = int(config.get((b"core",), b"repositoryformatversion"))
        except KeyError:
            version = 0
        if version == 1:
            # extensions.objectFormat is only honoured for format version 1.
            try:
                object_format_name = config.get((b"extensions",), b"objectformat")
            except KeyError:
                object_format_name = b"sha1"
            object_format = get_object_format(object_format_name.decode("ascii"))
    except (KeyError, ValueError):
        pass

    instance = cls(
        path,
        loose_compression_level=loose_compression_level,
        pack_compression_level=pack_compression_level,
        pack_index_version=pack_index_version,
        pack_delta_window_size=pack_delta_window_size,
        pack_window_memory=pack_window_memory,
        pack_delta_cache_size=pack_delta_cache_size,
        pack_depth=pack_depth,
        pack_threads=pack_threads,
        pack_big_file_threshold=pack_big_file_threshold,
        fsync_object_files=fsync_object_files,
        pack_write_bitmaps=pack_write_bitmaps,
        pack_write_bitmap_hash_cache=pack_write_bitmap_hash_cache,
        pack_write_bitmap_lookup_table=pack_write_bitmap_lookup_table,
        file_mode=file_mode,
        dir_mode=dir_mode,
        object_format=object_format,
    )
    instance._use_commit_graph = use_commit_graph
    instance._use_midx = use_midx
    return instance
@property
def alternates(self) -> list["BaseObjectStore"]:
    """Get the list of alternate object stores.

    Reads from .git/objects/info/alternates if not already cached.

    Returns:
        List of DiskObjectStore instances for alternate object directories
    """
    if self._alternates is None:
        self._alternates = [
            DiskObjectStore(alt_path) for alt_path in self._read_alternate_paths()
        ]
    return self._alternates
def _read_alternate_paths(self) -> Iterator[str]:
    """Yield the alternate store paths listed in info/alternates."""
    try:
        f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
    except FileNotFoundError:
        return
    with f:
        for raw_line in f.readlines():
            entry = raw_line.rstrip(b"\n")
            # Lines starting with '#' are comments.
            if entry.startswith(b"#"):
                continue
            if os.path.isabs(entry):
                yield os.fsdecode(entry)
            else:
                # Relative entries are resolved against the store path.
                yield os.fsdecode(os.path.join(os.fsencode(self.path), entry))
def add_alternate_path(self, path: str | os.PathLike[str]) -> None:
    """Add an alternate path to this object store."""
    # Ensure the info directory exists, applying dir_mode when configured.
    info_dir = os.path.join(self.path, INFODIR)
    try:
        os.mkdir(info_dir)
        if self.dir_mode is not None:
            os.chmod(info_dir, self.dir_mode)
    except FileExistsError:
        pass
    alternates_path = os.path.join(self.path, INFODIR, "alternates")
    mask = 0o644 if self.file_mode is None else self.file_mode
    with GitFile(alternates_path, "wb", mask=mask) as f:
        # GitFile writes to a temporary file, so the original alternates
        # file (if any) is still readable; copy it before appending.
        try:
            orig_f = open(alternates_path, "rb")
        except FileNotFoundError:
            pass
        else:
            with orig_f:
                f.write(orig_f.read())
        f.write(os.fsencode(path) + b"\n")

    # Register the new alternate in the in-memory list as well.
    if not os.path.isabs(path):
        path = os.path.join(self.path, path)
    self.alternates.append(DiskObjectStore(path))
def _update_pack_cache(self) -> list[Pack]:
    """Read and iterate over new pack files and cache them."""
    try:
        pack_dir_contents = os.listdir(self.pack_dir)
    except FileNotFoundError:
        # No pack directory means no packs.
        return []
    pack_files = set()
    for name in pack_dir_contents:
        if name.startswith("pack-") and name.endswith(".pack"):
            # verify that idx exists first (otherwise the pack was not yet
            # fully written)
            idx_name = os.path.splitext(name)[0] + ".idx"
            if idx_name in pack_dir_contents:
                # Extract just the hash (remove "pack-" prefix and ".pack" suffix)
                pack_hash = name[len("pack-") : -len(".pack")]
                pack_files.add(pack_hash)

    # Open newly appeared pack files
    new_packs = []
    for pack_hash in pack_files:
        if pack_hash not in self._pack_cache:
            pack = Pack(
                os.path.join(self.pack_dir, "pack-" + pack_hash),
                object_format=self.object_format,
                delta_window_size=self.pack_delta_window_size,
                window_memory=self.pack_window_memory,
                delta_cache_size=self.pack_delta_cache_size,
                depth=self.pack_depth,
                threads=self.pack_threads,
                big_file_threshold=self.pack_big_file_threshold,
            )
            new_packs.append(pack)
            self._pack_cache[pack_hash] = pack
    # Remove disappeared pack files (close them so their file handles
    # are released promptly).
    for f in set(self._pack_cache) - pack_files:
        self._pack_cache.pop(f).close()
    return new_packs
def _get_shafile_path(self, sha: ObjectID | RawObjectID) -> str:
    """Return the on-disk path of the loose object with the given SHA."""
    path = hex_to_filename(os.fspath(self.path), sha)
    return path
def _iter_loose_objects(self) -> Iterator[ObjectID]:
    """Yield the hex SHA of every loose object under the store path."""
    for fan_out in os.listdir(self.path):
        # Loose objects live in two-character fan-out directories.
        if len(fan_out) != 2:
            continue
        for filename in os.listdir(os.path.join(self.path, fan_out)):
            candidate = os.fsencode(fan_out + filename)
            if valid_hexsha(candidate):
                yield ObjectID(candidate)
def count_loose_objects(self) -> int:
    """Count the number of loose objects in the object store.

    Returns:
        Number of loose objects
    """
    # Loose object filenames omit the two leading fan-out characters.
    name_len = self.object_format.hex_length - 2
    if not os.path.exists(self.path):
        return 0

    total = 0
    for i in range(256):
        subdir = os.path.join(self.path, f"{i:02x}")
        try:
            entries = os.listdir(subdir)
        except FileNotFoundError:
            # Directory may have been removed or is inaccessible
            continue
        total += sum(1 for name in entries if len(name) == name_len)

    return total
def _get_loose_object(self, sha: ObjectID | RawObjectID) -> ShaFile | None:
    """Load a loose object from disk, or return None if it is absent."""
    path = self._get_shafile_path(sha)
    try:
        # Accept either a raw binary OID or a hex ObjectID; normalize to hex
        # before loading so the file contents can be verified against it.
        if len(sha) == self.object_format.oid_length:
            hex_sha: ObjectID = sha_to_hex(RawObjectID(sha))
        else:
            hex_sha = ObjectID(sha)
        return ShaFile.from_path(path, hex_sha, object_format=self.object_format)
    except FileNotFoundError:
        return None
def delete_loose_object(self, sha: ObjectID) -> None:
    """Delete a loose object from disk.

    Args:
        sha: SHA1 of the object to delete

    Raises:
        FileNotFoundError: If the object file doesn't exist
    """
    path = self._get_shafile_path(sha)
    os.remove(path)
def get_object_mtime(self, sha: ObjectID) -> float:
    """Get the modification time of an object.

    Args:
        sha: SHA1 of the object

    Returns:
        Modification time as seconds since epoch

    Raises:
        KeyError: if the object is not found
    """
    # Prefer the loose copy when one exists.
    if self.contains_loose(sha):
        with suppress(FileNotFoundError):
            return os.path.getmtime(self._get_shafile_path(sha))

    # Otherwise use the mtime of the pack file that contains the object.
    for pack in self.packs:
        with suppress(PackFileDisappeared):
            if sha in pack:
                pack_path = pack._data_path
                with suppress(FileNotFoundError, AttributeError):
                    return os.path.getmtime(pack_path)

    raise KeyError(sha)
def _remove_pack(self, pack: Pack) -> None:
    """Close a pack, drop it from the cache and delete its files."""
    cache_key = os.path.basename(pack._basename)
    self._pack_cache.pop(cache_key, None)
    # Store paths before closing to avoid re-opening files on Windows
    data_path = pack._data_path
    idx_path = pack._idx_path
    pack.close()
    os.remove(data_path)
    if os.path.exists(idx_path):
        os.remove(idx_path)
def _get_pack_basepath(
    self, entries: Iterable[tuple[bytes, int, int | None]]
) -> str:
    """Compute the base path (without extension) for a pack of these entries."""
    digest = iter_sha1(entry[0] for entry in entries)
    # TODO: Handle self.pack_dir being bytes
    name = "pack-" + digest.decode("ascii")
    return os.path.join(self.pack_dir, name)
def _complete_pack(
    self,
    f: BinaryIO,
    path: str,
    num_objects: int,
    indexer: PackIndexer,
    progress: Callable[..., None] | None = None,
    refs: dict[Ref, ObjectID] | None = None,
) -> Pack:
    """Move a specific file containing a pack into the pack directory.

    Note: The file should be on the same file system as the
    packs directory.

    Args:
        f: Open file object for the pack.
        path: Path to the pack file.
        num_objects: Number of objects in the pack.
        indexer: A PackIndexer for indexing the pack.
        progress: Optional progress reporting function.
        refs: Optional dictionary of refs for bitmap generation.

    Returns:
        The completed Pack, registered in the pack cache.
    """
    entries = []
    for i, entry in enumerate(indexer):
        if progress is not None:
            progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
        entries.append(entry)

    # Append any externally referenced delta bases (thin-pack completion).
    pack_sha, extra_entries = extend_pack(
        f,
        set(indexer.ext_refs()),
        get_raw=self.get_raw,
        compression_level=self.pack_compression_level,
        progress=progress,
        object_format=self.object_format,
    )
    f.flush()
    if self.fsync_object_files:
        try:
            fileno = f.fileno()
        except AttributeError as e:
            raise OSError("fsync requested but file has no fileno()") from e
        else:
            os.fsync(fileno)
    f.close()

    entries.extend(extra_entries)

    # Move the pack in.
    entries.sort()
    pack_base_name = self._get_pack_basepath(entries)

    # If a pack with the same name already exists, reuse it instead of
    # overwriting it.
    for pack in self.packs:
        if pack._basename == pack_base_name:
            return pack

    target_pack_path = pack_base_name + ".pack"
    target_index_path = pack_base_name + ".idx"
    if sys.platform == "win32":
        # Windows might have the target pack file lingering. Attempt
        # removal, silently passing if the target does not exist.
        with suppress(FileNotFoundError):
            os.remove(target_pack_path)
    os.rename(path, target_pack_path)

    # Write the index.
    mask = self.file_mode if self.file_mode is not None else PACK_MODE
    with GitFile(
        target_index_path,
        "wb",
        mask=mask,
        fsync=self.fsync_object_files,
    ) as index_file:
        write_pack_index(
            index_file, entries, pack_sha, version=self.pack_index_version
        )

    # Generate bitmap if configured and refs are available
    if self.pack_write_bitmaps and refs:
        from .bitmap import generate_bitmap, write_bitmap
        from .pack import load_pack_index_file

        if progress:
            progress("Generating bitmap index\r".encode("ascii"))

        # Load the index we just wrote
        with open(target_index_path, "rb") as idx_file:
            pack_index = load_pack_index_file(
                os.path.basename(target_index_path),
                idx_file,
                self.object_format,
            )

        # Generate the bitmap
        bitmap = generate_bitmap(
            pack_index=pack_index,
            object_store=self,
            refs=refs,
            pack_checksum=pack_sha,
            include_hash_cache=self.pack_write_bitmap_hash_cache,
            include_lookup_table=self.pack_write_bitmap_lookup_table,
            progress=lambda msg: (
                progress(msg.encode("ascii"))
                if progress and isinstance(msg, str)
                else None
            ),
        )

        # Write the bitmap
        target_bitmap_path = pack_base_name + ".bitmap"
        write_bitmap(target_bitmap_path, bitmap)

        if progress:
            progress("Bitmap index written\r".encode("ascii"))

    # Add the pack to the store and return it.
    final_pack = Pack(
        pack_base_name,
        object_format=self.object_format,
        delta_window_size=self.pack_delta_window_size,
        window_memory=self.pack_window_memory,
        delta_cache_size=self.pack_delta_cache_size,
        depth=self.pack_depth,
        threads=self.pack_threads,
        big_file_threshold=self.pack_big_file_threshold,
    )
    final_pack.check_length_and_checksum()
    # Extract just the hash from pack_base_name (/path/to/pack-HASH -> HASH)
    pack_hash = os.path.basename(pack_base_name)[len("pack-") :]
    self._add_cached_pack(pack_hash, final_pack)
    return final_pack
def add_thin_pack(
    self,
    read_all: Callable[[int], bytes],
    read_some: Callable[[int], bytes] | None,
    progress: Callable[..., None] | None = None,
) -> "Pack":
    """Add a new thin pack to this object store.

    Thin packs are packs that contain deltas with parents that exist
    outside the pack. They should never be placed in the object store
    directly, and always indexed and completed as they are copied.

    Args:
        read_all: Read function that blocks until the number of
            requested bytes are read.
        read_some: Read function that returns at least one byte, but may
            not return the number of bytes requested.
        progress: Optional progress reporting function.
    Returns: A Pack object pointing at the now-completed thin pack in the
        objects/pack directory.
    """
    import tempfile

    fd, tmp_path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
    with os.fdopen(fd, "w+b") as tmp_file:
        os.chmod(tmp_path, PACK_MODE)
        # Index while copying; external delta bases are resolved via get_raw.
        indexer = PackIndexer(
            tmp_file,
            self.object_format.hash_func,
            resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
        )
        copier = PackStreamCopier(
            self.object_format.hash_func,
            read_all,
            read_some,
            tmp_file,
            delta_iter=indexer,  # type: ignore[arg-type]
        )
        copier.verify(progress=progress)
        return self._complete_pack(
            tmp_file, tmp_path, len(copier), indexer, progress=progress
        )
def add_pack(
    self,
) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
    """Add a new pack to this object store.

    Returns: Fileobject to write to, a commit function to
        call when the pack is finished and an abort
        function.
    """
    import tempfile

    # Write into the pack directory itself so the final rename in
    # _complete_pack stays on the same filesystem.
    fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
    f = os.fdopen(fd, "w+b")
    mask = self.file_mode if self.file_mode is not None else PACK_MODE
    os.chmod(path, mask)

    def commit() -> "Pack | None":
        # An empty file means no objects were written; just clean up.
        if f.tell() > 0:
            f.seek(0)

            with PackData(path, file=f, object_format=self.object_format) as pd:
                indexer = PackIndexer.for_pack_data(
                    pd,
                    resolve_ext_ref=self.get_raw,  # type: ignore[arg-type]
                )
                return self._complete_pack(f, path, len(pd), indexer)  # type: ignore[arg-type]
        else:
            f.close()
            os.remove(path)
            return None

    def abort() -> None:
        # Discard the partially written pack file.
        f.close()
        os.remove(path)

    return f, commit, abort  # type: ignore[return-value]
def add_object(self, obj: ShaFile) -> None:
    """Add a single object to this object store.

    Args:
        obj: Object to add
    """
    # Use the correct hash algorithm for the object ID
    obj_id = ObjectID(obj.get_id(self.object_format))
    path = self._get_shafile_path(obj_id)
    fan_out_dir = os.path.dirname(path)
    try:
        os.mkdir(fan_out_dir)
        if self.dir_mode is not None:
            os.chmod(fan_out_dir, self.dir_mode)
    except FileExistsError:
        pass
    if os.path.exists(path):
        # Already there, no need to write again
        return
    mask = PACK_MODE if self.file_mode is None else self.file_mode
    with GitFile(path, "wb", mask=mask, fsync=self.fsync_object_files) as f:
        f.write(
            obj.as_legacy_object(compression_level=self.loose_compression_level)
        )
2066 @classmethod
2067 def init(
2068 cls,
2069 path: str | os.PathLike[str],
2070 *,
2071 file_mode: int | None = None,
2072 dir_mode: int | None = None,
2073 object_format: "ObjectFormat | None" = None,
2074 ) -> "DiskObjectStore":
2075 """Initialize a new disk object store.
2077 Creates the necessary directory structure for a Git object store.
2079 Args:
2080 path: Path where the object store should be created
2081 file_mode: Optional file permission mask for shared repository
2082 dir_mode: Optional directory permission mask for shared repository
2083 object_format: Hash algorithm to use (SHA1 or SHA256)
2085 Returns:
2086 New DiskObjectStore instance
2087 """
2088 try:
2089 os.mkdir(path)
2090 if dir_mode is not None:
2091 os.chmod(path, dir_mode)
2092 except FileExistsError:
2093 pass
2094 info_path = os.path.join(path, "info")
2095 pack_path = os.path.join(path, PACKDIR)
2096 os.mkdir(info_path)
2097 os.mkdir(pack_path)
2098 if dir_mode is not None:
2099 os.chmod(info_path, dir_mode)
2100 os.chmod(pack_path, dir_mode)
2101 return cls(
2102 path, file_mode=file_mode, dir_mode=dir_mode, object_format=object_format
2103 )
2105 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
2106 """Iterate over all object SHAs with the given prefix.
2108 Args:
2109 prefix: Hex prefix to search for (as bytes)
2111 Returns:
2112 Iterator of object SHAs (as ObjectID) matching the prefix
2113 """
2114 if len(prefix) < 2:
2115 yield from super().iter_prefix(prefix)
2116 return
2117 seen = set()
2118 dir = prefix[:2].decode()
2119 rest = prefix[2:].decode()
2120 try:
2121 for name in os.listdir(os.path.join(self.path, dir)):
2122 if name.startswith(rest):
2123 sha = ObjectID(os.fsencode(dir + name))
2124 if sha not in seen:
2125 seen.add(sha)
2126 yield sha
2127 except FileNotFoundError:
2128 pass
2130 for p in self.packs:
2131 bin_prefix = (
2132 binascii.unhexlify(prefix)
2133 if len(prefix) % 2 == 0
2134 else binascii.unhexlify(prefix[:-1])
2135 )
2136 for bin_sha in p.index.iter_prefix(bin_prefix):
2137 sha = sha_to_hex(bin_sha)
2138 if sha.startswith(prefix) and sha not in seen:
2139 seen.add(sha)
2140 yield sha
2141 for alternate in self.alternates:
2142 for sha in alternate.iter_prefix(prefix):
2143 if sha not in seen:
2144 seen.add(sha)
2145 yield sha
2147 def get_commit_graph(self) -> "CommitGraph | None":
2148 """Get the commit graph for this object store.
2150 Returns:
2151 CommitGraph object if available, None otherwise
2152 """
2153 if not self._use_commit_graph:
2154 return None
2156 if self._commit_graph is None:
2157 from .commit_graph import read_commit_graph
2159 # Look for commit graph in our objects directory
2160 graph_file = os.path.join(self.path, "info", "commit-graph")
2161 if os.path.exists(graph_file):
2162 self._commit_graph = read_commit_graph(graph_file)
2163 return self._commit_graph
2165 def get_midx(self) -> MultiPackIndex | None:
2166 """Get the multi-pack-index for this object store.
2168 Returns:
2169 MultiPackIndex object if available, None otherwise
2171 Raises:
2172 ValueError: If MIDX file is corrupt
2173 OSError: If MIDX file cannot be read
2174 """
2175 if not self._use_midx:
2176 return None
2178 if self._midx is None:
2179 # Look for MIDX in pack directory
2180 midx_file = os.path.join(self.pack_dir, "multi-pack-index")
2181 if os.path.exists(midx_file):
2182 self._midx = load_midx(midx_file)
2183 return self._midx
2185 def _get_pack_by_name(self, pack_name: str) -> Pack:
2186 """Get a pack by its base name.
2188 Args:
2189 pack_name: Base name of the pack (e.g., 'pack-abc123.pack' or 'pack-abc123.idx')
2191 Returns:
2192 Pack object
2194 Raises:
2195 KeyError: If pack doesn't exist
2196 """
2197 # Remove .pack or .idx extension if present
2198 if pack_name.endswith(".pack"):
2199 base_name = pack_name[:-5]
2200 elif pack_name.endswith(".idx"):
2201 base_name = pack_name[:-4]
2202 else:
2203 base_name = pack_name
2205 # Check if already in cache
2206 if base_name in self._pack_cache:
2207 return self._pack_cache[base_name]
2209 # Load the pack
2210 pack_path = os.path.join(self.pack_dir, base_name)
2211 if not os.path.exists(pack_path + ".pack"):
2212 raise KeyError(f"Pack {pack_name} not found")
2214 pack = Pack(
2215 pack_path,
2216 object_format=self.object_format,
2217 delta_window_size=self.pack_delta_window_size,
2218 window_memory=self.pack_window_memory,
2219 delta_cache_size=self.pack_delta_cache_size,
2220 depth=self.pack_depth,
2221 threads=self.pack_threads,
2222 big_file_threshold=self.pack_big_file_threshold,
2223 )
2224 self._pack_cache[base_name] = pack
2225 return pack
2227 def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
2228 """Check if a particular object is present by SHA1 and is packed.
2230 This checks the MIDX first if available, then falls back to checking
2231 individual pack indexes.
2233 Args:
2234 sha: Binary SHA of the object
2236 Returns:
2237 True if the object is in a pack file
2238 """
2239 # Check MIDX first for faster lookup
2240 midx = self.get_midx()
2241 if midx is not None and sha in midx:
2242 return True
2244 # Fall back to checking individual packs
2245 return super().contains_packed(sha)
    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw fulltext for an object.

        This uses the MIDX if available for faster lookups.

        Args:
            name: SHA for the object (20 bytes binary or 40 bytes hex)

        Returns:
            Tuple with numeric type and object contents

        Raises:
            KeyError: If object not found
        """
        # Dispatch on length: hex IDs are 40 (SHA1) or 64 (SHA256) bytes,
        # binary IDs 20 or 32. Anything else is a programming error.
        sha: RawObjectID
        if len(name) in (40, 64):
            # name is ObjectID (hex), convert to RawObjectID
            # Support both SHA1 (40) and SHA256 (64)
            sha = hex_to_sha(cast(ObjectID, name))
        elif len(name) in (20, 32):
            # name is already RawObjectID (binary)
            # Support both SHA1 (20) and SHA256 (32)
            sha = RawObjectID(name)
        else:
            raise AssertionError(f"Invalid object name {name!r}")

        # Try MIDX first for faster lookup
        midx = self.get_midx()
        if midx is not None:
            result = midx.object_offset(sha)
            if result is not None:
                pack_name, _offset = result
                try:
                    pack = self._get_pack_by_name(pack_name)
                    # NOTE: the offset from the MIDX is not used here; the
                    # pack resolves the object by SHA again.
                    return pack.get_raw(sha)
                except (KeyError, PackFileDisappeared):
                    # Pack disappeared or object not found, fall through to standard lookup
                    pass

        # Fall back to the standard implementation
        return super().get_raw(name)
2289 def write_midx(self) -> bytes:
2290 """Write a multi-pack-index file for this object store.
2292 Creates a MIDX file that indexes all pack files in the pack directory.
2294 Returns:
2295 SHA-1 checksum of the written MIDX file
2297 Raises:
2298 OSError: If the pack directory doesn't exist or MIDX can't be written
2299 """
2300 from .midx import write_midx_file
2302 # Get all pack files
2303 packs = self.packs
2304 if not packs:
2305 # No packs to index
2306 return b"\x00" * 20
2308 # Collect entries from all packs
2309 pack_entries: list[tuple[str, list[tuple[RawObjectID, int, int | None]]]] = []
2311 for pack in packs:
2312 # Git stores .idx extension in MIDX, not .pack
2313 pack_name = os.path.basename(pack._basename) + ".idx"
2314 entries = list(pack.index.iterentries())
2315 pack_entries.append((pack_name, entries))
2317 # Write MIDX file
2318 midx_path = os.path.join(self.pack_dir, "multi-pack-index")
2319 return write_midx_file(midx_path, pack_entries)
    def write_commit_graph(
        self, refs: Iterable[ObjectID] | None = None, reachable: bool = True
    ) -> None:
        """Write a commit graph file for this object store.

        Args:
            refs: List of refs to include. If None, includes all refs from object store.
            reachable: If True, includes all commits reachable from refs.
                If False, only includes the direct ref targets.
        """
        from .commit_graph import get_reachable_commits

        if refs is None:
            # Get all commit objects from the object store
            all_refs = []
            # Iterate through all objects to find commits
            for sha in self:
                try:
                    obj = self[sha]
                    if obj.type_name == b"commit":
                        all_refs.append(sha)
                except KeyError:
                    # Object disappeared between listing and lookup; skip it.
                    continue
        else:
            # Use provided refs
            all_refs = list(refs)

        if not all_refs:
            return  # No commits to include

        if reachable:
            # Get all reachable commits
            commit_ids = get_reachable_commits(self, all_refs)
        else:
            # Just use the direct ref targets - ensure they're hex ObjectIDs
            commit_ids = []
            for ref in all_refs:
                if isinstance(ref, bytes) and len(ref) == self.object_format.hex_length:
                    # Already hex ObjectID
                    commit_ids.append(ref)
                elif (
                    isinstance(ref, bytes) and len(ref) == self.object_format.oid_length
                ):
                    # Binary SHA, convert to hex ObjectID
                    from .objects import sha_to_hex

                    commit_ids.append(sha_to_hex(RawObjectID(ref)))
                else:
                    # Assume it's already correct format
                    commit_ids.append(ref)

        if commit_ids:
            # Write commit graph directly to our object store path
            # Generate the commit graph
            from .commit_graph import generate_commit_graph

            graph = generate_commit_graph(self, commit_ids)

            if graph.entries:
                # Ensure the info directory exists
                info_dir = os.path.join(self.path, "info")
                os.makedirs(info_dir, exist_ok=True)
                if self.dir_mode is not None:
                    os.chmod(info_dir, self.dir_mode)

                # Write using GitFile for atomic operation
                graph_path = os.path.join(info_dir, "commit-graph")
                mask = self.file_mode if self.file_mode is not None else 0o644
                with GitFile(graph_path, "wb", mask=mask) as f:
                    assert isinstance(
                        f, _GitFile
                    )  # GitFile in write mode always returns _GitFile
                    graph.write_to_file(f)

        # Clear cached commit graph so it gets reloaded
        self._commit_graph = None
2398 def prune(self, grace_period: int | None = None) -> None:
2399 """Prune/clean up this object store.
2401 This removes temporary files that were left behind by interrupted
2402 pack operations. These are files that start with ``tmp_pack_`` in the
2403 repository directory or files with .pack extension but no corresponding
2404 .idx file in the pack directory.
2406 Args:
2407 grace_period: Grace period in seconds for removing temporary files.
2408 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
2409 """
2410 import glob
2412 if grace_period is None:
2413 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
2415 # Clean up tmp_pack_* files in the repository directory
2416 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
2417 # Check if file is old enough (more than grace period)
2418 mtime = os.path.getmtime(tmp_file)
2419 if time.time() - mtime > grace_period:
2420 os.remove(tmp_file)
2422 # Clean up orphaned .pack files without corresponding .idx files
2423 try:
2424 pack_dir_contents = os.listdir(self.pack_dir)
2425 except FileNotFoundError:
2426 return
2428 pack_files = {}
2429 idx_files = set()
2431 for name in pack_dir_contents:
2432 if name.endswith(".pack"):
2433 base_name = name[:-5] # Remove .pack extension
2434 pack_files[base_name] = name
2435 elif name.endswith(".idx"):
2436 base_name = name[:-4] # Remove .idx extension
2437 idx_files.add(base_name)
2439 # Remove .pack files without corresponding .idx files
2440 for base_name, pack_name in pack_files.items():
2441 if base_name not in idx_files:
2442 pack_path = os.path.join(self.pack_dir, pack_name)
2443 # Check if file is old enough (more than grace period)
2444 mtime = os.path.getmtime(pack_path)
2445 if time.time() - mtime > grace_period:
2446 os.remove(pack_path)
2448 def close(self) -> None:
2449 """Close the object store and release resources.
2451 This method closes all cached pack files, MIDX, and frees associated resources.
2452 Can be called multiple times safely.
2453 """
2454 # Close MIDX if it's loaded
2455 if self._midx is not None:
2456 self._midx.close()
2457 self._midx = None
2459 # Close alternates
2460 if self._alternates is not None:
2461 for alt in self._alternates:
2462 alt.close()
2463 self._alternates = None
2465 # Call parent class close to handle pack files
2466 super().close()
class MemoryObjectStore(PackCapableObjectStore):
    """Object store that keeps all objects in memory."""

    def __init__(self, *, object_format: "ObjectFormat | None" = None) -> None:
        """Initialize a MemoryObjectStore.

        Creates an empty in-memory object store.

        Args:
            object_format: Hash algorithm to use (defaults to SHA1)
        """
        super().__init__(object_format=object_format)
        # Maps hex ObjectID -> ShaFile; the sole backing storage.
        self._data: dict[ObjectID, ShaFile] = {}
        self.pack_compression_level = -1

    def _to_hexsha(self, sha: ObjectID | RawObjectID) -> ObjectID:
        # Normalize either the hex or the binary SHA form to the hex form
        # used as the dictionary key.
        if len(sha) == self.object_format.hex_length:
            return cast(ObjectID, sha)
        elif len(sha) == self.object_format.oid_length:
            return sha_to_hex(cast(RawObjectID, sha))
        else:
            raise ValueError(f"Invalid sha {sha!r}")

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is loose."""
        return self._to_hexsha(sha) in self._data

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if a particular object is present by SHA1 and is packed."""
        # Nothing is ever packed in a memory store.
        return False

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs that are present in this store."""
        return iter(self._data.keys())

    @property
    def packs(self) -> list[Pack]:
        """List with pack objects."""
        return []

    def get_raw(self, name: RawObjectID | ObjectID) -> tuple[int, bytes]:
        """Obtain the raw text for an object.

        Args:
            name: sha for the object.
        Returns: tuple with numeric type and object contents.
        """
        obj = self[self._to_hexsha(name)]
        return obj.type_num, obj.as_raw_string()

    def __getitem__(self, name: ObjectID | RawObjectID) -> ShaFile:
        """Retrieve an object by SHA.

        Args:
            name: SHA of the object (as hex string or bytes)

        Returns:
            Copy of the ShaFile object

        Raises:
            KeyError: If the object is not found
        """
        # Return a copy so callers cannot mutate the stored object.
        return self._data[self._to_hexsha(name)].copy()

    def __delitem__(self, name: ObjectID) -> None:
        """Delete an object from this store, for testing only."""
        del self._data[self._to_hexsha(name)]

    def add_object(self, obj: ShaFile) -> None:
        """Add a single object to this object store."""
        # Store a copy so later mutation of the caller's object does not
        # change what the store returns.
        self._data[obj.id] = obj.copy()

    def add_objects(
        self,
        objects: Iterable[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a set of objects to this object store.

        Args:
            objects: Iterable over a list of (object, path) tuples
            progress: Optional progress reporting function.
        """
        # The path component is ignored; only the objects are stored.
        for obj, path in objects:
            self.add_object(obj)

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Because this object store doesn't support packs, we extract and add the
        individual objects.

        Returns: Fileobject to write to and a commit function to
            call when the pack is finished.
        """
        from tempfile import SpooledTemporaryFile

        # Spools to disk only once the incoming pack exceeds the size cap.
        f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")

        def commit() -> None:
            size = f.tell()
            if size > 0:
                f.seek(0)
                # Inflate the received pack and add its objects one by one.
                p = PackData.from_file(f, self.object_format, size)
                for obj in PackInflater.for_pack_data(p, self.get_raw):  # type: ignore[arg-type]
                    self.add_object(obj)
                p.close()
                f.close()
            else:
                # Nothing was written; just discard the buffer.
                f.close()

        def abort() -> None:
            f.close()

        return f, commit, abort  # type: ignore[return-value]

    def add_pack_data(
        self,
        count: int,
        unpacked_objects: Iterator[UnpackedObject],
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add pack data to this object store.

        Args:
            count: Number of items to add
            unpacked_objects: Iterator of UnpackedObject instances
            progress: Optional progress reporting function.
        """
        if count == 0:
            return

        # Since MemoryObjectStore doesn't support pack files, we need to
        # extract individual objects. To handle deltas properly, we write
        # to a temporary pack and then use PackInflater to resolve them.
        f, commit, abort = self.add_pack()
        try:
            write_pack_data(
                f.write,
                unpacked_objects,
                num_records=count,
                progress=progress,
                object_format=self.object_format,
            )
        except BaseException:
            abort()
            raise
        else:
            commit()

    def add_thin_pack(
        self,
        read_all: Callable[[int], bytes],
        read_some: Callable[[int], bytes] | None,
        progress: Callable[[str], None] | None = None,
    ) -> None:
        """Add a new thin pack to this object store.

        Thin packs are packs that contain deltas with parents that exist
        outside the pack. Because this object store doesn't support packs, we
        extract and add the individual objects.

        Args:
            read_all: Read function that blocks until the number of
                requested bytes are read.
            read_some: Read function that returns at least one byte, but may
                not return the number of bytes requested.
            progress: Optional progress reporting function.
        """
        f, commit, abort = self.add_pack()
        try:
            # Copy the incoming stream into the temporary pack; commit()
            # then inflates it (resolving external delta bases via get_raw).
            copier = PackStreamCopier(
                self.object_format.hash_func,
                read_all,
                read_some,
                f,
            )
            copier.verify()
        except BaseException:
            abort()
            raise
        else:
            commit()
class ObjectIterator(Protocol):
    """Interface for iterating over objects.

    Structural protocol: any object providing a compatible ``iterobjects``
    method satisfies it, without inheriting from this class.
    """

    def iterobjects(self) -> Iterator[ShaFile]:
        """Iterate over all objects.

        Returns:
            Iterator of ShaFile objects
        """
        raise NotImplementedError(self.iterobjects)
def tree_lookup_path(
    lookup_obj: Callable[[ObjectID | RawObjectID], ShaFile],
    root_sha: ObjectID | RawObjectID,
    path: bytes,
) -> tuple[int, ObjectID]:
    """Look up an object in a Git tree.

    Args:
        lookup_obj: Callback for retrieving object by SHA1
        root_sha: SHA1 of the root tree
        path: Path to lookup
    Returns: A tuple of (mode, SHA) of the resulting path.
    """
    root = lookup_obj(root_sha)
    if not isinstance(root, Tree):
        # The given SHA must name a tree, not a blob/commit/tag.
        raise NotTreeError(root_sha)
    # Delegate the per-component walk to the Tree itself.
    return root.lookup_path(lookup_obj, path)
def _collect_filetree_revs(
    obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
) -> None:
    """Collect SHA1s of files and directories for specified tree.

    Args:
        obj_store: Object store to get objects by SHA from
        tree_sha: tree reference to walk
        kset: set to fill with references to files and directories
    """
    filetree = obj_store[tree_sha]
    assert isinstance(filetree, Tree)
    for _name, mode, entry_sha in filetree.iteritems():
        assert mode is not None
        assert entry_sha is not None
        # Skip submodule (gitlink) entries — they live in another repo —
        # and anything we have already collected.
        if S_ISGITLINK(mode) or entry_sha in kset:
            continue
        kset.add(entry_sha)
        if stat.S_ISDIR(mode):
            # Recurse into subtrees.
            _collect_filetree_revs(obj_store, entry_sha, kset)
def _split_commits_and_tags(
    obj_store: ObjectContainer,
    lst: Iterable[ObjectID],
    *,
    unknown: str = "error",
) -> tuple[set[ObjectID], set[ObjectID], set[ObjectID]]:
    """Split object id list into three lists with commit, tag, and other SHAs.

    Commits referenced by tags are included into commits
    list as well. Only SHA1s known in this repository will get
    through, controlled by the unknown parameter.

    Args:
        obj_store: Object store to get objects by SHA1 from
        lst: Collection of commit and tag SHAs
        unknown: How to handle unknown objects: "error", "warn", or "ignore"
    Returns: A tuple of (commits, tags, others) SHA1s
    """
    import logging

    if unknown not in ("error", "warn", "ignore"):
        raise ValueError(
            f"unknown must be 'error', 'warn', or 'ignore', got {unknown!r}"
        )

    commits: set[ObjectID] = set()
    tags: set[ObjectID] = set()
    others: set[ObjectID] = set()
    for sha in lst:
        try:
            obj = obj_store[sha]
        except KeyError:
            if unknown == "error":
                raise
            if unknown == "warn":
                logging.warning(
                    "Object %s not found in object store", sha.decode("ascii")
                )
            # unknown == "ignore": silently drop it.
            continue
        if isinstance(obj, Commit):
            commits.add(sha)
        elif isinstance(obj, Tag):
            tags.add(sha)
            # Follow the tag to whatever it points at; commits reached this
            # way are folded into the commits set as well.
            pointed_to = obj.object[1]
            sub_commits, sub_tags, sub_others = _split_commits_and_tags(
                obj_store, [pointed_to], unknown=unknown
            )
            commits |= sub_commits
            tags |= sub_tags
            others |= sub_others
        else:
            others.add(sha)
    return (commits, tags, others)
class MissingObjectFinder:
    """Find the objects missing from another object store.

    Args:
        object_store: Object store containing at least all objects to be
            sent
        haves: SHA1s of commits not to send (already present in target)
        wants: SHA1s of commits to send
        progress: Optional function to report progress to.
        get_tagged: Function that returns a dict of pointed-to sha -> tag
            sha for including tags.
        get_parents: Optional function for getting the parents of a commit.
    """

    def __init__(
        self,
        object_store: BaseObjectStore,
        haves: Iterable[ObjectID],
        wants: Iterable[ObjectID],
        *,
        shallow: Set[ObjectID] | None = None,
        progress: Callable[[bytes], None] | None = None,
        get_tagged: Callable[[], dict[ObjectID, ObjectID]] | None = None,
        get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
    ) -> None:
        """Initialize a MissingObjectFinder.

        Args:
            object_store: Object store containing objects
            haves: SHA1s of objects already present in target
            wants: SHA1s of objects to send
            shallow: Set of shallow commit SHA1s
            progress: Optional progress reporting callback
            get_tagged: Function returning dict of pointed-to sha -> tag sha
            get_parents: Function for getting commit parents
        """
        self.object_store = object_store
        if shallow is None:
            shallow = set()
        self._get_parents = get_parents
        reachability = object_store.get_reachability_provider()
        # process Commits and Tags differently
        # haves may list commits/tags not available locally (silently ignore them).
        # wants should only contain valid SHAs (fail fast if not).
        have_commits, have_tags, have_others = _split_commits_and_tags(
            object_store, haves, unknown="ignore"
        )
        want_commits, want_tags, want_others = _split_commits_and_tags(
            object_store, wants, unknown="error"
        )
        # all_ancestors is a set of commits that shall not be sent
        # (complete repository up to 'haves')
        all_ancestors = reachability.get_reachable_commits(
            have_commits, exclude=None, shallow=shallow
        )
        # all_missing - complete set of commits between haves and wants
        # common_commits - boundary commits directly encountered when traversing wants
        # We use _collect_ancestors here because we need the exact boundary behavior:
        # commits that are in all_ancestors and directly reachable from wants,
        # but we don't traverse past them. This is hard to express with the
        # reachability abstraction alone.
        missing_commits, common_commits = _collect_ancestors(
            object_store,
            want_commits,
            frozenset(all_ancestors),
            shallow=frozenset(shallow),
            get_parents=self._get_parents,
        )

        self.remote_has: set[ObjectID] = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be both locally
        # and on target. Thus these commits and files
        # won't get selected for fetch
        for h in common_commits:
            self.remote_has.add(h)
            cmt = object_store[h]
            assert isinstance(cmt, Commit)
            # Get tree objects for this commit
            tree_objects = reachability.get_tree_objects([cmt.tree])
            self.remote_has.update(tree_objects)
        # record tags we have as visited, too
        for t in have_tags:
            self.remote_has.add(t)
        # sha_done doubles as the "already processed" set during iteration.
        self.sha_done = set(self.remote_has)

        # in fact, what we 'want' is commits, tags, and others
        # we've found missing
        # Each queue entry is (sha, name-hint, type_num-hint, is_leaf).
        self.objects_to_send: set[tuple[ObjectID, bytes | None, int | None, bool]] = {
            (w, None, Commit.type_num, False) for w in missing_commits
        }
        missing_tags = want_tags.difference(have_tags)
        self.objects_to_send.update(
            {(w, None, Tag.type_num, False) for w in missing_tags}
        )
        missing_others = want_others.difference(have_others)
        self.objects_to_send.update({(w, None, None, False) for w in missing_others})

        if progress is None:
            # No-op progress sink keeps __next__ free of None checks.
            self.progress: Callable[[bytes], None] = lambda x: None
        else:
            self.progress = progress
        self._tagged = (get_tagged and get_tagged()) or {}

    def get_remote_has(self) -> set[ObjectID]:
        """Get the set of SHAs the remote has.

        Returns:
            Set of SHA1s that the remote side already has
        """
        return self.remote_has

    def add_todo(
        self, entries: Iterable[tuple[ObjectID, bytes | None, int | None, bool]]
    ) -> None:
        """Add objects to the todo list.

        Args:
            entries: Iterable of tuples (sha, name, type_num, is_leaf)
        """
        # Drop anything that has already been processed.
        self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])

    def __next__(self) -> tuple[ObjectID, PackHint | None]:
        """Get the next object to send.

        Returns:
            Tuple of (sha, pack_hint)

        Raises:
            StopIteration: When no more objects to send
        """
        # Pop entries until we find one not yet processed.
        while True:
            if not self.objects_to_send:
                self.progress(
                    f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
                )
                raise StopIteration
            (sha, name, type_num, leaf) = self.objects_to_send.pop()
            if sha not in self.sha_done:
                break
        if not leaf:
            # Queue whatever this object references: a commit queues its
            # tree, a tree its entries, a tag its target.
            o = self.object_store[sha]
            if isinstance(o, Commit):
                self.add_todo([(o.tree, b"", Tree.type_num, False)])
            elif isinstance(o, Tree):
                todos = []
                for n, m, s in o.iteritems():
                    assert m is not None
                    assert n is not None
                    assert s is not None
                    if not S_ISGITLINK(m):
                        todos.append(
                            (
                                s,
                                n,
                                (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
                                not stat.S_ISDIR(m),
                            )
                        )
                self.add_todo(todos)
            elif isinstance(o, Tag):
                self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
        if sha in self._tagged:
            self.add_todo([(self._tagged[sha], None, None, True)])
        self.sha_done.add(sha)
        if len(self.sha_done) % 1000 == 0:
            self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
        if type_num is None:
            pack_hint = None
        else:
            pack_hint = (type_num, name)
        return (sha, pack_hint)

    def __iter__(self) -> Iterator[tuple[ObjectID, PackHint | None]]:
        """Return iterator over objects to send.

        Returns:
            Self (this class implements the iterator protocol)
        """
        return self
class ObjectStoreGraphWalker:
    """Graph walker that finds what commits are missing from an object store."""

    heads: set[ObjectID]
    """Revisions without descendants in the local repo."""

    get_parents: Callable[[ObjectID], list[ObjectID]]
    """Function to retrieve parents in the local repo."""

    shallow: set[ObjectID]

    def __init__(
        self,
        local_heads: Iterable[ObjectID],
        get_parents: Callable[[ObjectID], list[ObjectID]],
        shallow: set[ObjectID] | None = None,
        update_shallow: Callable[[set[ObjectID] | None, set[ObjectID] | None], None]
        | None = None,
    ) -> None:
        """Create a new instance.

        Args:
            local_heads: Heads to start search with
            get_parents: Function for finding the parents of a SHA1.
            shallow: Set of shallow commits.
            update_shallow: Function to update shallow commits.
        """
        self.heads = set(local_heads)
        self.get_parents = get_parents
        # Maps a visited sha to its parent list; the value becomes None
        # once ack() has fully processed the entry.
        self.parents: dict[ObjectID, list[ObjectID] | None] = {}
        self.shallow = shallow if shallow is not None else set()
        self.update_shallow = update_shallow

    def nak(self) -> None:
        """Nothing in common was found."""

    def ack(self, sha: ObjectID) -> None:
        """Ack that a revision and its ancestors are present in the source."""
        if len(sha) != 40:
            # TODO: support SHA256
            raise ValueError(f"unexpected sha {sha!r} received")
        frontier = {sha}

        # Walk backwards through already-visited history, dropping every
        # acked revision from the candidate heads. Stop when no heads are
        # left or no known ancestors remain to follow.
        while self.heads:
            self.heads -= frontier
            expanded: set[ObjectID] = set()
            for node in frontier:
                known = self.parents.get(node)
                if known is not None:
                    expanded.update(known)
                # Mark as fully processed.
                self.parents[node] = None
            if not expanded:
                break
            frontier = expanded

    def next(self) -> ObjectID | None:
        """Iterate over ancestors of heads in the target."""
        if not self.heads:
            return None
        candidate = self.heads.pop()
        try:
            parent_shas = self.get_parents(candidate)
        except KeyError:
            return None
        self.parents[candidate] = parent_shas
        # Unvisited parents become new candidate heads.
        self.heads.update(p for p in parent_shas if p not in self.parents)
        return candidate

    __next__ = next
def commit_tree_changes(
    object_store: BaseObjectStore,
    tree: ObjectID | Tree,
    changes: Sequence[tuple[bytes, int | None, ObjectID | None]],
) -> ObjectID:
    """Commit a specified set of changes to a tree structure.

    This will apply a set of changes on top of an existing tree, storing new
    objects in object_store.

    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. See the mode and
    object sha to None deletes the path.

    This method works especially well if there are only a small
    number of changes to a big tree. For a large number of changes
    to a large tree, use e.g. commit_tree.

    Args:
        object_store: Object store to store new objects in
            and retrieve old ones from.
        tree: Original tree root (SHA or Tree object)
        changes: changes to apply
    Returns: New tree root object
    """
    # TODO(jelmer): Save up the objects and add them using .add_objects
    # rather than with individual calls to .add_object.
    # Handle both Tree object and SHA
    if isinstance(tree, Tree):
        tree_obj: Tree = tree
    else:
        sha_obj = object_store[tree]
        assert isinstance(sha_obj, Tree)
        tree_obj = sha_obj
    # Changes below an immediate subdirectory are grouped per subdirectory
    # and applied via a recursive call; top-level changes apply directly.
    nested_changes: dict[bytes, list[tuple[bytes, int | None, ObjectID | None]]] = {}
    for path, new_mode, new_sha in changes:
        try:
            (dirname, subpath) = path.split(b"/", 1)
        except ValueError:
            # No slash: this entry lives directly in the current tree.
            if new_sha is None:
                del tree_obj[path]
            else:
                assert new_mode is not None
                tree_obj[path] = (new_mode, new_sha)
        else:
            nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
    for name, subchanges in nested_changes.items():
        try:
            orig_subtree_id: ObjectID | Tree = tree_obj[name][1]
        except KeyError:
            # For new directories, pass an empty Tree object
            orig_subtree_id = Tree()
        subtree_id = commit_tree_changes(object_store, orig_subtree_id, subchanges)
        subtree = object_store[subtree_id]
        assert isinstance(subtree, Tree)
        if len(subtree) == 0:
            # Subtree became empty: remove the directory entry entirely.
            del tree_obj[name]
        else:
            tree_obj[name] = (stat.S_IFDIR, subtree.id)
    object_store.add_object(tree_obj)
    return tree_obj.id
class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores.

    Reads consult each store in ``bases`` in order; writes go to
    ``add_store`` when one is provided, otherwise write operations raise
    NotImplementedError.
    """

    def __init__(
        self,
        bases: list[BaseObjectStore],
        add_store: BaseObjectStore | None = None,
    ) -> None:
        """Initialize an OverlayObjectStore.

        Args:
            bases: List of base object stores to overlay
            add_store: Optional store to write new objects to

        Raises:
            ValueError: If stores have different hash algorithms
        """
        from .object_format import verify_same_object_format

        # Verify all stores use the same hash algorithm
        store_algorithms = [store.object_format for store in bases]
        # Explicit None test (not truthiness) for consistency with the
        # `self.add_store is None` checks in add_object/add_objects.
        if add_store is not None:
            store_algorithms.append(add_store.object_format)

        object_format = verify_same_object_format(*store_algorithms)

        super().__init__(object_format=object_format)
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object: ShaFile) -> None:
        """Add a single object to the store.

        Args:
            object: Object to add

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(
        self,
        objects: Sequence[tuple[ShaFile, str | None]],
        progress: Callable[[str], None] | None = None,
    ) -> Pack | None:
        """Add multiple objects to the store.

        Args:
            objects: Iterator of objects to add
            progress: Optional progress reporting callback

        Raises:
            NotImplementedError: If no add_store was provided
        """
        if self.add_store is None:
            # Fixed: previously referenced self.add_object in this error.
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self) -> list[Pack]:
        """Get the list of packs from all overlaid stores.

        Returns:
            Combined list of packs from all base stores
        """
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self) -> Iterator[ObjectID]:
        """Iterate over all object SHAs in the overlaid stores.

        Returns:
            Iterator of object SHAs (deduped across stores)
        """
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[ObjectID], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        """Iterate over a subset of objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            allow_missing: If True, skip missing objects; if False, raise KeyError

        Returns:
            Iterator of ShaFile objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo = set(shas)
        found: set[ObjectID] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[ObjectID | RawObjectID],
        include_comp: bool = False,
        allow_missing: bool = False,
        convert_ofs_delta: bool = True,
    ) -> Iterator[UnpackedObject]:
        """Iterate over unpacked objects from the overlaid stores.

        Args:
            shas: Iterable of object SHAs to retrieve
            include_comp: Whether to include compressed data
            allow_missing: If True, skip missing objects; if False, raise KeyError
            convert_ofs_delta: Whether to convert OFS_DELTA objects

        Returns:
            Iterator of unpacked objects

        Raises:
            KeyError: If an object is missing and allow_missing is False
        """
        todo: set[ObjectID | RawObjectID] = set(shas)
        found: set[ObjectID | RawObjectID] = set()
        for b in self.bases:
            # Pass a fresh difference set so we never mutate a set the
            # base store's generator may still be iterating over
            # (mirrors the pattern used in iterobjects_subset; the old
            # code called todo.remove() mid-iteration).
            current_todo = todo - found
            for o in b.iter_unpacked_subset(
                current_todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                found.add(o.sha())
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def get_raw(self, sha_id: ObjectID | RawObjectID) -> tuple[int, bytes]:
        """Get the raw object data from the overlaid stores.

        Args:
            sha_id: SHA of the object

        Returns:
            Tuple of (type_num, raw_data)

        Raises:
            KeyError: If object not found in any base store
        """
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is packed in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is packed in any base store
        """
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha: ObjectID | RawObjectID) -> bool:
        """Check if an object is loose in any base store.

        Args:
            sha: SHA of the object

        Returns:
            True if object is loose in any base store
        """
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False
def read_packs_file(f: BinaryIO) -> Iterator[str]:
    """Yield the packs listed in a packs file.

    Args:
        f: Binary file object positioned at the start of a packs file;
           lines of the form ``P <name>`` yield ``<name>``, other
           non-empty lines are ignored.
    """
    for entry in filter(None, f.read().splitlines()):
        prefix, rest = entry.split(b" ", 1)
        if prefix == b"P":
            yield os.fsdecode(rest)
class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as backend.

    Such backends hold pack files only; there is no loose-object storage,
    so all loose-object operations below are no-ops. Subclasses implement
    the _iter_pack_names/_get_pack/_upload_pack hooks for the actual bucket.
    """

    def _iter_loose_objects(self) -> Iterator[ObjectID]:
        """Iterate over the SHAs of all loose objects (always empty here)."""
        return iter([])

    def _get_loose_object(self, sha: ObjectID | RawObjectID) -> None:
        # Bucket stores keep no loose objects, so every lookup misses.
        return None

    def delete_loose_object(self, sha: ObjectID) -> None:
        """Delete a loose object (no-op for bucket stores).

        Bucket-based stores don't have loose objects, so this is a no-op.

        Args:
            sha: SHA of the object to delete
        """
        # Doesn't exist..

    def pack_loose_objects(self, progress: Callable[[str], None] | None = None) -> int:
        """Pack loose objects. Returns number of objects packed.

        BucketBasedObjectStore doesn't support loose objects, so this is a no-op.

        Args:
            progress: Optional progress reporting callback (ignored)
        """
        return 0

    def _remove_pack_by_name(self, name: str) -> None:
        """Remove a pack by name. Subclasses should implement this."""
        raise NotImplementedError(self._remove_pack_by_name)

    def _iter_pack_names(self) -> Iterator[str]:
        """Yield the names of all packs in the bucket (subclass hook)."""
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name: str) -> Pack:
        """Open the named pack from the bucket (subclass hook)."""
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self) -> list[Pack]:
        """Sync the in-memory pack cache with the bucket's current pack list.

        Returns:
            List of packs that appeared since the last refresh.
        """
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(
        self, basename: str, pack_file: BinaryIO, index_file: BinaryIO
    ) -> None:
        """Upload a pack file and its index to the bucket (subclass hook)."""
        raise NotImplementedError

    def add_pack(self) -> tuple[BinaryIO, Callable[[], None], Callable[[], None]]:
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        # Spool the incoming pack in memory; only spill to disk once it
        # exceeds PACK_SPOOL_FILE_MAX_SIZE.
        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit() -> Pack | None:
            # Nothing was written: treat as an empty/no-op pack.
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)

            p = PackData(pf.name, file=pf, object_format=self.object_format)
            entries = p.sorted_entries()
            # NOTE(review): basename is derived via iter_sha1 even though the
            # store carries a configurable object_format — confirm behavior
            # for non-SHA-1 formats.
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf, self.object_format)
            # If a pack with the same checksum already exists, skip the
            # upload entirely and hand back the existing pack.
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)  # type: ignore[arg-type]
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        # Aborting is simply closing the spooled temporary file.
        return pf, commit, pf.close  # type: ignore[return-value]
def _collect_ancestors(
    store: ObjectContainer,
    heads: Iterable[ObjectID],
    common: frozenset[ObjectID] = frozenset(),
    shallow: frozenset[ObjectID] = frozenset(),
    get_parents: Callable[[Commit], list[ObjectID]] = lambda commit: commit.parents,
) -> tuple[set[ObjectID], set[ObjectID]]:
    """Collect all ancestors of heads up to (excluding) those in common.

    Args:
      store: Object store to get commits from
      heads: commits to start from
      common: commits to end at, or empty set to walk repository
        completely
      shallow: Set of shallow commits
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: a tuple (A, B) where A - all commits reachable
        from heads but not present in common, B - common (shared) elements
        that are directly reachable from heads
    """
    from collections import deque

    bases = set()
    commits = set()
    # deque gives O(1) popleft; the previous list.pop(0) was O(n) per
    # dequeue, making this BFS quadratic on long histories. FIFO order
    # is preserved exactly.
    queue: deque[ObjectID] = deque(heads)

    # Try to use commit graph if available
    commit_graph = store.get_commit_graph()

    while queue:
        e = queue.popleft()
        if e in common:
            bases.add(e)
        elif e not in commits:
            commits.add(e)
            if e in shallow:
                # Shallow boundary: record the commit but don't walk past it.
                continue

            # Try to use commit graph for parent lookup
            parents = None
            if commit_graph:
                parents = commit_graph.get_parents(e)

            if parents is None:
                # Fall back to loading the object
                cmt = store[e]
                assert isinstance(cmt, Commit)
                parents = get_parents(cmt)

            queue.extend(parents)
    return (commits, bases)
def iter_tree_contents(
    store: ObjectContainer, tree_id: ObjectID | None, *, include_trees: bool = False
) -> Iterator[TreeEntry]:
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
      store: Object store to get trees from
      tree_id: SHA1 of the tree.
      include_trees: If True, include tree objects in the iteration.

    Yields: TreeEntry namedtuples for all the objects in a tree.
    """
    if tree_id is None:
        return
    pending = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while pending:
        current = pending.pop()
        assert current.mode is not None
        is_dir = stat.S_ISDIR(current.mode)
        if is_dir:
            assert current.sha is not None
            subtree = store[current.sha]
            assert isinstance(subtree, Tree)
            assert current.path is not None
            children = [
                item.in_path(current.path)
                for item in subtree.iteritems(name_order=True)
            ]
            # Push in reverse so the name-ordered children pop first-to-last.
            pending.extend(reversed(children))
        if include_trees or not is_dir:
            yield current
def iter_commit_contents(
    store: ObjectContainer,
    commit: Commit | ObjectID | RawObjectID,
    *,
    include: Sequence[str | bytes | Path] | None = None,
) -> Iterator[TreeEntry]:
    """Iterate the contents of the repository at the specified commit.

    This is a wrapper around iter_tree_contents() and
    tree_lookup_path() to simplify the common task of getting the
    contents of a repo at a particular commit. See also
    dulwich.index.build_file_from_blob() for writing individual files
    to disk.

    Args:
      store: Object store to get trees from
      commit: Commit object, or SHA1 of a commit
      include: if provided, only the entries whose paths are in the
        list, or whose parent tree is in the list, will be
        included. Note that duplicate or overlapping paths
        (e.g. ["foo", "foo/bar"]) may result in duplicate entries

    Yields: TreeEntry namedtuples for all matching files in a commit.
    """
    if isinstance(commit, Commit):
        sha = commit.id
    else:
        sha = commit
    obj = store[sha]
    if not isinstance(obj, Commit):
        raise TypeError(
            f"{sha.decode('ascii')} should be ID of a Commit, but is {type(obj)}"
        )
    commit = obj
    # Paths supplied as str/Path are encoded with the commit's own encoding.
    encoding = commit.encoding or "utf-8"
    if include is None:
        # No filter: a single empty path means "the whole tree".
        include_bytes: list[bytes] = [b""]
    else:
        include_bytes = []
        for candidate in include:
            if isinstance(candidate, bytes):
                include_bytes.append(candidate)
            else:
                include_bytes.append(str(candidate).encode(encoding))

    for path in include_bytes:
        mode, obj_id = tree_lookup_path(store.__getitem__, commit.tree, path)
        # A directory path expands to every file beneath it; a file path
        # yields just that single entry.
        target = store[obj_id]
        if isinstance(target, Tree):
            for entry in iter_tree_contents(store, obj_id):
                yield entry.in_path(path)
        else:
            yield TreeEntry(path, mode, obj_id)
def peel_sha(
    store: ObjectContainer, sha: ObjectID | RawObjectID
) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
      store: Object store to get objects from
      sha: The object SHA to peel.
    Returns: The fully-peeled SHA1 of a tag object, after peeling all
        intermediate tags; if the original ref does not point to a tag,
        this will equal the original SHA1.
    """
    unpeeled = store[sha]
    current = unpeeled
    kind = object_class(current.type_name)
    # Follow the chain of tag objects until a non-tag is reached.
    while kind is Tag:
        assert isinstance(current, Tag)
        kind, sha = current.object
        current = store[sha]
    return unpeeled, current
class GraphTraversalReachability:
    """Naive graph traversal implementation of ObjectReachabilityProvider.

    This implementation wraps existing graph traversal functions
    (_collect_ancestors, _collect_filetree_revs) to provide the standard
    reachability interface without any performance optimizations.
    """

    def __init__(self, object_store: BaseObjectStore) -> None:
        """Initialize the graph traversal provider.

        Args:
            object_store: Object store to query
        """
        self.store = object_store

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads, excluding those in exclude.

        Uses _collect_ancestors for commit traversal.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        excluded = frozenset(exclude) if exclude else frozenset()
        boundaries = frozenset(shallow) if shallow else frozenset()
        reachable, _shared = _collect_ancestors(
            self.store, heads, excluded, boundaries
        )
        return reachable

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Uses _collect_filetree_revs for tree traversal.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        collected: set[ObjectID] = set()
        for root_sha in tree_shas:
            _collect_filetree_revs(self.store, root_sha, collected)
        return collected

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects (commits + trees + blobs) reachable from commits.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs)
        """
        wanted = set(commits)
        reachable = set(wanted)

        # Gather the root trees of every commit we can load.
        roots: list[ObjectID] = []
        for commit_sha in wanted:
            try:
                candidate = self.store[commit_sha]
            except KeyError:
                # Commit not in store, skip
                continue
            if isinstance(candidate, Commit):
                roots.append(candidate.tree)

        # Fold in every tree/blob under those roots.
        reachable.update(self.get_tree_objects(roots))

        # Subtract everything reachable from the excluded commits.
        if exclude_commits:
            reachable -= self.get_reachable_objects(exclude_commits, None)

        return reachable
class BitmapReachability:
    """Bitmap-accelerated implementation of ObjectReachabilityProvider.

    This implementation uses packfile bitmap indexes where available to
    accelerate reachability queries. Falls back to graph traversal when
    bitmaps don't cover the requested commits.
    """

    def __init__(self, object_store: "PackBasedObjectStore") -> None:
        """Initialize the bitmap provider.

        Args:
            object_store: Pack-based object store with bitmap support
        """
        self.store = object_store
        # Fallback to graph traversal for operations not yet optimized
        self._fallback = GraphTraversalReachability(object_store)

    def _combine_commit_bitmaps(
        self,
        commit_shas: set[ObjectID],
        exclude_shas: set[ObjectID] | None = None,
    ) -> tuple["EWAHBitmap", "Pack"] | None:
        """Combine bitmaps for multiple commits using OR, with optional exclusion.

        Args:
            commit_shas: Set of commit SHAs to combine
            exclude_shas: Optional set of commit SHAs to exclude

        Returns:
            Tuple of (combined_bitmap, pack) or None if bitmaps unavailable;
            a None return signals callers to fall back to graph traversal.
        """
        from .bitmap import find_commit_bitmaps

        # Find bitmaps for the commits
        commit_bitmaps = find_commit_bitmaps(commit_shas, self.store.packs)

        # If we can't find bitmaps for all commits, return None
        if len(commit_bitmaps) < len(commit_shas):
            return None

        # Combine bitmaps using OR
        combined_bitmap = None
        result_pack = None

        for commit_sha in commit_shas:
            pack, pack_bitmap, _sha_to_pos = commit_bitmaps[commit_sha]
            commit_bitmap = pack_bitmap.get_bitmap(commit_sha)

            if commit_bitmap is None:
                return None

            if combined_bitmap is None:
                combined_bitmap = commit_bitmap
                result_pack = pack
            elif pack == result_pack:
                # Same pack, can OR directly
                combined_bitmap = combined_bitmap | commit_bitmap
            else:
                # Different packs, can't combine: bit positions are only
                # meaningful relative to a single pack's index.
                return None

        # Handle exclusions if provided
        if exclude_shas and result_pack and combined_bitmap:
            # Excludes must come from the same pack as the combined result.
            exclude_bitmaps = find_commit_bitmaps(exclude_shas, [result_pack])

            if len(exclude_bitmaps) == len(exclude_shas):
                # All excludes have bitmaps, compute exclusion
                exclude_combined = None

                for commit_sha in exclude_shas:
                    _pack, pack_bitmap, _sha_to_pos = exclude_bitmaps[commit_sha]
                    exclude_bitmap = pack_bitmap.get_bitmap(commit_sha)

                    # NOTE(review): breaking here leaves exclude_combined
                    # holding only the excludes combined so far, and that
                    # partial union is still subtracted below — confirm
                    # partial exclusion is intended rather than returning
                    # None to force the graph-traversal fallback.
                    if exclude_bitmap is None:
                        break

                    if exclude_combined is None:
                        exclude_combined = exclude_bitmap
                    else:
                        exclude_combined = exclude_combined | exclude_bitmap

                # Subtract excludes using set difference
                if exclude_combined:
                    combined_bitmap = combined_bitmap - exclude_combined

        if combined_bitmap and result_pack:
            return (combined_bitmap, result_pack)
        return None

    def get_reachable_commits(
        self,
        heads: Iterable[ObjectID],
        exclude: Iterable[ObjectID] | None = None,
        shallow: Set[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all commits reachable from heads using bitmaps where possible.

        Args:
            heads: Starting commit SHAs
            exclude: Commit SHAs to exclude (and their ancestors)
            shallow: Set of shallow commit boundaries

        Returns:
            Set of commit SHAs reachable from heads but not from exclude
        """
        from .bitmap import bitmap_to_object_shas

        # If shallow is specified, fall back to graph traversal
        # (bitmaps don't support shallow boundaries well)
        if shallow:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        heads_set = set(heads)
        exclude_set = set(exclude) if exclude else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(heads_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)

        combined_bitmap, result_pack = result

        # Convert bitmap to commit SHAs, filtering for commits only
        pack_bitmap = result_pack.bitmap
        if pack_bitmap is None:
            return self._fallback.get_reachable_commits(heads, exclude, shallow)
        commit_type_filter = pack_bitmap.commit_bitmap
        return bitmap_to_object_shas(
            combined_bitmap, result_pack.index, commit_type_filter
        )

    def get_tree_objects(
        self,
        tree_shas: Iterable[ObjectID],
    ) -> set[ObjectID]:
        """Get all trees and blobs reachable from the given trees.

        Args:
            tree_shas: Starting tree SHAs

        Returns:
            Set of tree and blob SHAs
        """
        # Tree traversal doesn't benefit much from bitmaps, use fallback
        return self._fallback.get_tree_objects(tree_shas)

    def get_reachable_objects(
        self,
        commits: Iterable[ObjectID],
        exclude_commits: Iterable[ObjectID] | None = None,
    ) -> set[ObjectID]:
        """Get all objects reachable from commits using bitmaps.

        Args:
            commits: Starting commit SHAs
            exclude_commits: Commits whose objects should be excluded

        Returns:
            Set of all object SHAs (commits, trees, blobs)
        """
        from .bitmap import bitmap_to_object_shas

        commits_set = set(commits)
        exclude_set = set(exclude_commits) if exclude_commits else None

        # Try to combine bitmaps
        result = self._combine_commit_bitmaps(commits_set, exclude_set)
        if result is None:
            return self._fallback.get_reachable_objects(commits, exclude_commits)

        combined_bitmap, result_pack = result

        # Convert bitmap to all object SHAs (no type filter)
        return bitmap_to_object_shas(combined_bitmap, result_pack.index, None)