1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
22
23
24"""Git object store interfaces and implementation."""
25
26import binascii
27import os
28import stat
29import sys
30import time
31import warnings
32from collections.abc import Iterable, Iterator, Sequence
33from contextlib import suppress
34from io import BytesIO
35from typing import (
36 Callable,
37 Optional,
38 Protocol,
39 Union,
40 cast,
41)
42
43from .errors import NotTreeError
44from .file import GitFile
45from .objects import (
46 S_ISGITLINK,
47 ZERO_SHA,
48 Blob,
49 Commit,
50 ObjectID,
51 ShaFile,
52 Tag,
53 Tree,
54 TreeEntry,
55 hex_to_filename,
56 hex_to_sha,
57 object_class,
58 sha_to_hex,
59 valid_hexsha,
60)
61from .pack import (
62 PACK_SPOOL_FILE_MAX_SIZE,
63 ObjectContainer,
64 Pack,
65 PackData,
66 PackedObjectContainer,
67 PackFileDisappeared,
68 PackHint,
69 PackIndexer,
70 PackInflater,
71 PackStreamCopier,
72 UnpackedObject,
73 extend_pack,
74 full_unpacked_object,
75 generate_unpacked_objects,
76 iter_sha1,
77 load_pack_index_file,
78 pack_objects_to_data,
79 write_pack_data,
80 write_pack_index,
81)
82from .protocol import DEPTH_INFINITE
83from .refs import PEELED_TAG_SUFFIX, Ref
84
85INFODIR = "info"
86PACKDIR = "pack"
87
88# use permissions consistent with Git; just readable by everyone
89# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
91PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
92
93# Grace period for cleaning up temporary pack files (in seconds)
94# Matches git's default of 2 weeks
95DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
96
97
98def find_shallow(store, heads, depth):
99 """Find shallow commits according to a given depth.
100
101 Args:
102 store: An ObjectStore for looking up objects.
103 heads: Iterable of head SHAs to start walking from.
104 depth: The depth of ancestors to include. A depth of one includes
105 only the heads themselves.
106 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
107 considered shallow and unshallow according to the arguments. Note that
108 these sets may overlap if a commit is reachable along multiple paths.
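
    Example (illustrative sketch; builds a tiny in-memory history instead of
    using a real repository)::

        from dulwich.object_store import MemoryObjectStore, find_shallow
        from dulwich.objects import Commit, Tree

        store = MemoryObjectStore()
        tree = Tree()
        store.add_object(tree)
        parent = None
        for i in range(3):
            commit = Commit()
            commit.tree = tree.id
            commit.parents = [parent] if parent else []
            commit.author = commit.committer = b"Test <test@example.com>"
            commit.author_time = commit.commit_time = i
            commit.author_timezone = commit.commit_timezone = 0
            commit.message = b"commit"
            store.add_object(commit)
            parent = commit.id
        # With depth=2 the head commit is reported as not shallow and its
        # parent becomes the shallow boundary commit.
        shallow, not_shallow = find_shallow(store, [parent], depth=2)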
109 """
110 parents = {}
111 commit_graph = store.get_commit_graph()
112
113 def get_parents(sha):
114 result = parents.get(sha, None)
115 if not result:
116 # Try to use commit graph first if available
117 if commit_graph:
118 graph_parents = commit_graph.get_parents(sha)
119 if graph_parents is not None:
120 result = graph_parents
121 parents[sha] = result
122 return result
123 # Fall back to loading the object
124 result = store[sha].parents
125 parents[sha] = result
126 return result
127
128 todo = [] # stack of (sha, depth)
129 for head_sha in heads:
130 obj = store[head_sha]
131 # Peel tags if necessary
132 while isinstance(obj, Tag):
133 _, sha = obj.object
134 obj = store[sha]
135 if isinstance(obj, Commit):
136 todo.append((obj.id, 1))
137
138 not_shallow = set()
139 shallow = set()
140 while todo:
141 sha, cur_depth = todo.pop()
142 if cur_depth < depth:
143 not_shallow.add(sha)
144 new_depth = cur_depth + 1
145 todo.extend((p, new_depth) for p in get_parents(sha))
146 else:
147 shallow.add(sha)
148
149 return shallow, not_shallow
150
151
152def get_depth(
153 store,
154 head,
155 get_parents=lambda commit: commit.parents,
156 max_depth=None,
157):
158 """Return the current available depth for the given head.
159 For commits with multiple parents, the largest possible depth will be
160 returned.
161
    Args:
      store: Object store to search in
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    Returns: Depth of the deepest known ancestry chain from head (0 if head
      is not present; at most max_depth if given).
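
    Example (illustrative sketch; ``store`` and ``head`` stand for an object
    store and a commit SHA1 from the caller's context)::

        if get_depth(store, head, max_depth=50) < 50:
            print("fewer than 50 commits of history are available")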
166 """
167 if head not in store:
168 return 0
169 current_depth = 1
170 queue = [(head, current_depth)]
171 commit_graph = store.get_commit_graph()
172
173 while queue and (max_depth is None or current_depth < max_depth):
174 e, depth = queue.pop(0)
175 current_depth = max(current_depth, depth)
176
177 # Try to use commit graph for parent lookup if available
178 parents = None
179 if commit_graph:
180 parents = commit_graph.get_parents(e)
181
182 if parents is None:
183 # Fall back to loading the object
184 cmt = store[e]
185 if isinstance(cmt, Tag):
186 _cls, sha = cmt.object
187 cmt = store[sha]
188 parents = get_parents(cmt)
189
190 queue.extend((parent, depth + 1) for parent in parents if parent in store)
191 return current_depth
192
193
class PackContainer(Protocol):
    """Protocol for object stores that support adding pack files."""

    def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
196 """Add a new pack."""
197
198
199class BaseObjectStore:
200 """Object store interface."""
201
202 def determine_wants_all(
203 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None
204 ) -> list[ObjectID]:
205 def _want_deepen(sha):
206 if not depth:
207 return False
208 if depth == DEPTH_INFINITE:
209 return True
210 return depth > self._get_depth(sha)
211
212 return [
213 sha
214 for (ref, sha) in refs.items()
215 if (sha not in self or _want_deepen(sha))
216 and not ref.endswith(PEELED_TAG_SUFFIX)
217 and not sha == ZERO_SHA
218 ]
219
220 def contains_loose(self, sha) -> bool:
221 """Check if a particular object is present by SHA1 and is loose."""
222 raise NotImplementedError(self.contains_loose)
223
224 def __contains__(self, sha1: bytes) -> bool:
225 """Check if a particular object is present by SHA1.
226
227 This method makes no distinction between loose and packed objects.
228 """
229 return self.contains_loose(sha1)
230
231 @property
232 def packs(self):
233 """Iterable of pack objects."""
234 raise NotImplementedError
235
236 def get_raw(self, name) -> tuple[int, bytes]:
237 """Obtain the raw text for an object.
238
239 Args:
240 name: sha for the object.
241 Returns: tuple with numeric type and object contents.
242 """
243 raise NotImplementedError(self.get_raw)
244
245 def __getitem__(self, sha1: ObjectID) -> ShaFile:
246 """Obtain an object by SHA1."""
247 type_num, uncomp = self.get_raw(sha1)
248 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
249
250 def __iter__(self):
251 """Iterate over the SHAs that are present in this store."""
252 raise NotImplementedError(self.__iter__)
253
254 def add_object(self, obj) -> None:
255 """Add a single object to this object store."""
256 raise NotImplementedError(self.add_object)
257
258 def add_objects(self, objects, progress=None) -> None:
259 """Add a set of objects to this object store.
260
261 Args:
262 objects: Iterable over a list of (object, path) tuples
263 """
264 raise NotImplementedError(self.add_objects)
265
266 def tree_changes(
267 self,
268 source,
269 target,
270 want_unchanged=False,
271 include_trees=False,
272 change_type_same=False,
273 rename_detector=None,
274 paths=None,
275 ):
276 """Find the differences between the contents of two trees.
277
278 Args:
279 source: SHA1 of the source tree
280 target: SHA1 of the target tree
281 want_unchanged: Whether unchanged files should be reported
282 include_trees: Whether to include trees
283 change_type_same: Whether to report files changing
284 type in the same entry.
285 rename_detector: RenameDetector object for detecting renames.
286 paths: Optional list of paths to filter to (as bytes).
287 Returns: Iterator over tuples with
288 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
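
        Example (illustrative sketch; ``old_tree`` and ``new_tree`` are
        hypothetical tree SHA1s already present in this store)::

            for (paths, modes, shas) in store.tree_changes(old_tree, new_tree):
                old_path, new_path = paths
                if old_path is None:
                    print("added", new_path)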
289 """
290 from .diff_tree import tree_changes
291
292 for change in tree_changes(
293 self,
294 source,
295 target,
296 want_unchanged=want_unchanged,
297 include_trees=include_trees,
298 change_type_same=change_type_same,
299 rename_detector=rename_detector,
300 paths=paths,
301 ):
302 yield (
303 (change.old.path, change.new.path),
304 (change.old.mode, change.new.mode),
305 (change.old.sha, change.new.sha),
306 )
307
308 def iter_tree_contents(self, tree_id, include_trees=False):
309 """Iterate the contents of a tree and all subtrees.
310
311 Iteration is depth-first pre-order, as in e.g. os.walk.
312
313 Args:
314 tree_id: SHA1 of the tree.
315 include_trees: If True, include tree objects in the iteration.
316 Returns: Iterator over TreeEntry namedtuples for all the objects in a
317 tree.
318 """
319 warnings.warn(
320 "Please use dulwich.object_store.iter_tree_contents",
321 DeprecationWarning,
322 stacklevel=2,
323 )
324 return iter_tree_contents(self, tree_id, include_trees=include_trees)
325
326 def iterobjects_subset(
327 self, shas: Iterable[bytes], *, allow_missing: bool = False
328 ) -> Iterator[ShaFile]:
329 for sha in shas:
330 try:
331 yield self[sha]
332 except KeyError:
333 if not allow_missing:
334 raise
335
336 def find_missing_objects(
337 self,
338 haves,
339 wants,
340 shallow=None,
341 progress=None,
342 get_tagged=None,
343 get_parents=lambda commit: commit.parents,
344 ):
345 """Find the missing objects required for a set of revisions.
346
347 Args:
348 haves: Iterable over SHAs already in common.
349 wants: Iterable over SHAs of objects to fetch.
350 shallow: Set of shallow commit SHA1s to skip
351 progress: Simple progress function that will be called with
352 updated progress strings.
353 get_tagged: Function that returns a dict of pointed-to sha ->
354 tag sha for including tags.
355 get_parents: Optional function for getting the parents of a
356 commit.
357 Returns: Iterator over (sha, path) pairs.
358 """
359 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
360 finder = MissingObjectFinder(
361 self,
362 haves=haves,
363 wants=wants,
364 shallow=shallow,
365 progress=progress,
366 get_tagged=get_tagged,
367 get_parents=get_parents,
368 )
369 return iter(finder)
370
371 def find_common_revisions(self, graphwalker):
372 """Find which revisions this store has in common using graphwalker.
373
374 Args:
375 graphwalker: A graphwalker object.
376 Returns: List of SHAs that are in common
377 """
378 haves = []
379 sha = next(graphwalker)
380 while sha:
381 if sha in self:
382 haves.append(sha)
383 graphwalker.ack(sha)
384 sha = next(graphwalker)
385 return haves
386
387 def generate_pack_data(
388 self, have, want, shallow=None, progress=None, ofs_delta=True
389 ) -> tuple[int, Iterator[UnpackedObject]]:
390 """Generate pack data objects for a set of wants/haves.
391
392 Args:
393 have: List of SHA1s of objects that should not be sent
394 want: List of SHA1s of objects that should be sent
395 shallow: Set of shallow commit SHA1s to skip
396 ofs_delta: Whether OFS deltas can be included
397 progress: Optional progress reporting method
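
        Example (illustrative sketch; ``have`` and ``want`` are lists of
        commit SHA1s known to this store, and the output path is arbitrary)::

            from dulwich.pack import write_pack_data

            count, records = store.generate_pack_data(have, want)
            with open("objects.pack", "wb") as f:
                write_pack_data(f.write, records, num_records=count)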
398 """
399 # Note that the pack-specific implementation below is more efficient,
400 # as it reuses deltas
401 missing_objects = MissingObjectFinder(
402 self, haves=have, wants=want, shallow=shallow, progress=progress
403 )
404 object_ids = list(missing_objects)
405 return pack_objects_to_data(
406 [(self[oid], path) for oid, path in object_ids],
407 ofs_delta=ofs_delta,
408 progress=progress,
409 )
410
411 def peel_sha(self, sha):
412 """Peel all tags from a SHA.
413
414 Args:
415 sha: The object SHA to peel.
416 Returns: The fully-peeled SHA1 of a tag object, after peeling all
417 intermediate tags; if the original ref does not point to a tag,
418 this will equal the original SHA1.
419 """
420 warnings.warn(
421 "Please use dulwich.object_store.peel_sha()",
422 DeprecationWarning,
423 stacklevel=2,
424 )
425 return peel_sha(self, sha)[1]
426
427 def _get_depth(
428 self,
429 head,
430 get_parents=lambda commit: commit.parents,
431 max_depth=None,
432 ):
433 """Return the current available depth for the given head.
434 For commits with multiple parents, the largest possible depth will be
435 returned.
436
437 Args:
438 head: commit to start from
439 get_parents: optional function for getting the parents of a commit
440 max_depth: maximum depth to search
441 """
442 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
443
444 def close(self) -> None:
445 """Close any files opened by this object store."""
446 # Default implementation is a NO-OP
447
448 def prune(self, grace_period: Optional[int] = None) -> None:
449 """Prune/clean up this object store.
450
451 This includes removing orphaned temporary files and other
452 housekeeping tasks. Default implementation is a NO-OP.
453
454 Args:
455 grace_period: Grace period in seconds for removing temporary files.
456 If None, uses the default grace period.
457 """
458 # Default implementation is a NO-OP
459
460 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
461 """Iterate over all SHA1s that start with a given prefix.
462
463 The default implementation is a naive iteration over all objects.
464 However, subclasses may override this method with more efficient
465 implementations.
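
        Example (illustrative; the abbreviated object name is hypothetical)::

            matches = list(store.iter_prefix(b"ab12"))
            if len(matches) == 1:
                obj = store[matches[0]]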
466 """
467 for sha in self:
468 if sha.startswith(prefix):
469 yield sha
470
471 def get_commit_graph(self):
472 """Get the commit graph for this object store.
473
474 Returns:
475 CommitGraph object if available, None otherwise
476 """
477 return None
478
479 def write_commit_graph(self, refs=None, reachable=True) -> None:
480 """Write a commit graph file for this object store.
481
482 Args:
483 refs: List of refs to include. If None, includes all refs from object store.
484 reachable: If True, includes all commits reachable from refs.
485 If False, only includes the direct ref targets.
486
487 Note:
          The base implementation raises NotImplementedError. Subclasses
          should override this method to provide commit graph writing
          functionality.
490 """
491 raise NotImplementedError(self.write_commit_graph)
492
493 def get_object_mtime(self, sha):
494 """Get the modification time of an object.
495
496 Args:
497 sha: SHA1 of the object
498
499 Returns:
500 Modification time as seconds since epoch
501
502 Raises:
503 KeyError: if the object is not found
504 """
505 # Default implementation raises KeyError
506 # Subclasses should override to provide actual mtime
507 raise KeyError(sha)
508
509
class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
    """Base class for object stores that are backed by pack files."""

    def __init__(self, pack_compression_level=-1, pack_index_version=None) -> None:
512 self._pack_cache: dict[str, Pack] = {}
513 self.pack_compression_level = pack_compression_level
514 self.pack_index_version = pack_index_version
515
516 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
517 """Add a new pack to this object store."""
518 raise NotImplementedError(self.add_pack)
519
    def add_pack_data(
        self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
    ) -> Optional[Pack]:
        """Add pack data to this object store.

        Args:
          count: Number of records to write
          unpacked_objects: Iterator over UnpackedObject instances to write
          progress: Optional progress reporting function
        Returns: Pack object of the objects written, or None if no objects
          were written.
        """
528 if count == 0:
529 # Don't bother writing an empty pack file
530 return
531 f, commit, abort = self.add_pack()
532 try:
533 write_pack_data(
534 f.write,
535 unpacked_objects,
536 num_records=count,
537 progress=progress,
538 compression_level=self.pack_compression_level,
539 )
540 except BaseException:
541 abort()
542 raise
543 else:
544 return commit()
545
546 @property
547 def alternates(self):
548 return []
549
550 def contains_packed(self, sha) -> bool:
551 """Check if a particular object is present by SHA1 and is packed.
552
553 This does not check alternates.
554 """
555 for pack in self.packs:
556 try:
557 if sha in pack:
558 return True
559 except PackFileDisappeared:
560 pass
561 return False
562
563 def __contains__(self, sha) -> bool:
564 """Check if a particular object is present by SHA1.
565
566 This method makes no distinction between loose and packed objects.
567 """
568 if self.contains_packed(sha) or self.contains_loose(sha):
569 return True
570 for alternate in self.alternates:
571 if sha in alternate:
572 return True
573 return False
574
575 def _add_cached_pack(self, base_name, pack) -> None:
576 """Add a newly appeared pack to the cache by path."""
577 prev_pack = self._pack_cache.get(base_name)
578 if prev_pack is not pack:
579 self._pack_cache[base_name] = pack
580 if prev_pack:
581 prev_pack.close()
582
583 def generate_pack_data(
584 self, have, want, shallow=None, progress=None, ofs_delta=True
585 ) -> tuple[int, Iterator[UnpackedObject]]:
586 """Generate pack data objects for a set of wants/haves.
587
588 Args:
589 have: List of SHA1s of objects that should not be sent
590 want: List of SHA1s of objects that should be sent
591 shallow: Set of shallow commit SHA1s to skip
592 ofs_delta: Whether OFS deltas can be included
593 progress: Optional progress reporting method
594 """
595 missing_objects = MissingObjectFinder(
596 self, haves=have, wants=want, shallow=shallow, progress=progress
597 )
598 remote_has = missing_objects.get_remote_has()
599 object_ids = list(missing_objects)
600 return len(object_ids), generate_unpacked_objects(
601 cast(PackedObjectContainer, self),
602 object_ids,
603 progress=progress,
604 ofs_delta=ofs_delta,
605 other_haves=remote_has,
606 )
607
608 def _clear_cached_packs(self) -> None:
609 pack_cache = self._pack_cache
610 self._pack_cache = {}
611 while pack_cache:
612 (name, pack) = pack_cache.popitem()
613 pack.close()
614
615 def _iter_cached_packs(self):
616 return self._pack_cache.values()
617
618 def _update_pack_cache(self) -> list[Pack]:
619 raise NotImplementedError(self._update_pack_cache)
620
621 def close(self) -> None:
622 self._clear_cached_packs()
623
624 @property
625 def packs(self):
626 """List with pack objects."""
627 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
628
629 def count_pack_files(self) -> int:
630 """Count the number of pack files.
631
632 Returns:
633 Number of pack files (excluding those with .keep files)
634 """
635 count = 0
636 for pack in self.packs:
637 # Check if there's a .keep file for this pack
638 keep_path = pack._basename + ".keep"
639 if not os.path.exists(keep_path):
640 count += 1
641 return count
642
643 def _iter_alternate_objects(self):
644 """Iterate over the SHAs of all the objects in alternate stores."""
645 for alternate in self.alternates:
646 yield from alternate
647
648 def _iter_loose_objects(self):
649 """Iterate over the SHAs of all loose objects."""
650 raise NotImplementedError(self._iter_loose_objects)
651
652 def _get_loose_object(self, sha) -> Optional[ShaFile]:
653 raise NotImplementedError(self._get_loose_object)
654
655 def delete_loose_object(self, sha) -> None:
656 """Delete a loose object.
657
658 This method only handles loose objects. For packed objects,
659 use repack(exclude=...) to exclude them during repacking.
660 """
661 raise NotImplementedError(self.delete_loose_object)
662
663 def _remove_pack(self, name) -> None:
664 raise NotImplementedError(self._remove_pack)
665
666 def pack_loose_objects(self):
667 """Pack loose objects.
668
669 Returns: Number of objects packed
670 """
671 objects = set()
672 for sha in self._iter_loose_objects():
673 objects.add((self._get_loose_object(sha), None))
674 self.add_objects(list(objects))
675 for obj, path in objects:
676 self.delete_loose_object(obj.id)
677 return len(objects)
678
679 def repack(self, exclude=None):
680 """Repack the packs in this repository.
681
682 Note that this implementation is fairly naive and currently keeps all
683 objects in memory while it repacks.
684
685 Args:
686 exclude: Optional set of object SHAs to exclude from repacking
687 """
688 if exclude is None:
689 exclude = set()
690
691 loose_objects = set()
692 excluded_loose_objects = set()
693 for sha in self._iter_loose_objects():
694 if sha not in exclude:
695 loose_objects.add(self._get_loose_object(sha))
696 else:
697 excluded_loose_objects.add(sha)
698
699 objects = {(obj, None) for obj in loose_objects}
700 old_packs = {p.name(): p for p in self.packs}
701 for name, pack in old_packs.items():
702 objects.update(
703 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
704 )
705
706 # Only create a new pack if there are objects to pack
707 if objects:
708 # The name of the consolidated pack might match the name of a
709 # pre-existing pack. Take care not to remove the newly created
710 # consolidated pack.
711 consolidated = self.add_objects(objects)
712 old_packs.pop(consolidated.name(), None)
713
714 # Delete loose objects that were packed
715 for obj in loose_objects:
716 self.delete_loose_object(obj.id)
717 # Delete excluded loose objects
718 for sha in excluded_loose_objects:
719 self.delete_loose_object(sha)
720 for name, pack in old_packs.items():
721 self._remove_pack(pack)
722 self._update_pack_cache()
723 return len(objects)
724
725 def __iter__(self):
726 """Iterate over the SHAs that are present in this store."""
727 self._update_pack_cache()
728 for pack in self._iter_cached_packs():
729 try:
730 yield from pack
731 except PackFileDisappeared:
732 pass
733 yield from self._iter_loose_objects()
734 yield from self._iter_alternate_objects()
735
736 def contains_loose(self, sha):
737 """Check if a particular object is present by SHA1 and is loose.
738
739 This does not check alternates.
740 """
741 return self._get_loose_object(sha) is not None
742
743 def get_raw(self, name):
744 """Obtain the raw fulltext for an object.
745
746 Args:
747 name: sha for the object.
748 Returns: tuple with numeric type and object contents.
749 """
750 if name == ZERO_SHA:
751 raise KeyError(name)
752 if len(name) == 40:
753 sha = hex_to_sha(name)
754 hexsha = name
755 elif len(name) == 20:
756 sha = name
757 hexsha = None
758 else:
759 raise AssertionError(f"Invalid object name {name!r}")
760 for pack in self._iter_cached_packs():
761 try:
762 return pack.get_raw(sha)
763 except (KeyError, PackFileDisappeared):
764 pass
765 if hexsha is None:
766 hexsha = sha_to_hex(name)
767 ret = self._get_loose_object(hexsha)
768 if ret is not None:
769 return ret.type_num, ret.as_raw_string()
770 # Maybe something else has added a pack with the object
771 # in the mean time?
772 for pack in self._update_pack_cache():
773 try:
774 return pack.get_raw(sha)
775 except KeyError:
776 pass
777 for alternate in self.alternates:
778 try:
779 return alternate.get_raw(hexsha)
780 except KeyError:
781 pass
782 raise KeyError(hexsha)
783
784 def iter_unpacked_subset(
785 self,
786 shas: set[bytes],
787 include_comp: bool = False,
788 allow_missing: bool = False,
789 convert_ofs_delta: bool = True,
790 ) -> Iterator[UnpackedObject]:
791 todo: set[bytes] = set(shas)
792 for p in self._iter_cached_packs():
793 for unpacked in p.iter_unpacked_subset(
794 todo,
795 include_comp=include_comp,
796 allow_missing=True,
797 convert_ofs_delta=convert_ofs_delta,
798 ):
799 yield unpacked
800 hexsha = sha_to_hex(unpacked.sha())
801 todo.remove(hexsha)
802 # Maybe something else has added a pack with the object
803 # in the mean time?
804 for p in self._update_pack_cache():
805 for unpacked in p.iter_unpacked_subset(
806 todo,
807 include_comp=include_comp,
808 allow_missing=True,
809 convert_ofs_delta=convert_ofs_delta,
810 ):
811 yield unpacked
812 hexsha = sha_to_hex(unpacked.sha())
813 todo.remove(hexsha)
814 for alternate in self.alternates:
815 for unpacked in alternate.iter_unpacked_subset(
816 todo,
817 include_comp=include_comp,
818 allow_missing=True,
819 convert_ofs_delta=convert_ofs_delta,
820 ):
821 yield unpacked
822 hexsha = sha_to_hex(unpacked.sha())
823 todo.remove(hexsha)
824
825 def iterobjects_subset(
826 self, shas: Iterable[bytes], *, allow_missing: bool = False
827 ) -> Iterator[ShaFile]:
828 todo: set[bytes] = set(shas)
829 for p in self._iter_cached_packs():
830 for o in p.iterobjects_subset(todo, allow_missing=True):
831 yield o
832 todo.remove(o.id)
833 # Maybe something else has added a pack with the object
834 # in the mean time?
835 for p in self._update_pack_cache():
836 for o in p.iterobjects_subset(todo, allow_missing=True):
837 yield o
838 todo.remove(o.id)
839 for alternate in self.alternates:
840 for o in alternate.iterobjects_subset(todo, allow_missing=True):
841 yield o
842 todo.remove(o.id)
843 for oid in todo:
844 o = self._get_loose_object(oid)
845 if o is not None:
846 yield o
847 elif not allow_missing:
848 raise KeyError(oid)
849
850 def get_unpacked_object(
851 self, sha1: bytes, *, include_comp: bool = False
852 ) -> UnpackedObject:
853 """Obtain the unpacked object.
854
855 Args:
856 sha1: sha for the object.
857 """
858 if sha1 == ZERO_SHA:
859 raise KeyError(sha1)
860 if len(sha1) == 40:
861 sha = hex_to_sha(sha1)
862 hexsha = sha1
863 elif len(sha1) == 20:
864 sha = sha1
865 hexsha = None
866 else:
867 raise AssertionError(f"Invalid object sha1 {sha1!r}")
868 for pack in self._iter_cached_packs():
869 try:
870 return pack.get_unpacked_object(sha, include_comp=include_comp)
871 except (KeyError, PackFileDisappeared):
872 pass
873 if hexsha is None:
874 hexsha = sha_to_hex(sha1)
875 # Maybe something else has added a pack with the object
876 # in the mean time?
877 for pack in self._update_pack_cache():
878 try:
879 return pack.get_unpacked_object(sha, include_comp=include_comp)
880 except KeyError:
881 pass
882 for alternate in self.alternates:
883 try:
884 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
885 except KeyError:
886 pass
887 raise KeyError(hexsha)
888
889 def add_objects(
890 self,
891 objects: Sequence[tuple[ShaFile, Optional[str]]],
892 progress: Optional[Callable[[str], None]] = None,
    ) -> Optional[Pack]:
894 """Add a set of objects to this object store.
895
896 Args:
897 objects: Iterable over (object, path) tuples, should support
898 __len__.
899 Returns: Pack object of the objects written.
900 """
901 count = len(objects)
902 record_iter = (full_unpacked_object(o) for (o, p) in objects)
903 return self.add_pack_data(count, record_iter, progress=progress)
904
905
906class DiskObjectStore(PackBasedObjectStore):
907 """Git-style object store that exists on disk."""
908
909 path: Union[str, os.PathLike]
910 pack_dir: Union[str, os.PathLike]
911
912 def __init__(
913 self,
914 path: Union[str, os.PathLike],
915 loose_compression_level=-1,
916 pack_compression_level=-1,
917 pack_index_version=None,
918 ) -> None:
919 """Open an object store.
920
921 Args:
922 path: Path of the object store.
923 loose_compression_level: zlib compression level for loose objects
924 pack_compression_level: zlib compression level for pack objects
925 pack_index_version: pack index version to use (1, 2, or 3)
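
        Example (illustrative sketch; the repository path is hypothetical)::

            store = DiskObjectStore("/path/to/repo/.git/objects")
            print(store.count_loose_objects(), "loose objects")
            print(store.count_pack_files(), "pack files")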
926 """
927 super().__init__(
928 pack_compression_level=pack_compression_level,
929 pack_index_version=pack_index_version,
930 )
931 self.path = path
932 self.pack_dir = os.path.join(self.path, PACKDIR)
933 self._alternates = None
934 self.loose_compression_level = loose_compression_level
935 self.pack_compression_level = pack_compression_level
936 self.pack_index_version = pack_index_version
937
938 # Commit graph support - lazy loaded
939 self._commit_graph = None
940 self._use_commit_graph = True # Default to true
941
942 def __repr__(self) -> str:
943 return f"<{self.__class__.__name__}({self.path!r})>"
944
945 @classmethod
946 def from_config(cls, path: Union[str, os.PathLike], config):
947 try:
948 default_compression_level = int(
949 config.get((b"core",), b"compression").decode()
950 )
951 except KeyError:
952 default_compression_level = -1
953 try:
954 loose_compression_level = int(
955 config.get((b"core",), b"looseCompression").decode()
956 )
957 except KeyError:
958 loose_compression_level = default_compression_level
959 try:
960 pack_compression_level = int(
                config.get((b"core",), b"packCompression").decode()
962 )
963 except KeyError:
964 pack_compression_level = default_compression_level
965 try:
966 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
967 except KeyError:
968 pack_index_version = None
969
970 # Read core.commitGraph setting
971 use_commit_graph = config.get_boolean((b"core",), b"commitGraph", True)
972
973 instance = cls(
974 path, loose_compression_level, pack_compression_level, pack_index_version
975 )
976 instance._use_commit_graph = use_commit_graph
977 return instance
978
979 @property
980 def alternates(self):
981 if self._alternates is not None:
982 return self._alternates
983 self._alternates = []
984 for path in self._read_alternate_paths():
985 self._alternates.append(DiskObjectStore(path))
986 return self._alternates
987
988 def _read_alternate_paths(self):
989 try:
990 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
991 except FileNotFoundError:
992 return
993 with f:
994 for line in f.readlines():
995 line = line.rstrip(b"\n")
996 if line.startswith(b"#"):
997 continue
998 if os.path.isabs(line):
999 yield os.fsdecode(line)
1000 else:
1001 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
1002
1003 def add_alternate_path(self, path) -> None:
1004 """Add an alternate path to this object store."""
1005 try:
1006 os.mkdir(os.path.join(self.path, INFODIR))
1007 except FileExistsError:
1008 pass
1009 alternates_path = os.path.join(self.path, INFODIR, "alternates")
1010 with GitFile(alternates_path, "wb") as f:
1011 try:
1012 orig_f = open(alternates_path, "rb")
1013 except FileNotFoundError:
1014 pass
1015 else:
1016 with orig_f:
1017 f.write(orig_f.read())
1018 f.write(os.fsencode(path) + b"\n")
1019
1020 if not os.path.isabs(path):
1021 path = os.path.join(self.path, path)
1022 self.alternates.append(DiskObjectStore(path))
1023
1024 def _update_pack_cache(self):
1025 """Read and iterate over new pack files and cache them."""
1026 try:
1027 pack_dir_contents = os.listdir(self.pack_dir)
1028 except FileNotFoundError:
1029 self.close()
1030 return []
1031 pack_files = set()
1032 for name in pack_dir_contents:
1033 if name.startswith("pack-") and name.endswith(".pack"):
1034 # verify that idx exists first (otherwise the pack was not yet
1035 # fully written)
1036 idx_name = os.path.splitext(name)[0] + ".idx"
1037 if idx_name in pack_dir_contents:
1038 pack_name = name[: -len(".pack")]
1039 pack_files.add(pack_name)
1040
1041 # Open newly appeared pack files
1042 new_packs = []
1043 for f in pack_files:
1044 if f not in self._pack_cache:
1045 pack = Pack(os.path.join(self.pack_dir, f))
1046 new_packs.append(pack)
1047 self._pack_cache[f] = pack
1048 # Remove disappeared pack files
1049 for f in set(self._pack_cache) - pack_files:
1050 self._pack_cache.pop(f).close()
1051 return new_packs
1052
1053 def _get_shafile_path(self, sha):
1054 # Check from object dir
1055 return hex_to_filename(self.path, sha)
1056
1057 def _iter_loose_objects(self):
1058 for base in os.listdir(self.path):
1059 if len(base) != 2:
1060 continue
1061 for rest in os.listdir(os.path.join(self.path, base)):
1062 sha = os.fsencode(base + rest)
1063 if not valid_hexsha(sha):
1064 continue
1065 yield sha
1066
1067 def count_loose_objects(self) -> int:
1068 """Count the number of loose objects in the object store.
1069
1070 Returns:
1071 Number of loose objects
1072 """
1073 count = 0
1074 if not os.path.exists(self.path):
1075 return 0
1076
1077 for i in range(256):
1078 subdir = os.path.join(self.path, f"{i:02x}")
1079 try:
1080 count += len(
1081 [
1082 name
1083 for name in os.listdir(subdir)
1084 if len(name) == 38 # 40 - 2 for the prefix
1085 ]
1086 )
1087 except FileNotFoundError:
1088 # Directory may have been removed or is inaccessible
1089 continue
1090
1091 return count
1092
1093 def _get_loose_object(self, sha):
1094 path = self._get_shafile_path(sha)
1095 try:
1096 return ShaFile.from_path(path)
1097 except FileNotFoundError:
1098 return None
1099
1100 def delete_loose_object(self, sha) -> None:
1101 os.remove(self._get_shafile_path(sha))
1102
1103 def get_object_mtime(self, sha):
1104 """Get the modification time of an object.
1105
1106 Args:
1107 sha: SHA1 of the object
1108
1109 Returns:
1110 Modification time as seconds since epoch
1111
1112 Raises:
1113 KeyError: if the object is not found
1114 """
1115 # First check if it's a loose object
1116 if self.contains_loose(sha):
1117 path = self._get_shafile_path(sha)
1118 try:
1119 return os.path.getmtime(path)
1120 except FileNotFoundError:
1121 pass
1122
1123 # Check if it's in a pack file
1124 for pack in self.packs:
1125 try:
1126 if sha in pack:
1127 # Use the pack file's mtime for packed objects
1128 pack_path = pack._data_path
1129 try:
1130 return os.path.getmtime(pack_path)
1131 except (FileNotFoundError, AttributeError):
1132 pass
1133 except PackFileDisappeared:
1134 pass
1135
1136 raise KeyError(sha)
1137
1138 def _remove_pack(self, pack) -> None:
1139 try:
1140 del self._pack_cache[os.path.basename(pack._basename)]
1141 except KeyError:
1142 pass
1143 pack.close()
1144 os.remove(pack.data.path)
1145 os.remove(pack.index.path)
1146
1147 def _get_pack_basepath(self, entries):
1148 suffix = iter_sha1(entry[0] for entry in entries)
1149 # TODO: Handle self.pack_dir being bytes
1150 suffix = suffix.decode("ascii")
1151 return os.path.join(self.pack_dir, "pack-" + suffix)
1152
1153 def _complete_pack(self, f, path, num_objects, indexer, progress=None):
1154 """Move a specific file containing a pack into the pack directory.
1155
1156 Note: The file should be on the same file system as the
1157 packs directory.
1158
        Args:
          f: Open file object for the pack.
          path: Path to the pack file.
          num_objects: Number of objects in the pack, used for progress
            reporting.
          indexer: A PackIndexer for indexing the pack.
          progress: Optional progress reporting function.
1163 """
1164 entries = []
1165 for i, entry in enumerate(indexer):
1166 if progress is not None:
1167 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1168 entries.append(entry)
1169
1170 pack_sha, extra_entries = extend_pack(
1171 f,
1172 indexer.ext_refs(),
1173 get_raw=self.get_raw,
1174 compression_level=self.pack_compression_level,
1175 progress=progress,
1176 )
1177 f.flush()
1178 try:
1179 fileno = f.fileno()
1180 except AttributeError:
1181 pass
1182 else:
1183 os.fsync(fileno)
1184 f.close()
1185
1186 entries.extend(extra_entries)
1187
1188 # Move the pack in.
1189 entries.sort()
1190 pack_base_name = self._get_pack_basepath(entries)
1191
1192 for pack in self.packs:
1193 if pack._basename == pack_base_name:
1194 return pack
1195
1196 target_pack_path = pack_base_name + ".pack"
1197 target_index_path = pack_base_name + ".idx"
1198 if sys.platform == "win32":
1199 # Windows might have the target pack file lingering. Attempt
1200 # removal, silently passing if the target does not exist.
1201 with suppress(FileNotFoundError):
1202 os.remove(target_pack_path)
1203 os.rename(path, target_pack_path)
1204
1205 # Write the index.
1206 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
1207 write_pack_index(
1208 index_file, entries, pack_sha, version=self.pack_index_version
1209 )
1210
1211 # Add the pack to the store and return it.
1212 final_pack = Pack(pack_base_name)
1213 final_pack.check_length_and_checksum()
1214 self._add_cached_pack(pack_base_name, final_pack)
1215 return final_pack
1216
1217 def add_thin_pack(self, read_all, read_some, progress=None):
1218 """Add a new thin pack to this object store.
1219
1220 Thin packs are packs that contain deltas with parents that exist
1221 outside the pack. They should never be placed in the object store
1222 directly, and always indexed and completed as they are copied.
1223
1224 Args:
1225 read_all: Read function that blocks until the number of
1226 requested bytes are read.
1227 read_some: Read function that returns at least one byte, but may
1228 not return the number of bytes requested.
1229 Returns: A Pack object pointing at the now-completed thin pack in the
1230 objects/pack directory.
1231 """
1232 import tempfile
1233
1234 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1235 with os.fdopen(fd, "w+b") as f:
1236 os.chmod(path, PACK_MODE)
1237 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
1238 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
1239 copier.verify(progress=progress)
1240 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1241
1242 def add_pack(self):
1243 """Add a new pack to this object store.
1244
        Returns: Fileobject to write to, a commit function to call when the
          pack is finished, and an abort function to call to discard it.
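
        Example (illustrative sketch; ``pack_bytes`` is assumed to hold a
        complete pack stream, including header and trailing checksum)::

            f, commit, abort = store.add_pack()
            try:
                f.write(pack_bytes)  # pack_bytes: hypothetical pack contents
            except BaseException:
                abort()
                raise
            else:
                commit()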
1248 """
1249 import tempfile
1250
1251 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1252 f = os.fdopen(fd, "w+b")
1253 os.chmod(path, PACK_MODE)
1254
1255 def commit():
1256 if f.tell() > 0:
1257 f.seek(0)
1258 with PackData(path, f) as pd:
1259 indexer = PackIndexer.for_pack_data(
1260 pd, resolve_ext_ref=self.get_raw
1261 )
1262 return self._complete_pack(f, path, len(pd), indexer)
1263 else:
1264 f.close()
1265 os.remove(path)
1266 return None
1267
1268 def abort() -> None:
1269 f.close()
1270 os.remove(path)
1271
1272 return f, commit, abort
1273
1274 def add_object(self, obj) -> None:
1275 """Add a single object to this object store.
1276
1277 Args:
1278 obj: Object to add
1279 """
1280 path = self._get_shafile_path(obj.id)
1281 dir = os.path.dirname(path)
1282 try:
1283 os.mkdir(dir)
1284 except FileExistsError:
1285 pass
1286 if os.path.exists(path):
1287 return # Already there, no need to write again
1288 with GitFile(path, "wb", mask=PACK_MODE) as f:
1289 f.write(
1290 obj.as_legacy_object(compression_level=self.loose_compression_level)
1291 )
1292
1293 @classmethod
1294 def init(cls, path: Union[str, os.PathLike]):
1295 try:
1296 os.mkdir(path)
1297 except FileExistsError:
1298 pass
1299 os.mkdir(os.path.join(path, "info"))
1300 os.mkdir(os.path.join(path, PACKDIR))
1301 return cls(path)
1302
1303 def iter_prefix(self, prefix):
1304 if len(prefix) < 2:
1305 yield from super().iter_prefix(prefix)
1306 return
1307 seen = set()
1308 dir = prefix[:2].decode()
1309 rest = prefix[2:].decode()
1310 try:
1311 for name in os.listdir(os.path.join(self.path, dir)):
1312 if name.startswith(rest):
1313 sha = os.fsencode(dir + name)
1314 if sha not in seen:
1315 seen.add(sha)
1316 yield sha
1317 except FileNotFoundError:
1318 pass
1319
1320 for p in self.packs:
1321 bin_prefix = (
1322 binascii.unhexlify(prefix)
1323 if len(prefix) % 2 == 0
1324 else binascii.unhexlify(prefix[:-1])
1325 )
1326 for sha in p.index.iter_prefix(bin_prefix):
1327 sha = sha_to_hex(sha)
1328 if sha.startswith(prefix) and sha not in seen:
1329 seen.add(sha)
1330 yield sha
1331 for alternate in self.alternates:
1332 for sha in alternate.iter_prefix(prefix):
1333 if sha not in seen:
1334 seen.add(sha)
1335 yield sha
1336
1337 def get_commit_graph(self):
1338 """Get the commit graph for this object store.
1339
1340 Returns:
1341 CommitGraph object if available, None otherwise
1342 """
1343 if not self._use_commit_graph:
1344 return None
1345
1346 if self._commit_graph is None:
1347 from .commit_graph import read_commit_graph
1348
1349 # Look for commit graph in our objects directory
1350 graph_file = os.path.join(self.path, "info", "commit-graph")
1351 if os.path.exists(graph_file):
1352 self._commit_graph = read_commit_graph(graph_file)
1353 return self._commit_graph
1354
1355 def write_commit_graph(self, refs=None, reachable=True) -> None:
1356 """Write a commit graph file for this object store.
1357
1358 Args:
1359 refs: List of refs to include. If None, includes all refs from object store.
1360 reachable: If True, includes all commits reachable from refs.
1361 If False, only includes the direct ref targets.
1362 """
1363 from .commit_graph import get_reachable_commits
1364
1365 if refs is None:
1366 # Get all commit objects from the object store
1367 all_refs = []
1368 # Iterate through all objects to find commits
1369 for sha in self:
1370 try:
1371 obj = self[sha]
1372 if obj.type_name == b"commit":
1373 all_refs.append(sha)
1374 except KeyError:
1375 continue
1376 else:
1377 # Use provided refs
1378 all_refs = refs
1379
1380 if not all_refs:
1381 return # No commits to include
1382
1383 if reachable:
1384 # Get all reachable commits
1385 commit_ids = get_reachable_commits(self, all_refs)
1386 else:
1387 # Just use the direct ref targets - ensure they're hex ObjectIDs
1388 commit_ids = []
1389 for ref in all_refs:
1390 if isinstance(ref, bytes) and len(ref) == 40:
1391 # Already hex ObjectID
1392 commit_ids.append(ref)
1393 elif isinstance(ref, bytes) and len(ref) == 20:
1394 # Binary SHA, convert to hex ObjectID
1397 commit_ids.append(sha_to_hex(ref))
1398 else:
1399 # Assume it's already correct format
1400 commit_ids.append(ref)
1401
1402 if commit_ids:
1403 # Write commit graph directly to our object store path
1404 # Generate the commit graph
1405 from .commit_graph import generate_commit_graph
1406
1407 graph = generate_commit_graph(self, commit_ids)
1408
1409 if graph.entries:
1410 # Ensure the info directory exists
1411 info_dir = os.path.join(self.path, "info")
1412 os.makedirs(info_dir, exist_ok=True)
1413
1414 # Write using GitFile for atomic operation
1415 graph_path = os.path.join(info_dir, "commit-graph")
1416 with GitFile(graph_path, "wb") as f:
1417 graph.write_to_file(f)
1418
1419 # Clear cached commit graph so it gets reloaded
1420 self._commit_graph = None
1421
1422 def prune(self, grace_period: Optional[int] = None) -> None:
1423 """Prune/clean up this object store.
1424
1425 This removes temporary files that were left behind by interrupted
1426 pack operations. These are files that start with ``tmp_pack_`` in the
1427 repository directory or files with .pack extension but no corresponding
1428 .idx file in the pack directory.
1429
1430 Args:
1431 grace_period: Grace period in seconds for removing temporary files.
1432 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
1433 """
1434 import glob
1435
1436 if grace_period is None:
1437 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1438
1439 # Clean up tmp_pack_* files in the repository directory
1440 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1441 # Check if file is old enough (more than grace period)
1442 mtime = os.path.getmtime(tmp_file)
1443 if time.time() - mtime > grace_period:
1444 os.remove(tmp_file)
1445
1446 # Clean up orphaned .pack files without corresponding .idx files
1447 try:
1448 pack_dir_contents = os.listdir(self.pack_dir)
1449 except FileNotFoundError:
1450 return
1451
1452 pack_files = {}
1453 idx_files = set()
1454
1455 for name in pack_dir_contents:
1456 if name.endswith(".pack"):
1457 base_name = name[:-5] # Remove .pack extension
1458 pack_files[base_name] = name
1459 elif name.endswith(".idx"):
1460 base_name = name[:-4] # Remove .idx extension
1461 idx_files.add(base_name)
1462
1463 # Remove .pack files without corresponding .idx files
1464 for base_name, pack_name in pack_files.items():
1465 if base_name not in idx_files:
1466 pack_path = os.path.join(self.pack_dir, pack_name)
1467 # Check if file is old enough (more than grace period)
1468 mtime = os.path.getmtime(pack_path)
1469 if time.time() - mtime > grace_period:
1470 os.remove(pack_path)
1471
1472
1473class MemoryObjectStore(BaseObjectStore):
1474 """Object store that keeps all objects in memory."""
1475
1476 def __init__(self) -> None:
1477 super().__init__()
        self._data: dict[bytes, ShaFile] = {}
1479 self.pack_compression_level = -1
1480
1481 def _to_hexsha(self, sha):
1482 if len(sha) == 40:
1483 return sha
1484 elif len(sha) == 20:
1485 return sha_to_hex(sha)
1486 else:
1487 raise ValueError(f"Invalid sha {sha!r}")
1488
1489 def contains_loose(self, sha):
1490 """Check if a particular object is present by SHA1 and is loose."""
1491 return self._to_hexsha(sha) in self._data
1492
1493 def contains_packed(self, sha) -> bool:
1494 """Check if a particular object is present by SHA1 and is packed."""
1495 return False
1496
1497 def __iter__(self):
1498 """Iterate over the SHAs that are present in this store."""
1499 return iter(self._data.keys())
1500
1501 @property
1502 def packs(self):
1503 """List with pack objects."""
1504 return []
1505
1506 def get_raw(self, name: ObjectID):
1507 """Obtain the raw text for an object.
1508
1509 Args:
1510 name: sha for the object.
1511 Returns: tuple with numeric type and object contents.
1512 """
1513 obj = self[self._to_hexsha(name)]
1514 return obj.type_num, obj.as_raw_string()
1515
1516 def __getitem__(self, name: ObjectID):
1517 return self._data[self._to_hexsha(name)].copy()
1518
1519 def __delitem__(self, name: ObjectID) -> None:
1520 """Delete an object from this store, for testing only."""
1521 del self._data[self._to_hexsha(name)]
1522
1523 def add_object(self, obj) -> None:
1524 """Add a single object to this object store."""
1525 self._data[obj.id] = obj.copy()
1526
1527 def add_objects(self, objects, progress=None) -> None:
1528 """Add a set of objects to this object store.
1529
1530 Args:
1531 objects: Iterable over a list of (object, path) tuples
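
        Example (illustrative)::

            from dulwich.objects import Blob

            store = MemoryObjectStore()
            blob = Blob.from_string(b"hello world")
            store.add_objects([(blob, None)])
            assert blob.id in store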
1532 """
1533 for obj, path in objects:
1534 self.add_object(obj)
1535
1536 def add_pack(self):
1537 """Add a new pack to this object store.
1538
1539 Because this object store doesn't support packs, we extract and add the
1540 individual objects.
1541
        Returns: Fileobject to write to, a commit function to call when the
          pack is finished, and an abort function.
1544 """
1545 from tempfile import SpooledTemporaryFile
1546
1547 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
1548
1549 def commit() -> None:
1550 size = f.tell()
1551 if size > 0:
1552 f.seek(0)
1553 p = PackData.from_file(f, size)
1554 for obj in PackInflater.for_pack_data(p, self.get_raw):
1555 self.add_object(obj)
1556 p.close()
1557 f.close()
1558 else:
1559 f.close()
1560
1561 def abort() -> None:
1562 f.close()
1563
1564 return f, commit, abort
1565
1566 def add_pack_data(
1567 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
1568 ) -> None:
1569 """Add pack data to this object store.
1570
        Args:
          count: Number of records to write
          unpacked_objects: Iterator over UnpackedObject instances to write
          progress: Optional progress reporting function
1573 """
1574 if count == 0:
1575 return
1576
1577 # Since MemoryObjectStore doesn't support pack files, we need to
1578 # extract individual objects. To handle deltas properly, we write
1579 # to a temporary pack and then use PackInflater to resolve them.
1580 f, commit, abort = self.add_pack()
1581 try:
1582 write_pack_data(
1583 f.write,
1584 unpacked_objects,
1585 num_records=count,
1586 progress=progress,
1587 )
1588 except BaseException:
1589 abort()
1590 raise
1591 else:
1592 commit()
1593
1594 def add_thin_pack(self, read_all, read_some, progress=None) -> None:
1595 """Add a new thin pack to this object store.
1596
1597 Thin packs are packs that contain deltas with parents that exist
1598 outside the pack. Because this object store doesn't support packs, we
1599 extract and add the individual objects.
1600
1601 Args:
1602 read_all: Read function that blocks until the number of
1603 requested bytes are read.
1604 read_some: Read function that returns at least one byte, but may
1605 not return the number of bytes requested.
1606 """
1607 f, commit, abort = self.add_pack()
1608 try:
1609 copier = PackStreamCopier(read_all, read_some, f)
1610 copier.verify()
1611 except BaseException:
1612 abort()
1613 raise
1614 else:
1615 commit()
1616
1617
1618class ObjectIterator(Protocol):
1619 """Interface for iterating over objects."""
1620
1621 def iterobjects(self) -> Iterator[ShaFile]:
1622 raise NotImplementedError(self.iterobjects)
1623
1624
1625def tree_lookup_path(lookup_obj, root_sha, path):
1626 """Look up an object in a Git tree.
1627
1628 Args:
1629 lookup_obj: Callback for retrieving object by SHA1
1630 root_sha: SHA1 of the root tree
1631 path: Path to lookup
1632 Returns: A tuple of (mode, SHA) of the resulting path.
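
    Example (illustrative sketch; ``store`` is an object store that contains
    ``root_sha``, and the path below is hypothetical)::

        mode, sha = tree_lookup_path(
            store.__getitem__, root_sha, b"docs/readme.txt"
        )
        blob = store[sha]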
1633 """
1634 tree = lookup_obj(root_sha)
1635 if not isinstance(tree, Tree):
1636 raise NotTreeError(root_sha)
1637 return tree.lookup_path(lookup_obj, path)
1638
1639
1640def _collect_filetree_revs(
1641 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
1642) -> None:
1643 """Collect SHA1s of files and directories for specified tree.
1644
1645 Args:
1646 obj_store: Object store to get objects by SHA from
1647 tree_sha: tree reference to walk
1648 kset: set to fill with references to files and directories
1649 """
1650 filetree = obj_store[tree_sha]
1651 assert isinstance(filetree, Tree)
1652 for name, mode, sha in filetree.iteritems():
1653 if not S_ISGITLINK(mode) and sha not in kset:
1654 kset.add(sha)
1655 if stat.S_ISDIR(mode):
1656 _collect_filetree_revs(obj_store, sha, kset)
1657
1658
1659def _split_commits_and_tags(
1660 obj_store: ObjectContainer, lst, *, ignore_unknown=False
1661) -> tuple[set[bytes], set[bytes], set[bytes]]:
1662 """Split object id list into three lists with commit, tag, and other SHAs.
1663
    Commits referenced by tags are included in the commits list as well.
    Only SHA1s known to this repository get through; unless the
    ignore_unknown argument is True, a KeyError is raised for any SHA1
    missing from the repository.
1668
1669 Args:
1670 obj_store: Object store to get objects by SHA1 from
1671 lst: Collection of commit and tag SHAs
1672 ignore_unknown: True to skip SHA1 missing in the repository
1673 silently.
1674 Returns: A tuple of (commits, tags, others) SHA1s
1675 """
1676 commits: set[bytes] = set()
1677 tags: set[bytes] = set()
1678 others: set[bytes] = set()
1679 for e in lst:
1680 try:
1681 o = obj_store[e]
1682 except KeyError:
1683 if not ignore_unknown:
1684 raise
1685 else:
1686 if isinstance(o, Commit):
1687 commits.add(e)
1688 elif isinstance(o, Tag):
1689 tags.add(e)
1690 tagged = o.object[1]
1691 c, t, os = _split_commits_and_tags(
1692 obj_store, [tagged], ignore_unknown=ignore_unknown
1693 )
1694 commits |= c
1695 tags |= t
1696 others |= os
1697 else:
1698 others.add(e)
1699 return (commits, tags, others)
1700
1701
1702class MissingObjectFinder:
1703 """Find the objects missing from another object store.
1704
1705 Args:
1706 object_store: Object store containing at least all objects to be
1707 sent
1708 haves: SHA1s of commits not to send (already present in target)
      wants: SHA1s of commits to send
      shallow: Optional set of shallow commit SHA1s to skip
1710 progress: Optional function to report progress to.
1711 get_tagged: Function that returns a dict of pointed-to sha -> tag
1712 sha for including tags.
1713 get_parents: Optional function for getting the parents of a commit.
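
    Example (illustrative sketch; ``store`` is the local object store and the
    two head SHA1s come from the caller's context)::

        finder = MissingObjectFinder(store, haves=[remote_head], wants=[local_head])
        for sha, pack_hint in finder:
            print(sha)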
1714 """
1715
1716 def __init__(
1717 self,
1718 object_store,
1719 haves,
1720 wants,
1721 *,
1722 shallow=None,
1723 progress=None,
1724 get_tagged=None,
1725 get_parents=lambda commit: commit.parents,
1726 ) -> None:
1727 self.object_store = object_store
1728 if shallow is None:
1729 shallow = set()
1730 self._get_parents = get_parents
1731 # process Commits and Tags differently
        # Note: while haves may list commits/tags not available locally,
        # and such SHAs are filtered out by _split_commits_and_tags, wants
        # must list only known SHAs; otherwise _split_commits_and_tags
        # raises KeyError.
1736 have_commits, have_tags, have_others = _split_commits_and_tags(
1737 object_store, haves, ignore_unknown=True
1738 )
1739 want_commits, want_tags, want_others = _split_commits_and_tags(
1740 object_store, wants, ignore_unknown=False
1741 )
1742 # all_ancestors is a set of commits that shall not be sent
1743 # (complete repository up to 'haves')
1744 all_ancestors = _collect_ancestors(
1745 object_store, have_commits, shallow=shallow, get_parents=self._get_parents
1746 )[0]
1747 # all_missing - complete set of commits between haves and wants
1748 # common - commits from all_ancestors we hit into while
1749 # traversing parent hierarchy of wants
1750 missing_commits, common_commits = _collect_ancestors(
1751 object_store,
1752 want_commits,
1753 all_ancestors,
1754 shallow=shallow,
1755 get_parents=self._get_parents,
1756 )
1757 self.remote_has: set[bytes] = set()
1758 # Now, fill sha_done with commits and revisions of
1759 # files and directories known to be both locally
1760 # and on target. Thus these commits and files
1761 # won't get selected for fetch
1762 for h in common_commits:
1763 self.remote_has.add(h)
1764 cmt = object_store[h]
1765 _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
1766 # record tags we have as visited, too
1767 for t in have_tags:
1768 self.remote_has.add(t)
1769 self.sha_done = set(self.remote_has)
1770
1771 # in fact, what we 'want' is commits, tags, and others
1772 # we've found missing
1773 self.objects_to_send: set[
1774 tuple[ObjectID, Optional[bytes], Optional[int], bool]
1775 ] = {(w, None, Commit.type_num, False) for w in missing_commits}
1776 missing_tags = want_tags.difference(have_tags)
1777 self.objects_to_send.update(
1778 {(w, None, Tag.type_num, False) for w in missing_tags}
1779 )
1780 missing_others = want_others.difference(have_others)
1781 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
1782
1783 if progress is None:
1784 self.progress = lambda x: None
1785 else:
1786 self.progress = progress
1787 self._tagged = (get_tagged and get_tagged()) or {}
1788
1789 def get_remote_has(self):
1790 return self.remote_has
1791
1792 def add_todo(
1793 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
1794 ) -> None:
1795 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
1796
1797 def __next__(self) -> tuple[bytes, Optional[PackHint]]:
1798 while True:
1799 if not self.objects_to_send:
1800 self.progress(
1801 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
1802 )
1803 raise StopIteration
1804 (sha, name, type_num, leaf) = self.objects_to_send.pop()
1805 if sha not in self.sha_done:
1806 break
1807 if not leaf:
1808 o = self.object_store[sha]
1809 if isinstance(o, Commit):
1810 self.add_todo([(o.tree, b"", Tree.type_num, False)])
1811 elif isinstance(o, Tree):
1812 self.add_todo(
1813 [
1814 (
1815 s,
1816 n,
1817 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
1818 not stat.S_ISDIR(m),
1819 )
1820 for n, m, s in o.iteritems()
1821 if not S_ISGITLINK(m)
1822 ]
1823 )
1824 elif isinstance(o, Tag):
1825 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
1826 if sha in self._tagged:
1827 self.add_todo([(self._tagged[sha], None, None, True)])
1828 self.sha_done.add(sha)
1829 if len(self.sha_done) % 1000 == 0:
1830 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
1831 if type_num is None:
1832 pack_hint = None
1833 else:
1834 pack_hint = (type_num, name)
1835 return (sha, pack_hint)
1836
1837 def __iter__(self):
1838 return self
1839
1840
1841class ObjectStoreGraphWalker:
1842 """Graph walker that finds what commits are missing from an object store."""
1843
1844 heads: set[ObjectID]
1845 """Revisions without descendants in the local repo."""
1846
    get_parents: Callable[[ObjectID], list[ObjectID]]
1848 """Function to retrieve parents in the local repo."""
1849
1850 shallow: set[ObjectID]
1851
1852 def __init__(
1853 self,
1854 local_heads: Iterable[ObjectID],
1855 get_parents,
1856 shallow: Optional[set[ObjectID]] = None,
1857 update_shallow=None,
1858 ) -> None:
1859 """Create a new instance.
1860
        Args:
          local_heads: Heads to start search with
          get_parents: Function for finding the parents of a SHA1.
          shallow: Optional set of shallow commit SHA1s
          update_shallow: Optional callback for updating the shallow set
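
        Example (illustrative sketch; ``store``, ``head`` and ``remote_has``
        stand for a local object store, a local head SHA1 and a predicate
        supplied by the caller)::

            walker = ObjectStoreGraphWalker([head], lambda sha: store[sha].parents)
            sha = next(walker)
            if sha is not None and remote_has(sha):
                walker.ack(sha)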
1864 """
1865 self.heads = set(local_heads)
1866 self.get_parents = get_parents
1867 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
1868 if shallow is None:
1869 shallow = set()
1870 self.shallow = shallow
1871 self.update_shallow = update_shallow
1872
1873 def nak(self) -> None:
1874 """Nothing in common was found."""
1875
1876 def ack(self, sha: ObjectID) -> None:
1877 """Ack that a revision and its ancestors are present in the source."""
1878 if len(sha) != 40:
1879 raise ValueError(f"unexpected sha {sha!r} received")
1880 ancestors = {sha}
1881
1882 # stop if we run out of heads to remove
1883 while self.heads:
1884 for a in ancestors:
1885 if a in self.heads:
1886 self.heads.remove(a)
1887
1888 # collect all ancestors
1889 new_ancestors = set()
1890 for a in ancestors:
1891 ps = self.parents.get(a)
1892 if ps is not None:
1893 new_ancestors.update(ps)
1894 self.parents[a] = None
1895
1896 # no more ancestors; stop
1897 if not new_ancestors:
1898 break
1899
1900 ancestors = new_ancestors
1901
1902 def next(self):
1903 """Iterate over ancestors of heads in the target."""
1904 if self.heads:
1905 ret = self.heads.pop()
1906 try:
1907 ps = self.get_parents(ret)
1908 except KeyError:
1909 return None
1910 self.parents[ret] = ps
1911 self.heads.update([p for p in ps if p not in self.parents])
1912 return ret
1913 return None
1914
1915 __next__ = next
1916
1917
1918def commit_tree_changes(object_store, tree, changes):
1919 """Commit a specified set of changes to a tree structure.
1920
1921 This will apply a set of changes on top of an existing tree, storing new
1922 objects in object_store.
1923
    changes are a list of tuples with (path, mode, object_sha). Paths can
    be both blobs and trees. Setting the mode and object sha to None
    deletes the path.
1927
1928 This method works especially well if there are only a small
1929 number of changes to a big tree. For a large number of changes
1930 to a large tree, use e.g. commit_tree.
1931
1932 Args:
1933 object_store: Object store to store new objects in
1934 and retrieve old ones from.
1935 tree: Original tree root
1936 changes: changes to apply
1937 Returns: New tree root object
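
    Example (an illustrative sketch; assumes ``blob`` has already been added
    to object_store and that ``obsolete.txt`` exists in the tree):

      new_root = commit_tree_changes(
          object_store,
          object_store[tree_id],
          [
              (b"docs/README", 0o100644, blob.id),  # add or update a blob
              (b"obsolete.txt", None, None),  # delete a path
          ],
      )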
1938 """
1939 # TODO(jelmer): Save up the objects and add them using .add_objects
1940 # rather than with individual calls to .add_object.
1941 nested_changes = {}
1942 for path, new_mode, new_sha in changes:
1943 try:
1944 (dirname, subpath) = path.split(b"/", 1)
1945 except ValueError:
1946 if new_sha is None:
1947 del tree[path]
1948 else:
1949 tree[path] = (new_mode, new_sha)
1950 else:
1951 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
1952 for name, subchanges in nested_changes.items():
1953 try:
1954 orig_subtree = object_store[tree[name][1]]
1955 except KeyError:
1956 orig_subtree = Tree()
1957 subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
1958 if len(subtree) == 0:
1959 del tree[name]
1960 else:
1961 tree[name] = (stat.S_IFDIR, subtree.id)
1962 object_store.add_object(tree)
1963 return tree
1964
1965
1966class OverlayObjectStore(BaseObjectStore):
1967 """Object store that can overlay multiple object stores."""
1968
1969 def __init__(self, bases, add_store=None) -> None:
1970 self.bases = bases
1971 self.add_store = add_store
1972
1973 def add_object(self, object):
1974 if self.add_store is None:
1975 raise NotImplementedError(self.add_object)
1976 return self.add_store.add_object(object)
1977
1978 def add_objects(self, objects, progress=None):
1979 if self.add_store is None:
            raise NotImplementedError(self.add_objects)
1981 return self.add_store.add_objects(objects, progress)
1982
1983 @property
1984 def packs(self):
1985 ret = []
1986 for b in self.bases:
1987 ret.extend(b.packs)
1988 return ret
1989
1990 def __iter__(self):
1991 done = set()
1992 for b in self.bases:
1993 for o_id in b:
1994 if o_id not in done:
1995 yield o_id
1996 done.add(o_id)
1997
1998 def iterobjects_subset(
1999 self, shas: Iterable[bytes], *, allow_missing: bool = False
2000 ) -> Iterator[ShaFile]:
2001 todo = set(shas)
2002 found: set[bytes] = set()
2003
2004 for b in self.bases:
2005 # Create a copy of todo for each base to avoid modifying
2006 # the set while iterating through it
2007 current_todo = todo - found
2008 for o in b.iterobjects_subset(current_todo, allow_missing=True):
2009 yield o
2010 found.add(o.id)
2011
2012 # Check for any remaining objects not found
2013 missing = todo - found
2014 if missing and not allow_missing:
2015 raise KeyError(next(iter(missing)))
2016
2017 def iter_unpacked_subset(
2018 self,
2019 shas: Iterable[bytes],
2020 *,
2021 include_comp=False,
2022 allow_missing: bool = False,
2023 convert_ofs_delta=True,
    ) -> Iterator[UnpackedObject]:
2025 todo = set(shas)
2026 for b in self.bases:
2027 for o in b.iter_unpacked_subset(
2028 todo,
2029 include_comp=include_comp,
2030 allow_missing=True,
2031 convert_ofs_delta=convert_ofs_delta,
2032 ):
2033 yield o
2034 todo.remove(o.id)
2035 if todo and not allow_missing:
            raise KeyError(next(iter(todo)))
2037
2038 def get_raw(self, sha_id):
2039 for b in self.bases:
2040 try:
2041 return b.get_raw(sha_id)
2042 except KeyError:
2043 pass
2044 raise KeyError(sha_id)
2045
2046 def contains_packed(self, sha) -> bool:
2047 for b in self.bases:
2048 if b.contains_packed(sha):
2049 return True
2050 return False
2051
2052 def contains_loose(self, sha) -> bool:
2053 for b in self.bases:
2054 if b.contains_loose(sha):
2055 return True
2056 return False
2057
2058
2059def read_packs_file(f):
2060 """Yield the packs listed in a packs file."""
2061 for line in f.read().splitlines():
2062 if not line:
2063 continue
2064 (kind, name) = line.split(b" ", 1)
2065 if kind != b"P":
2066 continue
2067 yield os.fsdecode(name)
2068
2069
2070class BucketBasedObjectStore(PackBasedObjectStore):
2071 """Object store implementation that uses a bucket store like S3 as backend."""
2072
2073 def _iter_loose_objects(self):
2074 """Iterate over the SHAs of all loose objects."""
2075 return iter([])
2076
2077 def _get_loose_object(self, sha) -> None:
2078 return None
2079
2080 def delete_loose_object(self, sha) -> None:
        # Loose objects are never stored here, so there is nothing to delete.
2082 pass
2083
2084 def _remove_pack(self, name) -> None:
2085 raise NotImplementedError(self._remove_pack)
2086
2087 def _iter_pack_names(self) -> Iterator[str]:
2088 raise NotImplementedError(self._iter_pack_names)
2089
2090 def _get_pack(self, name) -> Pack:
2091 raise NotImplementedError(self._get_pack)
2092
2093 def _update_pack_cache(self):
2094 pack_files = set(self._iter_pack_names())
2095
2096 # Open newly appeared pack files
2097 new_packs = []
2098 for f in pack_files:
2099 if f not in self._pack_cache:
2100 pack = self._get_pack(f)
2101 new_packs.append(pack)
2102 self._pack_cache[f] = pack
2103 # Remove disappeared pack files
2104 for f in set(self._pack_cache) - pack_files:
2105 self._pack_cache.pop(f).close()
2106 return new_packs
2107
2108 def _upload_pack(self, basename, pack_file, index_file) -> None:
2109 raise NotImplementedError
2110
2111 def add_pack(self):
2112 """Add a new pack to this object store.
2113
2114 Returns: Fileobject to write to, a commit function to
2115 call when the pack is finished and an abort
2116 function.
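
        Example (an illustrative sketch; ``store`` is an instance of a
        concrete subclass and ``pack_bytes`` is pack data obtained
        elsewhere, e.g. from a fetch):

          f, commit, abort = store.add_pack()
          try:
              f.write(pack_bytes)
          except BaseException:
              abort()
              raise
          else:
              commit()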
2117 """
2118 import tempfile
2119
2120 pf = tempfile.SpooledTemporaryFile(
2121 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2122 )
2123
2124 def commit():
2125 if pf.tell() == 0:
2126 pf.close()
2127 return None
2128
2129 pf.seek(0)
2130 p = PackData(pf.name, pf)
2131 entries = p.sorted_entries()
2132 basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
2133 idxf = tempfile.SpooledTemporaryFile(
2134 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
2135 )
2136 checksum = p.get_stored_checksum()
2137 write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
2138 idxf.seek(0)
2139 idx = load_pack_index_file(basename + ".idx", idxf)
2140 for pack in self.packs:
2141 if pack.get_stored_checksum() == p.get_stored_checksum():
2142 p.close()
2143 idx.close()
2144 pf.close()
2145 idxf.close()
2146 return pack
2147 pf.seek(0)
2148 idxf.seek(0)
2149 self._upload_pack(basename, pf, idxf)
2150 final_pack = Pack.from_objects(p, idx)
2151 self._add_cached_pack(basename, final_pack)
2152 pf.close()
2153 idxf.close()
2154 return final_pack
2155
2156 return pf, commit, pf.close
2157
2158
2159def _collect_ancestors(
2160 store: ObjectContainer,
2161 heads,
2162 common: frozenset[ObjectID] = frozenset(),
2163 shallow: frozenset[ObjectID] = frozenset(),
2164 get_parents=lambda commit: commit.parents,
2165):
2166 """Collect all ancestors of heads up to (excluding) those in common.
2167
    Args:
      store: Object store to retrieve commits from.
      heads: commits to start from
      common: commits to end at, or an empty set to walk the repository
        completely
      shallow: set of shallow commits; traversal does not continue past
        these
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: A tuple (commits, bases), where commits is the set of all
      commits reachable from heads but not present in common, and bases is
      the set of common (shared) commits that are directly reachable from
      heads.
2177 """
2178 bases = set()
2179 commits = set()
2180 queue = []
2181 queue.extend(heads)
2182
2183 # Try to use commit graph if available
2184 commit_graph = store.get_commit_graph()
2185
2186 while queue:
2187 e = queue.pop(0)
2188 if e in common:
2189 bases.add(e)
2190 elif e not in commits:
2191 commits.add(e)
2192 if e in shallow:
2193 continue
2194
2195 # Try to use commit graph for parent lookup
2196 parents = None
2197 if commit_graph:
2198 parents = commit_graph.get_parents(e)
2199
2200 if parents is None:
2201 # Fall back to loading the object
2202 cmt = store[e]
2203 parents = get_parents(cmt)
2204
2205 queue.extend(parents)
2206 return (commits, bases)
2207
2208
2209def iter_tree_contents(
2210 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
2211):
2212 """Iterate the contents of a tree and all subtrees.
2213
2214 Iteration is depth-first pre-order, as in e.g. os.walk.
2215
    Args:
      store: Object store to retrieve tree objects from.
      tree_id: SHA1 of the tree, or None to yield nothing.
      include_trees: If True, include tree objects in the iteration.
2219 Returns: Iterator over TreeEntry namedtuples for all the objects in a
2220 tree.
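
    Example (an illustrative sketch; ``commit`` is a Commit previously
    loaded from the same store):

      for entry in iter_tree_contents(store, commit.tree):
          print(entry.path, oct(entry.mode), entry.sha)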
2221 """
2222 if tree_id is None:
2223 return
2224 # This could be fairly easily generalized to >2 trees if we find a use
2225 # case.
2226 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
2227 while todo:
2228 entry = todo.pop()
2229 if stat.S_ISDIR(entry.mode):
2230 extra = []
2231 tree = store[entry.sha]
2232 assert isinstance(tree, Tree)
2233 for subentry in tree.iteritems(name_order=True):
2234 extra.append(subentry.in_path(entry.path))
2235 todo.extend(reversed(extra))
2236 if not stat.S_ISDIR(entry.mode) or include_trees:
2237 yield entry
2238
2239
2240def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
2241 """Peel all tags from a SHA.
2242
    Args:
      store: Object store to retrieve objects from.
      sha: The object SHA to peel.
    Returns: A tuple of (unpeeled, peeled): the object named by sha and the
      object reached after peeling all intermediate tags. If sha does not
      point to a tag, both elements are the same object.
2248 """
2249 unpeeled = obj = store[sha]
2250 obj_class = object_class(obj.type_name)
2251 while obj_class is Tag:
2252 assert isinstance(obj, Tag)
2253 obj_class, sha = obj.object
2254 obj = store[sha]
2255 return unpeeled, obj