1# object_store.py -- Object store for git objects
2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk>
3# and others
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
22
23
24"""Git object store interfaces and implementation."""
25
26import binascii
27import os
28import stat
29import sys
30import time
31import warnings
32from collections.abc import Iterable, Iterator, Sequence
33from contextlib import suppress
34from io import BytesIO
35from typing import (
36 Callable,
37 Optional,
38 Protocol,
39 Union,
40 cast,
41)
42
43from .errors import NotTreeError
44from .file import GitFile
45from .objects import (
46 S_ISGITLINK,
47 ZERO_SHA,
48 Blob,
49 Commit,
50 ObjectID,
51 ShaFile,
52 Tag,
53 Tree,
54 TreeEntry,
55 hex_to_filename,
56 hex_to_sha,
57 object_class,
58 sha_to_hex,
59 valid_hexsha,
60)
61from .pack import (
62 PACK_SPOOL_FILE_MAX_SIZE,
63 ObjectContainer,
64 Pack,
65 PackData,
66 PackedObjectContainer,
67 PackFileDisappeared,
68 PackHint,
69 PackIndexer,
70 PackInflater,
71 PackStreamCopier,
72 UnpackedObject,
73 extend_pack,
74 full_unpacked_object,
75 generate_unpacked_objects,
76 iter_sha1,
77 load_pack_index_file,
78 pack_objects_to_data,
79 write_pack_data,
80 write_pack_index,
81)
82from .protocol import DEPTH_INFINITE
83from .refs import PEELED_TAG_SUFFIX, Ref
84
85INFODIR = "info"
86PACKDIR = "pack"
87
88# use permissions consistent with Git; just readable by everyone
89# TODO: should packs also be non-writable on Windows? if so, that
# would require some rather significant adjustments to the test suite
91PACK_MODE = 0o444 if sys.platform != "win32" else 0o644
92
93# Grace period for cleaning up temporary pack files (in seconds)
94# Matches git's default of 2 weeks
95DEFAULT_TEMPFILE_GRACE_PERIOD = 14 * 24 * 60 * 60 # 2 weeks
96
97
98def find_shallow(store, heads, depth):
99 """Find shallow commits according to a given depth.
100
101 Args:
102 store: An ObjectStore for looking up objects.
103 heads: Iterable of head SHAs to start walking from.
104 depth: The depth of ancestors to include. A depth of one includes
105 only the heads themselves.
106 Returns: A tuple of (shallow, not_shallow), sets of SHAs that should be
107 considered shallow and unshallow according to the arguments. Note that
108 these sets may overlap if a commit is reachable along multiple paths.
109 """
110 parents = {}
111
112 def get_parents(sha):
113 result = parents.get(sha, None)
114 if not result:
115 result = store[sha].parents
116 parents[sha] = result
117 return result
118
119 todo = [] # stack of (sha, depth)
120 for head_sha in heads:
121 obj = store[head_sha]
122 # Peel tags if necessary
123 while isinstance(obj, Tag):
124 _, sha = obj.object
125 obj = store[sha]
126 if isinstance(obj, Commit):
127 todo.append((obj.id, 1))
128
129 not_shallow = set()
130 shallow = set()
131 while todo:
132 sha, cur_depth = todo.pop()
133 if cur_depth < depth:
134 not_shallow.add(sha)
135 new_depth = cur_depth + 1
136 todo.extend((p, new_depth) for p in get_parents(sha))
137 else:
138 shallow.add(sha)
139
140 return shallow, not_shallow
141
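# Example (illustrative sketch; ``store`` and ``head`` are hypothetical
# placeholders, not defined in this module):
#
#     shallow, not_shallow = find_shallow(store, [head], depth=2)
#     # ``not_shallow`` holds commits whose history is fully included,
#     # ``shallow`` holds the commits that become the shallow boundary.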
142
143def get_depth(
144 store,
145 head,
146 get_parents=lambda commit: commit.parents,
147 max_depth=None,
148):
    """Return the current available depth for the given head.

    For commits with multiple parents, the largest possible depth will be
    returned.

    Args:
      store: Object store to search for commits in.
      head: commit to start from
      get_parents: optional function for getting the parents of a commit
      max_depth: maximum depth to search
    Returns: Current available depth for head (0 if head is not in the store).
    """
158 if head not in store:
159 return 0
160 current_depth = 1
161 queue = [(head, current_depth)]
162 while queue and (max_depth is None or current_depth < max_depth):
163 e, depth = queue.pop(0)
164 current_depth = max(current_depth, depth)
165 cmt = store[e]
166 if isinstance(cmt, Tag):
167 _cls, sha = cmt.object
168 cmt = store[sha]
169 queue.extend(
170 (parent, depth + 1) for parent in get_parents(cmt) if parent in store
171 )
172 return current_depth
173
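# Example (illustrative sketch; ``store`` and ``head`` are hypothetical):
#
#     depth = get_depth(store, head, max_depth=50)
#     # ``depth`` is the length of the longest known parent chain, counting
#     # ``head`` itself, capped at ``max_depth``.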
174
175class PackContainer(Protocol):
176 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
177 """Add a new pack."""
178
179
180class BaseObjectStore:
181 """Object store interface."""
182
183 def determine_wants_all(
184 self, refs: dict[Ref, ObjectID], depth: Optional[int] = None
185 ) -> list[ObjectID]:
186 def _want_deepen(sha):
187 if not depth:
188 return False
189 if depth == DEPTH_INFINITE:
190 return True
191 return depth > self._get_depth(sha)
192
193 return [
194 sha
195 for (ref, sha) in refs.items()
196 if (sha not in self or _want_deepen(sha))
197 and not ref.endswith(PEELED_TAG_SUFFIX)
198 and not sha == ZERO_SHA
199 ]
200
201 def contains_loose(self, sha) -> bool:
202 """Check if a particular object is present by SHA1 and is loose."""
203 raise NotImplementedError(self.contains_loose)
204
205 def __contains__(self, sha1: bytes) -> bool:
206 """Check if a particular object is present by SHA1.
207
208 This method makes no distinction between loose and packed objects.
209 """
210 return self.contains_loose(sha1)
211
212 @property
213 def packs(self):
214 """Iterable of pack objects."""
215 raise NotImplementedError
216
217 def get_raw(self, name) -> tuple[int, bytes]:
218 """Obtain the raw text for an object.
219
220 Args:
221 name: sha for the object.
222 Returns: tuple with numeric type and object contents.
223 """
224 raise NotImplementedError(self.get_raw)
225
226 def __getitem__(self, sha1: ObjectID) -> ShaFile:
227 """Obtain an object by SHA1."""
228 type_num, uncomp = self.get_raw(sha1)
229 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1)
230
231 def __iter__(self):
232 """Iterate over the SHAs that are present in this store."""
233 raise NotImplementedError(self.__iter__)
234
235 def add_object(self, obj) -> None:
236 """Add a single object to this object store."""
237 raise NotImplementedError(self.add_object)
238
239 def add_objects(self, objects, progress=None) -> None:
240 """Add a set of objects to this object store.
241
242 Args:
243 objects: Iterable over a list of (object, path) tuples
244 """
245 raise NotImplementedError(self.add_objects)
246
247 def tree_changes(
248 self,
249 source,
250 target,
251 want_unchanged=False,
252 include_trees=False,
253 change_type_same=False,
254 rename_detector=None,
255 ):
256 """Find the differences between the contents of two trees.
257
258 Args:
259 source: SHA1 of the source tree
260 target: SHA1 of the target tree
          want_unchanged: Whether unchanged files should be reported
          include_trees: Whether to include trees
          change_type_same: Whether to report files changing
            type in the same entry.
          rename_detector: RenameDetector object for detecting renames.
265 Returns: Iterator over tuples with
266 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
267 """
268 from .diff_tree import tree_changes
269
270 for change in tree_changes(
271 self,
272 source,
273 target,
274 want_unchanged=want_unchanged,
275 include_trees=include_trees,
276 change_type_same=change_type_same,
277 rename_detector=rename_detector,
278 ):
279 yield (
280 (change.old.path, change.new.path),
281 (change.old.mode, change.new.mode),
282 (change.old.sha, change.new.sha),
283 )
284
285 def iter_tree_contents(self, tree_id, include_trees=False):
286 """Iterate the contents of a tree and all subtrees.
287
288 Iteration is depth-first pre-order, as in e.g. os.walk.
289
290 Args:
291 tree_id: SHA1 of the tree.
292 include_trees: If True, include tree objects in the iteration.
293 Returns: Iterator over TreeEntry namedtuples for all the objects in a
294 tree.
295 """
296 warnings.warn(
297 "Please use dulwich.object_store.iter_tree_contents",
298 DeprecationWarning,
299 stacklevel=2,
300 )
301 return iter_tree_contents(self, tree_id, include_trees=include_trees)
302
303 def iterobjects_subset(
304 self, shas: Iterable[bytes], *, allow_missing: bool = False
305 ) -> Iterator[ShaFile]:
306 for sha in shas:
307 try:
308 yield self[sha]
309 except KeyError:
310 if not allow_missing:
311 raise
312
313 def find_missing_objects(
314 self,
315 haves,
316 wants,
317 shallow=None,
318 progress=None,
319 get_tagged=None,
320 get_parents=lambda commit: commit.parents,
321 ):
322 """Find the missing objects required for a set of revisions.
323
324 Args:
325 haves: Iterable over SHAs already in common.
326 wants: Iterable over SHAs of objects to fetch.
327 shallow: Set of shallow commit SHA1s to skip
328 progress: Simple progress function that will be called with
329 updated progress strings.
330 get_tagged: Function that returns a dict of pointed-to sha ->
331 tag sha for including tags.
332 get_parents: Optional function for getting the parents of a
333 commit.
334 Returns: Iterator over (sha, path) pairs.
335 """
336 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning)
337 finder = MissingObjectFinder(
338 self,
339 haves=haves,
340 wants=wants,
341 shallow=shallow,
342 progress=progress,
343 get_tagged=get_tagged,
344 get_parents=get_parents,
345 )
346 return iter(finder)
347
348 def find_common_revisions(self, graphwalker):
349 """Find which revisions this store has in common using graphwalker.
350
351 Args:
352 graphwalker: A graphwalker object.
353 Returns: List of SHAs that are in common
354 """
355 haves = []
356 sha = next(graphwalker)
357 while sha:
358 if sha in self:
359 haves.append(sha)
360 graphwalker.ack(sha)
361 sha = next(graphwalker)
362 return haves
363
364 def generate_pack_data(
365 self, have, want, shallow=None, progress=None, ofs_delta=True
366 ) -> tuple[int, Iterator[UnpackedObject]]:
367 """Generate pack data objects for a set of wants/haves.
368
369 Args:
370 have: List of SHA1s of objects that should not be sent
371 want: List of SHA1s of objects that should be sent
372 shallow: Set of shallow commit SHA1s to skip
373 ofs_delta: Whether OFS deltas can be included
374 progress: Optional progress reporting method
375 """
376 # Note that the pack-specific implementation below is more efficient,
377 # as it reuses deltas
378 missing_objects = MissingObjectFinder(
379 self, haves=have, wants=want, shallow=shallow, progress=progress
380 )
381 object_ids = list(missing_objects)
382 return pack_objects_to_data(
383 [(self[oid], path) for oid, path in object_ids],
384 ofs_delta=ofs_delta,
385 progress=progress,
386 )
387
388 def peel_sha(self, sha):
389 """Peel all tags from a SHA.
390
391 Args:
392 sha: The object SHA to peel.
393 Returns: The fully-peeled SHA1 of a tag object, after peeling all
394 intermediate tags; if the original ref does not point to a tag,
395 this will equal the original SHA1.
396 """
397 warnings.warn(
398 "Please use dulwich.object_store.peel_sha()",
399 DeprecationWarning,
400 stacklevel=2,
401 )
402 return peel_sha(self, sha)[1]
403
404 def _get_depth(
405 self,
406 head,
407 get_parents=lambda commit: commit.parents,
408 max_depth=None,
409 ):
        """Return the current available depth for the given head.

        For commits with multiple parents, the largest possible depth will be
        returned.
413
414 Args:
415 head: commit to start from
416 get_parents: optional function for getting the parents of a commit
417 max_depth: maximum depth to search
418 """
419 return get_depth(self, head, get_parents=get_parents, max_depth=max_depth)
420
421 def close(self) -> None:
422 """Close any files opened by this object store."""
423 # Default implementation is a NO-OP
424
425 def prune(self, grace_period: Optional[int] = None) -> None:
426 """Prune/clean up this object store.
427
428 This includes removing orphaned temporary files and other
429 housekeeping tasks. Default implementation is a NO-OP.
430
431 Args:
432 grace_period: Grace period in seconds for removing temporary files.
433 If None, uses the default grace period.
434 """
435 # Default implementation is a NO-OP
436
437 def iter_prefix(self, prefix: bytes) -> Iterator[ObjectID]:
438 """Iterate over all SHA1s that start with a given prefix.
439
440 The default implementation is a naive iteration over all objects.
441 However, subclasses may override this method with more efficient
442 implementations.
443 """
444 for sha in self:
445 if sha.startswith(prefix):
446 yield sha
447
448 def get_commit_graph(self):
449 """Get the commit graph for this object store.
450
451 Returns:
452 CommitGraph object if available, None otherwise
453 """
454 return None
455
456 def write_commit_graph(self, refs=None, reachable=True) -> None:
457 """Write a commit graph file for this object store.
458
459 Args:
460 refs: List of refs to include. If None, includes all refs from object store.
461 reachable: If True, includes all commits reachable from refs.
462 If False, only includes the direct ref targets.
463
        Note:
            The default implementation raises NotImplementedError. Subclasses
            should override this method to provide commit graph writing
            functionality.
467 """
468 raise NotImplementedError(self.write_commit_graph)
469
470 def get_object_mtime(self, sha):
471 """Get the modification time of an object.
472
473 Args:
474 sha: SHA1 of the object
475
476 Returns:
477 Modification time as seconds since epoch
478
479 Raises:
480 KeyError: if the object is not found
481 """
482 # Default implementation raises KeyError
483 # Subclasses should override to provide actual mtime
484 raise KeyError(sha)
485
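# Example of the read interface shared by all stores (illustrative sketch;
# ``store``, ``sha``, ``old_tree`` and ``new_tree`` are hypothetical):
#
#     if sha in store:
#         obj = store[sha]                    # ShaFile instance
#         type_num, raw = store.get_raw(sha)
#     for paths, modes, shas in store.tree_changes(old_tree, new_tree):
#         pass  # ((oldpath, newpath), (oldmode, newmode), (oldsha, newsha))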
486
487class PackBasedObjectStore(BaseObjectStore, PackedObjectContainer):
488 def __init__(self, pack_compression_level=-1, pack_index_version=None) -> None:
489 self._pack_cache: dict[str, Pack] = {}
490 self.pack_compression_level = pack_compression_level
491 self.pack_index_version = pack_index_version
492
493 def add_pack(self) -> tuple[BytesIO, Callable[[], None], Callable[[], None]]:
494 """Add a new pack to this object store."""
495 raise NotImplementedError(self.add_pack)
496
497 def add_pack_data(
498 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
499 ) -> None:
500 """Add pack data to this object store.
501
        Args:
          count: Number of objects to add
          unpacked_objects: Iterator over UnpackedObject instances
          progress: Optional progress reporting function
        """
505 if count == 0:
506 # Don't bother writing an empty pack file
507 return
508 f, commit, abort = self.add_pack()
509 try:
510 write_pack_data(
511 f.write,
512 unpacked_objects,
513 num_records=count,
514 progress=progress,
515 compression_level=self.pack_compression_level,
516 )
517 except BaseException:
518 abort()
519 raise
520 else:
521 return commit()
522
523 @property
524 def alternates(self):
525 return []
526
527 def contains_packed(self, sha) -> bool:
528 """Check if a particular object is present by SHA1 and is packed.
529
530 This does not check alternates.
531 """
532 for pack in self.packs:
533 try:
534 if sha in pack:
535 return True
536 except PackFileDisappeared:
537 pass
538 return False
539
540 def __contains__(self, sha) -> bool:
541 """Check if a particular object is present by SHA1.
542
543 This method makes no distinction between loose and packed objects.
544 """
545 if self.contains_packed(sha) or self.contains_loose(sha):
546 return True
547 for alternate in self.alternates:
548 if sha in alternate:
549 return True
550 return False
551
552 def _add_cached_pack(self, base_name, pack) -> None:
553 """Add a newly appeared pack to the cache by path."""
554 prev_pack = self._pack_cache.get(base_name)
555 if prev_pack is not pack:
556 self._pack_cache[base_name] = pack
557 if prev_pack:
558 prev_pack.close()
559
560 def generate_pack_data(
561 self, have, want, shallow=None, progress=None, ofs_delta=True
562 ) -> tuple[int, Iterator[UnpackedObject]]:
563 """Generate pack data objects for a set of wants/haves.
564
565 Args:
566 have: List of SHA1s of objects that should not be sent
567 want: List of SHA1s of objects that should be sent
568 shallow: Set of shallow commit SHA1s to skip
569 ofs_delta: Whether OFS deltas can be included
570 progress: Optional progress reporting method
571 """
572 missing_objects = MissingObjectFinder(
573 self, haves=have, wants=want, shallow=shallow, progress=progress
574 )
575 remote_has = missing_objects.get_remote_has()
576 object_ids = list(missing_objects)
577 return len(object_ids), generate_unpacked_objects(
578 cast(PackedObjectContainer, self),
579 object_ids,
580 progress=progress,
581 ofs_delta=ofs_delta,
582 other_haves=remote_has,
583 )
584
585 def _clear_cached_packs(self) -> None:
586 pack_cache = self._pack_cache
587 self._pack_cache = {}
588 while pack_cache:
589 (name, pack) = pack_cache.popitem()
590 pack.close()
591
592 def _iter_cached_packs(self):
593 return self._pack_cache.values()
594
595 def _update_pack_cache(self) -> list[Pack]:
596 raise NotImplementedError(self._update_pack_cache)
597
598 def close(self) -> None:
599 self._clear_cached_packs()
600
601 @property
602 def packs(self):
603 """List with pack objects."""
604 return list(self._iter_cached_packs()) + list(self._update_pack_cache())
605
606 def count_pack_files(self) -> int:
607 """Count the number of pack files.
608
609 Returns:
610 Number of pack files (excluding those with .keep files)
611 """
612 count = 0
613 for pack in self.packs:
614 # Check if there's a .keep file for this pack
615 keep_path = pack._basename + ".keep"
616 if not os.path.exists(keep_path):
617 count += 1
618 return count
619
620 def _iter_alternate_objects(self):
621 """Iterate over the SHAs of all the objects in alternate stores."""
622 for alternate in self.alternates:
623 yield from alternate
624
625 def _iter_loose_objects(self):
626 """Iterate over the SHAs of all loose objects."""
627 raise NotImplementedError(self._iter_loose_objects)
628
629 def _get_loose_object(self, sha) -> Optional[ShaFile]:
630 raise NotImplementedError(self._get_loose_object)
631
632 def delete_loose_object(self, sha) -> None:
633 """Delete a loose object.
634
635 This method only handles loose objects. For packed objects,
636 use repack(exclude=...) to exclude them during repacking.
637 """
638 raise NotImplementedError(self.delete_loose_object)
639
640 def _remove_pack(self, name) -> None:
641 raise NotImplementedError(self._remove_pack)
642
643 def pack_loose_objects(self):
644 """Pack loose objects.
645
646 Returns: Number of objects packed
647 """
648 objects = set()
649 for sha in self._iter_loose_objects():
650 objects.add((self._get_loose_object(sha), None))
651 self.add_objects(list(objects))
652 for obj, path in objects:
653 self.delete_loose_object(obj.id)
654 return len(objects)
655
656 def repack(self, exclude=None):
657 """Repack the packs in this repository.
658
659 Note that this implementation is fairly naive and currently keeps all
660 objects in memory while it repacks.
661
662 Args:
663 exclude: Optional set of object SHAs to exclude from repacking
664 """
665 if exclude is None:
666 exclude = set()
667
668 loose_objects = set()
669 excluded_loose_objects = set()
670 for sha in self._iter_loose_objects():
671 if sha not in exclude:
672 loose_objects.add(self._get_loose_object(sha))
673 else:
674 excluded_loose_objects.add(sha)
675
676 objects = {(obj, None) for obj in loose_objects}
677 old_packs = {p.name(): p for p in self.packs}
678 for name, pack in old_packs.items():
679 objects.update(
680 (obj, None) for obj in pack.iterobjects() if obj.id not in exclude
681 )
682
683 # Only create a new pack if there are objects to pack
684 if objects:
685 # The name of the consolidated pack might match the name of a
686 # pre-existing pack. Take care not to remove the newly created
687 # consolidated pack.
688 consolidated = self.add_objects(objects)
689 old_packs.pop(consolidated.name(), None)
690
691 # Delete loose objects that were packed
692 for obj in loose_objects:
693 self.delete_loose_object(obj.id)
694 # Delete excluded loose objects
695 for sha in excluded_loose_objects:
696 self.delete_loose_object(sha)
697 for name, pack in old_packs.items():
698 self._remove_pack(pack)
699 self._update_pack_cache()
700 return len(objects)
701
702 def __iter__(self):
703 """Iterate over the SHAs that are present in this store."""
704 self._update_pack_cache()
705 for pack in self._iter_cached_packs():
706 try:
707 yield from pack
708 except PackFileDisappeared:
709 pass
710 yield from self._iter_loose_objects()
711 yield from self._iter_alternate_objects()
712
713 def contains_loose(self, sha):
714 """Check if a particular object is present by SHA1 and is loose.
715
716 This does not check alternates.
717 """
718 return self._get_loose_object(sha) is not None
719
720 def get_raw(self, name):
721 """Obtain the raw fulltext for an object.
722
723 Args:
724 name: sha for the object.
725 Returns: tuple with numeric type and object contents.
726 """
727 if name == ZERO_SHA:
728 raise KeyError(name)
729 if len(name) == 40:
730 sha = hex_to_sha(name)
731 hexsha = name
732 elif len(name) == 20:
733 sha = name
734 hexsha = None
735 else:
736 raise AssertionError(f"Invalid object name {name!r}")
737 for pack in self._iter_cached_packs():
738 try:
739 return pack.get_raw(sha)
740 except (KeyError, PackFileDisappeared):
741 pass
742 if hexsha is None:
743 hexsha = sha_to_hex(name)
744 ret = self._get_loose_object(hexsha)
745 if ret is not None:
746 return ret.type_num, ret.as_raw_string()
747 # Maybe something else has added a pack with the object
        # in the meantime?
749 for pack in self._update_pack_cache():
750 try:
751 return pack.get_raw(sha)
752 except KeyError:
753 pass
754 for alternate in self.alternates:
755 try:
756 return alternate.get_raw(hexsha)
757 except KeyError:
758 pass
759 raise KeyError(hexsha)
760
761 def iter_unpacked_subset(
762 self,
763 shas: set[bytes],
764 include_comp: bool = False,
765 allow_missing: bool = False,
766 convert_ofs_delta: bool = True,
767 ) -> Iterator[UnpackedObject]:
768 todo: set[bytes] = set(shas)
769 for p in self._iter_cached_packs():
770 for unpacked in p.iter_unpacked_subset(
771 todo,
772 include_comp=include_comp,
773 allow_missing=True,
774 convert_ofs_delta=convert_ofs_delta,
775 ):
776 yield unpacked
777 hexsha = sha_to_hex(unpacked.sha())
778 todo.remove(hexsha)
779 # Maybe something else has added a pack with the object
        # in the meantime?
781 for p in self._update_pack_cache():
782 for unpacked in p.iter_unpacked_subset(
783 todo,
784 include_comp=include_comp,
785 allow_missing=True,
786 convert_ofs_delta=convert_ofs_delta,
787 ):
788 yield unpacked
789 hexsha = sha_to_hex(unpacked.sha())
790 todo.remove(hexsha)
791 for alternate in self.alternates:
792 for unpacked in alternate.iter_unpacked_subset(
793 todo,
794 include_comp=include_comp,
795 allow_missing=True,
796 convert_ofs_delta=convert_ofs_delta,
797 ):
798 yield unpacked
799 hexsha = sha_to_hex(unpacked.sha())
800 todo.remove(hexsha)
801
802 def iterobjects_subset(
803 self, shas: Iterable[bytes], *, allow_missing: bool = False
804 ) -> Iterator[ShaFile]:
805 todo: set[bytes] = set(shas)
806 for p in self._iter_cached_packs():
807 for o in p.iterobjects_subset(todo, allow_missing=True):
808 yield o
809 todo.remove(o.id)
810 # Maybe something else has added a pack with the object
        # in the meantime?
812 for p in self._update_pack_cache():
813 for o in p.iterobjects_subset(todo, allow_missing=True):
814 yield o
815 todo.remove(o.id)
816 for alternate in self.alternates:
817 for o in alternate.iterobjects_subset(todo, allow_missing=True):
818 yield o
819 todo.remove(o.id)
820 for oid in todo:
821 o = self._get_loose_object(oid)
822 if o is not None:
823 yield o
824 elif not allow_missing:
825 raise KeyError(oid)
826
827 def get_unpacked_object(
828 self, sha1: bytes, *, include_comp: bool = False
829 ) -> UnpackedObject:
830 """Obtain the unpacked object.
831
832 Args:
833 sha1: sha for the object.
834 """
835 if sha1 == ZERO_SHA:
836 raise KeyError(sha1)
837 if len(sha1) == 40:
838 sha = hex_to_sha(sha1)
839 hexsha = sha1
840 elif len(sha1) == 20:
841 sha = sha1
842 hexsha = None
843 else:
844 raise AssertionError(f"Invalid object sha1 {sha1!r}")
845 for pack in self._iter_cached_packs():
846 try:
847 return pack.get_unpacked_object(sha, include_comp=include_comp)
848 except (KeyError, PackFileDisappeared):
849 pass
850 if hexsha is None:
851 hexsha = sha_to_hex(sha1)
852 # Maybe something else has added a pack with the object
        # in the meantime?
854 for pack in self._update_pack_cache():
855 try:
856 return pack.get_unpacked_object(sha, include_comp=include_comp)
857 except KeyError:
858 pass
859 for alternate in self.alternates:
860 try:
861 return alternate.get_unpacked_object(hexsha, include_comp=include_comp)
862 except KeyError:
863 pass
864 raise KeyError(hexsha)
865
866 def add_objects(
867 self,
868 objects: Sequence[tuple[ShaFile, Optional[str]]],
869 progress: Optional[Callable[[str], None]] = None,
870 ) -> None:
871 """Add a set of objects to this object store.
872
873 Args:
874 objects: Iterable over (object, path) tuples, should support
875 __len__.
876 Returns: Pack object of the objects written.
877 """
878 count = len(objects)
879 record_iter = (full_unpacked_object(o) for (o, p) in objects)
880 return self.add_pack_data(count, record_iter, progress=progress)
881
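# Example of the low-level pack-adding protocol used above (illustrative
# sketch; ``store`` and ``pack_bytes`` are hypothetical):
#
#     f, commit, abort = store.add_pack()
#     try:
#         f.write(pack_bytes)   # raw pack data, e.g. received from a client
#     except BaseException:
#         abort()
#         raise
#     else:
#         commit()              # indexes the pack and moves it into place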
882
883class DiskObjectStore(PackBasedObjectStore):
884 """Git-style object store that exists on disk."""
885
886 path: Union[str, os.PathLike]
887 pack_dir: Union[str, os.PathLike]
888
889 def __init__(
890 self,
891 path: Union[str, os.PathLike],
892 loose_compression_level=-1,
893 pack_compression_level=-1,
894 pack_index_version=None,
895 ) -> None:
896 """Open an object store.
897
898 Args:
899 path: Path of the object store.
900 loose_compression_level: zlib compression level for loose objects
901 pack_compression_level: zlib compression level for pack objects
902 pack_index_version: pack index version to use (1, 2, or 3)
903 """
904 super().__init__(
905 pack_compression_level=pack_compression_level,
906 pack_index_version=pack_index_version,
907 )
908 self.path = path
909 self.pack_dir = os.path.join(self.path, PACKDIR)
910 self._alternates = None
911 self.loose_compression_level = loose_compression_level
912 self.pack_compression_level = pack_compression_level
913 self.pack_index_version = pack_index_version
914
915 # Commit graph support - lazy loaded
916 self._commit_graph = None
917
918 def __repr__(self) -> str:
919 return f"<{self.__class__.__name__}({self.path!r})>"
920
921 @classmethod
922 def from_config(cls, path: Union[str, os.PathLike], config):
923 try:
924 default_compression_level = int(
925 config.get((b"core",), b"compression").decode()
926 )
927 except KeyError:
928 default_compression_level = -1
929 try:
930 loose_compression_level = int(
931 config.get((b"core",), b"looseCompression").decode()
932 )
933 except KeyError:
934 loose_compression_level = default_compression_level
935 try:
936 pack_compression_level = int(
                config.get((b"core",), b"packCompression").decode()
938 )
939 except KeyError:
940 pack_compression_level = default_compression_level
941 try:
942 pack_index_version = int(config.get((b"pack",), b"indexVersion").decode())
943 except KeyError:
944 pack_index_version = None
945 return cls(
946 path, loose_compression_level, pack_compression_level, pack_index_version
947 )
948
949 @property
950 def alternates(self):
951 if self._alternates is not None:
952 return self._alternates
953 self._alternates = []
954 for path in self._read_alternate_paths():
955 self._alternates.append(DiskObjectStore(path))
956 return self._alternates
957
958 def _read_alternate_paths(self):
959 try:
960 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb")
961 except FileNotFoundError:
962 return
963 with f:
964 for line in f.readlines():
965 line = line.rstrip(b"\n")
966 if line.startswith(b"#"):
967 continue
968 if os.path.isabs(line):
969 yield os.fsdecode(line)
970 else:
971 yield os.fsdecode(os.path.join(os.fsencode(self.path), line))
972
973 def add_alternate_path(self, path) -> None:
974 """Add an alternate path to this object store."""
975 try:
976 os.mkdir(os.path.join(self.path, INFODIR))
977 except FileExistsError:
978 pass
979 alternates_path = os.path.join(self.path, INFODIR, "alternates")
980 with GitFile(alternates_path, "wb") as f:
981 try:
982 orig_f = open(alternates_path, "rb")
983 except FileNotFoundError:
984 pass
985 else:
986 with orig_f:
987 f.write(orig_f.read())
988 f.write(os.fsencode(path) + b"\n")
989
990 if not os.path.isabs(path):
991 path = os.path.join(self.path, path)
992 self.alternates.append(DiskObjectStore(path))
993
994 def _update_pack_cache(self):
995 """Read and iterate over new pack files and cache them."""
996 try:
997 pack_dir_contents = os.listdir(self.pack_dir)
998 except FileNotFoundError:
999 self.close()
1000 return []
1001 pack_files = set()
1002 for name in pack_dir_contents:
1003 if name.startswith("pack-") and name.endswith(".pack"):
1004 # verify that idx exists first (otherwise the pack was not yet
1005 # fully written)
1006 idx_name = os.path.splitext(name)[0] + ".idx"
1007 if idx_name in pack_dir_contents:
1008 pack_name = name[: -len(".pack")]
1009 pack_files.add(pack_name)
1010
1011 # Open newly appeared pack files
1012 new_packs = []
1013 for f in pack_files:
1014 if f not in self._pack_cache:
1015 pack = Pack(os.path.join(self.pack_dir, f))
1016 new_packs.append(pack)
1017 self._pack_cache[f] = pack
1018 # Remove disappeared pack files
1019 for f in set(self._pack_cache) - pack_files:
1020 self._pack_cache.pop(f).close()
1021 return new_packs
1022
1023 def _get_shafile_path(self, sha):
1024 # Check from object dir
1025 return hex_to_filename(self.path, sha)
1026
1027 def _iter_loose_objects(self):
1028 for base in os.listdir(self.path):
1029 if len(base) != 2:
1030 continue
1031 for rest in os.listdir(os.path.join(self.path, base)):
1032 sha = os.fsencode(base + rest)
1033 if not valid_hexsha(sha):
1034 continue
1035 yield sha
1036
1037 def count_loose_objects(self) -> int:
1038 """Count the number of loose objects in the object store.
1039
1040 Returns:
1041 Number of loose objects
1042 """
1043 count = 0
1044 if not os.path.exists(self.path):
1045 return 0
1046
1047 for i in range(256):
1048 subdir = os.path.join(self.path, f"{i:02x}")
1049 try:
1050 count += len(
1051 [
1052 name
1053 for name in os.listdir(subdir)
1054 if len(name) == 38 # 40 - 2 for the prefix
1055 ]
1056 )
1057 except FileNotFoundError:
1058 # Directory may have been removed or is inaccessible
1059 continue
1060
1061 return count
1062
1063 def _get_loose_object(self, sha):
1064 path = self._get_shafile_path(sha)
1065 try:
1066 return ShaFile.from_path(path)
1067 except FileNotFoundError:
1068 return None
1069
1070 def delete_loose_object(self, sha) -> None:
1071 os.remove(self._get_shafile_path(sha))
1072
1073 def get_object_mtime(self, sha):
1074 """Get the modification time of an object.
1075
1076 Args:
1077 sha: SHA1 of the object
1078
1079 Returns:
1080 Modification time as seconds since epoch
1081
1082 Raises:
1083 KeyError: if the object is not found
1084 """
1085 # First check if it's a loose object
1086 if self.contains_loose(sha):
1087 path = self._get_shafile_path(sha)
1088 try:
1089 return os.path.getmtime(path)
1090 except FileNotFoundError:
1091 pass
1092
1093 # Check if it's in a pack file
1094 for pack in self.packs:
1095 try:
1096 if sha in pack:
1097 # Use the pack file's mtime for packed objects
1098 pack_path = pack._data_path
1099 try:
1100 return os.path.getmtime(pack_path)
1101 except (FileNotFoundError, AttributeError):
1102 pass
1103 except PackFileDisappeared:
1104 pass
1105
1106 raise KeyError(sha)
1107
1108 def _remove_pack(self, pack) -> None:
1109 try:
1110 del self._pack_cache[os.path.basename(pack._basename)]
1111 except KeyError:
1112 pass
1113 pack.close()
1114 os.remove(pack.data.path)
1115 os.remove(pack.index.path)
1116
1117 def _get_pack_basepath(self, entries):
1118 suffix = iter_sha1(entry[0] for entry in entries)
1119 # TODO: Handle self.pack_dir being bytes
1120 suffix = suffix.decode("ascii")
1121 return os.path.join(self.pack_dir, "pack-" + suffix)
1122
1123 def _complete_pack(self, f, path, num_objects, indexer, progress=None):
1124 """Move a specific file containing a pack into the pack directory.
1125
1126 Note: The file should be on the same file system as the
1127 packs directory.
1128
        Args:
          f: Open file object for the pack.
          path: Path to the pack file.
          num_objects: Number of objects in the pack.
          indexer: A PackIndexer for indexing the pack.
          progress: Optional progress reporting function.
        """
1134 entries = []
1135 for i, entry in enumerate(indexer):
1136 if progress is not None:
1137 progress(f"generating index: {i}/{num_objects}\r".encode("ascii"))
1138 entries.append(entry)
1139
1140 pack_sha, extra_entries = extend_pack(
1141 f,
1142 indexer.ext_refs(),
1143 get_raw=self.get_raw,
1144 compression_level=self.pack_compression_level,
1145 progress=progress,
1146 )
1147 f.flush()
1148 try:
1149 fileno = f.fileno()
1150 except AttributeError:
1151 pass
1152 else:
1153 os.fsync(fileno)
1154 f.close()
1155
1156 entries.extend(extra_entries)
1157
1158 # Move the pack in.
1159 entries.sort()
1160 pack_base_name = self._get_pack_basepath(entries)
1161
1162 for pack in self.packs:
1163 if pack._basename == pack_base_name:
1164 return pack
1165
1166 target_pack_path = pack_base_name + ".pack"
1167 target_index_path = pack_base_name + ".idx"
1168 if sys.platform == "win32":
1169 # Windows might have the target pack file lingering. Attempt
1170 # removal, silently passing if the target does not exist.
1171 with suppress(FileNotFoundError):
1172 os.remove(target_pack_path)
1173 os.rename(path, target_pack_path)
1174
1175 # Write the index.
1176 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file:
1177 write_pack_index(
1178 index_file, entries, pack_sha, version=self.pack_index_version
1179 )
1180
1181 # Add the pack to the store and return it.
1182 final_pack = Pack(pack_base_name)
1183 final_pack.check_length_and_checksum()
1184 self._add_cached_pack(pack_base_name, final_pack)
1185 return final_pack
1186
1187 def add_thin_pack(self, read_all, read_some, progress=None):
1188 """Add a new thin pack to this object store.
1189
1190 Thin packs are packs that contain deltas with parents that exist
1191 outside the pack. They should never be placed in the object store
1192 directly, and always indexed and completed as they are copied.
1193
1194 Args:
1195 read_all: Read function that blocks until the number of
1196 requested bytes are read.
1197 read_some: Read function that returns at least one byte, but may
1198 not return the number of bytes requested.
1199 Returns: A Pack object pointing at the now-completed thin pack in the
1200 objects/pack directory.
1201 """
1202 import tempfile
1203
1204 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_")
1205 with os.fdopen(fd, "w+b") as f:
1206 os.chmod(path, PACK_MODE)
1207 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw)
1208 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer)
1209 copier.verify(progress=progress)
1210 return self._complete_pack(f, path, len(copier), indexer, progress=progress)
1211
1212 def add_pack(self):
1213 """Add a new pack to this object store.
1214
1215 Returns: Fileobject to write to, a commit function to
1216 call when the pack is finished and an abort
1217 function.
1218 """
1219 import tempfile
1220
1221 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack")
1222 f = os.fdopen(fd, "w+b")
1223 os.chmod(path, PACK_MODE)
1224
1225 def commit():
1226 if f.tell() > 0:
1227 f.seek(0)
1228 with PackData(path, f) as pd:
1229 indexer = PackIndexer.for_pack_data(
1230 pd, resolve_ext_ref=self.get_raw
1231 )
1232 return self._complete_pack(f, path, len(pd), indexer)
1233 else:
1234 f.close()
1235 os.remove(path)
1236 return None
1237
1238 def abort() -> None:
1239 f.close()
1240 os.remove(path)
1241
1242 return f, commit, abort
1243
1244 def add_object(self, obj) -> None:
1245 """Add a single object to this object store.
1246
1247 Args:
1248 obj: Object to add
1249 """
1250 path = self._get_shafile_path(obj.id)
1251 dir = os.path.dirname(path)
1252 try:
1253 os.mkdir(dir)
1254 except FileExistsError:
1255 pass
1256 if os.path.exists(path):
1257 return # Already there, no need to write again
1258 with GitFile(path, "wb", mask=PACK_MODE) as f:
1259 f.write(
1260 obj.as_legacy_object(compression_level=self.loose_compression_level)
1261 )
1262
1263 @classmethod
1264 def init(cls, path: Union[str, os.PathLike]):
1265 try:
1266 os.mkdir(path)
1267 except FileExistsError:
1268 pass
1269 os.mkdir(os.path.join(path, "info"))
1270 os.mkdir(os.path.join(path, PACKDIR))
1271 return cls(path)
1272
1273 def iter_prefix(self, prefix):
1274 if len(prefix) < 2:
1275 yield from super().iter_prefix(prefix)
1276 return
1277 seen = set()
1278 dir = prefix[:2].decode()
1279 rest = prefix[2:].decode()
1280 try:
1281 for name in os.listdir(os.path.join(self.path, dir)):
1282 if name.startswith(rest):
1283 sha = os.fsencode(dir + name)
1284 if sha not in seen:
1285 seen.add(sha)
1286 yield sha
1287 except FileNotFoundError:
1288 pass
1289
1290 for p in self.packs:
1291 bin_prefix = (
1292 binascii.unhexlify(prefix)
1293 if len(prefix) % 2 == 0
1294 else binascii.unhexlify(prefix[:-1])
1295 )
1296 for sha in p.index.iter_prefix(bin_prefix):
1297 sha = sha_to_hex(sha)
1298 if sha.startswith(prefix) and sha not in seen:
1299 seen.add(sha)
1300 yield sha
1301 for alternate in self.alternates:
1302 for sha in alternate.iter_prefix(prefix):
1303 if sha not in seen:
1304 seen.add(sha)
1305 yield sha
1306
1307 def get_commit_graph(self):
1308 """Get the commit graph for this object store.
1309
1310 Returns:
1311 CommitGraph object if available, None otherwise
1312 """
1313 if self._commit_graph is None:
1314 from .commit_graph import read_commit_graph
1315
1316 # Look for commit graph in our objects directory
1317 graph_file = os.path.join(self.path, "info", "commit-graph")
1318 if os.path.exists(graph_file):
1319 self._commit_graph = read_commit_graph(graph_file)
1320 return self._commit_graph
1321
1322 def write_commit_graph(self, refs=None, reachable=True) -> None:
1323 """Write a commit graph file for this object store.
1324
1325 Args:
1326 refs: List of refs to include. If None, includes all refs from object store.
1327 reachable: If True, includes all commits reachable from refs.
1328 If False, only includes the direct ref targets.
1329 """
1330 from .commit_graph import get_reachable_commits
1331
1332 if refs is None:
1333 # Get all commit objects from the object store
1334 all_refs = []
1335 # Iterate through all objects to find commits
1336 for sha in self:
1337 try:
1338 obj = self[sha]
1339 if obj.type_name == b"commit":
1340 all_refs.append(sha)
1341 except KeyError:
1342 continue
1343 else:
1344 # Use provided refs
1345 all_refs = refs
1346
1347 if not all_refs:
1348 return # No commits to include
1349
1350 if reachable:
1351 # Get all reachable commits
1352 commit_ids = get_reachable_commits(self, all_refs)
1353 else:
1354 # Just use the direct ref targets - ensure they're hex ObjectIDs
1355 commit_ids = []
1356 for ref in all_refs:
1357 if isinstance(ref, bytes) and len(ref) == 40:
1358 # Already hex ObjectID
1359 commit_ids.append(ref)
1360 elif isinstance(ref, bytes) and len(ref) == 20:
1361 # Binary SHA, convert to hex ObjectID
1362 from .objects import sha_to_hex
1363
1364 commit_ids.append(sha_to_hex(ref))
1365 else:
1366 # Assume it's already correct format
1367 commit_ids.append(ref)
1368
1369 if commit_ids:
1370 # Write commit graph directly to our object store path
1371 # Generate the commit graph
1372 from .commit_graph import generate_commit_graph
1373
1374 graph = generate_commit_graph(self, commit_ids)
1375
1376 if graph.entries:
1377 # Ensure the info directory exists
1378 info_dir = os.path.join(self.path, "info")
1379 os.makedirs(info_dir, exist_ok=True)
1380
1381 # Write using GitFile for atomic operation
1382 graph_path = os.path.join(info_dir, "commit-graph")
1383 with GitFile(graph_path, "wb") as f:
1384 graph.write_to_file(f)
1385
1386 # Clear cached commit graph so it gets reloaded
1387 self._commit_graph = None
1388
1389 def prune(self, grace_period: Optional[int] = None) -> None:
1390 """Prune/clean up this object store.
1391
1392 This removes temporary files that were left behind by interrupted
1393 pack operations. These are files that start with ``tmp_pack_`` in the
1394 repository directory or files with .pack extension but no corresponding
1395 .idx file in the pack directory.
1396
1397 Args:
1398 grace_period: Grace period in seconds for removing temporary files.
1399 If None, uses DEFAULT_TEMPFILE_GRACE_PERIOD.
1400 """
1401 import glob
1402
1403 if grace_period is None:
1404 grace_period = DEFAULT_TEMPFILE_GRACE_PERIOD
1405
1406 # Clean up tmp_pack_* files in the repository directory
1407 for tmp_file in glob.glob(os.path.join(self.path, "tmp_pack_*")):
1408 # Check if file is old enough (more than grace period)
1409 mtime = os.path.getmtime(tmp_file)
1410 if time.time() - mtime > grace_period:
1411 os.remove(tmp_file)
1412
1413 # Clean up orphaned .pack files without corresponding .idx files
1414 try:
1415 pack_dir_contents = os.listdir(self.pack_dir)
1416 except FileNotFoundError:
1417 return
1418
1419 pack_files = {}
1420 idx_files = set()
1421
1422 for name in pack_dir_contents:
1423 if name.endswith(".pack"):
1424 base_name = name[:-5] # Remove .pack extension
1425 pack_files[base_name] = name
1426 elif name.endswith(".idx"):
1427 base_name = name[:-4] # Remove .idx extension
1428 idx_files.add(base_name)
1429
1430 # Remove .pack files without corresponding .idx files
1431 for base_name, pack_name in pack_files.items():
1432 if base_name not in idx_files:
1433 pack_path = os.path.join(self.pack_dir, pack_name)
1434 # Check if file is old enough (more than grace period)
1435 mtime = os.path.getmtime(pack_path)
1436 if time.time() - mtime > grace_period:
1437 os.remove(pack_path)
1438
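# Example of a typical DiskObjectStore round trip (illustrative sketch; the
# path and blob contents are hypothetical):
#
#     store = DiskObjectStore.init("/tmp/example/objects")
#     blob = Blob.from_string(b"hello world")
#     store.add_object(blob)          # written as a loose object
#     assert store.contains_loose(blob.id)
#     store.pack_loose_objects()      # consolidate loose objects into a pack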
1439
1440class MemoryObjectStore(BaseObjectStore):
1441 """Object store that keeps all objects in memory."""
1442
1443 def __init__(self) -> None:
1444 super().__init__()
1445 self._data: dict[str, ShaFile] = {}
1446 self.pack_compression_level = -1
1447
1448 def _to_hexsha(self, sha):
1449 if len(sha) == 40:
1450 return sha
1451 elif len(sha) == 20:
1452 return sha_to_hex(sha)
1453 else:
1454 raise ValueError(f"Invalid sha {sha!r}")
1455
1456 def contains_loose(self, sha):
1457 """Check if a particular object is present by SHA1 and is loose."""
1458 return self._to_hexsha(sha) in self._data
1459
1460 def contains_packed(self, sha) -> bool:
1461 """Check if a particular object is present by SHA1 and is packed."""
1462 return False
1463
1464 def __iter__(self):
1465 """Iterate over the SHAs that are present in this store."""
1466 return iter(self._data.keys())
1467
1468 @property
1469 def packs(self):
1470 """List with pack objects."""
1471 return []
1472
1473 def get_raw(self, name: ObjectID):
1474 """Obtain the raw text for an object.
1475
1476 Args:
1477 name: sha for the object.
1478 Returns: tuple with numeric type and object contents.
1479 """
1480 obj = self[self._to_hexsha(name)]
1481 return obj.type_num, obj.as_raw_string()
1482
1483 def __getitem__(self, name: ObjectID):
1484 return self._data[self._to_hexsha(name)].copy()
1485
1486 def __delitem__(self, name: ObjectID) -> None:
1487 """Delete an object from this store, for testing only."""
1488 del self._data[self._to_hexsha(name)]
1489
1490 def add_object(self, obj) -> None:
1491 """Add a single object to this object store."""
1492 self._data[obj.id] = obj.copy()
1493
1494 def add_objects(self, objects, progress=None) -> None:
1495 """Add a set of objects to this object store.
1496
1497 Args:
1498 objects: Iterable over a list of (object, path) tuples
1499 """
1500 for obj, path in objects:
1501 self.add_object(obj)
1502
1503 def add_pack(self):
1504 """Add a new pack to this object store.
1505
1506 Because this object store doesn't support packs, we extract and add the
1507 individual objects.
1508
        Returns: Fileobject to write to, a commit function to call when the
            pack is finished, and an abort function.
        """
1512 from tempfile import SpooledTemporaryFile
1513
1514 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-")
1515
1516 def commit() -> None:
1517 size = f.tell()
1518 if size > 0:
1519 f.seek(0)
1520 p = PackData.from_file(f, size)
1521 for obj in PackInflater.for_pack_data(p, self.get_raw):
1522 self.add_object(obj)
1523 p.close()
1524 f.close()
1525 else:
1526 f.close()
1527
1528 def abort() -> None:
1529 f.close()
1530
1531 return f, commit, abort
1532
1533 def add_pack_data(
1534 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None
1535 ) -> None:
1536 """Add pack data to this object store.
1537
        Args:
          count: Number of objects to add
          unpacked_objects: Iterator over UnpackedObject instances
          progress: Optional progress reporting function
        """
1541 if count == 0:
1542 return
1543
1544 # Since MemoryObjectStore doesn't support pack files, we need to
1545 # extract individual objects. To handle deltas properly, we write
1546 # to a temporary pack and then use PackInflater to resolve them.
1547 f, commit, abort = self.add_pack()
1548 try:
1549 write_pack_data(
1550 f.write,
1551 unpacked_objects,
1552 num_records=count,
1553 progress=progress,
1554 )
1555 except BaseException:
1556 abort()
1557 raise
1558 else:
1559 commit()
1560
1561 def add_thin_pack(self, read_all, read_some, progress=None) -> None:
1562 """Add a new thin pack to this object store.
1563
1564 Thin packs are packs that contain deltas with parents that exist
1565 outside the pack. Because this object store doesn't support packs, we
1566 extract and add the individual objects.
1567
1568 Args:
1569 read_all: Read function that blocks until the number of
1570 requested bytes are read.
1571 read_some: Read function that returns at least one byte, but may
1572 not return the number of bytes requested.
1573 """
1574 f, commit, abort = self.add_pack()
1575 try:
1576 copier = PackStreamCopier(read_all, read_some, f)
1577 copier.verify()
1578 except BaseException:
1579 abort()
1580 raise
1581 else:
1582 commit()
1583
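# Example (illustrative sketch): MemoryObjectStore is convenient for tests
# because nothing touches the filesystem.
#
#     store = MemoryObjectStore()
#     blob = Blob.from_string(b"spam")
#     store.add_object(blob)
#     assert store[blob.id].as_raw_string() == b"spam"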
1584
1585class ObjectIterator(Protocol):
1586 """Interface for iterating over objects."""
1587
1588 def iterobjects(self) -> Iterator[ShaFile]:
1589 raise NotImplementedError(self.iterobjects)
1590
1591
1592def tree_lookup_path(lookup_obj, root_sha, path):
1593 """Look up an object in a Git tree.
1594
1595 Args:
1596 lookup_obj: Callback for retrieving object by SHA1
1597 root_sha: SHA1 of the root tree
1598 path: Path to lookup
1599 Returns: A tuple of (mode, SHA) of the resulting path.
1600 """
1601 tree = lookup_obj(root_sha)
1602 if not isinstance(tree, Tree):
1603 raise NotTreeError(root_sha)
1604 return tree.lookup_path(lookup_obj, path)
1605
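# Example (illustrative sketch; ``store``, ``tree_id`` and the path are
# hypothetical):
#
#     mode, sha = tree_lookup_path(store.__getitem__, tree_id, b"docs/index.txt")
#     # ``mode`` is the entry's file mode, ``sha`` the blob (or subtree) id.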
1606
1607def _collect_filetree_revs(
1608 obj_store: ObjectContainer, tree_sha: ObjectID, kset: set[ObjectID]
1609) -> None:
1610 """Collect SHA1s of files and directories for specified tree.
1611
1612 Args:
1613 obj_store: Object store to get objects by SHA from
1614 tree_sha: tree reference to walk
1615 kset: set to fill with references to files and directories
1616 """
1617 filetree = obj_store[tree_sha]
1618 assert isinstance(filetree, Tree)
1619 for name, mode, sha in filetree.iteritems():
1620 if not S_ISGITLINK(mode) and sha not in kset:
1621 kset.add(sha)
1622 if stat.S_ISDIR(mode):
1623 _collect_filetree_revs(obj_store, sha, kset)
1624
1625
1626def _split_commits_and_tags(
1627 obj_store: ObjectContainer, lst, *, ignore_unknown=False
1628) -> tuple[set[bytes], set[bytes], set[bytes]]:
1629 """Split object id list into three lists with commit, tag, and other SHAs.
1630
    Commits referenced by tags are included in the commits list as well.
    Only SHA1s known in this repository will get through; unless the
    ignore_unknown argument is True, a KeyError is raised for any SHA1
    missing from the repository.
1635
1636 Args:
1637 obj_store: Object store to get objects by SHA1 from
1638 lst: Collection of commit and tag SHAs
1639 ignore_unknown: True to skip SHA1 missing in the repository
1640 silently.
1641 Returns: A tuple of (commits, tags, others) SHA1s
1642 """
1643 commits: set[bytes] = set()
1644 tags: set[bytes] = set()
1645 others: set[bytes] = set()
1646 for e in lst:
1647 try:
1648 o = obj_store[e]
1649 except KeyError:
1650 if not ignore_unknown:
1651 raise
1652 else:
1653 if isinstance(o, Commit):
1654 commits.add(e)
1655 elif isinstance(o, Tag):
1656 tags.add(e)
1657 tagged = o.object[1]
1658 c, t, os = _split_commits_and_tags(
1659 obj_store, [tagged], ignore_unknown=ignore_unknown
1660 )
1661 commits |= c
1662 tags |= t
1663 others |= os
1664 else:
1665 others.add(e)
1666 return (commits, tags, others)
1667
1668
1669class MissingObjectFinder:
1670 """Find the objects missing from another object store.
1671
1672 Args:
1673 object_store: Object store containing at least all objects to be
1674 sent
      haves: SHA1s of commits not to send (already present in target)
      wants: SHA1s of commits to send
      shallow: Optional set of shallow commit SHA1s to skip
      progress: Optional function to report progress to.
1678 get_tagged: Function that returns a dict of pointed-to sha -> tag
1679 sha for including tags.
1680 get_parents: Optional function for getting the parents of a commit.
1681 """
1682
1683 def __init__(
1684 self,
1685 object_store,
1686 haves,
1687 wants,
1688 *,
1689 shallow=None,
1690 progress=None,
1691 get_tagged=None,
1692 get_parents=lambda commit: commit.parents,
1693 ) -> None:
1694 self.object_store = object_store
1695 if shallow is None:
1696 shallow = set()
1697 self._get_parents = get_parents
1698 # process Commits and Tags differently
        # Note: haves may list commits/tags not available locally; such SHAs
        # are filtered out by _split_commits_and_tags. wants, however, must
        # list only known SHAs, otherwise _split_commits_and_tags raises
        # KeyError.
1703 have_commits, have_tags, have_others = _split_commits_and_tags(
1704 object_store, haves, ignore_unknown=True
1705 )
1706 want_commits, want_tags, want_others = _split_commits_and_tags(
1707 object_store, wants, ignore_unknown=False
1708 )
1709 # all_ancestors is a set of commits that shall not be sent
1710 # (complete repository up to 'haves')
1711 all_ancestors = _collect_ancestors(
1712 object_store, have_commits, shallow=shallow, get_parents=self._get_parents
1713 )[0]
1714 # all_missing - complete set of commits between haves and wants
1715 # common - commits from all_ancestors we hit into while
1716 # traversing parent hierarchy of wants
1717 missing_commits, common_commits = _collect_ancestors(
1718 object_store,
1719 want_commits,
1720 all_ancestors,
1721 shallow=shallow,
1722 get_parents=self._get_parents,
1723 )
1724 self.remote_has: set[bytes] = set()
        # Now, fill sha_done with commits and revisions of
        # files and directories known to be present both locally
        # and on the target. These commits and files therefore
        # won't get selected for fetch.
1729 for h in common_commits:
1730 self.remote_has.add(h)
1731 cmt = object_store[h]
1732 _collect_filetree_revs(object_store, cmt.tree, self.remote_has)
1733 # record tags we have as visited, too
1734 for t in have_tags:
1735 self.remote_has.add(t)
1736 self.sha_done = set(self.remote_has)
1737
1738 # in fact, what we 'want' is commits, tags, and others
1739 # we've found missing
1740 self.objects_to_send: set[
1741 tuple[ObjectID, Optional[bytes], Optional[int], bool]
1742 ] = {(w, None, Commit.type_num, False) for w in missing_commits}
1743 missing_tags = want_tags.difference(have_tags)
1744 self.objects_to_send.update(
1745 {(w, None, Tag.type_num, False) for w in missing_tags}
1746 )
1747 missing_others = want_others.difference(have_others)
1748 self.objects_to_send.update({(w, None, None, False) for w in missing_others})
1749
1750 if progress is None:
1751 self.progress = lambda x: None
1752 else:
1753 self.progress = progress
1754 self._tagged = (get_tagged and get_tagged()) or {}
1755
1756 def get_remote_has(self):
1757 return self.remote_has
1758
1759 def add_todo(
1760 self, entries: Iterable[tuple[ObjectID, Optional[bytes], Optional[int], bool]]
1761 ) -> None:
1762 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done])
1763
1764 def __next__(self) -> tuple[bytes, Optional[PackHint]]:
1765 while True:
1766 if not self.objects_to_send:
1767 self.progress(
1768 f"counting objects: {len(self.sha_done)}, done.\n".encode("ascii")
1769 )
1770 raise StopIteration
1771 (sha, name, type_num, leaf) = self.objects_to_send.pop()
1772 if sha not in self.sha_done:
1773 break
1774 if not leaf:
1775 o = self.object_store[sha]
1776 if isinstance(o, Commit):
1777 self.add_todo([(o.tree, b"", Tree.type_num, False)])
1778 elif isinstance(o, Tree):
1779 self.add_todo(
1780 [
1781 (
1782 s,
1783 n,
1784 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num),
1785 not stat.S_ISDIR(m),
1786 )
1787 for n, m, s in o.iteritems()
1788 if not S_ISGITLINK(m)
1789 ]
1790 )
1791 elif isinstance(o, Tag):
1792 self.add_todo([(o.object[1], None, o.object[0].type_num, False)])
1793 if sha in self._tagged:
1794 self.add_todo([(self._tagged[sha], None, None, True)])
1795 self.sha_done.add(sha)
1796 if len(self.sha_done) % 1000 == 0:
1797 self.progress(f"counting objects: {len(self.sha_done)}\r".encode("ascii"))
1798 if type_num is None:
1799 pack_hint = None
1800 else:
1801 pack_hint = (type_num, name)
1802 return (sha, pack_hint)
1803
1804 def __iter__(self):
1805 return self
1806
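# Example (illustrative sketch; ``store``, ``their_heads`` and ``our_heads``
# are hypothetical):
#
#     finder = MissingObjectFinder(store, haves=their_heads, wants=our_heads)
#     for sha, pack_hint in finder:
#         pass  # every object the other side is missing, with an optional
#               # (type_num, name) hint for pack generation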
1807
1808class ObjectStoreGraphWalker:
1809 """Graph walker that finds what commits are missing from an object store."""
1810
1811 heads: set[ObjectID]
1812 """Revisions without descendants in the local repo."""
1813
    get_parents: Callable[[ObjectID], list[ObjectID]]
1815 """Function to retrieve parents in the local repo."""
1816
1817 shallow: set[ObjectID]
1818
1819 def __init__(
1820 self,
1821 local_heads: Iterable[ObjectID],
1822 get_parents,
1823 shallow: Optional[set[ObjectID]] = None,
1824 update_shallow=None,
1825 ) -> None:
1826 """Create a new instance.
1827
1828 Args:
1829 local_heads: Heads to start search with
1830 get_parents: Function for finding the parents of a SHA1.
1831 """
1832 self.heads = set(local_heads)
1833 self.get_parents = get_parents
1834 self.parents: dict[ObjectID, Optional[list[ObjectID]]] = {}
1835 if shallow is None:
1836 shallow = set()
1837 self.shallow = shallow
1838 self.update_shallow = update_shallow
1839
1840 def nak(self) -> None:
1841 """Nothing in common was found."""
1842
1843 def ack(self, sha: ObjectID) -> None:
1844 """Ack that a revision and its ancestors are present in the source."""
1845 if len(sha) != 40:
1846 raise ValueError(f"unexpected sha {sha!r} received")
1847 ancestors = {sha}
1848
1849 # stop if we run out of heads to remove
1850 while self.heads:
1851 for a in ancestors:
1852 if a in self.heads:
1853 self.heads.remove(a)
1854
1855 # collect all ancestors
1856 new_ancestors = set()
1857 for a in ancestors:
1858 ps = self.parents.get(a)
1859 if ps is not None:
1860 new_ancestors.update(ps)
1861 self.parents[a] = None
1862
1863 # no more ancestors; stop
1864 if not new_ancestors:
1865 break
1866
1867 ancestors = new_ancestors
1868
1869 def next(self):
1870 """Iterate over ancestors of heads in the target."""
1871 if self.heads:
1872 ret = self.heads.pop()
1873 try:
1874 ps = self.get_parents(ret)
1875 except KeyError:
1876 return None
1877 self.parents[ret] = ps
1878 self.heads.update([p for p in ps if p not in self.parents])
1879 return ret
1880 return None
1881
1882 __next__ = next
1883
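# Example (illustrative sketch; ``store`` and ``refs`` are hypothetical):
#
#     walker = ObjectStoreGraphWalker(
#         refs.values(), lambda sha: store[sha].parents
#     )
#     sha = next(walker)          # propose a local head to the remote
#     walker.ack(sha)             # remote confirmed it has this commit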
1884
1885def commit_tree_changes(object_store, tree, changes):
1886 """Commit a specified set of changes to a tree structure.
1887
1888 This will apply a set of changes on top of an existing tree, storing new
1889 objects in object_store.
1890
    changes are a list of tuples with (path, mode, object_sha).
    Paths can be both blobs and trees. Setting the mode and
    object sha to None deletes the path.
1894
1895 This method works especially well if there are only a small
1896 number of changes to a big tree. For a large number of changes
1897 to a large tree, use e.g. commit_tree.
1898
1899 Args:
1900 object_store: Object store to store new objects in
1901 and retrieve old ones from.
1902 tree: Original tree root
1903 changes: changes to apply
1904 Returns: New tree root object
1905 """
1906 # TODO(jelmer): Save up the objects and add them using .add_objects
1907 # rather than with individual calls to .add_object.
1908 nested_changes = {}
1909 for path, new_mode, new_sha in changes:
1910 try:
1911 (dirname, subpath) = path.split(b"/", 1)
1912 except ValueError:
1913 if new_sha is None:
1914 del tree[path]
1915 else:
1916 tree[path] = (new_mode, new_sha)
1917 else:
1918 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha))
1919 for name, subchanges in nested_changes.items():
1920 try:
1921 orig_subtree = object_store[tree[name][1]]
1922 except KeyError:
1923 orig_subtree = Tree()
1924 subtree = commit_tree_changes(object_store, orig_subtree, subchanges)
1925 if len(subtree) == 0:
1926 del tree[name]
1927 else:
1928 tree[name] = (stat.S_IFDIR, subtree.id)
1929 object_store.add_object(tree)
1930 return tree
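

# Usage sketch (illustrative only): applying a couple of changes on top of an
# existing tree.  `store` and `tree` stand for an object store and a Tree
# loaded from it; the paths are made up, and the deleted path is assumed to
# already exist in `tree`.
#
#   blob = Blob.from_string(b"new contents\n")
#   store.add_object(blob)
#   new_root = commit_tree_changes(
#       store,
#       tree,
#       [
#           (b"docs/README", 0o100644, blob.id),  # add or replace a file
#           (b"old-name.txt", None, None),        # delete an existing path
#       ],
#   )
#   # new_root (== tree, updated in place) has been added to `store`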


class OverlayObjectStore(BaseObjectStore):
    """Object store that can overlay multiple object stores."""

    def __init__(self, bases, add_store=None) -> None:
        self.bases = bases
        self.add_store = add_store

    def add_object(self, object):
        if self.add_store is None:
            raise NotImplementedError(self.add_object)
        return self.add_store.add_object(object)

    def add_objects(self, objects, progress=None):
        if self.add_store is None:
            raise NotImplementedError(self.add_objects)
        return self.add_store.add_objects(objects, progress)

    @property
    def packs(self):
        ret = []
        for b in self.bases:
            ret.extend(b.packs)
        return ret

    def __iter__(self):
        done = set()
        for b in self.bases:
            for o_id in b:
                if o_id not in done:
                    yield o_id
                    done.add(o_id)

    def iterobjects_subset(
        self, shas: Iterable[bytes], *, allow_missing: bool = False
    ) -> Iterator[ShaFile]:
        todo = set(shas)
        found: set[bytes] = set()

        for b in self.bases:
            # Create a copy of todo for each base to avoid modifying
            # the set while iterating through it
            current_todo = todo - found
            for o in b.iterobjects_subset(current_todo, allow_missing=True):
                yield o
                found.add(o.id)

        # Check for any remaining objects not found
        missing = todo - found
        if missing and not allow_missing:
            raise KeyError(next(iter(missing)))

    def iter_unpacked_subset(
        self,
        shas: Iterable[bytes],
        *,
        include_comp=False,
        allow_missing: bool = False,
        convert_ofs_delta=True,
    ) -> Iterator[UnpackedObject]:
        todo = set(shas)
        for b in self.bases:
            for o in b.iter_unpacked_subset(
                todo,
                include_comp=include_comp,
                allow_missing=True,
                convert_ofs_delta=convert_ofs_delta,
            ):
                yield o
                todo.remove(o.id)
        if todo and not allow_missing:
            raise KeyError(next(iter(todo)))

    def get_raw(self, sha_id):
        for b in self.bases:
            try:
                return b.get_raw(sha_id)
            except KeyError:
                pass
        raise KeyError(sha_id)

    def contains_packed(self, sha) -> bool:
        for b in self.bases:
            if b.contains_packed(sha):
                return True
        return False

    def contains_loose(self, sha) -> bool:
        for b in self.bases:
            if b.contains_loose(sha):
                return True
        return False
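

# Usage sketch (illustrative only): overlaying two stores so that lookups fall
# through from the first base to the second, while writes go to `add_store`.
# MemoryObjectStore is the in-memory store defined earlier in this module.
#
#   local = MemoryObjectStore()
#   upstream = MemoryObjectStore()
#   overlay = OverlayObjectStore([local, upstream], add_store=local)
#   blob = Blob.from_string(b"hello\n")
#   upstream.add_object(blob)
#   assert blob.id in overlay                          # found via `upstream`
#   overlay.add_object(Blob.from_string(b"world\n"))   # stored in `local`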


def read_packs_file(f):
    """Yield the packs listed in a packs file."""
    for line in f.read().splitlines():
        if not line:
            continue
        (kind, name) = line.split(b" ", 1)
        if kind != b"P":
            continue
        yield os.fsdecode(name)
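

# Format sketch: an objects/info/packs file contains one "P <pack-name>" entry
# per line, e.g. (pack names made up):
#
#   f = BytesIO(b"P pack-1e68a.pack\nP pack-77c21.pack\n")
#   list(read_packs_file(f))
#   # -> ["pack-1e68a.pack", "pack-77c21.pack"]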


class BucketBasedObjectStore(PackBasedObjectStore):
    """Object store implementation that uses a bucket store like S3 as its backend."""

    def _iter_loose_objects(self):
        """Iterate over the SHAs of all loose objects."""
        return iter([])

    def _get_loose_object(self, sha) -> None:
        return None

    def delete_loose_object(self, sha) -> None:
        # Loose objects do not exist in a bucket-based store, so there is
        # nothing to delete.
        pass

    def _remove_pack(self, name) -> None:
        raise NotImplementedError(self._remove_pack)

    def _iter_pack_names(self) -> Iterator[str]:
        raise NotImplementedError(self._iter_pack_names)

    def _get_pack(self, name) -> Pack:
        raise NotImplementedError(self._get_pack)

    def _update_pack_cache(self):
        pack_files = set(self._iter_pack_names())

        # Open newly appeared pack files
        new_packs = []
        for f in pack_files:
            if f not in self._pack_cache:
                pack = self._get_pack(f)
                new_packs.append(pack)
                self._pack_cache[f] = pack
        # Remove disappeared pack files
        for f in set(self._pack_cache) - pack_files:
            self._pack_cache.pop(f).close()
        return new_packs

    def _upload_pack(self, basename, pack_file, index_file) -> None:
        raise NotImplementedError

    def add_pack(self):
        """Add a new pack to this object store.

        Returns: Fileobject to write to, a commit function to
            call when the pack is finished and an abort
            function.
        """
        import tempfile

        pf = tempfile.SpooledTemporaryFile(
            max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
        )

        def commit():
            if pf.tell() == 0:
                pf.close()
                return None

            pf.seek(0)
            p = PackData(pf.name, pf)
            entries = p.sorted_entries()
            basename = iter_sha1(entry[0] for entry in entries).decode("ascii")
            idxf = tempfile.SpooledTemporaryFile(
                max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-"
            )
            checksum = p.get_stored_checksum()
            write_pack_index(idxf, entries, checksum, version=self.pack_index_version)
            idxf.seek(0)
            idx = load_pack_index_file(basename + ".idx", idxf)
            for pack in self.packs:
                if pack.get_stored_checksum() == p.get_stored_checksum():
                    p.close()
                    idx.close()
                    pf.close()
                    idxf.close()
                    return pack
            pf.seek(0)
            idxf.seek(0)
            self._upload_pack(basename, pf, idxf)
            final_pack = Pack.from_objects(p, idx)
            self._add_cached_pack(basename, final_pack)
            pf.close()
            idxf.close()
            return final_pack

        return pf, commit, pf.close
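

# Subclass sketch (illustrative only): a bucket-backed store only needs to
# list, fetch, upload and remove packs; loose objects are never written.  The
# `bucket` client and its `list`/`open`/`put`/`delete` methods are hypothetical
# placeholders for a real blob-store API (e.g. an S3 wrapper).
#
#   class ExampleBucketObjectStore(BucketBasedObjectStore):
#       def __init__(self, bucket) -> None:
#           super().__init__()
#           self.bucket = bucket
#
#       def _iter_pack_names(self) -> Iterator[str]:
#           return iter(self.bucket.list("packs/"))
#
#       def _get_pack(self, name) -> Pack:
#           # Pack.from_lazy_objects defers opening data/index until first use
#           return Pack.from_lazy_objects(
#               lambda: PackData(name, self.bucket.open(f"packs/{name}.pack")),
#               lambda: load_pack_index_file(
#                   name + ".idx", self.bucket.open(f"packs/{name}.idx")
#               ),
#           )
#
#       def _upload_pack(self, basename, pack_file, index_file) -> None:
#           self.bucket.put(f"packs/{basename}.pack", pack_file)
#           self.bucket.put(f"packs/{basename}.idx", index_file)
#
#       def _remove_pack(self, name) -> None:
#           self.bucket.delete(f"packs/{name}.pack")
#           self.bucket.delete(f"packs/{name}.idx")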


def _collect_ancestors(
    store: ObjectContainer,
    heads,
    common: frozenset[ObjectID] = frozenset(),
    shallow: frozenset[ObjectID] = frozenset(),
    get_parents=lambda commit: commit.parents,
):
    """Collect all ancestors of heads up to (excluding) those in common.

    Args:
      store: Object store to retrieve commits from
      heads: commits to start from
      common: commits to end at, or empty set to walk repository
        completely
      shallow: Set of shallow commits; traversal does not continue past
        these
      get_parents: Optional function for getting the parents of a
        commit.
    Returns: a tuple (A, B) where A is the set of all commits reachable
        from heads but not present in common, and B is the subset of
        common that is directly reachable from heads
    """
    bases = set()
    commits = set()
    queue = []
    queue.extend(heads)
    while queue:
        e = queue.pop(0)
        if e in common:
            bases.add(e)
        elif e not in commits:
            commits.add(e)
            if e in shallow:
                continue
            cmt = store[e]
            queue.extend(get_parents(cmt))
    return (commits, bases)
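

# Worked sketch: for a linear history a <- b <- c (c newest) stored in `store`,
# collecting the ancestors of c with b marked as common gives:
#
#   commits, bases = _collect_ancestors(store, [c_sha], common=frozenset({b_sha}))
#   # commits == {c_sha}   -- reachable from the heads, not in common
#   # bases == {b_sha}     -- members of common directly reachable from the heads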


def iter_tree_contents(
    store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False
):
    """Iterate the contents of a tree and all subtrees.

    Iteration is depth-first pre-order, as in e.g. os.walk.

    Args:
      store: Object store to retrieve tree and subtree objects from.
      tree_id: SHA1 of the tree.
      include_trees: If True, include tree objects in the iteration.
    Returns: Iterator over TreeEntry namedtuples for all the objects in a
        tree.
    """
    if tree_id is None:
        return
    # This could be fairly easily generalized to >2 trees if we find a use
    # case.
    todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)]
    while todo:
        entry = todo.pop()
        if stat.S_ISDIR(entry.mode):
            extra = []
            tree = store[entry.sha]
            assert isinstance(tree, Tree)
            for subentry in tree.iteritems(name_order=True):
                extra.append(subentry.in_path(entry.path))
            todo.extend(reversed(extra))
        if not stat.S_ISDIR(entry.mode) or include_trees:
            yield entry
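

# Usage sketch (illustrative only): walking a small in-memory tree; paths and
# contents are made up.
#
#   store = MemoryObjectStore()
#   blob = Blob.from_string(b"spam\n")
#   subtree = Tree()
#   subtree.add(b"b", 0o100644, blob.id)
#   root = Tree()
#   root.add(b"a", 0o100644, blob.id)
#   root.add(b"sub", stat.S_IFDIR, subtree.id)
#   for obj in (blob, subtree, root):
#       store.add_object(obj)
#   [e.path for e in iter_tree_contents(store, root.id)]
#   # -> [b"a", b"sub/b"]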


def peel_sha(store: ObjectContainer, sha: bytes) -> tuple[ShaFile, ShaFile]:
    """Peel all tags from a SHA.

    Args:
      store: Object store to retrieve objects from.
      sha: The object SHA to peel.
    Returns: A tuple of (unpeeled, peeled), where unpeeled is the object
        the SHA refers to and peeled is the object reached after following
        all intermediate tags; if the original SHA does not point to a tag,
        both elements are the same object.
    """
    unpeeled = obj = store[sha]
    obj_class = object_class(obj.type_name)
    while obj_class is Tag:
        assert isinstance(obj, Tag)
        obj_class, sha = obj.object
        obj = store[sha]
    return unpeeled, obj
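

# Usage sketch (illustrative only): `store` holds a commit and an annotated
# tag pointing at it; `tag` and `commit` stand for objects the caller already
# has.
#
#   unpeeled, peeled = peel_sha(store, tag.id)
#   # unpeeled is the Tag itself, peeled is the Commit it ultimately points at
#   unpeeled, peeled = peel_sha(store, commit.id)
#   # for a non-tag object, both elements are the same Commit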