Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/object_store.py: 21%


867 statements  

1# object_store.py -- Object store for git objects 

2# Copyright (C) 2008-2013 Jelmer Vernooij <jelmer@jelmer.uk> 

3# and others 

4# 

5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

6# General Public License as published by the Free Software Foundation; version 2.0 

7# or (at your option) any later version. You can redistribute it and/or 

8# modify it under the terms of either of these two licenses. 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15# 

16# You should have received a copy of the licenses; if not, see 

17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

19# License, Version 2.0. 

20# 

21 

22 

23"""Git object store interfaces and implementation.""" 

24 

25import os 

26import stat 

27import sys 

28import warnings 

29from contextlib import suppress 

30from io import BytesIO 

31from typing import ( 

32 Callable, 

33 Dict, 

34 FrozenSet, 

35 Iterable, 

36 Iterator, 

37 List, 

38 Optional, 

39 Protocol, 

40 Sequence, 

41 Set, 

42 Tuple, 

43 cast, 

44) 

45 

46from .errors import NotTreeError 

47from .file import GitFile 

48from .objects import ( 

49 S_ISGITLINK, 

50 ZERO_SHA, 

51 Blob, 

52 Commit, 

53 ObjectID, 

54 ShaFile, 

55 Tag, 

56 Tree, 

57 TreeEntry, 

58 hex_to_filename, 

59 hex_to_sha, 

60 object_class, 

61 sha_to_hex, 

62 valid_hexsha, 

63) 

64from .pack import ( 

65 PACK_SPOOL_FILE_MAX_SIZE, 

66 ObjectContainer, 

67 Pack, 

68 PackData, 

69 PackedObjectContainer, 

70 PackFileDisappeared, 

71 PackHint, 

72 PackIndexer, 

73 PackInflater, 

74 PackStreamCopier, 

75 UnpackedObject, 

76 extend_pack, 

77 full_unpacked_object, 

78 generate_unpacked_objects, 

79 iter_sha1, 

80 load_pack_index_file, 

81 pack_objects_to_data, 

82 write_pack_data, 

83 write_pack_index, 

84) 

85from .protocol import DEPTH_INFINITE 

86from .refs import PEELED_TAG_SUFFIX, Ref 

87 

88INFODIR = "info" 

89PACKDIR = "pack" 

90 

91# use permissions consistent with Git; just readable by everyone 

92# TODO: should packs also be non-writable on Windows? if so, that 

93# would require some rather significant adjustments to the test suite 

94PACK_MODE = 0o444 if sys.platform != "win32" else 0o644 

95 

96 

97class PackContainer(Protocol): 

98 def add_pack(self) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

99 """Add a new pack.""" 

100 

101 

102class BaseObjectStore: 

103 """Object store interface.""" 

104 

105 def determine_wants_all( 

106 self, refs: Dict[Ref, ObjectID], depth: Optional[int] = None 

107 ) -> List[ObjectID]: 

108 def _want_deepen(sha): 

109 if not depth: 

110 return False 

111 if depth == DEPTH_INFINITE: 

112 return True 

113 return depth > self._get_depth(sha) 

114 

115 return [ 

116 sha 

117 for (ref, sha) in refs.items() 

118 if (sha not in self or _want_deepen(sha)) 

119 and not ref.endswith(PEELED_TAG_SUFFIX) 

120 and not sha == ZERO_SHA 

121 ] 
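
# Illustrative sketch (not part of this module): determine_wants_all filters a
# refs dict down to the object IDs the local store is still missing, skipping
# peeled-tag refs (ending in ^{}) and the zero SHA. The SHAs below are
# hypothetical placeholders; MemoryObjectStore is defined later in this file.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
known = Blob.from_string(b"already present locally")
store.add_object(known)
refs = {
    b"refs/heads/main": b"1" * 40,    # not in the store, so it is wanted
    b"refs/tags/v1^{}": b"2" * 40,    # peeled tag entry is ignored
    b"refs/heads/old": known.id,      # already present, so not wanted
}
print(store.determine_wants_all(refs))   # [b'1111...1111']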

122 

123 def contains_loose(self, sha): 

124 """Check if a particular object is present by SHA1 and is loose.""" 

125 raise NotImplementedError(self.contains_loose) 

126 

127 def __contains__(self, sha1: bytes) -> bool: 

128 """Check if a particular object is present by SHA1. 

129 

130 This method makes no distinction between loose and packed objects. 

131 """ 

132 return self.contains_loose(sha1) 

133 

134 @property 

135 def packs(self): 

136 """Iterable of pack objects.""" 

137 raise NotImplementedError 

138 

139 def get_raw(self, name): 

140 """Obtain the raw text for an object. 

141 

142 Args: 

143 name: sha for the object. 

144 Returns: tuple with numeric type and object contents. 

145 """ 

146 raise NotImplementedError(self.get_raw) 

147 

148 def __getitem__(self, sha1: ObjectID) -> ShaFile: 

149 """Obtain an object by SHA1.""" 

150 type_num, uncomp = self.get_raw(sha1) 

151 return ShaFile.from_raw_string(type_num, uncomp, sha=sha1) 
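
# Illustrative sketch (not part of this module): get_raw returns a
# (type_num, bytes) pair, while __getitem__ re-inflates a full ShaFile.
# MemoryObjectStore (defined later in this file) serves as the concrete store.
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"hello")
store.add_object(blob)
assert store.get_raw(blob.id) == (Blob.type_num, b"hello")
assert store[blob.id].as_raw_string() == b"hello"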

152 

153 def __iter__(self): 

154 """Iterate over the SHAs that are present in this store.""" 

155 raise NotImplementedError(self.__iter__) 

156 

157 def add_object(self, obj): 

158 """Add a single object to this object store.""" 

159 raise NotImplementedError(self.add_object) 

160 

161 def add_objects(self, objects, progress=None): 

162 """Add a set of objects to this object store. 

163 

164 Args: 

165 objects: Iterable over a list of (object, path) tuples 

166 """ 

167 raise NotImplementedError(self.add_objects) 

168 

169 def tree_changes( 

170 self, 

171 source, 

172 target, 

173 want_unchanged=False, 

174 include_trees=False, 

175 change_type_same=False, 

176 rename_detector=None, 

177 ): 

178 """Find the differences between the contents of two trees. 

179 

180 Args: 

181 source: SHA1 of the source tree 

182 target: SHA1 of the target tree 

183 want_unchanged: Whether unchanged files should be reported 

184 include_trees: Whether to include trees 

185 change_type_same: Whether to report files changing 

186 type in the same entry. 

187 Returns: Iterator over tuples with 

188 (oldpath, newpath), (oldmode, newmode), (oldsha, newsha) 

189 """ 

190 from .diff_tree import tree_changes 

191 

192 for change in tree_changes( 

193 self, 

194 source, 

195 target, 

196 want_unchanged=want_unchanged, 

197 include_trees=include_trees, 

198 change_type_same=change_type_same, 

199 rename_detector=rename_detector, 

200 ): 

201 yield ( 

202 (change.old.path, change.new.path), 

203 (change.old.mode, change.new.mode), 

204 (change.old.sha, change.new.sha), 

205 ) 
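
# Illustrative sketch (not part of this module): diff two small trees held in
# a MemoryObjectStore and print the (paths, modes, shas) triples yielded by
# tree_changes.
import stat
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old_blob, new_blob = Blob.from_string(b"v1"), Blob.from_string(b"v2")
old_tree, new_tree = Tree(), Tree()
old_tree.add(b"file.txt", stat.S_IFREG | 0o644, old_blob.id)
new_tree.add(b"file.txt", stat.S_IFREG | 0o644, new_blob.id)
for obj in (old_blob, new_blob, old_tree, new_tree):
    store.add_object(obj)
for paths, modes, shas in store.tree_changes(old_tree.id, new_tree.id):
    print(paths, modes, shas)   # ((b'file.txt', b'file.txt'), ..., (old, new))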

206 

207 def iter_tree_contents(self, tree_id, include_trees=False): 

208 """Iterate the contents of a tree and all subtrees. 

209 

210 Iteration is depth-first pre-order, as in e.g. os.walk. 

211 

212 Args: 

213 tree_id: SHA1 of the tree. 

214 include_trees: If True, include tree objects in the iteration. 

215 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

216 tree. 

217 """ 

218 warnings.warn( 

219 "Please use dulwich.object_store.iter_tree_contents", 

220 DeprecationWarning, 

221 stacklevel=2, 

222 ) 

223 return iter_tree_contents(self, tree_id, include_trees=include_trees) 

224 

225 def iterobjects_subset( 

226 self, shas: Iterable[bytes], *, allow_missing: bool = False 

227 ) -> Iterator[ShaFile]: 

228 for sha in shas: 

229 try: 

230 yield self[sha] 

231 except KeyError: 

232 if not allow_missing: 

233 raise 

234 

235 def find_missing_objects( 

236 self, 

237 haves, 

238 wants, 

239 shallow=None, 

240 progress=None, 

241 get_tagged=None, 

242 get_parents=lambda commit: commit.parents, 

243 ): 

244 """Find the missing objects required for a set of revisions. 

245 

246 Args: 

247 haves: Iterable over SHAs already in common. 

248 wants: Iterable over SHAs of objects to fetch. 

249 shallow: Set of shallow commit SHA1s to skip 

250 progress: Simple progress function that will be called with 

251 updated progress strings. 

252 get_tagged: Function that returns a dict of pointed-to sha -> 

253 tag sha for including tags. 

254 get_parents: Optional function for getting the parents of a 

255 commit. 

256 Returns: Iterator over (sha, path) pairs. 

257 """ 

258 warnings.warn("Please use MissingObjectFinder(store)", DeprecationWarning) 

259 finder = MissingObjectFinder( 

260 self, 

261 haves=haves, 

262 wants=wants, 

263 shallow=shallow, 

264 progress=progress, 

265 get_tagged=get_tagged, 

266 get_parents=get_parents, 

267 ) 

268 return iter(finder) 

269 

270 def find_common_revisions(self, graphwalker): 

271 """Find which revisions this store has in common using graphwalker. 

272 

273 Args: 

274 graphwalker: A graphwalker object. 

275 Returns: List of SHAs that are in common 

276 """ 

277 haves = [] 

278 sha = next(graphwalker) 

279 while sha: 

280 if sha in self: 

281 haves.append(sha) 

282 graphwalker.ack(sha) 

283 sha = next(graphwalker) 

284 return haves 

285 

286 def generate_pack_data( 

287 self, have, want, shallow=None, progress=None, ofs_delta=True 

288 ) -> Tuple[int, Iterator[UnpackedObject]]: 

289 """Generate pack data objects for a set of wants/haves. 

290 

291 Args: 

292 have: List of SHA1s of objects that should not be sent 

293 want: List of SHA1s of objects that should be sent 

294 shallow: Set of shallow commit SHA1s to skip 

295 ofs_delta: Whether OFS deltas can be included 

296 progress: Optional progress reporting method 

297 """ 

298 # Note that the pack-specific implementation below is more efficient, 

299 # as it reuses deltas 

300 missing_objects = MissingObjectFinder( 

301 self, haves=have, wants=want, shallow=shallow, progress=progress 

302 ) 

303 object_ids = list(missing_objects) 

304 return pack_objects_to_data( 

305 [(self[oid], path) for oid, path in object_ids], 

306 ofs_delta=ofs_delta, 

307 progress=progress, 

308 ) 
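
# Illustrative sketch (not part of this module): the (count, records) pair
# returned by generate_pack_data can be fed straight to
# dulwich.pack.write_pack_data to produce a raw pack stream.
from io import BytesIO
from dulwich.object_store import MemoryObjectStore
from dulwich.objects import Blob
from dulwich.pack import write_pack_data

store = MemoryObjectStore()
blob = Blob.from_string(b"pack me")
store.add_object(blob)
count, records = store.generate_pack_data(have=[], want=[blob.id])
buf = BytesIO()
write_pack_data(buf.write, records, num_records=count)
print(len(buf.getvalue()), "bytes of pack data")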

309 

310 def peel_sha(self, sha): 

311 """Peel all tags from a SHA. 

312 

313 Args: 

314 sha: The object SHA to peel. 

315 Returns: The fully-peeled SHA1 of a tag object, after peeling all 

316 intermediate tags; if the original ref does not point to a tag, 

317 this will equal the original SHA1. 

318 """ 

319 warnings.warn( 

320 "Please use dulwich.object_store.peel_sha()", 

321 DeprecationWarning, 

322 stacklevel=2, 

323 ) 

324 return peel_sha(self, sha)[1] 

325 

326 def _get_depth( 

327 self, 

328 head, 

329 get_parents=lambda commit: commit.parents, 

330 max_depth=None, 

331 ): 

332 """Return the current available depth for the given head. 

333 For commits with multiple parents, the largest possible depth will be 

334 returned. 

335 

336 Args: 

337 head: commit to start from 

338 get_parents: optional function for getting the parents of a commit 

339 max_depth: maximum depth to search 

340 """ 

341 if head not in self: 

342 return 0 

343 current_depth = 1 

344 queue = [(head, current_depth)] 

345 while queue and (max_depth is None or current_depth < max_depth): 

346 e, depth = queue.pop(0) 

347 current_depth = max(current_depth, depth) 

348 cmt = self[e] 

349 if isinstance(cmt, Tag): 

350 _cls, sha = cmt.object 

351 cmt = self[sha] 

352 queue.extend( 

353 (parent, depth + 1) for parent in get_parents(cmt) if parent in self 

354 ) 

355 return current_depth 

356 

357 def close(self): 

358 """Close any files opened by this object store.""" 

359 # Default implementation is a NO-OP 

360 

361 

362class PackBasedObjectStore(BaseObjectStore): 

363 def __init__(self, pack_compression_level=-1) -> None: 

364 self._pack_cache: Dict[str, Pack] = {} 

365 self.pack_compression_level = pack_compression_level 

366 

367 def add_pack(self) -> Tuple[BytesIO, Callable[[], None], Callable[[], None]]: 

368 """Add a new pack to this object store.""" 

369 raise NotImplementedError(self.add_pack) 

370 

371 def add_pack_data( 

372 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

373 ) -> None: 

374 """Add pack data to this object store. 

375 

376 Args: 

377 count: Number of items to add 

378 unpacked_objects: Iterator over UnpackedObject records to write 

379 """ 

380 if count == 0: 

381 # Don't bother writing an empty pack file 

382 return 

383 f, commit, abort = self.add_pack() 

384 try: 

385 write_pack_data( 

386 f.write, 

387 unpacked_objects, 

388 num_records=count, 

389 progress=progress, 

390 compression_level=self.pack_compression_level, 

391 ) 

392 except BaseException: 

393 abort() 

394 raise 

395 else: 

396 return commit() 

397 

398 @property 

399 def alternates(self): 

400 return [] 

401 

402 def contains_packed(self, sha): 

403 """Check if a particular object is present by SHA1 and is packed. 

404 

405 This does not check alternates. 

406 """ 

407 for pack in self.packs: 

408 try: 

409 if sha in pack: 

410 return True 

411 except PackFileDisappeared: 

412 pass 

413 return False 

414 

415 def __contains__(self, sha) -> bool: 

416 """Check if a particular object is present by SHA1. 

417 

418 This method makes no distinction between loose and packed objects. 

419 """ 

420 if self.contains_packed(sha) or self.contains_loose(sha): 

421 return True 

422 for alternate in self.alternates: 

423 if sha in alternate: 

424 return True 

425 return False 

426 

427 def _add_cached_pack(self, base_name, pack): 

428 """Add a newly appeared pack to the cache by path.""" 

429 prev_pack = self._pack_cache.get(base_name) 

430 if prev_pack is not pack: 

431 self._pack_cache[base_name] = pack 

432 if prev_pack: 

433 prev_pack.close() 

434 

435 def generate_pack_data( 

436 self, have, want, shallow=None, progress=None, ofs_delta=True 

437 ) -> Tuple[int, Iterator[UnpackedObject]]: 

438 """Generate pack data objects for a set of wants/haves. 

439 

440 Args: 

441 have: List of SHA1s of objects that should not be sent 

442 want: List of SHA1s of objects that should be sent 

443 shallow: Set of shallow commit SHA1s to skip 

444 ofs_delta: Whether OFS deltas can be included 

445 progress: Optional progress reporting method 

446 """ 

447 missing_objects = MissingObjectFinder( 

448 self, haves=have, wants=want, shallow=shallow, progress=progress 

449 ) 

450 remote_has = missing_objects.get_remote_has() 

451 object_ids = list(missing_objects) 

452 return len(object_ids), generate_unpacked_objects( 

453 cast(PackedObjectContainer, self), 

454 object_ids, 

455 progress=progress, 

456 ofs_delta=ofs_delta, 

457 other_haves=remote_has, 

458 ) 

459 

460 def _clear_cached_packs(self): 

461 pack_cache = self._pack_cache 

462 self._pack_cache = {} 

463 while pack_cache: 

464 (name, pack) = pack_cache.popitem() 

465 pack.close() 

466 

467 def _iter_cached_packs(self): 

468 return self._pack_cache.values() 

469 

470 def _update_pack_cache(self): 

471 raise NotImplementedError(self._update_pack_cache) 

472 

473 def close(self): 

474 self._clear_cached_packs() 

475 

476 @property 

477 def packs(self): 

478 """List with pack objects.""" 

479 return list(self._iter_cached_packs()) + list(self._update_pack_cache()) 

480 

481 def _iter_alternate_objects(self): 

482 """Iterate over the SHAs of all the objects in alternate stores.""" 

483 for alternate in self.alternates: 

484 yield from alternate 

485 

486 def _iter_loose_objects(self): 

487 """Iterate over the SHAs of all loose objects.""" 

488 raise NotImplementedError(self._iter_loose_objects) 

489 

490 def _get_loose_object(self, sha): 

491 raise NotImplementedError(self._get_loose_object) 

492 

493 def _remove_loose_object(self, sha): 

494 raise NotImplementedError(self._remove_loose_object) 

495 

496 def _remove_pack(self, name): 

497 raise NotImplementedError(self._remove_pack) 

498 

499 def pack_loose_objects(self): 

500 """Pack loose objects. 

501 

502 Returns: Number of objects packed 

503 """ 

504 objects = set() 

505 for sha in self._iter_loose_objects(): 

506 objects.add((self._get_loose_object(sha), None)) 

507 self.add_objects(list(objects)) 

508 for obj, path in objects: 

509 self._remove_loose_object(obj.id) 

510 return len(objects) 

511 

512 def repack(self): 

513 """Repack the packs in this repository. 

514 

515 Note that this implementation is fairly naive and currently keeps all 

516 objects in memory while it repacks. 

517 """ 

518 loose_objects = set() 

519 for sha in self._iter_loose_objects(): 

520 loose_objects.add(self._get_loose_object(sha)) 

521 objects = {(obj, None) for obj in loose_objects} 

522 old_packs = {p.name(): p for p in self.packs} 

523 for name, pack in old_packs.items(): 

524 objects.update((obj, None) for obj in pack.iterobjects()) 

525 

526 # The name of the consolidated pack might match the name of a 

527 # pre-existing pack. Take care not to remove the newly created 

528 # consolidated pack. 

529 

530 consolidated = self.add_objects(objects) 

531 old_packs.pop(consolidated.name(), None) 

532 

533 for obj in loose_objects: 

534 self._remove_loose_object(obj.id) 

535 for name, pack in old_packs.items(): 

536 self._remove_pack(pack) 

537 self._update_pack_cache() 

538 return len(objects) 

539 

540 def __iter__(self): 

541 """Iterate over the SHAs that are present in this store.""" 

542 self._update_pack_cache() 

543 for pack in self._iter_cached_packs(): 

544 try: 

545 yield from pack 

546 except PackFileDisappeared: 

547 pass 

548 yield from self._iter_loose_objects() 

549 yield from self._iter_alternate_objects() 

550 

551 def contains_loose(self, sha): 

552 """Check if a particular object is present by SHA1 and is loose. 

553 

554 This does not check alternates. 

555 """ 

556 return self._get_loose_object(sha) is not None 

557 

558 def get_raw(self, name): 

559 """Obtain the raw fulltext for an object. 

560 

561 Args: 

562 name: sha for the object. 

563 Returns: tuple with numeric type and object contents. 

564 """ 

565 if name == ZERO_SHA: 

566 raise KeyError(name) 

567 if len(name) == 40: 

568 sha = hex_to_sha(name) 

569 hexsha = name 

570 elif len(name) == 20: 

571 sha = name 

572 hexsha = None 

573 else: 

574 raise AssertionError(f"Invalid object name {name!r}") 

575 for pack in self._iter_cached_packs(): 

576 try: 

577 return pack.get_raw(sha) 

578 except (KeyError, PackFileDisappeared): 

579 pass 

580 if hexsha is None: 

581 hexsha = sha_to_hex(name) 

582 ret = self._get_loose_object(hexsha) 

583 if ret is not None: 

584 return ret.type_num, ret.as_raw_string() 

585 # Maybe something else has added a pack with the object 

586 # in the meantime? 

587 for pack in self._update_pack_cache(): 

588 try: 

589 return pack.get_raw(sha) 

590 except KeyError: 

591 pass 

592 for alternate in self.alternates: 

593 try: 

594 return alternate.get_raw(hexsha) 

595 except KeyError: 

596 pass 

597 raise KeyError(hexsha) 

598 

599 def iter_unpacked_subset( 

600 self, 

601 shas, 

602 *, 

603 include_comp=False, 

604 allow_missing: bool = False, 

605 convert_ofs_delta: bool = True, 

606 ) -> Iterator[ShaFile]: 

607 todo: Set[bytes] = set(shas) 

608 for p in self._iter_cached_packs(): 

609 for unpacked in p.iter_unpacked_subset( 

610 todo, 

611 include_comp=include_comp, 

612 allow_missing=True, 

613 convert_ofs_delta=convert_ofs_delta, 

614 ): 

615 yield unpacked 

616 hexsha = sha_to_hex(unpacked.sha()) 

617 todo.remove(hexsha) 

618 # Maybe something else has added a pack with the object 

619 # in the meantime? 

620 for p in self._update_pack_cache(): 

621 for unpacked in p.iter_unpacked_subset( 

622 todo, 

623 include_comp=include_comp, 

624 allow_missing=True, 

625 convert_ofs_delta=convert_ofs_delta, 

626 ): 

627 yield unpacked 

628 hexsha = sha_to_hex(unpacked.sha()) 

629 todo.remove(hexsha) 

630 for alternate in self.alternates: 

631 for unpacked in alternate.iter_unpacked_subset( 

632 todo, 

633 include_comp=include_comp, 

634 allow_missing=True, 

635 convert_ofs_delta=convert_ofs_delta, 

636 ): 

637 yield unpacked 

638 hexsha = sha_to_hex(unpacked.sha()) 

639 todo.remove(hexsha) 

640 

641 def iterobjects_subset( 

642 self, shas: Iterable[bytes], *, allow_missing: bool = False 

643 ) -> Iterator[ShaFile]: 

644 todo: Set[bytes] = set(shas) 

645 for p in self._iter_cached_packs(): 

646 for o in p.iterobjects_subset(todo, allow_missing=True): 

647 yield o 

648 todo.remove(o.id) 

649 # Maybe something else has added a pack with the object 

650 # in the meantime? 

651 for p in self._update_pack_cache(): 

652 for o in p.iterobjects_subset(todo, allow_missing=True): 

653 yield o 

654 todo.remove(o.id) 

655 for alternate in self.alternates: 

656 for o in alternate.iterobjects_subset(todo, allow_missing=True): 

657 yield o 

658 todo.remove(o.id) 

659 for oid in todo: 

660 o = self._get_loose_object(oid) 

661 if o is not None: 

662 yield o 

663 elif not allow_missing: 

664 raise KeyError(oid) 

665 

666 def get_unpacked_object( 

667 self, sha1: bytes, *, include_comp: bool = False 

668 ) -> UnpackedObject: 

669 """Obtain the unpacked object. 

670 

671 Args: 

672 sha1: sha for the object. 

673 """ 

674 if sha1 == ZERO_SHA: 

675 raise KeyError(sha1) 

676 if len(sha1) == 40: 

677 sha = hex_to_sha(sha1) 

678 hexsha = sha1 

679 elif len(sha1) == 20: 

680 sha = sha1 

681 hexsha = None 

682 else: 

683 raise AssertionError(f"Invalid object sha1 {sha1!r}") 

684 for pack in self._iter_cached_packs(): 

685 try: 

686 return pack.get_unpacked_object(sha, include_comp=include_comp) 

687 except (KeyError, PackFileDisappeared): 

688 pass 

689 if hexsha is None: 

690 hexsha = sha_to_hex(sha1) 

691 # Maybe something else has added a pack with the object 

692 # in the meantime? 

693 for pack in self._update_pack_cache(): 

694 try: 

695 return pack.get_unpacked_object(sha, include_comp=include_comp) 

696 except KeyError: 

697 pass 

698 for alternate in self.alternates: 

699 try: 

700 return alternate.get_unpacked_object(hexsha, include_comp=include_comp) 

701 except KeyError: 

702 pass 

703 raise KeyError(hexsha) 

704 

705 def add_objects( 

706 self, 

707 objects: Sequence[Tuple[ShaFile, Optional[str]]], 

708 progress: Optional[Callable[[str], None]] = None, 

709 ) -> None: 

710 """Add a set of objects to this object store. 

711 

712 Args: 

713 objects: Iterable over (object, path) tuples, should support 

714 __len__. 

715 Returns: Pack object of the objects written. 

716 """ 

717 count = len(objects) 

718 record_iter = (full_unpacked_object(o) for (o, p) in objects) 

719 return self.add_pack_data(count, record_iter, progress=progress) 

720 

721 

722class DiskObjectStore(PackBasedObjectStore): 

723 """Git-style object store that exists on disk.""" 

724 

725 def __init__( 

726 self, path, loose_compression_level=-1, pack_compression_level=-1 

727 ) -> None: 

728 """Open an object store. 

729 

730 Args: 

731 path: Path of the object store. 

732 loose_compression_level: zlib compression level for loose objects 

733 pack_compression_level: zlib compression level for pack objects 

734 """ 

735 super().__init__(pack_compression_level=pack_compression_level) 

736 self.path = path 

737 self.pack_dir = os.path.join(self.path, PACKDIR) 

738 self._alternates = None 

739 self.loose_compression_level = loose_compression_level 

740 self.pack_compression_level = pack_compression_level 

741 

742 def __repr__(self) -> str: 

743 return f"<{self.__class__.__name__}({self.path!r})>" 

744 

745 @classmethod 

746 def from_config(cls, path, config): 

747 try: 

748 default_compression_level = int( 

749 config.get((b"core",), b"compression").decode() 

750 ) 

751 except KeyError: 

752 default_compression_level = -1 

753 try: 

754 loose_compression_level = int( 

755 config.get((b"core",), b"looseCompression").decode() 

756 ) 

757 except KeyError: 

758 loose_compression_level = default_compression_level 

759 try: 

760 pack_compression_level = int( 

761 config.get((b"core",), b"packCompression").decode() 

762 ) 

763 except KeyError: 

764 pack_compression_level = default_compression_level 

765 return cls(path, loose_compression_level, pack_compression_level) 

766 

767 @property 

768 def alternates(self): 

769 if self._alternates is not None: 

770 return self._alternates 

771 self._alternates = [] 

772 for path in self._read_alternate_paths(): 

773 self._alternates.append(DiskObjectStore(path)) 

774 return self._alternates 

775 

776 def _read_alternate_paths(self): 

777 try: 

778 f = GitFile(os.path.join(self.path, INFODIR, "alternates"), "rb") 

779 except FileNotFoundError: 

780 return 

781 with f: 

782 for line in f.readlines(): 

783 line = line.rstrip(b"\n") 

784 if line.startswith(b"#"): 

785 continue 

786 if os.path.isabs(line): 

787 yield os.fsdecode(line) 

788 else: 

789 yield os.fsdecode(os.path.join(os.fsencode(self.path), line)) 

790 

791 def add_alternate_path(self, path): 

792 """Add an alternate path to this object store.""" 

793 try: 

794 os.mkdir(os.path.join(self.path, INFODIR)) 

795 except FileExistsError: 

796 pass 

797 alternates_path = os.path.join(self.path, INFODIR, "alternates") 

798 with GitFile(alternates_path, "wb") as f: 

799 try: 

800 orig_f = open(alternates_path, "rb") 

801 except FileNotFoundError: 

802 pass 

803 else: 

804 with orig_f: 

805 f.write(orig_f.read()) 

806 f.write(os.fsencode(path) + b"\n") 

807 

808 if not os.path.isabs(path): 

809 path = os.path.join(self.path, path) 

810 self.alternates.append(DiskObjectStore(path)) 

811 

812 def _update_pack_cache(self): 

813 """Read and iterate over new pack files and cache them.""" 

814 try: 

815 pack_dir_contents = os.listdir(self.pack_dir) 

816 except FileNotFoundError: 

817 self.close() 

818 return [] 

819 pack_files = set() 

820 for name in pack_dir_contents: 

821 if name.startswith("pack-") and name.endswith(".pack"): 

822 # verify that idx exists first (otherwise the pack was not yet 

823 # fully written) 

824 idx_name = os.path.splitext(name)[0] + ".idx" 

825 if idx_name in pack_dir_contents: 

826 pack_name = name[: -len(".pack")] 

827 pack_files.add(pack_name) 

828 

829 # Open newly appeared pack files 

830 new_packs = [] 

831 for f in pack_files: 

832 if f not in self._pack_cache: 

833 pack = Pack(os.path.join(self.pack_dir, f)) 

834 new_packs.append(pack) 

835 self._pack_cache[f] = pack 

836 # Remove disappeared pack files 

837 for f in set(self._pack_cache) - pack_files: 

838 self._pack_cache.pop(f).close() 

839 return new_packs 

840 

841 def _get_shafile_path(self, sha): 

842 # Check from object dir 

843 return hex_to_filename(self.path, sha) 

844 

845 def _iter_loose_objects(self): 

846 for base in os.listdir(self.path): 

847 if len(base) != 2: 

848 continue 

849 for rest in os.listdir(os.path.join(self.path, base)): 

850 sha = os.fsencode(base + rest) 

851 if not valid_hexsha(sha): 

852 continue 

853 yield sha 

854 

855 def _get_loose_object(self, sha): 

856 path = self._get_shafile_path(sha) 

857 try: 

858 return ShaFile.from_path(path) 

859 except FileNotFoundError: 

860 return None 

861 

862 def _remove_loose_object(self, sha): 

863 os.remove(self._get_shafile_path(sha)) 

864 

865 def _remove_pack(self, pack): 

866 try: 

867 del self._pack_cache[os.path.basename(pack._basename)] 

868 except KeyError: 

869 pass 

870 pack.close() 

871 os.remove(pack.data.path) 

872 os.remove(pack.index.path) 

873 

874 def _get_pack_basepath(self, entries): 

875 suffix = iter_sha1(entry[0] for entry in entries) 

876 # TODO: Handle self.pack_dir being bytes 

877 suffix = suffix.decode("ascii") 

878 return os.path.join(self.pack_dir, "pack-" + suffix) 

879 

880 def _complete_pack(self, f, path, num_objects, indexer, progress=None): 

881 """Move a specific file containing a pack into the pack directory. 

882 

883 Note: The file should be on the same file system as the 

884 packs directory. 

885 

886 Args: 

887 f: Open file object for the pack. 

888 path: Path to the pack file. 

889 indexer: A PackIndexer for indexing the pack. 

890 """ 

891 entries = [] 

892 for i, entry in enumerate(indexer): 

893 if progress is not None: 

894 progress( 

895 ("generating index: %d/%d\r" % (i, num_objects)).encode("ascii") 

896 ) 

897 entries.append(entry) 

898 

899 pack_sha, extra_entries = extend_pack( 

900 f, 

901 indexer.ext_refs(), 

902 get_raw=self.get_raw, 

903 compression_level=self.pack_compression_level, 

904 progress=progress, 

905 ) 

906 f.flush() 

907 try: 

908 fileno = f.fileno() 

909 except AttributeError: 

910 pass 

911 else: 

912 os.fsync(fileno) 

913 f.close() 

914 

915 entries.extend(extra_entries) 

916 

917 # Move the pack in. 

918 entries.sort() 

919 pack_base_name = self._get_pack_basepath(entries) 

920 

921 for pack in self.packs: 

922 if pack._basename == pack_base_name: 

923 return pack 

924 

925 target_pack_path = pack_base_name + ".pack" 

926 target_index_path = pack_base_name + ".idx" 

927 if sys.platform == "win32": 

928 # Windows might have the target pack file lingering. Attempt 

929 # removal, silently passing if the target does not exist. 

930 with suppress(FileNotFoundError): 

931 os.remove(target_pack_path) 

932 os.rename(path, target_pack_path) 

933 

934 # Write the index. 

935 with GitFile(target_index_path, "wb", mask=PACK_MODE) as index_file: 

936 write_pack_index(index_file, entries, pack_sha) 

937 

938 # Add the pack to the store and return it. 

939 final_pack = Pack(pack_base_name) 

940 final_pack.check_length_and_checksum() 

941 self._add_cached_pack(pack_base_name, final_pack) 

942 return final_pack 

943 

944 def add_thin_pack(self, read_all, read_some, progress=None): 

945 """Add a new thin pack to this object store. 

946 

947 Thin packs are packs that contain deltas with parents that exist 

948 outside the pack. They should never be placed in the object store 

949 directly, and always indexed and completed as they are copied. 

950 

951 Args: 

952 read_all: Read function that blocks until the number of 

953 requested bytes are read. 

954 read_some: Read function that returns at least one byte, but may 

955 not return the number of bytes requested. 

956 Returns: A Pack object pointing at the now-completed thin pack in the 

957 objects/pack directory. 

958 """ 

959 import tempfile 

960 

961 fd, path = tempfile.mkstemp(dir=self.path, prefix="tmp_pack_") 

962 with os.fdopen(fd, "w+b") as f: 

963 os.chmod(path, PACK_MODE) 

964 indexer = PackIndexer(f, resolve_ext_ref=self.get_raw) 

965 copier = PackStreamCopier(read_all, read_some, f, delta_iter=indexer) 

966 copier.verify(progress=progress) 

967 return self._complete_pack(f, path, len(copier), indexer, progress=progress) 

968 

969 def add_pack(self): 

970 """Add a new pack to this object store. 

971 

972 Returns: Fileobject to write to, a commit function to 

973 call when the pack is finished and an abort 

974 function. 

975 """ 

976 import tempfile 

977 

978 fd, path = tempfile.mkstemp(dir=self.pack_dir, suffix=".pack") 

979 f = os.fdopen(fd, "w+b") 

980 os.chmod(path, PACK_MODE) 

981 

982 def commit(): 

983 if f.tell() > 0: 

984 f.seek(0) 

985 with PackData(path, f) as pd: 

986 indexer = PackIndexer.for_pack_data( 

987 pd, resolve_ext_ref=self.get_raw 

988 ) 

989 return self._complete_pack(f, path, len(pd), indexer) 

990 else: 

991 f.close() 

992 os.remove(path) 

993 return None 

994 

995 def abort(): 

996 f.close() 

997 os.remove(path) 

998 

999 return f, commit, abort 

1000 

1001 def add_object(self, obj): 

1002 """Add a single object to this object store. 

1003 

1004 Args: 

1005 obj: Object to add 

1006 """ 

1007 path = self._get_shafile_path(obj.id) 

1008 dir = os.path.dirname(path) 

1009 try: 

1010 os.mkdir(dir) 

1011 except FileExistsError: 

1012 pass 

1013 if os.path.exists(path): 

1014 return # Already there, no need to write again 

1015 with GitFile(path, "wb", mask=PACK_MODE) as f: 

1016 f.write( 

1017 obj.as_legacy_object(compression_level=self.loose_compression_level) 

1018 ) 

1019 

1020 @classmethod 

1021 def init(cls, path): 

1022 try: 

1023 os.mkdir(path) 

1024 except FileExistsError: 

1025 pass 

1026 os.mkdir(os.path.join(path, "info")) 

1027 os.mkdir(os.path.join(path, PACKDIR)) 

1028 return cls(path) 
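
# Illustrative sketch (not part of this module): initialise an on-disk object
# store under a temporary directory, add a loose object and read it back.
import tempfile
from dulwich.object_store import DiskObjectStore
from dulwich.objects import Blob

tmpdir = tempfile.mkdtemp()
store = DiskObjectStore.init(tmpdir)      # creates the info/ and pack/ subdirs
blob = Blob.from_string(b"loose object")
store.add_object(blob)
assert store.contains_loose(blob.id)
print(store[blob.id].as_raw_string())     # b'loose object'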

1029 

1030 

1031class MemoryObjectStore(BaseObjectStore): 

1032 """Object store that keeps all objects in memory.""" 

1033 

1034 def __init__(self) -> None: 

1035 super().__init__() 

1036 self._data: Dict[str, ShaFile] = {} 

1037 self.pack_compression_level = -1 

1038 

1039 def _to_hexsha(self, sha): 

1040 if len(sha) == 40: 

1041 return sha 

1042 elif len(sha) == 20: 

1043 return sha_to_hex(sha) 

1044 else: 

1045 raise ValueError(f"Invalid sha {sha!r}") 

1046 

1047 def contains_loose(self, sha): 

1048 """Check if a particular object is present by SHA1 and is loose.""" 

1049 return self._to_hexsha(sha) in self._data 

1050 

1051 def contains_packed(self, sha): 

1052 """Check if a particular object is present by SHA1 and is packed.""" 

1053 return False 

1054 

1055 def __iter__(self): 

1056 """Iterate over the SHAs that are present in this store.""" 

1057 return iter(self._data.keys()) 

1058 

1059 @property 

1060 def packs(self): 

1061 """List with pack objects.""" 

1062 return [] 

1063 

1064 def get_raw(self, name: ObjectID): 

1065 """Obtain the raw text for an object. 

1066 

1067 Args: 

1068 name: sha for the object. 

1069 Returns: tuple with numeric type and object contents. 

1070 """ 

1071 obj = self[self._to_hexsha(name)] 

1072 return obj.type_num, obj.as_raw_string() 

1073 

1074 def __getitem__(self, name: ObjectID): 

1075 return self._data[self._to_hexsha(name)].copy() 

1076 

1077 def __delitem__(self, name: ObjectID) -> None: 

1078 """Delete an object from this store, for testing only.""" 

1079 del self._data[self._to_hexsha(name)] 

1080 

1081 def add_object(self, obj): 

1082 """Add a single object to this object store.""" 

1083 self._data[obj.id] = obj.copy() 

1084 

1085 def add_objects(self, objects, progress=None): 

1086 """Add a set of objects to this object store. 

1087 

1088 Args: 

1089 objects: Iterable over a list of (object, path) tuples 

1090 """ 

1091 for obj, path in objects: 

1092 self.add_object(obj) 

1093 

1094 def add_pack(self): 

1095 """Add a new pack to this object store. 

1096 

1097 Because this object store doesn't support packs, we extract and add the 

1098 individual objects. 

1099 

1100 Returns: Fileobject to write to and a commit function to 

1101 call when the pack is finished. 

1102 """ 

1103 from tempfile import SpooledTemporaryFile 

1104 

1105 f = SpooledTemporaryFile(max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-") 

1106 

1107 def commit(): 

1108 size = f.tell() 

1109 if size > 0: 

1110 f.seek(0) 

1111 p = PackData.from_file(f, size) 

1112 for obj in PackInflater.for_pack_data(p, self.get_raw): 

1113 self.add_object(obj) 

1114 p.close() 

1115 else: 

1116 f.close() 

1117 

1118 def abort(): 

1119 f.close() 

1120 

1121 return f, commit, abort 
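
# Illustrative sketch (not part of this module): the add_pack() protocol shared
# by the stores in this file hands back a file object plus commit/abort
# callbacks; exactly one of the two must be called.
from dulwich.object_store import MemoryObjectStore

store = MemoryObjectStore()
f, commit, abort = store.add_pack()
try:
    pass   # ... stream raw pack bytes into f here ...
except BaseException:
    abort()
    raise
else:
    commit()   # with nothing written, commit simply closes the temporary file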

1122 

1123 def add_pack_data( 

1124 self, count: int, unpacked_objects: Iterator[UnpackedObject], progress=None 

1125 ) -> None: 

1126 """Add pack data to this object store. 

1127 

1128 Args: 

1129 count: Number of items to add 

1130 unpacked_objects: Iterator over UnpackedObject records to add 

1131 """ 

1132 for unpacked_object in unpacked_objects: 

1133 self.add_object(unpacked_object.sha_file()) 

1134 

1135 def add_thin_pack(self, read_all, read_some, progress=None): 

1136 """Add a new thin pack to this object store. 

1137 

1138 Thin packs are packs that contain deltas with parents that exist 

1139 outside the pack. Because this object store doesn't support packs, we 

1140 extract and add the individual objects. 

1141 

1142 Args: 

1143 read_all: Read function that blocks until the number of 

1144 requested bytes are read. 

1145 read_some: Read function that returns at least one byte, but may 

1146 not return the number of bytes requested. 

1147 """ 

1148 f, commit, abort = self.add_pack() 

1149 try: 

1150 copier = PackStreamCopier(read_all, read_some, f) 

1151 copier.verify() 

1152 except BaseException: 

1153 abort() 

1154 raise 

1155 else: 

1156 commit() 

1157 

1158 

1159class ObjectIterator(Protocol): 

1160 """Interface for iterating over objects.""" 

1161 

1162 def iterobjects(self) -> Iterator[ShaFile]: 

1163 raise NotImplementedError(self.iterobjects) 

1164 

1165 

1166def tree_lookup_path(lookup_obj, root_sha, path): 

1167 """Look up an object in a Git tree. 

1168 

1169 Args: 

1170 lookup_obj: Callback for retrieving object by SHA1 

1171 root_sha: SHA1 of the root tree 

1172 path: Path to lookup 

1173 Returns: A tuple of (mode, SHA) of the resulting path. 

1174 """ 

1175 tree = lookup_obj(root_sha) 

1176 if not isinstance(tree, Tree): 

1177 raise NotTreeError(root_sha) 

1178 return tree.lookup_path(lookup_obj, path) 
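
# Illustrative sketch (not part of this module): resolve b"dir/file.txt"
# inside a nested tree with tree_lookup_path.
import stat
from dulwich.object_store import MemoryObjectStore, tree_lookup_path
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"content")
subtree = Tree()
subtree.add(b"file.txt", stat.S_IFREG | 0o644, blob.id)
root = Tree()
root.add(b"dir", stat.S_IFDIR, subtree.id)
for obj in (blob, subtree, root):
    store.add_object(obj)
mode, sha = tree_lookup_path(store.__getitem__, root.id, b"dir/file.txt")
assert sha == blob.id and stat.S_ISREG(mode)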

1179 

1180 

1181def _collect_filetree_revs( 

1182 obj_store: ObjectContainer, tree_sha: ObjectID, kset: Set[ObjectID] 

1183) -> None: 

1184 """Collect SHA1s of files and directories for specified tree. 

1185 

1186 Args: 

1187 obj_store: Object store to get objects by SHA from 

1188 tree_sha: tree reference to walk 

1189 kset: set to fill with references to files and directories 

1190 """ 

1191 filetree = obj_store[tree_sha] 

1192 assert isinstance(filetree, Tree) 

1193 for name, mode, sha in filetree.iteritems(): 

1194 if not S_ISGITLINK(mode) and sha not in kset: 

1195 kset.add(sha) 

1196 if stat.S_ISDIR(mode): 

1197 _collect_filetree_revs(obj_store, sha, kset) 

1198 

1199 

1200def _split_commits_and_tags( 

1201 obj_store: ObjectContainer, lst, *, ignore_unknown=False 

1202) -> Tuple[Set[bytes], Set[bytes], Set[bytes]]: 

1203 """Split object id list into three lists with commit, tag, and other SHAs. 

1204 

1205 Commits referenced by tags are included in the commits 

1206 list as well. Only SHA1s known in this repository will get 

1207 through; unless the ignore_unknown argument is True, a KeyError 

1208 is raised for SHA1s missing from the repository. 

1209 

1210 Args: 

1211 obj_store: Object store to get objects by SHA1 from 

1212 lst: Collection of commit and tag SHAs 

1213 ignore_unknown: True to skip SHA1 missing in the repository 

1214 silently. 

1215 Returns: A tuple of (commits, tags, others) SHA1s 

1216 """ 

1217 commits: Set[bytes] = set() 

1218 tags: Set[bytes] = set() 

1219 others: Set[bytes] = set() 

1220 for e in lst: 

1221 try: 

1222 o = obj_store[e] 

1223 except KeyError: 

1224 if not ignore_unknown: 

1225 raise 

1226 else: 

1227 if isinstance(o, Commit): 

1228 commits.add(e) 

1229 elif isinstance(o, Tag): 

1230 tags.add(e) 

1231 tagged = o.object[1] 

1232 c, t, os = _split_commits_and_tags( 

1233 obj_store, [tagged], ignore_unknown=ignore_unknown 

1234 ) 

1235 commits |= c 

1236 tags |= t 

1237 others |= os 

1238 else: 

1239 others.add(e) 

1240 return (commits, tags, others) 

1241 

1242 

1243class MissingObjectFinder: 

1244 """Find the objects missing from another object store. 

1245 

1246 Args: 

1247 object_store: Object store containing at least all objects to be 

1248 sent 

1249 haves: SHA1s of commits not to send (already present in target) 

1250 wants: SHA1s of commits to send 

1251 progress: Optional function to report progress to. 

1252 get_tagged: Function that returns a dict of pointed-to sha -> tag 

1253 sha for including tags. 

1254 get_parents: Optional function for getting the parents of a commit. 

1255 tagged: dict of pointed-to sha -> tag sha for including tags 

1256 """ 

1257 

1258 def __init__( 

1259 self, 

1260 object_store, 

1261 haves, 

1262 wants, 

1263 *, 

1264 shallow=None, 

1265 progress=None, 

1266 get_tagged=None, 

1267 get_parents=lambda commit: commit.parents, 

1268 ) -> None: 

1269 self.object_store = object_store 

1270 if shallow is None: 

1271 shallow = set() 

1272 self._get_parents = get_parents 

1273 # process Commits and Tags differently 

1274 # Note, while haves may list commits/tags not available locally, 

1275 # and such SHAs would get filtered out by _split_commits_and_tags, 

1276 # wants shall list only known SHAs, and otherwise 

1277 # _split_commits_and_tags fails with KeyError 

1278 have_commits, have_tags, have_others = _split_commits_and_tags( 

1279 object_store, haves, ignore_unknown=True 

1280 ) 

1281 want_commits, want_tags, want_others = _split_commits_and_tags( 

1282 object_store, wants, ignore_unknown=False 

1283 ) 

1284 # all_ancestors is a set of commits that shall not be sent 

1285 # (complete repository up to 'haves') 

1286 all_ancestors = _collect_ancestors( 

1287 object_store, have_commits, shallow=shallow, get_parents=self._get_parents 

1288 )[0] 

1289 # all_missing - complete set of commits between haves and wants 

1290 # common - commits from all_ancestors we hit into while 

1291 # traversing parent hierarchy of wants 

1292 missing_commits, common_commits = _collect_ancestors( 

1293 object_store, 

1294 want_commits, 

1295 all_ancestors, 

1296 shallow=shallow, 

1297 get_parents=self._get_parents, 

1298 ) 

1299 self.remote_has: Set[bytes] = set() 

1300 # Now, fill sha_done with commits and revisions of 

1301 # files and directories known to be both locally 

1302 # and on target. Thus these commits and files 

1303 # won't get selected for fetch 

1304 for h in common_commits: 

1305 self.remote_has.add(h) 

1306 cmt = object_store[h] 

1307 _collect_filetree_revs(object_store, cmt.tree, self.remote_has) 

1308 # record tags we have as visited, too 

1309 for t in have_tags: 

1310 self.remote_has.add(t) 

1311 self.sha_done = set(self.remote_has) 

1312 

1313 # in fact, what we 'want' is commits, tags, and others 

1314 # we've found missing 

1315 self.objects_to_send: Set[ 

1316 Tuple[ObjectID, Optional[bytes], Optional[int], bool] 

1317 ] = {(w, None, Commit.type_num, False) for w in missing_commits} 

1318 missing_tags = want_tags.difference(have_tags) 

1319 self.objects_to_send.update( 

1320 {(w, None, Tag.type_num, False) for w in missing_tags} 

1321 ) 

1322 missing_others = want_others.difference(have_others) 

1323 self.objects_to_send.update({(w, None, None, False) for w in missing_others}) 

1324 

1325 if progress is None: 

1326 self.progress = lambda x: None 

1327 else: 

1328 self.progress = progress 

1329 self._tagged = get_tagged and get_tagged() or {} 

1330 

1331 def get_remote_has(self): 

1332 return self.remote_has 

1333 

1334 def add_todo( 

1335 self, entries: Iterable[Tuple[ObjectID, Optional[bytes], Optional[int], bool]] 

1336 ): 

1337 self.objects_to_send.update([e for e in entries if e[0] not in self.sha_done]) 

1338 

1339 def __next__(self) -> Tuple[bytes, Optional[PackHint]]: 

1340 while True: 

1341 if not self.objects_to_send: 

1342 self.progress( 

1343 ("counting objects: %d, done.\n" % len(self.sha_done)).encode( 

1344 "ascii" 

1345 ) 

1346 ) 

1347 raise StopIteration 

1348 (sha, name, type_num, leaf) = self.objects_to_send.pop() 

1349 if sha not in self.sha_done: 

1350 break 

1351 if not leaf: 

1352 o = self.object_store[sha] 

1353 if isinstance(o, Commit): 

1354 self.add_todo([(o.tree, b"", Tree.type_num, False)]) 

1355 elif isinstance(o, Tree): 

1356 self.add_todo( 

1357 [ 

1358 ( 

1359 s, 

1360 n, 

1361 (Blob.type_num if stat.S_ISREG(m) else Tree.type_num), 

1362 not stat.S_ISDIR(m), 

1363 ) 

1364 for n, m, s in o.iteritems() 

1365 if not S_ISGITLINK(m) 

1366 ] 

1367 ) 

1368 elif isinstance(o, Tag): 

1369 self.add_todo([(o.object[1], None, o.object[0].type_num, False)]) 

1370 if sha in self._tagged: 

1371 self.add_todo([(self._tagged[sha], None, None, True)]) 

1372 self.sha_done.add(sha) 

1373 if len(self.sha_done) % 1000 == 0: 

1374 self.progress( 

1375 ("counting objects: %d\r" % len(self.sha_done)).encode("ascii") 

1376 ) 

1377 if type_num is None: 

1378 pack_hint = None 

1379 else: 

1380 pack_hint = (type_num, name) 

1381 return (sha, pack_hint) 

1382 

1383 def __iter__(self): 

1384 return self 
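
# Illustrative sketch (not part of this module): MissingObjectFinder yields
# (sha, pack_hint) pairs for objects the other side lacks; here the remote
# has nothing and wants a single blob.
from dulwich.object_store import MemoryObjectStore, MissingObjectFinder
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob.from_string(b"ship me")
store.add_object(blob)
finder = MissingObjectFinder(store, haves=[], wants=[blob.id])
print(list(finder))   # [(blob.id, None)]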

1385 

1386 

1387class ObjectStoreGraphWalker: 

1388 """Graph walker that finds what commits are missing from an object store. 

1389 

1390 Attributes: 

1391 heads: Revisions without descendants in the local repo 

1392 get_parents: Function to retrieve parents in the local repo 

1393 """ 

1394 

1395 def __init__(self, local_heads, get_parents, shallow=None) -> None: 

1396 """Create a new instance. 

1397 

1398 Args: 

1399 local_heads: Heads to start search with 

1400 get_parents: Function for finding the parents of a SHA1. 

1401 """ 

1402 self.heads = set(local_heads) 

1403 self.get_parents = get_parents 

1404 self.parents: Dict[ObjectID, Optional[List[ObjectID]]] = {} 

1405 if shallow is None: 

1406 shallow = set() 

1407 self.shallow = shallow 

1408 

1409 def nak(self): 

1410 """Nothing in common was found.""" 

1411 

1412 def ack(self, sha): 

1413 """Ack that a revision and its ancestors are present in the source.""" 

1414 if len(sha) != 40: 

1415 raise ValueError(f"unexpected sha {sha!r} received") 

1416 ancestors = {sha} 

1417 

1418 # stop if we run out of heads to remove 

1419 while self.heads: 

1420 for a in ancestors: 

1421 if a in self.heads: 

1422 self.heads.remove(a) 

1423 

1424 # collect all ancestors 

1425 new_ancestors = set() 

1426 for a in ancestors: 

1427 ps = self.parents.get(a) 

1428 if ps is not None: 

1429 new_ancestors.update(ps) 

1430 self.parents[a] = None 

1431 

1432 # no more ancestors; stop 

1433 if not new_ancestors: 

1434 break 

1435 

1436 ancestors = new_ancestors 

1437 

1438 def next(self): 

1439 """Iterate over ancestors of heads in the target.""" 

1440 if self.heads: 

1441 ret = self.heads.pop() 

1442 try: 

1443 ps = self.get_parents(ret) 

1444 except KeyError: 

1445 return None 

1446 self.parents[ret] = ps 

1447 self.heads.update([p for p in ps if p not in self.parents]) 

1448 return ret 

1449 return None 

1450 

1451 __next__ = next 
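
# Illustrative sketch (not part of this module) of the graph-walker protocol:
# next() hands out local heads and walks their ancestry; ack() marks a commit
# (and everything it can reach) as already known to the other side. The ids
# below are hypothetical 40-character placeholders.
from dulwich.object_store import ObjectStoreGraphWalker

parents = {b"a" * 40: [b"b" * 40], b"b" * 40: [b"c" * 40], b"c" * 40: []}
walker = ObjectStoreGraphWalker([b"a" * 40], parents.__getitem__)
print(walker.next())    # b'aaaa...aaaa'
walker.ack(b"a" * 40)   # the remote already has it, so its ancestors are too
print(walker.next())    # None: nothing left to negotiate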

1452 

1453 

1454def commit_tree_changes(object_store, tree, changes): 

1455 """Commit a specified set of changes to a tree structure. 

1456 

1457 This will apply a set of changes on top of an existing tree, storing new 

1458 objects in object_store. 

1459 

1460 changes are a list of tuples with (path, mode, object_sha). 

1461 Paths can be both blobs and trees. Setting the mode and 

1462 object sha to None deletes the path. 

1463 

1464 This method works especially well if there are only a small 

1465 number of changes to a big tree. For a large number of changes 

1466 to a large tree, use e.g. commit_tree. 

1467 

1468 Args: 

1469 object_store: Object store to store new objects in 

1470 and retrieve old ones from. 

1471 tree: Original tree root 

1472 changes: changes to apply 

1473 Returns: New tree root object 

1474 """ 

1475 # TODO(jelmer): Save up the objects and add them using .add_objects 

1476 # rather than with individual calls to .add_object. 

1477 nested_changes = {} 

1478 for path, new_mode, new_sha in changes: 

1479 try: 

1480 (dirname, subpath) = path.split(b"/", 1) 

1481 except ValueError: 

1482 if new_sha is None: 

1483 del tree[path] 

1484 else: 

1485 tree[path] = (new_mode, new_sha) 

1486 else: 

1487 nested_changes.setdefault(dirname, []).append((subpath, new_mode, new_sha)) 

1488 for name, subchanges in nested_changes.items(): 

1489 try: 

1490 orig_subtree = object_store[tree[name][1]] 

1491 except KeyError: 

1492 orig_subtree = Tree() 

1493 subtree = commit_tree_changes(object_store, orig_subtree, subchanges) 

1494 if len(subtree) == 0: 

1495 del tree[name] 

1496 else: 

1497 tree[name] = (stat.S_IFDIR, subtree.id) 

1498 object_store.add_object(tree) 

1499 return tree 
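
# Illustrative sketch (not part of this module): apply two changes on top of an
# existing tree; a (path, None, None) entry deletes that path.
import stat
from dulwich.object_store import MemoryObjectStore, commit_tree_changes
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
old, new = Blob.from_string(b"old"), Blob.from_string(b"new")
tree = Tree()
tree.add(b"keep.txt", stat.S_IFREG | 0o644, old.id)
tree.add(b"drop.txt", stat.S_IFREG | 0o644, old.id)
for obj in (old, new, tree):
    store.add_object(obj)
new_tree = commit_tree_changes(
    store,
    tree,
    [
        (b"keep.txt", stat.S_IFREG | 0o644, new.id),   # replace contents
        (b"drop.txt", None, None),                     # delete this path
    ],
)
assert b"drop.txt" not in new_tree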

1500 

1501 

1502class OverlayObjectStore(BaseObjectStore): 

1503 """Object store that can overlay multiple object stores.""" 

1504 

1505 def __init__(self, bases, add_store=None) -> None: 

1506 self.bases = bases 

1507 self.add_store = add_store 

1508 

1509 def add_object(self, object): 

1510 if self.add_store is None: 

1511 raise NotImplementedError(self.add_object) 

1512 return self.add_store.add_object(object) 

1513 

1514 def add_objects(self, objects, progress=None): 

1515 if self.add_store is None: 

1516 raise NotImplementedError(self.add_object) 

1517 return self.add_store.add_objects(objects, progress) 

1518 

1519 @property 

1520 def packs(self): 

1521 ret = [] 

1522 for b in self.bases: 

1523 ret.extend(b.packs) 

1524 return ret 

1525 

1526 def __iter__(self): 

1527 done = set() 

1528 for b in self.bases: 

1529 for o_id in b: 

1530 if o_id not in done: 

1531 yield o_id 

1532 done.add(o_id) 

1533 

1534 def iterobjects_subset( 

1535 self, shas: Iterable[bytes], *, allow_missing: bool = False 

1536 ) -> Iterator[ShaFile]: 

1537 todo = set(shas) 

1538 for b in self.bases: 

1539 for o in b.iterobjects_subset(todo, allow_missing=True): 

1540 yield o 

1541 todo.remove(o.id) 

1542 if todo and not allow_missing: 

1543 raise KeyError(o.id) 

1544 

1545 def iter_unpacked_subset( 

1546 self, 

1547 shas: Iterable[bytes], 

1548 *, 

1549 include_comp=False, 

1550 allow_missing: bool = False, 

1551 convert_ofs_delta=True, 

1552 ) -> Iterator[ShaFile]: 

1553 todo = set(shas) 

1554 for b in self.bases: 

1555 for o in b.iter_unpacked_subset( 

1556 todo, 

1557 include_comp=include_comp, 

1558 allow_missing=True, 

1559 convert_ofs_delta=convert_ofs_delta, 

1560 ): 

1561 yield o 

1562 todo.remove(o.id) 

1563 if todo and not allow_missing: 

1564 raise KeyError(o.id) 

1565 

1566 def get_raw(self, sha_id): 

1567 for b in self.bases: 

1568 try: 

1569 return b.get_raw(sha_id) 

1570 except KeyError: 

1571 pass 

1572 raise KeyError(sha_id) 

1573 

1574 def contains_packed(self, sha): 

1575 for b in self.bases: 

1576 if b.contains_packed(sha): 

1577 return True 

1578 return False 

1579 

1580 def contains_loose(self, sha): 

1581 for b in self.bases: 

1582 if b.contains_loose(sha): 

1583 return True 

1584 return False 
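
# Illustrative sketch (not part of this module): an OverlayObjectStore reads
# from several backing stores and funnels writes into one of them.
from dulwich.object_store import MemoryObjectStore, OverlayObjectStore
from dulwich.objects import Blob

base_a, base_b = MemoryObjectStore(), MemoryObjectStore()
blob = Blob.from_string(b"in base b")
base_b.add_object(blob)
overlay = OverlayObjectStore([base_a, base_b], add_store=base_a)
assert overlay[blob.id].as_raw_string() == b"in base b"
new = Blob.from_string(b"written through")
overlay.add_object(new)
assert new.id in base_a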

1585 

1586 

1587def read_packs_file(f): 

1588 """Yield the packs listed in a packs file.""" 

1589 for line in f.read().splitlines(): 

1590 if not line: 

1591 continue 

1592 (kind, name) = line.split(b" ", 1) 

1593 if kind != b"P": 

1594 continue 

1595 yield os.fsdecode(name) 
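
# Illustrative sketch (not part of this module): parse an objects/info/packs
# style listing; the pack names below are made up.
from io import BytesIO
from dulwich.object_store import read_packs_file

listing = BytesIO(b"P pack-1234abcd.pack\n\nP pack-cafebabe.pack\n")
print(list(read_packs_file(listing)))
# ['pack-1234abcd.pack', 'pack-cafebabe.pack']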

1596 

1597 

1598class BucketBasedObjectStore(PackBasedObjectStore): 

1599 """Object store implementation that uses a bucket store like S3 as backend.""" 

1600 

1601 def _iter_loose_objects(self): 

1602 """Iterate over the SHAs of all loose objects.""" 

1603 return iter([]) 

1604 

1605 def _get_loose_object(self, sha): 

1606 return None 

1607 

1608 def _remove_loose_object(self, sha): 

1609 # Loose objects don't exist in this store, so nothing to remove. 

1610 pass 

1611 

1612 def _remove_pack(self, name): 

1613 raise NotImplementedError(self._remove_pack) 

1614 

1615 def _iter_pack_names(self): 

1616 raise NotImplementedError(self._iter_pack_names) 

1617 

1618 def _get_pack(self, name): 

1619 raise NotImplementedError(self._get_pack) 

1620 

1621 def _update_pack_cache(self): 

1622 pack_files = set(self._iter_pack_names()) 

1623 

1624 # Open newly appeared pack files 

1625 new_packs = [] 

1626 for f in pack_files: 

1627 if f not in self._pack_cache: 

1628 pack = self._get_pack(f) 

1629 new_packs.append(pack) 

1630 self._pack_cache[f] = pack 

1631 # Remove disappeared pack files 

1632 for f in set(self._pack_cache) - pack_files: 

1633 self._pack_cache.pop(f).close() 

1634 return new_packs 

1635 

1636 def _upload_pack(self, basename, pack_file, index_file): 

1637 raise NotImplementedError 

1638 

1639 def add_pack(self): 

1640 """Add a new pack to this object store. 

1641 

1642 Returns: Fileobject to write to, a commit function to 

1643 call when the pack is finished and an abort 

1644 function. 

1645 """ 

1646 import tempfile 

1647 

1648 pf = tempfile.SpooledTemporaryFile( 

1649 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

1650 ) 

1651 

1652 def commit(): 

1653 if pf.tell() == 0: 

1654 pf.close() 

1655 return None 

1656 

1657 pf.seek(0) 

1658 p = PackData(pf.name, pf) 

1659 entries = p.sorted_entries() 

1660 basename = iter_sha1(entry[0] for entry in entries).decode("ascii") 

1661 idxf = tempfile.SpooledTemporaryFile( 

1662 max_size=PACK_SPOOL_FILE_MAX_SIZE, prefix="incoming-" 

1663 ) 

1664 checksum = p.get_stored_checksum() 

1665 write_pack_index(idxf, entries, checksum) 

1666 idxf.seek(0) 

1667 idx = load_pack_index_file(basename + ".idx", idxf) 

1668 for pack in self.packs: 

1669 if pack.get_stored_checksum() == p.get_stored_checksum(): 

1670 p.close() 

1671 idx.close() 

1672 return pack 

1673 pf.seek(0) 

1674 idxf.seek(0) 

1675 self._upload_pack(basename, pf, idxf) 

1676 final_pack = Pack.from_objects(p, idx) 

1677 self._add_cached_pack(basename, final_pack) 

1678 return final_pack 

1679 

1680 return pf, commit, pf.close 

1681 

1682 

1683def _collect_ancestors( 

1684 store: ObjectContainer, 

1685 heads, 

1686 common: FrozenSet[ObjectID] = frozenset(), 

1687 shallow: FrozenSet[ObjectID] = frozenset(), 

1688 get_parents=lambda commit: commit.parents, 

1689): 

1690 """Collect all ancestors of heads up to (excluding) those in common. 

1691 

1692 Args: 

1693 heads: commits to start from 

1694 common: commits to end at, or empty set to walk repository 

1695 completely 

1696 get_parents: Optional function for getting the parents of a 

1697 commit. 

1698 Returns: a tuple (A, B) where A - all commits reachable 

1699 from heads but not present in common, B - common (shared) elements 

1700 that are directly reachable from heads 

1701 """ 

1702 bases = set() 

1703 commits = set() 

1704 queue = [] 

1705 queue.extend(heads) 

1706 while queue: 

1707 e = queue.pop(0) 

1708 if e in common: 

1709 bases.add(e) 

1710 elif e not in commits: 

1711 commits.add(e) 

1712 if e in shallow: 

1713 continue 

1714 cmt = store[e] 

1715 queue.extend(get_parents(cmt)) 

1716 return (commits, bases) 

1717 

1718 

1719def iter_tree_contents( 

1720 store: ObjectContainer, tree_id: Optional[ObjectID], *, include_trees: bool = False 

1721): 

1722 """Iterate the contents of a tree and all subtrees. 

1723 

1724 Iteration is depth-first pre-order, as in e.g. os.walk. 

1725 

1726 Args: 

1727 tree_id: SHA1 of the tree. 

1728 include_trees: If True, include tree objects in the iteration. 

1729 Returns: Iterator over TreeEntry namedtuples for all the objects in a 

1730 tree. 

1731 """ 

1732 if tree_id is None: 

1733 return 

1734 # This could be fairly easily generalized to >2 trees if we find a use 

1735 # case. 

1736 todo = [TreeEntry(b"", stat.S_IFDIR, tree_id)] 

1737 while todo: 

1738 entry = todo.pop() 

1739 if stat.S_ISDIR(entry.mode): 

1740 extra = [] 

1741 tree = store[entry.sha] 

1742 assert isinstance(tree, Tree) 

1743 for subentry in tree.iteritems(name_order=True): 

1744 extra.append(subentry.in_path(entry.path)) 

1745 todo.extend(reversed(extra)) 

1746 if not stat.S_ISDIR(entry.mode) or include_trees: 

1747 yield entry 
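
# Illustrative sketch (not part of this module): walk a nested tree
# depth-first with iter_tree_contents and print each TreeEntry.
import stat
from dulwich.object_store import MemoryObjectStore, iter_tree_contents
from dulwich.objects import Blob, Tree

store = MemoryObjectStore()
blob = Blob.from_string(b"hello")
subtree = Tree()
subtree.add(b"inner.txt", stat.S_IFREG | 0o644, blob.id)
root = Tree()
root.add(b"sub", stat.S_IFDIR, subtree.id)
for obj in (blob, subtree, root):
    store.add_object(obj)
for entry in iter_tree_contents(store, root.id):
    print(entry.path, oct(entry.mode), entry.sha)   # b'sub/inner.txt' ...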

1748 

1749 

1750def peel_sha(store: ObjectContainer, sha: bytes) -> Tuple[ShaFile, ShaFile]: 

1751 """Peel all tags from a SHA. 

1752 

1753 Args: 

1754 store: Object store to retrieve objects from. 

1755 sha: The object SHA to peel. 

1756 Returns: A tuple of (unpeeled, peeled) ShaFile objects; if the original 

1757 SHA does not point to a tag, both are the same object. 

1758 """ 

1759 unpeeled = obj = store[sha] 

1760 obj_class = object_class(obj.type_name) 

1761 while obj_class is Tag: 

1762 assert isinstance(obj, Tag) 

1763 obj_class, sha = obj.object 

1764 obj = store[sha] 

1765 return unpeeled, obj
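
# Illustrative sketch (not part of this module): peel_sha follows an annotated
# tag down to the object it points at. The tagger identity is a made-up value.
from dulwich.object_store import MemoryObjectStore, peel_sha
from dulwich.objects import Blob, Tag

store = MemoryObjectStore()
blob = Blob.from_string(b"payload")
tag = Tag()
tag.object = (Blob, blob.id)
tag.name = b"v1.0"
tag.tagger = b"Alice <alice@example.com>"
tag.tag_time = 1700000000
tag.tag_timezone = 0
tag.message = b"release v1.0\n"
store.add_object(blob)
store.add_object(tag)
unpeeled, peeled = peel_sha(store, tag.id)
assert unpeeled.id == tag.id and peeled.id == blob.id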