Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

374 statements  

1# protocol.py -- Shared parts of the git protocols 

2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk> 

3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk> 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23"""Generic functions for talking the git smart server protocol.""" 

24 

25__all__ = [ 

26 "CAPABILITIES_REF", 

27 "CAPABILITY_AGENT", 

28 "CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT", 

29 "CAPABILITY_ALLOW_TIP_SHA1_IN_WANT", 

30 "CAPABILITY_ATOMIC", 

31 "CAPABILITY_DEEPEN_NOT", 

32 "CAPABILITY_DEEPEN_RELATIVE", 

33 "CAPABILITY_DEEPEN_SINCE", 

34 "CAPABILITY_DELETE_REFS", 

35 "CAPABILITY_FETCH", 

36 "CAPABILITY_FILTER", 

37 "CAPABILITY_INCLUDE_TAG", 

38 "CAPABILITY_MULTI_ACK", 

39 "CAPABILITY_MULTI_ACK_DETAILED", 

40 "CAPABILITY_NO_DONE", 

41 "CAPABILITY_NO_PROGRESS", 

42 "CAPABILITY_OBJECT_FORMAT", 

43 "CAPABILITY_OFS_DELTA", 

44 "CAPABILITY_QUIET", 

45 "CAPABILITY_REPORT_STATUS", 

46 "CAPABILITY_SHALLOW", 

47 "CAPABILITY_SIDE_BAND", 

48 "CAPABILITY_SIDE_BAND_64K", 

49 "CAPABILITY_SYMREF", 

50 "CAPABILITY_THIN_PACK", 

51 "COMMAND_DEEPEN", 

52 "COMMAND_DEEPEN_NOT", 

53 "COMMAND_DEEPEN_SINCE", 

54 "COMMAND_DONE", 

55 "COMMAND_FILTER", 

56 "COMMAND_HAVE", 

57 "COMMAND_SHALLOW", 

58 "COMMAND_UNSHALLOW", 

59 "COMMAND_WANT", 

60 "COMMON_CAPABILITIES", 

61 "DEFAULT_GIT_PROTOCOL_VERSION_FETCH", 

62 "DEFAULT_GIT_PROTOCOL_VERSION_SEND", 

63 "DEPTH_INFINITE", 

64 "GIT_PROTOCOL_VERSIONS", 

65 "KNOWN_RECEIVE_CAPABILITIES", 

66 "KNOWN_UPLOAD_CAPABILITIES", 

67 "MULTI_ACK", 

68 "MULTI_ACK_DETAILED", 

69 "NAK_LINE", 

70 "PEELED_TAG_SUFFIX", 

71 "SIDE_BAND_CHANNEL_DATA", 

72 "SIDE_BAND_CHANNEL_FATAL", 

73 "SIDE_BAND_CHANNEL_PROGRESS", 

74 "SINGLE_ACK", 

75 "TCP_GIT_PORT", 

76 "BufferedPktLineWriter", 

77 "PktLineParser", 

78 "Protocol", 

79 "ReceivableProtocol", 

80 "ack_type", 

81 "agent_string", 

82 "capability_agent", 

83 "capability_object_format", 

84 "capability_symref", 

85 "extract_capabilities", 

86 "extract_capability_names", 

87 "extract_want_line_capabilities", 

88 "find_capability", 

89 "format_ack_line", 

90 "format_capability_line", 

91 "format_cmd_pkt", 

92 "format_ref_line", 

93 "format_shallow_line", 

94 "format_unshallow_line", 

95 "parse_capability", 

96 "parse_cmd_pkt", 

97 "pkt_line", 

98 "pkt_seq", 

99 "serialize_refs", 

100 "split_peeled_refs", 

101 "strip_peeled_refs", 

102 "symref_capabilities", 

103 "write_info_refs", 

104] 

105 

106import logging 

107import sys 

108import types 

109from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence 

110from io import BytesIO 

111from os import SEEK_END 

112from typing import TYPE_CHECKING 

113 

114import dulwich 

115 

116from .errors import GitProtocolError, HangupException 

117from .objects import ObjectID 

118 

119if sys.version_info >= (3, 11): 

120 from typing import Self 

121else: 

122 from typing_extensions import Self 

123 

124logger = logging.getLogger(__name__) 

125 

126if TYPE_CHECKING: 

127 from .pack import ObjectContainer 

128 from .refs import Ref 

129 

130TCP_GIT_PORT = 9418 

131 

132# Git protocol version 0 is the original Git protocol, which lacked a 

133# version number until Git protocol version 1 was introduced by Brandon 

134# Williams in 2017. 

135# 

136# Protocol version 1 is simply the original v0 protocol with the addition of 

137# a single packet line, which precedes the ref advertisement, indicating the 

138# protocol version being used. This was done in preparation for protocol v2. 

139# 

140# Git protocol version 2 was first introduced by Brandon Williams in 2018 and 

141# adds many features. See the gitprotocol-v2(5) manual page for details. 

142# As of 2024, Git only implements version 2 during 'git fetch' and still uses 

143# version 0 during 'git push'. 

144GIT_PROTOCOL_VERSIONS = [0, 1, 2] 

145DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2 

146DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0 

147 

148# Suffix used in the Git protocol to indicate peeled tag references 

149PEELED_TAG_SUFFIX = b"^{}" 

150 

151ZERO_SHA: ObjectID = ObjectID(b"0" * 40) 

152 

153SINGLE_ACK = 0 

154MULTI_ACK = 1 

155MULTI_ACK_DETAILED = 2 

156 

157# pack data 

158SIDE_BAND_CHANNEL_DATA = 1 

159# progress messages 

160SIDE_BAND_CHANNEL_PROGRESS = 2 

161# fatal error message just before stream aborts 

162SIDE_BAND_CHANNEL_FATAL = 3 

163 

164CAPABILITY_ATOMIC = b"atomic" 

165CAPABILITY_DEEPEN_SINCE = b"deepen-since" 

166CAPABILITY_DEEPEN_NOT = b"deepen-not" 

167CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative" 

168CAPABILITY_DELETE_REFS = b"delete-refs" 

169CAPABILITY_INCLUDE_TAG = b"include-tag" 

170CAPABILITY_MULTI_ACK = b"multi_ack" 

171CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed" 

172CAPABILITY_NO_DONE = b"no-done" 

173CAPABILITY_NO_PROGRESS = b"no-progress" 

174CAPABILITY_OFS_DELTA = b"ofs-delta" 

175CAPABILITY_QUIET = b"quiet" 

176CAPABILITY_REPORT_STATUS = b"report-status" 

177CAPABILITY_SHALLOW = b"shallow" 

178CAPABILITY_SIDE_BAND = b"side-band" 

179CAPABILITY_SIDE_BAND_64K = b"side-band-64k" 

180CAPABILITY_THIN_PACK = b"thin-pack" 

181CAPABILITY_AGENT = b"agent" 

182CAPABILITY_SYMREF = b"symref" 

183CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want" 

184CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want" 

185CAPABILITY_FETCH = b"fetch" 

186CAPABILITY_FILTER = b"filter" 

187CAPABILITY_OBJECT_FORMAT = b"object-format" 

188CAPABILITY_PACKFILE_URIS = b"packfile-uris" 

189CAPABILITY_PUSH_OPTIONS = b"push-options" 

190 

191# Magic ref that is used to attach capabilities to when 

192# there are no refs. Should always be set to ZERO_SHA.

193CAPABILITIES_REF = b"capabilities^{}" 

194 

# Capabilities that are included in both of the "known" sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]

# Capability names recognised for uploads (common ones plus upload-only ones).
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
    CAPABILITY_FILTER,
    CAPABILITY_PACKFILE_URIS,
}

# Capability names recognised for receives (common ones plus receive-only ones).
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
    CAPABILITY_PUSH_OPTIONS,
}

231 

232DEPTH_INFINITE = 0x7FFFFFFF 

233 

234NAK_LINE = b"NAK\n" 

235 

236 

def agent_string() -> bytes:
    """Build the agent identifier advertised by dulwich.

    Returns:
      ``dulwich/`` followed by the dot-joined components of
      ``dulwich.__version__``, encoded as ASCII bytes
    """
    version = ".".join(str(component) for component in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")

244 

245 

def capability_agent() -> bytes:
    """Build the ``agent=<agent string>`` capability.

    Returns:
      The agent capability name, an equals sign and the dulwich agent string
    """
    return b"=".join((CAPABILITY_AGENT, agent_string()))

253 

254 

def capability_object_format(fmt: str) -> bytes:
    """Build the ``object-format=<fmt>`` capability.

    Args:
      fmt: Object format name (e.g., "sha1" or "sha256"); must be ASCII

    Returns:
      The object-format capability name, an equals sign and the encoded format
    """
    encoded_fmt = fmt.encode("ascii")
    return b"=".join((CAPABILITY_OBJECT_FORMAT, encoded_fmt))

265 

266 

def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Build a ``symref=<from>:<to>`` capability.

    Args:
      from_ref: Source reference name
      to_ref: Target reference name

    Returns:
      The symref capability string
    """
    mapping = b":".join((from_ref, to_ref))
    return b"=".join((CAPABILITY_SYMREF, mapping))

278 

279 

def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Collect the names of a series of capabilities.

    The name of a capability is everything before its first ``=``; a
    capability without ``=`` is its own name.

    Args:
      capabilities: Capability strings, with or without ``=value`` parts

    Returns:
      Set of capability names
    """
    names: set[bytes] = set()
    for capability in capabilities:
        names.add(capability.partition(b"=")[0])
    return names

290 

291 

292def parse_capability(capability: bytes) -> tuple[bytes, bytes | None]: 

293 """Parse a capability string into name and value. 

294 

295 Args: 

296 capability: Capability string 

297 

298 Returns: 

299 Tuple of (capability_name, capability_value) 

300 """ 

301 parts = capability.split(b"=", 1) 

302 if len(parts) == 1: 

303 return (parts[0], None) 

304 return (parts[0], parts[1]) 

305 

306 

def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Turn symref pairs into symref capability strings.

    Args:
      symrefs: Iterable of (from_ref, to_ref) tuples

    Returns:
      One symref capability string per pair, in input order
    """
    formatted: list[bytes] = []
    for pair in symrefs:
        formatted.append(capability_symref(*pair))
    return formatted

317 

318 

319COMMAND_DEEPEN = b"deepen" 

320COMMAND_DEEPEN_SINCE = b"deepen-since" 

321COMMAND_DEEPEN_NOT = b"deepen-not" 

322COMMAND_SHALLOW = b"shallow" 

323COMMAND_UNSHALLOW = b"unshallow" 

324COMMAND_DONE = b"done" 

325COMMAND_WANT = b"want" 

326COMMAND_HAVE = b"have" 

327COMMAND_FILTER = b"filter" 

328 

329 

def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Serialize a command and its arguments into a packet payload.

    The command is followed by a single space; every argument is then
    appended with a trailing NUL byte.

    Args:
      cmd: Command name
      *args: Command arguments

    Returns:
      Formatted command packet
    """
    terminated_args = b"".join(arg + b"\0" for arg in args)
    return cmd + b" " + terminated_args

341 

342 

def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Split a command packet payload into the command and its arguments.

    Everything before the first space is the command; the remainder must be
    a sequence of NUL-terminated arguments.

    Args:
      line: Command line to parse

    Returns:
      Tuple of (command, [arguments])
    """
    space_index = line.find(b" ")
    command = line[:space_index]
    payload = line[space_index + 1 :]
    # The argument section has to end with a NUL terminator.
    assert payload[-1:] == b"\x00"
    return command, payload[:-1].split(b"\0")

356 

357 

358def pkt_line(data: bytes | None) -> bytes: 

359 """Wrap data in a pkt-line. 

360 

361 Args: 

362 data: The data to wrap, as a str or None. 

363 Returns: The data prefixed with its length in pkt-line format; if data was 

364 None, returns the flush-pkt ('0000'). 

365 """ 

366 if data is None: 

367 return b"0000" 

368 return f"{len(data) + 4:04x}".encode("ascii") + data 

369 

370 

def pkt_seq(*seq: bytes | None) -> bytes:
    """Wrap a sequence of data in pkt-lines, followed by a flush-pkt.

    Args:
      seq: The payloads to wrap; a None entry becomes a flush-pkt.

    Returns: The concatenated pkt-lines, terminated by a flush-pkt.
    """
    # Appending None yields the trailing flush-pkt.
    return b"".join(map(pkt_line, (*seq, None)))

378 

379 

380class Protocol: 

381 """Class for interacting with a remote git process over the wire. 

382 

383 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line 

384 consists of the length of the line as a 4-byte hex string, followed by the 

385 payload data. The length includes the 4-byte header. The special line 

386 '0000' indicates the end of a section of input and is called a 'flush-pkt'. 

387 

388 For details on the pkt-line format, see the cgit distribution: 

389 Documentation/technical/protocol-common.txt 

390 """ 

391 

392 def __init__( 

393 self, 

394 read: Callable[[int], bytes], 

395 write: Callable[[bytes], int | None], 

396 close: Callable[[], None] | None = None, 

397 report_activity: Callable[[int, str], None] | None = None, 

398 ) -> None: 

399 """Initialize Protocol. 

400 

401 Args: 

402 read: Function to read bytes from the transport 

403 write: Function to write bytes to the transport 

404 close: Optional function to close the transport 

405 report_activity: Optional function to report activity 

406 """ 

407 self.read = read 

408 self.write = write 

409 self._close = close 

410 self.report_activity = report_activity 

411 self._readahead: BytesIO | None = None 

412 

413 def close(self) -> None: 

414 """Close the underlying transport if a close function was provided.""" 

415 if self._close: 

416 self._close() 

417 self._close = None # Prevent double-close 

418 

419 def __del__(self) -> None: 

420 """Ensure transport is closed when Protocol is garbage collected.""" 

421 if self._close is not None: 

422 import warnings 

423 

424 warnings.warn( 

425 f"unclosed Protocol {self!r}", 

426 ResourceWarning, 

427 stacklevel=2, 

428 source=self, 

429 ) 

430 try: 

431 self.close() 

432 except Exception: 

433 # Ignore errors during cleanup 

434 pass 

435 

436 def __enter__(self) -> Self: 

437 """Enter context manager.""" 

438 return self 

439 

440 def __exit__( 

441 self, 

442 exc_type: type[BaseException] | None, 

443 exc_val: BaseException | None, 

444 exc_tb: types.TracebackType | None, 

445 ) -> None: 

446 """Exit context manager and close transport.""" 

447 self.close() 

448 

449 def read_pkt_line(self) -> bytes | None: 

450 """Reads a pkt-line from the remote git process. 

451 

452 This method may read from the readahead buffer; see unread_pkt_line. 

453 

454 Returns: The next string from the stream, without the length prefix, or 

455 None for a flush-pkt ('0000') or delim-pkt ('0001'). 

456 """ 

457 if self._readahead is None: 

458 read = self.read 

459 else: 

460 read = self._readahead.read 

461 self._readahead = None 

462 

463 try: 

464 sizestr = read(4) 

465 if not sizestr: 

466 raise HangupException 

467 size = int(sizestr, 16) 

468 if size == 0 or size == 1: # flush-pkt or delim-pkt 

469 if self.report_activity: 

470 self.report_activity(4, "read") 

471 logger.debug("git< %s", sizestr.decode("ascii")) 

472 return None 

473 if self.report_activity: 

474 self.report_activity(size, "read") 

475 pkt_contents = read(size - 4) 

476 except ConnectionResetError as exc: 

477 raise HangupException from exc 

478 except OSError as exc: 

479 raise GitProtocolError(str(exc)) from exc 

480 else: 

481 if len(pkt_contents) + 4 != size: 

482 raise GitProtocolError( 

483 f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}" 

484 ) 

485 # Log the packet contents (truncate if too long for readability) 

486 if len(pkt_contents) > 80: 

487 logger.debug( 

488 "git< %s... (%d bytes)", pkt_contents[:80], len(pkt_contents) 

489 ) 

490 else: 

491 logger.debug("git< %s", pkt_contents) 

492 return pkt_contents 

493 

494 def eof(self) -> bool: 

495 """Test whether the protocol stream has reached EOF. 

496 

497 Note that this refers to the actual stream EOF and not just a 

498 flush-pkt. 

499 

500 Returns: True if the stream is at EOF, False otherwise. 

501 """ 

502 try: 

503 next_line = self.read_pkt_line() 

504 except HangupException: 

505 return True 

506 self.unread_pkt_line(next_line) 

507 return False 

508 

509 def unread_pkt_line(self, data: bytes | None) -> None: 

510 """Unread a single line of data into the readahead buffer. 

511 

512 This method can be used to unread a single pkt-line into a fixed 

513 readahead buffer. 

514 

515 Args: 

516 data: The data to unread, without the length prefix. 

517 

518 Raises: 

519 ValueError: If more than one pkt-line is unread. 

520 """ 

521 if self._readahead is not None: 

522 raise ValueError("Attempted to unread multiple pkt-lines.") 

523 self._readahead = BytesIO(pkt_line(data)) 

524 

525 def read_pkt_seq(self) -> Iterable[bytes]: 

526 """Read a sequence of pkt-lines from the remote git process. 

527 

528 Returns: Yields each line of data up to but not including the next 

529 flush-pkt. 

530 """ 

531 pkt = self.read_pkt_line() 

532 while pkt: 

533 yield pkt 

534 pkt = self.read_pkt_line() 

535 

536 def write_pkt_line(self, line: bytes | None) -> None: 

537 """Sends a pkt-line to the remote git process. 

538 

539 Args: 

540 line: A string containing the data to send, without the length 

541 prefix. 

542 """ 

543 try: 

544 # Log before converting to pkt format 

545 if line is None: 

546 logger.debug("git> 0000") 

547 elif len(line) > 80: 

548 logger.debug("git> %s... (%d bytes)", line[:80], len(line)) 

549 else: 

550 logger.debug("git> %s", line) 

551 

552 line = pkt_line(line) 

553 self.write(line) 

554 if self.report_activity: 

555 self.report_activity(len(line), "write") 

556 except OSError as exc: 

557 raise GitProtocolError(str(exc)) from exc 

558 

559 def write_sideband(self, channel: int, blob: bytes) -> None: 

560 """Write multiplexed data to the sideband. 

561 

562 Args: 

563 channel: An int specifying the channel to write to. 

564 blob: A blob of data (as a string) to send on this channel. 

565 """ 

566 # a pktline can be a max of 65520. a sideband line can therefore be 

567 # 65520-5 = 65515 

568 # WTF: Why have the len in ASCII, but the channel in binary. 

569 while blob: 

570 self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515]) 

571 blob = blob[65515:] 

572 

573 def send_cmd(self, cmd: bytes, *args: bytes) -> None: 

574 """Send a command and some arguments to a git server. 

575 

576 Only used for the TCP git protocol (git://). 

577 

578 Args: 

579 cmd: The remote service to access. 

580 args: List of arguments to send to remove service. 

581 """ 

582 self.write_pkt_line(format_cmd_pkt(cmd, *args)) 

583 

584 def read_cmd(self) -> tuple[bytes, list[bytes]]: 

585 """Read a command and some arguments from the git client. 

586 

587 Only used for the TCP git protocol (git://). 

588 

589 Returns: A tuple of (command, [list of arguments]). 

590 """ 

591 line = self.read_pkt_line() 

592 if line is None: 

593 raise GitProtocolError("Expected command, got flush packet") 

594 return parse_cmd_pkt(line) 

595 

596 

597_RBUFSIZE = 65536 # 64KB buffer for better network I/O performance 

598 

599 

class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], int | None],
        close: Callable[[], None] | None = None,
        report_activity: Callable[[int, str], None] | None = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Initialize ReceivableProtocol.

        Args:
            recv: Function to receive bytes from the transport
            write: Function to write bytes to the transport
            close: Optional function to close the transport
            report_activity: Optional function to report activity
            rbufsize: Number of bytes requested from recv when refilling
                the read buffer in recv()
        """
        # The buffered read() defined below serves as the base class's reader.
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size: int) -> bytes:
        """Read size bytes, calling recv repeatedly until they have arrived.

        Args:
            size: Number of bytes to read; must be positive

        Returns:
            The bytes read; fewer than size only if recv reported end of data
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        pending = self._rbuf
        origin = pending.tell()
        pending.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        available = pending.tell() - origin
        if available >= size:
            # The buffer already holds enough: hand out size bytes and keep
            # whatever follows in a fresh buffer for the next call.
            pending.seek(origin)
            result = pending.read(size)
            leftover = BytesIO()
            leftover.write(pending.read())
            leftover.seek(0)
            self._rbuf = leftover
            return result

        self._rbuf = BytesIO()  # reset _rbuf. we consume it via pending.
        while True:
            missing = size - available
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            data = self._recv(missing)
            if not data:
                break
            received = len(data)
            if received == size and not available:
                # Shortcut: nothing was buffered and recv returned exactly
                # the requested amount, so no copy into the buffer is needed.
                return data
            if received == missing:
                pending.write(data)
                del data  # explicit free
                break
            assert received <= missing, f"_recv({missing}) returned {received} bytes"
            pending.write(data)
            available += received
            del data  # explicit free
            # assert available == pending.tell()
        pending.seek(origin)
        return pending.read()

    def recv(self, size: int) -> bytes:
        """Receive at most size bytes, refilling the buffer only when empty.

        Args:
            size: Maximum number of bytes to receive; must be positive

        Returns:
            Up to size bytes, taken from the read buffer or freshly received
        """
        assert size > 0

        pending = self._rbuf
        cursor = pending.tell()
        pending.seek(0, SEEK_END)
        end = pending.tell()
        pending.seek(cursor)

        if end == cursor:
            # only read from the wire if our read buffer is exhausted
            data = self._recv(self._rbufsize)
            if len(data) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return data
            pending = BytesIO()
            pending.write(data)
            pending.seek(0)
            del data  # explicit free
            self._rbuf = pending
        return pending.read(size)

733 

734 

def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Extract a capabilities list from a string, if present.

    Capabilities, when present, follow a NUL byte and are separated by
    spaces.

    Args:
      text: String to extract from
    Returns: Tuple with text with capabilities removed and list of capabilities
    """
    if b"\0" in text:
        remainder, capability_blob = text.rstrip().split(b"\0")
        return (remainder, capability_blob.strip().split(b" "))
    return text, []

746 

747 

def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Extract a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple with text with capabilities removed and list of capabilities
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        # The first two fields are "want" and the object id; the rest are
        # capabilities.
        return (b" ".join(fields[:2]), fields[2:])
    return text, []

764 

765 

def ack_type(capabilities: Iterable[bytes]) -> int:
    """Extract the ack type from a capabilities list.

    Args:
      capabilities: Capability strings to inspect

    Returns:
      MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present, otherwise
      MULTI_ACK if ``multi_ack`` is present, otherwise SINGLE_ACK
    """
    # The detailed variant takes precedence, so it is checked first.
    if b"multi_ack_detailed" in capabilities:
        return MULTI_ACK_DETAILED
    if b"multi_ack" in capabilities:
        return MULTI_ACK
    return SINGLE_ACK

773 

774 

775def find_capability( 

776 capabilities: Iterable[bytes], *capability_names: bytes 

777) -> bytes | None: 

778 """Find a capability value in a list of capabilities. 

779 

780 This function looks for capabilities that may include arguments after an equals sign 

781 and returns only the value part (after the '='). For capabilities without values, 

782 returns the capability name itself. 

783 

784 Args: 

785 capabilities: List of capability strings 

786 capability_names: Capability name(s) to search for 

787 

788 Returns: 

789 The value after '=' if found, or the capability name if no '=', or None if not found 

790 

791 Example: 

792 >>> caps = [b'filter=blob:none', b'agent=git/2.0', b'thin-pack'] 

793 >>> find_capability(caps, b'filter') 

794 b'blob:none' 

795 >>> find_capability(caps, b'thin-pack') 

796 b'thin-pack' 

797 >>> find_capability(caps, b'missing') 

798 None 

799 """ 

800 for cap in capabilities: 

801 for name in capability_names: 

802 if cap == name: 

803 return cap 

804 elif cap.startswith(name + b"="): 

805 return cap[len(name) + 1 :] 

806 return None 

807 

808 

809class BufferedPktLineWriter: 

810 """Writer that wraps its data in pkt-lines and has an independent buffer. 

811 

812 Consecutive calls to write() wrap the data in a pkt-line and then buffers 

813 it until enough lines have been written such that their total length 

814 (including length prefix) reach the buffer size. 

815 """ 

816 

817 def __init__( 

818 self, write: Callable[[bytes], int | None], bufsize: int = 65515 

819 ) -> None: 

820 """Initialize the BufferedPktLineWriter. 

821 

822 Args: 

823 write: A write callback for the underlying writer. 

824 bufsize: The internal buffer size, including length prefixes. 

825 """ 

826 self._write = write 

827 self._bufsize = bufsize 

828 self._wbuf = BytesIO() 

829 self._buflen = 0 

830 

831 def write(self, data: bytes) -> None: 

832 """Write data, wrapping it in a pkt-line.""" 

833 line = pkt_line(data) 

834 line_len = len(line) 

835 over = self._buflen + line_len - self._bufsize 

836 if over >= 0: 

837 start = line_len - over 

838 self._wbuf.write(line[:start]) 

839 self.flush() 

840 else: 

841 start = 0 

842 saved = line[start:] 

843 self._wbuf.write(saved) 

844 self._buflen += len(saved) 

845 

846 def flush(self) -> None: 

847 """Flush all data from the buffer.""" 

848 data = self._wbuf.getvalue() 

849 if data: 

850 self._write(data) 

851 self._len = 0 

852 self._wbuf = BytesIO() 

853 

854 

855class PktLineParser: 

856 """Packet line parser that hands completed packets off to a callback.""" 

857 

858 def __init__(self, handle_pkt: Callable[[bytes | None], None]) -> None: 

859 """Initialize PktLineParser. 

860 

861 Args: 

862 handle_pkt: Callback function to handle completed packets 

863 """ 

864 self.handle_pkt = handle_pkt 

865 self._readahead = BytesIO() 

866 

867 def parse(self, data: bytes) -> None: 

868 """Parse a fragment of data and call back for any completed packets.""" 

869 self._readahead.write(data) 

870 buf = self._readahead.getvalue() 

871 if len(buf) < 4: 

872 return 

873 while len(buf) >= 4: 

874 size = int(buf[:4], 16) 

875 if size == 0: 

876 self.handle_pkt(None) 

877 buf = buf[4:] 

878 elif size <= len(buf): 

879 self.handle_pkt(buf[4:size]) 

880 buf = buf[size:] 

881 else: 

882 break 

883 self._readahead = BytesIO() 

884 self._readahead.write(buf) 

885 

886 def get_tail(self) -> bytes: 

887 """Read back any unused data.""" 

888 return self._readahead.getvalue() 

889 

890 

def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Format a capabilities list for the wire protocol.

    Args:
      capabilities: List of capability strings

    Returns:
      The capabilities concatenated, each one preceded by a single space
    """
    return b"".join(b" " + capability for capability in capabilities)

901 

902 

903def format_ref_line( 

904 ref: bytes, sha: bytes, capabilities: Sequence[bytes] | None = None 

905) -> bytes: 

906 """Format a ref advertisement line. 

907 

908 Args: 

909 ref: Reference name 

910 sha: SHA hash 

911 capabilities: Optional list of capabilities 

912 

913 Returns: 

914 Formatted ref line 

915 """ 

916 if capabilities is None: 

917 return sha + b" " + ref + b"\n" 

918 else: 

919 return sha + b" " + ref + b"\0" + format_capability_line(capabilities) + b"\n" 

920 

921 

def format_shallow_line(sha: bytes) -> bytes:
    """Format a shallow line.

    Args:
      sha: SHA to mark as shallow

    Returns:
      The shallow command followed by a space and the SHA
    """
    return b" ".join((COMMAND_SHALLOW, sha))

932 

933 

def format_unshallow_line(sha: bytes) -> bytes:
    """Format an unshallow line.

    Args:
      sha: SHA to unshallow

    Returns:
      The unshallow command followed by a space and the SHA
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))

944 

945 

def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Format an ACK line.

    Args:
      sha: SHA to acknowledge
      ack_type: Optional ACK type (e.g. b"continue"); when non-empty it is
        appended after the SHA, separated by a space

    Returns:
      Formatted ACK line, terminated by a newline
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"

959 

960 

def strip_peeled_refs(
    refs: "Mapping[Ref, ObjectID | None]",
) -> "dict[Ref, ObjectID | None]":
    """Return a copy of a refs mapping without its peeled entries.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      Dictionary with peeled refs removed
    """
    kept = {}
    for name, sha in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            continue
        kept[name] = sha
    return kept

975 

976 

def split_peeled_refs(
    refs: "Mapping[Ref, ObjectID]",
) -> "tuple[dict[Ref, ObjectID], dict[Ref, ObjectID]]":
    """Partition a refs mapping into regular and peeled entries.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      Tuple of (regular_refs, peeled_refs) where peeled_refs keys have
      the ^{} suffix removed
    """
    from .refs import Ref

    suffix_length = len(PEELED_TAG_SUFFIX)
    regular = {}
    peeled: dict[Ref, ObjectID] = {}

    for name, sha in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            # Key the peeled value by the ref name without the suffix.
            peeled[Ref(name[:-suffix_length])] = sha
        else:
            regular[name] = sha

    return regular, peeled

1000 

1001 

def write_info_refs(
    refs: "Mapping[Ref, ObjectID]", store: "ObjectContainer"
) -> "Iterator[bytes]":
    """Generate info refs in the format used by the dumb HTTP protocol.

    Refs are emitted in sorted order. Refs whose object is not in the store
    are skipped. When peeling a ref's object yields a different object, an
    additional line for the peeled object is emitted with the ^{} suffix.

    Args:
      refs: Dictionary of refs
      store: Object store to peel tags from

    Yields:
      Lines in info/refs format (sha + tab + refname)
    """
    from .object_store import peel_sha
    from .refs import HEADREF

    for ref_name, ref_sha in sorted(refs.items()):
        # get_refs() includes HEAD as a special case, but we don't want to
        # advertise it
        if ref_name == HEADREF:
            continue
        try:
            obj = store[ref_sha]
        except KeyError:
            continue
        _unpeeled, peeled_obj = peel_sha(store, ref_sha)
        yield obj.id + b"\t" + ref_name + b"\n"
        if obj.id != peeled_obj.id:
            yield peeled_obj.id + b"\t" + ref_name + PEELED_TAG_SUFFIX + b"\n"

1030 

1031 

def serialize_refs(
    store: "ObjectContainer", refs: "Mapping[Ref, ObjectID]"
) -> "dict[bytes, ObjectID]":
    """Serialize refs with peeled refs for Git protocol v0/v1.

    This function is used to prepare refs for transmission over the Git protocol.
    For tags, it includes both the tag object and the dereferenced object.
    Refs pointing at objects missing from the store are skipped with a
    UserWarning.

    Args:
      store: Object store to peel refs from
      refs: Dictionary of ref names to SHAs

    Returns:
      Dictionary with refs and peeled refs (marked with ^{})
    """
    import warnings

    from .object_store import peel_sha
    from .objects import Tag

    serialized: dict[bytes, ObjectID] = {}
    for name, sha in refs.items():
        try:
            unpeeled, peeled = peel_sha(store, ObjectID(sha))
        except KeyError:
            warnings.warn(
                "ref {} points at non-present sha {}".format(
                    name.decode("utf-8", "replace"), sha.decode("ascii")
                ),
                UserWarning,
            )
            continue
        # For tags, the peeled entry is inserted ahead of the ref itself.
        if isinstance(unpeeled, Tag):
            serialized[name + PEELED_TAG_SUFFIX] = peeled.id
        serialized[name] = unpeeled.id
    return serialized