Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

370 statements  

1# protocol.py -- Shared parts of the git protocols 

2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk> 

3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk> 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23"""Generic functions for talking the git smart server protocol.""" 

24 

25__all__ = [ 

26 "CAPABILITIES_REF", 

27 "CAPABILITY_AGENT", 

28 "CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT", 

29 "CAPABILITY_ALLOW_TIP_SHA1_IN_WANT", 

30 "CAPABILITY_ATOMIC", 

31 "CAPABILITY_DEEPEN_NOT", 

32 "CAPABILITY_DEEPEN_RELATIVE", 

33 "CAPABILITY_DEEPEN_SINCE", 

34 "CAPABILITY_DELETE_REFS", 

35 "CAPABILITY_FETCH", 

36 "CAPABILITY_FILTER", 

37 "CAPABILITY_INCLUDE_TAG", 

38 "CAPABILITY_MULTI_ACK", 

39 "CAPABILITY_MULTI_ACK_DETAILED", 

40 "CAPABILITY_NO_DONE", 

41 "CAPABILITY_NO_PROGRESS", 

42 "CAPABILITY_OBJECT_FORMAT", 

43 "CAPABILITY_OFS_DELTA", 

44 "CAPABILITY_QUIET", 

45 "CAPABILITY_REPORT_STATUS", 

46 "CAPABILITY_SHALLOW", 

47 "CAPABILITY_SIDE_BAND", 

48 "CAPABILITY_SIDE_BAND_64K", 

49 "CAPABILITY_SYMREF", 

50 "CAPABILITY_THIN_PACK", 

51 "COMMAND_DEEPEN", 

52 "COMMAND_DEEPEN_NOT", 

53 "COMMAND_DEEPEN_SINCE", 

54 "COMMAND_DONE", 

55 "COMMAND_FILTER", 

56 "COMMAND_HAVE", 

57 "COMMAND_SHALLOW", 

58 "COMMAND_UNSHALLOW", 

59 "COMMAND_WANT", 

60 "COMMON_CAPABILITIES", 

61 "DEFAULT_GIT_PROTOCOL_VERSION_FETCH", 

62 "DEFAULT_GIT_PROTOCOL_VERSION_SEND", 

63 "DEPTH_INFINITE", 

64 "GIT_PROTOCOL_VERSIONS", 

65 "KNOWN_RECEIVE_CAPABILITIES", 

66 "KNOWN_UPLOAD_CAPABILITIES", 

67 "MULTI_ACK", 

68 "MULTI_ACK_DETAILED", 

69 "NAK_LINE", 

70 "PEELED_TAG_SUFFIX", 

71 "SIDE_BAND_CHANNEL_DATA", 

72 "SIDE_BAND_CHANNEL_FATAL", 

73 "SIDE_BAND_CHANNEL_PROGRESS", 

74 "SINGLE_ACK", 

75 "TCP_GIT_PORT", 

76 "BufferedPktLineWriter", 

77 "PktLineParser", 

78 "Protocol", 

79 "ReceivableProtocol", 

80 "ack_type", 

81 "agent_string", 

82 "capability_agent", 

83 "capability_object_format", 

84 "capability_symref", 

85 "extract_capabilities", 

86 "extract_capability_names", 

87 "extract_want_line_capabilities", 

88 "find_capability", 

89 "format_ack_line", 

90 "format_capability_line", 

91 "format_cmd_pkt", 

92 "format_ref_line", 

93 "format_shallow_line", 

94 "format_unshallow_line", 

95 "parse_capability", 

96 "parse_cmd_pkt", 

97 "pkt_line", 

98 "pkt_seq", 

99 "serialize_refs", 

100 "split_peeled_refs", 

101 "strip_peeled_refs", 

102 "symref_capabilities", 

103 "write_info_refs", 

104] 

105 

106import logging 

107import types 

108from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence 

109from io import BytesIO 

110from os import SEEK_END 

111from typing import TYPE_CHECKING 

112 

113import dulwich 

114 

115from .errors import GitProtocolError, HangupException 

116from .objects import ObjectID 

117 

118logger = logging.getLogger(__name__) 

119 

120if TYPE_CHECKING: 

121 from .pack import ObjectContainer 

122 from .refs import Ref 

123 

# TCP port used by the git:// transport.
TCP_GIT_PORT = 9418

# Protocol version history:
#
# * Version 0 is the original Git protocol. It carried no version number
#   at all until version 1 was introduced by Brandon Williams in 2017.
# * Version 1 is the v0 protocol plus a single packet line, sent before the
#   ref advertisement, that announces the protocol version in use. It was
#   added in preparation for protocol v2.
# * Version 2 was first introduced by Brandon Williams in 2018 and adds many
#   features. See the gitprotocol-v2(5) manual page for details.
#
# As of 2024, Git only implements version 2 during 'git fetch' and still
# uses version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

# Suffix appended to a ref name in the Git protocol to mark a peeled tag.
PEELED_TAG_SUFFIX = b"^{}"

144 

# Object id made of forty ASCII "0" characters.
ZERO_SHA: ObjectID = ObjectID(40 * b"0")

146 

# Acknowledgement modes, as returned by ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
SIDE_BAND_CHANNEL_DATA = 1  # pack data
SIDE_BAND_CHANNEL_PROGRESS = 2  # progress messages
SIDE_BAND_CHANNEL_FATAL = 3  # fatal error message just before stream aborts

# Capability names as they appear on the wire.
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"
CAPABILITY_OBJECT_FORMAT = b"object-format"
CAPABILITY_PACKFILE_URIS = b"packfile-uris"
CAPABILITY_PUSH_OPTIONS = b"push-options"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"

# Capabilities shared by the upload and receive sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
    CAPABILITY_FILTER,
    CAPABILITY_PACKFILE_URIS,
}
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
    CAPABILITY_PUSH_OPTIONS,
}

DEPTH_INFINITE = 0x7FFFFFFF

NAK_LINE = b"NAK\n"

229 

230 

def agent_string() -> bytes:
    """Build the agent identifier for this dulwich version.

    Returns:
      ``dulwich/`` followed by the dot-joined components of
      ``dulwich.__version__``, ASCII-encoded
    """
    version = ".".join(str(component) for component in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")

238 

239 

def capability_agent() -> bytes:
    """Build the ``agent=<agent string>`` capability.

    Returns:
      The agent capability name, ``=``, and the value of agent_string()
    """
    return b"=".join((CAPABILITY_AGENT, agent_string()))

247 

248 

def capability_object_format(fmt: str) -> bytes:
    """Build the ``object-format=<fmt>`` capability.

    Args:
      fmt: Object format name (e.g., "sha1" or "sha256"); must be ASCII

    Returns:
      The object-format capability name, ``=``, and the encoded format name
    """
    encoded_format = fmt.encode("ascii")
    return b"=".join((CAPABILITY_OBJECT_FORMAT, encoded_format))

259 

260 

def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Build a ``symref=<from_ref>:<to_ref>`` capability.

    Args:
      from_ref: Source reference name
      to_ref: Target reference name

    Returns:
      The symref capability string
    """
    mapping = from_ref + b":" + to_ref
    return b"=".join((CAPABILITY_SYMREF, mapping))

272 

273 

def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Collect the names of a series of capabilities, dropping their values.

    Args:
      capabilities: Capability strings, each either ``name`` or ``name=value``

    Returns:
      Set of capability names
    """
    names: set[bytes] = set()
    for capability in capabilities:
        name, _value = parse_capability(capability)
        names.add(name)
    return names

284 

285 

286def parse_capability(capability: bytes) -> tuple[bytes, bytes | None]: 

287 """Parse a capability string into name and value. 

288 

289 Args: 

290 capability: Capability string 

291 

292 Returns: 

293 Tuple of (capability_name, capability_value) 

294 """ 

295 parts = capability.split(b"=", 1) 

296 if len(parts) == 1: 

297 return (parts[0], None) 

298 return (parts[0], parts[1]) 

299 

300 

def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Build one symref capability per (from_ref, to_ref) pair.

    Args:
      symrefs: Iterable of (from_ref, to_ref) tuples

    Returns:
      List of symref capability strings, in input order
    """
    capabilities: list[bytes] = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities

311 

312 

# Command keywords as they appear on the wire.
COMMAND_DEEPEN = b"deepen"
COMMAND_DEEPEN_SINCE = b"deepen-since"
COMMAND_DEEPEN_NOT = b"deepen-not"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"
COMMAND_FILTER = b"filter"

322 

323 

def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Serialize a command and its arguments into a packet payload.

    The command is followed by a single space; every argument is then
    appended with a trailing NUL byte.

    Args:
      cmd: Command name
      *args: Command arguments

    Returns:
      Formatted command packet
    """
    pieces = [cmd, b" "]
    for arg in args:
        pieces.append(arg)
        pieces.append(b"\0")
    return b"".join(pieces)

335 

336 

def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Split a command packet payload into the command and its arguments.

    Everything before the first space is the command; the remainder must end
    with a NUL byte and is split on NUL bytes to give the arguments.

    Args:
      line: Command line to parse

    Returns:
      Tuple of (command, [arguments])

    Raises:
      AssertionError: If the argument section does not end with a NUL byte
    """
    space_index = line.find(b" ")
    cmd = line[:space_index]
    payload = line[space_index + 1 :]
    assert payload[-1:] == b"\x00"
    arguments = payload[:-1].split(b"\0")
    return cmd, arguments

350 

351 

352def pkt_line(data: bytes | None) -> bytes: 

353 """Wrap data in a pkt-line. 

354 

355 Args: 

356 data: The data to wrap, as a str or None. 

357 Returns: The data prefixed with its length in pkt-line format; if data was 

358 None, returns the flush-pkt ('0000'). 

359 """ 

360 if data is None: 

361 return b"0000" 

362 return f"{len(data) + 4:04x}".encode("ascii") + data 

363 

364 

def pkt_seq(*seq: bytes | None) -> bytes:
    """Frame each item as a pkt-line and terminate with a flush-pkt.

    Args:
      seq: Payloads to wrap; a None item becomes a flush-pkt.

    Returns: The concatenated pkt-lines followed by a final flush-pkt.
    """
    return b"".join(pkt_line(item) for item in (*seq, None))

372 

373 

374class Protocol: 

375 """Class for interacting with a remote git process over the wire. 

376 

377 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line 

378 consists of the length of the line as a 4-byte hex string, followed by the 

379 payload data. The length includes the 4-byte header. The special line 

380 '0000' indicates the end of a section of input and is called a 'flush-pkt'. 

381 

382 For details on the pkt-line format, see the cgit distribution: 

383 Documentation/technical/protocol-common.txt 

384 """ 

385 

386 def __init__( 

387 self, 

388 read: Callable[[int], bytes], 

389 write: Callable[[bytes], int | None], 

390 close: Callable[[], None] | None = None, 

391 report_activity: Callable[[int, str], None] | None = None, 

392 ) -> None: 

393 """Initialize Protocol. 

394 

395 Args: 

396 read: Function to read bytes from the transport 

397 write: Function to write bytes to the transport 

398 close: Optional function to close the transport 

399 report_activity: Optional function to report activity 

400 """ 

401 self.read = read 

402 self.write = write 

403 self._close = close 

404 self.report_activity = report_activity 

405 self._readahead: BytesIO | None = None 

406 

407 def close(self) -> None: 

408 """Close the underlying transport if a close function was provided.""" 

409 if self._close: 

410 self._close() 

411 self._close = None # Prevent double-close 

412 

413 def __del__(self) -> None: 

414 """Ensure transport is closed when Protocol is garbage collected.""" 

415 if self._close is not None: 

416 import warnings 

417 

418 warnings.warn( 

419 f"unclosed Protocol {self!r}", 

420 ResourceWarning, 

421 stacklevel=2, 

422 source=self, 

423 ) 

424 try: 

425 self.close() 

426 except Exception: 

427 # Ignore errors during cleanup 

428 pass 

429 

430 def __enter__(self) -> "Protocol": 

431 """Enter context manager.""" 

432 return self 

433 

434 def __exit__( 

435 self, 

436 exc_type: type[BaseException] | None, 

437 exc_val: BaseException | None, 

438 exc_tb: types.TracebackType | None, 

439 ) -> None: 

440 """Exit context manager and close transport.""" 

441 self.close() 

442 

443 def read_pkt_line(self) -> bytes | None: 

444 """Reads a pkt-line from the remote git process. 

445 

446 This method may read from the readahead buffer; see unread_pkt_line. 

447 

448 Returns: The next string from the stream, without the length prefix, or 

449 None for a flush-pkt ('0000') or delim-pkt ('0001'). 

450 """ 

451 if self._readahead is None: 

452 read = self.read 

453 else: 

454 read = self._readahead.read 

455 self._readahead = None 

456 

457 try: 

458 sizestr = read(4) 

459 if not sizestr: 

460 raise HangupException 

461 size = int(sizestr, 16) 

462 if size == 0 or size == 1: # flush-pkt or delim-pkt 

463 if self.report_activity: 

464 self.report_activity(4, "read") 

465 logger.debug("git< %s", sizestr.decode("ascii")) 

466 return None 

467 if self.report_activity: 

468 self.report_activity(size, "read") 

469 pkt_contents = read(size - 4) 

470 except ConnectionResetError as exc: 

471 raise HangupException from exc 

472 except OSError as exc: 

473 raise GitProtocolError(str(exc)) from exc 

474 else: 

475 if len(pkt_contents) + 4 != size: 

476 raise GitProtocolError( 

477 f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}" 

478 ) 

479 # Log the packet contents (truncate if too long for readability) 

480 if len(pkt_contents) > 80: 

481 logger.debug( 

482 "git< %s... (%d bytes)", pkt_contents[:80], len(pkt_contents) 

483 ) 

484 else: 

485 logger.debug("git< %s", pkt_contents) 

486 return pkt_contents 

487 

488 def eof(self) -> bool: 

489 """Test whether the protocol stream has reached EOF. 

490 

491 Note that this refers to the actual stream EOF and not just a 

492 flush-pkt. 

493 

494 Returns: True if the stream is at EOF, False otherwise. 

495 """ 

496 try: 

497 next_line = self.read_pkt_line() 

498 except HangupException: 

499 return True 

500 self.unread_pkt_line(next_line) 

501 return False 

502 

503 def unread_pkt_line(self, data: bytes | None) -> None: 

504 """Unread a single line of data into the readahead buffer. 

505 

506 This method can be used to unread a single pkt-line into a fixed 

507 readahead buffer. 

508 

509 Args: 

510 data: The data to unread, without the length prefix. 

511 

512 Raises: 

513 ValueError: If more than one pkt-line is unread. 

514 """ 

515 if self._readahead is not None: 

516 raise ValueError("Attempted to unread multiple pkt-lines.") 

517 self._readahead = BytesIO(pkt_line(data)) 

518 

519 def read_pkt_seq(self) -> Iterable[bytes]: 

520 """Read a sequence of pkt-lines from the remote git process. 

521 

522 Returns: Yields each line of data up to but not including the next 

523 flush-pkt. 

524 """ 

525 pkt = self.read_pkt_line() 

526 while pkt: 

527 yield pkt 

528 pkt = self.read_pkt_line() 

529 

530 def write_pkt_line(self, line: bytes | None) -> None: 

531 """Sends a pkt-line to the remote git process. 

532 

533 Args: 

534 line: A string containing the data to send, without the length 

535 prefix. 

536 """ 

537 try: 

538 # Log before converting to pkt format 

539 if line is None: 

540 logger.debug("git> 0000") 

541 elif len(line) > 80: 

542 logger.debug("git> %s... (%d bytes)", line[:80], len(line)) 

543 else: 

544 logger.debug("git> %s", line) 

545 

546 line = pkt_line(line) 

547 self.write(line) 

548 if self.report_activity: 

549 self.report_activity(len(line), "write") 

550 except OSError as exc: 

551 raise GitProtocolError(str(exc)) from exc 

552 

553 def write_sideband(self, channel: int, blob: bytes) -> None: 

554 """Write multiplexed data to the sideband. 

555 

556 Args: 

557 channel: An int specifying the channel to write to. 

558 blob: A blob of data (as a string) to send on this channel. 

559 """ 

560 # a pktline can be a max of 65520. a sideband line can therefore be 

561 # 65520-5 = 65515 

562 # WTF: Why have the len in ASCII, but the channel in binary. 

563 while blob: 

564 self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515]) 

565 blob = blob[65515:] 

566 

567 def send_cmd(self, cmd: bytes, *args: bytes) -> None: 

568 """Send a command and some arguments to a git server. 

569 

570 Only used for the TCP git protocol (git://). 

571 

572 Args: 

573 cmd: The remote service to access. 

574 args: List of arguments to send to remove service. 

575 """ 

576 self.write_pkt_line(format_cmd_pkt(cmd, *args)) 

577 

578 def read_cmd(self) -> tuple[bytes, list[bytes]]: 

579 """Read a command and some arguments from the git client. 

580 

581 Only used for the TCP git protocol (git://). 

582 

583 Returns: A tuple of (command, [list of arguments]). 

584 """ 

585 line = self.read_pkt_line() 

586 if line is None: 

587 raise GitProtocolError("Expected command, got flush packet") 

588 return parse_cmd_pkt(line) 

589 

590 

_RBUFSIZE = 65536  # 64KB buffer for better network I/O performance


class ReceivableProtocol(Protocol):
    """Protocol variant built on a recv()-style callable with its own buffer.

    Besides read(), this class offers a recv() method that behaves like
    socket.recv().

    Use read(n) to block until exactly n bytes (or EOF) have been read from
    the wire. Use recv(n) to get at most n bytes when receiving fewer is
    acceptable; recv(n) still blocks until at least one byte is available.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], int | None],
        close: Callable[[], None] | None = None,
        report_activity: Callable[[int, str], None] | None = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Initialize ReceivableProtocol.

        Args:
          recv: Function to receive bytes from the transport
          write: Function to write bytes to the transport
          close: Optional function to close the transport
          report_activity: Optional function to report activity
          rbufsize: Number of bytes requested from recv when refilling the
            buffer in recv()
        """
        # The buffered read() below serves as the base class's read callable.
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._rbufsize = rbufsize
        self._rbuf = BytesIO()
        self._recv = recv

    def read(self, size: int) -> bytes:
        """Read size bytes, blocking until they arrive or the stream ends.

        Args:
          size: Number of bytes to read; must be positive

        Returns:
          The bytes read; fewer than size only if recv returned no data
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        pending = self._rbuf
        start = pending.tell()
        pending.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        buffered = pending.tell() - start
        if buffered >= size:
            # Enough bytes are buffered already: hand them out and keep
            # whatever is left over in a fresh buffer.
            pending.seek(start)
            result = pending.read(size)
            self._rbuf = BytesIO(pending.read())
            return result

        self._rbuf = BytesIO()  # reset _rbuf. we consume it via pending.
        while True:
            remaining = size - buffered
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            chunk = self._recv(remaining)
            if not chunk:
                break
            received = len(chunk)
            if received == size and not buffered:
                # Shortcut: nothing was buffered and recv returned exactly
                # the requested amount, so no copy through the buffer.
                return chunk
            assert received <= remaining, f"_recv({remaining}) returned {received} bytes"
            pending.write(chunk)
            if received == remaining:
                break
            buffered += received
        pending.seek(start)
        return pending.read()

    def recv(self, size: int) -> bytes:
        """Return up to size bytes, touching the wire only when needed.

        Args:
          size: Maximum number of bytes to receive; must be positive

        Returns:
          Bytes taken from the buffer, or freshly received if it was empty
        """
        assert size > 0

        pending = self._rbuf
        start = pending.tell()
        end = pending.seek(0, SEEK_END)
        pending.seek(start)

        if end == start:
            # only read from the wire if our read buffer is exhausted
            chunk = self._recv(self._rbufsize)
            if len(chunk) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return chunk
            pending = BytesIO(chunk)
            self._rbuf = pending
        return pending.read(size)

727 

728 

def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate a NUL-delimited capabilities list from a line, if present.

    Args:
      text: Line to extract from

    Returns: Tuple of the text before the NUL byte and the list of
      space-separated capabilities after it; text without a NUL byte is
      returned unchanged with an empty list.
    """
    if text.find(b"\0") < 0:
        return text, []
    head, raw_capabilities = text.rstrip().split(b"\0")
    capability_list = raw_capabilities.strip().split(b" ")
    return (head, capability_list)

740 

741 

def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate a capabilities list from a want line, if present.

    Want lines separate their capabilities from the rest of the line with a
    space rather than a NUL byte, so they have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from

    Returns: Tuple of the first two space-separated words joined by a space
      and the list of remaining words; a line with fewer than three words is
      returned unchanged with an empty list.
    """
    words = text.rstrip().split(b" ")
    if len(words) >= 3:
        return (b" ".join(words[:2]), words[2:])
    return text, []

758 

759 

def ack_type(capabilities: Iterable[bytes]) -> int:
    """Pick the acknowledgement mode announced by a capabilities list.

    Args:
      capabilities: Capability strings to inspect

    Returns:
      MULTI_ACK_DETAILED if b"multi_ack_detailed" is present, otherwise
      MULTI_ACK if b"multi_ack" is present, otherwise SINGLE_ACK
    """
    # The detailed variant is checked first, so it wins when both are present.
    if b"multi_ack_detailed" in capabilities:
        return MULTI_ACK_DETAILED
    if b"multi_ack" in capabilities:
        return MULTI_ACK
    return SINGLE_ACK

767 

768 

769def find_capability( 

770 capabilities: Iterable[bytes], *capability_names: bytes 

771) -> bytes | None: 

772 """Find a capability value in a list of capabilities. 

773 

774 This function looks for capabilities that may include arguments after an equals sign 

775 and returns only the value part (after the '='). For capabilities without values, 

776 returns the capability name itself. 

777 

778 Args: 

779 capabilities: List of capability strings 

780 capability_names: Capability name(s) to search for 

781 

782 Returns: 

783 The value after '=' if found, or the capability name if no '=', or None if not found 

784 

785 Example: 

786 >>> caps = [b'filter=blob:none', b'agent=git/2.0', b'thin-pack'] 

787 >>> find_capability(caps, b'filter') 

788 b'blob:none' 

789 >>> find_capability(caps, b'thin-pack') 

790 b'thin-pack' 

791 >>> find_capability(caps, b'missing') 

792 None 

793 """ 

794 for cap in capabilities: 

795 for name in capability_names: 

796 if cap == name: 

797 return cap 

798 elif cap.startswith(name + b"="): 

799 return cap[len(name) + 1 :] 

800 return None 

801 

802 

803class BufferedPktLineWriter: 

804 """Writer that wraps its data in pkt-lines and has an independent buffer. 

805 

806 Consecutive calls to write() wrap the data in a pkt-line and then buffers 

807 it until enough lines have been written such that their total length 

808 (including length prefix) reach the buffer size. 

809 """ 

810 

811 def __init__( 

812 self, write: Callable[[bytes], int | None], bufsize: int = 65515 

813 ) -> None: 

814 """Initialize the BufferedPktLineWriter. 

815 

816 Args: 

817 write: A write callback for the underlying writer. 

818 bufsize: The internal buffer size, including length prefixes. 

819 """ 

820 self._write = write 

821 self._bufsize = bufsize 

822 self._wbuf = BytesIO() 

823 self._buflen = 0 

824 

825 def write(self, data: bytes) -> None: 

826 """Write data, wrapping it in a pkt-line.""" 

827 line = pkt_line(data) 

828 line_len = len(line) 

829 over = self._buflen + line_len - self._bufsize 

830 if over >= 0: 

831 start = line_len - over 

832 self._wbuf.write(line[:start]) 

833 self.flush() 

834 else: 

835 start = 0 

836 saved = line[start:] 

837 self._wbuf.write(saved) 

838 self._buflen += len(saved) 

839 

840 def flush(self) -> None: 

841 """Flush all data from the buffer.""" 

842 data = self._wbuf.getvalue() 

843 if data: 

844 self._write(data) 

845 self._len = 0 

846 self._wbuf = BytesIO() 

847 

848 

849class PktLineParser: 

850 """Packet line parser that hands completed packets off to a callback.""" 

851 

852 def __init__(self, handle_pkt: Callable[[bytes | None], None]) -> None: 

853 """Initialize PktLineParser. 

854 

855 Args: 

856 handle_pkt: Callback function to handle completed packets 

857 """ 

858 self.handle_pkt = handle_pkt 

859 self._readahead = BytesIO() 

860 

861 def parse(self, data: bytes) -> None: 

862 """Parse a fragment of data and call back for any completed packets.""" 

863 self._readahead.write(data) 

864 buf = self._readahead.getvalue() 

865 if len(buf) < 4: 

866 return 

867 while len(buf) >= 4: 

868 size = int(buf[:4], 16) 

869 if size == 0: 

870 self.handle_pkt(None) 

871 buf = buf[4:] 

872 elif size <= len(buf): 

873 self.handle_pkt(buf[4:size]) 

874 buf = buf[size:] 

875 else: 

876 break 

877 self._readahead = BytesIO() 

878 self._readahead.write(buf) 

879 

880 def get_tail(self) -> bytes: 

881 """Read back any unused data.""" 

882 return self._readahead.getvalue() 

883 

884 

def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Format a capabilities list for the wire protocol.

    Args:
      capabilities: List of capability strings

    Returns:
      The capabilities concatenated, each preceded by a single space; empty
      bytes when there are no capabilities
    """
    pieces: list[bytes] = []
    for capability in capabilities:
        pieces.append(b" ")
        pieces.append(capability)
    return b"".join(pieces)

895 

896 

def format_ref_line(
    ref: bytes, sha: bytes, capabilities: Sequence[bytes] | None = None
) -> bytes:
    """Format a ref advertisement line.

    Args:
      ref: Reference name
      sha: SHA hash
      capabilities: Optional list of capabilities; when given (even if
        empty), a NUL byte and the formatted capabilities follow the ref

    Returns:
      ``<sha> <ref>``, optionally followed by a NUL byte and the capability
      line, terminated by a newline
    """
    line = sha + b" " + ref
    if capabilities is not None:
        line = line + b"\0" + format_capability_line(capabilities)
    return line + b"\n"

914 

915 

def format_shallow_line(sha: bytes) -> bytes:
    """Format a shallow line.

    Args:
      sha: SHA to mark as shallow

    Returns:
      The shallow command keyword, a space, and the SHA
    """
    return b" ".join((COMMAND_SHALLOW, sha))

926 

927 

def format_unshallow_line(sha: bytes) -> bytes:
    """Format an unshallow line.

    Args:
      sha: SHA to unshallow

    Returns:
      The unshallow command keyword, a space, and the SHA
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))

938 

939 

def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Format an ACK line.

    Args:
      sha: SHA to acknowledge
      ack_type: Optional ACK type (e.g. b"continue"); when non-empty it is
        appended after the SHA, separated by a space

    Returns:
      ``ACK <sha>[ <ack_type>]`` terminated by a newline
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"

953 

954 

def strip_peeled_refs(
    refs: "Mapping[Ref, ObjectID | None]",
) -> "dict[Ref, ObjectID | None]":
    """Return a copy of a refs mapping without its peeled entries.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      New dictionary holding only the refs whose name does not end with the
      peeled tag suffix
    """
    kept = {}
    for ref, sha in refs.items():
        if ref.endswith(PEELED_TAG_SUFFIX):
            continue
        kept[ref] = sha
    return kept

969 

970 

def split_peeled_refs(
    refs: "Mapping[Ref, ObjectID]",
) -> "tuple[dict[Ref, ObjectID], dict[Ref, ObjectID]]":
    """Partition a refs mapping into regular refs and peeled refs.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      Tuple of (regular_refs, peeled_refs) where peeled_refs keys have
      the ^{} suffix removed
    """
    from .refs import Ref

    regular = {}
    peeled: dict[Ref, ObjectID] = {}

    for ref, sha in refs.items():
        if not ref.endswith(PEELED_TAG_SUFFIX):
            regular[ref] = sha
            continue
        # Key the peeled entry by the ref name without its ^{} suffix.
        peeled[Ref(ref.removesuffix(PEELED_TAG_SUFFIX))] = sha

    return regular, peeled

994 

995 

def write_info_refs(
    refs: "Mapping[Ref, ObjectID]", store: "ObjectContainer"
) -> "Iterator[bytes]":
    """Generate info refs in the format used by the dumb HTTP protocol.

    Refs are emitted in sorted order. Refs whose object is missing from the
    store are skipped. When peeling a ref's object yields a different object,
    an extra line for the peeled object follows, with ^{} appended to the
    name.

    Args:
      refs: Dictionary of refs
      store: Object store to peel tags from

    Yields:
      Lines in info/refs format (sha + tab + refname)
    """
    from .object_store import peel_sha
    from .refs import HEADREF

    for name, sha in sorted(refs.items()):
        # get_refs() includes HEAD as a special case, but we don't want to
        # advertise it
        if name == HEADREF:
            continue
        try:
            obj = store[sha]
        except KeyError:
            continue
        peeled = peel_sha(store, sha)[1]
        yield b"\t".join((obj.id, name)) + b"\n"
        if obj.id != peeled.id:
            yield b"\t".join((peeled.id, name + PEELED_TAG_SUFFIX)) + b"\n"

1024 

1025 

def serialize_refs(
    store: "ObjectContainer", refs: "Mapping[Ref, ObjectID]"
) -> "dict[bytes, ObjectID]":
    """Serialize refs with peeled refs for Git protocol v0/v1.

    Each ref is peeled through the object store. A ref whose unpeeled object
    is a tag contributes two entries: the peeled object under the ref name
    plus ^{}, and the tag itself under the plain ref name. A ref whose SHA is
    not in the store is skipped with a UserWarning.

    Args:
      store: Object store to peel refs from
      refs: Dictionary of ref names to SHAs

    Returns:
      Dictionary with refs and peeled refs (marked with ^{})
    """
    import warnings

    from .object_store import peel_sha
    from .objects import Tag

    serialized: dict[bytes, ObjectID] = {}
    for ref, sha in refs.items():
        try:
            unpeeled, peeled = peel_sha(store, ObjectID(sha))
        except KeyError:
            ref_text = ref.decode("utf-8", "replace")
            sha_text = sha.decode("ascii")
            warnings.warn(
                f"ref {ref_text} points at non-present sha {sha_text}",
                UserWarning,
            )
            continue
        if isinstance(unpeeled, Tag):
            serialized[ref + PEELED_TAG_SUFFIX] = peeled.id
        serialized[ref] = unpeeled.id
    return serialized

1062 return ret