Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 32%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

336 statements  

1# protocol.py -- Shared parts of the git protocols 

2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk> 

3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk> 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23"""Generic functions for talking the git smart server protocol.""" 

24 

25__all__ = [ 

26 "CAPABILITIES_REF", 

27 "CAPABILITY_AGENT", 

28 "CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT", 

29 "CAPABILITY_ALLOW_TIP_SHA1_IN_WANT", 

30 "CAPABILITY_ATOMIC", 

31 "CAPABILITY_DEEPEN_NOT", 

32 "CAPABILITY_DEEPEN_RELATIVE", 

33 "CAPABILITY_DEEPEN_SINCE", 

34 "CAPABILITY_DELETE_REFS", 

35 "CAPABILITY_FETCH", 

36 "CAPABILITY_FILTER", 

37 "CAPABILITY_INCLUDE_TAG", 

38 "CAPABILITY_MULTI_ACK", 

39 "CAPABILITY_MULTI_ACK_DETAILED", 

40 "CAPABILITY_NO_DONE", 

41 "CAPABILITY_NO_PROGRESS", 

42 "CAPABILITY_OFS_DELTA", 

43 "CAPABILITY_QUIET", 

44 "CAPABILITY_REPORT_STATUS", 

45 "CAPABILITY_SHALLOW", 

46 "CAPABILITY_SIDE_BAND", 

47 "CAPABILITY_SIDE_BAND_64K", 

48 "CAPABILITY_SYMREF", 

49 "CAPABILITY_THIN_PACK", 

50 "COMMAND_DEEPEN", 

51 "COMMAND_DEEPEN_NOT", 

52 "COMMAND_DEEPEN_SINCE", 

53 "COMMAND_DONE", 

54 "COMMAND_HAVE", 

55 "COMMAND_SHALLOW", 

56 "COMMAND_UNSHALLOW", 

57 "COMMAND_WANT", 

58 "COMMON_CAPABILITIES", 

59 "DEFAULT_GIT_PROTOCOL_VERSION_FETCH", 

60 "DEFAULT_GIT_PROTOCOL_VERSION_SEND", 

61 "DEPTH_INFINITE", 

62 "GIT_PROTOCOL_VERSIONS", 

63 "KNOWN_RECEIVE_CAPABILITIES", 

64 "KNOWN_UPLOAD_CAPABILITIES", 

65 "MULTI_ACK", 

66 "MULTI_ACK_DETAILED", 

67 "NAK_LINE", 

68 "PEELED_TAG_SUFFIX", 

69 "SIDE_BAND_CHANNEL_DATA", 

70 "SIDE_BAND_CHANNEL_FATAL", 

71 "SIDE_BAND_CHANNEL_PROGRESS", 

72 "SINGLE_ACK", 

73 "TCP_GIT_PORT", 

74 "BufferedPktLineWriter", 

75 "PktLineParser", 

76 "Protocol", 

77 "ReceivableProtocol", 

78 "ack_type", 

79 "agent_string", 

80 "capability_agent", 

81 "capability_symref", 

82 "extract_capabilities", 

83 "extract_capability_names", 

84 "extract_want_line_capabilities", 

85 "format_ack_line", 

86 "format_capability_line", 

87 "format_cmd_pkt", 

88 "format_ref_line", 

89 "format_shallow_line", 

90 "format_unshallow_line", 

91 "parse_capability", 

92 "parse_cmd_pkt", 

93 "pkt_line", 

94 "pkt_seq", 

95 "serialize_refs", 

96 "split_peeled_refs", 

97 "strip_peeled_refs", 

98 "symref_capabilities", 

99 "write_info_refs", 

100] 

101 

102import types 

103from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence 

104from io import BytesIO 

105from os import SEEK_END 

106from typing import TYPE_CHECKING 

107 

108import dulwich 

109 

110from .errors import GitProtocolError, HangupException 

111from .objects import ObjectID 

112 

113if TYPE_CHECKING: 

114 from .pack import ObjectContainer 

115 from .refs import Ref 

116 

117TCP_GIT_PORT = 9418 

118 

119# Git protocol version 0 is the original Git protocol, which lacked a 

120# version number until Git protocol version 1 was introduced by Brandon 

121# Williams in 2017. 

122# 

123# Protocol version 1 is simply the original v0 protocol with the addition of 

124# a single packet line, which precedes the ref advertisement, indicating the 

125# protocol version being used. This was done in preparation for protocol v2. 

126# 

127# Git protocol version 2 was first introduced by Brandon Williams in 2018 and 

128# adds many features. See the gitprotocol-v2(5) manual page for details. 

129# As of 2024, Git only implements version 2 during 'git fetch' and still uses 

130# version 0 during 'git push'. 

131GIT_PROTOCOL_VERSIONS = [0, 1, 2] 

132DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2 

133DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0 

134 

135# Suffix used in the Git protocol to indicate peeled tag references 

136PEELED_TAG_SUFFIX = b"^{}" 

137 

138ZERO_SHA: ObjectID = ObjectID(b"0" * 40) 

139 

140SINGLE_ACK = 0 

141MULTI_ACK = 1 

142MULTI_ACK_DETAILED = 2 

143 

144# pack data 

145SIDE_BAND_CHANNEL_DATA = 1 

146# progress messages 

147SIDE_BAND_CHANNEL_PROGRESS = 2 

148# fatal error message just before stream aborts 

149SIDE_BAND_CHANNEL_FATAL = 3 

150 

151CAPABILITY_ATOMIC = b"atomic" 

152CAPABILITY_DEEPEN_SINCE = b"deepen-since" 

153CAPABILITY_DEEPEN_NOT = b"deepen-not" 

154CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative" 

155CAPABILITY_DELETE_REFS = b"delete-refs" 

156CAPABILITY_INCLUDE_TAG = b"include-tag" 

157CAPABILITY_MULTI_ACK = b"multi_ack" 

158CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed" 

159CAPABILITY_NO_DONE = b"no-done" 

160CAPABILITY_NO_PROGRESS = b"no-progress" 

161CAPABILITY_OFS_DELTA = b"ofs-delta" 

162CAPABILITY_QUIET = b"quiet" 

163CAPABILITY_REPORT_STATUS = b"report-status" 

164CAPABILITY_SHALLOW = b"shallow" 

165CAPABILITY_SIDE_BAND = b"side-band" 

166CAPABILITY_SIDE_BAND_64K = b"side-band-64k" 

167CAPABILITY_THIN_PACK = b"thin-pack" 

168CAPABILITY_AGENT = b"agent" 

169CAPABILITY_SYMREF = b"symref" 

170CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want" 

171CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want" 

172CAPABILITY_FETCH = b"fetch" 

173CAPABILITY_FILTER = b"filter" 

174 

175# Magic ref that is used to attach capabilities to when 

176# there are no refs. Should always be set to ZERO_SHA. 

177CAPABILITIES_REF = b"capabilities^{}" 

178 

# Capabilities understood by both the upload-pack and receive-pack services.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]

# Capabilities recognised when serving/performing a fetch (upload-pack).
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
}

# Capabilities recognised when serving/performing a push (receive-pack).
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
}

212 

213DEPTH_INFINITE = 0x7FFFFFFF 

214 

215NAK_LINE = b"NAK\n" 

216 

217 

def agent_string() -> bytes:
    """Build the agent identifier that dulwich advertises.

    Returns:
      ``dulwich/`` followed by the dot-joined components of
      ``dulwich.__version__``, encoded as ASCII bytes
    """
    dotted_version = ".".join(str(component) for component in dulwich.__version__)
    return f"dulwich/{dotted_version}".encode("ascii")

225 

226 

def capability_agent() -> bytes:
    """Build the ``agent=<agent string>`` capability.

    Returns:
      The agent capability name, ``=``, and the dulwich agent string
    """
    return b"=".join((CAPABILITY_AGENT, agent_string()))

234 

235 

def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Build a ``symref=<from>:<to>`` capability.

    Args:
      from_ref: Name of the symbolic reference
      to_ref: Name of the reference it points at

    Returns:
      The symref capability as bytes
    """
    mapping = from_ref + b":" + to_ref
    return CAPABILITY_SYMREF + b"=" + mapping

247 

248 

def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Collect the names of the given capabilities, dropping any values.

    Args:
      capabilities: Capability strings, each either ``name`` or ``name=value``

    Returns:
      Set of the capability names
    """
    names: set[bytes] = set()
    for capability in capabilities:
        # Everything before the first "=" is the name; the value is ignored.
        name, _separator, _value = capability.partition(b"=")
        names.add(name)
    return names

259 

260 

261def parse_capability(capability: bytes) -> tuple[bytes, bytes | None]: 

262 """Parse a capability string into name and value. 

263 

264 Args: 

265 capability: Capability string 

266 

267 Returns: 

268 Tuple of (capability_name, capability_value) 

269 """ 

270 parts = capability.split(b"=", 1) 

271 if len(parts) == 1: 

272 return (parts[0], None) 

273 return (parts[0], parts[1]) 

274 

275 

def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Build one symref capability per (from_ref, to_ref) pair.

    Args:
      symrefs: Iterable of (from_ref, to_ref) tuples

    Returns:
      List of symref capability strings, in input order
    """
    formatted: list[bytes] = []
    for pair in symrefs:
        formatted.append(capability_symref(*pair))
    return formatted

286 

287 

288COMMAND_DEEPEN = b"deepen" 

289COMMAND_DEEPEN_SINCE = b"deepen-since" 

290COMMAND_DEEPEN_NOT = b"deepen-not" 

291COMMAND_SHALLOW = b"shallow" 

292COMMAND_UNSHALLOW = b"unshallow" 

293COMMAND_DONE = b"done" 

294COMMAND_WANT = b"want" 

295COMMAND_HAVE = b"have" 

296 

297 

def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Build the payload of a command packet.

    The result is the command, a single space, and then every argument
    followed by a NUL byte.

    Args:
      cmd: Command name
      *args: Command arguments

    Returns:
      Formatted command packet payload
    """
    terminated_args = b"".join(arg + b"\0" for arg in args)
    return cmd + b" " + terminated_args

309 

310 

def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Split a command packet payload into the command and its arguments.

    Args:
      line: Command packet payload: command, a space, then NUL-terminated
        arguments

    Returns:
      Tuple of (command, [arguments])
    """
    space_index = line.find(b" ")
    cmd = line[:space_index]
    raw_args = line[space_index + 1 :]
    # The argument section must end with a NUL terminator.
    assert raw_args[-1:] == b"\x00"
    return cmd, raw_args[:-1].split(b"\0")

324 

325 

326def pkt_line(data: bytes | None) -> bytes: 

327 """Wrap data in a pkt-line. 

328 

329 Args: 

330 data: The data to wrap, as a str or None. 

331 Returns: The data prefixed with its length in pkt-line format; if data was 

332 None, returns the flush-pkt ('0000'). 

333 """ 

334 if data is None: 

335 return b"0000" 

336 return f"{len(data) + 4:04x}".encode("ascii") + data 

337 

338 

def pkt_seq(*seq: bytes | None) -> bytes:
    """Frame each item as a pkt-line and terminate with a flush-pkt.

    Args:
      seq: Payloads to wrap; a None item is emitted as a flush-pkt.
    Returns: The concatenated pkt-lines followed by a final flush-pkt.
    """
    frames = [pkt_line(item) for item in seq]
    frames.append(pkt_line(None))
    return b"".join(frames)

346 

347 

348class Protocol: 

349 """Class for interacting with a remote git process over the wire. 

350 

351 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line 

352 consists of the length of the line as a 4-byte hex string, followed by the 

353 payload data. The length includes the 4-byte header. The special line 

354 '0000' indicates the end of a section of input and is called a 'flush-pkt'. 

355 

356 For details on the pkt-line format, see the cgit distribution: 

357 Documentation/technical/protocol-common.txt 

358 """ 

359 

360 def __init__( 

361 self, 

362 read: Callable[[int], bytes], 

363 write: Callable[[bytes], int | None], 

364 close: Callable[[], None] | None = None, 

365 report_activity: Callable[[int, str], None] | None = None, 

366 ) -> None: 

367 """Initialize Protocol. 

368 

369 Args: 

370 read: Function to read bytes from the transport 

371 write: Function to write bytes to the transport 

372 close: Optional function to close the transport 

373 report_activity: Optional function to report activity 

374 """ 

375 self.read = read 

376 self.write = write 

377 self._close = close 

378 self.report_activity = report_activity 

379 self._readahead: BytesIO | None = None 

380 

381 def close(self) -> None: 

382 """Close the underlying transport if a close function was provided.""" 

383 if self._close: 

384 self._close() 

385 

386 def __enter__(self) -> "Protocol": 

387 """Enter context manager.""" 

388 return self 

389 

390 def __exit__( 

391 self, 

392 exc_type: type[BaseException] | None, 

393 exc_val: BaseException | None, 

394 exc_tb: types.TracebackType | None, 

395 ) -> None: 

396 """Exit context manager and close transport.""" 

397 self.close() 

398 

399 def read_pkt_line(self) -> bytes | None: 

400 """Reads a pkt-line from the remote git process. 

401 

402 This method may read from the readahead buffer; see unread_pkt_line. 

403 

404 Returns: The next string from the stream, without the length prefix, or 

405 None for a flush-pkt ('0000') or delim-pkt ('0001'). 

406 """ 

407 if self._readahead is None: 

408 read = self.read 

409 else: 

410 read = self._readahead.read 

411 self._readahead = None 

412 

413 try: 

414 sizestr = read(4) 

415 if not sizestr: 

416 raise HangupException 

417 size = int(sizestr, 16) 

418 if size == 0 or size == 1: # flush-pkt or delim-pkt 

419 if self.report_activity: 

420 self.report_activity(4, "read") 

421 return None 

422 if self.report_activity: 

423 self.report_activity(size, "read") 

424 pkt_contents = read(size - 4) 

425 except ConnectionResetError as exc: 

426 raise HangupException from exc 

427 except OSError as exc: 

428 raise GitProtocolError(str(exc)) from exc 

429 else: 

430 if len(pkt_contents) + 4 != size: 

431 raise GitProtocolError( 

432 f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}" 

433 ) 

434 return pkt_contents 

435 

436 def eof(self) -> bool: 

437 """Test whether the protocol stream has reached EOF. 

438 

439 Note that this refers to the actual stream EOF and not just a 

440 flush-pkt. 

441 

442 Returns: True if the stream is at EOF, False otherwise. 

443 """ 

444 try: 

445 next_line = self.read_pkt_line() 

446 except HangupException: 

447 return True 

448 self.unread_pkt_line(next_line) 

449 return False 

450 

451 def unread_pkt_line(self, data: bytes | None) -> None: 

452 """Unread a single line of data into the readahead buffer. 

453 

454 This method can be used to unread a single pkt-line into a fixed 

455 readahead buffer. 

456 

457 Args: 

458 data: The data to unread, without the length prefix. 

459 

460 Raises: 

461 ValueError: If more than one pkt-line is unread. 

462 """ 

463 if self._readahead is not None: 

464 raise ValueError("Attempted to unread multiple pkt-lines.") 

465 self._readahead = BytesIO(pkt_line(data)) 

466 

467 def read_pkt_seq(self) -> Iterable[bytes]: 

468 """Read a sequence of pkt-lines from the remote git process. 

469 

470 Returns: Yields each line of data up to but not including the next 

471 flush-pkt. 

472 """ 

473 pkt = self.read_pkt_line() 

474 while pkt: 

475 yield pkt 

476 pkt = self.read_pkt_line() 

477 

478 def write_pkt_line(self, line: bytes | None) -> None: 

479 """Sends a pkt-line to the remote git process. 

480 

481 Args: 

482 line: A string containing the data to send, without the length 

483 prefix. 

484 """ 

485 try: 

486 line = pkt_line(line) 

487 self.write(line) 

488 if self.report_activity: 

489 self.report_activity(len(line), "write") 

490 except OSError as exc: 

491 raise GitProtocolError(str(exc)) from exc 

492 

493 def write_sideband(self, channel: int, blob: bytes) -> None: 

494 """Write multiplexed data to the sideband. 

495 

496 Args: 

497 channel: An int specifying the channel to write to. 

498 blob: A blob of data (as a string) to send on this channel. 

499 """ 

500 # a pktline can be a max of 65520. a sideband line can therefore be 

501 # 65520-5 = 65515 

502 # WTF: Why have the len in ASCII, but the channel in binary. 

503 while blob: 

504 self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515]) 

505 blob = blob[65515:] 

506 

507 def send_cmd(self, cmd: bytes, *args: bytes) -> None: 

508 """Send a command and some arguments to a git server. 

509 

510 Only used for the TCP git protocol (git://). 

511 

512 Args: 

513 cmd: The remote service to access. 

514 args: List of arguments to send to remove service. 

515 """ 

516 self.write_pkt_line(format_cmd_pkt(cmd, *args)) 

517 

518 def read_cmd(self) -> tuple[bytes, list[bytes]]: 

519 """Read a command and some arguments from the git client. 

520 

521 Only used for the TCP git protocol (git://). 

522 

523 Returns: A tuple of (command, [list of arguments]). 

524 """ 

525 line = self.read_pkt_line() 

526 if line is None: 

527 raise GitProtocolError("Expected command, got flush packet") 

528 return parse_cmd_pkt(line) 

529 

530 

531_RBUFSIZE = 65536 # 64KB buffer for better network I/O performance 

532 

533 

class ReceivableProtocol(Protocol):
    """Protocol variant built on a socket-style recv() callable.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], int | None],
        close: Callable[[], None] | None = None,
        report_activity: Callable[[int, str], None] | None = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Initialize ReceivableProtocol.

        Args:
          recv: Callable receiving at most the requested number of bytes
          write: Callable sending bytes to the transport
          close: Optional callable that closes the transport
          report_activity: Optional callable to report activity
          rbufsize: Number of bytes requested from recv when refilling the
            read buffer in recv()
        """
        # The base class reads through this object's buffered read().
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size: int) -> bytes:
        """Read size bytes, stopping early only when recv returns no data.

        Args:
          size: Number of bytes to read; must be positive

        Returns:
          The bytes read; fewer than size only if recv returned nothing
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # A BytesIO is used instead of a list of recv() results to limit
        # memory usage and fragmentation when rbufsize is large compared to
        # what recv() typically returns.
        pending = self._rbuf
        origin = pending.tell()
        pending.seek(0, SEEK_END)
        # recv() may already have consumed part of the buffer, so only the
        # bytes after the current position count.
        buffered = pending.tell() - origin
        if buffered >= size:
            # Enough is buffered already: hand out size bytes and keep the
            # remainder in a fresh buffer positioned at its start.
            pending.seek(origin)
            result = pending.read(size)
            self._rbuf = BytesIO(pending.read())
            return result

        # Everything buffered will be returned, so start over with an empty
        # buffer and keep accumulating into the old one.
        self._rbuf = BytesIO()
        while True:
            wanted = size - buffered
            # recv() allocates the requested amount even though it often
            # returns less; each chunk is copied into the BytesIO right away
            # so the short-lived chunk can be freed.
            chunk = self._recv(wanted)
            if not chunk:
                break
            received = len(chunk)
            if received == size and not buffered:
                # Shortcut: nothing was buffered and recv returned exactly
                # what was asked for, so no copy is needed.
                return chunk
            if received != wanted:
                assert received <= wanted, f"_recv({wanted}) returned {received} bytes"
            pending.write(chunk)
            if received == wanted:
                break
            buffered += received
        pending.seek(origin)
        return pending.read()

    def recv(self, size: int) -> bytes:
        """Return up to size bytes, refilling the buffer only when empty.

        Args:
          size: Maximum number of bytes to return; must be positive

        Returns:
          Up to size bytes from the buffer or from a fresh recv call
        """
        assert size > 0

        pending = self._rbuf
        position = pending.tell()
        pending.seek(0, SEEK_END)
        end = pending.tell()
        pending.seek(position)

        if end == position:
            # Only touch the wire once the read buffer is exhausted.
            chunk = self._recv(self._rbufsize)
            if len(chunk) == size:
                # Shortcut: exactly size bytes arrived, skip the buffer.
                return chunk
            pending = BytesIO(chunk)
            self._rbuf = pending
        return pending.read(size)

667 

668 

def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate a NUL-delimited capability list from a line, if present.

    Args:
      text: Line to extract from
    Returns: Tuple of (text before the NUL, list of capabilities); the
        original text and an empty list when there is no NUL byte
    """
    if b"\0" in text:
        head, capability_blob = text.rstrip().split(b"\0")
        return (head, capability_blob.strip().split(b" "))
    return text, []

680 

681 

def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate the capability list from a want line, if present.

    Unlike ref lines, want lines separate capabilities from the rest of the
    line with a space rather than a NUL byte:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple of (first two fields joined by a space, remaining fields);
        the original text and an empty list when there are fewer than three
        fields
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        return (b" ".join(fields[:2]), fields[2:])
    return text, []

698 

699 

def ack_type(capabilities: Iterable[bytes]) -> int:
    """Determine the ack type from a capabilities list.

    Args:
      capabilities: Capability names; any iterable, including one-shot
        iterators

    Returns:
      MULTI_ACK_DETAILED if the multi_ack_detailed capability is present,
      otherwise MULTI_ACK if multi_ack is present, otherwise SINGLE_ACK
    """
    # Materialise once: two successive ``in`` tests against a one-shot
    # iterator would drain it on the first test and miss multi_ack.
    offered = tuple(capabilities)
    if CAPABILITY_MULTI_ACK_DETAILED in offered:
        return MULTI_ACK_DETAILED
    if CAPABILITY_MULTI_ACK in offered:
        return MULTI_ACK
    return SINGLE_ACK

707 

708 

709class BufferedPktLineWriter: 

710 """Writer that wraps its data in pkt-lines and has an independent buffer. 

711 

712 Consecutive calls to write() wrap the data in a pkt-line and then buffers 

713 it until enough lines have been written such that their total length 

714 (including length prefix) reach the buffer size. 

715 """ 

716 

717 def __init__( 

718 self, write: Callable[[bytes], int | None], bufsize: int = 65515 

719 ) -> None: 

720 """Initialize the BufferedPktLineWriter. 

721 

722 Args: 

723 write: A write callback for the underlying writer. 

724 bufsize: The internal buffer size, including length prefixes. 

725 """ 

726 self._write = write 

727 self._bufsize = bufsize 

728 self._wbuf = BytesIO() 

729 self._buflen = 0 

730 

731 def write(self, data: bytes) -> None: 

732 """Write data, wrapping it in a pkt-line.""" 

733 line = pkt_line(data) 

734 line_len = len(line) 

735 over = self._buflen + line_len - self._bufsize 

736 if over >= 0: 

737 start = line_len - over 

738 self._wbuf.write(line[:start]) 

739 self.flush() 

740 else: 

741 start = 0 

742 saved = line[start:] 

743 self._wbuf.write(saved) 

744 self._buflen += len(saved) 

745 

746 def flush(self) -> None: 

747 """Flush all data from the buffer.""" 

748 data = self._wbuf.getvalue() 

749 if data: 

750 self._write(data) 

751 self._len = 0 

752 self._wbuf = BytesIO() 

753 

754 

755class PktLineParser: 

756 """Packet line parser that hands completed packets off to a callback.""" 

757 

758 def __init__(self, handle_pkt: Callable[[bytes | None], None]) -> None: 

759 """Initialize PktLineParser. 

760 

761 Args: 

762 handle_pkt: Callback function to handle completed packets 

763 """ 

764 self.handle_pkt = handle_pkt 

765 self._readahead = BytesIO() 

766 

767 def parse(self, data: bytes) -> None: 

768 """Parse a fragment of data and call back for any completed packets.""" 

769 self._readahead.write(data) 

770 buf = self._readahead.getvalue() 

771 if len(buf) < 4: 

772 return 

773 while len(buf) >= 4: 

774 size = int(buf[:4], 16) 

775 if size == 0: 

776 self.handle_pkt(None) 

777 buf = buf[4:] 

778 elif size <= len(buf): 

779 self.handle_pkt(buf[4:size]) 

780 buf = buf[size:] 

781 else: 

782 break 

783 self._readahead = BytesIO() 

784 self._readahead.write(buf) 

785 

786 def get_tail(self) -> bytes: 

787 """Read back any unused data.""" 

788 return self._readahead.getvalue() 

789 

790 

def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Render capabilities for the wire protocol.

    Args:
      capabilities: Capability strings

    Returns:
      The capabilities concatenated, each preceded by a single space
    """
    return b"".join(b" " + capability for capability in capabilities)

801 

802 

803def format_ref_line( 

804 ref: bytes, sha: bytes, capabilities: Sequence[bytes] | None = None 

805) -> bytes: 

806 """Format a ref advertisement line. 

807 

808 Args: 

809 ref: Reference name 

810 sha: SHA hash 

811 capabilities: Optional list of capabilities 

812 

813 Returns: 

814 Formatted ref line 

815 """ 

816 if capabilities is None: 

817 return sha + b" " + ref + b"\n" 

818 else: 

819 return sha + b" " + ref + b"\0" + format_capability_line(capabilities) + b"\n" 

820 

821 

def format_shallow_line(sha: bytes) -> bytes:
    """Render a shallow line.

    Args:
      sha: SHA to mark as shallow

    Returns:
      The shallow command and the SHA separated by a space
    """
    return b" ".join((COMMAND_SHALLOW, sha))

832 

833 

def format_unshallow_line(sha: bytes) -> bytes:
    """Render an unshallow line.

    Args:
      sha: SHA to unshallow

    Returns:
      The unshallow command and the SHA separated by a space
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))

844 

845 

def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Render an ACK line.

    Args:
      sha: SHA to acknowledge
      ack_type: Optional ACK type (e.g. b"continue")

    Returns:
      ``ACK <sha>``, followed by a space and the ACK type when one is
      given, terminated by a newline
    """
    suffix = (b" " + ack_type) if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"

859 

860 

def strip_peeled_refs(
    refs: "Mapping[Ref, ObjectID | None]",
) -> "dict[Ref, ObjectID | None]":
    """Copy a refs mapping, leaving out every peeled ref.

    Args:
      refs: Mapping of refs (may include peeled refs with ^{} suffix)

    Returns:
      New dictionary holding only the refs without the ^{} suffix
    """
    kept = {}
    for ref, sha in refs.items():
        if ref.endswith(PEELED_TAG_SUFFIX):
            continue
        kept[ref] = sha
    return kept

875 

876 

def split_peeled_refs(
    refs: "Mapping[Ref, ObjectID]",
) -> "tuple[dict[Ref, ObjectID], dict[Ref, ObjectID]]":
    """Partition a refs mapping into regular and peeled refs.

    Args:
      refs: Mapping of refs (may include peeled refs with ^{} suffix)

    Returns:
      Tuple of (regular_refs, peeled_refs) where peeled_refs keys have
      the ^{} suffix removed
    """
    from .refs import Ref

    regular: dict[Ref, ObjectID] = {}
    peeled: dict[Ref, ObjectID] = {}
    suffix_length = len(PEELED_TAG_SUFFIX)

    for ref, sha in refs.items():
        if ref.endswith(PEELED_TAG_SUFFIX):
            # Key the peeled value by the ref name without its suffix.
            peeled[Ref(ref[:-suffix_length])] = sha
        else:
            regular[ref] = sha

    return regular, peeled

900 

901 

def write_info_refs(
    refs: "Mapping[Ref, ObjectID]", store: "ObjectContainer"
) -> "Iterator[bytes]":
    """Produce info/refs lines as used by the dumb HTTP protocol.

    Refs are emitted in sorted order. Refs whose object is missing from the
    store are skipped. When peeling a ref yields a different object, an
    extra line for the peeled object follows with the ^{} suffix.

    Args:
      refs: Mapping of ref names to SHAs
      store: Object store used to look up and peel objects

    Yields:
      Lines in info/refs format (sha + tab + refname + newline)
    """
    from .object_store import peel_sha
    from .refs import HEADREF

    for name, sha in sorted(refs.items()):
        # get_refs() includes HEAD as a special case, but it must not be
        # advertised here.
        if name == HEADREF:
            continue
        try:
            obj = store[sha]
        except KeyError:
            continue
        _unpeeled, peeled = peel_sha(store, sha)
        yield obj.id + b"\t" + name + b"\n"
        if obj.id == peeled.id:
            continue
        yield peeled.id + b"\t" + name + PEELED_TAG_SUFFIX + b"\n"

930 

931 

932def serialize_refs( 

933 store: "ObjectContainer", refs: "Mapping[Ref, ObjectID]" 

934) -> "dict[bytes, ObjectID]": 

935 """Serialize refs with peeled refs for Git protocol v0/v1. 

936 

937 This function is used to prepare refs for transmission over the Git protocol. 

938 For tags, it includes both the tag object and the dereferenced object. 

939 

940 Args: 

941 store: Object store to peel refs from 

942 refs: Dictionary of ref names to SHAs 

943 

944 Returns: 

945 Dictionary with refs and peeled refs (marked with ^{}) 

946 """ 

947 import warnings 

948 

949 from .object_store import peel_sha 

950 from .objects import Tag 

951 

952 ret: dict[bytes, ObjectID] = {} 

953 for ref, sha in refs.items(): 

954 try: 

955 unpeeled, peeled = peel_sha(store, ObjectID(sha)) 

956 except KeyError: 

957 warnings.warn( 

958 "ref {} points at non-present sha {}".format( 

959 ref.decode("utf-8", "replace"), sha.decode("ascii") 

960 ), 

961 UserWarning, 

962 ) 

963 continue 

964 else: 

965 if isinstance(unpeeled, Tag): 

966 ret[ref + PEELED_TAG_SUFFIX] = peeled.id 

967 ret[ref] = unpeeled.id 

968 return ret