Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 34%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

291 statements  

1# protocol.py -- Shared parts of the git protocols 

2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk> 

3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk> 

4# 

5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later 

6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

7# General Public License as published by the Free Software Foundation; version 2.0 

8# or (at your option) any later version. You can redistribute it and/or 

9# modify it under the terms of either of these two licenses. 

10# 

11# Unless required by applicable law or agreed to in writing, software 

12# distributed under the License is distributed on an "AS IS" BASIS, 

13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

14# See the License for the specific language governing permissions and 

15# limitations under the License. 

16# 

17# You should have received a copy of the licenses; if not, see 

18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

20# License, Version 2.0. 

21# 

22 

23"""Generic functions for talking the git smart server protocol.""" 

24 

25import types 

26from collections.abc import Iterable 

27from io import BytesIO 

28from os import SEEK_END 

29from typing import Callable, Optional 

30 

31import dulwich 

32 

33from .errors import GitProtocolError, HangupException 

34 

TCP_GIT_PORT = 9418

# Git protocol version 0 is the original Git protocol, which lacked a
# version number until Git protocol version 1 was introduced by Brandon
# Williams in 2017.
#
# Protocol version 1 is simply the original v0 protocol with the addition of
# a single packet line, which precedes the ref advertisement, indicating the
# protocol version being used. This was done in preparation for protocol v2.
#
# Git protocol version 2 was first introduced by Brandon Williams in 2018 and
# adds many features. See the gitprotocol-v2(5) manual page for details.
# As of 2024, Git only implements version 2 during 'git fetch' and still uses
# version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

ZERO_SHA = b"0" * 40

# Acknowledgement modes, see ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
SIDE_BAND_CHANNEL_DATA = 1  # pack data
SIDE_BAND_CHANNEL_PROGRESS = 2  # progress messages
SIDE_BAND_CHANNEL_FATAL = 3  # fatal error message just before stream aborts

CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"

# Capabilities shared by the upload and receive capability sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
}
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
}

DEPTH_INFINITE = 0x7FFFFFFF

NAK_LINE = b"NAK\n"

131 

132 

def agent_string() -> bytes:
    """Return the agent identifier ``dulwich/<version>`` as ASCII bytes.

    The components of ``dulwich.__version__`` are stringified and joined
    with dots to form the version part.
    """
    version = ".".join(str(part) for part in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")

135 

136 

def capability_agent() -> bytes:
    """Return the ``agent=<agent string>`` capability for this library."""
    return b"=".join((CAPABILITY_AGENT, agent_string()))

139 

140 

def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Return the ``symref=<from_ref>:<to_ref>`` capability string."""
    mapping = from_ref + b":" + to_ref
    return CAPABILITY_SYMREF + b"=" + mapping

143 

144 

def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Return the set of capability names, with any ``=value`` part dropped."""
    names = set()
    for capability in capabilities:
        name, _value = parse_capability(capability)
        names.add(name)
    return names

147 

148 

def parse_capability(capability: bytes) -> tuple[bytes, Optional[bytes]]:
    """Split a capability into its name and optional value.

    Args:
      capability: A capability of the form ``name`` or ``name=value``.
    Returns: Tuple of (name, value); value is None when the capability
      contains no ``=``. Only the first ``=`` separates name from value.
    """
    name, separator, value = capability.partition(b"=")
    if not separator:
        return (name, None)
    return (name, value)

154 

155 

def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Return one ``symref=<from>:<to>`` capability per (from, to) pair."""
    capabilities = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities

158 

159 

# Command keywords used on the wire (see format_shallow_line and
# format_unshallow_line for two of the users).
COMMAND_DEEPEN = b"deepen"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"

166 

167 

def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Format a command packet payload.

    Args:
      cmd: The command name.
      args: Arguments; each one is terminated with a NUL byte.
    Returns: ``cmd``, a single space, then the NUL-terminated arguments.
    """
    terminated_args = b"".join(arg + b"\0" for arg in args)
    return cmd + b" " + terminated_args

170 

171 

def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Parse a command packet payload as produced by format_cmd_pkt.

    Args:
      line: Payload of the form ``cmd SP arg NUL [arg NUL ...]``.
    Returns: Tuple of (command, [list of arguments]).
    Raises:
      GitProtocolError: If the line has no space separating the command
        from its arguments, or the arguments are not NUL-terminated.
    """
    # partition() leaves args empty when there is no space, so the single
    # NUL-termination check below rejects both kinds of malformed input.
    # An explicit raise is used because asserts are stripped under -O.
    cmd, _separator, args = line.partition(b" ")
    if not args.endswith(b"\0"):
        raise GitProtocolError(f"Malformed command packet: {line!r}")
    return cmd, args[:-1].split(b"\0")

177 

178 

def pkt_line(data: Optional[bytes]) -> bytes:
    """Wrap data in a pkt-line.

    Args:
      data: The payload to wrap, as bytes, or None.
    Returns: The payload prefixed with its length (payload plus the 4-byte
      header) as four lowercase hex digits; if data was None, returns the
      flush-pkt ('0000').
    """
    if data is None:
        return b"0000"
    header = b"%04x" % (len(data) + 4)
    return header + data

190 

191 

def pkt_seq(*seq: Optional[bytes]) -> bytes:
    """Wrap a sequence of data in pkt-lines, terminated by a flush-pkt.

    Args:
      seq: The payloads to wrap; each is passed through pkt_line.
    Returns: The concatenated pkt-lines followed by a flush-pkt.
    """
    lines = [pkt_line(item) for item in seq]
    lines.append(pkt_line(None))
    return b"".join(lines)

199 

200 

def filter_ref_prefix(
    refs: dict[bytes, bytes], prefixes: Iterable[bytes]
) -> dict[bytes, bytes]:
    """Filter refs to only include those with a given prefix.

    Args:
      refs: A dictionary mapping ref names to their values.
      prefixes: The prefixes to filter by. Any iterable is accepted,
        including one-shot iterators.
    Returns: A new dictionary with the entries of ``refs`` whose name starts
      with at least one of the prefixes.
    """
    # Materialise once: a one-shot iterator would otherwise be exhausted
    # while testing the first ref, silently dropping every later ref.
    wanted = tuple(prefixes)
    return {name: value for name, value in refs.items() if name.startswith(wanted)}

211 

212 

class Protocol:
    """Class for interacting with a remote git process over the wire.

    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
    consists of the length of the line as a 4-byte hex string, followed by the
    payload data. The length includes the 4-byte header. The special line
    '0000' indicates the end of a section of input and is called a 'flush-pkt'.

    For details on the pkt-line format, see the cgit distribution:
        Documentation/technical/protocol-common.txt
    """

    def __init__(
        self,
        read: Callable[[int], bytes],
        write: Callable[[bytes], Optional[int]],
        close: Optional[Callable[[], None]] = None,
        report_activity: Optional[Callable[[int, str], None]] = None,
    ) -> None:
        """Initialize the protocol.

        Args:
          read: Callback that reads up to the given number of bytes.
          write: Callback that writes the given bytes.
          close: Optional callback invoked by close().
          report_activity: Optional callback invoked with a byte count and
            either "read" or "write" whenever a pkt-line is transferred.
        """
        self.read = read
        self.write = write
        self._close = close
        self.report_activity = report_activity
        # Holds at most one unread pkt-line, see unread_pkt_line().
        self._readahead: Optional[BytesIO] = None

    def close(self) -> None:
        """Invoke the close callback, if one was supplied."""
        if self._close:
            self._close()

    def __enter__(self) -> "Protocol":
        return self

    def __exit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Optional[types.TracebackType],
    ) -> None:
        self.close()

    def read_pkt_line(self) -> Optional[bytes]:
        """Reads a pkt-line from the remote git process.

        This method may read from the readahead buffer; see unread_pkt_line.

        Returns: The next string from the stream, without the length prefix, or
            None for a flush-pkt ('0000') or delim-pkt ('0001').

        Raises:
          HangupException: If the stream is at EOF or the connection was
            reset.
          GitProtocolError: On other OS-level read errors, on a length prefix
            of 2 or 3 (shorter than the header itself), or if fewer payload
            bytes were read than the prefix announced.
        """
        if self._readahead is None:
            read = self.read
        else:
            read = self._readahead.read
            self._readahead = None

        try:
            sizestr = read(4)
            if not sizestr:
                raise HangupException
            size = int(sizestr, 16)
            if size == 0 or size == 1:  # flush-pkt or delim-pkt
                if self.report_activity:
                    self.report_activity(4, "read")
                return None
            if size < 4:
                # A length smaller than the header cannot describe a data
                # packet; reading `size - 4` bytes would be a negative read,
                # which file-like objects treat as "read until EOF".
                raise GitProtocolError(f"Invalid pkt-line length prefix {size:04x}")
            if self.report_activity:
                self.report_activity(size, "read")
            # An empty packet ('0004') has no payload; skip the zero-length
            # read, which ReceivableProtocol.read() rejects.
            pkt_contents = read(size - 4) if size > 4 else b""
        except ConnectionResetError as exc:
            raise HangupException from exc
        except OSError as exc:
            raise GitProtocolError(str(exc)) from exc
        else:
            if len(pkt_contents) + 4 != size:
                raise GitProtocolError(
                    f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}"
                )
            return pkt_contents

    def eof(self) -> bool:
        """Test whether the protocol stream has reached EOF.

        Note that this refers to the actual stream EOF and not just a
        flush-pkt.

        Returns: True if the stream is at EOF, False otherwise.
        """
        try:
            next_line = self.read_pkt_line()
        except HangupException:
            return True
        # Not at EOF: push the packet back so the next read sees it again.
        self.unread_pkt_line(next_line)
        return False

    def unread_pkt_line(self, data: Optional[bytes]) -> None:
        """Unread a single line of data into the readahead buffer.

        This method can be used to unread a single pkt-line into a fixed
        readahead buffer.

        Args:
          data: The data to unread, without the length prefix.

        Raises:
          ValueError: If more than one pkt-line is unread.
        """
        if self._readahead is not None:
            raise ValueError("Attempted to unread multiple pkt-lines.")
        self._readahead = BytesIO(pkt_line(data))

    def read_pkt_seq(self) -> Iterable[bytes]:
        """Read a sequence of pkt-lines from the remote git process.

        Returns: Yields each line of data up to but not including the next
            flush-pkt. An empty packet also ends the sequence.
        """
        pkt = self.read_pkt_line()
        while pkt:
            yield pkt
            pkt = self.read_pkt_line()

    def write_pkt_line(self, line: Optional[bytes]) -> None:
        """Sends a pkt-line to the remote git process.

        Args:
          line: A string containing the data to send, without the length
            prefix, or None to send a flush-pkt.

        Raises:
          GitProtocolError: If the write fails with an OSError.
        """
        try:
            line = pkt_line(line)
            self.write(line)
            if self.report_activity:
                self.report_activity(len(line), "write")
        except OSError as exc:
            raise GitProtocolError(str(exc)) from exc

    def write_sideband(self, channel: int, blob: bytes) -> None:
        """Write multiplexed data to the sideband.

        Args:
          channel: An int specifying the channel to write to.
          blob: A blob of data (as a string) to send on this channel.
        """
        # a pktline can be a max of 65520. a sideband line can therefore be
        # 65520-5 = 65515
        # WTF: Why have the len in ASCII, but the channel in binary.
        while blob:
            self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
            blob = blob[65515:]

    def send_cmd(self, cmd: bytes, *args: bytes) -> None:
        """Send a command and some arguments to a git server.

        Only used for the TCP git protocol (git://).

        Args:
          cmd: The remote service to access.
          args: List of arguments to send to remote service.
        """
        self.write_pkt_line(format_cmd_pkt(cmd, *args))

    def read_cmd(self) -> tuple[bytes, list[bytes]]:
        """Read a command and some arguments from the git client.

        Only used for the TCP git protocol (git://).

        Returns: A tuple of (command, [list of arguments]).

        Raises:
          GitProtocolError: If a flush-pkt is received instead of a command.
        """
        line = self.read_pkt_line()
        if line is None:
            raise GitProtocolError("Expected command, got flush packet")
        return parse_cmd_pkt(line)

383 

384 

_RBUFSIZE = 65536  # 64KB buffer for better network I/O performance


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], Optional[int]],
        close: Optional[Callable[[], None]] = None,
        report_activity: Optional[Callable[[int, str], None]] = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Initialize the protocol.

        Args:
          recv: Callback returning at most the given number of bytes.
          write: Callback that writes the given bytes.
          close: Optional callback invoked on close.
          report_activity: Optional activity callback, passed to Protocol.
          rbufsize: Number of bytes recv() requests from the wire at once.
        """
        # The base class reads through our own buffered read() method.
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size: int) -> bytes:
        """Read exactly ``size`` bytes, or fewer if the stream ends first."""
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        pending = self._rbuf
        offset = pending.tell()
        pending.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        available = pending.tell() - offset
        if available >= size:
            # Enough bytes are buffered already: hand them out and keep the
            # remainder in a fresh buffer positioned at its start.
            pending.seek(offset)
            result = pending.read(size)
            self._rbuf = BytesIO(pending.read())
            return result

        self._rbuf = BytesIO()  # reset _rbuf. we consume it via pending.
        while True:
            remaining = size - available
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            chunk = self._recv(remaining)
            if not chunk:
                break
            received = len(chunk)
            if received == size and not available:
                # Shortcut: nothing was buffered and recv returned exactly
                # what was asked for, so no copy is needed.
                return chunk
            assert received <= remaining, f"_recv({remaining}) returned {received} bytes"
            pending.write(chunk)
            del chunk  # explicit free
            if received == remaining:
                break
            available += received
        pending.seek(offset)
        return pending.read()

    def recv(self, size: int) -> bytes:
        """Return at most ``size`` bytes, reading from the wire only if the
        internal buffer is exhausted."""
        assert size > 0

        pending = self._rbuf
        offset = pending.tell()
        pending.seek(0, SEEK_END)
        end = pending.tell()
        pending.seek(offset)

        if end != offset:
            # Unconsumed bytes remain in the buffer; serve from there.
            return pending.read(size)

        # only read from the wire if our read buffer is exhausted
        chunk = self._recv(self._rbufsize)
        if len(chunk) == size:
            # shortcut: skip the buffer if we read exactly size bytes
            return chunk
        self._rbuf = BytesIO(chunk)
        return self._rbuf.read(size)

496 

497 

def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Extract a capabilities list from a string, if present.

    Capabilities follow a NUL byte and are separated by spaces.

    Args:
      text: String to extract from
    Returns: Tuple with text with capabilities removed and list of
      capabilities; the input is returned unchanged with an empty list when
      it contains no NUL byte.
    """
    if b"\0" not in text:
        return text, []
    payload, raw_capabilities = text.rstrip().split(b"\0")
    capabilities = raw_capabilities.strip().split(b" ")
    return (payload, capabilities)

509 

510 

def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Extract a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple with text with capabilities removed and list of
      capabilities; lines with fewer than three space-separated fields are
      returned unchanged with an empty list.
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        return (b" ".join(fields[:2]), fields[2:])
    return text, []

527 

528 

def ack_type(capabilities: Iterable[bytes]) -> int:
    """Extract the ack type from a capabilities list.

    Args:
      capabilities: The capabilities to inspect. Any iterable is accepted,
        including one-shot iterators.
    Returns: MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present,
      otherwise MULTI_ACK if ``multi_ack`` is present, otherwise SINGLE_ACK.
    """
    # Materialise once: two successive membership tests on a one-shot
    # iterator would leave nothing for the second test to inspect.
    present = tuple(capabilities)
    if b"multi_ack_detailed" in present:
        return MULTI_ACK_DETAILED
    if b"multi_ack" in present:
        return MULTI_ACK
    return SINGLE_ACK

536 

537 

class BufferedPktLineWriter:
    """Writer that wraps its data in pkt-lines and has an independent buffer.

    Consecutive calls to write() wrap the data in a pkt-line and then buffers
    it until enough lines have been written such that their total length
    (including length prefix) reach the buffer size.
    """

    def __init__(
        self, write: Callable[[bytes], Optional[int]], bufsize: int = 65515
    ) -> None:
        """Initialize the BufferedPktLineWriter.

        Args:
          write: A write callback for the underlying writer.
          bufsize: The internal buffer size, including length prefixes.
        """
        self._write = write
        self._bufsize = bufsize
        self._wbuf = BytesIO()
        # Number of bytes currently held in _wbuf.
        self._buflen = 0

    def write(self, data: bytes) -> None:
        """Write data, wrapping it in a pkt-line."""
        line = pkt_line(data)
        line_len = len(line)
        # Bytes by which this line would reach or exceed the buffer size.
        over = self._buflen + line_len - self._bufsize
        if over >= 0:
            # Top the buffer up, send it, and keep only the remainder.
            start = line_len - over
            self._wbuf.write(line[:start])
            self.flush()
        else:
            start = 0
        saved = line[start:]
        self._wbuf.write(saved)
        self._buflen += len(saved)

    def flush(self) -> None:
        """Flush all data from the buffer."""
        data = self._wbuf.getvalue()
        if data:
            self._write(data)
        # Reset the counter write() actually consults; leaving it stale
        # makes every later write() flush immediately.
        self._buflen = 0
        self._wbuf = BytesIO()

582 

583 

class PktLineParser:
    """Packet line parser that hands completed packets off to a callback."""

    def __init__(self, handle_pkt: Callable[[Optional[bytes]], None]) -> None:
        """Initialize the parser.

        Args:
          handle_pkt: Callback invoked with the payload of each completed
            packet, or None for a flush-pkt ('0000') or delim-pkt ('0001').
        """
        self.handle_pkt = handle_pkt
        # Bytes received so far that do not yet form a complete packet.
        self._readahead = BytesIO()

    def parse(self, data: bytes) -> None:
        """Parse a fragment of data and call back for any completed packets."""
        self._readahead.write(data)
        buf = self._readahead.getvalue()
        if len(buf) < 4:
            # Not even a complete length prefix yet.
            return
        while len(buf) >= 4:
            size = int(buf[:4], 16)
            if size == 0 or size == 1:
                # flush-pkt or delim-pkt: header only, no payload. Treating
                # '0001' as data would advance by a single byte and
                # desynchronise the stream.
                self.handle_pkt(None)
                buf = buf[4:]
            elif size <= len(buf):
                self.handle_pkt(buf[4:size])
                buf = buf[size:]
            else:
                # Incomplete packet; wait for more data.
                break
        self._readahead = BytesIO()
        self._readahead.write(buf)

    def get_tail(self) -> bytes:
        """Read back any unused data."""
        return self._readahead.getvalue()

613 

614 

def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Return the capabilities concatenated, each preceded by a space."""
    return b"".join(b" " + capability for capability in capabilities)

617 

618 

def format_ref_line(
    ref: bytes, sha: bytes, capabilities: Optional[list[bytes]] = None
) -> bytes:
    """Format a ref advertisement line.

    Args:
      ref: The ref name.
      sha: The object id the ref points at.
      capabilities: Optional capabilities; when given (even if empty) they
        are appended after a NUL byte via format_capability_line.
    Returns: ``sha SP ref`` followed by the optional capability part and a
      newline.
    """
    line = sha + b" " + ref
    if capabilities is not None:
        line = line + b"\0" + format_capability_line(capabilities)
    return line + b"\n"

626 

627 

def format_shallow_line(sha: bytes) -> bytes:
    """Return the ``shallow <sha>`` line for the given object id."""
    return b" ".join((COMMAND_SHALLOW, sha))

630 

631 

def format_unshallow_line(sha: bytes) -> bytes:
    """Return the ``unshallow <sha>`` line for the given object id."""
    return b" ".join((COMMAND_UNSHALLOW, sha))

634 

635 

def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Format an ACK line.

    Args:
      sha: The object id being acknowledged.
      ack_type: Optional ack qualifier; when non-empty it is appended
        after a space.
    Returns: ``ACK <sha>[ <ack_type>]`` terminated by a newline.
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"