Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/protocol.py: 34%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

280 statements  

1# protocol.py -- Shared parts of the git protocols 

2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk> 

3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk> 

4# 

5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU 

6# General Public License as published by the Free Software Foundation; version 2.0 

7# or (at your option) any later version. You can redistribute it and/or 

8# modify it under the terms of either of these two licenses. 

9# 

10# Unless required by applicable law or agreed to in writing, software 

11# distributed under the License is distributed on an "AS IS" BASIS, 

12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

13# See the License for the specific language governing permissions and 

14# limitations under the License. 

15# 

16# You should have received a copy of the licenses; if not, see 

17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License 

18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache 

19# License, Version 2.0. 

20# 

21 

22"""Generic functions for talking the git smart server protocol.""" 

23 

24from io import BytesIO 

25from os import SEEK_END 

26 

27import dulwich 

28 

29from .errors import GitProtocolError, HangupException 

30 

TCP_GIT_PORT = 9418

# Git protocol version 0 is the original Git protocol, which lacked a
# version number until Git protocol version 1 was introduced by Brandon
# Williams in 2017.
#
# Protocol version 1 is simply the original v0 protocol with the addition of
# a single packet line, which precedes the ref advertisement, indicating the
# protocol version being used. This was done in preparation for protocol v2.
#
# Git protocol version 2 was first introduced by Brandon Williams in 2018 and
# adds many features. See the gitprotocol-v2(5) manual page for details.
# As of 2024, Git only implements version 2 during 'git fetch' and still uses
# version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

# Forty ASCII zeros.
ZERO_SHA = b"0" * 40

# Acknowledgement modes, as returned by ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
SIDE_BAND_CHANNEL_DATA = 1  # pack data
SIDE_BAND_CHANNEL_PROGRESS = 2  # progress messages
SIDE_BAND_CHANNEL_FATAL = 3  # fatal error message just before stream aborts

# Capability names as they appear on the wire.
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"

# Capabilities shared by the upload and receive capability sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
}
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
}

# Largest positive signed 32-bit integer.
DEPTH_INFINITE = 0x7FFFFFFF

NAK_LINE = b"NAK\n"

127 

128 

def agent_string():
    """Return the agent identifier for this dulwich build as ASCII bytes.

    The identifier is ``dulwich/`` followed by the components of
    ``dulwich.__version__`` joined with dots.
    """
    version = ".".join(str(part) for part in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")

131 

132 

def capability_agent():
    """Return the ``agent=<agent string>`` capability as bytes."""
    return b"=".join((CAPABILITY_AGENT, agent_string()))

135 

136 

def capability_symref(from_ref, to_ref):
    """Return the ``symref=<from_ref>:<to_ref>`` capability as bytes.

    Args:
      from_ref: Name of the symbolic ref, as bytes.
      to_ref: Name of the ref it points at, as bytes.
    """
    mapping = from_ref + b":" + to_ref
    return CAPABILITY_SYMREF + b"=" + mapping

139 

140 

def extract_capability_names(capabilities):
    """Return the set of capability names, with any ``=value`` part dropped.

    Args:
      capabilities: Iterable of capability byte strings, each either
        ``name`` or ``name=value``.
    """
    # Everything before the first "=" is the name; a capability without
    # "=" is its own name.
    return {capability.partition(b"=")[0] for capability in capabilities}

143 

144 

def parse_capability(capability):
    """Split a capability into its name and optional value.

    Args:
      capability: Capability byte string, either ``name`` or ``name=value``.
    Returns: Tuple of (name, value); value is None when there is no ``=``.
      Only the first ``=`` separates name from value.
    """
    name, separator, value = capability.partition(b"=")
    if not separator:
        return (name, None)
    return (name, value)

150 

151 

def symref_capabilities(symrefs):
    """Build one symref capability per entry in ``symrefs``.

    Args:
      symrefs: Iterable whose items unpack into the (from_ref, to_ref)
        arguments of capability_symref.
    Returns: List of capability byte strings, in input order.
    """
    capabilities = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities

154 

155 

# Command keywords, as bytes.
COMMAND_DEEPEN = b"deepen"
COMMAND_SHALLOW = b"shallow"  # prefix used by format_shallow_line
COMMAND_UNSHALLOW = b"unshallow"  # prefix used by format_unshallow_line
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"

162 

163 

def format_cmd_pkt(cmd, *args):
    """Format a command packet payload.

    Args:
      cmd: Command name, as bytes.
      args: Arguments, as bytes; each is followed by a NUL byte.
    Returns: ``cmd``, a single space, then the NUL-terminated arguments.
    """
    terminated = [arg + b"\0" for arg in args]
    return cmd + b" " + b"".join(terminated)

166 

167 

def parse_cmd_pkt(line):
    """Parse a command packet payload into a command and its arguments.

    The expected layout is the command name, a single space, and then the
    arguments, each terminated by a NUL byte.

    Args:
      line: Packet payload, as bytes.
    Returns: Tuple of (command, [list of arguments]).
    Raises:
      GitProtocolError: If the space separator is missing or the argument
        section does not end with a NUL byte.
    """
    cmd, separator, args = line.partition(b" ")
    if not separator:
        # Without this check a missing space made find() return -1, which
        # silently chopped the last byte off the command and reused the
        # whole line as the argument section.
        raise GitProtocolError("Malformed command packet: missing space separator")
    # Explicit check rather than assert: the line comes from the peer and
    # asserts are stripped when Python runs with -O.
    if args[-1:] != b"\x00":
        raise GitProtocolError(
            "Malformed command packet: arguments are not NUL-terminated"
        )
    return cmd, args[:-1].split(b"\0")

173 

174 

def pkt_line(data):
    """Wrap data in a pkt-line.

    Args:
      data: The data to wrap, as bytes or None.
    Returns: The data prefixed with its length in pkt-line format; if data was
      None, returns the flush-pkt ('0000').
    Raises:
      ValueError: If the total length (payload plus the 4-byte prefix) does
        not fit in four hexadecimal digits.
    """
    if data is None:
        return b"0000"
    size = len(data) + 4
    if size > 0xFFFF:
        # "%04x" would emit five digits here, and a reader taking the first
        # four as the length would lose framing for the rest of the stream.
        raise ValueError(
            "pkt-line payload of %d bytes is too long for a 4-digit length prefix"
            % len(data)
        )
    return ("%04x" % size).encode("ascii") + data

186 

187 

class Protocol:
    """Class for interacting with a remote git process over the wire.

    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
    consists of the length of the line as a 4-byte hex string, followed by the
    payload data. The length includes the 4-byte header. The special line
    '0000' indicates the end of a section of input and is called a 'flush-pkt'.

    For details on the pkt-line format, see the cgit distribution:
        Documentation/technical/protocol-common.txt
    """

    def __init__(self, read, write, close=None, report_activity=None) -> None:
        """Initialize the protocol wrapper.

        Args:
          read: Callable taking a byte count and returning bytes read.
          write: Callable taking the bytes to send.
          close: Optional callable invoked by close().
          report_activity: Optional callable invoked as
            report_activity(byte_count, "read" or "write").
        """
        self.read = read
        self.write = write
        self._close = close
        self.report_activity = report_activity
        # Holds at most one pushed-back pkt-line; see unread_pkt_line.
        self._readahead = None

    def close(self):
        """Invoke the close callback, if one was supplied."""
        if self._close:
            self._close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def read_pkt_line(self):
        """Reads a pkt-line from the remote git process.

        This method may read from the readahead buffer; see unread_pkt_line.

        Returns: The next string from the stream, without the length prefix, or
            None for a flush-pkt ('0000') or delim-pkt ('0001').
        Raises:
          HangupException: If the stream returns no data for the length
            prefix, or the connection is reset.
          GitProtocolError: If the length prefix is '0002' or '0003', the
            payload is shorter than announced, or an OSError occurs.
        """
        if self._readahead is None:
            read = self.read
        else:
            read = self._readahead.read
            self._readahead = None

        try:
            sizestr = read(4)
            if not sizestr:
                raise HangupException
            size = int(sizestr, 16)
            if size in (0, 1):  # flush-pkt or delim-pkt
                if self.report_activity:
                    self.report_activity(4, "read")
                return None
            if size < 4:
                # The length includes the 4-byte prefix, so 2 and 3 cannot
                # describe a data packet. Reject them before read() is
                # called with a negative size, which many file-like
                # objects treat as "read everything".
                raise GitProtocolError(
                    f"Invalid pkt-line length prefix {size:04x}"
                )
            if self.report_activity:
                self.report_activity(size, "read")
            pkt_contents = read(size - 4)
        except ConnectionResetError as exc:
            raise HangupException from exc
        except OSError as exc:
            raise GitProtocolError(str(exc)) from exc
        else:
            if len(pkt_contents) + 4 != size:
                raise GitProtocolError(
                    f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}"
                )
            return pkt_contents

    def eof(self):
        """Test whether the protocol stream has reached EOF.

        Note that this refers to the actual stream EOF and not just a
        flush-pkt.

        The pkt-line read while probing is pushed back with unread_pkt_line,
        so a None result (flush-pkt or delim-pkt) is re-read as a flush-pkt.

        Returns: True if the stream is at EOF, False otherwise.
        """
        try:
            next_line = self.read_pkt_line()
        except HangupException:
            return True
        self.unread_pkt_line(next_line)
        return False

    def unread_pkt_line(self, data):
        """Unread a single line of data into the readahead buffer.

        This method can be used to unread a single pkt-line into a fixed
        readahead buffer.

        Args:
          data: The data to unread, without the length prefix.

        Raises:
          ValueError: If more than one pkt-line is unread.
        """
        if self._readahead is not None:
            raise ValueError("Attempted to unread multiple pkt-lines.")
        self._readahead = BytesIO(pkt_line(data))

    def read_pkt_seq(self):
        """Read a sequence of pkt-lines from the remote git process.

        Iteration stops at the first falsy result of read_pkt_line, which
        covers None (flush-pkt or delim-pkt) and an empty payload.

        Returns: Yields each line of data up to but not including the next
            flush-pkt.
        """
        pkt = self.read_pkt_line()
        while pkt:
            yield pkt
            pkt = self.read_pkt_line()

    def write_pkt_line(self, line):
        """Sends a pkt-line to the remote git process.

        Args:
          line: Bytes containing the data to send, without the length
            prefix, or None to send a flush-pkt.
        Raises:
          GitProtocolError: If an OSError occurs while writing.
        """
        try:
            line = pkt_line(line)
            self.write(line)
            if self.report_activity:
                self.report_activity(len(line), "write")
        except OSError as exc:
            raise GitProtocolError(str(exc)) from exc

    def write_sideband(self, channel, blob):
        """Write multiplexed data to the sideband.

        Args:
          channel: An int specifying the channel to write to.
          blob: A blob of data (as bytes) to send on this channel.
        """
        # a pktline can be a max of 65520. a sideband line can therefore be
        # 65520-5 = 65515
        max_payload = 65515
        # The channel travels as one raw byte, unlike the ASCII hex length.
        prefix = bytes([channel])
        # Step through by offset instead of repeatedly re-slicing the
        # remainder, which copied the rest of the blob on every iteration.
        for offset in range(0, len(blob), max_payload):
            self.write_pkt_line(prefix + blob[offset : offset + max_payload])

    def send_cmd(self, cmd, *args):
        """Send a command and some arguments to a git server.

        Only used for the TCP git protocol (git://).

        Args:
          cmd: The remote service to access.
          args: List of arguments to send to remote service.
        """
        self.write_pkt_line(format_cmd_pkt(cmd, *args))

    def read_cmd(self):
        """Read a command and some arguments from the git client.

        Only used for the TCP git protocol (git://).

        Returns: A tuple of (command, [list of arguments]).
        Raises:
          GitProtocolError: If a flush-pkt or delim-pkt arrives where the
            command packet was expected.
        """
        line = self.read_pkt_line()
        if line is None:
            # parse_cmd_pkt needs bytes; passing None crashed with an
            # AttributeError instead of a protocol error.
            raise GitProtocolError("Expected a command packet, got a flush-pkt")
        return parse_cmd_pkt(line)

345 

346 

_RBUFSIZE = 8192  # Default read buffer size.


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self, recv, write, close=None, report_activity=None, rbufsize=_RBUFSIZE
    ) -> None:
        """Initialize the protocol around a recv-style callable.

        Args:
          recv: Callable taking a maximum byte count and returning bytes.
          write: Callable taking the bytes to send.
          close: Optional callable invoked on close.
          report_activity: Optional activity callback, passed to Protocol.
          rbufsize: Byte count requested from ``recv`` when recv() refills
            its buffer.
        """
        # Protocol reads through this class's own buffered read().
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size):
        """Read ``size`` bytes, returning fewer only if recv reports EOF.

        Args:
          size: Number of bytes wanted; must be positive.
        Returns: Bytes drawn from the internal buffer and, if needed, from
          further recv calls.
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # A BytesIO, rather than a list of the strings returned by recv(),
        # keeps memory usage and fragmentation down when rbufsize is large
        # compared to the typical return value of recv().
        pending = self._rbuf
        origin = pending.tell()
        pending.seek(0, SEEK_END)
        # recv() may already have consumed part of the buffer.
        buffered = pending.tell() - origin
        if buffered >= size:
            # The buffer alone satisfies the request: hand out `size` bytes
            # and carry the remainder over into a fresh buffer.
            pending.seek(origin)
            result = pending.read(size)
            self._rbuf = BytesIO()
            self._rbuf.write(pending.read())
            self._rbuf.seek(0)
            return result

        # Everything buffered will be returned, so start the next buffer
        # empty and keep filling the old one through `pending`.
        self._rbuf = BytesIO()
        while True:
            missing = size - buffered
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            chunk = self._recv(missing)
            if not chunk:
                break
            chunk_len = len(chunk)
            if chunk_len == size and not buffered:
                # Nothing was buffered and recv delivered exactly the
                # requested amount, so skip the buffer copy altogether.
                return chunk
            assert chunk_len <= missing, "_recv(%d) returned %d bytes" % (
                missing,
                chunk_len,
            )
            pending.write(chunk)
            del chunk  # explicit free
            if chunk_len == missing:
                break
            buffered += chunk_len
        pending.seek(origin)
        return pending.read()

    def recv(self, size):
        """Return up to ``size`` bytes, calling recv only if the buffer is empty.

        Args:
          size: Maximum number of bytes wanted; must be positive.
        Returns: Bytes from the internal buffer, refilled by one recv call
          when it was exhausted.
        """
        assert size > 0

        pending = self._rbuf
        origin = pending.tell()
        pending.seek(0, SEEK_END)
        end = pending.tell()
        pending.seek(origin)

        if end == origin:
            # only read from the wire if our read buffer is exhausted
            chunk = self._recv(self._rbufsize)
            if len(chunk) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return chunk
            pending = BytesIO(chunk)
            del chunk  # explicit free
            self._rbuf = pending
        return pending.read(size)

453 

454 

def extract_capabilities(text):
    """Extract a capabilities list from a string, if present.

    Capabilities follow a NUL byte and are separated by spaces.

    Args:
      text: Bytes to extract from
    Returns: Tuple with text with capabilities removed and list of
      capabilities. Without a NUL byte, the text is returned untouched with
      an empty list.
    """
    if b"\0" in text:
        head, capability_part = text.rstrip().split(b"\0")
        return (head, capability_part.strip().split(b" "))
    return text, []

466 

467 

def extract_want_line_capabilities(text):
    """Extract a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple with text with capabilities removed and list of
      capabilities. A line with fewer than three space-separated fields is
      returned untouched with an empty list.
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        # The first two fields are the command and the object id.
        return (b" ".join(fields[:2]), fields[2:])
    return text, []

484 

485 

def ack_type(capabilities):
    """Extract the ack type from a capabilities list.

    Args:
      capabilities: Container of capability names, as bytes.
    Returns: MULTI_ACK_DETAILED or MULTI_ACK if the matching capability is
      present (the detailed variant is checked first), else SINGLE_ACK.
    """
    modes = (
        (b"multi_ack_detailed", MULTI_ACK_DETAILED),
        (b"multi_ack", MULTI_ACK),
    )
    for capability, mode in modes:
        if capability in capabilities:
            return mode
    return SINGLE_ACK

493 

494 

class BufferedPktLineWriter:
    """Writer that wraps its data in pkt-lines and has an independent buffer.

    Consecutive calls to write() wrap the data in a pkt-line and then buffers
    it until enough lines have been written such that their total length
    (including length prefix) reach the buffer size.
    """

    def __init__(self, write, bufsize=65515) -> None:
        """Initialize the BufferedPktLineWriter.

        Args:
          write: A write callback for the underlying writer.
          bufsize: The internal buffer size, including length prefixes.
        """
        self._write = write
        self._bufsize = bufsize
        self._wbuf = BytesIO()
        # Number of bytes currently held in _wbuf.
        self._buflen = 0

    def write(self, data):
        """Write data, wrapping it in a pkt-line.

        When the framed line would fill the buffer, the buffer is topped up
        to ``bufsize``, flushed, and the rest of the line starts the next
        buffer.

        Args:
          data: Payload for pkt_line (bytes, or None for a flush-pkt).
        """
        line = pkt_line(data)
        line_len = len(line)
        over = self._buflen + line_len - self._bufsize
        if over >= 0:
            # Bytes of this line that still fit before the flush. Clamped at
            # zero because a single line longer than bufsize can leave the
            # buffer above bufsize after the previous flush.
            start = max(line_len - over, 0)
            self._wbuf.write(line[:start])
            self.flush()
        else:
            start = 0
        saved = line[start:]
        self._wbuf.write(saved)
        self._buflen += len(saved)

    def flush(self):
        """Flush all data from the buffer."""
        data = self._wbuf.getvalue()
        if data:
            self._write(data)
        # Reset the counter that write() reads. This used to assign to a
        # separate `_len` attribute, so `_buflen` never went back to zero
        # and every write after the first overflow triggered a flush.
        self._buflen = 0
        self._wbuf = BytesIO()

537 

538 

class PktLineParser:
    """Packet line parser that hands completed packets off to a callback."""

    def __init__(self, handle_pkt) -> None:
        """Initialize the parser.

        Args:
          handle_pkt: Callback invoked with each completed packet payload
            (bytes), or with None for a flush-pkt or delim-pkt.
        """
        self.handle_pkt = handle_pkt
        # Bytes received so far that do not yet form a complete packet.
        self._readahead = BytesIO()

    def parse(self, data):
        """Parse a fragment of data and call back for any completed packets.

        Args:
          data: The next bytes received; may end in the middle of a packet.
        Raises:
          GitProtocolError: If a length prefix of '0002' or '0003' is seen.
        """
        self._readahead.write(data)
        buf = self._readahead.getvalue()
        if len(buf) < 4:
            return
        while len(buf) >= 4:
            size = int(buf[:4], 16)
            if size in (0, 1):
                # flush-pkt or delim-pkt: a bare 4-byte prefix, reported as
                # None like Protocol.read_pkt_line does. '0001' used to be
                # handled as an empty data packet that advanced one byte,
                # which misaligned everything after it.
                self.handle_pkt(None)
                buf = buf[4:]
            elif size < 4:
                # The length includes the 4-byte prefix, so 2 and 3 cannot
                # describe a data packet.
                raise GitProtocolError(
                    f"Invalid pkt-line length prefix {size:04x}"
                )
            elif size <= len(buf):
                self.handle_pkt(buf[4:size])
                buf = buf[size:]
            else:
                # Incomplete packet: wait for more data.
                break
        self._readahead = BytesIO()
        self._readahead.write(buf)

    def get_tail(self):
        """Read back any unused data."""
        return self._readahead.getvalue()

568 

569 

def format_capability_line(capabilities):
    """Join capabilities into one byte string, each preceded by a space.

    Args:
      capabilities: Iterable of capability byte strings.
    Returns: The concatenation; note that it starts with a space whenever
      at least one capability is given.
    """
    return b"".join(b" " + capability for capability in capabilities)

572 

573 

def format_ref_line(ref, sha, capabilities=None):
    """Format a ref line, optionally carrying capabilities.

    Args:
      ref: Ref name, as bytes.
      sha: Object id, as bytes.
      capabilities: Optional iterable of capabilities; when not None they
        are appended after a NUL byte via format_capability_line.
    Returns: ``sha``, a space, ``ref``, the optional capability section and
      a trailing newline.
    """
    if capabilities is None:
        tail = b"\n"
    else:
        tail = b"\0" + format_capability_line(capabilities) + b"\n"
    return sha + b" " + ref + tail

579 

580 

def format_shallow_line(sha):
    """Return the ``shallow <sha>`` line as bytes, without a newline.

    Args:
      sha: Object id, as bytes.
    """
    return b" ".join((COMMAND_SHALLOW, sha))

583 

584 

def format_unshallow_line(sha):
    """Return the ``unshallow <sha>`` line as bytes, without a newline.

    Args:
      sha: Object id, as bytes.
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))

587 

588 

def format_ack_line(sha, ack_type=b""):
    """Format an ACK line.

    Args:
      sha: Object id being acknowledged, as bytes.
      ack_type: Optional qualifier, as bytes; when non-empty it follows the
        sha after a space.
    Returns: ``ACK <sha>[ <ack_type>]`` with a trailing newline.
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"

592 return b"ACK " + sha + ack_type + b"\n"