Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/dulwich/protocol.py: 34%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# protocol.py -- Shared parts of the git protocols
2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk>
4#
5# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
6# General Public License as public by the Free Software Foundation; version 2.0
7# or (at your option) any later version. You can redistribute it and/or
8# modify it under the terms of either of these two licenses.
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16# You should have received a copy of the licenses; if not, see
17# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
18# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
19# License, Version 2.0.
20#
22"""Generic functions for talking the git smart server protocol."""
24from io import BytesIO
25from os import SEEK_END
27import dulwich
29from .errors import GitProtocolError, HangupException
# Port number used for the TCP git protocol (git://).
TCP_GIT_PORT = 9418

# Git protocol version 0 is the original Git protocol, which lacked a
# version number until Git protocol version 1 was introduced by Brandon
# Williams in 2017.
#
# Protocol version 1 is simply the original v0 protocol with the addition of
# a single packet line, which precedes the ref advertisement, indicating the
# protocol version being used. This was done in preparation for protocol v2.
#
# Git protocol version 2 was first introduced by Brandon Williams in 2018 and
# adds many features. See the gitprotocol-v2(5) manual page for details.
# As of 2024, Git only implements version 2 during 'git fetch' and still uses
# version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

# Object id made of 40 ASCII "0" characters.
ZERO_SHA = b"0" * 40

# Acknowledgement modes; ack_type() maps a capability list onto one of these.
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
# pack data
SIDE_BAND_CHANNEL_DATA = 1
# progress messages
SIDE_BAND_CHANNEL_PROGRESS = 2
# fatal error message just before stream aborts
SIDE_BAND_CHANNEL_FATAL = 3
# Capability names, as byte strings. A capability may carry a value after
# an "=" sign; see parse_capability().
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"
# Capabilities included in both the upload and the receive capability sets.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]

# Known upload capabilities: the common ones plus the upload-specific ones.
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
}

# Known receive capabilities: the common ones plus the receive-specific ones.
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
}

# Depth value standing for "infinite": 0x7FFFFFFF == 2**31 - 1.
DEPTH_INFINITE = 0x7FFFFFFF

# A complete NAK line, including the trailing newline.
NAK_LINE = b"NAK\n"
def agent_string():
    """Return the agent identifier for this dulwich version.

    Returns: ASCII-encoded bytes of the form ``dulwich/<version>``, where
      ``<version>`` is the elements of ``dulwich.__version__`` joined by
      dots.
    """
    version = ".".join(str(component) for component in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")
def capability_agent():
    """Return the ``agent=<agent string>`` capability as bytes."""
    return b"".join([CAPABILITY_AGENT, b"=", agent_string()])
def capability_symref(from_ref, to_ref):
    """Return the ``symref=<from_ref>:<to_ref>`` capability as bytes.

    Args:
      from_ref: Name of the symbolic ref, as bytes.
      to_ref: Name of the ref it points at, as bytes.
    """
    return b"".join([CAPABILITY_SYMREF, b"=", from_ref, b":", to_ref])
def extract_capability_names(capabilities):
    """Return the set of capability names found in ``capabilities``.

    Each entry is run through parse_capability() and only its name part is
    kept, so ``b"agent=foo"`` contributes ``b"agent"``.

    Args:
      capabilities: Iterable of capability byte strings.
    Returns: A set of capability names.
    """
    names = set()
    for capability in capabilities:
        names.add(parse_capability(capability)[0])
    return names
def parse_capability(capability):
    """Split a capability into its name and optional value.

    Args:
      capability: Capability as bytes, either ``name`` or ``name=value``.
    Returns: Tuple ``(name, value)``; ``value`` is None when the capability
      contains no ``=``. Only the first ``=`` separates name from value.
    """
    name, separator, value = capability.partition(b"=")
    if not separator:
        return (name, None)
    return (name, value)
def symref_capabilities(symrefs):
    """Build one symref capability per entry of ``symrefs``.

    Args:
      symrefs: Iterable whose items are unpacked as the arguments of
        capability_symref(), i.e. ``(from_ref, to_ref)`` pairs.
    Returns: List of symref capability byte strings.
    """
    capabilities = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities
# Command keywords, as byte strings.
COMMAND_DEEPEN = b"deepen"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"
def format_cmd_pkt(cmd, *args):
    """Format a command and its arguments as a packet payload.

    Args:
      cmd: The command name, as bytes.
      args: Arguments, as bytes; each one is terminated with a NUL byte.
    Returns: ``cmd``, a single space, then every argument followed by NUL.
    """
    terminated_args = b"".join(arg + b"\0" for arg in args)
    return cmd + b" " + terminated_args
def parse_cmd_pkt(line):
    """Parse a command packet payload as produced by format_cmd_pkt().

    Args:
      line: Payload of the form ``cmd SP arg NUL [arg NUL ...]``, as bytes.
    Returns: Tuple ``(cmd, [args])``.
    Raises:
      AssertionError: If the part after the first space does not end with a
        NUL byte.
    """
    space_at = line.find(b" ")
    cmd = line[:space_at]
    raw_args = line[space_at + 1 :]
    assert raw_args.endswith(b"\x00")
    # Drop the final terminator so that splitting yields no trailing entry.
    return cmd, raw_args[:-1].split(b"\0")
def pkt_line(data):
    """Wrap data in a pkt-line.

    Args:
      data: The payload to wrap, as bytes, or None.
    Returns: The payload prefixed with its total length (payload plus the
      4-byte prefix) as four hex digits; if data was None, returns the
      flush-pkt (b'0000').
    """
    if data is None:
        return b"0000"
    length_prefix = b"%04x" % (len(data) + 4)
    return length_prefix + data
class Protocol:
    """Class for interacting with a remote git process over the wire.

    Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
    consists of the length of the line as a 4-byte hex string, followed by the
    payload data. The length includes the 4-byte header. The special line
    '0000' indicates the end of a section of input and is called a 'flush-pkt'.

    For details on the pkt-line format, see the cgit distribution:
        Documentation/technical/protocol-common.txt
    """

    def __init__(self, read, write, close=None, report_activity=None) -> None:
        """Create a protocol wrapper around a pair of I/O callbacks.

        Args:
          read: Callable taking a byte count and returning bytes.
          write: Callable taking the bytes to send.
          close: Optional callable invoked by close().
          report_activity: Optional callable invoked as
            ``report_activity(num_bytes, "read" | "write")``.
        """
        self._readahead = None
        self._close = close
        self.report_activity = report_activity
        self.read = read
        self.write = write

    def close(self):
        """Invoke the close callback, if one was supplied."""
        if not self._close:
            return
        self._close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def read_pkt_line(self):
        """Reads a pkt-line from the remote git process.

        This method may read from the readahead buffer; see unread_pkt_line.

        Returns: The next string from the stream, without the length prefix, or
            None for a flush-pkt ('0000') or delim-pkt ('0001').
        Raises:
          HangupException: If the stream yields no length prefix, or the
            connection was reset.
          GitProtocolError: On any other OSError, or if the payload length
            does not match the length prefix.
        """
        pending = self._readahead
        if pending is not None:
            # A line was pushed back; serve it and clear the readahead slot.
            self._readahead = None
            fetch = pending.read
        else:
            fetch = self.read

        try:
            header = fetch(4)
            if not header:
                raise HangupException
            size = int(header, 16)
            special = size in (0, 1)  # flush-pkt or delim-pkt
            if self.report_activity:
                # Special packets consist of the 4-byte header only.
                self.report_activity(4 if special else size, "read")
            if special:
                return None
            payload = fetch(size - 4)
        except ConnectionResetError as exc:
            raise HangupException from exc
        except OSError as exc:
            raise GitProtocolError(str(exc)) from exc

        received = len(payload) + 4
        if received != size:
            raise GitProtocolError(
                f"Length of pkt read {received:04x} does not match length prefix {size:04x}"
            )
        return payload

    def eof(self):
        """Test whether the protocol stream has reached EOF.

        Note that this refers to the actual stream EOF and not just a
        flush-pkt.

        Returns: True if the stream is at EOF, False otherwise.
        """
        try:
            peeked = self.read_pkt_line()
        except HangupException:
            return True
        else:
            # Not at EOF: push the line back so the next read sees it again.
            self.unread_pkt_line(peeked)
            return False

    def unread_pkt_line(self, data):
        """Unread a single line of data into the readahead buffer.

        This method can be used to unread a single pkt-line into a fixed
        readahead buffer.

        Args:
          data: The data to unread, without the length prefix.

        Raises:
          ValueError: If more than one pkt-line is unread.
        """
        if self._readahead is None:
            self._readahead = BytesIO(pkt_line(data))
            return
        raise ValueError("Attempted to unread multiple pkt-lines.")

    def read_pkt_seq(self):
        """Read a sequence of pkt-lines from the remote git process.

        Returns: Yields each line of data until read_pkt_line() returns a
            falsy value (None for a flush-pkt, or an empty payload); that
            value is not yielded.
        """
        while True:
            pkt = self.read_pkt_line()
            if not pkt:
                return
            yield pkt

    def write_pkt_line(self, line):
        """Sends a pkt-line to the remote git process.

        Args:
          line: Bytes containing the data to send, without the length
            prefix.
        Raises:
          GitProtocolError: If an OSError occurs while sending.
        """
        try:
            framed = pkt_line(line)
            self.write(framed)
            if self.report_activity:
                self.report_activity(len(framed), "write")
        except OSError as exc:
            raise GitProtocolError(str(exc)) from exc

    def write_sideband(self, channel, blob):
        """Write multiplexed data to the sideband.

        Args:
          channel: An int specifying the channel to write to.
          blob: A blob of data (as bytes) to send on this channel.
        """
        # A pkt-line can be at most 65520 bytes, so a sideband payload can be
        # at most 65520 - 5 = 65515 bytes. The length prefix is ASCII hex,
        # whereas the channel number is sent as a single binary byte.
        max_chunk = 65515
        while blob:
            chunk, blob = blob[:max_chunk], blob[max_chunk:]
            self.write_pkt_line(bytes(bytearray([channel])) + chunk)

    def send_cmd(self, cmd, *args):
        """Send a command and some arguments to a git server.

        Only used for the TCP git protocol (git://).

        Args:
          cmd: The remote service to access.
          args: List of arguments to send to the remote service.
        """
        payload = format_cmd_pkt(cmd, *args)
        self.write_pkt_line(payload)

    def read_cmd(self):
        """Read a command and some arguments from the git client.

        Only used for the TCP git protocol (git://).

        Returns: A tuple of (command, [list of arguments]).
        """
        return parse_cmd_pkt(self.read_pkt_line())
_RBUFSIZE = 8192  # Default read buffer size.


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self, recv, write, close=None, report_activity=None, rbufsize=_RBUFSIZE
    ) -> None:
        """Create a protocol wrapper around a recv-style callback.

        Args:
          recv: Callable taking a maximum byte count and returning bytes; it
            is called by both read() and recv() of this class.
          write: Callable taking the bytes to send.
          close: Optional callable invoked by close().
          report_activity: Optional activity callback, passed to Protocol.
          rbufsize: Number of bytes requested from ``recv`` when recv() has
            to refill its buffer.
        """
        # This class's own buffered read() is what the base class reads from.
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size):
        """Read ``size`` bytes, calling the recv callback until satisfied.

        Buffered bytes left over from an earlier recv() are consumed first.

        Args:
          size: Number of bytes to read; must be positive.
        Returns: ``size`` bytes, or fewer if the recv callback returned an
          empty result (EOF) before enough data arrived.
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        buf_len = buf.tell() - start
        if buf_len >= size:
            # Already have size bytes in our buffer?  Extract and return.
            buf.seek(start)
            rv = buf.read(size)
            # Keep whatever is left over in a fresh buffer, rewound to 0 so
            # that the next read()/recv() starts at the unread data.
            self._rbuf = BytesIO()
            self._rbuf.write(buf.read())
            self._rbuf.seek(0)
            return rv

        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
        while True:
            left = size - buf_len
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            data = self._recv(left)
            if not data:
                # Empty result from the recv callback: stop and return
                # whatever has been collected so far.
                break
            n = len(data)
            if n == size and not buf_len:
                # Shortcut.  Avoid buffer data copies when:
                # - We have no data in our buffer.
                # AND
                # - Our call to recv returned exactly the
                #   number of bytes we were asked to read.
                return data
            if n == left:
                buf.write(data)
                del data  # explicit free
                break
            assert n <= left, "_recv(%d) returned %d bytes" % (left, n)
            buf.write(data)
            buf_len += n
            del data  # explicit free
            # assert buf_len == buf.tell()
        # Return everything from the first unconsumed byte onwards.
        buf.seek(start)
        return buf.read()

    def recv(self, size):
        """Return at most ``size`` bytes, preferring already-buffered data.

        The recv callback is only called when the internal buffer holds no
        unread data; it is then asked for ``rbufsize`` bytes and any surplus
        beyond ``size`` stays buffered for later calls.

        Args:
          size: Maximum number of bytes to return; must be positive.
        Returns: Up to ``size`` bytes.
        """
        assert size > 0

        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        buf_len = buf.tell()
        buf.seek(start)

        # Number of buffered bytes that have not been handed out yet.
        left = buf_len - start
        if not left:
            # only read from the wire if our read buffer is exhausted
            data = self._recv(self._rbufsize)
            if len(data) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return data
            buf = BytesIO()
            buf.write(data)
            buf.seek(0)
            del data  # explicit free
            self._rbuf = buf
        return buf.read(size)
def extract_capabilities(text):
    """Extract a capabilities list from a string, if present.

    The capabilities follow a NUL byte and are separated by spaces.

    Args:
      text: Bytes to extract from
    Returns: Tuple with text with capabilities removed and list of
      capabilities. Without a NUL byte, ``text`` is returned unchanged
      together with an empty list.
    Raises:
      ValueError: If ``text`` contains more than one NUL byte.
    """
    if b"\0" in text:
        remainder, capability_blob = text.rstrip().split(b"\0")
        return (remainder, capability_blob.strip().split(b" "))
    return text, []
def extract_want_line_capabilities(text):
    """Extract a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple with text with capabilities removed and list of
      capabilities. A line with fewer than three space-separated fields is
      returned unchanged together with an empty list.
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        command_and_id, capabilities = fields[:2], fields[2:]
        return (b" ".join(command_and_id), capabilities)
    return text, []
def ack_type(capabilities):
    """Extract the ack type from a capabilities list.

    Args:
      capabilities: Container of capability byte strings.
    Returns: MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present,
      otherwise MULTI_ACK if ``multi_ack`` is present, otherwise SINGLE_ACK.
    """
    # Ordered from most to least specific; the first match wins.
    precedence = (
        (b"multi_ack_detailed", MULTI_ACK_DETAILED),
        (b"multi_ack", MULTI_ACK),
    )
    for capability, mode in precedence:
        if capability in capabilities:
            return mode
    return SINGLE_ACK
class BufferedPktLineWriter:
    """Writer that wraps its data in pkt-lines and has an independent buffer.

    Consecutive calls to write() wrap the data in a pkt-line and then buffers
    it until enough lines have been written such that their total length
    (including length prefix) reach the buffer size.
    """

    def __init__(self, write, bufsize=65515) -> None:
        """Initialize the BufferedPktLineWriter.

        Args:
          write: A write callback for the underlying writer.
          bufsize: The internal buffer size, including length prefixes.
        """
        self._write = write
        self._bufsize = bufsize
        self._wbuf = BytesIO()
        # Number of bytes currently held in _wbuf.
        self._buflen = 0

    def write(self, data):
        """Write data, wrapping it in a pkt-line.

        If the new line brings the buffer up to or past the buffer size, the
        part of the line that still fits is appended, the buffer is flushed
        to the underlying writer, and the rest of the line starts the next
        buffer.

        Args:
          data: The payload, without length prefix, or None for a flush-pkt.
        """
        line = pkt_line(data)
        line_len = len(line)
        over = self._buflen + line_len - self._bufsize
        if over >= 0:
            # Fill the buffer with the head of the line, then flush it.
            start = line_len - over
            self._wbuf.write(line[:start])
            self.flush()
        else:
            start = 0
        saved = line[start:]
        self._wbuf.write(saved)
        self._buflen += len(saved)

    def flush(self):
        """Flush all data from the buffer.

        Passes the buffered bytes (if any) to the write callback and resets
        the buffer and its length counter.
        """
        data = self._wbuf.getvalue()
        if data:
            self._write(data)
        # Reset the counter that write() consults. The original assigned to
        # an unrelated attribute, so the counter kept growing across flushes.
        self._buflen = 0
        self._wbuf = BytesIO()
class PktLineParser:
    """Packet line parser that hands completed packets off to a callback."""

    def __init__(self, handle_pkt) -> None:
        """Initialize the parser.

        Args:
          handle_pkt: Callback invoked once per completed packet, with the
            payload as bytes, or None for a flush-pkt ('0000') or delim-pkt
            ('0001').
        """
        self.handle_pkt = handle_pkt
        # Bytes received so far that do not yet form a complete packet.
        self._readahead = BytesIO()

    def parse(self, data):
        """Parse a fragment of data and call back for any completed packets.

        Args:
          data: The next chunk of raw bytes; packets may be split across
            calls at arbitrary positions.
        """
        self._readahead.write(data)
        buf = self._readahead.getvalue()
        end = len(buf)
        pos = 0
        while end - pos >= 4:
            size = int(buf[pos : pos + 4], 16)
            if size in (0, 1):
                # flush-pkt or delim-pkt: header only, no payload. Handled
                # like Protocol.read_pkt_line does, so that a delim-pkt
                # consumes its 4 header bytes and keeps the stream aligned.
                self.handle_pkt(None)
                pos += 4
            elif size <= end - pos:
                self.handle_pkt(buf[pos + 4 : pos + size])
                pos += size
            else:
                # Incomplete packet; wait for more data.
                break
        if pos:
            # Keep only the unconsumed tail. It is written (not passed to the
            # constructor) so the stream position stays at the end and the
            # next parse() call appends instead of overwriting.
            self._readahead = BytesIO()
            self._readahead.write(buf[pos:])

    def get_tail(self):
        """Read back any unused data."""
        return self._readahead.getvalue()
def format_capability_line(capabilities):
    """Join capabilities into one byte string, each preceded by a space.

    Args:
      capabilities: Iterable of capability byte strings.
    Returns: The concatenation of ``b" " + capability`` for every
      capability; empty bytes if there are none.
    """
    pieces = []
    for capability in capabilities:
        pieces.append(b" " + capability)
    return b"".join(pieces)
def format_ref_line(ref, sha, capabilities=None):
    """Format a ref line, optionally carrying a capability list.

    Args:
      ref: The ref name, as bytes.
      sha: The object id, as bytes.
      capabilities: Optional iterable of capabilities. When not None, a NUL
        byte and the output of format_capability_line() are appended after
        the ref name.
    Returns: ``sha SP ref [NUL capabilities] LF`` as bytes.
    """
    line = sha + b" " + ref
    if capabilities is not None:
        line = line + b"\0" + format_capability_line(capabilities)
    return line + b"\n"
def format_shallow_line(sha):
    """Return ``shallow <sha>`` as bytes (no trailing newline)."""
    return b"".join([COMMAND_SHALLOW, b" ", sha])
def format_unshallow_line(sha):
    """Return ``unshallow <sha>`` as bytes (no trailing newline)."""
    return b"".join([COMMAND_UNSHALLOW, b" ", sha])
def format_ack_line(sha, ack_type=b""):
    """Format an ACK line.

    Args:
      sha: The object id being acknowledged, as bytes.
      ack_type: Optional ack status, as bytes; when non-empty it is appended
        after the object id, separated by a space.
    Returns: ``ACK SP sha [SP ack_type] LF`` as bytes.
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"