Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 32%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# protocol.py -- Shared parts of the git protocols
2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk>
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
23"""Generic functions for talking the git smart server protocol."""
# Names exported by ``from <this module> import *``. Entries are grouped as
# constants, then classes, then functions.
# NOTE(review): ZERO_SHA is defined at module level in this file but is not
# listed here -- confirm whether that omission is intentional.
__all__ = [
    "CAPABILITIES_REF",
    "CAPABILITY_AGENT",
    "CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT",
    "CAPABILITY_ALLOW_TIP_SHA1_IN_WANT",
    "CAPABILITY_ATOMIC",
    "CAPABILITY_DEEPEN_NOT",
    "CAPABILITY_DEEPEN_RELATIVE",
    "CAPABILITY_DEEPEN_SINCE",
    "CAPABILITY_DELETE_REFS",
    "CAPABILITY_FETCH",
    "CAPABILITY_FILTER",
    "CAPABILITY_INCLUDE_TAG",
    "CAPABILITY_MULTI_ACK",
    "CAPABILITY_MULTI_ACK_DETAILED",
    "CAPABILITY_NO_DONE",
    "CAPABILITY_NO_PROGRESS",
    "CAPABILITY_OFS_DELTA",
    "CAPABILITY_QUIET",
    "CAPABILITY_REPORT_STATUS",
    "CAPABILITY_SHALLOW",
    "CAPABILITY_SIDE_BAND",
    "CAPABILITY_SIDE_BAND_64K",
    "CAPABILITY_SYMREF",
    "CAPABILITY_THIN_PACK",
    "COMMAND_DEEPEN",
    "COMMAND_DEEPEN_NOT",
    "COMMAND_DEEPEN_SINCE",
    "COMMAND_DONE",
    "COMMAND_HAVE",
    "COMMAND_SHALLOW",
    "COMMAND_UNSHALLOW",
    "COMMAND_WANT",
    "COMMON_CAPABILITIES",
    "DEFAULT_GIT_PROTOCOL_VERSION_FETCH",
    "DEFAULT_GIT_PROTOCOL_VERSION_SEND",
    "DEPTH_INFINITE",
    "GIT_PROTOCOL_VERSIONS",
    "KNOWN_RECEIVE_CAPABILITIES",
    "KNOWN_UPLOAD_CAPABILITIES",
    "MULTI_ACK",
    "MULTI_ACK_DETAILED",
    "NAK_LINE",
    "PEELED_TAG_SUFFIX",
    "SIDE_BAND_CHANNEL_DATA",
    "SIDE_BAND_CHANNEL_FATAL",
    "SIDE_BAND_CHANNEL_PROGRESS",
    "SINGLE_ACK",
    "TCP_GIT_PORT",
    "BufferedPktLineWriter",
    "PktLineParser",
    "Protocol",
    "ReceivableProtocol",
    "ack_type",
    "agent_string",
    "capability_agent",
    "capability_symref",
    "extract_capabilities",
    "extract_capability_names",
    "extract_want_line_capabilities",
    "format_ack_line",
    "format_capability_line",
    "format_cmd_pkt",
    "format_ref_line",
    "format_shallow_line",
    "format_unshallow_line",
    "parse_capability",
    "parse_cmd_pkt",
    "pkt_line",
    "pkt_seq",
    "serialize_refs",
    "split_peeled_refs",
    "strip_peeled_refs",
    "symref_capabilities",
    "write_info_refs",
]
102import types
103from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
104from io import BytesIO
105from os import SEEK_END
106from typing import TYPE_CHECKING
108import dulwich
110from .errors import GitProtocolError, HangupException
111from .objects import ObjectID
113if TYPE_CHECKING:
114 from .pack import ObjectContainer
115 from .refs import Ref
# NOTE(review): presumably the TCP port used by the git:// transport -- confirm.
TCP_GIT_PORT = 9418

# Git protocol version 0 is the original Git protocol, which lacked a
# version number until Git protocol version 1 was introduced by Brandon
# Williams in 2017.
#
# Protocol version 1 is simply the original v0 protocol with the addition of
# a single packet line, which precedes the ref advertisement, indicating the
# protocol version being used. This was done in preparation for protocol v2.
#
# Git protocol version 2 was first introduced by Brandon Williams in 2018 and
# adds many features. See the gitprotocol-v2(5) manual page for details.
# As of 2024, Git only implements version 2 during 'git fetch' and still uses
# version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

# Suffix used in the Git protocol to indicate peeled tag references
PEELED_TAG_SUFFIX = b"^{}"

# Forty ASCII "0" characters wrapped as an ObjectID.
ZERO_SHA: ObjectID = ObjectID(b"0" * 40)

# Acknowledgement modes; these are the values returned by ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
# pack data
SIDE_BAND_CHANNEL_DATA = 1
# progress messages
SIDE_BAND_CHANNEL_PROGRESS = 2
# fatal error message just before stream aborts
SIDE_BAND_CHANNEL_FATAL = 3

# Capability names as they appear on the wire (bytes).
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"

# Capabilities included in both KNOWN_* sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
}
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
}

DEPTH_INFINITE = 0x7FFFFFFF

NAK_LINE = b"NAK\n"


def agent_string() -> bytes:
    """Build the agent identifier advertised by dulwich.

    Returns:
      ``dulwich/`` followed by the dot-joined components of
      ``dulwich.__version__``, ASCII-encoded
    """
    version = ".".join(str(component) for component in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")


def capability_agent() -> bytes:
    """Build the ``agent=<agent string>`` capability.

    Returns:
      The agent capability name, ``=``, and the value of agent_string()
    """
    return b"=".join((CAPABILITY_AGENT, agent_string()))


def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Build a ``symref=<from>:<to>`` capability.

    Args:
      from_ref: Name of the symbolic reference
      to_ref: Name of the reference it points at

    Returns:
      Symref capability as bytes
    """
    return b"".join((CAPABILITY_SYMREF, b"=", from_ref, b":", to_ref))


def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Collect the names of a series of capabilities.

    Args:
      capabilities: Capability strings, each either ``name`` or ``name=value``

    Returns:
      Set holding the part of every capability before its first ``=``
    """
    names: set[bytes] = set()
    for capability in capabilities:
        # Same name/value split as parse_capability(): cut at the first "=".
        names.add(capability.partition(b"=")[0])
    return names


261def parse_capability(capability: bytes) -> tuple[bytes, bytes | None]:
262 """Parse a capability string into name and value.
264 Args:
265 capability: Capability string
267 Returns:
268 Tuple of (capability_name, capability_value)
269 """
270 parts = capability.split(b"=", 1)
271 if len(parts) == 1:
272 return (parts[0], None)
273 return (parts[0], parts[1])
def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Turn symref pairs into symref capability strings.

    Args:
      symrefs: Iterable of (from_ref, to_ref) tuples

    Returns:
      One capability_symref() result per pair, in input order
    """
    capabilities: list[bytes] = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities


# Command keywords (bytes) that start protocol lines; for example
# format_shallow_line() and format_unshallow_line() prefix their output with
# COMMAND_SHALLOW and COMMAND_UNSHALLOW respectively.
COMMAND_DEEPEN = b"deepen"
COMMAND_DEEPEN_SINCE = b"deepen-since"
COMMAND_DEEPEN_NOT = b"deepen-not"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"


def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Build the payload of a command packet.

    Args:
      cmd: Command name
      *args: Command arguments

    Returns:
      The command, a single space, then every argument followed by a NUL byte
    """
    pieces = [cmd, b" "]
    for argument in args:
        pieces.append(argument + b"\0")
    return b"".join(pieces)


def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Split a command packet into the command and its arguments.

    Args:
      line: Command line to parse, as produced by format_cmd_pkt()

    Returns:
      Tuple of (command, [arguments])
    """
    first_space = line.find(b" ")
    command = line[:first_space]
    payload = line[first_space + 1 :]
    # Every argument, including the last one, is terminated by a NUL byte.
    assert payload.endswith(b"\x00")
    arguments = payload[:-1].split(b"\0")
    return command, arguments


326def pkt_line(data: bytes | None) -> bytes:
327 """Wrap data in a pkt-line.
329 Args:
330 data: The data to wrap, as a str or None.
331 Returns: The data prefixed with its length in pkt-line format; if data was
332 None, returns the flush-pkt ('0000').
333 """
334 if data is None:
335 return b"0000"
336 return f"{len(data) + 4:04x}".encode("ascii") + data
def pkt_seq(*seq: bytes | None) -> bytes:
    """Frame several payloads as pkt-lines and terminate them with a flush-pkt.

    Args:
      seq: The payloads to wrap; each one is passed to pkt_line().
    """
    framed = [pkt_line(item) for item in seq]
    framed.append(pkt_line(None))
    return b"".join(framed)


348class Protocol:
349 """Class for interacting with a remote git process over the wire.
351 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
352 consists of the length of the line as a 4-byte hex string, followed by the
353 payload data. The length includes the 4-byte header. The special line
354 '0000' indicates the end of a section of input and is called a 'flush-pkt'.
356 For details on the pkt-line format, see the cgit distribution:
357 Documentation/technical/protocol-common.txt
358 """
360 def __init__(
361 self,
362 read: Callable[[int], bytes],
363 write: Callable[[bytes], int | None],
364 close: Callable[[], None] | None = None,
365 report_activity: Callable[[int, str], None] | None = None,
366 ) -> None:
367 """Initialize Protocol.
369 Args:
370 read: Function to read bytes from the transport
371 write: Function to write bytes to the transport
372 close: Optional function to close the transport
373 report_activity: Optional function to report activity
374 """
375 self.read = read
376 self.write = write
377 self._close = close
378 self.report_activity = report_activity
379 self._readahead: BytesIO | None = None
381 def close(self) -> None:
382 """Close the underlying transport if a close function was provided."""
383 if self._close:
384 self._close()
386 def __enter__(self) -> "Protocol":
387 """Enter context manager."""
388 return self
390 def __exit__(
391 self,
392 exc_type: type[BaseException] | None,
393 exc_val: BaseException | None,
394 exc_tb: types.TracebackType | None,
395 ) -> None:
396 """Exit context manager and close transport."""
397 self.close()
399 def read_pkt_line(self) -> bytes | None:
400 """Reads a pkt-line from the remote git process.
402 This method may read from the readahead buffer; see unread_pkt_line.
404 Returns: The next string from the stream, without the length prefix, or
405 None for a flush-pkt ('0000') or delim-pkt ('0001').
406 """
407 if self._readahead is None:
408 read = self.read
409 else:
410 read = self._readahead.read
411 self._readahead = None
413 try:
414 sizestr = read(4)
415 if not sizestr:
416 raise HangupException
417 size = int(sizestr, 16)
418 if size == 0 or size == 1: # flush-pkt or delim-pkt
419 if self.report_activity:
420 self.report_activity(4, "read")
421 return None
422 if self.report_activity:
423 self.report_activity(size, "read")
424 pkt_contents = read(size - 4)
425 except ConnectionResetError as exc:
426 raise HangupException from exc
427 except OSError as exc:
428 raise GitProtocolError(str(exc)) from exc
429 else:
430 if len(pkt_contents) + 4 != size:
431 raise GitProtocolError(
432 f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}"
433 )
434 return pkt_contents
436 def eof(self) -> bool:
437 """Test whether the protocol stream has reached EOF.
439 Note that this refers to the actual stream EOF and not just a
440 flush-pkt.
442 Returns: True if the stream is at EOF, False otherwise.
443 """
444 try:
445 next_line = self.read_pkt_line()
446 except HangupException:
447 return True
448 self.unread_pkt_line(next_line)
449 return False
451 def unread_pkt_line(self, data: bytes | None) -> None:
452 """Unread a single line of data into the readahead buffer.
454 This method can be used to unread a single pkt-line into a fixed
455 readahead buffer.
457 Args:
458 data: The data to unread, without the length prefix.
460 Raises:
461 ValueError: If more than one pkt-line is unread.
462 """
463 if self._readahead is not None:
464 raise ValueError("Attempted to unread multiple pkt-lines.")
465 self._readahead = BytesIO(pkt_line(data))
467 def read_pkt_seq(self) -> Iterable[bytes]:
468 """Read a sequence of pkt-lines from the remote git process.
470 Returns: Yields each line of data up to but not including the next
471 flush-pkt.
472 """
473 pkt = self.read_pkt_line()
474 while pkt:
475 yield pkt
476 pkt = self.read_pkt_line()
478 def write_pkt_line(self, line: bytes | None) -> None:
479 """Sends a pkt-line to the remote git process.
481 Args:
482 line: A string containing the data to send, without the length
483 prefix.
484 """
485 try:
486 line = pkt_line(line)
487 self.write(line)
488 if self.report_activity:
489 self.report_activity(len(line), "write")
490 except OSError as exc:
491 raise GitProtocolError(str(exc)) from exc
493 def write_sideband(self, channel: int, blob: bytes) -> None:
494 """Write multiplexed data to the sideband.
496 Args:
497 channel: An int specifying the channel to write to.
498 blob: A blob of data (as a string) to send on this channel.
499 """
500 # a pktline can be a max of 65520. a sideband line can therefore be
501 # 65520-5 = 65515
502 # WTF: Why have the len in ASCII, but the channel in binary.
503 while blob:
504 self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
505 blob = blob[65515:]
507 def send_cmd(self, cmd: bytes, *args: bytes) -> None:
508 """Send a command and some arguments to a git server.
510 Only used for the TCP git protocol (git://).
512 Args:
513 cmd: The remote service to access.
514 args: List of arguments to send to remove service.
515 """
516 self.write_pkt_line(format_cmd_pkt(cmd, *args))
518 def read_cmd(self) -> tuple[bytes, list[bytes]]:
519 """Read a command and some arguments from the git client.
521 Only used for the TCP git protocol (git://).
523 Returns: A tuple of (command, [list of arguments]).
524 """
525 line = self.read_pkt_line()
526 if line is None:
527 raise GitProtocolError("Expected command, got flush packet")
528 return parse_cmd_pkt(line)
_RBUFSIZE = 65536  # 64KB buffer for better network I/O performance


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.

    Both methods share one internal buffer (``_rbuf``), so data fetched from
    the wire by recv() but not yet returned is served by later read() or
    recv() calls.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], int | None],
        close: Callable[[], None] | None = None,
        report_activity: Callable[[int, str], None] | None = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Initialize ReceivableProtocol.

        Args:
            recv: Function to receive bytes from the transport
            write: Function to write bytes to the transport
            close: Optional function to close the transport
            report_activity: Optional function to report activity
            rbufsize: Number of bytes requested from ``recv`` when recv()
                has to refill the buffer
        """
        # The base class reads through this object's own buffered read().
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        # Unconsumed data lives between the buffer's current position and
        # its end.
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size: int) -> bytes:
        """Read bytes from the socket.

        Calls the ``recv`` callable repeatedly until ``size`` bytes are
        available or it returns no data.

        Args:
            size: Number of bytes to read; must be positive

        Returns:
            Bytes read from socket; fewer than ``size`` only if ``recv``
            returned no data first
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        buf_len = buf.tell() - start
        if buf_len >= size:
            # Already have size bytes in our buffer?  Extract and return.
            buf.seek(start)
            rv = buf.read(size)
            # Keep whatever is left over in a fresh buffer positioned at 0.
            self._rbuf = BytesIO()
            self._rbuf.write(buf.read())
            self._rbuf.seek(0)
            return rv

        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
        while True:
            left = size - buf_len
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            data = self._recv(left)
            if not data:
                # No more data: return what has been collected so far.
                break
            n = len(data)
            if n == size and not buf_len:
                # Shortcut.  Avoid buffer data copies when:
                # - We have no data in our buffer.
                # AND
                # - Our call to recv returned exactly the
                #   number of bytes we were asked to read.
                return data
            if n == left:
                # The request is now fully satisfied.
                buf.write(data)
                del data  # explicit free
                break
            assert n <= left, f"_recv({left}) returned {n} bytes"
            buf.write(data)
            buf_len += n
            del data  # explicit free
            # assert buf_len == buf.tell()
        # Return everything from the original unconsumed position onwards.
        buf.seek(start)
        return buf.read()

    def recv(self, size: int) -> bytes:
        """Receive bytes from the socket with buffering.

        Calls the ``recv`` callable at most once, and only when the internal
        buffer has no unconsumed data.

        Args:
            size: Maximum number of bytes to receive; must be positive

        Returns:
            Bytes received from socket
        """
        assert size > 0

        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        buf_len = buf.tell()
        buf.seek(start)

        # Number of unconsumed bytes still in the buffer.
        left = buf_len - start
        if not left:
            # only read from the wire if our read buffer is exhausted
            data = self._recv(self._rbufsize)
            if len(data) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return data
            buf = BytesIO()
            buf.write(data)
            buf.seek(0)
            del data  # explicit free
            self._rbuf = buf
        return buf.read(size)


def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Extract a capabilities list from a string, if present.

    Capabilities follow a NUL byte and are separated by single spaces. Empty
    entries (an empty capability section, or repeated spaces) are dropped
    rather than reported as ``b""`` capabilities.

    Args:
      text: String to extract from
    Returns: Tuple with text with capabilities removed and list of capabilities

    Raises:
      ValueError: if text contains more than one NUL byte
    """
    if b"\0" not in text:
        return text, []
    # Exactly one NUL separator is expected; more makes the unpacking fail.
    text, capabilities = text.rstrip().split(b"\0")
    return (text, [cap for cap in capabilities.strip().split(b" ") if cap])


def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Extract a capabilities list from a want line, if present.

    Unlike other lines, a want line separates its capabilities from the rest
    of the line with a space rather than a null byte, so it looks like:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple of the line without capabilities and the list of
      capabilities; the line is returned untouched when it has fewer than
      three space-separated fields
    """
    fields = text.rstrip().split(b" ")
    command_fields, capabilities = fields[:2], fields[2:]
    if not capabilities:
        return text, []
    return (b" ".join(command_fields), capabilities)


def ack_type(capabilities: Iterable[bytes]) -> int:
    """Extract the ack type from a capabilities list.

    The capabilities are walked once, so one-shot iterators work as well as
    lists and sets.

    Args:
      capabilities: Capability names

    Returns:
      MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present, otherwise
      MULTI_ACK if ``multi_ack`` is present, otherwise SINGLE_ACK
    """
    result = SINGLE_ACK
    for capability in capabilities:
        if capability == CAPABILITY_MULTI_ACK_DETAILED:
            # Highest mode; nothing later can change the answer.
            return MULTI_ACK_DETAILED
        if capability == CAPABILITY_MULTI_ACK:
            result = MULTI_ACK
    return result


709class BufferedPktLineWriter:
710 """Writer that wraps its data in pkt-lines and has an independent buffer.
712 Consecutive calls to write() wrap the data in a pkt-line and then buffers
713 it until enough lines have been written such that their total length
714 (including length prefix) reach the buffer size.
715 """
717 def __init__(
718 self, write: Callable[[bytes], int | None], bufsize: int = 65515
719 ) -> None:
720 """Initialize the BufferedPktLineWriter.
722 Args:
723 write: A write callback for the underlying writer.
724 bufsize: The internal buffer size, including length prefixes.
725 """
726 self._write = write
727 self._bufsize = bufsize
728 self._wbuf = BytesIO()
729 self._buflen = 0
731 def write(self, data: bytes) -> None:
732 """Write data, wrapping it in a pkt-line."""
733 line = pkt_line(data)
734 line_len = len(line)
735 over = self._buflen + line_len - self._bufsize
736 if over >= 0:
737 start = line_len - over
738 self._wbuf.write(line[:start])
739 self.flush()
740 else:
741 start = 0
742 saved = line[start:]
743 self._wbuf.write(saved)
744 self._buflen += len(saved)
746 def flush(self) -> None:
747 """Flush all data from the buffer."""
748 data = self._wbuf.getvalue()
749 if data:
750 self._write(data)
751 self._len = 0
752 self._wbuf = BytesIO()
755class PktLineParser:
756 """Packet line parser that hands completed packets off to a callback."""
758 def __init__(self, handle_pkt: Callable[[bytes | None], None]) -> None:
759 """Initialize PktLineParser.
761 Args:
762 handle_pkt: Callback function to handle completed packets
763 """
764 self.handle_pkt = handle_pkt
765 self._readahead = BytesIO()
767 def parse(self, data: bytes) -> None:
768 """Parse a fragment of data and call back for any completed packets."""
769 self._readahead.write(data)
770 buf = self._readahead.getvalue()
771 if len(buf) < 4:
772 return
773 while len(buf) >= 4:
774 size = int(buf[:4], 16)
775 if size == 0:
776 self.handle_pkt(None)
777 buf = buf[4:]
778 elif size <= len(buf):
779 self.handle_pkt(buf[4:size])
780 buf = buf[size:]
781 else:
782 break
783 self._readahead = BytesIO()
784 self._readahead.write(buf)
786 def get_tail(self) -> bytes:
787 """Read back any unused data."""
788 return self._readahead.getvalue()
def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Render capabilities for the wire protocol.

    Args:
      capabilities: Capability strings

    Returns:
      Every capability preceded by a single space, concatenated; empty bytes
      when there are no capabilities
    """
    pieces: list[bytes] = []
    for capability in capabilities:
        pieces.append(b" " + capability)
    return b"".join(pieces)


803def format_ref_line(
804 ref: bytes, sha: bytes, capabilities: Sequence[bytes] | None = None
805) -> bytes:
806 """Format a ref advertisement line.
808 Args:
809 ref: Reference name
810 sha: SHA hash
811 capabilities: Optional list of capabilities
813 Returns:
814 Formatted ref line
815 """
816 if capabilities is None:
817 return sha + b" " + ref + b"\n"
818 else:
819 return sha + b" " + ref + b"\0" + format_capability_line(capabilities) + b"\n"
def format_shallow_line(sha: bytes) -> bytes:
    """Render a shallow line.

    Args:
      sha: SHA to mark as shallow

    Returns:
      The shallow command and the SHA separated by a space
    """
    return b" ".join((COMMAND_SHALLOW, sha))


def format_unshallow_line(sha: bytes) -> bytes:
    """Render an unshallow line.

    Args:
      sha: SHA to unshallow

    Returns:
      The unshallow command and the SHA separated by a space
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))


def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Render an ACK line.

    Args:
      sha: SHA to acknowledge
      ack_type: Optional ACK type (e.g. b"continue")

    Returns:
      ``ACK <sha>`` plus, when ack_type is non-empty, a space and the ACK
      type, terminated by a newline
    """
    # An empty ack_type is appended as-is, i.e. it contributes nothing.
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"


def strip_peeled_refs(
    refs: "Mapping[Ref, ObjectID | None]",
) -> "dict[Ref, ObjectID | None]":
    """Drop every peeled ref from a refs mapping.

    Args:
      refs: Mapping of refs (may include peeled refs with ^{} suffix)

    Returns:
      New dictionary holding only the refs whose name lacks the peeled suffix
    """
    kept = {}
    for name, object_id in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            continue
        kept[name] = object_id
    return kept


def split_peeled_refs(
    refs: "Mapping[Ref, ObjectID]",
) -> "tuple[dict[Ref, ObjectID], dict[Ref, ObjectID]]":
    """Separate peeled refs from regular refs.

    Args:
      refs: Mapping of refs (may include peeled refs with ^{} suffix)

    Returns:
      Tuple of (regular_refs, peeled_refs); the keys of peeled_refs have the
      ^{} suffix removed
    """
    from .refs import Ref

    regular = {}
    peeled: dict[Ref, ObjectID] = {}
    suffix_length = len(PEELED_TAG_SUFFIX)

    for name, object_id in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            # File the entry under the ref name without its peeled suffix.
            peeled[Ref(name[:-suffix_length])] = object_id
        else:
            regular[name] = object_id

    return regular, peeled


def write_info_refs(
    refs: "Mapping[Ref, ObjectID]", store: "ObjectContainer"
) -> "Iterator[bytes]":
    """Produce info/refs lines as used by the dumb HTTP protocol.

    Refs are emitted in sorted order. Refs whose object is missing from the
    store are skipped.

    Args:
      refs: Mapping of refs
      store: Object store to peel tags from

    Yields:
      Lines in info/refs format (sha + tab + refname); a ref whose peeled
      object differs from the object itself is followed by a second line for
      the peeled object, with the peeled suffix appended to the ref name
    """
    from .object_store import peel_sha
    from .refs import HEADREF

    for refname, object_id in sorted(refs.items()):
        # get_refs() includes HEAD as a special case, but we don't want to
        # advertise it
        if refname == HEADREF:
            continue
        try:
            obj = store[object_id]
        except KeyError:
            continue
        _unpeeled, peeled_obj = peel_sha(store, object_id)
        yield b"".join((obj.id, b"\t", refname, b"\n"))
        if obj.id != peeled_obj.id:
            yield b"".join((peeled_obj.id, b"\t", refname, PEELED_TAG_SUFFIX, b"\n"))


def serialize_refs(
    store: "ObjectContainer", refs: "Mapping[Ref, ObjectID]"
) -> "dict[bytes, ObjectID]":
    """Serialize refs together with their peeled refs for Git protocol v0/v1.

    Prepares refs for transmission over the Git protocol. A ref that points
    at a tag contributes two entries: the tag object and, under the ref name
    with the ^{} suffix, the dereferenced object.

    Args:
      store: Object store to peel refs from
      refs: Mapping of ref names to SHAs

    Returns:
      Dictionary with refs and peeled refs (marked with ^{}); refs whose SHA
      is missing from the store are left out and reported with a UserWarning
    """
    import warnings

    from .object_store import peel_sha
    from .objects import Tag

    serialized: dict[bytes, ObjectID] = {}
    for name, object_id in refs.items():
        try:
            unpeeled, peeled = peel_sha(store, ObjectID(object_id))
        except KeyError:
            warnings.warn(
                "ref {} points at non-present sha {}".format(
                    name.decode("utf-8", "replace"), object_id.decode("ascii")
                ),
                UserWarning,
            )
            continue
        if isinstance(unpeeled, Tag):
            serialized[name + PEELED_TAG_SUFFIX] = peeled.id
        serialized[name] = unpeeled.id
    return serialized