Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 32%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# protocol.py -- Shared parts of the git protocols
2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk>
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
23"""Generic functions for talking the git smart server protocol."""
# Public API of this module.  Kept sorted: constants first, then classes,
# then functions.  CAPABILITY_PACKFILE_URIS and CAPABILITY_PUSH_OPTIONS are
# defined below and used in the KNOWN_*_CAPABILITIES sets, so they are
# exported alongside the other CAPABILITY_* constants.
__all__ = [
    "CAPABILITIES_REF",
    "CAPABILITY_AGENT",
    "CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT",
    "CAPABILITY_ALLOW_TIP_SHA1_IN_WANT",
    "CAPABILITY_ATOMIC",
    "CAPABILITY_DEEPEN_NOT",
    "CAPABILITY_DEEPEN_RELATIVE",
    "CAPABILITY_DEEPEN_SINCE",
    "CAPABILITY_DELETE_REFS",
    "CAPABILITY_FETCH",
    "CAPABILITY_FILTER",
    "CAPABILITY_INCLUDE_TAG",
    "CAPABILITY_MULTI_ACK",
    "CAPABILITY_MULTI_ACK_DETAILED",
    "CAPABILITY_NO_DONE",
    "CAPABILITY_NO_PROGRESS",
    "CAPABILITY_OBJECT_FORMAT",
    "CAPABILITY_OFS_DELTA",
    "CAPABILITY_PACKFILE_URIS",
    "CAPABILITY_PUSH_OPTIONS",
    "CAPABILITY_QUIET",
    "CAPABILITY_REPORT_STATUS",
    "CAPABILITY_SHALLOW",
    "CAPABILITY_SIDE_BAND",
    "CAPABILITY_SIDE_BAND_64K",
    "CAPABILITY_SYMREF",
    "CAPABILITY_THIN_PACK",
    "COMMAND_DEEPEN",
    "COMMAND_DEEPEN_NOT",
    "COMMAND_DEEPEN_SINCE",
    "COMMAND_DONE",
    "COMMAND_FILTER",
    "COMMAND_HAVE",
    "COMMAND_SHALLOW",
    "COMMAND_UNSHALLOW",
    "COMMAND_WANT",
    "COMMON_CAPABILITIES",
    "DEFAULT_GIT_PROTOCOL_VERSION_FETCH",
    "DEFAULT_GIT_PROTOCOL_VERSION_SEND",
    "DEPTH_INFINITE",
    "GIT_PROTOCOL_VERSIONS",
    "KNOWN_RECEIVE_CAPABILITIES",
    "KNOWN_UPLOAD_CAPABILITIES",
    "MULTI_ACK",
    "MULTI_ACK_DETAILED",
    "NAK_LINE",
    "PEELED_TAG_SUFFIX",
    "SIDE_BAND_CHANNEL_DATA",
    "SIDE_BAND_CHANNEL_FATAL",
    "SIDE_BAND_CHANNEL_PROGRESS",
    "SINGLE_ACK",
    "TCP_GIT_PORT",
    "BufferedPktLineWriter",
    "PktLineParser",
    "Protocol",
    "ReceivableProtocol",
    "ack_type",
    "agent_string",
    "capability_agent",
    "capability_object_format",
    "capability_symref",
    "extract_capabilities",
    "extract_capability_names",
    "extract_want_line_capabilities",
    "find_capability",
    "format_ack_line",
    "format_capability_line",
    "format_cmd_pkt",
    "format_ref_line",
    "format_shallow_line",
    "format_unshallow_line",
    "parse_capability",
    "parse_cmd_pkt",
    "pkt_line",
    "pkt_seq",
    "serialize_refs",
    "split_peeled_refs",
    "strip_peeled_refs",
    "symref_capabilities",
    "write_info_refs",
]
106import logging
107import sys
108import types
109from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
110from io import BytesIO
111from os import SEEK_END
112from typing import TYPE_CHECKING
114import dulwich
116from .errors import GitProtocolError, HangupException
117from .objects import ObjectID
119if sys.version_info >= (3, 11):
120 from typing import Self
121else:
122 from typing_extensions import Self
124logger = logging.getLogger(__name__)
126if TYPE_CHECKING:
127 from .pack import ObjectContainer
128 from .refs import Ref
# TCP port number used for the git:// transport.
TCP_GIT_PORT = 9418

# Git protocol version 0 is the original Git protocol, which lacked a
# version number until Git protocol version 1 was introduced by Brandon
# Williams in 2017.
#
# Protocol version 1 is simply the original v0 protocol with the addition of
# a single packet line, which precedes the ref advertisement, indicating the
# protocol version being used. This was done in preparation for protocol v2.
#
# Git protocol version 2 was first introduced by Brandon Williams in 2018 and
# adds many features. See the gitprotocol-v2(5) manual page for details.
# As of 2024, Git only implements version 2 during 'git fetch' and still uses
# version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
# Defaults mirror the note above: version 2 for fetch, version 0 for push.
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

# Suffix used in the Git protocol to indicate peeled tag references
PEELED_TAG_SUFFIX = b"^{}"

# Object id made of 40 ASCII "0" characters.
ZERO_SHA: ObjectID = ObjectID(b"0" * 40)
# Acknowledgement modes, as returned by ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers.
# Channel 1: pack data
SIDE_BAND_CHANNEL_DATA = 1
# Channel 2: progress messages
SIDE_BAND_CHANNEL_PROGRESS = 2
# Channel 3: fatal error message just before the stream aborts
SIDE_BAND_CHANNEL_FATAL = 3

# Capability names as they appear on the wire.
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"
CAPABILITY_OBJECT_FORMAT = b"object-format"
CAPABILITY_PACKFILE_URIS = b"packfile-uris"
CAPABILITY_PUSH_OPTIONS = b"push-options"

# Magic ref that is used to attach capabilities to when
# there are no refs. Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"

# Capabilities shared by the upload and receive capability sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
    CAPABILITY_FILTER,
    CAPABILITY_PACKFILE_URIS,
}
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
    CAPABILITY_PUSH_OPTIONS,
}

DEPTH_INFINITE = 0x7FFFFFFF

NAK_LINE = b"NAK\n"
def agent_string() -> bytes:
    """Build the agent identifier advertised by dulwich.

    Returns:
      ``b"dulwich/<version>"`` where ``<version>`` is the dot-joined
      components of ``dulwich.__version__``
    """
    version = ".".join(str(part) for part in dulwich.__version__)
    return f"dulwich/{version}".encode("ascii")
def capability_agent() -> bytes:
    """Build the ``agent=<agent string>`` capability.

    Returns:
      The agent capability name, an equals sign and the dulwich agent string
    """
    return b"=".join((CAPABILITY_AGENT, agent_string()))
def capability_object_format(fmt: str) -> bytes:
    """Build the ``object-format=<fmt>`` capability.

    Args:
      fmt: Object format name (e.g., "sha1" or "sha256"); must be ASCII

    Returns:
      The object-format capability carrying the given format name
    """
    encoded = fmt.encode("ascii")
    return CAPABILITY_OBJECT_FORMAT + b"=" + encoded
def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Build a ``symref=<from>:<to>`` capability.

    Args:
      from_ref: Name of the symbolic reference
      to_ref: Name of the reference it points at

    Returns:
      The symref capability string
    """
    mapping = from_ref + b":" + to_ref
    return CAPABILITY_SYMREF + b"=" + mapping
def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Collect the names of the given capabilities, dropping any values.

    Args:
      capabilities: Capability strings, optionally of the form ``name=value``

    Returns:
      Set of capability names
    """
    names: set[bytes] = set()
    for capability in capabilities:
        name, _value = parse_capability(capability)
        names.add(name)
    return names
292def parse_capability(capability: bytes) -> tuple[bytes, bytes | None]:
293 """Parse a capability string into name and value.
295 Args:
296 capability: Capability string
298 Returns:
299 Tuple of (capability_name, capability_value)
300 """
301 parts = capability.split(b"=", 1)
302 if len(parts) == 1:
303 return (parts[0], None)
304 return (parts[0], parts[1])
def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Turn symref pairs into symref capability strings.

    Args:
      symrefs: Iterable of (from_ref, to_ref) tuples

    Returns:
      List with one symref capability string per pair, in input order
    """
    capabilities = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities
# Command keywords as byte strings; COMMAND_SHALLOW and COMMAND_UNSHALLOW are
# the prefixes emitted by format_shallow_line() / format_unshallow_line().
COMMAND_DEEPEN = b"deepen"
COMMAND_DEEPEN_SINCE = b"deepen-since"
COMMAND_DEEPEN_NOT = b"deepen-not"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"
COMMAND_FILTER = b"filter"
def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Serialize a command and its arguments into a command packet payload.

    The command is followed by a single space; every argument is terminated
    (not merely separated) by a NUL byte.

    Args:
      cmd: Command name
      *args: Command arguments

    Returns:
      Formatted command packet
    """
    terminated_args = b"".join(arg + b"\0" for arg in args)
    return cmd + b" " + terminated_args
def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Split a command packet payload into the command and its arguments.

    Inverse of format_cmd_pkt(): the command is everything before the first
    space and the remainder is a list of NUL-terminated arguments.

    Args:
      line: Command line to parse

    Returns:
      Tuple of (command, [arguments])

    Raises:
      AssertionError: If the argument section does not end with a NUL byte
    """
    space_at = line.find(b" ")
    command = line[:space_at]
    raw_args = line[space_at + 1 :]
    assert raw_args[-1:] == b"\x00"
    # Drop the trailing terminator before splitting so that no empty final
    # argument is produced.
    return command, raw_args[:-1].split(b"\0")
358def pkt_line(data: bytes | None) -> bytes:
359 """Wrap data in a pkt-line.
361 Args:
362 data: The data to wrap, as a str or None.
363 Returns: The data prefixed with its length in pkt-line format; if data was
364 None, returns the flush-pkt ('0000').
365 """
366 if data is None:
367 return b"0000"
368 return f"{len(data) + 4:04x}".encode("ascii") + data
def pkt_seq(*seq: bytes | None) -> bytes:
    """Frame each item as a pkt-line and terminate with a flush-pkt.

    Args:
      seq: Payloads to wrap; a None item is itself encoded as a flush-pkt.

    Returns:
      The concatenated pkt-lines followed by a final flush-pkt
    """
    framed = [pkt_line(item) for item in seq]
    framed.append(pkt_line(None))
    return b"".join(framed)
380class Protocol:
381 """Class for interacting with a remote git process over the wire.
383 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
384 consists of the length of the line as a 4-byte hex string, followed by the
385 payload data. The length includes the 4-byte header. The special line
386 '0000' indicates the end of a section of input and is called a 'flush-pkt'.
388 For details on the pkt-line format, see the cgit distribution:
389 Documentation/technical/protocol-common.txt
390 """
392 def __init__(
393 self,
394 read: Callable[[int], bytes],
395 write: Callable[[bytes], int | None],
396 close: Callable[[], None] | None = None,
397 report_activity: Callable[[int, str], None] | None = None,
398 ) -> None:
399 """Initialize Protocol.
401 Args:
402 read: Function to read bytes from the transport
403 write: Function to write bytes to the transport
404 close: Optional function to close the transport
405 report_activity: Optional function to report activity
406 """
407 self.read = read
408 self.write = write
409 self._close = close
410 self.report_activity = report_activity
411 self._readahead: BytesIO | None = None
413 def close(self) -> None:
414 """Close the underlying transport if a close function was provided."""
415 if self._close:
416 self._close()
417 self._close = None # Prevent double-close
419 def __del__(self) -> None:
420 """Ensure transport is closed when Protocol is garbage collected."""
421 if self._close is not None:
422 import warnings
424 warnings.warn(
425 f"unclosed Protocol {self!r}",
426 ResourceWarning,
427 stacklevel=2,
428 source=self,
429 )
430 try:
431 self.close()
432 except Exception:
433 # Ignore errors during cleanup
434 pass
436 def __enter__(self) -> Self:
437 """Enter context manager."""
438 return self
440 def __exit__(
441 self,
442 exc_type: type[BaseException] | None,
443 exc_val: BaseException | None,
444 exc_tb: types.TracebackType | None,
445 ) -> None:
446 """Exit context manager and close transport."""
447 self.close()
449 def read_pkt_line(self) -> bytes | None:
450 """Reads a pkt-line from the remote git process.
452 This method may read from the readahead buffer; see unread_pkt_line.
454 Returns: The next string from the stream, without the length prefix, or
455 None for a flush-pkt ('0000') or delim-pkt ('0001').
456 """
457 if self._readahead is None:
458 read = self.read
459 else:
460 read = self._readahead.read
461 self._readahead = None
463 try:
464 sizestr = read(4)
465 if not sizestr:
466 raise HangupException
467 size = int(sizestr, 16)
468 if size == 0 or size == 1: # flush-pkt or delim-pkt
469 if self.report_activity:
470 self.report_activity(4, "read")
471 logger.debug("git< %s", sizestr.decode("ascii"))
472 return None
473 if self.report_activity:
474 self.report_activity(size, "read")
475 pkt_contents = read(size - 4)
476 except ConnectionResetError as exc:
477 raise HangupException from exc
478 except OSError as exc:
479 raise GitProtocolError(str(exc)) from exc
480 else:
481 if len(pkt_contents) + 4 != size:
482 raise GitProtocolError(
483 f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}"
484 )
485 # Log the packet contents (truncate if too long for readability)
486 if len(pkt_contents) > 80:
487 logger.debug(
488 "git< %s... (%d bytes)", pkt_contents[:80], len(pkt_contents)
489 )
490 else:
491 logger.debug("git< %s", pkt_contents)
492 return pkt_contents
494 def eof(self) -> bool:
495 """Test whether the protocol stream has reached EOF.
497 Note that this refers to the actual stream EOF and not just a
498 flush-pkt.
500 Returns: True if the stream is at EOF, False otherwise.
501 """
502 try:
503 next_line = self.read_pkt_line()
504 except HangupException:
505 return True
506 self.unread_pkt_line(next_line)
507 return False
509 def unread_pkt_line(self, data: bytes | None) -> None:
510 """Unread a single line of data into the readahead buffer.
512 This method can be used to unread a single pkt-line into a fixed
513 readahead buffer.
515 Args:
516 data: The data to unread, without the length prefix.
518 Raises:
519 ValueError: If more than one pkt-line is unread.
520 """
521 if self._readahead is not None:
522 raise ValueError("Attempted to unread multiple pkt-lines.")
523 self._readahead = BytesIO(pkt_line(data))
525 def read_pkt_seq(self) -> Iterable[bytes]:
526 """Read a sequence of pkt-lines from the remote git process.
528 Returns: Yields each line of data up to but not including the next
529 flush-pkt.
530 """
531 pkt = self.read_pkt_line()
532 while pkt:
533 yield pkt
534 pkt = self.read_pkt_line()
536 def write_pkt_line(self, line: bytes | None) -> None:
537 """Sends a pkt-line to the remote git process.
539 Args:
540 line: A string containing the data to send, without the length
541 prefix.
542 """
543 try:
544 # Log before converting to pkt format
545 if line is None:
546 logger.debug("git> 0000")
547 elif len(line) > 80:
548 logger.debug("git> %s... (%d bytes)", line[:80], len(line))
549 else:
550 logger.debug("git> %s", line)
552 line = pkt_line(line)
553 self.write(line)
554 if self.report_activity:
555 self.report_activity(len(line), "write")
556 except OSError as exc:
557 raise GitProtocolError(str(exc)) from exc
559 def write_sideband(self, channel: int, blob: bytes) -> None:
560 """Write multiplexed data to the sideband.
562 Args:
563 channel: An int specifying the channel to write to.
564 blob: A blob of data (as a string) to send on this channel.
565 """
566 # a pktline can be a max of 65520. a sideband line can therefore be
567 # 65520-5 = 65515
568 # WTF: Why have the len in ASCII, but the channel in binary.
569 while blob:
570 self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
571 blob = blob[65515:]
573 def send_cmd(self, cmd: bytes, *args: bytes) -> None:
574 """Send a command and some arguments to a git server.
576 Only used for the TCP git protocol (git://).
578 Args:
579 cmd: The remote service to access.
580 args: List of arguments to send to remove service.
581 """
582 self.write_pkt_line(format_cmd_pkt(cmd, *args))
584 def read_cmd(self) -> tuple[bytes, list[bytes]]:
585 """Read a command and some arguments from the git client.
587 Only used for the TCP git protocol (git://).
589 Returns: A tuple of (command, [list of arguments]).
590 """
591 line = self.read_pkt_line()
592 if line is None:
593 raise GitProtocolError("Expected command, got flush packet")
594 return parse_cmd_pkt(line)
# Default value of ReceivableProtocol's ``rbufsize`` argument.
_RBUFSIZE = 65536  # 64KB buffer for better network I/O performance
class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], int | None],
        close: Callable[[], None] | None = None,
        report_activity: Callable[[int, str], None] | None = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Set up the protocol around a recv-style transport.

        Args:
          recv: Function to receive bytes from the transport
          write: Function to write bytes to the transport
          close: Optional function to close the transport
          report_activity: Optional function to report activity
          rbufsize: Number of bytes requested from ``recv`` when refilling
            the buffer in recv()
        """
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size: int) -> bytes:
        """Read exactly ``size`` bytes, or fewer if the transport hits EOF.

        Args:
          size: Number of bytes to read; must be positive

        Returns:
          Bytes taken from the internal buffer and, as needed, the transport
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        pending = self._rbuf
        consumed = pending.tell()
        # buffer may have been partially consumed by recv(); seeking to the
        # end also positions the buffer for the appends below.
        available = pending.seek(0, SEEK_END) - consumed
        if available >= size:
            # Already have size bytes in our buffer? Extract and return,
            # keeping whatever is left over for the next call.
            pending.seek(consumed)
            result = pending.read(size)
            self._rbuf = BytesIO(pending.read())
            return result

        self._rbuf = BytesIO()  # reset _rbuf. we consume it via pending.
        while True:
            missing = size - available
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            chunk = self._recv(missing)
            if not chunk:
                break
            received = len(chunk)
            if received == size and not available:
                # Shortcut.  Avoid buffer data copies when:
                # - We have no data in our buffer.
                # AND
                # - Our call to recv returned exactly the
                #   number of bytes we were asked to read.
                return chunk
            assert received <= missing, f"_recv({missing}) returned {received} bytes"
            pending.write(chunk)
            if received == missing:
                break
            available += received
        pending.seek(consumed)
        return pending.read()

    def recv(self, size: int) -> bytes:
        """Return up to ``size`` bytes, refilling the buffer only when empty.

        Args:
          size: Maximum number of bytes to receive; must be positive

        Returns:
          Bytes from the internal buffer, or from the transport if the buffer
          was exhausted
        """
        assert size > 0

        pending = self._rbuf
        position = pending.tell()
        end = pending.seek(0, SEEK_END)
        pending.seek(position)

        if end == position:
            # only read from the wire if our read buffer is exhausted
            chunk = self._recv(self._rbufsize)
            if len(chunk) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return chunk
            pending = BytesIO(chunk)
            self._rbuf = pending
        return pending.read(size)
def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate a NUL-delimited capabilities list from a line, if present.

    Args:
      text: Line to extract from

    Returns:
      Tuple of the text with capabilities removed and the list of
      capabilities. Without a NUL byte the text is returned unchanged with an
      empty list.
    """
    if b"\0" in text:
        head, raw_caps = text.rstrip().split(b"\0")
        return (head, raw_caps.strip().split(b" "))
    return text, []
def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate a capabilities list from a want line, if present.

    Note that want lines have capabilities separated from the rest of the line
    by a space instead of a null byte. Thus want lines have the form:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from

    Returns:
      Tuple of the text with capabilities removed and the list of
      capabilities. A line with fewer than three fields is returned unchanged
      with an empty list.
    """
    fields = text.rstrip().split(b" ")
    if len(fields) >= 3:
        command_and_id, capabilities = fields[:2], fields[2:]
        return (b" ".join(command_and_id), capabilities)
    return text, []
def ack_type(capabilities: Iterable[bytes]) -> int:
    """Determine the ack mode selected by a capabilities list.

    Args:
      capabilities: Capability strings

    Returns:
      MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present, otherwise
      MULTI_ACK if ``multi_ack`` is present, otherwise SINGLE_ACK
    """
    if b"multi_ack_detailed" in capabilities:
        return MULTI_ACK_DETAILED
    if b"multi_ack" in capabilities:
        return MULTI_ACK
    return SINGLE_ACK
775def find_capability(
776 capabilities: Iterable[bytes], *capability_names: bytes
777) -> bytes | None:
778 """Find a capability value in a list of capabilities.
780 This function looks for capabilities that may include arguments after an equals sign
781 and returns only the value part (after the '='). For capabilities without values,
782 returns the capability name itself.
784 Args:
785 capabilities: List of capability strings
786 capability_names: Capability name(s) to search for
788 Returns:
789 The value after '=' if found, or the capability name if no '=', or None if not found
791 Example:
792 >>> caps = [b'filter=blob:none', b'agent=git/2.0', b'thin-pack']
793 >>> find_capability(caps, b'filter')
794 b'blob:none'
795 >>> find_capability(caps, b'thin-pack')
796 b'thin-pack'
797 >>> find_capability(caps, b'missing')
798 None
799 """
800 for cap in capabilities:
801 for name in capability_names:
802 if cap == name:
803 return cap
804 elif cap.startswith(name + b"="):
805 return cap[len(name) + 1 :]
806 return None
809class BufferedPktLineWriter:
810 """Writer that wraps its data in pkt-lines and has an independent buffer.
812 Consecutive calls to write() wrap the data in a pkt-line and then buffers
813 it until enough lines have been written such that their total length
814 (including length prefix) reach the buffer size.
815 """
817 def __init__(
818 self, write: Callable[[bytes], int | None], bufsize: int = 65515
819 ) -> None:
820 """Initialize the BufferedPktLineWriter.
822 Args:
823 write: A write callback for the underlying writer.
824 bufsize: The internal buffer size, including length prefixes.
825 """
826 self._write = write
827 self._bufsize = bufsize
828 self._wbuf = BytesIO()
829 self._buflen = 0
831 def write(self, data: bytes) -> None:
832 """Write data, wrapping it in a pkt-line."""
833 line = pkt_line(data)
834 line_len = len(line)
835 over = self._buflen + line_len - self._bufsize
836 if over >= 0:
837 start = line_len - over
838 self._wbuf.write(line[:start])
839 self.flush()
840 else:
841 start = 0
842 saved = line[start:]
843 self._wbuf.write(saved)
844 self._buflen += len(saved)
846 def flush(self) -> None:
847 """Flush all data from the buffer."""
848 data = self._wbuf.getvalue()
849 if data:
850 self._write(data)
851 self._len = 0
852 self._wbuf = BytesIO()
855class PktLineParser:
856 """Packet line parser that hands completed packets off to a callback."""
858 def __init__(self, handle_pkt: Callable[[bytes | None], None]) -> None:
859 """Initialize PktLineParser.
861 Args:
862 handle_pkt: Callback function to handle completed packets
863 """
864 self.handle_pkt = handle_pkt
865 self._readahead = BytesIO()
867 def parse(self, data: bytes) -> None:
868 """Parse a fragment of data and call back for any completed packets."""
869 self._readahead.write(data)
870 buf = self._readahead.getvalue()
871 if len(buf) < 4:
872 return
873 while len(buf) >= 4:
874 size = int(buf[:4], 16)
875 if size == 0:
876 self.handle_pkt(None)
877 buf = buf[4:]
878 elif size <= len(buf):
879 self.handle_pkt(buf[4:size])
880 buf = buf[size:]
881 else:
882 break
883 self._readahead = BytesIO()
884 self._readahead.write(buf)
886 def get_tail(self) -> bytes:
887 """Read back any unused data."""
888 return self._readahead.getvalue()
def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Render a capabilities list for the wire protocol.

    Args:
      capabilities: List of capability strings

    Returns:
      The capabilities concatenated, each one preceded by a single space
    """
    return b"".join(b" " + capability for capability in capabilities)
def format_ref_line(
    ref: bytes, sha: bytes, capabilities: Sequence[bytes] | None = None
) -> bytes:
    """Render a ref advertisement line.

    Args:
      ref: Reference name
      sha: SHA hash
      capabilities: Optional list of capabilities; when given (even if
        empty) it is appended after a NUL byte

    Returns:
      ``<sha> <ref>``, optionally followed by NUL and the formatted
      capabilities, terminated by a newline
    """
    line = sha + b" " + ref
    if capabilities is not None:
        line += b"\0" + format_capability_line(capabilities)
    return line + b"\n"
def format_shallow_line(sha: bytes) -> bytes:
    """Render a shallow line.

    Args:
      sha: SHA to mark as shallow

    Returns:
      The shallow command keyword, a space and the SHA
    """
    return b" ".join((COMMAND_SHALLOW, sha))
def format_unshallow_line(sha: bytes) -> bytes:
    """Render an unshallow line.

    Args:
      sha: SHA to unshallow

    Returns:
      The unshallow command keyword, a space and the SHA
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))
def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Render an ACK line.

    Args:
      sha: SHA to acknowledge
      ack_type: Optional ACK type (e.g. b"continue"); when non-empty it is
        appended after a space

    Returns:
      ``ACK <sha>[ <ack_type>]`` terminated by a newline
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"
def strip_peeled_refs(
    refs: "Mapping[Ref, ObjectID | None]",
) -> "dict[Ref, ObjectID | None]":
    """Return a copy of a refs mapping without its peeled entries.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      New dictionary holding only the refs whose name lacks the ^{} suffix
    """
    kept = {}
    for ref, sha in refs.items():
        if ref.endswith(PEELED_TAG_SUFFIX):
            continue
        kept[ref] = sha
    return kept
def split_peeled_refs(
    refs: "Mapping[Ref, ObjectID]",
) -> "tuple[dict[Ref, ObjectID], dict[Ref, ObjectID]]":
    """Partition a refs mapping into regular and peeled entries.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      Tuple of (regular_refs, peeled_refs) where peeled_refs keys have
      the ^{} suffix removed
    """
    from .refs import Ref

    regular = {}
    peeled: dict[Ref, ObjectID] = {}
    suffix_len = len(PEELED_TAG_SUFFIX)

    for name, sha in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            # Key the peeled value by the ref name without its suffix.
            peeled[Ref(name[:-suffix_len])] = sha
        else:
            regular[name] = sha

    return regular, peeled
def write_info_refs(
    refs: "Mapping[Ref, ObjectID]", store: "ObjectContainer"
) -> "Iterator[bytes]":
    """Generate info refs in the format used by the dumb HTTP protocol.

    Refs are emitted in sorted order. Refs whose object is missing from the
    store are skipped, and a ref whose object peels to a different object is
    followed by an extra line for the peeled object.

    Args:
      refs: Dictionary of refs
      store: Object store to peel tags from

    Yields:
      Lines in info/refs format (sha + tab + refname)
    """
    from .object_store import peel_sha
    from .refs import HEADREF

    for name, sha in sorted(refs.items()):
        # get_refs() includes HEAD as a special case, but we don't want to
        # advertise it
        if name == HEADREF:
            continue
        try:
            obj = store[sha]
        except KeyError:
            continue
        peeled = peel_sha(store, sha)[1]
        yield b"\t".join((obj.id, name)) + b"\n"
        if obj.id != peeled.id:
            yield b"\t".join((peeled.id, name + PEELED_TAG_SUFFIX)) + b"\n"
def serialize_refs(
    store: "ObjectContainer", refs: "Mapping[Ref, ObjectID]"
) -> "dict[bytes, ObjectID]":
    """Serialize refs with peeled refs for Git protocol v0/v1.

    This function is used to prepare refs for transmission over the Git
    protocol. For tags, it includes both the tag object and the dereferenced
    object. Refs whose object is not present in the store are skipped with a
    UserWarning.

    Args:
      store: Object store to peel refs from
      refs: Dictionary of ref names to SHAs

    Returns:
      Dictionary with refs and peeled refs (marked with ^{})
    """
    import warnings

    from .object_store import peel_sha
    from .objects import Tag

    serialized: dict[bytes, ObjectID] = {}
    for name, sha in refs.items():
        try:
            unpeeled, peeled = peel_sha(store, ObjectID(sha))
        except KeyError:
            message = "ref {} points at non-present sha {}".format(
                name.decode("utf-8", "replace"), sha.decode("ascii")
            )
            warnings.warn(message, UserWarning)
            continue
        # The peeled entry is inserted before the ref itself.
        if isinstance(unpeeled, Tag):
            serialized[name + PEELED_TAG_SUFFIX] = peeled.id
        serialized[name] = unpeeled.id
    return serialized