Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/dulwich/protocol.py: 32%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# protocol.py -- Shared parts of the git protocols
2# Copyright (C) 2008 John Carr <john.carr@unrouted.co.uk>
3# Copyright (C) 2008-2012 Jelmer Vernooij <jelmer@jelmer.uk>
4#
5# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
6# Dulwich is dual-licensed under the Apache License, Version 2.0 and the GNU
7# General Public License as published by the Free Software Foundation; version 2.0
8# or (at your option) any later version. You can redistribute it and/or
9# modify it under the terms of either of these two licenses.
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17# You should have received a copy of the licenses; if not, see
18# <http://www.gnu.org/licenses/> for a copy of the GNU General Public License
19# and <http://www.apache.org/licenses/LICENSE-2.0> for a copy of the Apache
20# License, Version 2.0.
21#
23"""Generic functions for talking the git smart server protocol."""
# Public API of this module: the names bound by ``from dulwich.protocol import *``.
# Entries are grouped as constants, then classes, then functions, and each
# group is kept in alphabetical order.
# NOTE(review): CAPABILITY_PACKFILE_URIS, CAPABILITY_PUSH_OPTIONS and ZERO_SHA
# are defined in this module but are not listed here -- confirm that is intended.
__all__ = [
    "CAPABILITIES_REF",
    "CAPABILITY_AGENT",
    "CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT",
    "CAPABILITY_ALLOW_TIP_SHA1_IN_WANT",
    "CAPABILITY_ATOMIC",
    "CAPABILITY_DEEPEN_NOT",
    "CAPABILITY_DEEPEN_RELATIVE",
    "CAPABILITY_DEEPEN_SINCE",
    "CAPABILITY_DELETE_REFS",
    "CAPABILITY_FETCH",
    "CAPABILITY_FILTER",
    "CAPABILITY_INCLUDE_TAG",
    "CAPABILITY_MULTI_ACK",
    "CAPABILITY_MULTI_ACK_DETAILED",
    "CAPABILITY_NO_DONE",
    "CAPABILITY_NO_PROGRESS",
    "CAPABILITY_OBJECT_FORMAT",
    "CAPABILITY_OFS_DELTA",
    "CAPABILITY_QUIET",
    "CAPABILITY_REPORT_STATUS",
    "CAPABILITY_SHALLOW",
    "CAPABILITY_SIDE_BAND",
    "CAPABILITY_SIDE_BAND_64K",
    "CAPABILITY_SYMREF",
    "CAPABILITY_THIN_PACK",
    "COMMAND_DEEPEN",
    "COMMAND_DEEPEN_NOT",
    "COMMAND_DEEPEN_SINCE",
    "COMMAND_DONE",
    "COMMAND_FILTER",
    "COMMAND_HAVE",
    "COMMAND_SHALLOW",
    "COMMAND_UNSHALLOW",
    "COMMAND_WANT",
    "COMMON_CAPABILITIES",
    "DEFAULT_GIT_PROTOCOL_VERSION_FETCH",
    "DEFAULT_GIT_PROTOCOL_VERSION_SEND",
    "DEPTH_INFINITE",
    "GIT_PROTOCOL_VERSIONS",
    "KNOWN_RECEIVE_CAPABILITIES",
    "KNOWN_UPLOAD_CAPABILITIES",
    "MULTI_ACK",
    "MULTI_ACK_DETAILED",
    "NAK_LINE",
    "PEELED_TAG_SUFFIX",
    "SIDE_BAND_CHANNEL_DATA",
    "SIDE_BAND_CHANNEL_FATAL",
    "SIDE_BAND_CHANNEL_PROGRESS",
    "SINGLE_ACK",
    "TCP_GIT_PORT",
    "BufferedPktLineWriter",
    "PktLineParser",
    "Protocol",
    "ReceivableProtocol",
    "ack_type",
    "agent_string",
    "capability_agent",
    "capability_object_format",
    "capability_symref",
    "extract_capabilities",
    "extract_capability_names",
    "extract_want_line_capabilities",
    "find_capability",
    "format_ack_line",
    "format_capability_line",
    "format_cmd_pkt",
    "format_ref_line",
    "format_shallow_line",
    "format_unshallow_line",
    "parse_capability",
    "parse_cmd_pkt",
    "pkt_line",
    "pkt_seq",
    "serialize_refs",
    "split_peeled_refs",
    "strip_peeled_refs",
    "symref_capabilities",
    "write_info_refs",
]
106import logging
107import types
108from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
109from io import BytesIO
110from os import SEEK_END
111from typing import TYPE_CHECKING
113import dulwich
115from .errors import GitProtocolError, HangupException
116from .objects import ObjectID
# Module-level logger; Protocol logs each pkt-line it reads or writes at
# DEBUG level through it.
logger = logging.getLogger(__name__)

if TYPE_CHECKING:
    # Needed for annotations only; the functions that use these modules at
    # runtime import them locally.
    from .pack import ObjectContainer
    from .refs import Ref

# Port number for the TCP git protocol (git://).
TCP_GIT_PORT = 9418

# Git protocol version 0 is the original Git protocol, which lacked a
# version number until Git protocol version 1 was introduced by Brandon
# Williams in 2017.
#
# Protocol version 1 is simply the original v0 protocol with the addition of
# a single packet line, which precedes the ref advertisement, indicating the
# protocol version being used. This was done in preparation for protocol v2.
#
# Git protocol version 2 was first introduced by Brandon Williams in 2018 and
# adds many features. See the gitprotocol-v2(5) manual page for details.
# As of 2024, Git only implements version 2 during 'git fetch' and still uses
# version 0 during 'git push'.
GIT_PROTOCOL_VERSIONS = [0, 1, 2]
DEFAULT_GIT_PROTOCOL_VERSION_FETCH = 2
DEFAULT_GIT_PROTOCOL_VERSION_SEND = 0

# Suffix used in the Git protocol to indicate peeled tag references
PEELED_TAG_SUFFIX = b"^{}"

# Object id made of 40 ASCII "0" characters.
ZERO_SHA: ObjectID = ObjectID(b"0" * 40)

# Acknowledgement modes, as returned by ack_type().
SINGLE_ACK = 0
MULTI_ACK = 1
MULTI_ACK_DETAILED = 2

# Side-band channel numbers (see Protocol.write_sideband).
# pack data
SIDE_BAND_CHANNEL_DATA = 1
# progress messages
SIDE_BAND_CHANNEL_PROGRESS = 2
# fatal error message just before stream aborts
SIDE_BAND_CHANNEL_FATAL = 3
# Capability names as byte strings. A capability may carry a value after an
# "=" sign (see parse_capability); the constants hold the bare name only.
CAPABILITY_ATOMIC = b"atomic"
CAPABILITY_DEEPEN_SINCE = b"deepen-since"
CAPABILITY_DEEPEN_NOT = b"deepen-not"
CAPABILITY_DEEPEN_RELATIVE = b"deepen-relative"
CAPABILITY_DELETE_REFS = b"delete-refs"
CAPABILITY_INCLUDE_TAG = b"include-tag"
CAPABILITY_MULTI_ACK = b"multi_ack"
CAPABILITY_MULTI_ACK_DETAILED = b"multi_ack_detailed"
CAPABILITY_NO_DONE = b"no-done"
CAPABILITY_NO_PROGRESS = b"no-progress"
CAPABILITY_OFS_DELTA = b"ofs-delta"
CAPABILITY_QUIET = b"quiet"
CAPABILITY_REPORT_STATUS = b"report-status"
CAPABILITY_SHALLOW = b"shallow"
CAPABILITY_SIDE_BAND = b"side-band"
CAPABILITY_SIDE_BAND_64K = b"side-band-64k"
CAPABILITY_THIN_PACK = b"thin-pack"
CAPABILITY_AGENT = b"agent"
CAPABILITY_SYMREF = b"symref"
CAPABILITY_ALLOW_TIP_SHA1_IN_WANT = b"allow-tip-sha1-in-want"
CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT = b"allow-reachable-sha1-in-want"
CAPABILITY_FETCH = b"fetch"
CAPABILITY_FILTER = b"filter"
CAPABILITY_OBJECT_FORMAT = b"object-format"
CAPABILITY_PACKFILE_URIS = b"packfile-uris"
CAPABILITY_PUSH_OPTIONS = b"push-options"

# Magic ref that capabilities are attached to when there are no refs.
# Should always be set to ZERO_SHA.
CAPABILITIES_REF = b"capabilities^{}"
# Capabilities that are part of both known-capability sets below.
COMMON_CAPABILITIES = [
    CAPABILITY_OFS_DELTA,
    CAPABILITY_SIDE_BAND,
    CAPABILITY_SIDE_BAND_64K,
    CAPABILITY_AGENT,
    CAPABILITY_NO_PROGRESS,
]
# The common capabilities plus the upload (fetch) specific ones.
KNOWN_UPLOAD_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_THIN_PACK,
    CAPABILITY_MULTI_ACK,
    CAPABILITY_MULTI_ACK_DETAILED,
    CAPABILITY_INCLUDE_TAG,
    CAPABILITY_DEEPEN_SINCE,
    CAPABILITY_SYMREF,
    CAPABILITY_SHALLOW,
    CAPABILITY_DEEPEN_NOT,
    CAPABILITY_DEEPEN_RELATIVE,
    CAPABILITY_ALLOW_TIP_SHA1_IN_WANT,
    CAPABILITY_ALLOW_REACHABLE_SHA1_IN_WANT,
    CAPABILITY_FETCH,
    CAPABILITY_FILTER,
    CAPABILITY_PACKFILE_URIS,
}
# The common capabilities plus the receive (push) specific ones.
KNOWN_RECEIVE_CAPABILITIES = {
    *COMMON_CAPABILITIES,
    CAPABILITY_REPORT_STATUS,
    CAPABILITY_DELETE_REFS,
    CAPABILITY_QUIET,
    CAPABILITY_ATOMIC,
    CAPABILITY_PUSH_OPTIONS,
}

# Largest signed 32-bit integer, used as the "infinite" depth value.
DEPTH_INFINITE = 0x7FFFFFFF

NAK_LINE = b"NAK\n"
def agent_string() -> bytes:
    """Build the agent identifier for this dulwich version.

    Returns:
      ``dulwich/<version>`` as ASCII bytes, where ``<version>`` is the
      components of ``dulwich.__version__`` joined with dots
    """
    version = ".".join(str(part) for part in dulwich.__version__)
    agent = "dulwich/" + version
    return agent.encode("ascii")
def capability_agent() -> bytes:
    """Build the ``agent=<agent string>`` capability.

    Returns:
      The agent capability name, an equals sign and the dulwich agent string
    """
    return b"".join((CAPABILITY_AGENT, b"=", agent_string()))
def capability_object_format(fmt: str) -> bytes:
    """Build the ``object-format=<fmt>`` capability.

    Args:
      fmt: Name of the object format (e.g., "sha1" or "sha256"); must be
        ASCII-encodable

    Returns:
      The object-format capability name, an equals sign and the encoded name
    """
    encoded_fmt = fmt.encode("ascii")
    return b"".join((CAPABILITY_OBJECT_FORMAT, b"=", encoded_fmt))
def capability_symref(from_ref: bytes, to_ref: bytes) -> bytes:
    """Build a ``symref=<from_ref>:<to_ref>`` capability.

    Args:
      from_ref: Name of the symbolic reference
      to_ref: Name of the reference it points at

    Returns:
      The symref capability as bytes
    """
    return b"".join((CAPABILITY_SYMREF, b"=", from_ref, b":", to_ref))
def extract_capability_names(capabilities: Iterable[bytes]) -> set[bytes]:
    """Collect the names of the given capabilities, dropping any values.

    Args:
      capabilities: Capability strings, each either ``name`` or ``name=value``

    Returns:
      Set containing the name part of every capability
    """
    names: set[bytes] = set()
    for entry in capabilities:
        name, _value = parse_capability(entry)
        names.add(name)
    return names
286def parse_capability(capability: bytes) -> tuple[bytes, bytes | None]:
287 """Parse a capability string into name and value.
289 Args:
290 capability: Capability string
292 Returns:
293 Tuple of (capability_name, capability_value)
294 """
295 parts = capability.split(b"=", 1)
296 if len(parts) == 1:
297 return (parts[0], None)
298 return (parts[0], parts[1])
def symref_capabilities(symrefs: Iterable[tuple[bytes, bytes]]) -> list[bytes]:
    """Turn symref pairs into symref capability strings.

    Args:
      symrefs: Iterable of (from_ref, to_ref) tuples

    Returns:
      One symref capability per pair, in input order
    """
    capabilities: list[bytes] = []
    for pair in symrefs:
        capabilities.append(capability_symref(*pair))
    return capabilities
# Command keywords as byte strings. format_shallow_line() and
# format_unshallow_line() prefix their output with COMMAND_SHALLOW and
# COMMAND_UNSHALLOW respectively.
COMMAND_DEEPEN = b"deepen"
COMMAND_DEEPEN_SINCE = b"deepen-since"
COMMAND_DEEPEN_NOT = b"deepen-not"
COMMAND_SHALLOW = b"shallow"
COMMAND_UNSHALLOW = b"unshallow"
COMMAND_DONE = b"done"
COMMAND_WANT = b"want"
COMMAND_HAVE = b"have"
COMMAND_FILTER = b"filter"
def format_cmd_pkt(cmd: bytes, *args: bytes) -> bytes:
    """Build the payload of a command packet.

    The result is the command, a single space, and then every argument
    followed by a NUL byte.

    Args:
      cmd: Command name
      *args: Command arguments

    Returns:
      Formatted command packet payload (without pkt-line length prefix)
    """
    terminated_args = (arg + b"\0" for arg in args)
    return cmd + b" " + b"".join(terminated_args)
def parse_cmd_pkt(line: bytes) -> tuple[bytes, list[bytes]]:
    """Split a command packet payload into command and arguments.

    This is the inverse of format_cmd_pkt: the command ends at the first
    space and the remainder is a list of NUL-terminated arguments.

    Args:
      line: Command line to parse

    Returns:
      Tuple of (command, [arguments])
    """
    space_at = line.find(b" ")
    cmd = line[:space_at]
    remainder = line[space_at + 1 :]
    # The argument list must end with a NUL terminator.
    assert remainder.endswith(b"\x00")
    return cmd, remainder[:-1].split(b"\0")
352def pkt_line(data: bytes | None) -> bytes:
353 """Wrap data in a pkt-line.
355 Args:
356 data: The data to wrap, as a str or None.
357 Returns: The data prefixed with its length in pkt-line format; if data was
358 None, returns the flush-pkt ('0000').
359 """
360 if data is None:
361 return b"0000"
362 return f"{len(data) + 4:04x}".encode("ascii") + data
def pkt_seq(*seq: bytes | None) -> bytes:
    """Frame each item as a pkt-line and terminate with a flush-pkt.

    Args:
      seq: The payloads to wrap; a None item produces a flush-pkt in place.
    """
    framed = [pkt_line(item) for item in seq]
    framed.append(pkt_line(None))
    return b"".join(framed)
374class Protocol:
375 """Class for interacting with a remote git process over the wire.
377 Parts of the git wire protocol use 'pkt-lines' to communicate. A pkt-line
378 consists of the length of the line as a 4-byte hex string, followed by the
379 payload data. The length includes the 4-byte header. The special line
380 '0000' indicates the end of a section of input and is called a 'flush-pkt'.
382 For details on the pkt-line format, see the cgit distribution:
383 Documentation/technical/protocol-common.txt
384 """
386 def __init__(
387 self,
388 read: Callable[[int], bytes],
389 write: Callable[[bytes], int | None],
390 close: Callable[[], None] | None = None,
391 report_activity: Callable[[int, str], None] | None = None,
392 ) -> None:
393 """Initialize Protocol.
395 Args:
396 read: Function to read bytes from the transport
397 write: Function to write bytes to the transport
398 close: Optional function to close the transport
399 report_activity: Optional function to report activity
400 """
401 self.read = read
402 self.write = write
403 self._close = close
404 self.report_activity = report_activity
405 self._readahead: BytesIO | None = None
407 def close(self) -> None:
408 """Close the underlying transport if a close function was provided."""
409 if self._close:
410 self._close()
411 self._close = None # Prevent double-close
413 def __del__(self) -> None:
414 """Ensure transport is closed when Protocol is garbage collected."""
415 if self._close is not None:
416 import warnings
418 warnings.warn(
419 f"unclosed Protocol {self!r}",
420 ResourceWarning,
421 stacklevel=2,
422 source=self,
423 )
424 try:
425 self.close()
426 except Exception:
427 # Ignore errors during cleanup
428 pass
430 def __enter__(self) -> "Protocol":
431 """Enter context manager."""
432 return self
434 def __exit__(
435 self,
436 exc_type: type[BaseException] | None,
437 exc_val: BaseException | None,
438 exc_tb: types.TracebackType | None,
439 ) -> None:
440 """Exit context manager and close transport."""
441 self.close()
443 def read_pkt_line(self) -> bytes | None:
444 """Reads a pkt-line from the remote git process.
446 This method may read from the readahead buffer; see unread_pkt_line.
448 Returns: The next string from the stream, without the length prefix, or
449 None for a flush-pkt ('0000') or delim-pkt ('0001').
450 """
451 if self._readahead is None:
452 read = self.read
453 else:
454 read = self._readahead.read
455 self._readahead = None
457 try:
458 sizestr = read(4)
459 if not sizestr:
460 raise HangupException
461 size = int(sizestr, 16)
462 if size == 0 or size == 1: # flush-pkt or delim-pkt
463 if self.report_activity:
464 self.report_activity(4, "read")
465 logger.debug("git< %s", sizestr.decode("ascii"))
466 return None
467 if self.report_activity:
468 self.report_activity(size, "read")
469 pkt_contents = read(size - 4)
470 except ConnectionResetError as exc:
471 raise HangupException from exc
472 except OSError as exc:
473 raise GitProtocolError(str(exc)) from exc
474 else:
475 if len(pkt_contents) + 4 != size:
476 raise GitProtocolError(
477 f"Length of pkt read {len(pkt_contents) + 4:04x} does not match length prefix {size:04x}"
478 )
479 # Log the packet contents (truncate if too long for readability)
480 if len(pkt_contents) > 80:
481 logger.debug(
482 "git< %s... (%d bytes)", pkt_contents[:80], len(pkt_contents)
483 )
484 else:
485 logger.debug("git< %s", pkt_contents)
486 return pkt_contents
488 def eof(self) -> bool:
489 """Test whether the protocol stream has reached EOF.
491 Note that this refers to the actual stream EOF and not just a
492 flush-pkt.
494 Returns: True if the stream is at EOF, False otherwise.
495 """
496 try:
497 next_line = self.read_pkt_line()
498 except HangupException:
499 return True
500 self.unread_pkt_line(next_line)
501 return False
503 def unread_pkt_line(self, data: bytes | None) -> None:
504 """Unread a single line of data into the readahead buffer.
506 This method can be used to unread a single pkt-line into a fixed
507 readahead buffer.
509 Args:
510 data: The data to unread, without the length prefix.
512 Raises:
513 ValueError: If more than one pkt-line is unread.
514 """
515 if self._readahead is not None:
516 raise ValueError("Attempted to unread multiple pkt-lines.")
517 self._readahead = BytesIO(pkt_line(data))
519 def read_pkt_seq(self) -> Iterable[bytes]:
520 """Read a sequence of pkt-lines from the remote git process.
522 Returns: Yields each line of data up to but not including the next
523 flush-pkt.
524 """
525 pkt = self.read_pkt_line()
526 while pkt:
527 yield pkt
528 pkt = self.read_pkt_line()
530 def write_pkt_line(self, line: bytes | None) -> None:
531 """Sends a pkt-line to the remote git process.
533 Args:
534 line: A string containing the data to send, without the length
535 prefix.
536 """
537 try:
538 # Log before converting to pkt format
539 if line is None:
540 logger.debug("git> 0000")
541 elif len(line) > 80:
542 logger.debug("git> %s... (%d bytes)", line[:80], len(line))
543 else:
544 logger.debug("git> %s", line)
546 line = pkt_line(line)
547 self.write(line)
548 if self.report_activity:
549 self.report_activity(len(line), "write")
550 except OSError as exc:
551 raise GitProtocolError(str(exc)) from exc
553 def write_sideband(self, channel: int, blob: bytes) -> None:
554 """Write multiplexed data to the sideband.
556 Args:
557 channel: An int specifying the channel to write to.
558 blob: A blob of data (as a string) to send on this channel.
559 """
560 # a pktline can be a max of 65520. a sideband line can therefore be
561 # 65520-5 = 65515
562 # WTF: Why have the len in ASCII, but the channel in binary.
563 while blob:
564 self.write_pkt_line(bytes(bytearray([channel])) + blob[:65515])
565 blob = blob[65515:]
567 def send_cmd(self, cmd: bytes, *args: bytes) -> None:
568 """Send a command and some arguments to a git server.
570 Only used for the TCP git protocol (git://).
572 Args:
573 cmd: The remote service to access.
574 args: List of arguments to send to remove service.
575 """
576 self.write_pkt_line(format_cmd_pkt(cmd, *args))
578 def read_cmd(self) -> tuple[bytes, list[bytes]]:
579 """Read a command and some arguments from the git client.
581 Only used for the TCP git protocol (git://).
583 Returns: A tuple of (command, [list of arguments]).
584 """
585 line = self.read_pkt_line()
586 if line is None:
587 raise GitProtocolError("Expected command, got flush packet")
588 return parse_cmd_pkt(line)
# Default number of bytes requested from the transport by
# ReceivableProtocol.recv() when its read buffer is empty.
_RBUFSIZE = 65536  # 64KB buffer for better network I/O performance


class ReceivableProtocol(Protocol):
    """Variant of Protocol that allows reading up to a size without blocking.

    This class has a recv() method that behaves like socket.recv() in addition
    to a read() method.

    If you want to read n bytes from the wire and block until exactly n bytes
    (or EOF) are read, use read(n). If you want to read at most n bytes from
    the wire but don't care if you get less, use recv(n). Note that recv(n)
    will still block until at least one byte is read.

    Both methods share one internal buffer, so data buffered by one call is
    returned by the next call to either method.
    """

    def __init__(
        self,
        recv: Callable[[int], bytes],
        write: Callable[[bytes], int | None],
        close: Callable[[], None] | None = None,
        report_activity: Callable[[int, str], None] | None = None,
        rbufsize: int = _RBUFSIZE,
    ) -> None:
        """Initialize ReceivableProtocol.

        Args:
          recv: Function to receive bytes from the transport; called with the
            maximum number of bytes wanted
          write: Function to write bytes to the transport
          close: Optional function to close the transport
          report_activity: Optional function to report activity
          rbufsize: Number of bytes recv() requests from the transport when
            its buffer is empty
        """
        # The base class reads through our own buffered read() method.
        super().__init__(self.read, write, close=close, report_activity=report_activity)
        self._recv = recv
        # Holds received but not yet consumed bytes; its position marks the
        # start of the unconsumed part.
        self._rbuf = BytesIO()
        self._rbufsize = rbufsize

    def read(self, size: int) -> bytes:
        """Read bytes from the socket.

        Keeps receiving until size bytes are available or the transport
        returns no more data.

        Args:
          size: Number of bytes to read; must be positive

        Returns:
          Bytes read from socket; fewer than size bytes only if the
          transport ran out of data
        """
        # From _fileobj.read in socket.py in the Python 2.6.5 standard library,
        # with the following modifications:
        #  - omit the size <= 0 branch
        #  - seek back to start rather than 0 in case some buffer has been
        #    consumed.
        #  - use SEEK_END instead of the magic number.
        # Copyright (c) 2001-2010 Python Software Foundation; All Rights
        # Reserved
        # Licensed under the Python Software Foundation License.
        # TODO: see if buffer is more efficient than cBytesIO.
        assert size > 0

        # Our use of BytesIO rather than lists of string objects returned by
        # recv() minimizes memory usage and fragmentation that occurs when
        # rbufsize is large compared to the typical return value of recv().
        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        # buffer may have been partially consumed by recv()
        buf_len = buf.tell() - start
        if buf_len >= size:
            # Already have size bytes in our buffer?  Extract and return.
            buf.seek(start)
            rv = buf.read(size)
            # Carry the unread remainder over into a fresh buffer.
            self._rbuf = BytesIO()
            self._rbuf.write(buf.read())
            self._rbuf.seek(0)
            return rv

        self._rbuf = BytesIO()  # reset _rbuf.  we consume it via buf.
        while True:
            left = size - buf_len
            # recv() will malloc the amount of memory given as its
            # parameter even though it often returns much less data
            # than that.  The returned data string is short lived
            # as we copy it into a BytesIO and free it.  This avoids
            # fragmentation issues on many platforms.
            data = self._recv(left)
            if not data:
                # The transport has no more data; return what we have.
                break
            n = len(data)
            if n == size and not buf_len:
                # Shortcut.  Avoid buffer data copies when:
                # - We have no data in our buffer.
                # AND
                # - Our call to recv returned exactly the
                #   number of bytes we were asked to read.
                return data
            if n == left:
                # This chunk completes the request.
                buf.write(data)
                del data  # explicit free
                break
            assert n <= left, f"_recv({left}) returned {n} bytes"
            buf.write(data)
            buf_len += n
            del data  # explicit free
            # assert buf_len == buf.tell()
        # Return everything from the first unconsumed byte onwards.
        buf.seek(start)
        return buf.read()

    def recv(self, size: int) -> bytes:
        """Receive bytes from the socket with buffering.

        Buffered data is returned first; the transport is only asked for more
        (self._rbufsize bytes at most) when the buffer is empty.

        Args:
          size: Maximum number of bytes to receive; must be positive

        Returns:
          Bytes received from socket
        """
        assert size > 0

        buf = self._rbuf
        start = buf.tell()
        buf.seek(0, SEEK_END)
        buf_len = buf.tell()
        buf.seek(start)

        # Number of buffered bytes that have not been consumed yet.
        left = buf_len - start
        if not left:
            # only read from the wire if our read buffer is exhausted
            data = self._recv(self._rbufsize)
            if len(data) == size:
                # shortcut: skip the buffer if we read exactly size bytes
                return data
            buf = BytesIO()
            buf.write(data)
            buf.seek(0)
            del data  # explicit free
            self._rbuf = buf
        return buf.read(size)
def extract_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate a NUL-delimited capability list from a line, if there is one.

    Args:
      text: Line to extract from
    Returns: Tuple of the text before the NUL byte and the list of
      space-separated capabilities after it. Without a NUL byte the text is
      returned unchanged together with an empty list.
    """
    if text.find(b"\0") < 0:
        return text, []
    head, raw_caps = text.rstrip().split(b"\0")
    capabilities = raw_caps.strip().split(b" ")
    return (head, capabilities)
def extract_want_line_capabilities(text: bytes) -> tuple[bytes, list[bytes]]:
    """Separate the capability list from a want line, if there is one.

    Want lines separate capabilities from the rest of the line with a space
    rather than a NUL byte, so they look like this:

        want obj-id cap1 cap2 ...

    Args:
      text: Want line to extract from
    Returns: Tuple of the first two fields joined by a space and the list of
      capabilities. A line with fewer than three fields is returned
      unchanged together with an empty list.
    """
    fields = text.rstrip().split(b" ")
    head, capabilities = fields[:2], fields[2:]
    if not capabilities:
        return text, []
    return (b" ".join(head), capabilities)
def ack_type(capabilities: Iterable[bytes]) -> int:
    """Determine the ack mode selected by a capabilities list.

    Returns:
      MULTI_ACK_DETAILED if ``multi_ack_detailed`` is present, else MULTI_ACK
      if ``multi_ack`` is present, else SINGLE_ACK
    """
    if b"multi_ack_detailed" in capabilities:
        return MULTI_ACK_DETAILED
    if b"multi_ack" in capabilities:
        return MULTI_ACK
    return SINGLE_ACK
769def find_capability(
770 capabilities: Iterable[bytes], *capability_names: bytes
771) -> bytes | None:
772 """Find a capability value in a list of capabilities.
774 This function looks for capabilities that may include arguments after an equals sign
775 and returns only the value part (after the '='). For capabilities without values,
776 returns the capability name itself.
778 Args:
779 capabilities: List of capability strings
780 capability_names: Capability name(s) to search for
782 Returns:
783 The value after '=' if found, or the capability name if no '=', or None if not found
785 Example:
786 >>> caps = [b'filter=blob:none', b'agent=git/2.0', b'thin-pack']
787 >>> find_capability(caps, b'filter')
788 b'blob:none'
789 >>> find_capability(caps, b'thin-pack')
790 b'thin-pack'
791 >>> find_capability(caps, b'missing')
792 None
793 """
794 for cap in capabilities:
795 for name in capability_names:
796 if cap == name:
797 return cap
798 elif cap.startswith(name + b"="):
799 return cap[len(name) + 1 :]
800 return None
803class BufferedPktLineWriter:
804 """Writer that wraps its data in pkt-lines and has an independent buffer.
806 Consecutive calls to write() wrap the data in a pkt-line and then buffers
807 it until enough lines have been written such that their total length
808 (including length prefix) reach the buffer size.
809 """
811 def __init__(
812 self, write: Callable[[bytes], int | None], bufsize: int = 65515
813 ) -> None:
814 """Initialize the BufferedPktLineWriter.
816 Args:
817 write: A write callback for the underlying writer.
818 bufsize: The internal buffer size, including length prefixes.
819 """
820 self._write = write
821 self._bufsize = bufsize
822 self._wbuf = BytesIO()
823 self._buflen = 0
825 def write(self, data: bytes) -> None:
826 """Write data, wrapping it in a pkt-line."""
827 line = pkt_line(data)
828 line_len = len(line)
829 over = self._buflen + line_len - self._bufsize
830 if over >= 0:
831 start = line_len - over
832 self._wbuf.write(line[:start])
833 self.flush()
834 else:
835 start = 0
836 saved = line[start:]
837 self._wbuf.write(saved)
838 self._buflen += len(saved)
840 def flush(self) -> None:
841 """Flush all data from the buffer."""
842 data = self._wbuf.getvalue()
843 if data:
844 self._write(data)
845 self._len = 0
846 self._wbuf = BytesIO()
849class PktLineParser:
850 """Packet line parser that hands completed packets off to a callback."""
852 def __init__(self, handle_pkt: Callable[[bytes | None], None]) -> None:
853 """Initialize PktLineParser.
855 Args:
856 handle_pkt: Callback function to handle completed packets
857 """
858 self.handle_pkt = handle_pkt
859 self._readahead = BytesIO()
861 def parse(self, data: bytes) -> None:
862 """Parse a fragment of data and call back for any completed packets."""
863 self._readahead.write(data)
864 buf = self._readahead.getvalue()
865 if len(buf) < 4:
866 return
867 while len(buf) >= 4:
868 size = int(buf[:4], 16)
869 if size == 0:
870 self.handle_pkt(None)
871 buf = buf[4:]
872 elif size <= len(buf):
873 self.handle_pkt(buf[4:size])
874 buf = buf[size:]
875 else:
876 break
877 self._readahead = BytesIO()
878 self._readahead.write(buf)
880 def get_tail(self) -> bytes:
881 """Read back any unused data."""
882 return self._readahead.getvalue()
def format_capability_line(capabilities: Iterable[bytes]) -> bytes:
    """Serialize a capabilities list for the wire protocol.

    Args:
      capabilities: List of capability strings

    Returns:
      The capabilities concatenated, each preceded by a single space
    """
    pieces: list[bytes] = []
    for capability in capabilities:
        pieces.append(b" " + capability)
    return b"".join(pieces)
897def format_ref_line(
898 ref: bytes, sha: bytes, capabilities: Sequence[bytes] | None = None
899) -> bytes:
900 """Format a ref advertisement line.
902 Args:
903 ref: Reference name
904 sha: SHA hash
905 capabilities: Optional list of capabilities
907 Returns:
908 Formatted ref line
909 """
910 if capabilities is None:
911 return sha + b" " + ref + b"\n"
912 else:
913 return sha + b" " + ref + b"\0" + format_capability_line(capabilities) + b"\n"
def format_shallow_line(sha: bytes) -> bytes:
    """Build a shallow line.

    Args:
      sha: SHA to mark as shallow

    Returns:
      The shallow command keyword and the SHA separated by a space
    """
    return b" ".join((COMMAND_SHALLOW, sha))
def format_unshallow_line(sha: bytes) -> bytes:
    """Build an unshallow line.

    Args:
      sha: SHA to unshallow

    Returns:
      The unshallow command keyword and the SHA separated by a space
    """
    return b" ".join((COMMAND_UNSHALLOW, sha))
def format_ack_line(sha: bytes, ack_type: bytes = b"") -> bytes:
    """Build an ACK line.

    Args:
      sha: SHA to acknowledge
      ack_type: Optional ACK type (e.g. b"continue"); when non-empty it is
        appended after the SHA, separated by a space

    Returns:
      Formatted ACK line, terminated by a newline
    """
    suffix = b" " + ack_type if ack_type else ack_type
    return b"ACK " + sha + suffix + b"\n"
def strip_peeled_refs(
    refs: "Mapping[Ref, ObjectID | None]",
) -> "dict[Ref, ObjectID | None]":
    """Return a copy of a refs dictionary without its peeled entries.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      New dictionary holding only the refs whose name lacks the ^{} suffix
    """
    kept = {}
    for name, sha in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            continue
        kept[name] = sha
    return kept
def split_peeled_refs(
    refs: "Mapping[Ref, ObjectID]",
) -> "tuple[dict[Ref, ObjectID], dict[Ref, ObjectID]]":
    """Partition a refs dictionary into regular and peeled refs.

    Args:
      refs: Dictionary of refs (may include peeled refs with ^{} suffix)

    Returns:
      Tuple of (regular_refs, peeled_refs) where peeled_refs keys have
      the ^{} suffix removed
    """
    from .refs import Ref

    suffix_len = len(PEELED_TAG_SUFFIX)
    regular = {}
    peeled: dict[Ref, ObjectID] = {}
    for name, sha in refs.items():
        if name.endswith(PEELED_TAG_SUFFIX):
            # Key the peeled value by the name of the ref it belongs to.
            peeled[Ref(name[:-suffix_len])] = sha
        else:
            regular[name] = sha

    return regular, peeled
def write_info_refs(
    refs: "Mapping[Ref, ObjectID]", store: "ObjectContainer"
) -> "Iterator[bytes]":
    """Produce info/refs lines as used by the dumb HTTP protocol.

    Refs are emitted in sorted order. Refs whose object is missing from the
    store are skipped. A ref whose object peels to a different object gets an
    additional line with the ^{} suffix for the peeled object.

    Args:
      refs: Dictionary of refs
      store: Object store to peel tags from

    Yields:
      Lines in info/refs format (sha + tab + refname)
    """
    from .object_store import peel_sha
    from .refs import HEADREF

    for name, sha in sorted(refs.items()):
        if name == HEADREF:
            # get_refs() includes HEAD as a special case, but it is not
            # advertised here.
            continue
        try:
            obj = store[sha]
        except KeyError:
            continue
        peeled = peel_sha(store, sha)[1]
        yield b"".join((obj.id, b"\t", name, b"\n"))
        if obj.id != peeled.id:
            yield b"".join((peeled.id, b"\t", name, PEELED_TAG_SUFFIX, b"\n"))
def serialize_refs(
    store: "ObjectContainer", refs: "Mapping[Ref, ObjectID]"
) -> "dict[bytes, ObjectID]":
    """Prepare refs, including peeled refs, for Git protocol v0/v1.

    Every ref is peeled through the object store. A ref that points at a tag
    contributes two entries: the ref itself and a ``^{}``-suffixed entry for
    the object the tag peels to. Refs whose object is not in the store are
    skipped with a warning.

    Args:
      store: Object store to peel refs from
      refs: Dictionary of ref names to SHAs

    Returns:
      Dictionary with refs and peeled refs (marked with ^{})
    """
    import warnings

    from .object_store import peel_sha
    from .objects import Tag

    serialized: dict[bytes, ObjectID] = {}
    for name, sha in refs.items():
        try:
            unpeeled, peeled = peel_sha(store, ObjectID(sha))
        except KeyError:
            message = "ref {} points at non-present sha {}".format(
                name.decode("utf-8", "replace"), sha.decode("ascii")
            )
            warnings.warn(message, UserWarning)
            continue
        if isinstance(unpeeled, Tag):
            serialized[name + PEELED_TAG_SUFFIX] = peeled.id
        serialized[name] = unpeeled.id
    return serialized