Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/_cookie_helpers.py: 16%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
Internal cookie handling helpers.

This module contains internal utilities for cookie parsing and manipulation.
These are not part of the public API and may change without notice.
6"""
8import re
9from collections.abc import Sequence
10from http.cookies import Morsel
11from typing import cast
13from .log import internal_logger
# Names exported by a star-import of this module.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)
# Cookie parsing constants
# Allow more characters in cookie names to handle real-world cookies
# that don't strictly follow RFC standards (fixes #2683)
# RFC 6265 defines cookie-name token as per RFC 2616 Section 2.2,
# but many servers send cookies with characters like {} [] () etc.
# This makes the cookie parser more tolerant of real-world cookies
# while still providing some validation to catch obviously malformed names.
#
# The pattern is anchored with ``\Z`` rather than ``$``: ``$`` also matches
# just before a trailing newline, which would let a name such as "sid\n"
# pass validation.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+\Z")

# Attribute names (lower-case) that are treated as cookie attributes rather
# than as cookie names when parsing Set-Cookie style headers.
_COOKIE_KNOWN_ATTRS = frozenset(  # AKA Morsel._reserved
    (
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    )
)

# Subset of the known attributes that are flags: their mere presence means
# True, whatever value (if any) accompanies them.
_COOKIE_BOOL_ATTRS = frozenset(  # AKA Morsel._flags
    ("secure", "httponly", "partitioned")
)
# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
#
# One match consumes a single ``key`` or ``key=val`` item plus its trailing
# delimiter (whitespace, ';' or end of string). The named group ``val`` is
# None when the item has no '=' part. Compiled with re.VERBOSE, so the
# whitespace and '#' comments inside the literal are not part of the pattern,
# and with re.ASCII, so ``\w``, ``\d`` and ``\s`` only match ASCII characters.
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\[\]]+   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any double-quoted string (properly closed)
    |                                  # or
    "[^";]*                            # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    |                                  # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s              # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s        # Date and time in specific format
    (GMT|[+-]\d{4})                    # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                       # NOTE: RFC 2822 timezone support is an aiohttp extension
                                       # for issue #4493 - SimpleCookie does NOT support this
    |                                  # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    |                                  # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*      # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are carried over; the returned
    Morsel is a new object whose attributes (path, domain, ...) are left at
    their defaults.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. Looking the cookie's own name up in the
    # source Morsel (a dict of attributes) would hand back an attribute *string*
    # for cookies named like a reserved attribute (e.g. "path"), and a string
    # has no __setstate__.
    preserved: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    preserved.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return preserved
# Bound ``sub`` method of the pattern that matches one backslash escape:
# group 1 is a three-digit octal code (000-377), group 2 is any other single
# escaped character.
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: re.Match[str]) -> str:
    """
    Replace function for _unquote_sub regex substitution.

    Handles escaped characters in cookie values:
    - Octal sequences are converted to their character representation
    - Other escaped characters are unescaped by removing the backslash

    Args:
        m: A match produced by the _unquote_sub pattern

    Returns:
        The single character the escape sequence stands for
    """
    octal_digits = m[1]
    if octal_digits:
        # Group 1 matched: interpret the three digits as a base-8 code point.
        return chr(int(octal_digits, 8))
    # Otherwise group 2 holds the character that followed the backslash.
    return m[2]
def _unquote(value: str) -> str:
    """
    Unquote a cookie value.

    Vendored from http.cookies._unquote to ensure compatibility.

    Note: The original implementation checked for None, but we've removed
    that check since all callers already ensure the value is not None.

    Args:
        value: The raw cookie value, possibly wrapped in double quotes

    Returns:
        ``value`` unchanged unless it both starts and ends with a double
        quote; otherwise the text between the quotes with backslash escapes
        decoded
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters. See RFC 2109.
    # (A lone '"' is too short to be a quoted string and is returned as is.)
    if len(value) < 2 or value[0] != '"' or value[-1] != '"':
        return value

    # We have to assume that we must decode this string.
    # Remove the surrounding quotes, then check for special sequences.
    # Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    return _unquote_sub(_unquote_replace, value[1:-1])
def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    Cookie headers contain only name-value pairs separated by semicolons.
    There are no attributes in Cookie headers - even names that match
    attribute names (like 'path' or 'secure') should be treated as cookies.

    This parser uses the same regex-based approach as parse_set_cookie_headers
    to properly handle quoted values that may contain semicolons. When the
    regex fails to match a malformed cookie, it falls back to simple parsing
    to ensure subsequent cookies are not lost
    https://github.com/aio-libs/aiohttp/issues/11632

    Items whose name fails validation are skipped; the rejected names are
    reported together in a single debug log record.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    cookies: list[tuple[str, Morsel[str]]] = []
    invalid_names: list[str] = []

    def add_cookie(name: str, raw_value: str) -> None:
        # Shared by the regex path and the fallback path so both validate and
        # build morsels in exactly the same way.
        if not _COOKIE_NAME_RE.match(name):
            invalid_names.append(name)
            return
        morsel: Morsel[str] = Morsel()
        # Preserve the original value as coded_value (with quotes if present)
        # We use __setstate__ instead of the public set() API because it allows us to
        # bypass validation and set already validated state. This is more stable than
        # setting protected attributes directly and unlikely to change since it would
        # break pickling.
        morsel.__setstate__(  # type: ignore[attr-defined]
            {"key": name, "value": _unquote(raw_value), "coded_value": raw_value}
        )
        cookies.append((name, morsel))

    i = 0
    n = len(header)
    while i < n:
        # Use the same pattern as parse_set_cookie_headers to find cookies
        match = _COOKIE_PATTERN.match(header, i)
        if match:
            # The key group needs at least one character, so every match
            # advances ``i`` and the loop terminates.
            i = match.end(0)
            # A bare name without '=' is kept as a cookie with an empty value.
            add_cookie(match.group("key"), match.group("val") or "")
            continue

        # Fallback for malformed cookies https://github.com/aio-libs/aiohttp/issues/11632
        # Find next semicolon to skip or attempt simple key=value parsing
        next_semi = header.find(";", i)
        eq_pos = header.find("=", i)

        # Try to extract key=value if '=' comes before ';'
        if eq_pos != -1 and (next_semi == -1 or eq_pos < next_semi):
            end_pos = next_semi if next_semi != -1 else n
            add_cookie(
                header[i:eq_pos].strip(),
                header[eq_pos + 1 : end_pos].strip(),
            )

        # Move to next cookie or end
        i = next_semi + 1 if next_semi != -1 else n

    if invalid_names:
        internal_logger.debug(
            "Cannot load cookie. Illegal cookie names: %r", invalid_names
        )

    return cookies
def parse_set_cookie_headers(headers: Sequence[str]) -> list[tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 4.2.1 (for Cookie
    headers) and RFC 6265 Section 5.2 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty entries are skipped

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
        Attributes that follow a cookie are stored on that cookie's Morsel.
        Parsing of one header stops at the first item that cannot be
        interpreted; cookies collected before that point are kept.
    """
    parsed_cookies: list[tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that following attributes attach to; None after a cookie
        # with an illegal name so that its attributes are discarded.
        current_morsel: Morsel[str] | None = None
        # True once any name=value pair (legal or not) has been seen.
        morsel_seen = False

        while i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    # Morsel rejects attribute names it does not reserve with
                    # CookieError, and its reserved set depends on the Python
                    # version (e.g. "partitioned" is missing on older ones),
                    # so only assign what this interpreter's Morsel accepts.
                    if (
                        attr_lower_key in _COOKIE_KNOWN_ATTRS
                        and current_morsel.isReservedKey(attr_lower_key)
                    ):
                        current_morsel[attr_lower_key] = value or ""
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None and current_morsel.isReservedKey(key):
                        current_morsel[lower_key] = True
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair
                # Validate the name. Names equal to a known attribute never
                # reach this branch: they are handled by the branch above.
                if not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies