Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/_cookie_helpers.py: 15%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Internal cookie handling helpers.
4This module contains internal utilities for cookie parsing and manipulation.
5These are not part of the public API and may change without notice.
6"""
8import re
9from collections.abc import Sequence
10from http.cookies import CookieError, Morsel
11from typing import cast
13from .log import internal_logger
# Names exposed by a star-import of this module. Per the module docstring
# these helpers are internal and may change without notice.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)
# Cookie parsing constants
#
# Allow more characters in cookie names to handle real-world cookies
# that don't strictly follow RFC standards (fixes #2683).
# RFC 6265 defines cookie-name as a token per RFC 2616 Section 2.2,
# but many servers send cookies with characters like {} [] () etc.
# The pattern is therefore tolerant, while still rejecting obviously
# malformed names (whitespace, control characters, ';', ',', '"', '\\').
#
# The pattern is anchored with ``\Z`` rather than ``$``: in Python ``$`` also
# matches just before a trailing newline, which would let "name\n" pass.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+\Z")

# Lower-cased attribute names recognised while parsing (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    (
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    )
)

# Attributes that are flags: their mere presence means True (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset(("secure", "httponly", "partitioned"))
# SimpleCookie's pattern for parsing cookies with relaxed validation.
# Based on the http.cookies pattern but extended to allow more characters in
# cookie names to handle real-world cookies (fixes #2683).
#
# One match consumes a single ``key`` or ``key=value`` item plus its trailing
# separator; callers resume matching from ``match.end(0)``. The ``val`` group
# is ``None`` when the item has no ``=`` at all.
#
# NOTE: the pattern is compiled with re.VERBOSE, so everything outside a
# character class other than whitespace and ``#`` comments is matched
# literally - keep stray text out of the pattern body.
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\[\]]+   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any double-quoted string (properly closed)
    |                                  # or
    "[^";]*                            # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    |                                  # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s              # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s        # Date and time in specific format
    (GMT|[+-]\d{4})                    # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                       # NOTE: RFC 2822 timezone support is an aiohttp extension
                                       # for issue #4493 - SimpleCookie does NOT support this
    |                                  # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    |                                  # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*   # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are copied; the returned Morsel
    is otherwise freshly constructed.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value, or ``cookie`` itself
        when ``Morsel.__setstate__`` rejects the state with ``CookieError``.

    """
    # Always start from a fresh Morsel. Looking the cookie's own name up in its
    # attribute mapping (``cookie.get(cookie.key, ...)``) returns an attribute
    # *string* when the cookie happens to be named like a reserved attribute
    # (e.g. "path"), which then has no ``__setstate__`` to call.
    mrsl_val: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    try:
        mrsl_val.__setstate__(  # type: ignore[attr-defined]
            {
                "key": cookie.key,
                "value": cookie.value,
                "coded_value": cookie.coded_value,
            }
        )
    except CookieError:
        # Best effort: hand back the caller's morsel unchanged.
        return cookie
    return mrsl_val
def _unquote_replace(m: re.Match[str]) -> str:
    """
    Expand one backslash escape matched by the ``_unquote_sub`` pattern.

    Group 1 holds a three-digit octal code (``\\000`` - ``\\377``) and is
    turned into the character with that code point; otherwise group 2 holds
    the single escaped character, which is returned without its backslash.
    """
    octal_digits = m.group(1)
    if not octal_digits:
        return m.group(2)
    return chr(int(octal_digits, 8))


# Bound ``sub`` of the escape pattern: a backslash followed by either a
# three-digit octal code or any single character (other than a newline).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
def _unquote(value: str) -> str:
    """
    Strip surrounding double quotes from a cookie value and decode escapes.

    Vendored from http.cookies._unquote to ensure compatibility.

    Note: unlike the original there is no None check; callers are expected
    to pass a string.

    A value that is not wrapped in a pair of double quotes (including any
    value shorter than two characters) is returned untouched, since without
    quotes there can't be any special characters (see RFC 2109).
    """
    is_quoted = len(value) >= 2 and value[0] == '"' and value[-1] == '"'
    if not is_quoted:
        return value

    # Drop the enclosing quotes, then resolve backslash sequences, e.g.
    #    \012 --> \n
    #    \"   --> "
    inner = value[1:-1]
    return _unquote_sub(_unquote_replace, inner)
def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    A Cookie header carries only ``name=value`` pairs separated by
    semicolons; it has no attributes. Names that look like attributes
    (such as 'path' or 'secure') are therefore treated as ordinary cookies.

    Items are located with the same regex used by parse_set_cookie_headers
    so that quoted values containing semicolons stay intact. Where the regex
    cannot match (a malformed cookie), the text up to the next semicolon is
    split on its first '=' instead, so that the cookies after it are not lost
    https://github.com/aio-libs/aiohttp/issues/11632

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    result: list[tuple[str, Morsel[str]]] = []
    rejected_names: list[str] = []
    pos = 0
    length = len(header)

    while pos < length:
        found = _COOKIE_PATTERN.match(header, pos)
        if found is not None:
            # Regular path: the shared cookie pattern recognised the item.
            name = found.group("key")
            raw_value = found.group("val") or ""
            pos = found.end(0)
        else:
            # Fallback for malformed cookies
            # https://github.com/aio-libs/aiohttp/issues/11632
            start = pos
            semi_at = header.find(";", start)
            equals_at = header.find("=", start)
            # Whatever happens, resume after this segment.
            pos = length if semi_at == -1 else semi_at + 1

            # Only a segment with '=' before its terminating ';' is usable.
            if equals_at == -1 or (semi_at != -1 and equals_at >= semi_at):
                continue

            segment_end = length if semi_at == -1 else semi_at
            name = header[start:equals_at].strip()
            raw_value = header[equals_at + 1 : segment_end].strip()

        # Validate the name (identical rule for both paths)
        if not name or not _COOKIE_NAME_RE.match(name):
            rejected_names.append(name)
            continue

        # Preserve the original text as coded_value (with quotes if present).
        # __setstate__ is used instead of the public set() API because it
        # bypasses validation for state we have already validated; it is more
        # stable than poking protected attributes and unlikely to change since
        # that would break pickling.
        crumb: Morsel[str] = Morsel()
        try:
            crumb.__setstate__(  # type: ignore[attr-defined]
                {"key": name, "value": _unquote(raw_value), "coded_value": raw_value}
            )
        except CookieError:
            continue

        result.append((name, crumb))

    if rejected_names:
        internal_logger.debug(
            "Cannot load cookie. Illegal cookie names: %r", rejected_names
        )

    return result
def _set_morsel_attribute(morsel: Morsel[str], name: str, value: object) -> None:
    """Set attribute ``name`` on ``morsel``, skipping it if Morsel refuses it."""
    # The set of attributes Morsel accepts depends on the running Python
    # (e.g. "partitioned" is only reserved on newer versions); assigning an
    # unknown one raises CookieError.
    if not morsel.isReservedKey(name):
        return
    try:
        morsel[name] = value  # type: ignore[assignment]
    except CookieError:
        # A bad attribute must not abort parsing or discard parsed cookies.
        internal_logger.debug("Ignoring invalid cookie attribute %r", name)


def parse_set_cookie_headers(headers: Sequence[str]) -> list[tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    The parser is deliberately more forgiving than RFC 6265 because real
    world data requires it.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty entries are skipped.

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
        Attributes following a cookie are applied to that cookie's Morsel.
        An attribute that Morsel rejects is skipped rather than raising.
    """
    parsed_cookies: list[tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that attributes currently attach to; None after a rejected
        # cookie so that its attributes are dropped with it.
        current_morsel: Morsel[str] | None = None
        # True once any cookie has been accepted from this header.
        morsel_seen = False

        while 0 <= i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                attr_lower_key = lower_key[1:]
                if current_morsel is not None and attr_lower_key in _COOKIE_KNOWN_ATTRS:
                    _set_morsel_attribute(current_morsel, attr_lower_key, value or "")
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None:
                        _set_morsel_attribute(current_morsel, lower_key, True)
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    _set_morsel_attribute(current_morsel, lower_key, _unquote(value))
            elif value is not None:
                # This is a cookie name=value pair.
                # Names equal to a known attribute were consumed by the
                # branch above, so only the character set needs validating.
                if not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    try:
                        current_morsel.__setstate__(  # type: ignore[attr-defined]
                            {
                                "key": key,
                                "value": _unquote(value),
                                "coded_value": value,
                            }
                        )
                    except CookieError:
                        current_morsel = None
                    else:
                        parsed_cookies.append((key, current_morsel))
                        morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies