1"""
2Internal cookie handling helpers.
3
4This module contains internal utilities for cookie parsing and manipulation.
5These are not part of the public API and may change without notice.
6"""
7
8import re
9import sys
10from http.cookies import Morsel
11from typing import List, Optional, Sequence, Tuple, cast
12
13from .log import internal_logger
14
# Names re-exported by ``from <module> import *`` (kept in alphabetical order).
__all__ = (
    "parse_cookie_header",
    "parse_set_cookie_headers",
    "preserve_morsel_with_coded_value",
)
20
# Cookie parsing constants
#
# Cookie-name validation is intentionally more permissive than the RFCs.
# RFC 6265 defines cookie-name as a token per RFC 2616 Section 2.2, yet many
# servers emit names containing characters such as {} [] () (fixes #2683).
# The pattern accepts those real-world names while still rejecting names
# that are obviously malformed.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Attribute names that may follow a cookie pair (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "comment",
        "domain",
        "expires",
        "httponly",
        "max-age",
        "partitioned",
        "path",
        "samesite",
        "secure",
        "version",
    }
)

# Known attributes that are flags and need no value (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset({"httponly", "partitioned", "secure"})
46
# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
#
# One match consumes a single ``key`` or ``key=value`` item plus its
# terminator (whitespace, ";" or end of string), so callers can walk a header
# by repeatedly matching at ``match.end(0)``.
#   * group "key": the item name (matched lazily, at least one character).
#   * group "val": the raw value, still quoted if it was quoted; ``None`` when
#     the item has no "=value" part.
# The pattern is compiled with re.VERBOSE, so whitespace and "#" comments
# inside the literal are not part of the expression, and with re.ASCII, so
# \w, \d and \s only match ASCII characters.
_COOKIE_PATTERN = re.compile(
    r"""
    \s* # Optional whitespace at start of cookie
    (?P<key> # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter
    ) # End of group 'key'
    ( # Optional group: there may not be a value.
    \s*=\s* # Equal Sign
    (?P<val> # Start of group 'val'
    "(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed)
    | # or
    "[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    | # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format
    (GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100
    # NOTE: RFC 2822 timezone support is an aiohttp extension
    # for issue #4493 - SimpleCookie does NOT support this
    | # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    | # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string
    ) # End of group 'val'
    )? # End of optional value group
    \s* # Any number of spaces.
    (\s+|;|$) # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
83
84
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only the key, value and coded_value are carried over; the returned Morsel
    is a fresh object whose attributes (path, domain, ...) are left at their
    defaults, and the input Morsel is never modified.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. Looking the cookie's own name up in
    # ``cookie`` (a mapping of *attributes*) only ever finds something when the
    # name collides with a reserved attribute such as "path", and then it
    # yields that attribute's value rather than a Morsel.
    mrsl_val: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    mrsl_val.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return mrsl_val
113
114
# Bound ``sub`` method of the escape-sequence pattern used when unquoting.
# Group 1 captures a three-digit octal escape (\000-\377); group 2 captures
# any other single backslash-escaped character.
_unquote_sub = re.compile(
    r"\\(?:([0-3][0-7][0-7])|(.))"
).sub
116
117
def _unquote_replace(m: re.Match[str]) -> str:
    """
    Return the replacement text for one escape matched by _unquote_sub.

    Group 1 of the match holds an octal escape, which is turned into the
    character with that code point. Otherwise group 2 holds the escaped
    character, which is returned without its backslash.
    """
    octal_digits, escaped_char = m[1], m[2]
    return chr(int(octal_digits, 8)) if octal_digits else escaped_char
129
130
def _unquote(value: str) -> str:
    """
    Strip the surrounding double quotes from a cookie value and decode escapes.

    Vendored from http.cookies._unquote to ensure compatibility.

    A value that is not wrapped in a pair of double quotes is returned
    untouched: without the quotes there can't be any special characters
    (see RFC 2109).

    Note: The original implementation checked for None; that check is omitted
    here because callers are expected to pass a string.
    """
    is_quoted = len(value) >= 2 and value[0] == '"' and value[-1] == '"'
    if not is_quoted:
        return value

    # Drop the enclosing quotes, then expand the escape sequences, e.g.
    #   \012 --> \n
    #   \"   --> "
    inner = value[1:-1]
    return _unquote_sub(_unquote_replace, inner)
158
159
def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    Cookie headers contain only name-value pairs separated by semicolons.
    There are no attributes in Cookie headers - even names that match
    attribute names (like 'path' or 'secure') should be treated as cookies.

    This parser uses the same regex-based approach as parse_set_cookie_headers
    to properly handle quoted values that may contain semicolons.

    A pair that the pattern cannot match at all (for example a value with
    unescaped quotes such as ``g_state={"i_l":0}``) is skipped up to the next
    semicolon so that the cookies following it are still returned. Pairs whose
    name fails validation are logged and skipped.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    cookies: List[Tuple[str, Morsel[str]]] = []
    i = 0
    n = len(header)

    while i < n:
        # Use the same pattern as parse_set_cookie_headers to find cookies
        match = _COOKIE_PATTERN.match(header, i)
        if not match:
            # Malformed pair: resume after the next separator instead of
            # giving up, otherwise one bad cookie would silently discard
            # every cookie that follows it.
            next_semicolon = header.find(";", i)
            if next_semicolon == -1:
                break
            # Strictly greater than i, so the loop always makes progress.
            i = next_semicolon + 1
            continue

        key = match.group("key")
        # A bare name without "=value" is kept as a cookie with an empty value.
        value = match.group("val") or ""
        i = match.end(0)

        # Validate the name
        if not key or not _COOKIE_NAME_RE.match(key):
            internal_logger.warning("Can not load cookie: Illegal cookie name %r", key)
            continue

        # Create new morsel
        morsel: Morsel[str] = Morsel()
        # Preserve the original value as coded_value (with quotes if present)
        # We use __setstate__ instead of the public set() API because it allows us to
        # bypass validation and set already validated state. This is more stable than
        # setting protected attributes directly and unlikely to change since it would
        # break pickling.
        morsel.__setstate__(  # type: ignore[attr-defined]
            {"key": key, "value": _unquote(value), "coded_value": value}
        )

        cookies.append((key, morsel))

    return cookies
213
214
def _set_morsel_attribute(morsel: Morsel[str], name: str, value: object) -> None:
    """Store attribute *name* on *morsel*, including "partitioned" on old Pythons."""
    if name == "partitioned" and sys.version_info < (3, 14):
        # Before Python 3.14 Morsel does not accept "partitioned" through
        # item assignment, so write it straight into the underlying dict.
        dict.__setitem__(morsel, name, value)
    else:
        morsel[name] = value


def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would follow RFC 6265 strictly for each header type,
    but real world data makes that impossible, so parsing is more lenient.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty entries are ignored.

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
        Attributes following a cookie are stored on that cookie's Morsel.
        Parsing of one header stops at the first structurally invalid item;
        cookies found before that point are kept.
    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that attributes currently attach to; None after a rejected
        # cookie so that its attributes are discarded as well.
        current_morsel: Optional[Morsel[str]] = None
        # True once any name=value pair has been seen in this header,
        # even if that pair was rejected.
        morsel_seen = False

        while i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    if attr_lower_key in _COOKIE_KNOWN_ATTRS:
                        # Goes through the helper so that "$Partitioned"
                        # cannot raise CookieError on Python < 3.14.
                        _set_morsel_attribute(
                            current_morsel, attr_lower_key, value or ""
                        )
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None:
                        _set_morsel_attribute(current_morsel, lower_key, True)
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair.
                # Reserved attribute names never reach this branch (they are
                # handled above), so only the name syntax needs validating.
                if not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies