1"""
2Internal cookie handling helpers.
3
4This module contains internal utilities for cookie parsing and manipulation.
5These are not part of the public API and may change without notice.
6"""
7
8import re
9from http.cookies import Morsel
10from typing import List, Optional, Sequence, Tuple, cast
11
12from .log import internal_logger
13
# Names exported by a star import of this module: the three parsing and
# preservation entry points defined below. Every other module-level name is
# underscore-prefixed.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)
19
# Cookie parsing constants.
#
# Name validation is intentionally looser than the RFC 6265 cookie-name
# token (RFC 2616 Section 2.2): many servers emit names containing
# characters such as {} [] (), so those are accepted to cope with
# real-world cookies (fixes #2683). Names containing anything outside this
# character set (for example whitespace, ';', ',' or '"') are still rejected
# as obviously malformed.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Lower-case attribute names recognised on a cookie (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "comment",
        "domain",
        "expires",
        "httponly",
        "max-age",
        "partitioned",
        "path",
        "samesite",
        "secure",
        "version",
    }
)

# Subset of the attributes above that act as flags (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset({"httponly", "partitioned", "secure"})
45
# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
#
# The pattern matches one ``key`` or ``key=value`` item at a time and is
# applied repeatedly with ``.match(header, pos)``. It exposes two named groups:
#   key - the cookie or attribute name (always present on a match)
#   val - the raw value, still carrying any surrounding quotes; it is None
#         when the item has no "=value" part
# A match ends at whitespace, a semicolon or the end of the string, so
# ``match.end(0)`` is the position at which to look for the next item.
# The pattern is compiled with re.VERBOSE (whitespace and "#" comments inside
# the pattern are ignored) and re.ASCII (\w, \d and \s match ASCII only).
_COOKIE_PATTERN = re.compile(
    r"""
    \s* # Optional whitespace at start of cookie
    (?P<key> # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter
    ) # End of group 'key'
    ( # Optional group: there may not be a value.
    \s*=\s* # Equal Sign
    (?P<val> # Start of group 'val'
    "(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed)
    | # or
    "[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    | # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format
    (GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100
    # NOTE: RFC 2822 timezone support is an aiohttp extension
    # for issue #4493 - SimpleCookie does NOT support this
    | # or
    # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    | # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string
    ) # End of group 'val'
    )? # End of optional value group
    \s* # Any number of spaces.
    (\s+|;|$) # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)
82
83
def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only the key, value and coded_value are carried over; the returned Morsel
    is a new object whose attributes (path, domain, ...) are left at their
    defaults, and the input Morsel is not modified.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. Looking the cookie's own name up in
    # the Morsel mapping (``cookie.get(cookie.key, Morsel())``) yields the
    # stored attribute string, not a Morsel, whenever the cookie is named
    # like an attribute ("path", "domain", ...), and a str has no
    # __setstate__.
    mrsl_val: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    mrsl_val.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return mrsl_val
112
113
# Bound ``sub`` method of the escape-sequence pattern used by _unquote().
# Group 1 captures a three-digit octal escape (first digit 0-3, e.g. \012);
# group 2 captures any other single character that follows a backslash.
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub
115
116
def _unquote_replace(m: re.Match[str]) -> str:
    """
    Translate one backslash escape matched by the _unquote_sub pattern.

    Group 1 of the match holds a three-digit octal escape, group 2 any other
    escaped character:
    - an octal escape becomes the character with that code point
    - anything else is returned as-is, i.e. the backslash is dropped
    """
    octal_digits, escaped_char = m[1], m[2]
    if not octal_digits:
        return escaped_char
    return chr(int(octal_digits, 8))
128
129
def _unquote(value: str) -> str:
    """
    Strip the surrounding double quotes from a cookie value and decode escapes.

    Vendored from http.cookies._unquote to ensure compatibility.

    A value that is not wrapped in a pair of double quotes is returned
    unchanged. Otherwise the quotes are removed and backslash escapes are
    decoded, for example:
        \\012 --> newline
        \\"   --> "

    Note: The original implementation checked for None, but that check is
    omitted here since all callers already ensure the value is not None.
    """
    # Without enclosing double quotes there can't be any special characters
    # to decode. See RFC 2109. A lone '"' is too short to be a quoted pair.
    is_quoted = len(value) >= 2 and value[0] == '"' and value[-1] == '"'
    if not is_quoted:
        return value

    # Drop the quotes, then resolve every escape sequence inside them.
    inner = value[1:-1]
    return _unquote_sub(_unquote_replace, inner)
157
158
def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]:
    """
    Split a Cookie request header into its cookies (RFC 6265 Section 5.4).

    A Cookie header carries nothing but name-value pairs separated by
    semicolons. It has no attributes, so a name that happens to look like
    one (such as 'path' or 'secure') is an ordinary cookie here.

    Items are located with the same regex as parse_set_cookie_headers, which
    copes with quoted values containing semicolons. Where the regex cannot
    match (a malformed cookie), the text up to the next semicolon is split
    on its first '=' instead, so that the cookies that follow are not lost
    https://github.com/aio-libs/aiohttp/issues/11632

    Cookies whose name fails validation are skipped with a warning.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    parsed: List[Tuple[str, Morsel[str]]] = []
    if not header:
        return parsed

    length = len(header)
    pos = 0
    while pos < length:
        found = _COOKIE_PATTERN.match(header, pos)
        if found is not None:
            # Regular path: the shared cookie pattern recognised an item.
            name = found.group("key")
            raw_value = found.group("val") or ""
            pos = found.end(0)
            name_is_valid = bool(name) and _COOKIE_NAME_RE.match(name) is not None
        else:
            # Fallback for malformed cookies
            # https://github.com/aio-libs/aiohttp/issues/11632
            segment_start = pos
            semi_at = header.find(";", segment_start)
            eq_at = header.find("=", segment_start)
            has_semi = semi_at != -1

            # Whatever happens below, resume after this segment.
            pos = semi_at + 1 if has_semi else length

            # Without an '=' ahead of the next ';' there is no key=value
            # to salvage; skip the segment silently.
            if eq_at == -1 or (has_semi and eq_at >= semi_at):
                continue

            segment_end = semi_at if has_semi else length
            name = header[segment_start:eq_at].strip()
            raw_value = header[eq_at + 1 : segment_end].strip()
            name_is_valid = _COOKIE_NAME_RE.match(name) is not None

        if not name_is_valid:
            internal_logger.warning("Can not load cookie: Illegal cookie name %r", name)
            continue

        # The raw text (quotes included) is kept as coded_value.
        # __setstate__ is used instead of the public set() API because it
        # bypasses validation of state that has already been validated. It is
        # more stable than assigning protected attributes and unlikely to
        # change, since that would break pickling.
        entry: Morsel[str] = Morsel()
        entry.__setstate__(  # type: ignore[attr-defined]
            {"key": name, "value": _unquote(raw_value), "coded_value": raw_value}
        )
        parsed.append((name, entry))

    return parsed
241
242
def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
    headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: The header values to parse; empty values are skipped

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
        Attributes that follow a cookie are stored on that cookie's Morsel.
        Parsing of a header stops at the first item that makes the cookie
        string invalid; cookies found before that point are kept.
    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that receives attributes; None after an illegal cookie name
        # so that the attributes following it are discarded.
        current_morsel: Optional[Morsel[str]] = None
        # True once a legal name=value pair has been seen in this header.
        morsel_seen = False

        while 0 <= i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    # Only assign attributes this interpreter's Morsel
                    # accepts: Morsel.__setitem__ raises CookieError for a
                    # key it does not reserve (the same guard is applied to
                    # boolean attributes below).
                    if (
                        attr_lower_key in _COOKIE_KNOWN_ATTRS
                        and current_morsel.isReservedKey(attr_lower_key)
                    ):
                        current_morsel[attr_lower_key] = value or ""
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None and current_morsel.isReservedKey(key):
                        current_morsel[lower_key] = True
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair
                # Validate the name. The known-attribute test is purely
                # defensive: lower-case attribute names were already routed
                # to the branch above.
                if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies