Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/_cookie_helpers.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

105 statements  

1""" 

2Internal cookie handling helpers. 

3 

4This module contains internal utilities for cookie parsing and manipulation. 

5These are not part of the public API and may change without notice. 

6""" 

7 

8import re 

9from collections.abc import Sequence 

10from http.cookies import Morsel 

11from typing import cast 

12 

13from .log import internal_logger 

14 

# Names re-exported by this internal helper module.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)

20 

# Cookie parsing constants
# Allow more characters in cookie names to handle real-world cookies
# that don't strictly follow RFC standards (fixes #2683)
# RFC 6265 defines cookie-name token as per RFC 2616 Section 2.2,
# but many servers send cookies with characters like {} [] () etc.
# This makes the cookie parser more tolerant of real-world cookies
# while still providing some validation to catch obviously malformed names.
# The whole name must consist of characters from the class below; whitespace,
# double quotes, commas, semicolons and backslashes are rejected.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Lower-cased attribute names the Set-Cookie parser treats as cookie
# attributes rather than as cookie names.
_COOKIE_KNOWN_ATTRS = frozenset(  # AKA Morsel._reserved
    (
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    )
)

# Attributes that act as flags: their presence alone means True and any
# value supplied with them is ignored by the Set-Cookie parser.
_COOKIE_BOOL_ATTRS = frozenset(  # AKA Morsel._flags
    ("secure", "httponly", "partitioned")
)

46 

# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
# One match consumes a single "key[=value]" item together with its trailing
# separator, so callers can resume scanning at match.end(0).
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
        # aiohttp has extended to include [] for compatibility with real-world cookies
        [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\[\]]+   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
        \s*=\s*                    # Equal Sign
        (?P<val>                   # Start of group 'val'
            "(?:[^\\"]|\\.)*"      # Any double-quoted string (properly closed)
            |                      # or
            "[^";]*                # Unmatched opening quote (differs from SimpleCookie - issue #7993)
            |                      # or
            # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
            (\w{3,6}day|\w{3}),\s  # Day of the week or abbreviated day (with comma)
            [\w\d\s-]{9,11}\s[\d:]{8}\s  # Date and time in specific format
            (GMT|[+-]\d{4})        # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                   # NOTE: RFC 2822 timezone support is an aiohttp extension
                                   # for issue #4493 - SimpleCookie does NOT support this
            |                      # or
            # ANSI C asctime() format: "Wed Jun  9 10:18:14 2021"
            # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
            \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
            |                      # or
            [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*  # Any word or empty string
        )                          # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)

83 

84 

def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are written to the returned
    Morsel by this function.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A Morsel object with preserved coded_value

    """
    # A Morsel is a dict of cookie *attributes*, so looking up the cookie's own
    # name normally finds nothing and a fresh Morsel is used.  When the cookie
    # is named like an attribute (e.g. "path"), the lookup yields that
    # attribute's string value instead; it has no __setstate__, so it must not
    # be used as the target.
    existing = cookie.get(cookie.key)
    mrsl_val: Morsel[str] = existing if isinstance(existing, Morsel) else Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    mrsl_val.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return mrsl_val

113 

114 

# Bound ``sub`` of the escape-sequence pattern used when unquoting: group 1
# captures a three-digit octal escape, group 2 any other escaped character.
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub

116 

117 

def _unquote_replace(m: re.Match[str]) -> str:
    """
    Replace function for _unquote_sub regex substitution.

    Handles escaped characters in cookie values:
    - Octal sequences are converted to their character representation
    - Other escaped characters are unescaped by removing the backslash

    Args:
        m: A match whose group 1 holds an octal escape (if any) and whose
            group 2 holds the single escaped character otherwise.

    Returns:
        The replacement text for the matched escape sequence.
    """
    if m[1]:
        # Three octal digits -> the character with that code point.
        return chr(int(m[1], 8))
    return m[2]

129 

130 

def _unquote(value: str) -> str:
    """
    Unquote a cookie value.

    Vendored from http.cookies._unquote to ensure compatibility.

    Note: The original implementation checked for None, but we've removed
    that check since all callers already ensure the value is not None.

    Args:
        value: The raw cookie value, possibly wrapped in double quotes.

    Returns:
        ``value`` unchanged unless it both starts and ends with a double
        quote; otherwise the content between the quotes with backslash
        escapes decoded.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters.  See RFC 2109.
    # (A lone '"' is too short to be a quoted string, hence the length check.)
    if len(value) < 2 or value[0] != '"' or value[-1] != '"':
        return value

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    value = value[1:-1]

    # Check for special sequences.  Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    return _unquote_sub(_unquote_replace, value)

158 

159 

def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    Cookie headers contain only name-value pairs separated by semicolons.
    There are no attributes in Cookie headers - even names that match
    attribute names (like 'path' or 'secure') should be treated as cookies.

    This parser uses the same regex-based approach as parse_set_cookie_headers
    to properly handle quoted values that may contain semicolons. When the
    regex fails to match a malformed cookie, it falls back to simple parsing
    to ensure subsequent cookies are not lost
    https://github.com/aio-libs/aiohttp/issues/11632

    Names rejected by the cookie-name check are skipped and reported together
    in a single debug log record.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    cookies: list[tuple[str, Morsel[str]]] = []
    invalid_names: list[str] = []
    i = 0
    n = len(header)

    def _add(name: str, raw: str) -> None:
        # Preserve the original value as coded_value (with quotes if present)
        # We use __setstate__ instead of the public set() API because it allows us to
        # bypass validation and set already validated state. This is more stable than
        # setting protected attributes directly and unlikely to change since it would
        # break pickling.
        morsel: Morsel[str] = Morsel()
        morsel.__setstate__(  # type: ignore[attr-defined]
            {"key": name, "value": _unquote(raw), "coded_value": raw}
        )
        cookies.append((name, morsel))

    while i < n:
        # Use the same pattern as parse_set_cookie_headers to find cookies
        match = _COOKIE_PATTERN.match(header, i)
        if not match:
            # Fallback for malformed cookies https://github.com/aio-libs/aiohttp/issues/11632
            # Find next semicolon to skip or attempt simple key=value parsing
            next_semi = header.find(";", i)
            eq_pos = header.find("=", i)

            # Try to extract key=value if '=' comes before ';'
            if eq_pos != -1 and (next_semi == -1 or eq_pos < next_semi):
                end_pos = next_semi if next_semi != -1 else n
                key = header[i:eq_pos].strip()
                value = header[eq_pos + 1 : end_pos].strip()

                # Validate the name (same as regex path)
                if not _COOKIE_NAME_RE.match(key):
                    invalid_names.append(key)
                else:
                    _add(key, value)

            # Move to next cookie or end
            i = next_semi + 1 if next_semi != -1 else n
            continue

        key = match.group("key")
        # A bare name without "=value" is kept as a cookie with an empty value.
        value = match.group("val") or ""
        i = match.end(0)

        # Validate the name
        if not key or not _COOKIE_NAME_RE.match(key):
            invalid_names.append(key)
            continue

        _add(key, value)

    if invalid_names:
        internal_logger.debug(
            "Cannot load cookie. Illegal cookie names: %r", invalid_names
        )

    return cookies

246 

247 

def parse_set_cookie_headers(headers: Sequence[str]) -> list[tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
    headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty strings are skipped.

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
        Attributes following a cookie are stored on that cookie's Morsel.
    """
    parsed_cookies: list[tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that subsequent attributes attach to; None after a rejected
        # cookie name so that its attributes are discarded too.
        current_morsel: Morsel[str] | None = None
        morsel_seen = False

        while 0 <= i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    # Same guard as the boolean-attribute branch below: only
                    # assign attributes this interpreter's Morsel accepts,
                    # otherwise Morsel.__setitem__ would raise CookieError.
                    if (
                        attr_lower_key in _COOKIE_KNOWN_ATTRS
                        and current_morsel.isReservedKey(attr_lower_key)
                    ):
                        current_morsel[attr_lower_key] = value or ""
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None and current_morsel.isReservedKey(key):
                        current_morsel[lower_key] = True
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair
                # Validate the name.  Attribute names cannot reach this branch
                # (they are handled above), so only the character check is needed.
                if not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies