Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/_cookie_helpers.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

117 statements  

1""" 

Internal cookie handling helpers.

This module contains internal utilities for cookie parsing and manipulation.
These are not part of the public API and may change without notice.

6""" 

7 

8import re 

9from collections.abc import Sequence 

10from http.cookies import CookieError, Morsel 

11from typing import cast 

12 

13from .log import internal_logger 

14 

# Names exported by a star-import of this module.
__all__ = (
    "parse_set_cookie_headers",
    "parse_cookie_header",
    "preserve_morsel_with_coded_value",
)

20 

# Cookie parsing constants
# Allow more characters in cookie names to handle real-world cookies
# that don't strictly follow RFC standards (fixes #2683)
# RFC 6265 defines cookie-name token as per RFC 2616 Section 2.2,
# but many servers send cookies with characters like {} [] () etc.
# This makes the cookie parser more tolerant of real-world cookies
# while still providing some validation to catch obviously malformed names.
# The class deliberately excludes whitespace, double quote, comma, semicolon
# and backslash.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Lower-case attribute names that the Set-Cookie parser treats as cookie
# attributes rather than as cookie names.
_COOKIE_KNOWN_ATTRS = frozenset(  # AKA Morsel._reserved
    (
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    )
)

# Attributes that are flags: their presence means True, whatever value follows.
_COOKIE_BOOL_ATTRS = frozenset(  # AKA Morsel._flags
    ("secure", "httponly", "partitioned")
)

46 

# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
#
# One match consumes a single ``key`` or ``key=val`` item plus its terminator
# (whitespace, ``;`` or end of string); ``val`` is None when there is no ``=``.
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
    # aiohttp has extended to include [] for compatibility with real-world cookies
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\[\]]+   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any double-quoted string (properly closed)
    |                                  # or
    "[^";]*                            # Unmatched opening quote (differs from SimpleCookie - issue #7993)
    |                                  # or
    # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
    (\w{3,6}day|\w{3}),\s              # Day of the week or abbreviated day (with comma)
    [\w\d\s-]{9,11}\s[\d:]{8}\s        # Date and time in specific format
    (GMT|[+-]\d{4})                    # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                       # NOTE: RFC 2822 timezone support is an aiohttp extension
                                       # for issue #4493 - SimpleCookie does NOT support this
    |                                  # or
    # ANSI C asctime() format: "Wed Jun  9 10:18:14 2021"
    # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
    \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
    |                                  # or
    [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*      # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)

83 

84 

def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are carried over; the returned
    Morsel's attributes (path, domain, ...) are left at their defaults.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value, or ``cookie`` itself
        if setting the state raises CookieError.

    """
    # Always build on a fresh Morsel. Looking the cookie's own name up in the
    # Morsel mapping (which only holds attribute slots) would hand back an
    # attribute *string* whenever the cookie is named like a reserved
    # attribute (e.g. "path"), and calling __setstate__ on that would fail.
    mrsl_val: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    try:
        mrsl_val.__setstate__(  # type: ignore[attr-defined]
            {
                "key": cookie.key,
                "value": cookie.value,
                "coded_value": cookie.coded_value,
            }
        )
    except CookieError:
        return cookie
    return mrsl_val

120 

121 

# Bound ``sub`` of a pattern matching one backslash escape: either a three-digit
# octal code whose first digit is 0-3 (group 1) or any other single character
# except a newline (group 2).
_unquote_sub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub

123 

124 

def _unquote_replace(m: re.Match[str]) -> str:
    """
    Replace function for _unquote_sub regex substitution.

    Handles escaped characters in cookie values:
    - Octal sequences are converted to their character representation
    - Other escaped characters are unescaped by removing the backslash

    Args:
        m: A match whose group 1 holds an octal code, or whose group 2 holds
            the single escaped character.

    Returns:
        The character the escape sequence stands for.
    """
    if m[1]:
        # Group 1 matched: interpret the three digits as a base-8 code point.
        return chr(int(m[1], 8))
    return m[2]

136 

137 

def _unquote(value: str) -> str:
    """
    Unquote a cookie value.

    Vendored from http.cookies._unquote to ensure compatibility.

    Note: The original implementation checked for None, but we've removed
    that check since all callers already ensure the value is not None.

    Args:
        value: The raw cookie value, possibly wrapped in double quotes.

    Returns:
        ``value`` unchanged unless it both starts and ends with a double
        quote; otherwise the text between the quotes with backslash escapes
        decoded.
    """
    # If there aren't any doublequotes,
    # then there can't be any special characters.  See RFC 2109.
    if len(value) < 2:
        return value
    if value[0] != '"' or value[-1] != '"':
        return value

    # We have to assume that we must decode this string.
    # Down to work.

    # Remove the "s
    value = value[1:-1]

    # Check for special sequences.  Examples:
    #    \012 --> \n
    #    \"   --> "
    #
    return _unquote_sub(_unquote_replace, value)

165 

166 

def parse_cookie_header(header: str) -> list[tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    Cookie headers contain only name-value pairs separated by semicolons.
    There are no attributes in Cookie headers - even names that match
    attribute names (like 'path' or 'secure') should be treated as cookies.

    This parser uses the same regex-based approach as parse_set_cookie_headers
    to properly handle quoted values that may contain semicolons. When the
    regex fails to match a malformed cookie, it falls back to simple parsing
    to ensure subsequent cookies are not lost
    https://github.com/aio-libs/aiohttp/issues/11632

    Names that fail validation are skipped and reported together in a single
    debug log record.

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    morsel: Morsel[str]
    cookies: list[tuple[str, Morsel[str]]] = []
    i = 0
    n = len(header)

    invalid_names = []
    while i < n:
        # Use the same pattern as parse_set_cookie_headers to find cookies
        match = _COOKIE_PATTERN.match(header, i)
        if not match:
            # Fallback for malformed cookies https://github.com/aio-libs/aiohttp/issues/11632
            # Find next semicolon to skip or attempt simple key=value parsing
            next_semi = header.find(";", i)
            eq_pos = header.find("=", i)

            # Try to extract key=value if '=' comes before ';'
            if eq_pos != -1 and (next_semi == -1 or eq_pos < next_semi):
                end_pos = next_semi if next_semi != -1 else n
                key = header[i:eq_pos].strip()
                value = header[eq_pos + 1 : end_pos].strip()

                # Validate the name (same as regex path)
                if not _COOKIE_NAME_RE.match(key):
                    invalid_names.append(key)
                else:
                    morsel = Morsel()
                    try:
                        morsel.__setstate__(  # type: ignore[attr-defined]
                            {
                                "key": key,
                                "value": _unquote(value),
                                "coded_value": value,
                            }
                        )
                    except CookieError:
                        # Drop this one cookie but keep parsing the rest.
                        pass
                    else:
                        cookies.append((key, morsel))

            # Move to next cookie or end; this always advances ``i``, so the
            # loop terminates even when nothing could be extracted.
            i = next_semi + 1 if next_semi != -1 else n
            continue

        key = match.group("key")
        # ``val`` is None when the item has no '='; treat that as empty.
        value = match.group("val") or ""
        i = match.end(0)

        # Validate the name
        if not key or not _COOKIE_NAME_RE.match(key):
            invalid_names.append(key)
            continue

        # Create new morsel
        morsel = Morsel()
        # Preserve the original value as coded_value (with quotes if present)
        # We use __setstate__ instead of the public set() API because it allows us to
        # bypass validation and set already validated state. This is more stable than
        # setting protected attributes directly and unlikely to change since it would
        # break pickling.
        try:
            morsel.__setstate__(  # type: ignore[attr-defined]
                {"key": key, "value": _unquote(value), "coded_value": value}
            )
        except CookieError:
            continue

        cookies.append((key, morsel))

    if invalid_names:
        internal_logger.debug(
            "Cannot load cookie. Illegal cookie names: %r", invalid_names
        )

    return cookies

264 

265 

def parse_set_cookie_headers(headers: Sequence[str]) -> list[tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
    headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty entries are skipped.

    Returns:
        List of (name, Morsel) tuples, in the order the cookies were found.
        Attributes that follow a cookie are stored on that cookie's Morsel.
    """
    parsed_cookies: list[tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        current_morsel: Morsel[str] | None = None
        morsel_seen = False

        while 0 <= i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    # Only assign attributes this interpreter's Morsel accepts;
                    # Morsel item assignment raises CookieError for anything
                    # else (e.g. "partitioned" before Python 3.14), which would
                    # otherwise abort parsing of every header.
                    if (
                        attr_lower_key in _COOKIE_KNOWN_ATTRS
                        and current_morsel.isReservedKey(attr_lower_key)
                    ):
                        current_morsel[attr_lower_key] = value or ""
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None and current_morsel.isReservedKey(key):
                        current_morsel[lower_key] = True
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair
                # Validate the name
                if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    # Attributes that follow belong to no cookie and are dropped.
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    try:
                        current_morsel.__setstate__(  # type: ignore[attr-defined]
                            {
                                "key": key,
                                "value": _unquote(value),
                                "coded_value": value,
                            }
                        )
                    except CookieError:
                        current_morsel = None
                    else:
                        parsed_cookies.append((key, current_morsel))
                        # NOTE(review): nesting of this flag under the
                        # try/else was reconstructed from a flattened
                        # listing - confirm against the upstream file.
                        morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies