Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/aiohttp/_cookie_helpers.py: 15%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

104 statements  

1""" 

2Internal cookie handling helpers. 

3 

4This module contains internal utilities for cookie parsing and manipulation. 

5These are not part of the public API and may change without notice. 

6""" 

7 

8import re 

9from http.cookies import Morsel 

10from typing import List, Optional, Sequence, Tuple, cast 

11 

12from .log import internal_logger 

13 

14__all__ = ( 

15 "parse_set_cookie_headers", 

16 "parse_cookie_header", 

17 "preserve_morsel_with_coded_value", 

18) 

19 

# Cookie parsing constants
#
# Cookie-name validation is deliberately looser than the RFCs: RFC 6265 defines
# cookie-name as a token per RFC 2616 Section 2.2, but many servers send names
# containing characters such as {} [] () (fixes #2683). The pattern below
# tolerates those real-world names while still rejecting obviously malformed
# ones.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Lower-case attribute names the parser treats as cookie attributes
# (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    }
)

# Subset of the known attributes that are flags (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset({"secure", "httponly", "partitioned"})

45 

# SimpleCookie's pattern for parsing cookies with relaxed validation.
# Based on the http.cookies pattern but extended to allow more characters in
# cookie names to handle real-world cookies (fixes #2683).
#
# One match consumes a single ``key`` or ``key=value`` item plus its terminator
# (whitespace, a semicolon, or end of string):
#   * group ``key`` is the item name (at least one character);
#   * group ``val`` is the raw value, still including any surrounding quotes;
#     it is None when the item has no ``=value`` part because the enclosing
#     group is optional.
# The alternatives for ``val`` are tried in the order written: a properly
# closed double-quoted string, an unmatched opening quote, the two date
# formats, then the generic character class (which may match the empty string).
# re.VERBOSE permits the layout and inline comments inside the pattern;
# re.ASCII restricts \w, \d and \s to ASCII characters.
_COOKIE_PATTERN = re.compile(
    r"""
    \s*                            # Optional whitespace at start of cookie
    (?P<key>                       # Start of group 'key'
        # aiohttp has extended to include [] for compatibility with real-world cookies
        [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\[\]]+   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
        \s*=\s*                    # Equal Sign
        (?P<val>                   # Start of group 'val'
            "(?:[^\\"]|\\.)*"      # Any double-quoted string (properly closed)
            |                      # or
            "[^";]*                # Unmatched opening quote (differs from SimpleCookie - issue #7993)
            |                      # or
            # Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
            (\w{3,6}day|\w{3}),\s  # Day of the week or abbreviated day (with comma)
            [\w\d\s-]{9,11}\s[\d:]{8}\s  # Date and time in specific format
            (GMT|[+-]\d{4})        # Timezone: GMT or RFC 2822 offset like -0000, +0100
                                   # NOTE: RFC 2822 timezone support is an aiohttp extension
                                   # for issue #4493 - SimpleCookie does NOT support this
            |                      # or
            # ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
            # NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
            \w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
            |                      # or
            [\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]*      # Any word or empty string
        )                          # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """,
    re.VERBOSE | re.ASCII,
)

82 

83 

def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel carrying the same key, value and coded_value as
        ``cookie``. Attributes of ``cookie`` (path, domain, ...) are not
        copied; the returned Morsel has default attributes.

    """
    # Always start from a fresh Morsel. A Morsel's mapping interface holds its
    # attributes, so looking the cookie name up in it (``cookie.get(cookie.key)``)
    # would return the attribute's plain string value whenever the cookie is
    # named like an attribute (e.g. "path"), and that string has no
    # ``__setstate__``.
    preserved: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    preserved.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return preserved

112 

113 

# Escape sequences inside a quoted cookie value: group 1 captures a three-digit
# octal escape, group 2 any other single escaped character.
_UNQUOTE_ESCAPE_RE = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")
_unquote_sub = _UNQUOTE_ESCAPE_RE.sub

115 

116 

def _unquote_replace(m: re.Match[str]) -> str:
    """
    Return the replacement text for one escape matched by _unquote_sub.

    Group 1 of the match holds an octal escape, which is turned into the
    character with that code; otherwise group 2 holds the escaped character,
    which is returned without its backslash.
    """
    octal_digits, escaped_char = m[1], m[2]
    return chr(int(octal_digits, 8)) if octal_digits else escaped_char

128 

129 

def _unquote(value: str) -> str:
    """
    Strip the surrounding double quotes from a cookie value and decode escapes.

    Vendored from http.cookies._unquote to ensure compatibility.

    Note: The original implementation checked for None, but that check has
    been removed since all callers already ensure the value is not None.
    """
    # Only a value that both starts and ends with a double quote (and is long
    # enough to hold the pair) can contain special characters. See RFC 2109.
    is_quoted = len(value) >= 2 and value[0] == '"' and value[-1] == '"'
    if not is_quoted:
        return value

    # Drop the enclosing quotes, then expand special sequences. Examples:
    #    \012 --> \n
    #    \"   --> "
    inner = value[1:-1]
    return _unquote_sub(_unquote_replace, inner)

157 

158 

def parse_cookie_header(header: str) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse a Cookie header according to RFC 6265 Section 5.4.

    A Cookie header carries only name-value pairs separated by semicolons.
    It has no attributes, so names that happen to match attribute names
    (like 'path' or 'secure') are treated as ordinary cookies.

    Items are located with the same regex as parse_set_cookie_headers so that
    quoted values containing semicolons are handled. Where the regex cannot
    match a malformed item, a simple split on '=' and ';' is used instead so
    that the cookies after it are not lost
    https://github.com/aio-libs/aiohttp/issues/11632

    Args:
        header: The Cookie header value to parse

    Returns:
        List of (name, Morsel) tuples for compatibility with SimpleCookie.update()
    """
    if not header:
        return []

    result: List[Tuple[str, Morsel[str]]] = []
    rejected_names = []
    pos = 0
    length = len(header)

    while pos < length:
        found = _COOKIE_PATTERN.match(header, pos)

        if found:
            name = found.group("key")
            raw_value = found.group("val") or ""
            pos = found.end(0)

            if not name or not _COOKIE_NAME_RE.match(name):
                rejected_names.append(name)
                continue
        else:
            # Fallback for malformed cookies https://github.com/aio-libs/aiohttp/issues/11632
            # The item runs up to the next semicolon (or the end of the header).
            semi_at = header.find(";", pos)
            eq_at = header.find("=", pos)
            item_start = pos
            item_end = length if semi_at == -1 else semi_at
            pos = length if semi_at == -1 else semi_at + 1

            # Without an '=' inside the item there is nothing to extract.
            if eq_at == -1 or eq_at >= item_end:
                continue

            name = header[item_start:eq_at].strip()
            raw_value = header[eq_at + 1 : item_end].strip()

            # Same name validation as the regex path
            if not _COOKIE_NAME_RE.match(name):
                rejected_names.append(name)
                continue

        # The raw text (quotes included) is kept as coded_value. __setstate__
        # is used rather than the public set() API so that validation is
        # bypassed for state that has already been validated; it is more
        # stable than writing protected attributes directly and unlikely to
        # change since that would break pickling.
        morsel: Morsel[str] = Morsel()
        morsel.__setstate__(  # type: ignore[attr-defined]
            {"key": name, "value": _unquote(raw_value), "coded_value": raw_value}
        )
        result.append((name, morsel))

    if rejected_names:
        internal_logger.debug(
            "Cannot load cookie. Illegal cookie names: %r", rejected_names
        )

    return result

245 

246 

def parse_set_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
    headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Header values to parse; empty entries are skipped.

    Returns:
        List of (name, Morsel) tuples in the order the cookies were found.
        Parsing of a single header stops at the first item that cannot be
        interpreted; cookies collected before that point are kept.
    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that following attributes attach to; None after an illegal name.
        current_morsel: Optional[Morsel[str]] = None
        # True once a valid name=value pair has been stored for this header.
        morsel_seen = False

        while 0 <= i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    # Morsel.__setitem__ raises CookieError for names the
                    # running interpreter's Morsel does not reserve, so only
                    # assign attributes it accepts (same guard as the boolean
                    # attribute branch below).
                    if (
                        attr_lower_key in _COOKIE_KNOWN_ATTRS
                        and current_morsel.isReservedKey(attr_lower_key)
                    ):
                        current_morsel[attr_lower_key] = value or ""
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None and current_morsel.isReservedKey(key):
                        current_morsel[lower_key] = True
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair
                # Validate the name
                if key in _COOKIE_KNOWN_ATTRS or not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies