Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/django/utils/http.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

194 statements  

1import base64 

2import re 

3import unicodedata 

4from binascii import Error as BinasciiError 

5from datetime import datetime, timezone 

6from email.utils import formatdate 

7from urllib.parse import quote, unquote 

8from urllib.parse import urlencode as original_urlencode 

9from urllib.parse import urlsplit 

10 

11from django.utils.datastructures import MultiValueDict 

12from django.utils.regex_helper import _lazy_re_compile 

13 

# Based on RFC 9110 Appendix A.
ETAG_MATCH = _lazy_re_compile(
    r"""
    \A(      # start of string and capture group
    (?:W/)?  # optional weak indicator
    "        # opening quote
    [^"]*    # any sequence of non-quote characters
    "        # end quote
    )\Z      # end of string and capture group
""",
    re.X,
)

# Lowercase three-letter month abbreviations; index + 1 gives the month number.
MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
# Named-group regex fragments shared by the three HTTP date formats below.
__D = r"(?P<day>[0-9]{2})"
__D2 = r"(?P<day>[ 0-9][0-9])"
__M = r"(?P<mon>\w{3})"
__Y = r"(?P<year>[0-9]{4})"
__Y2 = r"(?P<year>[0-9]{2})"
__T = r"(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})"
# The three date formats accepted by parse_http_date() (RFC 9110 Section 5.6.7):
# RFC 1123 (preferred), RFC 850 (two-digit year), and C asctime().
RFC1123_DATE = _lazy_re_compile(r"^\w{3}, %s %s %s %s GMT$" % (__D, __M, __Y, __T))
RFC850_DATE = _lazy_re_compile(r"^\w{6,9}, %s-%s-%s %s GMT$" % (__D, __M, __Y2, __T))
ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))

# RFC 3986 Section 2.2 reserved characters.
RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="

40 

41 

def urlencode(query, doseq=False):
    """
    A version of Python's urllib.parse.urlencode() function that can operate on
    MultiValueDict and non-string values.
    """
    if isinstance(query, MultiValueDict):
        pairs = query.lists()
    elif hasattr(query, "items"):
        pairs = query.items()
    else:
        # Assume an iterable of (key, value) two-tuples.
        pairs = query
    encoded = []
    for key, value in pairs:
        if value is None:
            raise TypeError(
                "Cannot encode None for key '%s' in a query string. Did you "
                "mean to pass an empty string or omit the value?" % key
            )
        if not doseq or isinstance(value, (str, bytes)):
            encoded.append((key, value))
            continue
        try:
            value_iter = iter(value)
        except TypeError:
            # Non-iterable scalar; encode it as-is.
            encoded.append((key, value))
            continue
        # Consume generators and iterators, when doseq=True, to
        # work around https://bugs.python.org/issue31706.
        materialized = []
        for item in value_iter:
            if item is None:
                raise TypeError(
                    "Cannot encode None for key '%s' in a query "
                    "string. Did you mean to pass an empty string or "
                    "omit the value?" % key
                )
            materialized.append(item if isinstance(item, bytes) else str(item))
        encoded.append((key, materialized))
    return original_urlencode(encoded, doseq)

81 

82 

def http_date(epoch_seconds=None):
    """
    Format the time to match the RFC 5322 date format as specified by RFC 9110
    Section 5.6.7.

    `epoch_seconds` is a floating point number expressed in seconds since the
    epoch, in UTC - such as that outputted by time.time(). If set to None, it
    defaults to the current time.

    Output a string in the format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    # formatdate() with usegmt=True produces exactly the HTTP-date format.
    return formatdate(epoch_seconds, usegmt=True)

95 

96 

def parse_http_date(date):
    """
    Parse a date format as specified by HTTP RFC 9110 Section 5.6.7.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Return an integer expressed in seconds since the epoch, in UTC.

    Raise ValueError if the string matches none of the three formats or
    contains an out-of-range field.
    """
    # email.utils.parsedate() does the job for RFC 1123 dates; unfortunately
    # RFC 9110 makes it mandatory to support RFC 850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        year = int(m["year"])
        if year < 100:
            # RFC 850 allows a two-digit year. Per RFC 9110, a year that
            # appears to be more than 50 years in the future is interpreted
            # as representing the past.
            current_year = datetime.now(tz=timezone.utc).year
            current_century = current_year - (current_year % 100)
            if year - (current_year % 100) > 50:
                year += current_century - 100
            else:
                year += current_century
        month = MONTHS.index(m["mon"].lower()) + 1
        day = int(m["day"])
        hour = int(m["hour"])
        # Renamed from `min`/`sec` to avoid shadowing the min() builtin.
        minute = int(m["min"])
        second = int(m["sec"])
        result = datetime(year, month, day, hour, minute, second, tzinfo=timezone.utc)
        return int(result.timestamp())
    except Exception as exc:
        raise ValueError("%r is not a valid date" % date) from exc

135 

136 

def parse_http_date_safe(date):
    """
    Same as parse_http_date, but return None if the input is invalid.
    """
    try:
        return parse_http_date(date)
    except Exception:
        # Deliberate best-effort: any parsing failure yields None.
        return None

145 

146 

147# Base 36 functions: useful for generating compact URLs 

148 

149 

def base36_to_int(s):
    """
    Convert a base 36 string to an int. Raise ValueError if the input won't fit
    into an int.
    """
    # 13 base36 digits are sufficient to encode any 64-bit integer; reject
    # longer input to prevent overconsumption of server resources.
    if len(s) > 13:
        raise ValueError("Base36 input too large")
    return int(s, 36)

161 

162 

def int_to_base36(i):
    """Convert an integer to a base36 string."""
    digits = "0123456789abcdefghijklmnopqrstuvwxyz"
    if i < 0:
        raise ValueError("Negative base36 conversion input.")
    if i < 36:
        # Single-digit fast path.
        return digits[i]
    chars = []
    while i:
        i, rem = divmod(i, 36)
        chars.append(digits[rem])
    return "".join(reversed(chars))

175 

176 

def urlsafe_base64_encode(s):
    """
    Encode a bytestring to a base64 string for use in URLs. Strip any trailing
    equal signs.
    """
    encoded = base64.urlsafe_b64encode(s)
    return encoded.rstrip(b"\n=").decode("ascii")

183 

184 

def urlsafe_base64_decode(s):
    """
    Decode a base64 encoded string. Add back any trailing equal signs that
    might have been stripped.
    """
    data = s.encode()
    padded = data.ljust(len(data) + len(data) % 4, b"=")
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)

195 

196 

def parse_etags(etag_str):
    """
    Parse a string of ETags given in an If-None-Match or If-Match header as
    defined by RFC 9110. Return a list of quoted ETags, or ['*'] if all ETags
    should be matched.
    """
    if etag_str.strip() == "*":
        return ["*"]
    # Validate each candidate ETag individually; silently drop invalid ones.
    matches = (
        ETAG_MATCH.match(candidate.strip()) for candidate in etag_str.split(",")
    )
    return [m[1] for m in matches if m]

209 

210 

def quote_etag(etag_str):
    """
    If the provided string is already a quoted ETag, return it. Otherwise, wrap
    the string in quotes, making it a strong ETag.
    """
    if ETAG_MATCH.match(etag_str):
        # Already a (possibly weak) quoted ETag.
        return etag_str
    return '"%s"' % etag_str

220 

221 

def is_same_domain(host, pattern):
    """
    Return ``True`` if the host is either an exact match or a match
    to the wildcard pattern.

    Any pattern beginning with a period matches a domain and all of its
    subdomains. (e.g. ``.example.com`` matches ``example.com`` and
    ``foo.example.com``). Anything else is an exact string match.
    """
    if not pattern:
        return False
    pattern = pattern.lower()
    if pattern.startswith("."):
        # Wildcard: match the bare domain or any subdomain of it.
        return host.endswith(pattern) or host == pattern[1:]
    return pattern == host

240 

241 

def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Return ``True`` if the url uses an allowed host and a safe scheme.

    Always return ``False`` on an empty url.

    If ``require_https`` is ``True``, only 'https' will be considered a valid
    scheme, as opposed to 'http' and 'https' with the default, ``False``.

    Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
    quoted incorrectly. Ensure to also use django.utils.encoding.iri_to_uri()
    on the path component of untrusted URLs.
    """
    if url is not None:
        url = url.strip()
    if not url:
        return False
    if allowed_hosts is None:
        allowed_hosts = set()
    elif isinstance(allowed_hosts, str):
        allowed_hosts = {allowed_hosts}
    # Chrome treats \ completely as / in paths but it could be part of some
    # basic auth credentials so we need to check both URLs.
    if not _url_has_allowed_host_and_scheme(
        url, allowed_hosts, require_https=require_https
    ):
        return False
    return _url_has_allowed_host_and_scheme(
        url.replace("\\", "/"), allowed_hosts, require_https=require_https
    )

270 

271 

272def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False): 

273 # Chrome considers any URL with more than two slashes to be absolute, but 

274 # urlsplit is not so flexible. Treat any url with three slashes as unsafe. 

275 if url.startswith("///"): 

276 return False 

277 try: 

278 url_info = urlsplit(url) 

279 except ValueError: # e.g. invalid IPv6 addresses 

280 return False 

281 # Forbid URLs like http:///example.com - with a scheme, but without a hostname. 

282 # In that URL, example.com is not the hostname but, a path component. However, 

283 # Chrome will still consider example.com to be the hostname, so we must not 

284 # allow this syntax. 

285 if not url_info.netloc and url_info.scheme: 

286 return False 

287 # Forbid URLs that start with control characters. Some browsers (like 

288 # Chrome) ignore quite a few control characters at the start of a 

289 # URL and might consider the URL as scheme relative. 

290 if unicodedata.category(url[0])[0] == "C": 

291 return False 

292 scheme = url_info.scheme 

293 # Consider URLs without a scheme (e.g. //example.com/p) to be http. 

294 if not url_info.scheme and url_info.netloc: 

295 scheme = "http" 

296 valid_schemes = ["https"] if require_https else ["http", "https"] 

297 return (not url_info.netloc or url_info.netloc in allowed_hosts) and ( 

298 not scheme or scheme in valid_schemes 

299 ) 

300 

301 

def escape_leading_slashes(url):
    """
    If redirecting to an absolute path (two leading slashes), a slash must be
    escaped to prevent browsers from handling the path as schemaless and
    redirecting to another host.
    """
    if not url.startswith("//"):
        return url
    return "/%2F" + url.removeprefix("//")

311 

312 

313def _parseparam(s): 

314 while s[:1] == ";": 

315 s = s[1:] 

316 end = s.find(";") 

317 while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: 

318 end = s.find(";", end + 1) 

319 if end < 0: 

320 end = len(s) 

321 f = s[:end] 

322 yield f.strip() 

323 s = s[end:] 

324 

325 

def parse_header_parameters(line):
    """
    Parse a Content-type like header.
    Return the main content-type and a dictionary of options.
    """
    parts = _parseparam(";" + line)
    main_value = next(parts).lower()
    pdict = {}
    for part in parts:
        eq = part.find("=")
        if eq < 0:
            # Parameter without a value; ignore it.
            continue
        has_encoding = False
        name = part[:eq].strip().lower()
        if name.endswith("*"):
            # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
            # https://tools.ietf.org/html/rfc2231#section-4
            name = name[:-1]
            if part.count("'") == 2:
                has_encoding = True
        value = part[eq + 1 :].strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            # Unquote and unescape a quoted-string value.
            value = value[1:-1]
            value = value.replace("\\\\", "\\").replace('\\"', '"')
        if has_encoding:
            encoding, lang, value = value.split("'")
            value = unquote(value, encoding=encoding)
        pdict[name] = value
    return main_value, pdict

354 

355 

def content_disposition_header(as_attachment, filename):
    """
    Construct a Content-Disposition HTTP header value from the given filename
    as specified by RFC 6266.
    """
    if not filename:
        # No filename: bare "attachment" or no header at all.
        return "attachment" if as_attachment else None
    disposition = "attachment" if as_attachment else "inline"
    try:
        filename.encode("ascii")
        is_ascii = True
    except UnicodeEncodeError:
        is_ascii = False
    # Quoted strings can contain horizontal tabs, space characters, and
    # characters from 0x21 to 0x7e, except 0x22 (`"`) and 0x5C (`\`) which
    # can still be expressed but must be escaped with their own `\`.
    # https://datatracker.ietf.org/doc/html/rfc9110#name-quoted-strings
    if is_ascii and re.match(r"^[\t \x21-\x7e]*$", filename):
        escaped = filename.replace("\\", "\\\\").replace('"', r"\"")
        file_expr = 'filename="{}"'.format(escaped)
    else:
        # Non-ASCII or non-quotable: use the RFC 5987 extended notation.
        file_expr = "filename*=utf-8''{}".format(quote(filename))
    return f"{disposition}; {file_expr}"