# Coverage report header (coverage.py v7.0.5, created at 2023-01-17 06:13 +0000):
# /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/http.py
# 27% of 226 statements covered.

import base64
import datetime
import re
import unicodedata
from binascii import Error as BinasciiError
from email.utils import formatdate
from urllib.parse import (
    ParseResult,
    SplitResult,
    _coerce_args,
    _splitnetloc,
    _splitparams,
    quote,
    scheme_chars,
    unquote,
)
from urllib.parse import urlencode as original_urlencode
from urllib.parse import uses_params

from django.utils.datastructures import MultiValueDict
from django.utils.regex_helper import _lazy_re_compile

# Based on RFC 9110 Appendix A.
ETAG_MATCH = _lazy_re_compile(
    r"""
    \A(      # start of string and capture group
    (?:W/)?  # optional weak indicator
    "        # opening quote
    [^"]*    # any sequence of non-quote characters
    "        # end quote
    )\Z      # end of string and capture group
""",
    re.X,
)

MONTHS = "jan feb mar apr may jun jul aug sep oct nov dec".split()
__D = r"(?P<day>[0-9]{2})"
__D2 = r"(?P<day>[ 0-9][0-9])"
__M = r"(?P<mon>\w{3})"
__Y = r"(?P<year>[0-9]{4})"
__Y2 = r"(?P<year>[0-9]{2})"
__T = r"(?P<hour>[0-9]{2}):(?P<min>[0-9]{2}):(?P<sec>[0-9]{2})"
RFC1123_DATE = _lazy_re_compile(r"^\w{3}, %s %s %s %s GMT$" % (__D, __M, __Y, __T))
RFC850_DATE = _lazy_re_compile(r"^\w{6,9}, %s-%s-%s %s GMT$" % (__D, __M, __Y2, __T))
ASCTIME_DATE = _lazy_re_compile(r"^\w{3} %s %s %s %s$" % (__M, __D2, __T, __Y))

RFC3986_GENDELIMS = ":/?#[]@"
RFC3986_SUBDELIMS = "!$&'()*+,;="

# TODO: Remove when dropping support for PY38.
# Unsafe bytes to be removed per WHATWG spec.
_UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]


def urlencode(query, doseq=False):
    """
    A version of Python's urllib.parse.urlencode() function that can operate on
    MultiValueDict and non-string values.
    """
    if isinstance(query, MultiValueDict):
        query = query.lists()
    elif hasattr(query, "items"):
        query = query.items()
    query_params = []
    for key, value in query:
        if value is None:
            raise TypeError(
                "Cannot encode None for key '%s' in a query string. Did you "
                "mean to pass an empty string or omit the value?" % key
            )
        elif not doseq or isinstance(value, (str, bytes)):
            query_val = value
        else:
            try:
                itr = iter(value)
            except TypeError:
                query_val = value
            else:
                # Consume generators and iterators, when doseq=True, to
                # work around https://bugs.python.org/issue31706.
                query_val = []
                for item in itr:
                    if item is None:
                        raise TypeError(
                            "Cannot encode None for key '%s' in a query "
                            "string. Did you mean to pass an empty string or "
                            "omit the value?" % key
                        )
                    elif not isinstance(item, bytes):
                        item = str(item)
                    query_val.append(item)
        query_params.append((key, query_val))
    return original_urlencode(query_params, doseq)
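# Illustrative usage (a sketch, not part of Django's source; example values
# are assumptions):
#   >>> urlencode({"q": "django", "tags": ["web", "http"]}, doseq=True)
#   'q=django&tags=web&tags=http'
#   >>> urlencode({"q": None})  # raises TypeError: None cannot be encoded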


def http_date(epoch_seconds=None):
    """
    Format the time to match the RFC 5322 date format as specified by RFC 9110
    Section 5.6.7.

    `epoch_seconds` is a floating point number expressed in seconds since the
    epoch, in UTC, such as that returned by time.time(). If set to None, it
    defaults to the current time.

    Output a string in the format 'Wdy, DD Mon YYYY HH:MM:SS GMT'.
    """
    return formatdate(epoch_seconds, usegmt=True)
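# Illustrative usage (a sketch; the timestamp is an assumption):
#   >>> http_date(0)
#   'Thu, 01 Jan 1970 00:00:00 GMT'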


def parse_http_date(date):
    """
    Parse a date format as specified by HTTP RFC 9110 Section 5.6.7.

    The three formats allowed by the RFC are accepted, even if only the first
    one is still in widespread use.

    Return an integer expressed in seconds since the epoch, in UTC.
    """
    # email.utils.parsedate() does the job for RFC 1123 dates; unfortunately
    # RFC 9110 makes it mandatory to support RFC 850 dates too. So we roll
    # our own RFC-compliant parsing.
    for regex in RFC1123_DATE, RFC850_DATE, ASCTIME_DATE:
        m = regex.match(date)
        if m is not None:
            break
    else:
        raise ValueError("%r is not in a valid HTTP date format" % date)
    try:
        tz = datetime.timezone.utc
        year = int(m["year"])
        if year < 100:
            current_year = datetime.datetime.now(tz=tz).year
            current_century = current_year - (current_year % 100)
            if year - (current_year % 100) > 50:
                # Years that appear to be more than 50 years in the future
                # are interpreted as representing the past.
                year += current_century - 100
            else:
                year += current_century
        month = MONTHS.index(m["mon"].lower()) + 1
        day = int(m["day"])
        hour = int(m["hour"])
        min = int(m["min"])
        sec = int(m["sec"])
        result = datetime.datetime(year, month, day, hour, min, sec, tzinfo=tz)
        return int(result.timestamp())
    except Exception as exc:
        raise ValueError("%r is not a valid date" % date) from exc


def parse_http_date_safe(date):
    """
    Same as parse_http_date, but return None if the input is invalid.
    """
    try:
        return parse_http_date(date)
    except Exception:
        pass
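# Illustrative round trip (a sketch; values are assumptions):
#   >>> parse_http_date("Thu, 01 Jan 1970 00:00:00 GMT")
#   0
#   >>> parse_http_date_safe("not a date") is None
#   True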


# Base 36 functions: useful for generating compact URLs


def base36_to_int(s):
    """
    Convert a base 36 string to an int. Raise ValueError if the input won't fit
    into an int.
    """
    # To prevent overconsumption of server resources, reject any
    # base36 string that is longer than 13 base36 digits (13 digits
    # is sufficient to base36-encode any 64-bit integer)
    if len(s) > 13:
        raise ValueError("Base36 input too large")
    return int(s, 36)


def int_to_base36(i):
    """Convert an integer to a base36 string."""
    char_set = "0123456789abcdefghijklmnopqrstuvwxyz"
    if i < 0:
        raise ValueError("Negative base36 conversion input.")
    if i < 36:
        return char_set[i]
    b36 = ""
    while i != 0:
        i, n = divmod(i, 36)
        b36 = char_set[n] + b36
    return b36
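# Illustrative round trip (a sketch; values are assumptions):
#   >>> int_to_base36(1234567890)
#   'kf12oi'
#   >>> base36_to_int("kf12oi")
#   1234567890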


def urlsafe_base64_encode(s):
    """
    Encode a bytestring to a base64 string for use in URLs. Strip any trailing
    equal signs.
    """
    return base64.urlsafe_b64encode(s).rstrip(b"\n=").decode("ascii")


def urlsafe_base64_decode(s):
    """
    Decode a base64 encoded string. Add back any trailing equal signs that
    might have been stripped.
    """
    s = s.encode()
    try:
        return base64.urlsafe_b64decode(s.ljust(len(s) + len(s) % 4, b"="))
    except (LookupError, BinasciiError) as e:
        raise ValueError(e)
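# Illustrative round trip (a sketch; values are assumptions). Note the
# padding-free output:
#   >>> urlsafe_base64_encode(b"abc")
#   'YWJj'
#   >>> urlsafe_base64_decode("YWJj")
#   b'abc'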


def parse_etags(etag_str):
    """
    Parse a string of ETags given in an If-None-Match or If-Match header as
    defined by RFC 9110. Return a list of quoted ETags, or ['*'] if all ETags
    should be matched.
    """
    if etag_str.strip() == "*":
        return ["*"]
    else:
        # Parse each ETag individually, and return any that are valid.
        etag_matches = (ETAG_MATCH.match(etag.strip()) for etag in etag_str.split(","))
        return [match[1] for match in etag_matches if match]


def quote_etag(etag_str):
    """
    If the provided string is already a quoted ETag, return it. Otherwise, wrap
    the string in quotes, making it a strong ETag.
    """
    if ETAG_MATCH.match(etag_str):
        return etag_str
    else:
        return '"%s"' % etag_str
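# Illustrative usage (a sketch; values are assumptions):
#   >>> parse_etags('W/"abc", "def"')
#   ['W/"abc"', '"def"']
#   >>> quote_etag("abc")
#   '"abc"'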


def is_same_domain(host, pattern):
    """
    Return ``True`` if the host is either an exact match or a match
    to the wildcard pattern.

    Any pattern beginning with a period matches a domain and all of its
    subdomains (e.g. ``.example.com`` matches ``example.com`` and
    ``foo.example.com``). Anything else is an exact string match.
    """
    if not pattern:
        return False

    pattern = pattern.lower()
    return (
        pattern[0] == "."
        and (host.endswith(pattern) or host == pattern[1:])
        or pattern == host
    )
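# Illustrative usage (a sketch; values are assumptions). Only the pattern is
# lowercased above, so the host is assumed to arrive lowercase:
#   >>> is_same_domain("foo.example.com", ".example.com")
#   True
#   >>> is_same_domain("example.com", ".example.com")
#   True
#   >>> is_same_domain("example.com", "other.com")
#   False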


def url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    """
    Return ``True`` if the URL uses an allowed host and a safe scheme.

    Always return ``False`` on an empty URL.

    If ``require_https`` is ``True``, only 'https' will be considered a valid
    scheme, as opposed to 'http' and 'https' with the default, ``False``.

    Note: "True" doesn't entail that a URL is "safe". It may still be e.g.
    quoted incorrectly. Be sure to also use django.utils.encoding.iri_to_uri()
    on the path component of untrusted URLs.
    """
    if url is not None:
        url = url.strip()
    if not url:
        return False
    if allowed_hosts is None:
        allowed_hosts = set()
    elif isinstance(allowed_hosts, str):
        allowed_hosts = {allowed_hosts}
    # Chrome treats \ completely as / in paths, but it could be part of some
    # basic auth credentials, so we need to check both URLs.
    return _url_has_allowed_host_and_scheme(
        url, allowed_hosts, require_https=require_https
    ) and _url_has_allowed_host_and_scheme(
        url.replace("\\", "/"), allowed_hosts, require_https=require_https
    )
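# Illustrative usage (a sketch; hosts and URLs are assumptions):
#   >>> url_has_allowed_host_and_scheme("https://example.com/next", {"example.com"})
#   True
#   >>> url_has_allowed_host_and_scheme("https://evil.com/next", {"example.com"})
#   False
#   >>> url_has_allowed_host_and_scheme(
#   ...     "http://example.com/", {"example.com"}, require_https=True
#   ... )
#   False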


# TODO: Remove when dropping support for PY38.
# Copied from urllib.parse.urlparse() but uses fixed urlsplit() function.
def _urlparse(url, scheme="", allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    splitresult = _urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = splitresult
    if scheme in uses_params and ";" in url:
        url, params = _splitparams(url)
    else:
        params = ""
    result = ParseResult(scheme, netloc, url, params, query, fragment)
    return _coerce_result(result)


# TODO: Remove when dropping support for PY38.
def _remove_unsafe_bytes_from_url(url):
    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
        url = url.replace(b, "")
    return url


# TODO: Remove when dropping support for PY38.
# Backport of urllib.parse.urlsplit() from Python 3.9.
def _urlsplit(url, scheme="", allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    url = _remove_unsafe_bytes_from_url(url)
    scheme = _remove_unsafe_bytes_from_url(scheme)

    netloc = query = fragment = ""
    i = url.find(":")
    if i > 0:
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            scheme, url = url[:i].lower(), url[i + 1 :]

    if url[:2] == "//":
        netloc, url = _splitnetloc(url, 2)
        if ("[" in netloc and "]" not in netloc) or (
            "]" in netloc and "[" not in netloc
        ):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and "#" in url:
        url, fragment = url.split("#", 1)
    if "?" in url:
        url, query = url.split("?", 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    return _coerce_result(v)
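# Illustrative behavior of the backport (a sketch; the URL is an assumption).
# The WHATWG-unsafe tab byte is stripped before splitting:
#   >>> _urlsplit("https://exa\tmple.com/path?q=1#frag")
#   SplitResult(scheme='https', netloc='example.com', path='/path', query='q=1', fragment='frag')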


def _url_has_allowed_host_and_scheme(url, allowed_hosts, require_https=False):
    # Chrome considers any URL with more than two slashes to be absolute, but
    # urlparse is not so flexible. Treat any URL with three slashes as unsafe.
    if url.startswith("///"):
        return False
    try:
        url_info = _urlparse(url)
    except ValueError:  # e.g. invalid IPv6 addresses
        return False
    # Forbid URLs like http:///example.com - with a scheme, but without a
    # hostname. In that URL, example.com is not the hostname but a path
    # component. However, Chrome will still consider example.com to be the
    # hostname, so we must not allow this syntax.
    if not url_info.netloc and url_info.scheme:
        return False
    # Forbid URLs that start with control characters. Some browsers (like
    # Chrome) ignore quite a few control characters at the start of a
    # URL and might consider the URL as scheme relative.
    if unicodedata.category(url[0])[0] == "C":
        return False
    scheme = url_info.scheme
    # Consider URLs without a scheme (e.g. //example.com/p) to be http.
    if not url_info.scheme and url_info.netloc:
        scheme = "http"
    valid_schemes = ["https"] if require_https else ["http", "https"]
    return (not url_info.netloc or url_info.netloc in allowed_hosts) and (
        not scheme or scheme in valid_schemes
    )


def escape_leading_slashes(url):
    """
    If redirecting to an absolute path (two leading slashes), a slash must be
    escaped to prevent browsers from handling the path as schemaless and
    redirecting to another host.
    """
    if url.startswith("//"):
        url = "/%2F{}".format(url[2:])
    return url
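# Illustrative usage (a sketch; values are assumptions):
#   >>> escape_leading_slashes("//evil.com/path")
#   '/%2Fevil.com/path'
#   >>> escape_leading_slashes("/safe/path")
#   '/safe/path'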


def _parseparam(s):
    while s[:1] == ";":
        s = s[1:]
        end = s.find(";")
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(";", end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        yield f.strip()
        s = s[end:]


def parse_header_parameters(line):
    """
    Parse a Content-type-like header.
    Return the main content-type and a dictionary of options.
    """
    parts = _parseparam(";" + line)
    key = parts.__next__().lower()
    pdict = {}
    for p in parts:
        i = p.find("=")
        if i >= 0:
            has_encoding = False
            name = p[:i].strip().lower()
            if name.endswith("*"):
                # Lang/encoding embedded in the value (like "filename*=UTF-8''file.ext")
                # https://tools.ietf.org/html/rfc2231#section-4
                name = name[:-1]
                if p.count("'") == 2:
                    has_encoding = True
            value = p[i + 1 :].strip()
            if len(value) >= 2 and value[0] == value[-1] == '"':
                value = value[1:-1]
                value = value.replace("\\\\", "\\").replace('\\"', '"')
            if has_encoding:
                encoding, lang, value = value.split("'")
                value = unquote(value, encoding=encoding)
            pdict[name] = value
    return key, pdict
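# Illustrative usage (a sketch; header values are assumptions):
#   >>> parse_header_parameters("text/html; charset=utf-8")
#   ('text/html', {'charset': 'utf-8'})
#   >>> parse_header_parameters("attachment; filename*=UTF-8''caf%C3%A9.txt")
#   ('attachment', {'filename': 'café.txt'})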


def content_disposition_header(as_attachment, filename):
    """
    Construct a Content-Disposition HTTP header value from the given filename
    as specified by RFC 6266.
    """
    if filename:
        disposition = "attachment" if as_attachment else "inline"
        try:
            filename.encode("ascii")
            file_expr = 'filename="{}"'.format(
                filename.replace("\\", "\\\\").replace('"', r"\"")
            )
        except UnicodeEncodeError:
            file_expr = "filename*=utf-8''{}".format(quote(filename))
        return f"{disposition}; {file_expr}"
    elif as_attachment:
        return "attachment"
    else:
        return None
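# Illustrative usage (a sketch; filenames are assumptions):
#   >>> content_disposition_header(True, "report.pdf")
#   'attachment; filename="report.pdf"'
#   >>> content_disposition_header(True, "café.pdf")
#   "attachment; filename*=utf-8''caf%C3%A9.pdf"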