Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/django/utils/html.py: 31%

194 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 06:13 +0000

1"""HTML utilities suitable for global use.""" 

2 

3import html 

4import json 

5import re 

6from html.parser import HTMLParser 

7from urllib.parse import parse_qsl, quote, unquote, urlencode, urlsplit, urlunsplit 

8 

9from django.utils.encoding import punycode 

10from django.utils.functional import Promise, keep_lazy, keep_lazy_text 

11from django.utils.http import RFC3986_GENDELIMS, RFC3986_SUBDELIMS 

12from django.utils.regex_helper import _lazy_re_compile 

13from django.utils.safestring import SafeData, SafeString, mark_safe 

14from django.utils.text import normalize_newlines 

15 

16 

@keep_lazy(SafeString)
def escape(text):
    """
    Return the given text with ampersands, quotes and angle brackets encoded
    for use in HTML.

    Always escape input, even if it's already escaped and marked as such.
    This may result in double-escaping. If this is a concern, use
    conditional_escape() instead.
    """
    # str() coerces non-string input before it reaches the stdlib escaper.
    return SafeString(html.escape(str(text)))

28 

29 

30_js_escapes = { 

31 ord("\\"): "\\u005C", 

32 ord("'"): "\\u0027", 

33 ord('"'): "\\u0022", 

34 ord(">"): "\\u003E", 

35 ord("<"): "\\u003C", 

36 ord("&"): "\\u0026", 

37 ord("="): "\\u003D", 

38 ord("-"): "\\u002D", 

39 ord(";"): "\\u003B", 

40 ord("`"): "\\u0060", 

41 ord("\u2028"): "\\u2028", 

42 ord("\u2029"): "\\u2029", 

43} 

44 

45# Escape every ASCII character with a value less than 32. 

46_js_escapes.update((ord("%c" % z), "\\u%04X" % z) for z in range(32)) 

47 

48 

@keep_lazy(SafeString)
def escapejs(value):
    """Hex encode characters for use in JavaScript strings."""
    # value is coerced with str(), translated through the _js_escapes table,
    # and the result is passed through mark_safe().
    return mark_safe(str(value).translate(_js_escapes))

53 

54 

55_json_script_escapes = { 

56 ord(">"): "\\u003E", 

57 ord("<"): "\\u003C", 

58 ord("&"): "\\u0026", 

59} 

60 

61 

def json_script(value, element_id=None, encoder=None):
    """
    Escape all the HTML/XML special characters with their unicode escapes, so
    value is safe to be output anywhere except for inside a tag attribute. Wrap
    the escaped JSON in a script tag.

    `element_id`, when truthy, is emitted as the script tag's id attribute.
    `encoder` is the class passed to json.dumps() as `cls`; DjangoJSONEncoder
    is used when it is not given.
    """
    # Imported locally rather than at module level (presumably to avoid an
    # import cycle -- confirm before moving it).
    from django.core.serializers.json import DjangoJSONEncoder

    json_str = json.dumps(value, cls=encoder or DjangoJSONEncoder).translate(
        _json_script_escapes
    )
    if element_id:
        template = '<script id="{}" type="application/json">{}</script>'
        # element_id is not marked safe, so format_html() escapes it.
        args = (element_id, mark_safe(json_str))
    else:
        template = '<script type="application/json">{}</script>'
        args = (mark_safe(json_str),)
    return format_html(template, *args)

80 

81 

def conditional_escape(text):
    """
    Similar to escape(), except that it doesn't operate on pre-escaped strings.

    This function relies on the __html__ convention used both by Django's
    SafeData class and by third-party libraries like markupsafe.
    """
    if isinstance(text, Promise):
        # Resolve Promise instances to a string before checking for __html__.
        text = str(text)
    if hasattr(text, "__html__"):
        return text.__html__()
    return escape(text)

95 

96 

def format_html(format_string, *args, **kwargs):
    """
    Similar to str.format, but pass all arguments through conditional_escape(),
    and call mark_safe() on the result. This function should be used instead
    of str.format or % interpolation to build up small HTML fragments.
    """
    # format_string itself is not escaped; only the interpolated values are.
    args_safe = map(conditional_escape, args)
    kwargs_safe = {k: conditional_escape(v) for (k, v) in kwargs.items()}
    return mark_safe(format_string.format(*args_safe, **kwargs_safe))

106 

107 

def format_html_join(sep, format_string, args_generator):
    """
    A wrapper of format_html, for the common case of a group of arguments that
    need to be formatted using the same format string, and then joined using
    'sep'. 'sep' is also passed through conditional_escape.

    'args_generator' should be an iterator that returns the sequence of 'args'
    that will be passed to format_html.

    Example:

      format_html_join('\n', "<li>{} {}</li>", ((u.first_name, u.last_name)
                                                  for u in users))
    """
    return mark_safe(
        conditional_escape(sep).join(
            format_html(format_string, *args) for args in args_generator
        )
    )

127 

128 

@keep_lazy_text
def linebreaks(value, autoescape=False):
    """Convert newlines into <p> and <br>s."""
    value = normalize_newlines(value)
    # Runs of two or more newlines separate paragraphs; a single newline
    # inside a paragraph becomes <br>.
    paras = re.split("\n{2,}", str(value))
    if autoescape:
        paras = ["<p>%s</p>" % escape(p).replace("\n", "<br>") for p in paras]
    else:
        paras = ["<p>%s</p>" % p.replace("\n", "<br>") for p in paras]
    return "\n\n".join(paras)

139 

140 

class MLStripper(HTMLParser):
    """
    HTMLParser subclass that collects text data and entity/character
    references while defining no tag handlers, so markup is left out of
    get_data().
    """

    def __init__(self):
        # convert_charrefs=False keeps references unconverted so the
        # handle_entityref()/handle_charref() hooks below receive them.
        super().__init__(convert_charrefs=False)
        self.reset()
        # Collected output fragments, in document order.
        self.fed = []

    def handle_data(self, d):
        self.fed.append(d)

    def handle_entityref(self, name):
        # Re-emit the reference in its original "&name;" form.
        self.fed.append("&%s;" % name)

    def handle_charref(self, name):
        # Re-emit the reference in its original "&#name;" form.
        self.fed.append("&#%s;" % name)

    def get_data(self):
        """Return everything collected so far as a single string."""
        return "".join(self.fed)

158 

159 

def _strip_once(value):
    """
    Internal tag stripping utility used by strip_tags.

    Feed `value` through a fresh MLStripper and return the data it collected.
    """
    s = MLStripper()
    s.feed(value)
    s.close()
    return s.get_data()

168 

169 

@keep_lazy_text
def strip_tags(value):
    """Return the given HTML with all tags stripped."""
    # Note: in typical case this loop executes _strip_once once. Loop condition
    # is redundant, but helps to reduce number of executions of _strip_once.
    value = str(value)
    while "<" in value and ">" in value:
        new_value = _strip_once(value)
        if value.count("<") == new_value.count("<"):
            # _strip_once wasn't able to detect more tags.
            break
        value = new_value
    return value

183 

184 

@keep_lazy_text
def strip_spaces_between_tags(value):
    """Return the given HTML with spaces between tags removed."""
    # Only whitespace sitting directly between ">" and "<" is removed.
    return re.sub(r">\s+<", "><", str(value))

189 

190 

def smart_urlquote(url):
    """Quote a URL if it isn't already quoted."""

    def unquote_quote(segment):
        # Unquote first so already-quoted input is not quoted twice.
        segment = unquote(segment)
        # Tilde is part of RFC 3986 Section 2.3 Unreserved Characters,
        # see also https://bugs.python.org/issue16285
        return quote(segment, safe=RFC3986_SUBDELIMS + RFC3986_GENDELIMS + "~")

    # Handle IDN before quoting.
    try:
        scheme, netloc, path, query, fragment = urlsplit(url)
    except ValueError:
        # invalid IPv6 URL (normally square brackets in hostname part).
        return unquote_quote(url)

    try:
        netloc = punycode(netloc)  # IDN -> ACE
    except UnicodeError:  # invalid domain part
        return unquote_quote(url)

    if query:
        # Separately unquoting key/value, so as to not mix querystring separators
        # included in query values. See #22267.
        query_parts = [
            (unquote(q[0]), unquote(q[1]))
            for q in parse_qsl(query, keep_blank_values=True)
        ]
        # urlencode will take care of quoting
        query = urlencode(query_parts)

    path = unquote_quote(path)
    fragment = unquote_quote(fragment)

    return urlunsplit((scheme, netloc, path, query, fragment))

226 

227 

class Urlizer:
    """
    Convert any URLs in text into clickable links.

    Work on http://, https://, www. links, and also on links ending in one of
    the original seven gTLDs (.com, .edu, .gov, .int, .mil, .net, and .org).
    Links can have trailing punctuation (periods, commas, close-parens) and
    leading punctuation (opening parens) and it'll still do the right thing.
    """

    trailing_punctuation_chars = ".,:;!"
    wrapping_punctuation = [("(", ")"), ("[", "]")]

    simple_url_re = _lazy_re_compile(r"^https?://\[?\w", re.IGNORECASE)
    simple_url_2_re = _lazy_re_compile(
        r"^www\.|^(?!http)\w[^@]+\.(com|edu|gov|int|mil|net|org)($|/.*)$", re.IGNORECASE
    )
    # The capturing group makes split() keep the separators, so joining the
    # processed pieces reproduces the text between links unchanged.
    word_split_re = _lazy_re_compile(r"""([\s<>"']+)""")

    mailto_template = "mailto:{local}@{domain}"
    url_template = '<a href="{href}"{attrs}>{url}</a>'

    def __call__(self, text, trim_url_limit=None, nofollow=False, autoescape=False):
        """
        If trim_url_limit is not None, truncate the URLs in the link text
        longer than this limit to trim_url_limit - 1 characters and append an
        ellipsis.

        If nofollow is True, give the links a rel="nofollow" attribute.

        If autoescape is True, autoescape the link text and URLs.
        """
        safe_input = isinstance(text, SafeData)

        words = self.word_split_re.split(str(text))
        return "".join(
            [
                self.handle_word(
                    word,
                    safe_input=safe_input,
                    trim_url_limit=trim_url_limit,
                    nofollow=nofollow,
                    autoescape=autoescape,
                )
                for word in words
            ]
        )

    def handle_word(
        self,
        word,
        *,
        safe_input,
        trim_url_limit=None,
        nofollow=False,
        autoescape=False,
    ):
        """
        Return `word` converted to an <a> element if it looks like a URL or an
        email address; otherwise return it marked safe (when `safe_input`),
        escaped (when `autoescape`), or unchanged.
        """
        if "." in word or "@" in word or ":" in word:
            # lead: Punctuation trimmed from the beginning of the word.
            # middle: State of the word.
            # trail: Punctuation trimmed from the end of the word.
            lead, middle, trail = self.trim_punctuation(word)
            # Make URL we want to point to.
            url = None
            nofollow_attr = ' rel="nofollow"' if nofollow else ""
            if self.simple_url_re.match(middle):
                url = smart_urlquote(html.unescape(middle))
            elif self.simple_url_2_re.match(middle):
                url = smart_urlquote("http://%s" % html.unescape(middle))
            elif ":" not in middle and self.is_email_simple(middle):
                local, domain = middle.rsplit("@", 1)
                try:
                    domain = punycode(domain)
                except UnicodeError:
                    # Invalid domain: leave url unset so the word takes the
                    # same marking/escaping path as any other non-link
                    # instead of being returned without autoescaping.
                    pass
                else:
                    url = self.mailto_template.format(local=local, domain=domain)
                    # mailto: links never get rel="nofollow".
                    nofollow_attr = ""
            # Make link.
            if url:
                trimmed = self.trim_url(middle, limit=trim_url_limit)
                if autoescape and not safe_input:
                    lead, trail = escape(lead), escape(trail)
                    trimmed = escape(trimmed)
                middle = self.url_template.format(
                    href=escape(url),
                    attrs=nofollow_attr,
                    url=trimmed,
                )
                return mark_safe(f"{lead}{middle}{trail}")
        # Not a link (or no link could be built): single shared fallback.
        if safe_input:
            return mark_safe(word)
        if autoescape:
            return escape(word)
        return word

    def trim_url(self, x, *, limit):
        """
        Return `x` unchanged if `limit` is None or `x` fits within it;
        otherwise return its first `limit - 1` characters plus an ellipsis.
        """
        if limit is None or len(x) <= limit:
            return x
        # max(0, ...) keeps a limit of 0 from producing a negative slice end.
        return "%s…" % x[: max(0, limit - 1)]

    def trim_punctuation(self, word):
        """
        Trim trailing and wrapping punctuation from `word`. Return the items of
        the new state.
        """
        lead, middle, trail = "", word, ""
        # Continue trimming until middle remains unchanged.
        # NOTE(review): each pass re-counts and re-unescapes the whole of
        # `middle`; confirm the cost is acceptable for very long words.
        trimmed_something = True
        while trimmed_something:
            trimmed_something = False
            # Trim wrapping punctuation.
            for opening, closing in self.wrapping_punctuation:
                if middle.startswith(opening):
                    middle = middle[len(opening) :]
                    lead += opening
                    trimmed_something = True
                # Keep parentheses at the end only if they're balanced.
                if (
                    middle.endswith(closing)
                    and middle.count(closing) == middle.count(opening) + 1
                ):
                    middle = middle[: -len(closing)]
                    trail = closing + trail
                    trimmed_something = True
            # Trim trailing punctuation (after trimming wrapping punctuation,
            # as encoded entities contain ';'). Unescape entities to avoid
            # breaking them by removing ';'.
            middle_unescaped = html.unescape(middle)
            stripped = middle_unescaped.rstrip(self.trailing_punctuation_chars)
            if middle_unescaped != stripped:
                punctuation_count = len(middle_unescaped) - len(stripped)
                trail = middle[-punctuation_count:] + trail
                middle = middle[:-punctuation_count]
                trimmed_something = True
        return lead, middle, trail

    @staticmethod
    def is_email_simple(value):
        """Return True if value looks like an email address."""
        # An @ must be in the middle of the value.
        if "@" not in value or value.startswith("@") or value.endswith("@"):
            return False
        try:
            p1, p2 = value.split("@")
        except ValueError:
            # value contains more than one @.
            return False
        # Dot must be in p2 (e.g. example.com)
        if "." not in p2 or p2.startswith("."):
            return False
        return True

384 

385 

# Module-level Urlizer instance that urlize() delegates to.
urlizer = Urlizer()

387 

388 

@keep_lazy_text
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
    """
    Convert URLs and email addresses in `text` into links by delegating to the
    module-level `urlizer` instance; all options are forwarded unchanged.
    """
    return urlizer(
        text, trim_url_limit=trim_url_limit, nofollow=nofollow, autoescape=autoescape
    )

394 

395 

def avoid_wrapping(value):
    """
    Avoid text wrapping in the middle of a phrase by adding non-breaking
    spaces where there previously were normal spaces.
    """
    # "\xa0" is U+00A0 NO-BREAK SPACE.
    return value.replace(" ", "\xa0")

402 

403 

def html_safe(klass):
    """
    A decorator that defines the __html__ method. This helps non-Django
    templates to detect classes whose __str__ methods return SafeString.

    Raise ValueError if `klass` itself already defines __html__() or does not
    itself define __str__(). Return `klass`, modified in place.
    """
    # klass.__dict__ is inspected rather than hasattr(), so only attributes
    # defined directly on klass count; inherited ones do not.
    if "__html__" in klass.__dict__:
        raise ValueError(
            "can't apply @html_safe to %s because it defines "
            "__html__()." % klass.__name__
        )
    if "__str__" not in klass.__dict__:
        raise ValueError(
            "can't apply @html_safe to %s because it doesn't "
            "define __str__()." % klass.__name__
        )
    # Capture the original __str__ before replacing it with a wrapper that
    # passes its result through mark_safe().
    klass_str = klass.__str__
    klass.__str__ = lambda self: mark_safe(klass_str(self))
    klass.__html__ = lambda self: str(self)
    return klass