Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/markdown_it/common/normalize_url.py: 96%

27 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:15 +0000

1from __future__ import annotations 

2 

3from collections.abc import Callable 

4from contextlib import suppress 

5import re 

6from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401 

7 

8import mdurl 

9 

10from .. import _punycode 

11 

# Protocols (as reported by the parsed URL, trailing colon included) whose
# hostname part is re-encoded by the normalizers in this module. URLs with
# any other non-empty protocol keep their hostname untouched.
RECODE_HOSTNAME_FOR = ("http:", "https:", "mailto:")

13 

14 

def normalizeLink(url: str) -> str:
    """Normalize destination URLs in links.

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^

    The URL is parsed with ``mdurl.parse``; when it has a hostname and its
    protocol is either empty or listed in ``RECODE_HOSTNAME_FOR``, the
    hostname is replaced by ``_punycode.to_ascii(hostname)``. The parsed
    URL is then re-serialized with ``mdurl.format`` and passed through
    ``mdurl.encode``.

    :param url: the raw link destination.
    :return: the normalized destination string.
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)

    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    if parsed.hostname and (
        not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR
    ):
        # Deliberate best effort: if the hostname cannot be converted, the
        # exception is swallowed and the hostname is left exactly as parsed.
        with suppress(Exception):
            parsed = parsed._replace(hostname=_punycode.to_ascii(parsed.hostname))

    return mdurl.encode(mdurl.format(parsed))

38 

39 

def normalizeLinkText(url: str) -> str:
    """Normalize autolink content.

    ::

        <destination>
         ~~~~~~~~~~~

    The URL is parsed with ``mdurl.parse``; when it has a hostname and its
    protocol is either empty or listed in ``RECODE_HOSTNAME_FOR``, the
    hostname is replaced by ``_punycode.to_unicode(hostname)``. The parsed
    URL is then re-serialized with ``mdurl.format`` and passed through
    ``mdurl.decode``.

    :param url: the raw autolink destination.
    :return: the normalized text for the link.
    """
    parsed = mdurl.parse(url, slashes_denote_host=True)

    # Encode hostnames in urls like:
    # `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # We don't encode unknown schemas, because it's likely that we encode
    # something we shouldn't (e.g. `skype:name` treated as `skype:host`)
    #
    if parsed.hostname and (
        not parsed.protocol or parsed.protocol in RECODE_HOSTNAME_FOR
    ):
        # Deliberate best effort: if the hostname cannot be converted, the
        # exception is swallowed and the hostname is left exactly as parsed.
        with suppress(Exception):
            parsed = parsed._replace(hostname=_punycode.to_unicode(parsed.hostname))

    # add '%' to exclude list because of https://github.com/markdown-it/markdown-it/issues/720
    return mdurl.decode(mdurl.format(parsed), mdurl.DECODE_DEFAULT_CHARS + "%")

64 

65 

# Protocols that are rejected by default when they start the URL.
BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")
# The only `data:` URLs let through: gif/png/jpeg/webp images.
GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")


def validateLink(url: str, validator: Callable[[str], bool] | None = None) -> bool:
    """Validate URL link is allowed in output.

    This validator can prohibit more than really needed to prevent XSS.
    It's a tradeoff to keep code simple and to be secure by default.

    Note: url should be normalized at this point, and existing entities decoded.

    :param url: the link destination to check.
    :param validator: optional callable; when given, its result for ``url``
        (passed through unmodified) is returned and the built-in check is
        skipped entirely.
    :return: ``True`` when the link may be emitted, ``False`` otherwise.
    """
    if validator is not None:
        return validator(url)

    # The built-in check ignores surrounding whitespace and letter case.
    url = url.strip().lower()

    # Anything not starting with a blacklisted protocol is accepted.
    if BAD_PROTO_RE.search(url) is None:
        return True

    # A blacklisted protocol is only acceptable for whitelisted image data.
    return GOOD_DATA_RE.search(url) is not None