Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/markdown_it/common/normalize_url.py: 84%

32 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:07 +0000

1from __future__ import annotations 

2 

3from collections.abc import Callable 

4import re 

5from urllib.parse import quote, unquote, urlparse, urlunparse # noqa: F401 

6 

7import mdurl 

8 

9from .. import _punycode 

10 

# Protocols (written with the trailing colon) whose hostname part gets
# re-encoded by the normalizers below; URLs with any other protocol keep
# their hostname exactly as parsed.
RECODE_HOSTNAME_FOR = (
    "http:",
    "https:",
    "mailto:",
)

12 

13 

def normalizeLink(url: str) -> str:
    """Return the encoded form of a link destination URL.

    ::

        [label]: destination 'title'
                 ^^^^^^^^^^^

    The URL is parsed with ``mdurl.parse``. When it has a hostname and
    either no protocol or one listed in ``RECODE_HOSTNAME_FOR``, the
    hostname is converted with ``_punycode.to_ascii``. The result is then
    re-assembled with ``mdurl.format`` and passed through ``mdurl.encode``.
    """
    parts = mdurl.parse(url, slashes_denote_host=True)
    host = parts.hostname

    # Only recode hostnames in urls like:
    #   `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # Unknown schemas are left alone, because it's likely that we would
    # encode something we shouldn't (e.g. `skype:name` treated as
    # `skype:host`).
    should_recode = bool(host) and (
        not parts.protocol or parts.protocol in RECODE_HOSTNAME_FOR
    )

    if should_recode:
        try:
            parts = parts._replace(hostname=_punycode.to_ascii(host))
        except Exception:
            # Best effort: if the conversion fails, keep the hostname
            # exactly as it was parsed.
            pass

    return mdurl.encode(mdurl.format(parts))

38 

39 

def normalizeLinkText(url: str) -> str:
    """Return the decoded, display form of autolink content.

    ::

        <destination>
         ~~~~~~~~~~~

    The URL is parsed with ``mdurl.parse``. When it has a hostname and
    either no protocol or one listed in ``RECODE_HOSTNAME_FOR``, the
    hostname is converted with ``_punycode.to_unicode``. The result is then
    re-assembled with ``mdurl.format`` and passed through ``mdurl.decode``.
    """
    parts = mdurl.parse(url, slashes_denote_host=True)
    host = parts.hostname

    # Only recode hostnames in urls like:
    #   `http://host/`, `https://host/`, `mailto:user@host`, `//host/`
    #
    # Unknown schemas are left alone, because it's likely that we would
    # decode something we shouldn't (e.g. `skype:name` treated as
    # `skype:host`).
    should_recode = bool(host) and (
        not parts.protocol or parts.protocol in RECODE_HOSTNAME_FOR
    )

    if should_recode:
        try:
            parts = parts._replace(hostname=_punycode.to_unicode(host))
        except Exception:
            # Best effort: if the conversion fails, keep the hostname
            # exactly as it was parsed.
            pass

    # add '%' to exclude list because of
    # https://github.com/markdown-it/markdown-it/issues/720
    exclude_chars = mdurl.DECODE_DEFAULT_CHARS + "%"
    return mdurl.decode(mdurl.format(parts), exclude_chars)

65 

66 

# Matches URLs that start with a protocol rejected by default.
BAD_PROTO_RE = re.compile(r"^(vbscript|javascript|file|data):")

# Matches the `data:` URLs that are still accepted: gif/png/jpeg/webp images.
GOOD_DATA_RE = re.compile(r"^data:image\/(gif|png|jpeg|webp);")

69 

70 

def validateLink(url: str, validator: Callable | None = None) -> bool:
    """Decide whether a link URL is allowed in the output.

    This validator can prohibit more than really needed to prevent XSS.
    It's a tradeoff to keep code simple and to be secure by default.

    Note: url should be normalized at this point, and existing entities
    decoded.

    :param url: the link destination to check.
    :param validator: optional callable; when given, it is called with the
        unmodified ``url`` and its result is returned as-is, bypassing the
        built-in checks.
    :return: the validator's result when one is supplied; otherwise
        ``False`` only when the stripped, lower-cased URL matches
        ``BAD_PROTO_RE`` without also matching ``GOOD_DATA_RE``.
    """
    # A caller-supplied validator replaces the default policy entirely.
    if validator is not None:
        return validator(url)

    candidate = url.strip().lower()

    # Anything that does not start with a rejected protocol is accepted.
    if BAD_PROTO_RE.search(candidate) is None:
        return True

    # A rejected protocol passes only if it is an accepted `data:` image.
    return GOOD_DATA_RE.search(candidate) is not None