Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mdurl/_decode.py: 97%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

76 statements  

1from __future__ import annotations 

2 

3import functools 

4import re 

5 

6TYPE_CHECKING = False 

7if TYPE_CHECKING: 

8 from collections.abc import Sequence 

9 

10 

# Characters that stay percent-encoded by default; this is the default value
# of the `exclude` parameter of `decode`.
DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"

# Empty exclusion set: when passed as `exclude`, no ASCII character is kept
# percent-encoded.
DECODE_COMPONENT_CHARS = ""

13 

# Memoized lookup tables, keyed by the `exclude` string they were built for.
decode_cache: dict[str, list[str]] = {}


def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the 128-entry ASCII lookup table for the given exclusion set.

    Entry ``n`` of the table is the text that a decoded byte with value ``n``
    should turn into: the character itself, or its upper-case ``%XX`` escape
    when that character appears in ``exclude``.

    Tables are memoized in ``decode_cache``; the returned list is the cached
    object itself, so callers must treat it as read-only.

    Args:
        exclude: Characters that must remain percent-encoded. Characters
            outside the ASCII range are ignored, because the table only has
            slots for code points 0-127.

    Returns:
        A sequence of 128 strings indexed by byte value.
    """
    cached = decode_cache.get(exclude)
    if cached is not None:
        return cached

    cache = [chr(code) for code in range(128)]

    for ch in exclude:
        code = ord(ch)
        # Non-ASCII code points have no slot in the table; indexing with them
        # would raise IndexError, so skip them.
        if code < 128:
            cache[code] = f"%{code:02X}"

    # Publish only the fully built table so a failure can never leave a
    # partially populated entry behind.
    decode_cache[exclude] = cache
    return cache

33 

34 

def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode percent-encoded string.

    Every run of consecutive ``%XX`` escapes (hex digits matched
    case-insensitively) is handed to ``repl_func_with_cache`` together with
    the lookup table that ``get_decode_cache`` returns for ``exclude``; text
    outside such runs is returned unchanged.

    Args:
        string: The text to decode.
        exclude: Characters passed to ``get_decode_cache`` to select the
            lookup table.

    Returns:
        The string with each escape run replaced by its decoded form.
    """
    lookup = get_decode_cache(exclude)

    def substitute(found: re.Match) -> str:
        # Bind the lookup table so `re.sub` can call us with just the match.
        return repl_func_with_cache(found, cache=lookup)

    return re.sub(r"(%[a-f0-9]{2})+", substitute, string, flags=re.IGNORECASE)

41 

42 

def _trailing_byte_count(lead: int) -> int:
    """Return how many continuation bytes the lead byte announces (0 if none)."""
    if (lead & 0xE0) == 0xC0:  # 110xxxxx 10xxxxxx
        return 1
    if (lead & 0xF0) == 0xE0:  # 1110xxxx 10xxxxxx 10xxxxxx
        return 2
    if (lead & 0xF8) == 0xF0:  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return 3
    return 0


def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
    """Decode one run of ``%XX`` escapes into text.

    The matched text is processed three characters (one escaped byte) at a
    time:

    * a byte below 0x80 is replaced by ``cache[byte]``;
    * a multi-byte lead followed by enough well-formed continuation bytes
      (``10xxxxxx``) is decoded as UTF-8; if the bytes do not form a valid
      character, one U+FFFD is emitted per byte of the attempted sequence;
    * any other byte yields a single U+FFFD.

    Args:
        match: A regex match whose full text consists solely of ``%XX``
            triplets with hexadecimal digits.
        cache: Lookup table indexed by byte values 0-127.

    Returns:
        The decoded replacement text for the matched run.
    """
    seq = match.group()
    seq_len = len(seq)
    parts: list[str] = []

    pos = 0
    while pos < seq_len:
        lead = int(seq[pos + 1 : pos + 3], 16)

        if lead < 0x80:
            parts.append(cache[lead])
            pos += 3
            continue

        extra = _trailing_byte_count(lead)
        # Only attempt a multi-byte decode when the run still holds `extra`
        # more triplets after the current one.
        if extra and pos + 3 * extra < seq_len:
            trail = [
                int(seq[j + 1 : j + 3], 16)
                for j in range(pos + 3, pos + 3 * (extra + 1), 3)
            ]
            if all((byte & 0xC0) == 0x80 for byte in trail):
                width = extra + 1
                try:
                    parts.append(bytes((lead, *trail)).decode())
                except UnicodeDecodeError:
                    # Well-formed shape but not a valid character (overlong,
                    # surrogate, out of range): one replacement per byte.
                    parts.append("\ufffd" * width)
                pos += 3 * width
                continue

        # Stray continuation byte, invalid lead, or truncated/malformed
        # sequence: replace this byte alone and resume with the next one.
        parts.append("\ufffd")
        pos += 3

    return "".join(parts)