Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mdurl/_decode.py: 100%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

73 statements  

1from __future__ import annotations 

2 

3from collections.abc import Sequence 

4import functools 

5import re 

6 

# Characters that decode() keeps percent-encoded when the caller does not pass
# an explicit ``exclude`` string (this is the default of that parameter).
DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
# Empty exclusion set. Not referenced in this part of the file; presumably
# intended for decoding a single URL component -- TODO confirm against callers.
DECODE_COMPONENT_CHARS = ""

9 

# Memoised lookup tables, keyed by the ``exclude`` string they were built for.
decode_cache: dict[str, list[str]] = {}


def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the 128-entry ASCII decode table for *exclude*.

    Index ``n`` of the table holds the text a decoded byte ``n`` should be
    replaced with: the character ``chr(n)`` itself, or its upper-case
    ``%XX`` escape when that character occurs in *exclude*.

    Tables are built once per distinct *exclude* string and then reused;
    the same list object is returned on subsequent calls.

    Characters of *exclude* outside the ASCII range are ignored: the table
    only has slots for code points 0..127, so they have no entry to
    override.
    """
    cached = decode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [chr(code) for code in range(128)]
    for char in exclude:
        code = ord(char)
        if code < 128:
            table[code] = f"%{code:02X}"

    # Register the table only once it is complete, so a failure while
    # building it can never leave a half-initialised entry behind.
    decode_cache[exclude] = table
    return table

29 

30 

def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode the percent-encoded sequences found in *string*.

    Every maximal run of ``%XX`` escapes (hex digits in either case) is
    handed to ``repl_func_with_cache`` together with the lookup table that
    ``get_decode_cache`` returns for *exclude*; the rest of the string is
    left untouched.
    """
    escape_run = re.compile(r"(%[a-f0-9]{2})+", flags=re.IGNORECASE)
    replace_run = functools.partial(
        repl_func_with_cache, cache=get_decode_cache(exclude)
    )
    return escape_run.sub(replace_run, string)

37 

38 

def _utf8_sequence_length(lead_byte: int) -> int:
    """Return the UTF-8 sequence length announced by *lead_byte*, or 0."""
    if (lead_byte & 0xE0) == 0xC0:  # 110xxxxx 10xxxxxx
        return 2
    if (lead_byte & 0xF0) == 0xE0:  # 1110xxxx 10xxxxxx 10xxxxxx
        return 3
    if (lead_byte & 0xF8) == 0xF0:  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return 4
    return 0


def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
    """Decode one run of ``%XX`` escapes captured by *match*.

    Args:
        match: A match whose full text is a run of three-character
            ``%XX`` escapes.
        cache: Lookup table indexed by byte value (0..127) giving the
            replacement text for each ASCII byte.

    Returns:
        The decoded text. Bytes below 0x80 are looked up in *cache*.
        A lead byte followed by the right number of continuation bytes is
        decoded as UTF-8; if that decoding fails, one U+FFFD is emitted per
        byte of the sequence. Any other byte yields a single U+FFFD.
    """
    seq = match.group()
    # Each escape occupies exactly three characters, so the byte values can
    # be extracted up front.
    octets = [int(seq[pos + 1 : pos + 3], 16) for pos in range(0, len(seq), 3)]
    total = len(octets)

    # Collect pieces and join once instead of growing a string in the loop.
    pieces: list[str] = []
    index = 0
    while index < total:
        lead = octets[index]

        if lead < 0x80:
            pieces.append(cache[lead])
            index += 1
            continue

        size = _utf8_sequence_length(lead)
        tail = octets[index + 1 : index + size]
        # The sequence is only attempted when all announced continuation
        # bytes are present and each has the form 10xxxxxx.
        if size and len(tail) == size - 1 and all(
            (octet & 0xC0) == 0x80 for octet in tail
        ):
            try:
                pieces.append(bytes(octets[index : index + size]).decode())
            except UnicodeDecodeError:
                # Structurally well-formed but not valid UTF-8.
                pieces.append("\ufffd" * size)
            index += size
            continue

        # Stray continuation byte, invalid lead byte, or truncated sequence.
        pieces.append("\ufffd")
        index += 1

    return "".join(pieces)