Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mdurl/_decode.py: 100%
73 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
1from __future__ import annotations
3from collections.abc import Sequence
4import functools
5import re
# Default `exclude` set for `decode`: percent-escapes of these characters are
# kept in "%XX" form instead of being turned back into the literal character.
DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
# Empty exclusion set: with it, every ASCII escape is decoded.
# NOTE(review): presumably meant for decoding a single URL component; no
# caller is visible in this module -- confirm against users of this constant.
DECODE_COMPONENT_CHARS = ""

# Memo of lookup tables built by `get_decode_cache`, keyed by the `exclude`
# string each table was built for.
decode_cache: dict[str, list[str]] = {}
def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the 128-entry ASCII lookup table for the given *exclude* set.

    Entry ``n`` of the table is the text a decoded byte ``n`` should produce:
    the character ``chr(n)`` itself, or, when that character occurs in
    *exclude*, its upper-case percent-escape (for example ``"%2F"`` for
    ``"/"``).

    Tables are memoised in the module-level ``decode_cache`` dict, so calling
    this again with the same *exclude* returns the very same list object.

    Characters of *exclude* with a code point of 128 or above are ignored:
    the table only covers ASCII, so they have no slot to occupy.
    """
    try:
        return decode_cache[exclude]
    except KeyError:
        pass

    table = [chr(code) for code in range(128)]
    for ch in exclude:
        code = ord(ch)
        # Guard against non-ASCII input, which would otherwise index past the
        # end of the table.
        if code < 128:
            table[code] = f"%{code:02X}"

    # Publish only the finished table so a partially built one is never
    # observable through `decode_cache`.
    decode_cache[exclude] = table
    return table
def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode the percent-encoded sequences found in *string*.

    Every maximal run of ``%XX`` escapes (hex digits matched
    case-insensitively) is handed to ``repl_func_with_cache`` together with
    the lookup table that ``get_decode_cache`` returns for *exclude*; the
    text between runs is left as it is.
    """
    table = get_decode_cache(exclude)

    def substitute(found: re.Match) -> str:
        return repl_func_with_cache(found, cache=table)

    escape_run = re.compile(r"(%[a-f0-9]{2})+", flags=re.IGNORECASE)
    return escape_run.sub(substitute, string)
def _utf8_continuation_count(lead: int) -> int:
    """Return how many continuation bytes the lead byte *lead* announces.

    Returns 0 when *lead* does not have the bit pattern of a multi-byte lead.
    """
    if (lead & 0xE0) == 0xC0:  # 110xxxxx 10xxxxxx
        return 1
    if (lead & 0xF0) == 0xE0:  # 1110xxxx 10xxxxxx 10xxxxxx
        return 2
    if (lead & 0xF8) == 0xF0:  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        return 3
    return 0


def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
    """Decode one run of ``%XX`` escapes into text.

    The matched text is read as consecutive three-character ``%XX`` triplets
    (this is what the pattern used by ``decode`` produces).

    * A byte below 0x80 is replaced by ``cache[byte]``, so *cache* must
      provide an entry for every index 0..127.
    * A lead byte followed, within the run, by the number of continuation
      bytes (``10xxxxxx``) it announces is decoded as UTF-8. If the bytes are
      well-shaped but still not valid UTF-8, one U+FFFD per byte is emitted.
    * Any other byte (stray continuation byte, truncated or broken sequence,
      invalid lead) yields a single U+FFFD and decoding resumes at the next
      triplet.
    """
    seq = match.group()
    length = len(seq)
    pieces: list[str] = []

    pos = 0
    while pos < length:
        lead = int(seq[pos + 1 : pos + 3], 16)

        if lead < 0x80:
            pieces.append(cache[lead])
            pos += 3
            continue

        n_cont = _utf8_continuation_count(lead)
        # The last continuation triplet must start inside the run.
        if n_cont and pos + 3 * n_cont < length:
            end = pos + 3 * (n_cont + 1)
            tail = [int(seq[k + 1 : k + 3], 16) for k in range(pos + 3, end, 3)]

            if all((byte & 0xC0) == 0x80 for byte in tail):
                try:
                    pieces.append(bytes((lead, *tail)).decode("utf-8"))
                except UnicodeDecodeError:
                    # Right shape but not valid UTF-8: one replacement
                    # character per consumed byte.
                    pieces.append("\ufffd" * (n_cont + 1))
                pos = end
                continue

        # Not decodable as the start of a sequence: replace this byte only.
        pieces.append("\ufffd")
        pos += 3

    return "".join(pieces)