Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mdurl/_decode.py: 97%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import functools
4import re
6TYPE_CHECKING = False
7if TYPE_CHECKING:
8 from collections.abc import Sequence
# Characters that stay percent-encoded by default: this string is the default
# value of the ``exclude`` parameter of ``decode``.
DECODE_DEFAULT_CHARS = ";/?:@&=+$,#"
# Empty exclusion set: used as ``exclude``, no character is kept encoded.
DECODE_COMPONENT_CHARS = ""
# Memo of the lookup tables built by ``get_decode_cache``, keyed by the
# ``exclude`` string they were built for.
decode_cache: dict[str, list[str]] = {}


def get_decode_cache(exclude: str) -> Sequence[str]:
    """Return the 128-entry ASCII lookup table for ``exclude``.

    Entry ``n`` of the table is ``chr(n)``, except for the characters listed
    in ``exclude``: their entry is the upper-case ``%XX`` escape, so looking
    them up keeps them percent-encoded.

    Tables are memoised in ``decode_cache``; repeated calls with the same
    ``exclude`` return the same list object.

    Characters in ``exclude`` whose code point is 128 or above are ignored.
    The table only has ASCII slots, so there is no entry for them to
    override (previously they raised ``IndexError`` and left a partially
    filled table in the memo).
    """
    cached = decode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [chr(code) for code in range(128)]
    for ch in exclude:
        code = ord(ch)
        if code < 128:
            table[code] = f"%{code:02X}"

    # Register the table only once it is complete, so an error while building
    # it can never leave a half-populated entry behind for later callers.
    decode_cache[exclude] = table
    return table
def decode(string: str, exclude: str = DECODE_DEFAULT_CHARS) -> str:
    """Decode a percent-encoded string.

    Every run of consecutive ``%XX`` escapes (hex digits in either case) in
    ``string`` is replaced by the result of ``repl_func_with_cache``; the
    rest of the string is returned unchanged.

    ``exclude`` selects the lookup table obtained from ``get_decode_cache``
    and defaults to ``DECODE_DEFAULT_CHARS``.
    """
    substitute = functools.partial(
        repl_func_with_cache,
        cache=get_decode_cache(exclude),
    )
    escape_run = re.compile(r"(%[a-f0-9]{2})+", re.IGNORECASE)
    return escape_run.sub(substitute, string)
def repl_func_with_cache(match: re.Match, cache: Sequence[str]) -> str:
    """Decode one matched run of ``%XX`` escapes.

    The matched text is walked one escape (three characters) at a time:

    * a byte below 0x80 is replaced by ``cache[byte]``;
    * a UTF-8 lead byte followed, inside the same run, by the right number of
      continuation bytes is decoded as UTF-8; if that decoding fails, one
      U+FFFD is emitted per byte of the sequence;
    * any other byte (stray continuation byte, truncated sequence, invalid
      lead byte) yields a single U+FFFD and only that byte is consumed.
    """
    run = match.group()
    end = len(run)
    # (mask, expected bits after masking, total bytes) per lead-byte class.
    lead_classes = (
        (0xE0, 0xC0, 2),  # 110xxxxx 10xxxxxx
        (0xF0, 0xE0, 3),  # 1110xxxx 10xxxxxx 10xxxxxx
        (0xF8, 0xF0, 4),  # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    )

    pieces: list[str] = []
    pos = 0
    while pos < end:
        lead = int(run[pos + 1 : pos + 3], 16)

        if lead < 0x80:
            pieces.append(cache[lead])
            pos += 3
            continue

        # Default outcome: the byte cannot start a valid sequence here.
        text = "\ufffd"
        consumed = 1
        for mask, bits, size in lead_classes:
            if (lead & mask) != bits:
                continue
            last_escape = pos + 3 * (size - 1)
            if last_escape < end:
                tail = [
                    int(run[k + 1 : k + 3], 16)
                    for k in range(pos + 3, last_escape + 1, 3)
                ]
                if all((byte & 0xC0) == 0x80 for byte in tail):
                    try:
                        text = bytes((lead, *tail)).decode()
                    except UnicodeDecodeError:
                        text = "\ufffd" * size
                    consumed = size
            # The lead-byte classes are mutually exclusive.
            break

        pieces.append(text)
        pos += 3 * consumed

    return "".join(pieces)