Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mdurl/_encode.py: 81%
52 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:15 +0000
1from __future__ import annotations
3from collections.abc import Sequence
4from string import ascii_letters, digits, hexdigits
5from urllib.parse import quote as encode_uri_component
# Characters that are never percent-encoded: ASCII letters followed by digits.
ASCII_LETTERS_AND_DIGITS = "".join((ascii_letters, digits))

# Smaller punctuation set; every character here also appears in
# ENCODE_DEFAULT_CHARS.
ENCODE_COMPONENT_CHARS = "-_.!~*'()"
# Punctuation used as the default `exclude` argument of `encode`.
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"

# Memoized lookup tables, keyed by the `exclude` string they were built for.
encode_cache: dict[str, list[str]] = {}
def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used to encode with ``exclude``.

    The table has 128 entries, indexed by code point. Alphanumeric
    characters and every ASCII character listed in ``exclude`` map to
    themselves; all other entries hold the two-digit uppercase
    percent-escape (for example ``"%20"`` for a space).

    Tables are memoized in ``encode_cache`` keyed by ``exclude``, so the
    returned sequence is shared between calls and must not be mutated.

    :param exclude: characters to leave unencoded in addition to
        ``a-zA-Z0-9``. Non-ASCII characters are ignored because the table
        only covers code points below 128.
    :return: the 128-entry lookup table.
    """
    cached = encode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{code:02X}"
        for code, ch in enumerate(map(chr, range(128)))
    ]

    for ch in exclude:
        code = ord(ch)
        # Indexing past the table would raise; such characters are outside
        # the range this table covers, so skip them.
        if code < 128:
            table[code] = ch

    # Publish only the finished table so a half-built one is never cached.
    encode_cache[exclude] = table
    return table
def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode unsafe characters, skipping already encoded sequences.

    :param string: the text to encode.
    :param exclude: characters to leave unencoded, in addition to
        ``a-zA-Z0-9``.
    :param keep_escaped: when true (the default), a ``%`` that starts a
        well-formed escape (``%`` followed by two hex digits) is copied
        through unchanged instead of being encoded again.
    :return: the encoded string.

    ASCII characters are translated through the lookup table from
    ``get_encode_cache``. Other characters are percent-encoded with
    ``encode_uri_component``. A high surrogate immediately followed by a
    low surrogate is combined into the code point the pair represents
    before encoding; any other surrogate is emitted as ``%EF%BF%BD``
    (the UTF-8 encoding of U+FFFD).
    """
    parts: list[str] = []
    cache = get_encode_cache(exclude)

    length = len(string)
    i = 0
    while i < length:
        ch = string[i]
        code = ord(ch)

        # "%": keep an existing escape sequence verbatim. The bound check
        # guarantees two more characters follow the "%".
        if keep_escaped and code == 0x25 and i + 2 < length:
            if all(c in hexdigits for c in string[i + 1 : i + 3]):
                parts.append(string[i : i + 3])
                i += 3
                continue

        if code < 128:
            parts.append(cache[code])
            i += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            if code <= 0xDBFF and i + 1 < length:
                next_code = ord(string[i + 1])
                if 0xDC00 <= next_code <= 0xDFFF:
                    # Strict UTF-8 cannot encode surrogate code points, so
                    # merge the pair into a single scalar value first.
                    scalar = (
                        0x10000 + ((code - 0xD800) << 10) + (next_code - 0xDC00)
                    )
                    parts.append(encode_uri_component(chr(scalar)))
                    i += 2
                    continue
            # Unpaired surrogate: substitute the replacement character.
            parts.append("%EF%BF%BD")
            i += 1
            continue

        parts.append(encode_uri_component(ch))
        i += 1

    return "".join(parts)