Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/mdurl/_encode.py: 80%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

54 statements  

1from __future__ import annotations 

2 

3from urllib.parse import quote as encode_uri_component 

4 

5TYPE_CHECKING = False 

6if TYPE_CHECKING: 

7 from collections.abc import Sequence 

8 

# Characters that are always emitted unencoded, whatever `exclude` says.
ASCII_LETTERS_AND_DIGITS = (
    "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "0123456789"
)
# Characters accepted after "%" when recognising an existing escape sequence.
HEXDIGITS = "0123456789" "abcdef" "ABCDEF"

# Default set of extra characters that `encode` leaves unencoded.
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
# Narrower exclude set; not referenced by the code visible here.
# NOTE(review): presumably mirrors JS encodeURIComponent's unreserved marks.
ENCODE_COMPONENT_CHARS = "-_.!~*'()"

# Memoised lookup tables built by `get_encode_cache`, keyed by the `exclude`
# string they were built for.
encode_cache: dict[str, list[str]] = {}

18 

19 

def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used to encode with ``exclude``.

    The table has 128 entries indexed by code point. Each entry is either
    the character itself (ASCII letters, digits and every ASCII character
    listed in ``exclude``) or its two-digit uppercase percent-escape.

    Tables are memoised in the module-level ``encode_cache`` dict, so the
    same list object is returned for repeated calls with an equal
    ``exclude`` string.

    Non-ASCII characters in ``exclude`` are ignored: the table only covers
    code points below 128, so they have no slot to occupy.
    """
    cached = encode_cache.get(exclude)
    if cached is not None:
        return cached

    table = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{code:02X}"
        for code, ch in enumerate(map(chr, range(128)))
    ]

    for ch in exclude:
        code = ord(ch)
        # Guard against IndexError for characters outside the table.
        if code < 128:
            table[code] = ch

    # Publish only once the table is complete, so a partially filled table
    # can never be served from the cache.
    encode_cache[exclude] = table
    return table

42 

43 

def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode unsafe characters, skipping already encoded sequences.

    Args:
        string: Text to encode.
        exclude: Characters to leave unencoded, in addition to a-zA-Z0-9.
        keep_escaped: When true (the default), a ``%`` that starts a valid
            escape (``%`` followed by two hex digits) is copied through
            unchanged instead of being encoded again.

    Returns:
        The encoded string. ASCII characters are mapped through the table
        from ``get_encode_cache(exclude)``; other characters are encoded
        with ``encode_uri_component``. A high/low surrogate pair is encoded
        as the single code point it represents, and an unpaired surrogate
        becomes ``%EF%BF%BD`` (U+FFFD).
    """
    cache = get_encode_cache(exclude)
    # Collect pieces and join once instead of repeated string concatenation.
    parts: list[str] = []

    length = len(string)
    i = 0
    while i < length:
        ch = string[i]
        code = ord(ch)

        # "%" with at least two characters after it: keep a valid escape.
        if keep_escaped and code == 0x25 and i + 2 < length:
            if all(c in HEXDIGITS for c in string[i + 1 : i + 3]):
                parts.append(string[i : i + 3])
                i += 3
                continue

        if code < 128:
            parts.append(cache[code])
            i += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            # Only a high surrogate followed by a low surrogate is valid.
            if code <= 0xDBFF and i + 1 < length:
                next_code = ord(string[i + 1])
                if 0xDC00 <= next_code <= 0xDFFF:
                    # Merge the pair into the code point it stands for.
                    # Passing the two surrogates on as-is would raise
                    # UnicodeEncodeError in the UTF-8 encoder.
                    combined = (
                        0x10000 + ((code - 0xD800) << 10) + (next_code - 0xDC00)
                    )
                    parts.append(encode_uri_component(chr(combined)))
                    i += 2
                    continue
            # Unpaired surrogate: emit the escaped replacement character.
            parts.append("%EF%BF%BD")
            i += 1
            continue

        parts.append(encode_uri_component(ch))
        i += 1

    return "".join(parts)