Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/mdurl/_encode.py: 81%

52 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:15 +0000

1from __future__ import annotations 

2 

3from collections.abc import Sequence 

4from string import ascii_letters, digits, hexdigits 

5from urllib.parse import quote as encode_uri_component 

6 

# Characters that are always emitted unencoded, regardless of `exclude`.
ASCII_LETTERS_AND_DIGITS = ascii_letters + digits

# Default `exclude` set of `encode()`: punctuation left unencoded in addition
# to the alphanumerics above.
ENCODE_DEFAULT_CHARS = ";/?:@&=+$,-_.!~*'()#"
# Narrower alternative `exclude` set (a subset of the default one).
# NOTE(review): presumably intended for encoding a single URI component,
# like JavaScript's encodeURIComponent -- confirm against callers.
ENCODE_COMPONENT_CHARS = "-_.!~*'()"

# Lookup tables built by `get_encode_cache()`, keyed by the `exclude` string.
encode_cache: dict[str, list[str]] = {}

13 

14 

def get_encode_cache(exclude: str) -> Sequence[str]:
    """Return the ASCII lookup table used to percent-encode with ``exclude``.

    The table has 128 entries. Entry ``i`` is the text to emit for ``chr(i)``:
    the character itself when it is an ASCII letter or digit or appears in
    ``exclude``, otherwise its ``%XX`` escape (two upper-case hex digits).

    Tables are memoised in the module-level ``encode_cache`` dict, keyed by
    the ``exclude`` string, so the same list object is returned on every call
    with the same ``exclude``. Callers should treat it as read-only.

    :param exclude: characters to leave unencoded, in addition to a-zA-Z0-9.
        Characters outside the ASCII range are ignored: the table only covers
        code points below 128.
    :return: the 128-entry lookup table.
    """
    cached = encode_cache.get(exclude)
    if cached is not None:
        return cached

    cache = [
        ch if ch in ASCII_LETTERS_AND_DIGITS else f"%{code:02X}"
        for code, ch in enumerate(map(chr, range(128)))
    ]

    for ch in exclude:
        code = ord(ch)
        # Indexing past the 128 slots would raise IndexError, so skip
        # anything that is not ASCII.
        if code < 128:
            cache[code] = ch

    # Register the table only once it is complete, so a failure while
    # building it can never leave a half-patched entry behind.
    encode_cache[exclude] = cache
    return cache

37 

38 

def encode(
    string: str, exclude: str = ENCODE_DEFAULT_CHARS, *, keep_escaped: bool = True
) -> str:
    """Percent-encode unsafe characters, optionally keeping existing escapes.

    ASCII characters are mapped through the lookup table returned by
    ``get_encode_cache(exclude)``. Non-ASCII characters are percent-encoded
    via ``urllib.parse.quote``. A high/low surrogate pair is encoded as the
    single code point it represents; a surrogate without a matching partner
    is replaced by ``%EF%BF%BD`` (the UTF-8 bytes of U+FFFD).

    :param string: string to encode.
    :param exclude: characters to leave unencoded (in addition to a-zA-Z0-9).
    :param keep_escaped: when true, a ``%`` followed by two hex digits is
        copied through unchanged instead of being encoded again.
    :return: the encoded string.
    """
    cache = get_encode_cache(exclude)
    parts: list[str] = []

    length = len(string)
    i = 0
    while i < length:
        ch = string[i]
        code = ord(ch)

        # "%": copy an already well-formed escape sequence through untouched.
        if keep_escaped and code == 0x25 and i + 2 < length:
            if all(c in hexdigits for c in string[i + 1 : i + 3]):
                parts.append(string[i : i + 3])
                i += 3
                continue

        if code < 128:
            parts.append(cache[code])
            i += 1
            continue

        if 0xD800 <= code <= 0xDFFF:
            if code <= 0xDBFF and i + 1 < length:
                next_code = ord(string[i + 1])
                if 0xDC00 <= next_code <= 0xDFFF:
                    # Merge the pair into the code point it stands for before
                    # quoting: quote() encodes as strict UTF-8, which raises
                    # UnicodeEncodeError on surrogate code points.
                    combined = (
                        0x10000 + ((code - 0xD800) << 10) + (next_code - 0xDC00)
                    )
                    parts.append(encode_uri_component(chr(combined)))
                    i += 2
                    continue
            # Unpaired surrogate: emit the replacement character instead.
            parts.append("%EF%BF%BD")
            i += 1
            continue

        parts.append(encode_uri_component(ch))
        i += 1

    return "".join(parts)