Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rfc3986/normalizers.py: 91%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

100 statements  

1# Copyright (c) 2014 Rackspace 

2# Licensed under the Apache License, Version 2.0 (the "License"); 

3# you may not use this file except in compliance with the License. 

4# You may obtain a copy of the License at 

5# 

6# http://www.apache.org/licenses/LICENSE-2.0 

7# 

8# Unless required by applicable law or agreed to in writing, software 

9# distributed under the License is distributed on an "AS IS" BASIS, 

10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

11# implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""Module with functions to normalize components.""" 

15 

16import re 

17import typing as t 

18from urllib.parse import quote as urlquote 

19 

20from . import compat 

21from . import misc 

22 

23 

def normalize_scheme(scheme: str) -> str:
    """Return ``scheme`` converted to lower case."""
    lowered = scheme.lower()
    return lowered

27 

28 

def normalize_authority(
    authority: t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str]],
) -> str:
    """Build the normalized authority string from its three parts.

    ``authority`` is a ``(userinfo, host, port)`` tuple.  Any part that is
    ``None`` or empty is left out.  The userinfo is followed by ``"@"`` and
    the port is preceded by ``":"``; the port itself is used as given.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        pieces.append(normalize_percent_characters(userinfo) + "@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        pieces.append(":" + port)
    return "".join(pieces)

42 

43 

def normalize_username(username: str) -> str:
    """Percent-encode ``username`` so it can be placed in userinfo."""
    # "/" is already the default ``safe`` set of ``quote``; it is spelled out
    # so it is visible that slashes pass through unescaped.
    return urlquote(username, safe="/")

47 

48 

def normalize_password(password: str) -> str:
    """Percent-encode ``password`` so it can be placed in userinfo."""
    # "/" is already the default ``safe`` set of ``quote``; it is spelled out
    # so it is visible that slashes pass through unescaped.
    return urlquote(password, safe="/")

52 

53 

def normalize_host(host: str) -> str:
    """Lower-case a host, escaping the zone delimiter of an IPv6 literal.

    A host that does not match ``misc.IPv6_MATCHER``, or that contains no
    ``"%"``, is simply lower-cased.  Otherwise the text before the first
    ``"%"`` is lower-cased, the text from the ``"%"`` onwards (the Zone ID)
    keeps its casing, and a bare RFC 4007 ``"%"`` delimiter is rewritten to
    the RFC 6874 form ``"%25"``.
    """
    zone_at = host.find("%") if misc.IPv6_MATCHER.match(host) else -1
    if zone_at == -1:
        return host.lower()

    escaped_at = host.find("%25")

    # The first "%" is treated as a bare RFC 4007 delimiter when there is no
    # "%25" at all, when it comes before the first "%25", or when the host
    # is '[<IPv6 addr>%25]' -- in that last case "25" is taken to be the
    # Zone ID itself, so the result is '[<IPv6 addr>%2525]'.
    needs_escape = (
        escaped_at == -1
        or zone_at < escaped_at
        or (zone_at == escaped_at and escaped_at + 4 == len(host))
    )
    if needs_escape:
        host = host.replace("%", "%25", 1)

    # Only the address part is case-normalized; the Zone ID is kept as is.
    address, zone = host[:zone_at], host[zone_at:]
    return address.lower() + zone

75 

76 

def normalize_path(path: str) -> str:
    """Normalize a path component.

    An empty path is returned as is.  Otherwise the percent-encodings are
    normalized first and the dot segments are removed from that result.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path

84 

85 

@t.overload
def normalize_query(query: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_query(query: None) -> None:  # noqa: D103
    ...


def normalize_query(query: t.Optional[str]) -> t.Optional[str]:
    """Normalize the percent-encodings of a query component.

    ``None`` and the empty string are handed back unchanged.
    """
    if query:
        return normalize_percent_characters(query)
    return query

101 

102 

@t.overload
def normalize_fragment(fragment: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_fragment(fragment: None) -> None:  # noqa: D103
    ...


def normalize_fragment(fragment: t.Optional[str]) -> t.Optional[str]:
    """Normalize the percent-encodings of a fragment component.

    ``None`` and the empty string are handed back unchanged.
    """
    if fragment:
        return normalize_percent_characters(fragment)
    return fragment

118 

119 

# Matches one percent-encoded octet: "%" followed by exactly two hex digits.
PERCENT_MATCHER = re.compile(r"%[A-Fa-f0-9]{2}")


def normalize_percent_characters(s: str) -> str:
    """Upper-case the hex digits of every percent-encoded octet in ``s``.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.

    Text outside ``%XX`` triplets, including a ``%`` that is not followed by
    two hex digits, is returned unchanged.
    """
    # A single substitution pass rewrites every escape; the string is not
    # scanned again for each distinct escape it contains.
    return PERCENT_MATCHER.sub(lambda match: match.group(0).upper(), s)

133 

134 

def remove_dot_segments(s: str) -> str:
    """Resolve the ``.`` and ``..`` segments of a path.

    See also Section 5.2.4 of :rfc:`3986`.
    """
    kept: list[str] = []

    for part in s.split("/"):
        if part == "..":
            # Step back one segment; there is nothing to drop when no
            # segment has been kept yet.
            if kept:
                kept.pop()
        elif part != ".":
            # "." is the current directory and is skipped; every other
            # segment (including an empty one) is kept.
            kept.append(part)

    # A path that began with "/" must still begin with "/": put an empty
    # leading segment in front unless one is already there.
    if s[:1] == "/" and (not kept or kept[0] != ""):
        kept[:0] = [""]

    # A path that ends with "/." or "/.." gets one more empty segment so
    # that the joined result ends with a trailing "/".
    if s.endswith("/.") or s.endswith("/.."):
        kept.append("")

    return "/".join(kept)

167 

168 

@t.overload
def encode_component(uri_component: None, encoding: str) -> None:  # noqa: D103
    ...


@t.overload
def encode_component(uri_component: str, encoding: str) -> str:  # noqa: D103
    ...


def encode_component(
    uri_component: t.Optional[str],
    encoding: str,
) -> t.Optional[str]:
    """Percent-encode a URI component using the given encoding.

    ``None`` is returned unchanged.  Otherwise the component is converted to
    bytes and each byte is either copied through or written as an upper-case
    ``%XX`` escape; the result is decoded with ``encoding``.

    A byte is copied through when it is below 128 and its character is in
    ``misc.NON_PCT_ENCODED``.  A ``%`` byte is also copied through when the
    component looks already percent-encoded, i.e. when every ``%`` in it
    starts a valid ``%XX`` escape.
    """
    if uri_component is None:
        return uri_component

    # The component counts as already percent-encoded when the number of
    # valid "%XX" escapes equals the number of "%" bytes.  In that case "%"
    # is left alone while every other byte is still checked.
    text = compat.to_str(uri_component, encoding)
    escape_count = len(PERCENT_MATCHER.findall(text))
    raw = compat.to_bytes(uri_component, encoding)
    keep_percent = escape_count == raw.count(b"%")

    out = bytearray()
    # NOTE(review): iterating yields integer byte values, which assumes
    # compat.to_bytes returns a bytes-like object -- confirm against compat.
    for code in raw:
        if (keep_percent and code == 0x25) or (
            code < 128 and chr(code) in misc.NON_PCT_ENCODED
        ):
            out.append(code)
        else:
            out.extend(f"%{code:02X}".encode())

    return out.decode(encoding)