Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rfc3986/normalizers.py: 91%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

100 statements  

1# Copyright (c) 2014 Rackspace 

2# Licensed under the Apache License, Version 2.0 (the "License"); 

3# you may not use this file except in compliance with the License. 

4# You may obtain a copy of the License at 

5# 

6# http://www.apache.org/licenses/LICENSE-2.0 

7# 

8# Unless required by applicable law or agreed to in writing, software 

9# distributed under the License is distributed on an "AS IS" BASIS, 

10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 

11# implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14"""Module with functions to normalize components.""" 

15import re 

16import typing as t 

17from urllib.parse import quote as urlquote 

18 

19from . import compat 

20from . import misc 

21 

22 

def normalize_scheme(scheme: str) -> str:
    """Return the scheme component folded to lower case."""
    lowered = scheme.lower()
    return lowered

26 

27 

def normalize_authority(
    authority: t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str]],
) -> str:
    """Assemble a normalized authority string from its parts.

    ``authority`` is unpacked as ``(userinfo, host, port)``. Each part that
    is falsy (``None`` or empty) is left out of the result entirely,
    together with its delimiter.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        # Only the percent-escapes of the userinfo are normalized
        pieces.append(normalize_percent_characters(userinfo) + "@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        # The port is passed through untouched
        pieces.append(":" + port)
    return "".join(pieces)

41 

42 

def normalize_username(username: str) -> str:
    """Percent-quote a username so it can be embedded in userinfo."""
    quoted = urlquote(username)
    return quoted

46 

47 

def normalize_password(password: str) -> str:
    """Percent-quote a password so it can be embedded in userinfo."""
    quoted = urlquote(password)
    return quoted

51 

52 

def normalize_host(host: str) -> str:
    """Lower-case a host, taking care of IPv6 Zone IDs.

    Hosts that do not match ``misc.IPv6_MATCHER``, or that match but
    contain no ``%``, are simply lower-cased. Otherwise everything from the
    first ``%`` onwards is treated as the Zone ID: its delimiter is
    rewritten to ``%25`` when needed and its casing is preserved.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_start = host.find("%")
    if zone_start == -1:
        return host.lower()

    escaped_start = host.find("%25")

    # The RFC 4007 delimiter '%' is swapped for the RFC 6874 form '%25'
    # unless the first '%' already begins a '%25'. The one exception is a
    # host shaped like '[<IPv6 addr>%25]': there '%25' is taken to be an
    # RFC 4007 zone named '25', so it still becomes '[<IPv6 addr>%2525]'.
    delimiter_already_escaped = (
        escaped_start != -1
        and zone_start >= escaped_start
        and not (
            zone_start == escaped_start and escaped_start == len(host) - 4
        )
    )
    if not delimiter_already_escaped:
        host = host.replace("%", "%25", 1)

    # Only the address part is case-folded; the Zone ID keeps its casing
    address, zone = host[:zone_start], host[zone_start:]
    return address.lower() + zone

74 

75 

def normalize_path(path: str) -> str:
    """Normalize a path component.

    A falsy path is handed back unchanged. Anything else has its
    percent-escapes normalized first and its dot segments removed second.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path

83 

84 

@t.overload
def normalize_query(query: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_query(query: None) -> None:  # noqa: D103
    ...


def normalize_query(query: t.Optional[str]) -> t.Optional[str]:
    """Normalize the percent-escapes of a query component.

    ``None`` and the empty string are returned as they were given.
    """
    if query:
        return normalize_percent_characters(query)
    return query

100 

101 

@t.overload
def normalize_fragment(fragment: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_fragment(fragment: None) -> None:  # noqa: D103
    ...


def normalize_fragment(fragment: t.Optional[str]) -> t.Optional[str]:
    """Normalize the percent-escapes of a fragment component.

    ``None`` and the empty string are returned as they were given.
    """
    if fragment:
        return normalize_percent_characters(fragment)
    return fragment

117 

118 

# Matches one percent-encoded octet: '%' followed by exactly two hex digits.
PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}")


def normalize_percent_characters(s: str) -> str:
    """Upper-case the hex digits of every percent-encoded octet.

    For example, ``"%3afoo%DF%ab"`` should be turned into ``"%3Afoo%DF%AB"``.

    Text outside of ``%XX`` triplets is left untouched, as is any ``%``
    that is not followed by two hexadecimal digits.

    :param s: The string whose percent-escapes should be normalized.
    :returns: ``s`` with each ``%XX`` escape upper-cased.
    """
    # One pass over the string: each escape is rewritten where it is found,
    # instead of re-scanning the whole string once per distinct escape.
    return PERCENT_MATCHER.sub(lambda match: match.group(0).upper(), s)

132 

133 

def remove_dot_segments(s: str) -> str:
    """Collapse the ``.`` and ``..`` segments of a path.

    See also Section 5.2.4 of :rfc:`3986`.
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept: list[str] = []

    for part in s.split("/"):
        if part == "..":
            # Step back one level, but only when there is one to drop
            if kept:
                kept.pop()
        elif part != ".":
            # '.' is the current directory and is simply skipped; every
            # other segment (including empty ones) is retained
            kept.append(part)

    # An absolute path must still begin with '/': restore the leading empty
    # segment when it was popped or never was at the front
    if s.startswith("/") and (not kept or kept[0] != ""):
        kept = [""] + kept

    # A path ending in '/.' or '/..' names a directory, so finish with an
    # empty segment to produce the trailing '/'
    if s.endswith("/.") or s.endswith("/.."):
        kept.append("")

    return "/".join(kept)

166 

167 

@t.overload
def encode_component(uri_component: None, encoding: str) -> None:  # noqa: D103
    ...


@t.overload
def encode_component(uri_component: str, encoding: str) -> str:  # noqa: D103
    ...


def encode_component(
    uri_component: t.Optional[str],
    encoding: str,
) -> t.Optional[str]:
    """Percent-encode a single URI component.

    ``None`` is returned unchanged. Otherwise the component is converted to
    bytes in ``encoding`` and every byte that is not an ASCII character
    found in ``misc.NON_PCT_ENCODED`` is written as an upper-case ``%XX``
    escape. The result is decoded back to text using ``encoding``.
    """
    if uri_component is None:
        return uri_component

    # When every '%' in the component starts a valid '%XX' escape, the
    # component is treated as already percent-encoded and its '%' bytes are
    # copied through instead of being escaped a second time.
    escape_count = len(
        PERCENT_MATCHER.findall(compat.to_str(uri_component, encoding))
    )
    uri_bytes = compat.to_bytes(uri_component, encoding)
    keep_percent = uri_bytes.count(b"%") == escape_count

    percent_code = ord("%")
    encoded_uri = bytearray()

    # NOTE(review): iterating yields integer byte values, which assumes
    # compat.to_bytes returns ``bytes`` -- confirm against compat.
    for code in uri_bytes:
        if keep_percent and code == percent_code:
            encoded_uri.append(code)
        elif code < 128 and chr(code) in misc.NON_PCT_ENCODED:
            encoded_uri.append(code)
        else:
            encoded_uri.extend(f"%{code:02X}".encode())

    return encoded_uri.decode(encoding)