Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rfc3986/normalizers.py: 91%

2# Licensed under the Apache License, Version 2.0 (the "License");

3# you may not use this file except in compliance with the License.

4# You may obtain a copy of the License at

6# http://www.apache.org/licenses/LICENSE-2.0

8# Unless required by applicable law or agreed to in writing, software

9# distributed under the License is distributed on an "AS IS" BASIS,

10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or

11# implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14"""Module with functions to normalize components."""

15import re

16import typing as t

17from urllib.parse import quote as urlquote

19from . import compat

20from . import misc

def normalize_scheme(scheme: str) -> str:
    """Return the scheme component converted to lower case."""
    lowered = scheme.lower()
    return lowered

def normalize_authority(
    authority: t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str]],
) -> str:
    """Build a normalized authority string from its three parts.

    :param authority: A ``(userinfo, host, port)`` tuple. Any falsy element
        (``None`` or an empty string) is left out of the result.
    :returns: ``userinfo@host:port`` with the absent parts omitted; an empty
        string when every element is falsy.
    """
    userinfo, host, port = authority
    pieces = []
    if userinfo:
        # Only the casing of percent-escapes is normalized in the userinfo.
        pieces.append(normalize_percent_characters(userinfo))
        pieces.append("@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        pieces.append(":")
        pieces.append(port)
    return "".join(pieces)

def normalize_username(username: str) -> str:
    """Percent-encode a username so it is safe to include in userinfo.

    :param username: The raw (unencoded) username.
    :returns: The username with every character outside the unreserved set
        percent-encoded.
    """
    # ``quote`` leaves "/" unescaped by default, but "/" is not permitted in
    # userinfo (RFC 3986, section 3.2.1) and would end the authority
    # component early, so no extra character is treated as safe here.
    return urlquote(username, safe="")

def normalize_password(password: str) -> str:
    """Percent-encode a password so it is safe to include in userinfo.

    :param password: The raw (unencoded) password.
    :returns: The password with every character outside the unreserved set
        percent-encoded.
    """
    # ``quote`` leaves "/" unescaped by default, but "/" is not permitted in
    # userinfo (RFC 3986, section 3.2.1) and would end the authority
    # component early, so no extra character is treated as safe here.
    return urlquote(password, safe="")

def normalize_host(host: str) -> str:
    """Lower-case a host, keeping the casing of any IPv6 Zone ID.

    Hosts that do not match ``misc.IPv6_MATCHER``, or that contain no ``%``,
    are simply lower-cased. Otherwise everything before the first ``%`` is
    lower-cased, the remainder is left as written, and a bare RFC 4007
    ``%`` delimiter is rewritten to the RFC 6874 ``%25`` form.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_start = host.find("%")
    if zone_start == -1:
        return host.lower()

    encoded_start = host.find("%25")
    # The delimiter needs encoding when the first '%' does not already begin
    # a '%25', or when that '%25' is followed by exactly one more character
    # (a host of the form '[<IPv6 addr>%25]'): in the latter case RFC 4007
    # is assumed and the result is '[<IPv6 addr>%2525]'.
    needs_encoding = (
        encoded_start != zone_start or encoded_start == len(host) - 4
    )
    if needs_encoding:
        host = host.replace("%", "%25", 1)

    # Only the address part is case-normalized; the Zone ID is kept verbatim.
    address, zone = host[:zone_start], host[zone_start:]
    return address.lower() + zone

def normalize_path(path: str) -> str:
    """Normalize a path component.

    A falsy path is returned unchanged. Otherwise percent-escapes are
    upper-cased first and dot segments are removed afterwards.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path

@t.overload
def normalize_query(query: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_query(query: None) -> None:  # noqa: D103
    ...


def normalize_query(query: t.Optional[str]) -> t.Optional[str]:
    """Normalize the query component.

    ``None`` and the empty string are handed back untouched; any other value
    has its percent-escapes upper-cased.
    """
    return normalize_percent_characters(query) if query else query

100

101

@t.overload
def normalize_fragment(fragment: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_fragment(fragment: None) -> None:  # noqa: D103
    ...


def normalize_fragment(fragment: t.Optional[str]) -> t.Optional[str]:
    """Normalize the fragment component.

    ``None`` and the empty string are handed back untouched; any other value
    has its percent-escapes upper-cased.
    """
    return normalize_percent_characters(fragment) if fragment else fragment

117

118

# Matches a single percent-encoded octet: '%' followed by two hex digits.
PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}")


def normalize_percent_characters(s: str) -> str:
    """Upper-case the hex digits of every percent-encoded octet in ``s``.

    For example, ``"%3afoo%DF%ab"`` becomes ``"%3Afoo%DF%AB"``. A ``%`` that
    is not followed by two hex digits is left exactly as it is.
    """
    return PERCENT_MATCHER.sub(lambda octet: octet.group(0).upper(), s)

132

133

def remove_dot_segments(s: str) -> str:
    """Collapse ``.`` and ``..`` segments in a path.

    See also Section 5.2.4 of :rfc:`3986`.
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept: list[str] = []

    for part in s.split("/"):
        if part == "..":
            # Step back one segment when there is one to drop.
            if kept:
                kept.pop()
        elif part != ".":
            # '.' refers to the current directory and is simply skipped;
            # every other segment is retained.
            kept.append(part)

    # An absolute path must keep its leading '/': add the empty first
    # segment back unless one is already there.
    if s.startswith("/") and not (kept and not kept[0]):
        kept.insert(0, "")

    # A path ending in '/.' or '/..' gets one more empty segment so that the
    # joined result ends with '/'.
    if s.endswith(("/.", "/..")):
        kept.append("")

    return "/".join(kept)

166

167

@t.overload
def encode_component(uri_component: None, encoding: str) -> None:  # noqa: D103
    ...


@t.overload
def encode_component(uri_component: str, encoding: str) -> str:  # noqa: D103
    ...


def encode_component(
    uri_component: t.Optional[str],
    encoding: str,
) -> t.Optional[str]:
    """Percent-encode a component after encoding it with ``encoding``.

    :param uri_component: The component to encode, or ``None``.
    :param encoding: Name of the codec used to turn the component into bytes
        before percent-encoding.
    :returns: ``None`` when ``uri_component`` is ``None``; otherwise the
        component with every byte that is not in ``misc.NON_PCT_ENCODED``
        written as an upper-case ``%XX`` escape. When every ``%`` in the
        input already starts a valid escape, those ``%`` characters are
        kept as they are instead of being encoded again.
    """
    if uri_component is None:
        return uri_component

    # Try to see if the component we're encoding is already percent-encoded
    # so we can skip all '%' characters but still encode all others.
    percent_encodings = len(
        PERCENT_MATCHER.findall(compat.to_str(uri_component, encoding))
    )

    uri_bytes = compat.to_bytes(uri_component, encoding)
    is_percent_encoded = percent_encodings == uri_bytes.count(b"%")

    encoded_uri = bytearray()
    # Iterating over bytes yields the integer value of each byte.
    for byte_ord in uri_bytes:
        if (is_percent_encoded and byte_ord == 0x25) or (  # 0x25 == "%"
            byte_ord < 128 and chr(byte_ord) in misc.NON_PCT_ENCODED
        ):
            encoded_uri.append(byte_ord)
        else:
            encoded_uri.extend(f"%{byte_ord:02X}".encode("ascii"))

    # Everything placed in the buffer is ASCII (kept ASCII characters, '%'
    # and hex escapes), so it is decoded as ASCII. Decoding with the caller's
    # ``encoding`` would garble the result for codecs that are not
    # ASCII-compatible, such as UTF-16.
    return encoded_uri.decode("ascii")