Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/rfc3986/normalizers.py: 91%

2# Licensed under the Apache License, Version 2.0 (the "License");

3# you may not use this file except in compliance with the License.

4# You may obtain a copy of the License at

6# http://www.apache.org/licenses/LICENSE-2.0

8# Unless required by applicable law or agreed to in writing, software

9# distributed under the License is distributed on an "AS IS" BASIS,

10# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or

11# implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14"""Module with functions to normalize components."""

16import re

17import typing as t

18from urllib.parse import quote as urlquote

20from . import compat

21from . import misc

def normalize_scheme(scheme: str) -> str:
    """Return the scheme component converted to lower case."""
    lowered = scheme.lower()
    return lowered

def normalize_authority(
    authority: t.Tuple[t.Optional[str], t.Optional[str], t.Optional[str]],
) -> str:
    """Build a normalized authority string from its three parts.

    :param authority: A ``(userinfo, host, port)`` tuple. Any element that
        is falsy (``None`` or an empty string) is left out of the result.
    :returns: The normalized userinfo followed by ``"@"``, then the
        normalized host, then ``":"`` and the port, for each part present.
    """
    userinfo, host, port = authority
    pieces = []

    if userinfo:
        # Only the casing of percent-escapes is touched in the userinfo.
        pieces.append(normalize_percent_characters(userinfo))
        pieces.append("@")
    if host:
        pieces.append(normalize_host(host))
    if port:
        # The port is emitted verbatim after its delimiter.
        pieces.append(":")
        pieces.append(port)

    return "".join(pieces)

def normalize_username(username: str) -> str:
    """Percent-quote a username so it can be placed in the userinfo.

    Quoting is delegated to :func:`urllib.parse.quote` with its default
    arguments.
    """
    quoted = urlquote(username)
    return quoted

def normalize_password(password: str) -> str:
    """Percent-quote a password so it can be placed in the userinfo.

    Quoting is delegated to :func:`urllib.parse.quote` with its default
    arguments.
    """
    quoted = urlquote(password)
    return quoted

def normalize_host(host: str) -> str:
    """Normalize a host string.

    Hosts are lower-cased. When the host is matched by
    ``misc.IPv6_MATCHER`` and contains a ``"%"``, everything from the
    ``"%"`` onwards (the Zone ID) keeps its original casing, and a bare
    RFC 4007 ``"%"`` delimiter is rewritten to the RFC 6874 ``"%25"`` form.
    """
    if not misc.IPv6_MATCHER.match(host):
        return host.lower()

    zone_start = host.find("%")
    if zone_start == -1:
        # IPv6 literal without a Zone ID: plain lower-casing is enough.
        return host.lower()

    encoded_at = host.find("%25")

    # Replace the RFC 4007 IPv6 Zone ID delimiter '%' with '%25' from
    # RFC 6874. A host of the form '[<IPv6 addr>%25]' is assumed to be
    # RFC 4007 with a Zone ID of '25' and becomes '[<IPv6 addr>%2525]'.
    delimiter_is_bare = (
        encoded_at == -1
        or zone_start < encoded_at
        or (zone_start == encoded_at and encoded_at == len(host) - 4)
    )
    if delimiter_is_bare:
        host = host.replace("%", "%25", 1)

    # Lower-case only the address part; the Zone ID casing is preserved.
    address = host[:zone_start]
    zone = host[zone_start:]
    return address.lower() + zone

def normalize_path(path: str) -> str:
    """Normalize the path string.

    A falsy path is returned unchanged. Otherwise percent-escapes are
    upper-cased first and dot segments are removed from the result.
    """
    if path:
        return remove_dot_segments(normalize_percent_characters(path))
    return path

@t.overload
def normalize_query(query: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_query(query: None) -> None:  # noqa: D103
    ...


def normalize_query(query: t.Optional[str]) -> t.Optional[str]:
    """Normalize the query string.

    A falsy query (``None`` or ``""``) is handed back as-is; anything else
    has its percent-escapes upper-cased.
    """
    return normalize_percent_characters(query) if query else query

101

102

@t.overload
def normalize_fragment(fragment: str) -> str:  # noqa: D103
    ...


@t.overload
def normalize_fragment(fragment: None) -> None:  # noqa: D103
    ...


def normalize_fragment(fragment: t.Optional[str]) -> t.Optional[str]:
    """Normalize the fragment string.

    A falsy fragment (``None`` or ``""``) is handed back as-is; anything
    else has its percent-escapes upper-cased.
    """
    return normalize_percent_characters(fragment) if fragment else fragment

118

119

# Matches one percent-escape: '%' followed by exactly two hex digits.
PERCENT_MATCHER = re.compile("%[A-Fa-f0-9]{2}")


def normalize_percent_characters(s: str) -> str:
    """Upper-case the hex digits of every percent-escape in ``s``.

    For example, ``"%3afoo%DF%ab"`` becomes ``"%3Afoo%DF%AB"``. Text that
    is not part of a ``%hh`` escape is left untouched.
    """

    def _upper(match: "re.Match[str]") -> str:
        # Return the matched escape with its hex digits upper-cased.
        return match.group(0).upper()

    return PERCENT_MATCHER.sub(_upper, s)

133

134

def remove_dot_segments(s: str) -> str:
    """Remove ``.`` and ``..`` segments from a path.

    See also Section 5.2.4 of :rfc:`3986`.
    """
    # See http://tools.ietf.org/html/rfc3986#section-5.2.4 for pseudo-code
    kept: list[str] = []

    for part in s.split("/"):
        if part == "..":
            # Step back one level when there is a level to step back from.
            if kept:
                kept.pop()
        elif part != ".":
            # '.' names the current directory and is simply dropped;
            # every other segment is retained.
            kept.append(part)

    # An absolute path must still begin with '/': restore the leading empty
    # segment when it was popped or nothing remains.
    lost_leading_slash = not kept or bool(kept[0])
    if s.startswith("/") and lost_leading_slash:
        kept.insert(0, "")

    # A path that ends with '/.' or '/..' keeps a trailing '/'.
    if s.endswith(("/.", "/..")):
        kept.append("")

    return "/".join(kept)

167

168

@t.overload
def encode_component(uri_component: None, encoding: str) -> None:  # noqa: D103
    ...


@t.overload
def encode_component(uri_component: str, encoding: str) -> str:  # noqa: D103
    ...


def encode_component(
    uri_component: t.Optional[str],
    encoding: str,
) -> t.Optional[str]:
    """Percent-encode a URI component using the provided encoding.

    :param uri_component: The component to encode; ``None`` is returned
        unchanged.
    :param encoding: Encoding used to turn the component into bytes and to
        decode the encoded result back into a string.
    :returns: The component with every byte that is not allowed to appear
        literally replaced by an upper-case ``%XX`` escape.
    """
    if uri_component is None:
        return uri_component

    # If every '%' in the component starts a valid '%hh' escape, treat the
    # component as already percent-encoded: its '%' bytes are left alone
    # while all other bytes are still encoded as needed.
    text = compat.to_str(uri_component, encoding)
    escape_count = len(PERCENT_MATCHER.findall(text))
    raw = compat.to_bytes(uri_component, encoding)
    keep_percent = raw.count(b"%") == escape_count

    out = bytearray()
    for code in raw:
        is_kept_percent = keep_percent and code == 0x25  # 0x25 is '%'
        if is_kept_percent or (
            code < 128 and chr(code) in misc.NON_PCT_ENCODED
        ):
            # Byte may appear literally in the output.
            out.append(code)
        else:
            out.extend(f"%{code:02X}".encode())

    return out.decode(encoding)