Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/

1"""URL parsing utilities."""

3import re

4import unicodedata

5from functools import lru_cache

6from typing import Union

7from urllib.parse import scheme_chars, uses_netloc

9from ._quoters import QUOTER, UNQUOTER_PLUS

# Leading and trailing C0 control characters and space, which the WHATWG
# URL spec strips before parsing.
# == "".join([chr(i) for i in range(0, 0x20 + 1)])
WHATWG_C0_CONTROL_OR_SPACE = (
    "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10"
    "\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f "
)

# Unsafe characters to be removed from anywhere in the URL per WHATWG spec.
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]
# Schemes listed by urllib.parse.uses_netloc, frozen for membership tests.
USES_AUTHORITY = frozenset(uses_netloc)

# Field order: (scheme, netloc, path, query, fragment).
SplitURLType = tuple[str, str, str, str, str]

def split_url(url: str) -> SplitURLType:
    """Split URL into parts.

    Adapted from ``urllib.parse.urlsplit``.

    Leading C0 control characters and spaces are stripped, and tab, CR and
    LF are removed from anywhere in the input, before splitting.

    Returns:
        A ``(scheme, netloc, path, query, fragment)`` tuple of strings; parts
        that are absent are empty strings. The scheme is lower-cased.

    Raises:
        ValueError: If the netloc has unbalanced square brackets, a bracketed
            host that is neither a valid IPvFuture literal nor contains ":",
            or a non-ASCII netloc rejected by ``_check_netloc``.
    """
    # Only lstrip url as some applications rely on preserving trailing space.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE)
    for b in UNSAFE_URL_BYTES_TO_REMOVE:
        if b in url:
            url = url.replace(b, "")

    scheme = netloc = query = fragment = ""
    i = url.find(":")
    # RFC 3986 section 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    # The first character must be an ASCII letter; otherwise input such as
    # "1.2.3.4:8080" would be split into scheme "1.2.3.4" and path "8080".
    if i > 0 and url[0].isascii() and url[0].isalpha():
        for c in url[1:i]:
            if c not in scheme_chars:
                break
        else:
            # Every character before ":" is a valid scheme character.
            scheme, url = url[:i].lower(), url[i + 1 :]
    has_hash = "#" in url
    has_question_mark = "?" in url
    if url[:2] == "//":
        delim = len(url)  # position of end of domain part of url, default is end
        # Only search for the delimiters that are known to be present.
        if has_hash and has_question_mark:
            delim_chars = "/?#"
        elif has_question_mark:
            delim_chars = "/?"
        elif has_hash:
            delim_chars = "/#"
        else:
            delim_chars = "/"
        for c in delim_chars:  # look for delimiters; the order is NOT important
            wdelim = url.find(c, 2)  # find first of this delim
            if 0 <= wdelim < delim:  # if found
                delim = wdelim  # use earliest delim position
        netloc = url[2:delim]
        url = url[delim:]
        has_left_bracket = "[" in netloc
        has_right_bracket = "]" in netloc
        # Exactly one kind of bracket present means they are unbalanced.
        if has_left_bracket != has_right_bracket:
            raise ValueError("Invalid IPv6 URL")
        if has_left_bracket:
            bracketed_host = netloc.partition("[")[2].partition("]")[0]
            # Valid bracketed hosts are defined in
            # https://www.rfc-editor.org/rfc/rfc3986#page-49
            # https://url.spec.whatwg.org/
            if bracketed_host and bracketed_host[0] == "v":
                if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host):
                    raise ValueError("IPvFuture address is invalid")
            elif ":" not in bracketed_host:
                raise ValueError("The IPv6 content between brackets is not valid")
    # The fragment is split off first so that a "?" inside it is not treated
    # as the start of the query.
    if has_hash:
        url, _, fragment = url.partition("#")
    if has_question_mark:
        url, _, query = url.partition("?")
    if netloc and not netloc.isascii():
        _check_netloc(netloc)
    return scheme, netloc, url, query, fragment

86def _check_netloc(netloc: str) -> None:

87 # Adapted from urllib.parse._checknetloc

88 # looking for characters like \u2100 that expand to 'a/c'

89 # IDNA uses NFKC equivalence, so normalize for this check

91 # ignore characters already included

92 # but not the surrounding text

93 n = netloc.replace("@", "").replace(":", "").replace("#", "").replace("?", "")

94 normalized_netloc = unicodedata.normalize("NFKC", n)

95 if n == normalized_netloc:

96 return

97 # Note that there are no unicode decompositions for the character '@' so

98 # its currently impossible to have test coverage for this branch, however if the

99 # one should be added in the future we want to make sure its still checked.

100 for c in "/?#@:": # pragma: no branch

101 if c in normalized_netloc:

102 raise ValueError(

103 f"netloc '{netloc}' contains invalid "

104 "characters under NFKC normalization"

105 )

106

107

@lru_cache  # match the same size as urlsplit
def split_netloc(
    netloc: str,
) -> tuple[Union[str, None], Union[str, None], Union[str, None], Union[int, None]]:
    """Split netloc into username, password, host and port.

    Returns:
        A ``(username, password, hostname, port)`` tuple. An empty username
        or hostname is reported as ``None``; the password is ``None`` only
        when the userinfo has no ":" separator; the port is ``None`` when no
        port text follows the host.

    Raises:
        ValueError: If the port is not made of ASCII digits only, or is
            outside the range 0-65535.
    """
    if "@" not in netloc:
        username: Union[str, None] = None
        password: Union[str, None] = None
        hostinfo = netloc
    else:
        # rpartition: the userinfo itself may contain "@".
        userinfo, _, hostinfo = netloc.rpartition("@")
        username, have_password, password = userinfo.partition(":")
        if not have_password:
            password = None

    if "[" in hostinfo:
        # Bracketed host: the port follows the closing bracket.
        _, _, bracketed = hostinfo.partition("[")
        hostname, _, port_str = bracketed.partition("]")
        _, _, port_str = port_str.partition(":")
    else:
        hostname, _, port_str = hostinfo.partition(":")

    if not port_str:
        return username or None, password, hostname or None, None

    invalid_port = "Invalid URL: port can't be converted to integer"
    # int() alone is too lenient: it accepts a sign ("+80"), surrounding
    # whitespace (" 80"), underscores ("8_0") and non-ASCII digits.
    if not (port_str.isascii() and port_str.isdigit()):
        raise ValueError(invalid_port)
    try:
        port = int(port_str)
    except ValueError:
        # int() can still refuse a digit string, e.g. one longer than the
        # interpreter's integer string conversion limit.
        raise ValueError(invalid_port) from None
    if not (0 <= port <= 65535):
        raise ValueError("Port out of range 0-65535")
    return username or None, password, hostname or None, port

140

141

def unsplit_result(
    scheme: str, netloc: str, url: str, query: str, fragment: str
) -> str:
    """Unsplit a URL without any normalization.

    Args:
        scheme: URL scheme, or "" when absent.
        netloc: Authority component, or "" when absent.
        url: Path component.
        query: Query string without the leading "?", or "" when absent.
        fragment: Fragment without the leading "#", or "" when absent.

    Returns:
        The recombined URL string.
    """
    if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//":
        # An authority section is written, so a non-empty relative path
        # needs a "/" to separate it from the netloc.
        if url and url[:1] != "/":
            url = f"/{url}"
        url = f"//{netloc}{url}"
    # The scheme prefix is added whether or not an authority was written, so
    # a scheme-less URL with a netloc keeps its "//netloc" part.
    if scheme:
        url = f"{scheme}:{url}"
    if query:
        url = f"{url}?{query}"
    return f"{url}#{fragment}" if fragment else url

156

157

@lru_cache  # match the same size as urlsplit
def make_netloc(
    user: Union[str, None],
    password: Union[str, None],
    host: Union[str, None],
    port: Union[int, None],
    encode: bool = False,
) -> str:
    """Make netloc from parts.

    The user and password are encoded if encode is True.

    The host must already be encoded with _encode_host.

    Returns:
        ``""`` when host is None; otherwise ``host``, followed by ``:port``
        when a port is given, and preceded by ``user@`` or ``user:password@``
        when those parts are given.
    """
    if host is None:
        return ""
    ret = host if port is None else f"{host}:{port}"
    if user is None and password is None:
        return ret
    if password is not None:
        # A password without a user is still written, as ":password@".
        if not user:
            user = ""
        elif encode:
            user = QUOTER(user)
        if encode:
            password = QUOTER(password)
        user = f"{user}:{password}"
    elif user and encode:
        user = QUOTER(user)
    # An empty user with no password adds no userinfo at all.
    return f"{user}@{ret}" if user else ret

190

191

def query_to_pairs(query_string: str) -> list[tuple[str, str]]:
    """Parse a query given as a string argument.

    Works like urllib.parse.parse_qsl with keep empty values, except that
    every "&"-separated segment produces a pair, including empty segments.

    Returns:
        A new list of ``(key, value)`` pairs in their order of appearance;
        an empty list for an empty query string. Each segment is split on
        its first "=" and both sides are passed through ``UNQUOTER_PLUS``.
    """
    if not query_string:
        return []
    return [
        (UNQUOTER_PLUS(k), UNQUOTER_PLUS(v))
        for k, _, v in (k_v.partition("=") for k_v in query_string.split("&"))
    ]

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/_parse.py: 48%

128 statements