Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/

1"""URL parsing utilities."""

3import re

4import unicodedata

5from functools import lru_cache

6from urllib.parse import scheme_chars, uses_netloc

8from ._quoters import QUOTER, UNQUOTER_PLUS

# Leading and trailing C0 control and space to be stripped per WHATWG spec:
# every code point from U+0000 up to and including U+0020 (space).
WHATWG_C0_CONTROL_OR_SPACE = "".join(map(chr, range(0x20 + 1)))

# Unsafe bytes to be removed per WHATWG spec
UNSAFE_URL_BYTES_TO_REMOVE = ["\t", "\r", "\n"]

# Immutable snapshot of urllib.parse.uses_netloc for fast membership tests.
USES_AUTHORITY = frozenset(uses_netloc)

# Five-string tuple shape used for a split URL.
SplitURLType = tuple[str, str, str, str, str]

def split_url(url: str) -> SplitURLType:
    """Split a URL string into its five components.

    Returns a ``(scheme, netloc, path, query, fragment)`` tuple of strings;
    any component that is absent is returned as an empty string.  The scheme
    is lower-cased, everything else is returned as found.

    Raises:
        ValueError: if the netloc has unbalanced or invalid bracketed host
            content, or (via ``_check_netloc``) if a non-ASCII netloc is
            rejected by that check.
    """
    # Adapted from urllib.parse.urlsplit
    # Only the left side is stripped because some applications rely on
    # trailing space being preserved.
    # (https://url.spec.whatwg.org/#concept-basic-url-parser would strip both)
    url = url.lstrip(WHATWG_C0_CONTROL_OR_SPACE)
    for unsafe in UNSAFE_URL_BYTES_TO_REMOVE:
        if unsafe in url:
            url = url.replace(unsafe, "")

    scheme = netloc = query = fragment = ""

    # A scheme is everything before the first ":" provided every character
    # of that prefix is a valid scheme character.
    colon = url.find(":")
    if (
        colon > 0
        and url[0] in scheme_chars
        and all(ch in scheme_chars for ch in url[1:colon])
    ):
        scheme, url = url[:colon].lower(), url[colon + 1 :]

    # Computed once, before the netloc is cut away, and reused below.
    has_hash = "#" in url
    has_question_mark = "?" in url

    if url.startswith("//"):
        # The authority ends at the earliest "/", "?" or "#" that follows the
        # leading "//"; when none is present it runs to the end of the string.
        terminators = ["/"]
        if has_question_mark:
            terminators.append("?")
        if has_hash:
            terminators.append("#")
        positions = (url.find(mark, 2) for mark in terminators)
        end = min((pos for pos in positions if pos >= 0), default=len(url))
        netloc, url = url[2:end], url[end:]

        opens = "[" in netloc
        closes = "]" in netloc
        if opens != closes:
            raise ValueError("Invalid IPv6 URL")
        if opens:
            bracketed_host = netloc.partition("[")[2].partition("]")[0]
            # Valid bracketed hosts are defined in
            # https://www.rfc-editor.org/rfc/rfc3986#page-49
            # https://url.spec.whatwg.org/
            if bracketed_host.startswith("v"):
                if not re.match(r"\Av[a-fA-F0-9]+\..+\Z", bracketed_host):
                    raise ValueError("IPvFuture address is invalid")
            elif ":" not in bracketed_host:
                raise ValueError("The IPv6 content between brackets is not valid")

    # Fragment first, then query: a "?" inside the fragment is not a query.
    if has_hash:
        url, _, fragment = url.partition("#")
    if has_question_mark:
        url, _, query = url.partition("?")

    # Pure-ASCII netlocs cannot change under NFKC, so skip the check for them.
    if netloc and not netloc.isascii():
        _check_netloc(netloc)
    return scheme, netloc, url, query, fragment

def _check_netloc(netloc: str) -> None:
    """Reject a netloc that gains URL delimiters under NFKC normalization.

    Raises:
        ValueError: if the NFKC-normalized netloc contains any of
            ``/ ? # @ :`` that were not present before normalization.
    """
    # Adapted from urllib.parse._checknetloc
    # The concern is characters like \u2100, which expands to 'a/c'.
    # IDNA uses NFKC equivalence, so normalize for this check.

    # Drop delimiters that are already part of the netloc (but keep the
    # surrounding text) so that only newly introduced ones are detected.
    stripped = netloc
    for delimiter in "@:#?":
        stripped = stripped.replace(delimiter, "")

    folded = unicodedata.normalize("NFKC", stripped)
    if folded == stripped:
        return

    # Note that there are no unicode decompositions for the character '@', so
    # that case cannot currently be exercised by a test; it is still checked
    # in case such a decomposition is added in the future.
    if any(delimiter in folded for delimiter in "/?#@:"):
        raise ValueError(
            f"netloc '{netloc}' contains invalid "
            "characters under NFKC normalization"
        )

105

106

107@lru_cache # match the same size as urlsplit

108def split_netloc(

109 netloc: str,

110) -> tuple[str | None, str | None, str | None, int | None]:

111 """Split netloc into username, password, host and port."""

112 if "@" not in netloc:

113 username: str | None = None

114 password: str | None = None

115 hostinfo = netloc

116 else:

117 userinfo, _, hostinfo = netloc.rpartition("@")

118 username, have_password, password = userinfo.partition(":")

119 if not have_password:

120 password = None

121

122 if "[" in hostinfo:

123 _, _, bracketed = hostinfo.partition("[")

124 hostname, _, port_str = bracketed.partition("]")

125 _, _, port_str = port_str.partition(":")

126 else:

127 hostname, _, port_str = hostinfo.partition(":")

128

129 if not port_str:

130 return username or None, password, hostname or None, None

131

132 try:

133 port = int(port_str)

134 except ValueError:

135 raise ValueError("Invalid URL: port can't be converted to integer")

136 if not (0 <= port <= 65535):

137 raise ValueError("Port out of range 0-65535")

138 return username or None, password, hostname or None, port

139

140

def unsplit_result(
    scheme: str, netloc: str, url: str, query: str, fragment: str
) -> str:
    """Unsplit a URL without any normalization.

    Joins ``scheme``, ``netloc``, path (``url``), ``query`` and ``fragment``
    back into a single string.  Empty components are omitted together with
    their delimiters.
    """
    if netloc or (scheme and scheme in USES_AUTHORITY) or url[:2] == "//":
        # An authority section is written, so a non-empty path must be
        # separated from it by "/".
        if url and url[:1] != "/":
            url = f"/{url}"
        # Without a scheme the result is a network-path reference
        # ("//netloc/path"); the netloc must be kept in that case as well.
        url = f"{scheme}://{netloc}{url}" if scheme else f"//{netloc}{url}"
    elif scheme:
        url = f"{scheme}:{url}"
    if query:
        url = f"{url}?{query}"
    return f"{url}#{fragment}" if fragment else url

155

156

157@lru_cache # match the same size as urlsplit

158def make_netloc(

159 user: str | None,

160 password: str | None,

161 host: str | None,

162 port: int | None,

163 encode: bool = False,

164) -> str:

165 """Make netloc from parts.

166

167 The user and password are encoded if encode is True.

168

169 The host must already be encoded with _encode_host.

170 """

171 if host is None:

172 return ""

173 ret = host

174 if port is not None:

175 ret = f"{ret}:{port}"

176 if user is None and password is None:

177 return ret

178 if password is not None:

179 if not user:

180 user = ""

181 elif encode:

182 user = QUOTER(user)

183 if encode:

184 password = QUOTER(password)

185 user = f"{user}:{password}"

186 elif user and encode:

187 user = QUOTER(user)

188 return f"{user}@{ret}" if user else ret

189

190

def query_to_pairs(query_string: str) -> list[tuple[str, str]]:
    """Parse a query string into a list of ``(key, value)`` pairs.

    Works like urllib.parse.parse_qsl with keep empty values.

    The string is split on ``"&"``, each piece is split on its first ``"="``,
    and both halves are decoded with ``UNQUOTER_PLUS``.  An empty query
    string yields an empty list.
    """
    if not query_string:
        return []
    return [
        (UNQUOTER_PLUS(key), UNQUOTER_PLUS(value))
        for key, _, value in (
            piece.partition("=") for piece in query_string.split("&")
        )
    ]

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/yarl/_parse.py: 83%

127 statements