Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/httpx/_urls.py: 39%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import typing
4from urllib.parse import parse_qs, unquote
6import idna
8from ._types import QueryParamTypes, RawURL
9from ._urlparse import urlencode, urlparse
10from ._utils import primitive_value_to_str
12__all__ = ["URL", "QueryParams"]
class URL:
    """
    A parsed URL, exposing each component as a read-only property.

        url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")

        assert url.scheme == "https"
        assert url.username == "jo@email.com"
        assert url.password == "a secret"
        assert url.userinfo == b"jo%40email.com:a%20secret"
        assert url.host == "müller.de"
        assert url.raw_host == b"xn--mller-kva.de"
        assert url.port == 1234
        assert url.netloc == b"xn--mller-kva.de:1234"
        assert url.path == "/pa th"
        assert url.query == b"search=ab"
        assert url.raw_path == b"/pa%20th?search=ab"
        assert url.fragment == "anchorlink"

    How the components map onto the URL text:

          https://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink
        [scheme]  [  username  ] [password] [ host ][port][ path ] [ query ] [fragment]
                  [       userinfo        ] [   netloc   ][    raw_path    ]

    Things to be aware of:

    * `url.scheme` is always lowercase.

    * `url.host` is always lowercase. Internationalized domain names are
      given in unicode, i.e. with no IDNA encoding applied. For instance:

        url = httpx.URL("http://中国.icom.museum")
        assert url.host == "中国.icom.museum"
        url = httpx.URL("http://xn--fiqs8s.icom.museum")
        assert url.host == "中国.icom.museum"

    * `url.raw_host` is always lowercase, and is IDNA encoded.

        url = httpx.URL("http://中国.icom.museum")
        assert url.raw_host == b"xn--fiqs8s.icom.museum"
        url = httpx.URL("http://xn--fiqs8s.icom.museum")
        assert url.raw_host == b"xn--fiqs8s.icom.museum"

    * `url.port` is either `None` or an integer. A URL that spells out the
      default port of the "http", "https", "ws", "wss" or "ftp" scheme has
      its port normalized to `None`.

        assert httpx.URL("http://example.com") == httpx.URL("http://example.com:80")
        assert httpx.URL("http://example.com").port is None
        assert httpx.URL("http://example.com:80").port is None

    * `url.userinfo` is raw bytes and is still percent-encoded. Most callers
      want `url.username` and `url.password`, which apply URL decoding.

    * `url.raw_path` is the raw bytes of the path and the query together,
      still percent-encoded. It is the request target used when building
      HTTP requests. Most callers want `url.path` instead.

    * `url.query` is raw bytes and is still percent-encoded. A query string
      can only be decoded correctly once it has been split into individual
      parameter names and values.
    """

    def __init__(self, url: URL | str = "", **kwargs: typing.Any) -> None:
        """
        Build a URL from a string or from another `URL`, optionally
        overriding individual components through keyword arguments.

        Accepted keywords are `scheme`, `username`, `password`, `userinfo`,
        `host`, `port`, `netloc`, `path`, `query`, `raw_path`, `fragment`
        and `params`. A value of `None` is accepted for any of them.

        Raises `TypeError` for an unknown keyword, for a keyword value of
        the wrong type, or when `url` is neither a `str` nor a `URL`.
        """
        if kwargs:
            expected_types = {
                "scheme": str,
                "username": str,
                "password": str,
                "userinfo": bytes,
                "host": str,
                "port": int,
                "netloc": bytes,
                "path": str,
                "query": bytes,
                "raw_path": bytes,
                "fragment": str,
                "params": object,
            }

            # Validate every keyword in the order it was passed, so the first
            # offending argument is the one that gets reported.
            for name in list(kwargs):
                wanted = expected_types.get(name)
                if wanted is None:
                    raise TypeError(
                        f"{name!r} is an invalid keyword argument for URL()"
                    )

                value = kwargs[name]
                if value is None:
                    continue
                if not isinstance(value, wanted):
                    raise TypeError(
                        f"Argument {name!r} must be {wanted.__name__}"
                        f" but got {type(value).__name__}"
                    )
                # Bytes components are handed on as ASCII text.
                if isinstance(value, bytes):
                    kwargs[name] = value.decode("ascii")

            if "params" in kwargs:
                # "params" is expressed as the raw "query" component.
                # Empty params become `None` rather than "", so that the
                # resulting URL does not end in a dangling "?".
                raw_params = kwargs.pop("params")
                if raw_params:
                    kwargs["query"] = str(QueryParams(raw_params))
                else:
                    kwargs["query"] = None

        if isinstance(url, str):
            parsed = urlparse(url, **kwargs)
        elif isinstance(url, URL):
            parsed = url._uri_reference.copy_with(**kwargs)
        else:
            raise TypeError(
                "Invalid type for url. Expected str or httpx.URL,"
                f" got {type(url)}: {url!r}"
            )
        self._uri_reference = parsed

    @property
    def scheme(self) -> str:
        """
        The scheme of the URL, for example "http" or "https".
        Always lowercase.
        """
        parsed = self._uri_reference
        return parsed.scheme

    @property
    def raw_scheme(self) -> bytes:
        """
        The scheme of the URL as bytes, for example b"http" or b"https".
        Always lowercase.
        """
        scheme_text = self._uri_reference.scheme
        return scheme_text.encode("ascii")

    @property
    def userinfo(self) -> bytes:
        """
        The userinfo portion of the URL as a raw bytestring.
        For example: b"jo%40email.com:a%20secret".
        """
        credentials = self._uri_reference.userinfo
        return credentials.encode("ascii")

    @property
    def username(self) -> str:
        """
        The username from the URL, URL decoded.
        For example: "jo@email.com"
        """
        name, _sep, _secret = self._uri_reference.userinfo.partition(":")
        return unquote(name)

    @property
    def password(self) -> str:
        """
        The password from the URL, URL decoded.
        For example: "a secret"
        """
        _name, _sep, secret = self._uri_reference.userinfo.partition(":")
        return unquote(secret)

    @property
    def host(self) -> str:
        """
        The host of the URL as a string.
        Always lowercase, with IDNA hosts decoded to unicode.

        Examples:

            url = httpx.URL("http://www.EXAMPLE.org")
            assert url.host == "www.example.org"

            url = httpx.URL("http://中国.icom.museum")
            assert url.host == "中国.icom.museum"

            url = httpx.URL("http://xn--fiqs8s.icom.museum")
            assert url.host == "中国.icom.museum"

            url = httpx.URL("https://[::ffff:192.168.0.1]")
            assert url.host == "::ffff:192.168.0.1"
        """
        hostname: str = self._uri_reference.host
        # Only a host that begins with the "xn--" prefix is IDNA decoded.
        return idna.decode(hostname) if hostname.startswith("xn--") else hostname

    @property
    def raw_host(self) -> bytes:
        """
        The host of the URL as raw bytes.
        Always lowercase, and IDNA encoded.

        Examples:

            url = httpx.URL("http://www.EXAMPLE.org")
            assert url.raw_host == b"www.example.org"

            url = httpx.URL("http://中国.icom.museum")
            assert url.raw_host == b"xn--fiqs8s.icom.museum"

            url = httpx.URL("http://xn--fiqs8s.icom.museum")
            assert url.raw_host == b"xn--fiqs8s.icom.museum"

            url = httpx.URL("https://[::ffff:192.168.0.1]")
            assert url.raw_host == b"::ffff:192.168.0.1"
        """
        hostname = self._uri_reference.host
        return hostname.encode("ascii")

    @property
    def port(self) -> int | None:
        """
        The port of the URL as an integer, or `None`.

        The URL class normalizes ports as per the WHATWG spec: the default
        port of the "http", "https", "ws", "wss" and "ftp" schemes is always
        reported as `None`.

        For example:

            assert httpx.URL("http://www.example.com") == httpx.URL("http://www.example.com:80")
            assert httpx.URL("http://www.example.com:80").port is None
        """
        parsed = self._uri_reference
        return parsed.port

    @property
    def netloc(self) -> bytes:
        """
        `<host>` or `<host>:<port>`, as bytes.
        Always lowercase, and IDNA encoded.

        Suitable for producing the value of a request "Host" header.
        """
        location = self._uri_reference.netloc
        return location.encode("ascii")

    @property
    def path(self) -> str:
        """
        The path of the URL as a string, URL decoded and without the query
        string. An empty path is reported as "/".

        For example:

            url = httpx.URL("https://example.com/pa%20th")
            assert url.path == "/pa th"
        """
        encoded_path = self._uri_reference.path
        if not encoded_path:
            encoded_path = "/"
        return unquote(encoded_path)

    @property
    def query(self) -> bytes:
        """
        The query string of the URL as raw bytes, without the leading b"?".

        The interface is bytewise on purpose: URL decoding is only possible
        once the keys and values have been parsed into a QueryParams
        instance.

        For example:

            url = httpx.URL("https://example.com/?filter=some%20search%20terms")
            assert url.query == b"filter=some%20search%20terms"
        """
        raw_query = self._uri_reference.query
        if not raw_query:
            return b""
        return raw_query.encode("ascii")

    @property
    def params(self) -> QueryParams:
        """
        The query parameters of the URL, parsed into an immutable multidict.
        """
        raw_query = self._uri_reference.query
        return QueryParams(raw_query)

    @property
    def raw_path(self) -> bytes:
        """
        The full path plus query string of the URL, as raw bytes.
        This is the target used when constructing HTTP requests.

        For example:

            GET /users?search=some%20text HTTP/1.1
            Host: www.example.org
            Connection: close
        """
        parsed = self._uri_reference
        target = parsed.path or "/"
        # A query of `None` means "no query"; an empty query still gets "?".
        if parsed.query is not None:
            target = target + "?" + parsed.query
        return target.encode("ascii")

    @property
    def fragment(self) -> str:
        """
        The fragment of the URL, as used in HTML anchors.
        Returned as a URL decoded string, without the leading '#'.
        """
        raw_fragment = self._uri_reference.fragment
        return unquote(raw_fragment if raw_fragment else "")

    @property
    def raw(self) -> RawURL:
        """
        The (scheme, host, port, target) of the outgoing request.

        Older versions of `httpx` used this in the low-level transport API.
        `RawURL` is no longer used, and this property will be deprecated in
        a future release.
        """
        scheme_bytes = self.raw_scheme
        host_bytes = self.raw_host
        port_number = self.port
        target_bytes = self.raw_path
        return RawURL(scheme_bytes, host_bytes, port_number, target_bytes)

    @property
    def is_absolute_url(self) -> bool:
        """
        `True` for absolute URLs such as 'http://example.com/path',
        `False` for relative URLs such as '/path'.
        """
        # `.is_absolute` from `rfc3986` is not used here, because it treats
        # URLs with a fragment portion as not absolute. What matters is
        # whether the URL names a scheme and a hostname to connect to.
        parsed = self._uri_reference
        return bool(parsed.scheme) and bool(parsed.host)

    @property
    def is_relative_url(self) -> bool:
        """
        `True` for relative URLs such as '/path',
        `False` for absolute URLs such as 'http://example.com/path'.
        """
        if self.is_absolute_url:
            return False
        return True

    def copy_with(self, **kwargs: typing.Any) -> URL:
        """
        Return a copy of this URL with some components replaced.
        The accepted keywords are the same as for the `URL` constructor.

        For example:

            url = httpx.URL("https://www.example.com").copy_with(
                username="jo@email.com", password="a secret"
            )
            assert url == "https://jo%40email.com:a%20secret@www.example.com"
        """
        return URL(self, **kwargs)

    def copy_set_param(self, key: str, value: typing.Any = None) -> URL:
        """
        Return a copy of this URL with query parameter `key` set to `value`.
        """
        updated = self.params.set(key, value)
        return self.copy_with(params=updated)

    def copy_add_param(self, key: str, value: typing.Any = None) -> URL:
        """
        Return a copy of this URL with `value` added under query
        parameter `key`.
        """
        updated = self.params.add(key, value)
        return self.copy_with(params=updated)

    def copy_remove_param(self, key: str) -> URL:
        """
        Return a copy of this URL with query parameter `key` removed.
        """
        updated = self.params.remove(key)
        return self.copy_with(params=updated)

    def copy_merge_params(self, params: QueryParamTypes) -> URL:
        """
        Return a copy of this URL with `params` merged into its query
        parameters.
        """
        updated = self.params.merge(params)
        return self.copy_with(params=updated)

    def join(self, url: URL | str) -> URL:
        """
        Resolve `url` against this URL, which acts as the base.

        Eg.

            url = httpx.URL("https://www.example.com/test")
            url = url.join("/new/path")
            assert url == "https://www.example.com/new/path"
        """
        from urllib.parse import urljoin

        base_text = str(self)
        reference_text = str(URL(url))
        return URL(urljoin(base_text, reference_text))

    def __hash__(self) -> int:
        """Hash of the string form of the URL."""
        as_text = str(self)
        return hash(as_text)

    def __eq__(self, other: typing.Any) -> bool:
        """
        Compare against another `URL` or a `str`, by string form.
        Any other type compares unequal.
        """
        if not isinstance(other, (URL, str)):
            return False
        return str(self) == str(URL(other))

    def __str__(self) -> str:
        """The string form of the underlying parsed URL."""
        parsed = self._uri_reference
        return str(parsed)

    def __repr__(self) -> str:
        """
        Debug representation of the URL, with any password replaced by
        "[secure]".
        """
        scheme, credentials, hostname, port, path, query, fragment = (
            self._uri_reference
        )

        if ":" in credentials:
            # Keep the username, mask the password component.
            credentials = credentials.partition(":")[0] + ":[secure]"

        authority = ""
        if credentials:
            authority += f"{credentials}@"
        # A host containing ":" is shown in brackets.
        authority += f"[{hostname}]" if ":" in hostname else hostname
        if port is not None:
            authority += f":{port}"

        pieces = []
        if scheme:
            pieces.append(f"{self.scheme}:")
        if authority:
            pieces.append(f"//{authority}")
        pieces.append(path)
        if query is not None:
            pieces.append(f"?{query}")
        if fragment is not None:
            pieces.append(f"#{fragment}")
        rendered = "".join(pieces)

        return f"{self.__class__.__name__}({rendered!r})"
class QueryParams(typing.Mapping[str, str]):
    """
    URL query parameters, as an immutable multi-dict.

    Keys map to a list of values internally. The plain mapping interface
    (`[]`, `get`, `values`, `items`) exposes only the first value of each
    key; `get_list` and `multi_items` expose all of them.
    """

    def __init__(self, *args: QueryParamTypes | None, **kwargs: typing.Any) -> None:
        """
        Build the query parameters from at most one positional argument,
        or from keyword arguments, but not both.

        The positional argument may be `None`, a `str` or `bytes` query
        string, another `QueryParams`, a list/tuple of `(key, value)` pairs,
        or a mapping whose values are single items or lists/tuples of items.
        """
        assert len(args) < 2, "Too many arguments."
        assert not (args and kwargs), "Cannot mix named and unnamed arguments."

        value = args[0] if args else kwargs

        if value is None or isinstance(value, (str, bytes)):
            value = value.decode("ascii") if isinstance(value, bytes) else value
            self._dict = parse_qs(value, keep_blank_values=True)
        elif isinstance(value, QueryParams):
            # Copy the value lists so the two instances share no state.
            self._dict = {k: list(v) for k, v in value._dict.items()}
        else:
            dict_value: dict[typing.Any, list[typing.Any]] = {}
            if isinstance(value, (list, tuple)):
                # Convert list inputs like:
                #     [("a", "123"), ("a", "456"), ("b", "789")]
                # To a dict representation, like:
                #     {"a": ["123", "456"], "b": ["789"]}
                for item in value:
                    dict_value.setdefault(item[0], []).append(item[1])
            else:
                # Convert dict inputs like:
                #    {"a": "123", "b": ["456", "789"]}
                # To dict inputs where values are always lists, like:
                #    {"a": ["123"], "b": ["456", "789"]}
                dict_value = {
                    k: list(v) if isinstance(v, (list, tuple)) else [v]
                    for k, v in value.items()
                }

            # Ensure that keys and values are neatly coerced to strings.
            # We coerce values `True` and `False` to JSON-like "true" and "false"
            # representations, and coerce `None` values to the empty string.
            self._dict = {
                str(k): [primitive_value_to_str(item) for item in v]
                for k, v in dict_value.items()
            }

    def keys(self) -> typing.KeysView[str]:
        """
        Return all the keys in the query params.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.keys()) == ["a", "b"]
        """
        return self._dict.keys()

    def values(self) -> typing.ValuesView[str]:
        """
        Return all the values in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.values()) == ["123", "789"]
        """
        return {k: v[0] for k, v in self._dict.items()}.values()

    def items(self) -> typing.ItemsView[str, str]:
        """
        Return all items in the query params. If a key occurs more than once
        only the first item for that key is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.items()) == [("a", "123"), ("b", "789")]
        """
        return {k: v[0] for k, v in self._dict.items()}.items()

    def multi_items(self) -> list[tuple[str, str]]:
        """
        Return all items in the query params. Allow duplicate keys to occur.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert list(q.multi_items()) == [("a", "123"), ("a", "456"), ("b", "789")]
        """
        multi_items: list[tuple[str, str]] = []
        for k, v in self._dict.items():
            multi_items.extend([(k, i) for i in v])
        return multi_items

    def get(self, key: typing.Any, default: typing.Any = None) -> typing.Any:
        """
        Get a value from the query param for a given key. If the key occurs
        more than once, then only the first value is returned. If the key is
        not present, `default` is returned.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.get("a") == "123"
        """
        # Membership is tested with the key exactly as given.
        if key in self._dict:
            return self._dict[str(key)][0]
        return default

    def get_list(self, key: str) -> list[str]:
        """
        Get all values from the query param for a given key.
        Returns an empty list when the key is not present.

        Usage:

            q = httpx.QueryParams("a=123&a=456&b=789")
            assert q.get_list("a") == ["123", "456"]
        """
        return list(self._dict.get(str(key), []))

    def set(self, key: str, value: typing.Any = None) -> QueryParams:
        """
        Return a new QueryParams instance, setting the value of a key.
        Any existing values for that key are replaced.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.set("a", "456")
            assert q == httpx.QueryParams("a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict[str(key)] = [primitive_value_to_str(value)]
        return q

    def add(self, key: str, value: typing.Any = None) -> QueryParams:
        """
        Return a new QueryParams instance, setting or appending the value of a key.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.add("a", "456")
            assert q == httpx.QueryParams("a=123&a=456")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        # `get_list` returns a fresh list, so the list held by `self` is
        # never mutated.
        q._dict[str(key)] = q.get_list(key) + [primitive_value_to_str(value)]
        return q

    def remove(self, key: str) -> QueryParams:
        """
        Return a new QueryParams instance, removing the value of a key.
        Removing a key that is not present is not an error.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.remove("a")
            assert q == httpx.QueryParams("")
        """
        q = QueryParams()
        q._dict = dict(self._dict)
        q._dict.pop(str(key), None)
        return q

    def merge(self, params: QueryParamTypes | None = None) -> QueryParams:
        """
        Return a new QueryParams instance, updated with the given params.
        Keys present in `params` replace the values of the same keys here.

        Usage:

            q = httpx.QueryParams("a=123")
            q = q.merge({"b": "456"})
            assert q == httpx.QueryParams("a=123&b=456")

            q = httpx.QueryParams("a=123")
            q = q.merge({"a": "456", "b": "789"})
            assert q == httpx.QueryParams("a=456&b=789")
        """
        q = QueryParams(params)
        q._dict = {**self._dict, **q._dict}
        return q

    def __getitem__(self, key: typing.Any) -> str:
        """Return the first value for `key`, raising `KeyError` if absent."""
        return self._dict[key][0]

    def __contains__(self, key: typing.Any) -> bool:
        """Return whether `key` is one of the query parameter names."""
        return key in self._dict

    def __iter__(self) -> typing.Iterator[typing.Any]:
        """Iterate over the query parameter names."""
        return iter(self.keys())

    def __len__(self) -> int:
        """Return the number of distinct query parameter names."""
        return len(self._dict)

    def __bool__(self) -> bool:
        """Return `True` when at least one query parameter is present."""
        return bool(self._dict)

    def __hash__(self) -> int:
        """
        Hash the same order-insensitive view of the items that `__eq__`
        compares, so that instances which compare equal also hash equal
        even when their parameters appear in a different order.
        """
        return hash(tuple(sorted(self.multi_items())))

    def __eq__(self, other: typing.Any) -> bool:
        """
        Two instances are equal when they hold the same (key, value) pairs,
        regardless of ordering. Other types always compare unequal.
        """
        if not isinstance(other, self.__class__):
            return False
        return sorted(self.multi_items()) == sorted(other.multi_items())

    def __str__(self) -> str:
        """
        Note that we use '%20' encoding for spaces, and treat '/' as a safe
        character.

        See https://github.com/encode/httpx/issues/2536 and
        https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
        """
        return urlencode(self.multi_items())

    def __repr__(self) -> str:
        """Debug representation showing the encoded query string."""
        class_name = self.__class__.__name__
        query_string = str(self)
        return f"{class_name}({query_string!r})"

    def update(self, params: QueryParamTypes | None = None) -> None:
        """Always raises `RuntimeError`: in-place updates are not supported."""
        raise RuntimeError(
            "QueryParams are immutable since 0.18.0. "
            "Use `q = q.merge(...)` to create an updated copy."
        )

    def __setitem__(self, key: str, value: str) -> None:
        """Always raises `RuntimeError`: item assignment is not supported."""
        raise RuntimeError(
            "QueryParams are immutable since 0.18.0. "
            "Use `q = q.set(key, value)` to create an updated copy."
        )