Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/uritools/__init__.py: 37%
386 statements
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:08 +0000
« prev ^ index » next coverage.py v7.3.1, created at 2023-09-25 06:08 +0000
"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`.

This module defines RFC 3986 compliant replacements for the most
commonly used functions of the Python Standard Library
:mod:`urllib.parse` module.

"""
9import collections
10import collections.abc
11import ipaddress
12import numbers
13import re
14from string import hexdigits
# Names exported by ``from <this module> import *``: the character-class
# constants followed by the public classification, compose, encode/decode,
# join and split functions.
__all__ = (
    "GEN_DELIMS",
    "RESERVED",
    "SUB_DELIMS",
    "UNRESERVED",
    "isabspath",
    "isabsuri",
    "isnetpath",
    "isrelpath",
    "issamedoc",
    "isuri",
    "uricompose",
    "uridecode",
    "uridefrag",
    "uriencode",
    "urijoin",
    "urisplit",
    "uriunsplit",
)

# Version string of this module.
__version__ = "4.0.2"
# RFC 3986 2.2. Reserved Characters
#
#   reserved    = gen-delims / sub-delims
#
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = GEN_DELIMS + SUB_DELIMS

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = "".join(
    (
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "abcdefghijklmnopqrstuvwxyz",
        "0123456789",
        "-._~",
    )
)

# Integer byte values of the unreserved characters, for fast membership tests.
_unreserved = frozenset(UNRESERVED.encode())

# RFC 3986 2.1: For consistency, URI producers and normalizers should
# use uppercase hexadecimal digits for all percent-encodings.
#
# Maps a bytes string of extra "safe" characters to a 256-entry table whose
# entry at index ``i`` is the encoded form of byte value ``i``.  Only the
# table for "no extra safe characters" is built here.
_encoded = {
    b"": [
        bytes((octet,)) if octet in _unreserved else b"%%%02X" % octet
        for octet in range(256)
    ]
}

# Maps every two-hex-digit bytes pair (any mix of upper and lower case) to
# the single byte it denotes.
_decoded = {
    pair.encode(): bytes.fromhex(pair)
    for pair in (hi + lo for hi in hexdigits for lo in hexdigits)
}
def uriencode(uristring, safe="", encoding="utf-8", errors="strict"):
    """Percent-encode a URI string or string component and return bytes.

    `uristring` is encoded with `encoding`/`errors` first unless it is
    already :class:`bytes`.  Unreserved characters and every character in
    `safe` (a :class:`str` of ASCII characters, or :class:`bytes`) are
    kept as they are; all other bytes become ``%XX`` with uppercase hex
    digits.
    """
    if isinstance(uristring, bytes):
        data = uristring
    else:
        data = uristring.encode(encoding, errors)
    key = safe if isinstance(safe, bytes) else safe.encode("ascii")

    table = _encoded.get(key)
    if table is None:
        # First use of this set of safe characters: derive its table from
        # the default one and remember it for later calls.
        table = list(_encoded[b""])
        for octet in key:
            table[octet] = bytes((octet,))
        _encoded[key] = table
    return b"".join([table[octet] for octet in data])
def uridecode(uristring, encoding="utf-8", errors="strict"):
    """Replace the percent-escapes in a URI string or string component.

    `uristring` is first converted to bytes (using `encoding`, or ASCII if
    `encoding` is :const:`None`) unless it already is :class:`bytes`.  A
    ``%`` that is not followed by two hex digits is left unchanged.  The
    result is decoded with `encoding`/`errors`, or returned as
    :class:`bytes` when `encoding` is :const:`None`.
    """
    if isinstance(uristring, bytes):
        raw = uristring
    else:
        raw = uristring.encode(encoding or "ascii", errors)

    head, *tail = raw.split(b"%")
    chunks = [head]
    for chunk in tail:
        hexpair, rest = chunk[:2], chunk[2:]
        # Unknown or truncated escapes are passed through verbatim.
        chunks.append(_decoded.get(hexpair, b"%" + hexpair))
        chunks.append(rest)

    decoded = b"".join(chunks)
    if encoding is None:
        return decoded
    return decoded.decode(encoding, errors)
class DefragResult(collections.namedtuple("DefragResult", "uri fragment")):
    """Class to hold :func:`uridefrag` results.

    `uri` is the URI reference without its fragment and `fragment` is the
    raw fragment, or :const:`None` if there was none.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the recombined version of the original URI as a string."""
        uri, fragment = self
        if fragment is None:
            return uri
        # Use a separator of the same type as the fragment.
        separator = b"#" if isinstance(fragment, bytes) else "#"
        return uri + separator + fragment

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI did not contain a fragment component.

        """
        if self.fragment is None:
            return default
        return uridecode(self.fragment, encoding, errors)
class SplitResult(
    collections.namedtuple("SplitResult", "scheme authority path query fragment")
):
    """Base class to hold :func:`urisplit` results.

    The tuple fields hold the raw, still percent-encoded components.  A
    scheme, authority, query or fragment that is absent is :const:`None`.

    The methods read the attributes ``RE``, ``AT``, ``COLON``, ``SLASH``,
    ``QUEST``, ``HASH``, ``LBRACKET``, ``RBRACKET``, ``DOT``, ``DOTDOT``,
    ``EMPTY``, ``EQ`` and ``DIGITS`` from ``self``.  This class does not
    define them, so a subclass has to provide them in the type (``str`` or
    ``bytes``) of the components it holds.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The raw userinfo subcomponent, or :const:`None` if absent."""
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The raw host subcomponent, or :const:`None` if there is no
        authority.  IP literals keep their square brackets.

        """
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Strip a trailing ":port" only if a colon was actually found.
        # Without this check an all-digit host such as "123" (no colon)
        # would be taken for a bare port and the host reported as empty.
        if present and not port.lstrip(self.DIGITS):
            return host
        else:
            return hostinfo

    @property
    def port(self):
        """The raw port subcomponent (possibly empty), or :const:`None` if
        the authority has no ``:port`` suffix.

        """
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        # Anything other than digits after the last colon means the colon
        # belongs to the userinfo or to an IP literal, not to a port.
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode("ascii").lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each subcomponent.  Raises :class:`TypeError` if it
        is not iterable and :class:`ValueError` if its length is not 3.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError("Invalid default type")
        elif len(default) != 3:
            raise ValueError("Invalid default length")
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2]),
        )

    def getuserinfo(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors="strict"):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        Raises :class:`ValueError` for a host with unbalanced square
        brackets or an unsupported/invalid IP literal.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return self.__parse_ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError("Invalid host %r" % host)
        # TODO: faster check for IPv4 address?
        try:
            if isinstance(host, bytes):
                return ipaddress.IPv4Address(host.decode("ascii"))
            else:
                return ipaddress.IPv4Address(host)
        except ValueError:
            # Not an IPv4 address: treat it as a registered name, which is
            # always decoded as UTF-8 and lowercased.
            return uridecode(host, "utf-8", errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding="utf-8", errors="strict"):
        """Return the normalized decoded URI path."""
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        A term without ``=`` yields a value of :const:`None`; empty terms
        are skipped.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            qsl = self.query.split(sep.decode("ascii"))
        else:
            qsl = self.query.split(sep.encode("ascii"))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and self.path.startswith(self.SLASH)
        )

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path.startswith(self.SLASH)
        )

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path
            and self.query is None
        )

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        `ref` must be of the same string type as the components of
        `self`.  With `strict` false, a reference scheme equal to the base
        scheme is ignored, as RFC 3986 5.2.2 permits for backward
        compatibility.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # Replace everything after the last slash of the base path; if
            # the base path has no slash, parts[0] and parts[1] are empty
            # and the result is just `path`.
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path: drop it
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                # leading ".." of a relative path is kept
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)

    @classmethod
    def __parse_ip_literal(cls, address):
        # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined
        # IP literal address formats, an implementation may use an
        # optional version flag to indicate such a format explicitly
        # rather than rely on heuristic determination.
        #
        #  IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        #
        #  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        #
        # If a URI containing an IP-literal that starts with "v"
        # (case-insensitive), indicating that the version flag is
        # present, is dereferenced by an application that does not
        # know the meaning of that version flag, then the application
        # should return an appropriate error for "address mechanism
        # not supported".
        if isinstance(address, bytes):
            address = address.decode("ascii")
        # The version flag is case-insensitive, so check both cases.
        if address.startswith(("v", "V")):
            raise ValueError("address mechanism not supported")
        return ipaddress.IPv6Address(address)
class SplitResultBytes(SplitResult):
    """:class:`SplitResult` whose components and constants are
    :class:`bytes`.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        rb"""
        (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
        (?://([^/?#]*))?                 # authority
        ([^?#]*)                         # path
        (?:\?([^#]*))?                   # query
        (?:\#(.*))?                      # fragment
        """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = b":"
    SLASH = b"/"
    QUEST = b"?"
    HASH = b"#"
    LBRACKET = b"["
    RBRACKET = b"]"
    AT = b"@"

    # RFC 3986 3.3 dot-segments
    DOT = b"."
    DOTDOT = b".."

    EMPTY = b""
    EQ = b"="

    DIGITS = b"0123456789"
class SplitResultString(SplitResult):
    """:class:`SplitResult` whose components and constants are
    :class:`str`.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        r"""
        (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
        (?://([^/?#]*))?                 # authority
        ([^?#]*)                         # path
        (?:\?([^#]*))?                   # query
        (?:\#(.*))?                      # fragment
        """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = ":"
    SLASH = "/"
    QUEST = "?"
    HASH = "#"
    LBRACKET = "["
    RBRACKET = "]"
    AT = "@"

    # RFC 3986 3.3 dot-segments
    DOT = "."
    DOTDOT = ".."

    EMPTY = ""
    EQ = "="

    DIGITS = "0123456789"
def uridefrag(uristring):
    """Remove an existing fragment component from a URI reference string.

    Returns a :class:`DefragResult` whose `fragment` is :const:`None` when
    `uristring` contains no ``#``.
    """
    marker = b"#" if isinstance(uristring, bytes) else "#"
    uri, found, fragment = uristring.partition(marker)
    return DefragResult(uri, fragment if found else None)
def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    The result is a :class:`SplitResultBytes` for :class:`bytes` input and
    a :class:`SplitResultString` otherwise.

    """
    splitter = SplitResultBytes if isinstance(uristring, bytes) else SplitResultString
    components = splitter.RE.match(uristring).groups()
    return splitter(*components)
def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The type of the path element (:class:`bytes` or not) selects the type
    of the delimiters used for recomposition.

    """
    scheme, authority, path, query, fragment = parts
    factory = SplitResultBytes if isinstance(path, bytes) else SplitResultString
    return factory(scheme, authority, path, query, fragment).geturi()
def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    If `base` and `ref` differ in type, the :class:`bytes` one is decoded
    (with the default codec) so that both are handled as :class:`str`.

    """
    if not isinstance(base, type(ref)):
        if isinstance(base, bytes):
            base = base.decode()
        else:
            ref = ref.decode()
    return urisplit(base).transform(ref, strict).geturi()
def isuri(uristring):
    """Return :const:`True` if `uristring` is a URI."""
    parts = urisplit(uristring)
    return parts.isuri()
def isabsuri(uristring):
    """Return :const:`True` if `uristring` is an absolute URI."""
    parts = urisplit(uristring)
    return parts.isabsuri()
def isnetpath(uristring):
    """Return :const:`True` if `uristring` is a network-path reference."""
    parts = urisplit(uristring)
    return parts.isnetpath()
def isabspath(uristring):
    """Return :const:`True` if `uristring` is an absolute-path reference."""
    parts = urisplit(uristring)
    return parts.isabspath()
def isrelpath(uristring):
    """Return :const:`True` if `uristring` is a relative-path reference."""
    parts = urisplit(uristring)
    return parts.isrelpath()
def issamedoc(uristring):
    """Return :const:`True` if `uristring` is a same-document reference."""
    parts = urisplit(uristring)
    return parts.issamedoc()
# TBD: move compose to its own submodule?

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
# NOTE(review): "$" also matches just before a trailing newline, so
# match() alone accepts e.g. b"http\n".
_SCHEME_RE = re.compile(b"^[A-Za-z][A-Za-z0-9+.-]*$")

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
# The three groups capture userinfo, host and port, in that order; the
# userinfo and port groups are None when not present.  One pattern per
# string type, since a compiled pattern only matches its own type.
_AUTHORITY_RE_BYTES = re.compile(b"^(?:(.*)@)?(.*?)(?::([0-9]*))?$")
_AUTHORITY_RE_STR = re.compile("^(?:(.*)@)?(.*?)(?::([0-9]*))?$")

# safe component characters: kept unencoded, in addition to the unreserved
# characters, when the respective component is percent-encoded
_SAFE_USERINFO = SUB_DELIMS + ":"
_SAFE_HOST = SUB_DELIMS
_SAFE_PATH = SUB_DELIMS + ":@/"
_SAFE_QUERY = SUB_DELIMS + ":@/?"
_SAFE_FRAGMENT = SUB_DELIMS + ":@/?"
def _scheme(scheme):
    """Validate a scheme given as bytes and return it in lowercase.

    Raises :class:`ValueError` if `scheme` does not conform to the
    RFC 3986 3.1 scheme syntax.
    """
    # fullmatch() rather than match(): the pattern ends in "$", which also
    # matches just before a trailing newline, so match() would let a
    # scheme such as b"http\n" through.
    if _SCHEME_RE.fullmatch(scheme) is None:
        raise ValueError("Invalid scheme component")
    return scheme.lower()
def _authority(userinfo, host, port, encoding):
    """Build the encoded authority component as bytes.

    `userinfo` is percent-encoded using `encoding`.  `host` may be an
    :mod:`ipaddress` address object, :class:`bytes` or :class:`str`.
    `port` may be a number, :class:`bytes` or :class:`str`.  Returns
    :const:`None` if all three subcomponents are :const:`None`.
    """
    parts = []

    if userinfo is not None:
        parts += [uriencode(userinfo, _SAFE_USERINFO, encoding), b"@"]

    if isinstance(host, ipaddress.IPv6Address):
        parts.append(b"[%s]" % host.compressed.encode())
    elif isinstance(host, ipaddress.IPv4Address):
        parts.append(host.compressed.encode())
    elif host is not None:
        raw_host = host if isinstance(host, bytes) else host.encode("utf-8")
        parts.append(_host(raw_host))

    if port is not None:
        if isinstance(port, numbers.Number):
            raw_port = str(port).encode()
        elif isinstance(port, bytes):
            raw_port = port
        else:
            raw_port = port.encode()
        parts.append(_port(raw_port))

    if not parts:
        return None
    return b"".join(parts)
def _ip_literal(address):
    """Return the bracketed, compressed form of an IPv6 address as bytes.

    `address` is the text between the square brackets of an IP literal.
    Raises :class:`ValueError` if it is an IPvFuture literal or not a
    valid IPv6 address.
    """
    # RFC 3986 3.2.2: the IPvFuture version flag "v" is case-insensitive,
    # so "V..." must be reported as unsupported as well.
    if address.startswith(("v", "V")):
        raise ValueError("Address mechanism not supported")
    return b"[" + ipaddress.IPv6Address(address).compressed.encode() + b"]"
def _host(host):
    """Return the normalized, encoded form of a host given as bytes."""
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    bracketed = host[:1] == b"[" and host[-1:] == b"]"
    if bracketed:
        literal = host[1:-1].decode()
        return _ip_literal(literal)
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        literal = _ip_literal(host.decode("utf-8"))
    except ValueError:
        # not an IPv6 address: encode it as a registered name
        return uriencode(host.lower(), _SAFE_HOST, "utf-8")
    return literal
def _port(port):
    """Return ``b":" + port`` for a port given as bytes of digits, or
    ``b""`` for an empty port; raise :class:`ValueError` otherwise.

    """
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    if not port:
        return b""
    if not port.isdigit():
        raise ValueError("Invalid port subcomponent")
    return b":" + port
def _querylist(items, sep, encoding):
    """Encode an iterable of `(name, value)` pairs as a query, in bytes.

    A value of :const:`None` produces a bare name without ``=``.  Values
    that are neither :class:`bytes` nor :class:`str` are converted with
    :func:`str`.  `sep` itself is not treated as a safe character, so it
    gets percent-encoded inside names and values.
    """
    safe = _SAFE_QUERY.replace(sep, "")
    terms = []
    for key, value in items:
        term = uriencode(key, safe, encoding)
        if value is not None:
            text = value if isinstance(value, (bytes, str)) else str(value)
            term = term + b"=" + uriencode(text, safe, encoding)
        terms.append(term)
    return sep.encode("ascii").join(terms)
def _querydict(mapping, sep, encoding):
    """Encode a mapping of query variables as a query, in bytes.

    A value that is iterable, other than :class:`bytes` or :class:`str`,
    contributes one `name=value` pair per element; any other value
    contributes a single pair.
    """
    pairs = []
    for key, value in mapping.items():
        scalar = isinstance(value, (bytes, str)) or not isinstance(
            value, collections.abc.Iterable
        )
        if scalar:
            pairs.append((key, value))
        else:
            pairs.extend((key, item) for item in value)
    return _querylist(pairs, sep, encoding)
def uricompose(
    scheme=None,
    authority=None,
    path="",
    query=None,
    fragment=None,
    userinfo=None,
    host=None,
    port=None,
    querysep="&",
    encoding="utf-8",
):
    """Compose a URI reference string from its individual components.

    `authority` may be a string or a three-item iterable of
    `(userinfo, host, port)`.  The `userinfo`, `host` and `port` keyword
    arguments, when not :const:`None`, override the corresponding
    authority subcomponent.  `query` may be a string, a mapping or an
    iterable of `(name, value)` pairs; for the latter two, `querysep`
    separates the pairs.  String components are percent-encoded using
    `encoding`.

    Returns the URI reference as :class:`str`.  Raises :class:`ValueError`
    for an invalid scheme, authority or path, and :class:`TypeError` for
    an `authority` or `query` of unsupported type.

    """

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if isinstance(scheme, bytes):
        scheme = _scheme(scheme)
    elif scheme is not None:
        scheme = _scheme(scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        authority = (None, None, None)
    elif isinstance(authority, (bytes, str)):
        if isinstance(authority, bytes):
            pattern = _AUTHORITY_RE_BYTES
        else:
            pattern = _AUTHORITY_RE_STR
        # fullmatch() so that a newline anywhere in the authority is
        # rejected with a ValueError; match() returns None for an embedded
        # newline (AttributeError on .groups()) and silently drops a
        # trailing one.
        matched = pattern.fullmatch(authority)
        if matched is None:
            raise ValueError("Invalid authority component")
        authority = matched.groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError("Invalid authority type")
    elif len(authority) != 3:
        raise ValueError("Invalid authority length")
    authority = _authority(
        userinfo if userinfo is not None else authority[0],
        host if host is not None else authority[1],
        port if port is not None else authority[2],
        encoding,
    )

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is not None and path and not path.startswith(b"/"):
        raise ValueError("Invalid path with authority component")
    if authority is None and path.startswith(b"//"):
        raise ValueError("Invalid path without authority component")

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    if scheme is None and authority is None and not path.startswith(b"/"):
        if b":" in path.partition(b"/")[0]:
            path = b"./" + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if isinstance(query, (bytes, str)):
        query = uriencode(query, _SAFE_QUERY, encoding)
    elif isinstance(query, collections.abc.Mapping):
        query = _querydict(query, querysep, encoding)
    elif isinstance(query, collections.abc.Iterable):
        query = _querylist(query, querysep, encoding)
    elif query is not None:
        raise TypeError("Invalid query type")

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    return uriunsplit((scheme, authority, path, query, fragment)).decode()