Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/uritools/__init__.py: 37%
386 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 07:11 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 07:11 +0000
1"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`.
3This module defines RFC 3986 compliant replacements for the most
4commonly used functions of the Python Standard Library
5:mod:`urllib.parse` module.
7"""
9import collections
10import collections.abc
11import ipaddress
12import numbers
13import re
14from string import hexdigits
# Names exported by ``from uritools import *``.
__all__ = (
    # character classes
    "GEN_DELIMS",
    "RESERVED",
    "SUB_DELIMS",
    "UNRESERVED",
    # classification predicates
    "isabspath",
    "isabsuri",
    "isnetpath",
    "isrelpath",
    "issamedoc",
    "isuri",
    # composing, encoding, splitting and joining
    "uricompose",
    "uridecode",
    "uridefrag",
    "uriencode",
    "urijoin",
    "urisplit",
    "uriunsplit",
)

__version__ = "4.0.1"
# RFC 3986 2.2. Reserved Characters
#
#   reserved    = gen-delims / sub-delims
#
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = GEN_DELIMS + SUB_DELIMS

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)

# Integer values of the unreserved characters, for fast membership tests.
_unreserved = frozenset(UNRESERVED.encode())

# Encoding tables, keyed by the ``safe`` bytes they were built for.  Each
# table maps an octet value (the list index) to its encoded form.
#
# RFC 3986 2.1: For consistency, URI producers and normalizers should
# use uppercase hexadecimal digits for all percent-encodings.
_encoded = {
    b"": [
        bytes((octet,)) if octet in _unreserved else b"%%%02X" % octet
        for octet in range(256)
    ]
}

# Maps every two-hex-digit sequence (any letter case) to the octet it names.
_decoded = {
    pair.encode(): bytes.fromhex(pair)
    for pair in (high + low for high in hexdigits for low in hexdigits)
}
def uriencode(uristring, safe="", encoding="utf-8", errors="strict"):
    """Percent-encode a URI string or string component and return bytes.

    A non-bytes `uristring` is first encoded with `encoding` and `errors`.
    Unreserved characters and the characters in `safe` (ASCII `str` or
    `bytes`) are left as they are; every other octet becomes ``%XX``.
    """
    data = uristring if isinstance(uristring, bytes) else uristring.encode(encoding, errors)
    keep = safe if isinstance(safe, bytes) else safe.encode("ascii")
    table = _encoded.get(keep)
    if table is None:
        # First use of this `safe` set: derive a table from the default
        # one and cache it for later calls.
        table = list(_encoded[b""])
        for octet in keep:
            table[octet] = bytes([octet])
        _encoded[keep] = table
    return b"".join([table[octet] for octet in data])
def uridecode(uristring, encoding="utf-8", errors="strict"):
    """Replace percent-escapes in a URI string or string component.

    The result is decoded with `encoding` and `errors`, unless `encoding`
    is :const:`None`, in which case the raw bytes are returned.  A ``%``
    that is not followed by two hex digits is kept unchanged.
    """
    if isinstance(uristring, bytes):
        raw = uristring
    else:
        raw = uristring.encode(encoding or "ascii", errors)
    head, *tail = raw.split(b"%")
    chunks = [head]
    for piece in tail:
        # `piece` is the text following one "%": up to two hex digits,
        # then literal data.
        escape = piece[:2]
        chunks.append(_decoded.get(escape, b"%" + escape))
        chunks.append(piece[2:])
    octets = b"".join(chunks)
    return octets if encoding is None else octets.decode(encoding, errors)
class DefragResult(collections.namedtuple("DefragResult", "uri fragment")):
    """Class to hold :func:`uridefrag` results."""

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the recombined version of the original URI as a string."""
        uri, fragment = self
        if fragment is None:
            return uri
        # Use a separator of the same type as the fragment.
        separator = b"#" if isinstance(fragment, bytes) else "#"
        return uri + separator + fragment

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI did not contain a fragment component.

        """
        if self.fragment is None:
            return default
        return uridecode(self.fragment, encoding, errors)
class SplitResult(
    collections.namedtuple("SplitResult", "scheme authority path query fragment")
):
    """Base class to hold :func:`urisplit` results.

    The methods below rely on class attributes that this base class does
    not define itself: the delimiter constants (``AT``, ``COLON``,
    ``SLASH``, ``QUEST``, ``HASH``, ``LBRACKET``, ``RBRACKET``, ``DOT``,
    ``DOTDOT``, ``EMPTY``, ``EQ``, ``DIGITS``) and the compiled pattern
    ``RE``.  Subclasses provide them as either `str` or `bytes` values.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The raw userinfo subcomponent, or :const:`None` if the authority
        is missing or contains no ``@``."""
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The raw host subcomponent, or :const:`None` if there is no
        authority component."""
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Strip a trailing ":port" only if a colon was actually found and
        # everything after it is digits.  Without the `present` check, an
        # all-digit host with no port (e.g. "123") would be taken for a
        # port and the host reported as empty.  The digits check keeps
        # the colons inside an IP literal from being taken as the port
        # delimiter.
        if present and not port.lstrip(self.DIGITS):
            return host
        else:
            return hostinfo

    @property
    def port(self):
        """The raw (possibly empty) port subcomponent, or :const:`None` if
        the authority is missing or does not end in ``:`` plus digits."""
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode("ascii").lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each subcomponent; otherwise :exc:`TypeError` or
        :exc:`ValueError` is raised.  `encoding` is applied to the
        userinfo only; it is not passed on to :meth:`gethost`.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError("Invalid default type")
        elif len(default) != 3:
            raise ValueError("Invalid default length")
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2]),
        )

    def getuserinfo(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors="strict"):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        An empty host also yields `default` when `default` is not
        :const:`None`.  Raises :exc:`ValueError` for a host with an
        unmatched square bracket or an unsupported IP literal.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return self.__parse_ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError("Invalid host %r" % host)
        # TODO: faster check for IPv4 address?
        try:
            if isinstance(host, bytes):
                return ipaddress.IPv4Address(host.decode("ascii"))
            else:
                return ipaddress.IPv4Address(host)
        except ValueError:
            # not an IPv4 address: treat as a registered name
            return uridecode(host, "utf-8", errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding="utf-8", errors="strict"):
        """Return the normalized decoded URI path."""
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        A term without ``=`` gets the value :const:`None`; empty terms
        are skipped.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            # str query, bytes separator
            qsl = self.query.split(sep.decode("ascii"))
        else:
            # bytes query, str separator
            qsl = self.query.split(sep.encode("ascii"))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and self.path.startswith(self.SLASH)
        )

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path.startswith(self.SLASH)
        )

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path
            and self.query is None
        )

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        Unless `strict` is true, a scheme in `ref` that equals the base
        scheme is ignored and `ref` is resolved as a relative reference.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # replace everything after the last slash of the base path
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                # leading ".." of a relative path is kept
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)

    @classmethod
    def __parse_ip_literal(cls, address):
        # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined
        # IP literal address formats, an implementation may use an
        # optional version flag to indicate such a format explicitly
        # rather than rely on heuristic determination.
        #
        #  IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        #
        #  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        #
        # If a URI containing an IP-literal that starts with "v"
        # (case-insensitive), indicating that the version flag is
        # present, is dereferenced by an application that does not
        # know the meaning of that version flag, then the application
        # should return an appropriate error for "address mechanism
        # not supported".
        if isinstance(address, bytes):
            address = address.decode("ascii")
        if address.startswith("v"):
            raise ValueError("address mechanism not supported")
        return ipaddress.IPv6Address(address)
class SplitResultBytes(SplitResult):
    """:class:`SplitResult` whose pattern and delimiters are `bytes`."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        rb"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = b":"
    SLASH = b"/"
    QUEST = b"?"
    HASH = b"#"
    LBRACKET = b"["
    RBRACKET = b"]"
    AT = b"@"

    # RFC 3986 3.3 dot-segments
    DOT = b"."
    DOTDOT = b".."

    EMPTY = b""
    EQ = b"="

    DIGITS = b"0123456789"
class SplitResultString(SplitResult):
    """:class:`SplitResult` whose pattern and delimiters are `str`."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        r"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = ":"
    SLASH = "/"
    QUEST = "?"
    HASH = "#"
    LBRACKET = "["
    RBRACKET = "]"
    AT = "@"

    # RFC 3986 3.3 dot-segments
    DOT = "."
    DOTDOT = ".."

    EMPTY = ""
    EQ = "="

    DIGITS = "0123456789"
def uridefrag(uristring):
    """Remove an existing fragment component from a URI reference string."""
    # Partition on a "#" of the same type as the input.
    hash_sign = b"#" if isinstance(uristring, bytes) else "#"
    uri, found, fragment = uristring.partition(hash_sign)
    return DefragResult(uri, fragment if found else None)
def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    """
    # The result class matches the input type (bytes or str).
    cls = SplitResultBytes if isinstance(uristring, bytes) else SplitResultString
    components = cls.RE.match(uristring).groups()
    return cls(*components)
def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    """
    scheme, authority, path, query, fragment = parts
    # The type of the path component selects the bytes or str flavour.
    cls = SplitResultBytes if isinstance(path, bytes) else SplitResultString
    return cls(scheme, authority, path, query, fragment).geturi()
def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    """
    if isinstance(base, type(ref)):
        parsed = urisplit(base)
    elif isinstance(base, bytes):
        # bytes base, other ref type: work on the decoded base
        parsed = urisplit(base.decode())
    else:
        # base is not bytes and ref is of another type: decode the ref
        parsed = urisplit(base)
        ref = ref.decode()
    return parsed.transform(ref, strict).geturi()
def isuri(uristring):
    """Return :const:`True` if `uristring` is a URI."""
    parts = urisplit(uristring)
    return parts.isuri()
def isabsuri(uristring):
    """Return :const:`True` if `uristring` is an absolute URI."""
    parts = urisplit(uristring)
    return parts.isabsuri()
def isnetpath(uristring):
    """Return :const:`True` if `uristring` is a network-path reference."""
    parts = urisplit(uristring)
    return parts.isnetpath()
def isabspath(uristring):
    """Return :const:`True` if `uristring` is an absolute-path reference."""
    parts = urisplit(uristring)
    return parts.isabspath()
def isrelpath(uristring):
    """Return :const:`True` if `uristring` is a relative-path reference."""
    parts = urisplit(uristring)
    return parts.isrelpath()
def issamedoc(uristring):
    """Return :const:`True` if `uristring` is a same-document reference."""
    parts = urisplit(uristring)
    return parts.issamedoc()
# TBD: move compose to its own submodule?

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
_SCHEME_RE = re.compile(b"^[A-Za-z][A-Za-z0-9+.-]*$")

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
# (the same pattern, once for bytes and once for str input)
_AUTHORITY_RE_BYTES = re.compile(b"^(?:(.*)@)?(.*?)(?::([0-9]*))?$")
_AUTHORITY_RE_STR = re.compile("^(?:(.*)@)?(.*?)(?::([0-9]*))?$")

# Characters that uricompose() leaves unencoded in each component, in
# addition to the unreserved characters.
_SAFE_USERINFO = SUB_DELIMS + ":"
_SAFE_HOST = SUB_DELIMS
_SAFE_PATH = SUB_DELIMS + ":@/"
_SAFE_QUERY = SUB_DELIMS + ":@/?"
_SAFE_FRAGMENT = SUB_DELIMS + ":@/?"
def _scheme(scheme):
    """Return the bytes `scheme` in lowercase; raise :exc:`ValueError` if
    it does not match the RFC 3986 scheme syntax."""
    if not _SCHEME_RE.match(scheme):
        raise ValueError("Invalid scheme component")
    return scheme.lower()
def _authority(userinfo, host, port, encoding):
    """Build the encoded authority component as bytes.

    Returns :const:`None` when `userinfo`, `host` and `port` are all
    :const:`None`.  `host` may be an :mod:`ipaddress` address object,
    `bytes` or `str`; `port` may be a number, `bytes` or `str`.
    """
    pieces = []

    if userinfo is not None:
        pieces += [uriencode(userinfo, _SAFE_USERINFO, encoding), b"@"]

    if isinstance(host, ipaddress.IPv6Address):
        pieces.append(b"[" + host.compressed.encode() + b"]")
    elif isinstance(host, ipaddress.IPv4Address):
        pieces.append(host.compressed.encode())
    elif isinstance(host, bytes):
        pieces.append(_host(host))
    elif host is not None:
        pieces.append(_host(host.encode("utf-8")))

    if port is not None:
        if isinstance(port, numbers.Number):
            raw_port = str(port).encode()
        elif isinstance(port, bytes):
            raw_port = port
        else:
            raw_port = port.encode()
        pieces.append(_port(raw_port))

    # An empty but present authority (b"") is distinct from a missing one.
    return b"".join(pieces) if pieces else None
def _ip_literal(address):
    """Return the bracketed, compressed bytes form of the IPv6 `address`.

    Raises :exc:`ValueError` if `address` starts with ``v`` or is not a
    valid IPv6 address.
    """
    if address.startswith("v"):
        raise ValueError("Address mechanism not supported")
    compressed = ipaddress.IPv6Address(address).compressed
    return b"[" + compressed.encode() + b"]"
def _host(host):
    """Return the encoded form of the bytes `host` subcomponent."""
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    bracketed = host.startswith(b"[") and host.endswith(b"]")
    if bracketed:
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        literal = _ip_literal(host.decode("utf-8"))
    except ValueError:
        # not an IP literal: encode as a lowercase registered name
        return uriencode(host.lower(), _SAFE_HOST, "utf-8")
    else:
        return literal
def _port(port):
    """Return ``b":" + port`` for a non-empty all-digit bytes `port`, or
    ``b""`` for an empty one; raise :exc:`ValueError` otherwise."""
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    if port.lstrip(b"0123456789"):
        raise ValueError("Invalid port subcomponent")
    return b":" + port if port else b""
def _querylist(items, sep, encoding):
    """Encode an iterable of `(key, value)` pairs as a bytes query string.

    Terms are joined with `sep`, which is removed from the set of
    characters left unencoded.  A value of :const:`None` produces a bare
    key; values that are neither `bytes` nor `str` are converted with
    :func:`str`.
    """
    safe = _SAFE_QUERY.replace(sep, "")
    terms = []
    for key, value in items:
        term = uriencode(key, safe, encoding)
        if value is not None:
            text = value if isinstance(value, (bytes, str)) else str(value)
            term = term + b"=" + uriencode(text, safe, encoding)
        terms.append(term)
    return sep.encode("ascii").join(terms)
def _querydict(mapping, sep, encoding):
    """Encode a mapping as a bytes query string.

    A value that is iterable, other than `bytes` or `str`, contributes
    one term per element; any other value contributes a single term.
    """
    pairs = []
    for key, value in mapping.items():
        is_text = isinstance(value, (bytes, str))
        if not is_text and isinstance(value, collections.abc.Iterable):
            pairs.extend((key, element) for element in value)
        else:
            pairs.append((key, value))
    return _querylist(pairs, sep, encoding)
def uricompose(
    scheme=None,
    authority=None,
    path="",
    query=None,
    fragment=None,
    userinfo=None,
    host=None,
    port=None,
    querysep="&",
    encoding="utf-8",
):
    """Compose a URI reference string from its individual components.

    `authority` may be a `str`, `bytes` or a three-item iterable of
    (userinfo, host, port); the `userinfo`, `host` and `port` arguments
    override its parts when not :const:`None`.  `query` may be a string,
    a mapping or an iterable of `(key, value)` pairs.  Raises
    :exc:`TypeError` or :exc:`ValueError` for components that are of an
    unsupported type or invalid.
    """

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if scheme is not None:
        scheme = _scheme(scheme if isinstance(scheme, bytes) else scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        parts = (None, None, None)
    elif isinstance(authority, bytes):
        parts = _AUTHORITY_RE_BYTES.match(authority).groups()
    elif isinstance(authority, str):
        parts = _AUTHORITY_RE_STR.match(authority).groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError("Invalid authority type")
    elif len(authority) != 3:
        raise ValueError("Invalid authority length")
    else:
        parts = authority
    # explicit keyword arguments take precedence over the authority parts
    authority = _authority(
        parts[0] if userinfo is None else userinfo,
        parts[1] if host is None else host,
        parts[2] if port is None else port,
        encoding,
    )
    has_authority = authority is not None

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if has_authority and path and not path.startswith(b"/"):
        raise ValueError("Invalid path with authority component")
    if not has_authority and path.startswith(b"//"):
        raise ValueError("Invalid path without authority component")

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    if scheme is None and not has_authority and not path.startswith(b"/"):
        first_segment = path.partition(b"/")[0]
        if b":" in first_segment:
            path = b"./" + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if isinstance(query, (bytes, str)):
        query = uriencode(query, _SAFE_QUERY, encoding)
    elif isinstance(query, collections.abc.Mapping):
        query = _querydict(query, querysep, encoding)
    elif isinstance(query, collections.abc.Iterable):
        query = _querylist(query, querysep, encoding)
    elif query is not None:
        raise TypeError("Invalid query type")

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    composed = uriunsplit((scheme, authority, path, query, fragment))
    return composed.decode()