Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/uritools/__init__.py: 37%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`.
3This module defines RFC 3986 compliant replacements for the most
4commonly used functions of the Python Standard Library
5:mod:`urllib.parse` module.
7"""
9import collections
10import collections.abc
11import ipaddress
12import numbers
13import re
14from string import hexdigits
17__all__ = (
18 "GEN_DELIMS",
19 "RESERVED",
20 "SUB_DELIMS",
21 "UNRESERVED",
22 "isabspath",
23 "isabsuri",
24 "isnetpath",
25 "isrelpath",
26 "issamedoc",
27 "isuri",
28 "uricompose",
29 "uridecode",
30 "uridefrag",
31 "uriencode",
32 "urijoin",
33 "urisplit",
34 "uriunsplit",
35)
37__version__ = "6.0.1"
# RFC 3986 2.2. Reserved Characters
#
#   reserved    = gen-delims / sub-delims
#
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = GEN_DELIMS + SUB_DELIMS

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"

# Byte values (ints) of the unreserved characters, for O(1) membership tests.
_unreserved = frozenset(UNRESERVED.encode())

# Per-"safe"-set encoding tables: each value is a 256-entry list mapping a
# byte value to its encoded form.  The b"" entry is the base table in which
# only unreserved characters pass through unchanged.
#
# RFC 3986 2.1: For consistency, URI producers and normalizers should
# use uppercase hexadecimal digits for all percent-encodings.
_encoded = {
    b"": [
        bytes([i]) if i in _unreserved else ("%%%02X" % i).encode() for i in range(256)
    ]
}

# Maps every two-character hex pair (any letter case) to the single byte it
# denotes, e.g. b"2f" and b"2F" both map to b"/".
_decoded = {
    (a + b).encode(): bytes.fromhex(a + b) for a in hexdigits for b in hexdigits
}
def uriencode(uristring, safe="", encoding="utf-8", errors="strict"):
    """Encode a URI string or string component.

    `uristring` may be `bytes` or `str`; a `str` is first encoded using
    `encoding` and `errors`.  Every byte that is neither an unreserved
    character nor listed in `safe` is replaced by its uppercase
    ``%XX`` percent-encoding.  `safe` may be `bytes` or an ASCII-only
    `str`.  The result is always `bytes`.

    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding, errors)
    if not isinstance(safe, bytes):
        safe = safe.encode("ascii")
    try:
        encoded = _encoded[safe]
    except KeyError:
        # First use of this particular safe set: derive its table from
        # the base table and cache it for subsequent calls.
        encoded = _encoded[b""][:]
        for i in safe:
            encoded[i] = bytes([i])
        _encoded[safe] = encoded
    # Iterating over bytes yields ints, which index the table directly.
    return b"".join(map(encoded.__getitem__, uristring))
def uridecode(uristring, encoding="utf-8", errors="strict"):
    """Decode a URI string or string component.

    Percent-encoded octets in `uristring` (`bytes` or `str`) are
    replaced by the bytes they denote; a ``%`` that is not followed by
    two hex digits is kept unchanged.  The result is decoded to `str`
    using `encoding` and `errors`, or returned as `bytes` if `encoding`
    is :const:`None`.

    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding or "ascii", errors)
    parts = uristring.split(b"%")
    result = [parts[0]]
    append = result.append
    decode = _decoded.get
    for s in parts[1:]:
        # s[:2] is the candidate hex pair; fall back to the literal text
        # (with its "%" restored) when it is not a valid pair.
        append(decode(s[:2], b"%" + s[:2]))
        append(s[2:])
    if encoding is not None:
        return b"".join(result).decode(encoding, errors)
    else:
        return b"".join(result)
class DefragResult(collections.namedtuple("DefragResult", "uri fragment")):
    """Class to hold :func:`uridefrag` results.

    `uri` is the URI reference without its fragment; `fragment` is the
    raw fragment, or :const:`None` if there was no ``#`` delimiter.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the recombined version of the original URI as a string."""
        fragment = self.fragment
        if fragment is None:
            return self.uri
        elif isinstance(fragment, bytes):
            return self.uri + b"#" + fragment
        else:
            return self.uri + "#" + fragment

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is not None:
            return uridecode(fragment, encoding, errors)
        else:
            return default
class SplitResult(
    collections.namedtuple("SplitResult", "scheme authority path query fragment")
):
    """Base class to hold :func:`urisplit` results.

    Subclasses supply the delimiter constants (``COLON``, ``SLASH``,
    ``AT``, ``DIGITS``, ...) and the splitting regex ``RE`` in either
    `bytes` or `str` flavour, so that the methods below work on both.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The raw userinfo subcomponent, or :const:`None` if absent."""
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The raw host subcomponent, or :const:`None` if there is no
        authority component.

        """
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Only strip a trailing ":port" if a colon was actually found and
        # what follows is all digits (possibly empty).  Without the
        # `present` check an all-digit host without a port (e.g. "123")
        # would be mistaken for a port and yield an empty host; a colon
        # inside an IP literal such as "[::1]" is followed by "1]", which
        # is not all digits, so the literal is returned intact.
        if present and not port.lstrip(self.DIGITS):
            return host
        else:
            return hostinfo

    @property
    def port(self):
        """The raw port subcomponent (possibly empty), or :const:`None`
        if absent.

        """
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode("ascii").lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each subcomponent; otherwise :exc:`TypeError` or
        :exc:`ValueError` is raised.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError("Invalid default type")
        elif len(default) != 3:
            raise ValueError("Invalid default length")
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2]),
        )

    def getuserinfo(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors="strict"):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        Raises :exc:`ValueError` for a host with mismatched brackets or
        an unsupported/invalid IP literal.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return self.__parse_ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError("Invalid host %r: mismatched brackets" % host)
        # TODO: faster check for IPv4 address?
        try:
            if isinstance(host, bytes):
                return ipaddress.IPv4Address(host.decode("ascii"))
            else:
                return ipaddress.IPv4Address(host)
        except ValueError:
            # Not an IPv4 address: treat as a registered name, which is
            # case-insensitive and therefore normalized to lowercase.
            return uridecode(host, "utf-8", errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding="utf-8", errors="strict"):
        """Return the normalized decoded URI path."""
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        A term without ``=`` yields a value of :const:`None`; empty
        terms are skipped.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            qsl = self.query.split(sep.decode("ascii"))
        else:
            qsl = self.query.split(sep.encode("ascii"))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and self.path.startswith(self.SLASH)
        )

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path.startswith(self.SLASH)
        )

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path
            and self.query is None
        )

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        With `strict` false, a reference whose scheme equals the base
        scheme is treated as if it had no scheme.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # Replace everything after the last "/" of the base path; if
            # the base path has no "/", parts[0] and parts[1] are empty
            # and the reference path is returned as is.
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)

    @classmethod
    def __parse_ip_literal(cls, address):
        # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined
        # IP literal address formats, an implementation may use an
        # optional version flag to indicate such a format explicitly
        # rather than rely on heuristic determination.
        #
        #  IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        #
        #  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        #
        # If a URI containing an IP-literal that starts with "v"
        # (case-insensitive), indicating that the version flag is
        # present, is dereferenced by an application that does not
        # know the meaning of that version flag, then the application
        # should return an appropriate error for "address mechanism
        # not supported".
        if isinstance(address, bytes):
            address = address.decode("ascii")
        # The version flag is case-insensitive, so accept "V" as well.
        if address.startswith(("v", "V")):
            raise ValueError("address mechanism not supported")
        return ipaddress.IPv6Address(address)
class SplitResultBytes(SplitResult):
    """:class:`SplitResult` specialization for `bytes` URI references."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        rb"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON, SLASH, QUEST, HASH, LBRACKET, RBRACKET, AT = (
        b":",
        b"/",
        b"?",
        b"#",
        b"[",
        b"]",
        b"@",
    )

    # RFC 3986 3.3 dot-segments
    DOT, DOTDOT = b".", b".."

    EMPTY, EQ = b"", b"="

    DIGITS = b"0123456789"
class SplitResultString(SplitResult):
    """:class:`SplitResult` specialization for `str` URI references."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        r"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON, SLASH, QUEST, HASH, LBRACKET, RBRACKET, AT = (
        ":",
        "/",
        "?",
        "#",
        "[",
        "]",
        "@",
    )

    # RFC 3986 3.3 dot-segments
    DOT, DOTDOT = ".", ".."

    EMPTY, EQ = "", "="

    DIGITS = "0123456789"
def uridefrag(uristring):
    """Remove an existing fragment component from a URI reference string.

    Returns a :class:`DefragResult` whose `fragment` is :const:`None`
    when `uristring` (`bytes` or `str`) contains no ``#`` delimiter.

    """
    if isinstance(uristring, bytes):
        parts = uristring.partition(b"#")
    else:
        parts = uristring.partition("#")
    # parts[1] is empty when no "#" was found, which distinguishes a
    # missing fragment (None) from an empty one ("").
    return DefragResult(parts[0], parts[2] if parts[1] else None)
def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    The result is a :class:`SplitResultBytes` for `bytes` input and a
    :class:`SplitResultString` otherwise.

    """
    if isinstance(uristring, bytes):
        result = SplitResultBytes
    else:
        result = SplitResultString
    return result(*result.RE.match(uristring).groups())
def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The type of the `path` item (`bytes` or not) selects whether the
    result is assembled as `bytes` or `str`.

    """
    scheme, authority, path, query, fragment = parts
    if isinstance(path, bytes):
        result = SplitResultBytes
    else:
        result = SplitResultString
    return result(scheme, authority, path, query, fragment).geturi()
def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    If `base` and `ref` differ in type, whichever one is `bytes` is
    decoded so that the join is performed, and the result returned, as
    `str`.

    """
    if isinstance(base, type(ref)):
        return urisplit(base).transform(ref, strict).geturi()
    elif isinstance(base, bytes):
        return urisplit(base.decode()).transform(ref, strict).geturi()
    else:
        return urisplit(base).transform(ref.decode(), strict).geturi()
def isuri(uristring):
    """Return :const:`True` if `uristring` is a URI."""
    return urisplit(uristring).isuri()
def isabsuri(uristring):
    """Return :const:`True` if `uristring` is an absolute URI."""
    return urisplit(uristring).isabsuri()
def isnetpath(uristring):
    """Return :const:`True` if `uristring` is a network-path reference."""
    return urisplit(uristring).isnetpath()
def isabspath(uristring):
    """Return :const:`True` if `uristring` is an absolute-path reference."""
    return urisplit(uristring).isabspath()
def isrelpath(uristring):
    """Return :const:`True` if `uristring` is a relative-path reference."""
    return urisplit(uristring).isrelpath()
def issamedoc(uristring):
    """Return :const:`True` if `uristring` is a same-document reference."""
    return urisplit(uristring).issamedoc()
# TBD: move compose to its own submodule?

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#
# Anchored with \A...\Z rather than ^...$ because "$" also matches just
# before a trailing newline, which would let b"http\n" pass validation.
_SCHEME_RE = re.compile(rb"\A[A-Za-z][A-Za-z0-9+.-]*\Z")

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
_AUTHORITY_RE_BYTES = re.compile(b"^(?:(.*)@)?(.*?)(?::([0-9]*))?$")
_AUTHORITY_RE_STR = re.compile("^(?:(.*)@)?(.*?)(?::([0-9]*))?$")

# safe component characters
_SAFE_USERINFO = SUB_DELIMS + ":"
_SAFE_HOST = SUB_DELIMS
_SAFE_PATH = SUB_DELIMS + ":@/"
_SAFE_QUERY = SUB_DELIMS + ":@/?"
_SAFE_FRAGMENT = SUB_DELIMS + ":@/?"
def _scheme(scheme):
    """Validate the `bytes` scheme and return it in lowercase.

    Raises :exc:`ValueError` if `scheme` is not a valid RFC 3986 scheme
    name.

    """
    # fullmatch() requires the whole value to match, so a trailing
    # newline is rejected regardless of how the pattern is anchored.
    if _SCHEME_RE.fullmatch(scheme):
        return scheme.lower()
    else:
        raise ValueError("Invalid scheme component")
def _authority(userinfo, host, port, encoding):
    """Assemble the encoded authority component as `bytes`.

    `host` may be an :mod:`ipaddress` address object, `bytes`, `str` or
    :const:`None`; `port` may be a number, `bytes`, `str` or
    :const:`None`.  Returns :const:`None` if no subcomponent was given.

    """
    authority = []

    if userinfo is not None:
        authority.append(uriencode(userinfo, _SAFE_USERINFO, encoding))
        authority.append(b"@")

    if isinstance(host, ipaddress.IPv6Address):
        authority.append(b"[" + host.compressed.encode() + b"]")
    elif isinstance(host, ipaddress.IPv4Address):
        authority.append(host.compressed.encode())
    elif isinstance(host, bytes):
        authority.append(_host(host))
    elif host is not None:
        authority.append(_host(host.encode("utf-8")))

    if isinstance(port, numbers.Number):
        authority.append(_port(str(port).encode()))
    elif isinstance(port, bytes):
        authority.append(_port(port))
    elif port is not None:
        authority.append(_port(port.encode()))

    return b"".join(authority) if authority else None
def _ip_literal(address):
    """Return the bracketed, compressed IPv6 literal for `address` as
    `bytes`.

    Raises :exc:`ValueError` if `address` (a `str`) carries an IPvFuture
    version flag or is not a valid IPv6 address.

    """
    # RFC 3986 3.2.2: the "v" version flag is case-insensitive.
    if address.startswith(("v", "V")):
        raise ValueError("Address mechanism not supported")
    else:
        return b"[" + ipaddress.IPv6Address(address).compressed.encode() + b"]"
def _host(host):
    """Return the normalized, encoded form of the `bytes` host."""
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    if host.startswith(b"[") and host.endswith(b"]"):
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        return _ip_literal(host.decode("utf-8"))
    except ValueError:
        # Not an IPv6 address: encode as a lowercase registered name.
        return uriencode(host.lower(), _SAFE_HOST, "utf-8")
def _port(port):
    """Return ``b":" + port`` for a non-empty all-digit `bytes` port, or
    ``b""`` for an empty one.

    Raises :exc:`ValueError` if `port` contains anything but ASCII
    digits.

    """
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    if port.lstrip(b"0123456789"):
        raise ValueError("Invalid port subcomponent")
    elif port:
        return b":" + port
    else:
        return b""
def _querylist(items, sep, encoding):
    """Encode an iterable of `(name, value)` pairs as a `bytes` query
    string whose terms are joined by `sep`.

    A value of :const:`None` produces a bare name without ``=``; values
    that are neither `bytes` nor `str` are converted with :func:`str`.
    `sep` may be an ASCII `str` or `bytes`.

    """
    # Accept a bytes separator too, as SplitResult.getquerylist() does;
    # str.replace() below would otherwise raise TypeError.
    if isinstance(sep, bytes):
        sep = sep.decode("ascii")
    terms = []
    append = terms.append
    # The separator must be percent-encoded inside names and values.
    safe = _SAFE_QUERY.replace(sep, "")
    for key, value in items:
        name = uriencode(key, safe, encoding)
        if value is None:
            append(name)
        elif isinstance(value, (bytes, str)):
            append(name + b"=" + uriencode(value, safe, encoding))
        else:
            append(name + b"=" + uriencode(str(value), safe, encoding))
    return sep.encode("ascii").join(terms)
def _querydict(mapping, sep, encoding):
    """Encode a mapping of query variables as a `bytes` query string.

    A value that is a non-string iterable contributes one term per
    element; any other value contributes a single term.

    """
    items = []
    for key, value in mapping.items():
        if isinstance(value, (bytes, str)):
            # strings are iterable, but must be treated as single values
            items.append((key, value))
        elif isinstance(value, collections.abc.Iterable):
            items.extend([(key, v) for v in value])
        else:
            items.append((key, value))
    return _querylist(items, sep, encoding)
def uricompose(
    scheme=None,
    authority=None,
    path="",
    query=None,
    fragment=None,
    userinfo=None,
    host=None,
    port=None,
    querysep="&",
    encoding="utf-8",
):
    """Compose a URI reference string from its individual components.

    `authority` may be a string or a three-item iterable of
    `(userinfo, host, port)`; the `userinfo`, `host` and `port` keyword
    arguments, when not :const:`None`, override the corresponding part.
    `query` may be a string, a mapping, or an iterable of
    `(name, value)` pairs joined by `querysep`.  The result is a `str`.

    Raises :exc:`ValueError` for an invalid scheme, port, authority
    length or path/authority combination, and :exc:`TypeError` for an
    unsupported `authority` or `query` type.

    """

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if isinstance(scheme, bytes):
        scheme = _scheme(scheme)
    elif scheme is not None:
        scheme = _scheme(scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        authority = (None, None, None)
    elif isinstance(authority, bytes):
        authority = _AUTHORITY_RE_BYTES.match(authority).groups()
    elif isinstance(authority, str):
        authority = _AUTHORITY_RE_STR.match(authority).groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError("Invalid authority type")
    elif len(authority) != 3:
        raise ValueError("Invalid authority length")
    authority = _authority(
        userinfo if userinfo is not None else authority[0],
        host if host is not None else authority[1],
        port if port is not None else authority[2],
        encoding,
    )

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is not None and path and not path.startswith(b"/"):
        raise ValueError("Invalid path with authority component")
    if authority is None and path.startswith(b"//"):
        raise ValueError("Invalid path without authority component")

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    if scheme is None and authority is None and not path.startswith(b"/"):
        if b":" in path.partition(b"/")[0]:
            path = b"./" + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if isinstance(query, (bytes, str)):
        query = uriencode(query, _SAFE_QUERY, encoding)
    elif isinstance(query, collections.abc.Mapping):
        query = _querydict(query, querysep, encoding)
    elif isinstance(query, collections.abc.Iterable):
        query = _querylist(query, querysep, encoding)
    elif query is not None:
        raise TypeError("Invalid query type")

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    return uriunsplit((scheme, authority, path, query, fragment)).decode()