Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/uritools/__init__.py: 37%

386 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:08 +0000

1"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`. 

2 

3This module defines RFC 3986 compliant replacements for the most 

4commonly used functions of the Python Standard Library 

5:mod:`urllib.parse` module. 

6 

7""" 

8 

9import collections 

10import collections.abc 

11import ipaddress 

12import numbers 

13import re 

14from string import hexdigits 

15 

16 

# Names exported by ``from uritools import *``.
__all__ = (
    # character-class constants
    "GEN_DELIMS",
    "RESERVED",
    "SUB_DELIMS",
    "UNRESERVED",
    # classification predicates
    "isabspath",
    "isabsuri",
    "isnetpath",
    "isrelpath",
    "issamedoc",
    "isuri",
    # compose / encode / decode / split helpers
    "uricompose",
    "uridecode",
    "uridefrag",
    "uriencode",
    "urijoin",
    "urisplit",
    "uriunsplit",
)

__version__ = "4.0.2"

38 

39 

# RFC 3986 2.2. Reserved Characters
#
#   reserved    = gen-delims / sub-delims
#
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = "".join((GEN_DELIMS, SUB_DELIMS))

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    + "abcdefghijklmnopqrstuvwxyz"
    + "0123456789"
    + "-._~"
)

# Byte values of the unreserved characters, for fast membership tests.
_unreserved = frozenset(UNRESERVED.encode())

# Encoding tables keyed by the bytes string of additional "safe" characters.
# Each table maps a byte value (0..255) to its encoded form.  Only the table
# for "no extra safe characters" is built here.
#
# RFC 3986 2.1: For consistency, URI producers and normalizers should
# use uppercase hexadecimal digits for all percent-encodings.
_encoded = {
    b"": [
        bytes((octet,)) if octet in _unreserved else ("%%%02X" % octet).encode()
        for octet in range(256)
    ]
}

# Maps every two-character hex pair (any mix of upper/lower case), as bytes,
# to the single byte it denotes.
_decoded = {
    pair.encode(): bytes.fromhex(pair)
    for pair in (hi + lo for hi in hexdigits for lo in hexdigits)
}

74 

75 

def uriencode(uristring, safe="", encoding="utf-8", errors="strict"):
    """Percent-encode a URI string or string component.

    `uristring` may be `bytes` or a string; a string is first encoded
    with `encoding` and `errors`.  Unreserved characters and the
    characters listed in `safe` (ASCII only) are kept as they are; every
    other byte is replaced by its ``%XX`` escape.  The result is always
    a `bytes` object.

    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding, errors)
    if not isinstance(safe, bytes):
        safe = safe.encode("ascii")

    table = _encoded.get(safe)
    if table is None:
        # First use of this particular `safe` set: derive its table from the
        # default one and remember it for later calls.
        table = list(_encoded[b""])
        for octet in safe:
            table[octet] = bytes([octet])
        _encoded[safe] = table

    return b"".join([table[octet] for octet in uristring])

90 

91 

def uridecode(uristring, encoding="utf-8", errors="strict"):
    """Decode the percent-escapes in a URI string or string component.

    A string argument is first converted to bytes using `encoding`
    (or ASCII when `encoding` is :const:`None`).  Each ``%XX`` escape
    with two valid hex digits is replaced by the byte it denotes; a
    ``%`` that is not followed by two hex digits is left untouched.
    Returns `bytes` if `encoding` is :const:`None`, otherwise the
    result decoded with `encoding` and `errors`.

    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding or "ascii", errors)

    head, *escaped = uristring.split(b"%")
    chunks = [head]
    for piece in escaped:
        hexpair = piece[:2]
        # Unknown pairs fall back to the literal "%" plus whatever followed.
        chunks.append(_decoded.get(hexpair, b"%" + hexpair))
        chunks.append(piece[2:])

    raw = b"".join(chunks)
    if encoding is None:
        return raw
    return raw.decode(encoding, errors)

107 

108 

class DefragResult(collections.namedtuple("DefragResult", "uri fragment")):
    """Two-field result of :func:`uridefrag`: the URI and its fragment."""

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the URI with the fragment (if any) re-attached."""
        uri, fragment = self
        if fragment is None:
            return uri
        # Pick a separator of the same string type as the fragment.
        separator = b"#" if isinstance(fragment, bytes) else "#"
        return uri + separator + fragment

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the percent-decoded fragment, or `default` when the
        fragment field is :const:`None`.

        """
        if self.fragment is None:
            return default
        return uridecode(self.fragment, encoding, errors)

134 

135 

class SplitResult(
    collections.namedtuple("SplitResult", "scheme authority path query fragment")
):
    """Base class to hold :func:`urisplit` results.

    The five fields hold the raw URI components.  The methods rely on
    class attributes that this base class does not define itself
    (``RE``, ``COLON``, ``SLASH``, ``QUEST``, ``HASH``, ``LBRACKET``,
    ``RBRACKET``, ``AT``, ``DOT``, ``DOTDOT``, ``EMPTY``, ``EQ`` and
    ``DIGITS``); a subclass has to provide them in the same string type
    (`str` or `bytes`) as the fields.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The raw userinfo part of the authority, or :const:`None` if
        there is no authority or it contains no ``@``.

        """
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The raw host part of the authority, or :const:`None` if there
        is no authority.

        """
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Only cut off a trailing ":port" when a colon was actually found.
        # Without a colon, rpartition() puts the whole string into `port`,
        # so an all-digit host such as "123" must not be taken for a port.
        if present and not port.lstrip(self.DIGITS):
            return host
        else:
            return hostinfo

    @property
    def port(self):
        """The raw port part of the authority (possibly empty), or
        :const:`None` if there is no authority or no ``:port`` suffix
        consisting only of digits.

        """
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        A `bytes` scheme is decoded as ASCII, so the result is a `str`.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode("ascii").lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each subcomponent.  Raises :exc:`TypeError` if it is
        not iterable and :exc:`ValueError` if its length is not three.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError("Invalid default type")
        elif len(default) != 3:
            raise ValueError("Invalid default length")
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2]),
        )

    def getuserinfo(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors="strict"):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        An empty host also yields `default`, unless `default` is
        :const:`None`.  Raises :exc:`ValueError` for a host with only one
        of the two square brackets, and for a bracketed host that is not
        a supported IP literal.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return self.__parse_ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError("Invalid host %r" % host)
        # TODO: faster check for IPv4 address?
        try:
            if isinstance(host, bytes):
                return ipaddress.IPv4Address(host.decode("ascii"))
            else:
                return ipaddress.IPv4Address(host)
        except ValueError:
            # Not a dotted-quad address: treat as a registered name.
            return uridecode(host, "utf-8", errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding="utf-8", errors="strict"):
        """Return the normalized decoded URI path.

        Dot segments are removed before percent-decoding.

        """
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        Empty pairs are skipped; a pair without ``=`` gets the value
        :const:`None`.  `sep` may be `str` or `bytes` regardless of the
        type of the query; it is converted via ASCII if necessary.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            qsl = self.query.split(sep.decode("ascii"))
        else:
            qsl = self.query.split(sep.encode("ascii"))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and self.path.startswith(self.SLASH)
        )

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path.startswith(self.SLASH)
        )

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path
            and self.query is None
        )

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        Unless `strict` is true, a scheme in `ref` that equals the base
        scheme is ignored and `ref` is resolved like a relative
        reference.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # Replace everything after the last slash of the base path;
            # parts[1] is empty when the base path contains no slash.
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)

    @classmethod
    def __parse_ip_literal(cls, address):
        # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined
        # IP literal address formats, an implementation may use an
        # optional version flag to indicate such a format explicitly
        # rather than rely on heuristic determination.
        #
        #  IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        #
        #  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        #
        # If a URI containing an IP-literal that starts with "v"
        # (case-insensitive), indicating that the version flag is
        # present, is dereferenced by an application that does not
        # know the meaning of that version flag, then the application
        # should return an appropriate error for "address mechanism
        # not supported".
        if isinstance(address, bytes):
            address = address.decode("ascii")
        # Accept both "v" and "V" so the dedicated error is raised for
        # either spelling of the version flag.
        if address.startswith(("v", "V")):
            raise ValueError("address mechanism not supported")
        return ipaddress.IPv6Address(address)

455 

456 

class SplitResultBytes(SplitResult):
    """:class:`SplitResult` variant whose pattern and delimiters are `bytes`."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        rb"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = b":"
    SLASH = b"/"
    QUEST = b"?"
    HASH = b"#"
    LBRACKET = b"["
    RBRACKET = b"]"
    AT = b"@"

    # RFC 3986 3.3 dot-segments
    DOT = b"."
    DOTDOT = b".."

    EMPTY = b""
    EQ = b"="

    DIGITS = b"0123456789"

489 

490 

class SplitResultString(SplitResult):
    """:class:`SplitResult` variant whose pattern and delimiters are `str`."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        r"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = ":"
    SLASH = "/"
    QUEST = "?"
    HASH = "#"
    LBRACKET = "["
    RBRACKET = "]"
    AT = "@"

    # RFC 3986 3.3 dot-segments
    DOT = "."
    DOTDOT = ".."

    EMPTY = ""
    EQ = "="

    DIGITS = "0123456789"

523 

524 

def uridefrag(uristring):
    """Split a URI reference string at its first ``#``.

    Returns a :class:`DefragResult` holding the part before the ``#``
    and the fragment after it; the fragment is :const:`None` when the
    string contains no ``#``.

    """
    hash_sign = b"#" if isinstance(uristring, bytes) else "#"
    uri, found, fragment = uristring.partition(hash_sign)
    return DefragResult(uri, fragment if found else None)

532 

533 

def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    The result is a :class:`SplitResultBytes` for `bytes` input and a
    :class:`SplitResultString` otherwise.

    """
    cls = SplitResultBytes if isinstance(uristring, bytes) else SplitResultString
    components = cls.RE.match(uristring).groups()
    return cls(*components)

546 

547 

def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The type of the path element decides whether the components are
    joined as `bytes` or as `str`.

    """
    scheme, authority, path, query, fragment = parts
    cls = SplitResultBytes if isinstance(path, bytes) else SplitResultString
    return cls(scheme, authority, path, query, fragment).geturi()

559 

560 

def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    If `base` and `ref` differ in string type, the `bytes` argument is
    decoded so that both are processed as `str`.

    """
    if isinstance(base, type(ref)):
        target = urisplit(base).transform(ref, strict)
    elif isinstance(base, bytes):
        target = urisplit(base.decode()).transform(ref, strict)
    else:
        base_parts = urisplit(base)
        target = base_parts.transform(ref.decode(), strict)
    return target.geturi()

572 

573 

def isuri(uristring):
    """Return :const:`True` if `uristring` is a URI."""
    parts = urisplit(uristring)
    return parts.isuri()

577 

578 

def isabsuri(uristring):
    """Return :const:`True` if `uristring` is an absolute URI."""
    parts = urisplit(uristring)
    return parts.isabsuri()

582 

583 

def isnetpath(uristring):
    """Return :const:`True` if `uristring` is a network-path reference."""
    parts = urisplit(uristring)
    return parts.isnetpath()

587 

588 

def isabspath(uristring):
    """Return :const:`True` if `uristring` is an absolute-path reference."""
    parts = urisplit(uristring)
    return parts.isabspath()

592 

593 

def isrelpath(uristring):
    """Return :const:`True` if `uristring` is a relative-path reference."""
    parts = urisplit(uristring)
    return parts.isrelpath()

597 

598 

def issamedoc(uristring):
    """Return :const:`True` if `uristring` is a same-document reference."""
    parts = urisplit(uristring)
    return parts.issamedoc()

602 

603 

604# TBD: move compose to its own submodule? 

605 

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#
# The pattern is anchored with \Z rather than $: "$" also matches just
# before a trailing newline, which would let a value such as b"http\n"
# pass as a valid scheme.
_SCHEME_RE = re.compile(rb"^[A-Za-z][A-Za-z0-9+.-]*\Z")

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
#
# Groups: (userinfo, host, port); userinfo and port are None when absent.
_AUTHORITY_RE_BYTES = re.compile(b"^(?:(.*)@)?(.*?)(?::([0-9]*))?$")
_AUTHORITY_RE_STR = re.compile("^(?:(.*)@)?(.*?)(?::([0-9]*))?$")

612 

# Characters (besides the unreserved set) that are left unescaped when
# encoding each component.  Every set extends the previous one.
_SAFE_HOST = SUB_DELIMS
_SAFE_USERINFO = _SAFE_HOST + ":"
_SAFE_PATH = _SAFE_USERINFO + "@/"
_SAFE_QUERY = _SAFE_PATH + "?"
_SAFE_FRAGMENT = _SAFE_QUERY

619 

620 

def _scheme(scheme):
    """Validate a `bytes` scheme and return it in lowercase.

    Raises :exc:`ValueError` if `scheme` does not match `_SCHEME_RE`.

    """
    if _SCHEME_RE.match(scheme) is None:
        raise ValueError("Invalid scheme component")
    return scheme.lower()

626 

627 

def _authority(userinfo, host, port, encoding):
    """Build the encoded authority component as `bytes`.

    Each of `userinfo`, `host` and `port` may be :const:`None` to leave
    that part out.  `host` may also be an :mod:`ipaddress` address
    object and `port` a number.  Returns :const:`None` when no part
    was given at all.

    """
    pieces = []

    if userinfo is not None:
        pieces += [uriencode(userinfo, _SAFE_USERINFO, encoding), b"@"]

    if isinstance(host, ipaddress.IPv6Address):
        pieces.append(b"[" + host.compressed.encode() + b"]")
    elif isinstance(host, ipaddress.IPv4Address):
        pieces.append(host.compressed.encode())
    elif isinstance(host, bytes):
        pieces.append(_host(host))
    elif host is not None:
        pieces.append(_host(host.encode("utf-8")))

    if port is not None:
        # Normalize the port to bytes before validating it.
        if isinstance(port, numbers.Number):
            port = str(port).encode()
        elif not isinstance(port, bytes):
            port = port.encode()
        pieces.append(_port(port))

    if not pieces:
        return None
    return b"".join(pieces)

652 

653 

def _ip_literal(address):
    """Return the bracketed, compressed form of an IPv6 address string
    as `bytes`.

    Raises :exc:`ValueError` if `address` starts with ``v`` or is not
    accepted by :class:`ipaddress.IPv6Address`.

    """
    if not address.startswith("v"):
        compressed = ipaddress.IPv6Address(address).compressed
        return b"[" + compressed.encode() + b"]"
    raise ValueError("Address mechanism not supported")

659 

660 

def _host(host):
    """Return the encoded form of a `bytes` host.

    A bracketed host must be a valid IP literal.  An unbracketed host
    that parses as an IPv6 address gets brackets added; anything else
    is lowercased and percent-encoded.

    """
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    bracketed = host.startswith(b"[") and host.endswith(b"]")
    if bracketed:
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        literal = _ip_literal(host.decode("utf-8"))
    except ValueError:
        literal = uriencode(host.lower(), _SAFE_HOST, "utf-8")
    return literal

673 

674 

def _port(port):
    """Return ``b":" + port`` for a non-empty all-digit `bytes` port and
    ``b""`` for an empty one.

    Raises :exc:`ValueError` if `port` contains anything but ASCII
    digits.

    """
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    leftover = port.lstrip(b"0123456789")
    if leftover:
        raise ValueError("Invalid port subcomponent")
    return b":" + port if port else b""

685 

686 

def _querylist(items, sep, encoding):
    """Encode an iterable of ``(key, value)`` pairs as a `bytes` query.

    The separator `sep` is removed from the set of safe characters so
    that it gets escaped inside keys and values.  A value of
    :const:`None` produces a bare key without ``=``; values that are
    neither `bytes` nor `str` are converted with :func:`str`.

    """
    safe = _SAFE_QUERY.replace(sep, "")
    terms = []
    for key, value in items:
        term = uriencode(key, safe, encoding)
        if value is not None:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            term = term + b"=" + uriencode(value, safe, encoding)
        terms.append(term)
    return sep.encode("ascii").join(terms)

700 

701 

def _querydict(mapping, sep, encoding):
    """Encode a mapping of query variables as a `bytes` query.

    A value that is iterable (but not `bytes` or `str`) contributes
    one ``key=value`` pair per element; any other value contributes a
    single pair.

    """
    pairs = []
    for key, value in mapping.items():
        is_string = isinstance(value, (bytes, str))
        if not is_string and isinstance(value, collections.abc.Iterable):
            pairs.extend([(key, member) for member in value])
        else:
            pairs.append((key, value))
    return _querylist(pairs, sep, encoding)

712 

713 

def uricompose(
    scheme=None,
    authority=None,
    path="",
    query=None,
    fragment=None,
    userinfo=None,
    host=None,
    port=None,
    querysep="&",
    encoding="utf-8",
):
    """Compose a URI reference string from its individual components.

    `authority` may be a `bytes` or `str` value or a three-item
    iterable ``(userinfo, host, port)``; the `userinfo`, `host` and
    `port` arguments, when not :const:`None`, take precedence over the
    corresponding authority part.  `query` may be a string, a mapping
    or an iterable of ``(key, value)`` pairs; for the latter two the
    pairs are joined with `querysep`.  The result is returned as `str`.

    Raises :exc:`ValueError` for an invalid scheme, authority length or
    path/authority combination, and :exc:`TypeError` for an unsupported
    authority or query type.

    """

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if scheme is not None:
        scheme = _scheme(scheme if isinstance(scheme, bytes) else scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        authority = (None, None, None)
    elif isinstance(authority, bytes):
        authority = _AUTHORITY_RE_BYTES.match(authority).groups()
    elif isinstance(authority, str):
        authority = _AUTHORITY_RE_STR.match(authority).groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError("Invalid authority type")
    elif len(authority) != 3:
        raise ValueError("Invalid authority length")
    authority = _authority(
        authority[0] if userinfo is None else userinfo,
        authority[1] if host is None else host,
        authority[2] if port is None else port,
        encoding,
    )

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is None:
        if path.startswith(b"//"):
            raise ValueError("Invalid path without authority component")
    elif path and not path.startswith(b"/"):
        raise ValueError("Invalid path with authority component")

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    if scheme is None and authority is None and not path.startswith(b"/"):
        first_segment = path.partition(b"/")[0]
        if b":" in first_segment:
            path = b"./" + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if query is None:
        pass
    elif isinstance(query, (bytes, str)):
        query = uriencode(query, _SAFE_QUERY, encoding)
    elif isinstance(query, collections.abc.Mapping):
        query = _querydict(query, querysep, encoding)
    elif isinstance(query, collections.abc.Iterable):
        query = _querylist(query, querysep, encoding)
    else:
        raise TypeError("Invalid query type")

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    composed = uriunsplit((scheme, authority, path, query, fragment))
    return composed.decode()