Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/uritools/__init__.py: 37%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

387 statements  

1"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`. 

2 

3This module defines RFC 3986 compliant replacements for the most 

4commonly used functions of the Python Standard Library 

5:mod:`urllib.parse` module. 

6 

7""" 

8 

9import collections 

10import collections.abc 

11import ipaddress 

12import numbers 

13import re 

14from string import hexdigits 

15 

16 

# Public names exported by ``from uritools import *``.
__all__ = (
    "GEN_DELIMS",
    "RESERVED",
    "SUB_DELIMS",
    "UNRESERVED",
    "isabspath",
    "isabsuri",
    "isnetpath",
    "isrelpath",
    "issamedoc",
    "isuri",
    "uricompose",
    "uridecode",
    "uridefrag",
    "uriencode",
    "urijoin",
    "urisplit",
    "uriunsplit",
)

__version__ = "6.0.1"

38 

39 

# RFC 3986 2.2. Reserved Characters
#
#   reserved    = gen-delims / sub-delims
#
#   gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
#               / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = GEN_DELIMS + SUB_DELIMS

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"

# Byte values (ints) of the unreserved characters, for O(1) membership tests.
_unreserved = frozenset(UNRESERVED.encode())

# RFC 3986 2.1: For consistency, URI producers and normalizers should
# use uppercase hexadecimal digits for all percent-encodings.
#
# Maps a bytes string of "safe" characters to a 256-entry table giving the
# encoded form of every byte value.  Only the table for an empty safe set is
# built here; uriencode() adds further tables on demand.
_encoded = {
    b"": [
        bytes([i]) if i in _unreserved else ("%%%02X" % i).encode() for i in range(256)
    ]
}

# Maps every two-character hex sequence (any mix of upper and lower case)
# to the single byte it denotes.
_decoded = {
    (a + b).encode(): bytes.fromhex(a + b) for a in hexdigits for b in hexdigits
}

72 

73 

def uriencode(uristring, safe="", encoding="utf-8", errors="strict"):
    """Encode a URI string or string component.

    `uristring` is converted to bytes with `encoding`/`errors` unless it
    already is a bytes object.  Every byte is then replaced by its entry
    in the encoding table for `safe`: bytes listed in `safe` are kept
    as-is, all others use the entry from the base table.  `safe` may be
    bytes or an ASCII-only string.

    Returns the encoded component as bytes.
    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding, errors)
    if not isinstance(safe, bytes):
        safe = safe.encode("ascii")
    try:
        encoded = _encoded[safe]
    except KeyError:
        # First use of this safe set: derive its table from a copy of the
        # base table and remember it for later calls.
        encoded = _encoded[b""][:]
        for i in safe:
            encoded[i] = bytes([i])
        _encoded[safe] = encoded
    return b"".join(map(encoded.__getitem__, uristring))

88 

89 

def uridecode(uristring, encoding="utf-8", errors="strict"):
    """Decode a URI string or string component.

    A non-bytes `uristring` is first encoded with `encoding` (or
    ``"ascii"`` when `encoding` is None).  Each ``%XX`` sequence found in
    the lookup table is replaced by the byte it denotes; a ``%`` that is
    not followed by a known two-character sequence is left unchanged.

    Returns a string decoded with `encoding`/`errors`, or the raw bytes
    when `encoding` is None.
    """
    if not isinstance(uristring, bytes):
        uristring = uristring.encode(encoding or "ascii", errors)
    parts = uristring.split(b"%")
    result = [parts[0]]
    append = result.append
    decode = _decoded.get
    for s in parts[1:]:
        # s[:2] is the candidate hex pair; fall back to the literal text
        # (with its "%" restored) if it is not a valid pair.
        append(decode(s[:2], b"%" + s[:2]))
        append(s[2:])
    if encoding is not None:
        return b"".join(result).decode(encoding, errors)
    else:
        return b"".join(result)

105 

106 

class DefragResult(collections.namedtuple("DefragResult", "uri fragment")):
    """Class to hold :func:`uridefrag` results.

    A two-item named tuple ``(uri, fragment)``; `fragment` is None when
    there is no fragment component.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the recombined version of the original URI as a string.

        The result has the same type (bytes or str) as the stored parts.
        """
        fragment = self.fragment
        if fragment is None:
            return self.uri
        elif isinstance(fragment, bytes):
            return self.uri + b"#" + fragment
        else:
            return self.uri + "#" + fragment

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is not None:
            return uridecode(fragment, encoding, errors)
        else:
            return default

132 

133 

class SplitResult(
    collections.namedtuple("SplitResult", "scheme authority path query fragment")
):
    """Base class to hold :func:`urisplit` results.

    The methods read type-specific constants through ``self``/``cls``
    (``RE``, ``COLON``, ``SLASH``, ``QUEST``, ``HASH``, ``LBRACKET``,
    ``RBRACKET``, ``AT``, ``DOT``, ``DOTDOT``, ``EMPTY``, ``EQ`` and
    ``DIGITS``); these are not defined here and must be supplied by a
    subclass for either bytes or str components.
    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The raw userinfo subcomponent, or None if there is no ``@``."""
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        if present:
            return userinfo
        else:
            return None

    @property
    def host(self):
        """The raw host subcomponent, or None if there is no authority."""
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, _, port = hostinfo.rpartition(self.COLON)
        # If the text after the last colon is not all digits it is not a
        # port (e.g. the tail of an IP literal), so the whole of hostinfo
        # is the host.
        if port.lstrip(self.DIGITS):
            return hostinfo
        else:
            return host

    @property
    def port(self):
        """The raw (possibly empty) port subcomponent, or None."""
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        if present and not port.lstrip(self.DIGITS):
            return port
        else:
            return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode("ascii").lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each subcomponent; otherwise :class:`TypeError` or
        :class:`ValueError` is raised.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError("Invalid default type")
        elif len(default) != 3:
            raise ValueError("Invalid default length")
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2]),
        )

    def getuserinfo(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        else:
            return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors="strict"):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        Raises :class:`ValueError` for a host with mismatched brackets or
        an unsupported or invalid bracketed IP literal.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return self.__parse_ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError("Invalid host %r: mismatched brackets" % host)
        # TODO: faster check for IPv4 address?
        try:
            if isinstance(host, bytes):
                return ipaddress.IPv4Address(host.decode("ascii"))
            else:
                return ipaddress.IPv4Address(host)
        except ValueError:
            # Not an IPv4 address: treat as a registered name.
            return uridecode(host, "utf-8", errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        else:
            return default

    def getpath(self, encoding="utf-8", errors="strict"):
        """Return the normalized decoded URI path."""
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        else:
            return uridecode(query, encoding, errors)

    def getquerydict(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        # Local name chosen so the builtin ``dict`` is not shadowed.
        querydict = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            querydict[name].append(value)
        return querydict

    def getquerylist(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        A pair without ``=`` yields a value of None; empty pairs are
        skipped.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            # str query, bytes separator
            qsl = self.query.split(sep.decode("ascii"))
        else:
            # bytes query, str separator
            qsl = self.query.split(sep.encode("ascii"))
        items = []
        for parts in [qs.partition(self.EQ) for qs in qsl if qs]:
            name = uridecode(parts[0], encoding, errors)
            if parts[1]:
                value = uridecode(parts[2], encoding, errors)
            else:
                value = None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        else:
            return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and self.path.startswith(self.SLASH)
        )

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path.startswith(self.SLASH)
        )

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path
            and self.query is None
        )

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        Unless `strict` is true, a reference scheme equal to the base
        scheme is ignored.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # Replace everything after the last slash of the base path
            # (or the whole base path if it has no slash) with `path`.
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)

    @classmethod
    def __parse_ip_literal(cls, address):
        # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined
        # IP literal address formats, an implementation may use an
        # optional version flag to indicate such a format explicitly
        # rather than rely on heuristic determination.
        #
        #  IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        #
        #  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        #
        # If a URI containing an IP-literal that starts with "v"
        # (case-insensitive), indicating that the version flag is
        # present, is dereferenced by an application that does not
        # know the meaning of that version flag, then the application
        # should return an appropriate error for "address mechanism
        # not supported".
        if isinstance(address, bytes):
            address = address.decode("ascii")
        # Accept both "v" and "V" so the version flag is recognised
        # case-insensitively, as described above.
        if address.startswith(("v", "V")):
            raise ValueError("address mechanism not supported")
        return ipaddress.IPv6Address(address)

453 

454 

class SplitResultBytes(SplitResult):
    """:class:`SplitResult` whose components are bytes objects."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    #
    # re.DOTALL lets the fragment group consume newlines like the other
    # groups (which use negated character classes) already do; without it
    # everything from the first newline in the fragment is dropped.
    RE = re.compile(
        rb"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE | re.DOTALL,
    )

    # RFC 3986 2.2 gen-delims
    COLON, SLASH, QUEST, HASH, LBRACKET, RBRACKET, AT = (
        b":",
        b"/",
        b"?",
        b"#",
        b"[",
        b"]",
        b"@",
    )

    # RFC 3986 3.3 dot-segments
    DOT, DOTDOT = b".", b".."

    EMPTY, EQ = b"", b"="

    DIGITS = b"0123456789"

487 

488 

class SplitResultString(SplitResult):
    """:class:`SplitResult` whose components are str objects."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    #
    # re.DOTALL lets the fragment group consume newlines like the other
    # groups (which use negated character classes) already do; without it
    # everything from the first newline in the fragment is dropped.
    RE = re.compile(
        r"""
    (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
    (?://([^/?#]*))?                 # authority
    ([^?#]*)                         # path
    (?:\?([^#]*))?                   # query
    (?:\#(.*))?                      # fragment
    """,
        flags=re.VERBOSE | re.DOTALL,
    )

    # RFC 3986 2.2 gen-delims
    COLON, SLASH, QUEST, HASH, LBRACKET, RBRACKET, AT = (
        ":",
        "/",
        "?",
        "#",
        "[",
        "]",
        "@",
    )

    # RFC 3986 3.3 dot-segments
    DOT, DOTDOT = ".", ".."

    EMPTY, EQ = "", "="

    DIGITS = "0123456789"

521 

522 

def uridefrag(uristring):
    """Remove an existing fragment component from a URI reference string.

    Splits at the first ``#`` and returns a :class:`DefragResult`; its
    `fragment` is None when `uristring` contains no ``#``.
    """
    if isinstance(uristring, bytes):
        parts = uristring.partition(b"#")
    else:
        parts = uristring.partition("#")
    return DefragResult(parts[0], parts[2] if parts[1] else None)

530 

531 

def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    Returns a :class:`SplitResultBytes` for bytes input and a
    :class:`SplitResultString` otherwise.

    """
    if isinstance(uristring, bytes):
        result = SplitResultBytes
    else:
        result = SplitResultString
    return result(*result.RE.match(uristring).groups())

544 

545 

def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The type of the path element (bytes or not) selects whether the
    components are joined as bytes or as str.

    """
    scheme, authority, path, query, fragment = parts
    if isinstance(path, bytes):
        result = SplitResultBytes
    else:
        result = SplitResultString
    return result(scheme, authority, path, query, fragment).geturi()

557 

558 

def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    If `base` and `ref` are of different types, the bytes argument is
    decoded with :meth:`bytes.decode` defaults before joining, so the
    result is a str.

    """
    if isinstance(base, type(ref)):
        return urisplit(base).transform(ref, strict).geturi()
    elif isinstance(base, bytes):
        return urisplit(base.decode()).transform(ref, strict).geturi()
    else:
        return urisplit(base).transform(ref.decode(), strict).geturi()

570 

571 

def isuri(uristring):
    """Return :const:`True` if `uristring` is a URI."""
    return urisplit(uristring).isuri()

575 

576 

def isabsuri(uristring):
    """Return :const:`True` if `uristring` is an absolute URI."""
    return urisplit(uristring).isabsuri()

580 

581 

def isnetpath(uristring):
    """Return :const:`True` if `uristring` is a network-path reference."""
    return urisplit(uristring).isnetpath()

585 

586 

def isabspath(uristring):
    """Return :const:`True` if `uristring` is an absolute-path reference."""
    return urisplit(uristring).isabspath()

590 

591 

def isrelpath(uristring):
    """Return :const:`True` if `uristring` is a relative-path reference."""
    return urisplit(uristring).isrelpath()

595 

596 

def issamedoc(uristring):
    """Return :const:`True` if `uristring` is a same-document reference."""
    return urisplit(uristring).issamedoc()

600 

601 

602# TBD: move compose to its own submodule? 

603 

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#
# Anchored with \A...\Z rather than ^...$ because "$" also matches just
# before a trailing newline, which would let b"http\n" through.
_SCHEME_RE = re.compile(rb"\A[A-Za-z][A-Za-z0-9+.-]*\Z")

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
#
# re.DOTALL plus \Z make these patterns match every input string, so
# ``.match(...)`` never returns None; embedded newlines end up in the
# userinfo/host groups instead of making the match fail.
_AUTHORITY_RE_BYTES = re.compile(
    rb"\A(?:(.*)@)?(.*?)(?::([0-9]*))?\Z", flags=re.DOTALL
)
_AUTHORITY_RE_STR = re.compile(
    r"\A(?:(.*)@)?(.*?)(?::([0-9]*))?\Z", flags=re.DOTALL
)

610 

# safe component characters: passed as the `safe` argument of uriencode()
# when encoding the respective URI component
_SAFE_USERINFO = SUB_DELIMS + ":"
_SAFE_HOST = SUB_DELIMS
_SAFE_PATH = SUB_DELIMS + ":@/"
_SAFE_QUERY = SUB_DELIMS + ":@/?"
_SAFE_FRAGMENT = SUB_DELIMS + ":@/?"

617 

618 

def _scheme(scheme):
    """Validate a bytes scheme name and return it in lowercase.

    Raises :class:`ValueError` if `scheme` is not a valid scheme name.
    """
    # fullmatch() requires the pattern to cover the entire input, so a
    # scheme with a trailing newline is rejected even though "$" in the
    # pattern would match just before that newline.
    if _SCHEME_RE.fullmatch(scheme):
        return scheme.lower()
    else:
        raise ValueError("Invalid scheme component")

624 

625 

def _authority(userinfo, host, port, encoding):
    """Build the encoded authority component from its subcomponents.

    `host` may be an :mod:`ipaddress` address object, bytes, str or None;
    `port` may be a number, bytes, str or None.  Returns the authority
    as bytes, or None if nothing was produced for any subcomponent.
    """
    authority = []

    if userinfo is not None:
        authority.append(uriencode(userinfo, _SAFE_USERINFO, encoding))
        authority.append(b"@")

    if isinstance(host, ipaddress.IPv6Address):
        authority.append(b"[" + host.compressed.encode() + b"]")
    elif isinstance(host, ipaddress.IPv4Address):
        authority.append(host.compressed.encode())
    elif isinstance(host, bytes):
        authority.append(_host(host))
    elif host is not None:
        authority.append(_host(host.encode("utf-8")))

    if isinstance(port, numbers.Number):
        authority.append(_port(str(port).encode()))
    elif isinstance(port, bytes):
        authority.append(_port(port))
    elif port is not None:
        authority.append(_port(port.encode()))

    return b"".join(authority) if authority else None

650 

651 

def _ip_literal(address):
    """Return the bracketed, compressed bytes form of an IPv6 address string.

    Raises :class:`ValueError` if `address` starts with the IPvFuture
    version flag ("v", matched case-insensitively per RFC 3986 3.2.2)
    or is not a valid IPv6 address.
    """
    if address.startswith(("v", "V")):
        raise ValueError("Address mechanism not supported")
    else:
        return b"[" + ipaddress.IPv6Address(address).compressed.encode() + b"]"

657 

658 

def _host(host):
    """Return the encoded form of a bytes host subcomponent.

    Bracketed hosts must be IP literals (errors propagate as
    :class:`ValueError`).  Unbracketed hosts that parse as an IPv6
    address are bracketed; anything else is lowercased and
    percent-encoded.
    """
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    if host.startswith(b"[") and host.endswith(b"]"):
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        return _ip_literal(host.decode("utf-8"))
    except ValueError:
        return uriencode(host.lower(), _SAFE_HOST, "utf-8")

671 

672 

def _port(port):
    """Return ``b":" + port`` for a non-empty all-digit bytes port.

    An empty port yields ``b""``; any non-digit byte raises
    :class:`ValueError`.
    """
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    if port.lstrip(b"0123456789"):
        raise ValueError("Invalid port subcomponent")
    elif port:
        return b":" + port
    else:
        return b""

683 

684 

def _querylist(items, sep, encoding):
    """Encode an iterable of ``(key, value)`` pairs as a bytes query string.

    A value of None produces the bare name; non-string values are
    converted with :func:`str` before encoding.  `sep` is a str: it is
    removed from the safe characters so it is percent-encoded inside
    names and values, and it joins the encoded terms.
    """
    terms = []
    append = terms.append
    safe = _SAFE_QUERY.replace(sep, "")
    for key, value in items:
        name = uriencode(key, safe, encoding)
        if value is None:
            append(name)
        elif isinstance(value, (bytes, str)):
            append(name + b"=" + uriencode(value, safe, encoding))
        else:
            append(name + b"=" + uriencode(str(value), safe, encoding))
    return sep.encode("ascii").join(terms)

698 

699 

def _querydict(mapping, sep, encoding):
    """Encode a mapping of query variables as a bytes query string.

    A bytes/str value or a non-iterable value contributes one pair; any
    other iterable value contributes one pair per element.
    """
    items = []
    for key, value in mapping.items():
        if isinstance(value, (bytes, str)):
            # strings are iterable but must be treated as single values
            items.append((key, value))
        elif isinstance(value, collections.abc.Iterable):
            items.extend([(key, v) for v in value])
        else:
            items.append((key, value))
    return _querylist(items, sep, encoding)

710 

711 

def uricompose(
    scheme=None,
    authority=None,
    path="",
    query=None,
    fragment=None,
    userinfo=None,
    host=None,
    port=None,
    querysep="&",
    encoding="utf-8",
):
    """Compose a URI reference string from its individual components.

    `authority` may be None, a bytes/str authority string or a
    three-item ``(userinfo, host, port)`` iterable; the `userinfo`,
    `host` and `port` arguments, when not None, override the
    corresponding authority item.  `query` may be None, a bytes/str
    query string, a mapping or an iterable of ``(key, value)`` pairs.

    Returns the URI reference as a str.  Raises :class:`TypeError` for
    an unsupported `authority` or `query` type and :class:`ValueError`
    for invalid component values.
    """

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if isinstance(scheme, bytes):
        scheme = _scheme(scheme)
    elif scheme is not None:
        scheme = _scheme(scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        authority = (None, None, None)
    elif isinstance(authority, (bytes, str)):
        if isinstance(authority, bytes):
            match = _AUTHORITY_RE_BYTES.match(authority)
        else:
            match = _AUTHORITY_RE_STR.match(authority)
        # The pattern may fail to match (e.g. an embedded newline);
        # report that as a value error rather than an AttributeError
        # from calling .groups() on None.
        if match is None:
            raise ValueError("Invalid authority component")
        authority = match.groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError("Invalid authority type")
    elif len(authority) != 3:
        raise ValueError("Invalid authority length")
    authority = _authority(
        userinfo if userinfo is not None else authority[0],
        host if host is not None else authority[1],
        port if port is not None else authority[2],
        encoding,
    )

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is not None and path and not path.startswith(b"/"):
        raise ValueError("Invalid path with authority component")
    if authority is None and path.startswith(b"//"):
        raise ValueError("Invalid path without authority component")

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    if scheme is None and authority is None and not path.startswith(b"/"):
        if b":" in path.partition(b"/")[0]:
            path = b"./" + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if isinstance(query, (bytes, str)):
        query = uriencode(query, _SAFE_QUERY, encoding)
    elif isinstance(query, collections.abc.Mapping):
        query = _querydict(query, querysep, encoding)
    elif isinstance(query, collections.abc.Iterable):
        query = _querylist(query, querysep, encoding)
    elif query is not None:
        raise TypeError("Invalid query type")

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    return uriunsplit((scheme, authority, path, query, fragment)).decode()