Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/uritools/__init__.py: 37%

386 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 07:11 +0000

1"""RFC 3986 compliant, scheme-agnostic replacement for `urllib.parse`. 

2 

3This module defines RFC 3986 compliant replacements for the most 

4commonly used functions of the Python Standard Library 

5:mod:`urllib.parse` module. 

6 

7""" 

8 

9import collections 

10import collections.abc 

11import ipaddress 

12import numbers 

13import re 

14from string import hexdigits 

15 

16 

17__all__ = ( 

18 "GEN_DELIMS", 

19 "RESERVED", 

20 "SUB_DELIMS", 

21 "UNRESERVED", 

22 "isabspath", 

23 "isabsuri", 

24 "isnetpath", 

25 "isrelpath", 

26 "issamedoc", 

27 "isuri", 

28 "uricompose", 

29 "uridecode", 

30 "uridefrag", 

31 "uriencode", 

32 "urijoin", 

33 "urisplit", 

34 "uriunsplit", 

35) 

36 

37__version__ = "4.0.1" 

38 

39 

# RFC 3986 2.2. Reserved Characters
#
#   reserved      = gen-delims / sub-delims
#
#   gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
#
#   sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
#                 / "*" / "+" / "," / ";" / "="
#
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS = "!$&'()*+,;="
RESERVED = "".join((GEN_DELIMS, SUB_DELIMS))

# RFC 3986 2.3. Unreserved Characters
#
#   unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
#
UNRESERVED = (
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-._~"
)

# Byte values of the unreserved characters, for fast membership tests.
_unreserved = frozenset(UNRESERVED.encode())

# Encoding tables: each value is a 256-entry list mapping a byte value to
# its encoded form.  The b"" entry is the base table, in which only the
# unreserved bytes are left as they are.
#
# RFC 3986 2.1: For consistency, URI producers and normalizers should
# use uppercase hexadecimal digits for all percent-encodings.
_encoded = {
    b"": [
        bytes((octet,)) if octet in _unreserved else b"%%%02X" % octet
        for octet in range(256)
    ]
}

# Decoding table: every two-character hex pair (upper-, lower- and
# mixed-case), as bytes, mapped to the single byte it denotes.
_decoded = {
    pair.encode(): bytes.fromhex(pair)
    for pair in (high + low for high in hexdigits for low in hexdigits)
}

74 

75 

def uriencode(uristring, safe="", encoding="utf-8", errors="strict"):
    """Percent-encode a URI string or string component.

    `uristring` and `safe` may each be :class:`bytes` or :class:`str`;
    a string `uristring` is first encoded with `encoding` and `errors`,
    a string `safe` with ASCII.  Bytes listed in `safe` are copied
    unchanged in addition to those the base table leaves alone.  The
    result is always :class:`bytes`.

    """
    if isinstance(uristring, bytes):
        octets = uristring
    else:
        octets = uristring.encode(encoding, errors)
    safe_bytes = safe if isinstance(safe, bytes) else safe.encode("ascii")

    table = _encoded.get(safe_bytes)
    if table is None:
        # First use of this `safe` set: derive its table from the base
        # table and remember it for subsequent calls.
        table = list(_encoded[b""])
        for octet in safe_bytes:
            table[octet] = bytes((octet,))
        _encoded[safe_bytes] = table
    return b"".join([table[octet] for octet in octets])

90 

91 

def uridecode(uristring, encoding="utf-8", errors="strict"):
    """Replace percent-escapes in a URI string or string component.

    A string argument is first encoded with `encoding` (ASCII when
    `encoding` is :const:`None`).  A "%" that is not followed by two
    hexadecimal digits is left in place.  The result is decoded with
    `encoding` and `errors`, or returned as :class:`bytes` when
    `encoding` is :const:`None`.

    """
    if isinstance(uristring, bytes):
        raw = uristring
    else:
        raw = uristring.encode(encoding or "ascii", errors)

    head, *escaped = raw.split(b"%")
    chunks = [head]
    for piece in escaped:
        pair = piece[:2]
        # unknown pairs fall back to the literal "%" plus what followed it
        chunks.append(_decoded.get(pair, b"%" + pair))
        chunks.append(piece[2:])

    decoded = b"".join(chunks)
    if encoding is None:
        return decoded
    return decoded.decode(encoding, errors)

107 

108 

class DefragResult(collections.namedtuple("DefragResult", "uri fragment")):
    """Two-item result of :func:`uridefrag`: the URI without its
    fragment, and the fragment itself (:const:`None` if absent)."""

    __slots__ = ()  # prevent creation of instance dictionary

    def geturi(self):
        """Return the URI with its fragment, if any, re-attached."""
        base, frag = self
        if frag is None:
            return base
        # use a separator of the same type as the fragment
        hash_sign = b"#" if isinstance(frag, bytes) else "#"
        return base + hash_sign + frag

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the percent-decoded fragment, or `default` when the
        URI had no fragment component.

        """
        if self.fragment is None:
            return default
        return uridecode(self.fragment, encoding, errors)

134 

135 

class SplitResult(
    collections.namedtuple("SplitResult", "scheme authority path query fragment")
):
    """Base class to hold :func:`urisplit` results.

    The five fields hold the raw components of a URI reference.  The
    delimiter constants and the ``RE`` pattern that the methods access
    through ``self``/``cls`` (``COLON``, ``SLASH``, ``QUEST``, ``HASH``,
    ``LBRACKET``, ``RBRACKET``, ``AT``, ``DOT``, ``DOTDOT``, ``EMPTY``,
    ``EQ``, ``DIGITS``) are not defined here; a subclass has to provide
    them in the same type (``str`` or ``bytes``) as the components.

    """

    __slots__ = ()  # prevent creation of instance dictionary

    @property
    def userinfo(self):
        """The raw userinfo subcomponent: everything before the last
        "@" of the authority, or :const:`None` if there is no authority
        or no "@".

        """
        authority = self.authority
        if authority is None:
            return None
        userinfo, present, _ = authority.rpartition(self.AT)
        return userinfo if present else None

    @property
    def host(self):
        """The raw host subcomponent (IP literals keep their brackets),
        or :const:`None` if there is no authority.

        """
        authority = self.authority
        if authority is None:
            return None
        _, _, hostinfo = authority.rpartition(self.AT)
        host, present, port = hostinfo.rpartition(self.COLON)
        # Only strip a ":port" suffix when a ":" was actually found and is
        # followed by nothing but digits.  Without the `present` check an
        # all-digit host with no port (e.g. "123") would itself be taken
        # for the port, leaving an empty host.  A non-digit tail means the
        # last ":" belongs to the host (e.g. inside "[::1]").
        if present and not port.lstrip(self.DIGITS):
            return host
        return hostinfo

    @property
    def port(self):
        """The raw port subcomponent (possibly empty), or :const:`None`
        if there is no authority or it does not end in ":" followed
        only by digits.

        """
        authority = self.authority
        if authority is None:
            return None
        _, present, port = authority.rpartition(self.COLON)
        if present and not port.lstrip(self.DIGITS):
            return port
        return None

    def geturi(self):
        """Return the re-combined version of the original URI reference as a
        string.

        """
        scheme, authority, path, query, fragment = self

        # RFC 3986 5.3. Component Recomposition
        result = []
        if scheme is not None:
            result.extend([scheme, self.COLON])
        if authority is not None:
            result.extend([self.SLASH, self.SLASH, authority])
        result.append(path)
        if query is not None:
            result.extend([self.QUEST, query])
        if fragment is not None:
            result.extend([self.HASH, fragment])
        return self.EMPTY.join(result)

    def getscheme(self, default=None):
        """Return the URI scheme in canonical (lowercase) form, or `default`
        if the original URI reference did not contain a scheme component.

        A :class:`bytes` scheme is decoded as ASCII, so the result is
        always a string.

        """
        scheme = self.scheme
        if scheme is None:
            return default
        elif isinstance(scheme, bytes):
            return scheme.decode("ascii").lower()
        else:
            return scheme.lower()

    def getauthority(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo, host and port subcomponents of the URI
        authority as a three-item tuple.

        `default`, if given, must be a three-item iterable supplying the
        fallback for each of the three subcomponents; otherwise
        :exc:`TypeError` or :exc:`ValueError` is raised.

        """
        # TBD: (userinfo, host, port) kwargs, default string?
        if default is None:
            default = (None, None, None)
        elif not isinstance(default, collections.abc.Iterable):
            raise TypeError("Invalid default type")
        elif len(default) != 3:
            raise ValueError("Invalid default length")
        # TODO: this could be much more efficient by using a dedicated regex
        return (
            self.getuserinfo(default[0], encoding, errors),
            self.gethost(default[1], errors),
            self.getport(default[2]),
        )

    def getuserinfo(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded userinfo subcomponent of the URI authority, or
        `default` if the original URI reference did not contain a
        userinfo field.

        """
        userinfo = self.userinfo
        if userinfo is None:
            return default
        return uridecode(userinfo, encoding, errors)

    def gethost(self, default=None, errors="strict"):
        """Return the decoded host subcomponent of the URI authority as a
        string or an :mod:`ipaddress` address object, or `default` if
        the original URI reference did not contain a host.

        An empty host also yields `default`, but only when `default` is
        not :const:`None`.  Registered names are decoded as UTF-8 and
        lower-cased.  :exc:`ValueError` is raised for unbalanced
        brackets and for IP literals that cannot be parsed as an IPv6
        address.

        """
        host = self.host
        if host is None or (not host and default is not None):
            return default
        elif host.startswith(self.LBRACKET) and host.endswith(self.RBRACKET):
            return self.__parse_ip_literal(host[1:-1])
        elif host.startswith(self.LBRACKET) or host.endswith(self.RBRACKET):
            raise ValueError("Invalid host %r" % host)
        # TODO: faster check for IPv4 address?
        try:
            if isinstance(host, bytes):
                return ipaddress.IPv4Address(host.decode("ascii"))
            else:
                return ipaddress.IPv4Address(host)
        except ValueError:
            # not a dotted-quad address: treat it as a registered name
            return uridecode(host, "utf-8", errors).lower()

    def getport(self, default=None):
        """Return the port subcomponent of the URI authority as an
        :class:`int`, or `default` if the original URI reference did
        not contain a port or if the port was empty.

        """
        port = self.port
        if port:
            return int(port)
        return default

    def getpath(self, encoding="utf-8", errors="strict"):
        """Return the normalized decoded URI path."""
        path = self.__remove_dot_segments(self.path)
        return uridecode(path, encoding, errors)

    def getquery(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded query string, or `default` if the original URI
        reference did not contain a query component.

        """
        query = self.query
        if query is None:
            return default
        return uridecode(query, encoding, errors)

    def getquerydict(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep` and return a dictionary of query variables.
        The dictionary keys are the unique query variable names and
        the values are lists of values for each name.

        """
        # (local name chosen so as not to shadow the builtin ``dict``)
        result = collections.defaultdict(list)
        for name, value in self.getquerylist(sep, encoding, errors):
            result[name].append(value)
        return result

    def getquerylist(self, sep="&", encoding="utf-8", errors="strict"):
        """Split the query component into individual `name=value` pairs
        separated by `sep`, and return a list of `(name, value)`
        tuples.

        `sep` may be :class:`str` or :class:`bytes` regardless of the
        type of the query.  Empty pairs are skipped; a pair without
        "=" gets the value :const:`None`.

        """
        if not self.query:
            return []
        elif isinstance(sep, type(self.query)):
            qsl = self.query.split(sep)
        elif isinstance(sep, bytes):
            qsl = self.query.split(sep.decode("ascii"))
        else:
            qsl = self.query.split(sep.encode("ascii"))
        items = []
        for qs in qsl:
            if not qs:
                continue
            raw_name, eq, raw_value = qs.partition(self.EQ)
            name = uridecode(raw_name, encoding, errors)
            value = uridecode(raw_value, encoding, errors) if eq else None
            items.append((name, value))
        return items

    def getfragment(self, default=None, encoding="utf-8", errors="strict"):
        """Return the decoded fragment identifier, or `default` if the
        original URI reference did not contain a fragment component.

        """
        fragment = self.fragment
        if fragment is None:
            return default
        return uridecode(fragment, encoding, errors)

    def isuri(self):
        """Return :const:`True` if this is a URI."""
        return self.scheme is not None

    def isabsuri(self):
        """Return :const:`True` if this is an absolute URI."""
        return self.scheme is not None and self.fragment is None

    def isnetpath(self):
        """Return :const:`True` if this is a network-path reference."""
        return self.scheme is None and self.authority is not None

    def isabspath(self):
        """Return :const:`True` if this is an absolute-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and self.path.startswith(self.SLASH)
        )

    def isrelpath(self):
        """Return :const:`True` if this is a relative-path reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path.startswith(self.SLASH)
        )

    def issamedoc(self):
        """Return :const:`True` if this is a same-document reference."""
        return (
            self.scheme is None
            and self.authority is None
            and not self.path
            and self.query is None
        )

    def transform(self, ref, strict=False):
        """Transform a URI reference relative to `self` into a
        :class:`SplitResult` representing its target URI.

        `ref` is split with this class's ``RE`` pattern.  Unless
        `strict` is true, a scheme in `ref` that equals the base scheme
        is ignored, i.e. `ref` is then resolved like a reference
        without a scheme.

        """
        scheme, authority, path, query, fragment = self.RE.match(ref).groups()

        # RFC 3986 5.2.2. Transform References
        if scheme is not None and (strict or scheme != self.scheme):
            path = self.__remove_dot_segments(path)
        elif authority is not None:
            scheme = self.scheme
            path = self.__remove_dot_segments(path)
        elif not path:
            scheme = self.scheme
            authority = self.authority
            path = self.path
            query = self.query if query is None else query
        elif path.startswith(self.SLASH):
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(path)
        else:
            scheme = self.scheme
            authority = self.authority
            path = self.__remove_dot_segments(self.__merge(path))
        return type(self)(scheme, authority, path, query, fragment)

    def __merge(self, path):
        # RFC 3986 5.2.3. Merge Paths
        if self.authority is not None and not self.path:
            return self.SLASH + path
        else:
            # replace everything after the last "/" of the base path
            parts = self.path.rpartition(self.SLASH)
            return parts[1].join((parts[0], path))

    @classmethod
    def __remove_dot_segments(cls, path):
        # RFC 3986 5.2.4. Remove Dot Segments
        pseg = []
        for s in path.split(cls.SLASH):
            if s == cls.DOT:
                continue
            elif s != cls.DOTDOT:
                pseg.append(s)
            elif len(pseg) == 1 and not pseg[0]:
                # ".." directly below the root of an absolute path
                continue
            elif pseg and pseg[-1] != cls.DOTDOT:
                pseg.pop()
            else:
                # leading ".." of a relative path cannot be resolved
                pseg.append(s)
        # adjust for trailing '/.' or '/..'
        if path.rpartition(cls.SLASH)[2] in (cls.DOT, cls.DOTDOT):
            pseg.append(cls.EMPTY)
        if path and len(pseg) == 1 and pseg[0] == cls.EMPTY:
            pseg.insert(0, cls.DOT)
        return cls.SLASH.join(pseg)

    @classmethod
    def __parse_ip_literal(cls, address):
        # RFC 3986 3.2.2: In anticipation of future, as-yet-undefined
        # IP literal address formats, an implementation may use an
        # optional version flag to indicate such a format explicitly
        # rather than rely on heuristic determination.
        #
        #  IP-literal = "[" ( IPv6address / IPvFuture  ) "]"
        #
        #  IPvFuture  = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" )
        #
        # If a URI containing an IP-literal that starts with "v"
        # (case-insensitive), indicating that the version flag is
        # present, is dereferenced by an application that does not
        # know the meaning of that version flag, then the application
        # should return an appropriate error for "address mechanism
        # not supported".
        if isinstance(address, bytes):
            address = address.decode("ascii")
        # the version flag is case-insensitive, so test for "V" as well
        if address[:1] in ("v", "V"):
            raise ValueError("address mechanism not supported")
        return ipaddress.IPv6Address(address)

455 

456 

class SplitResultBytes(SplitResult):
    """:class:`SplitResult` flavour whose pattern and delimiter
    constants are :class:`bytes`."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        rb"""
        (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
        (?://([^/?#]*))?                 # authority
        ([^?#]*)                         # path
        (?:\?([^#]*))?                   # query
        (?:\#(.*))?                      # fragment
        """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = b":"
    SLASH = b"/"
    QUEST = b"?"
    HASH = b"#"
    LBRACKET = b"["
    RBRACKET = b"]"
    AT = b"@"

    # RFC 3986 3.3 dot-segments
    DOT = b"."
    DOTDOT = b".."

    EMPTY = b""
    EQ = b"="

    DIGITS = b"0123456789"

490 

491 

class SplitResultString(SplitResult):
    """:class:`SplitResult` flavour whose pattern and delimiter
    constants are :class:`str`."""

    __slots__ = ()  # prevent creation of instance dictionary

    # RFC 3986 Appendix B
    RE = re.compile(
        r"""
        (?:([A-Za-z][A-Za-z0-9+.-]*):)?  # scheme (RFC 3986 3.1)
        (?://([^/?#]*))?                 # authority
        ([^?#]*)                         # path
        (?:\?([^#]*))?                   # query
        (?:\#(.*))?                      # fragment
        """,
        flags=re.VERBOSE,
    )

    # RFC 3986 2.2 gen-delims
    COLON = ":"
    SLASH = "/"
    QUEST = "?"
    HASH = "#"
    LBRACKET = "["
    RBRACKET = "]"
    AT = "@"

    # RFC 3986 3.3 dot-segments
    DOT = "."
    DOTDOT = ".."

    EMPTY = ""
    EQ = "="

    DIGITS = "0123456789"

525 

526 

def uridefrag(uristring):
    """Split a URI reference string at its first "#" and return a
    :class:`DefragResult`; the fragment is :const:`None` if there is
    no "#"."""
    hash_sign = b"#" if isinstance(uristring, bytes) else "#"
    uri, found, fragment = uristring.partition(hash_sign)
    return DefragResult(uri, fragment if found else None)

534 

535 

def urisplit(uristring):
    """Split a well-formed URI reference string into a tuple with five
    components corresponding to a URI's general structure::

      <scheme>://<authority>/<path>?<query>#<fragment>

    The result is a :class:`SplitResultBytes` for :class:`bytes` input
    and a :class:`SplitResultString` otherwise.

    """
    factory = SplitResultBytes if isinstance(uristring, bytes) else SplitResultString
    components = factory.RE.match(uristring).groups()
    return factory(*components)

548 

549 

def uriunsplit(parts):
    """Combine the elements of a five-item iterable into a URI reference's
    string representation.

    The type of the path element (:class:`bytes` or not) selects the
    type of delimiters used to join the components.

    """
    scheme, authority, path, query, fragment = parts
    factory = SplitResultBytes if isinstance(path, bytes) else SplitResultString
    return factory(scheme, authority, path, query, fragment).geturi()

561 

562 

def urijoin(base, ref, strict=False):
    """Convert a URI reference relative to a base URI to its target URI
    string.

    If `base` and `ref` differ in type, whichever of them is
    :class:`bytes` is decoded first, so the result is a string.

    """
    if not isinstance(base, type(ref)):
        if isinstance(base, bytes):
            base = base.decode()
        else:
            ref = ref.decode()
    return urisplit(base).transform(ref, strict).geturi()

574 

575 

def isuri(uristring):
    """Return :const:`True` if `uristring`, once split, is a URI."""
    parts = urisplit(uristring)
    return parts.isuri()

579 

580 

def isabsuri(uristring):
    """Return :const:`True` if `uristring`, once split, is an absolute
    URI."""
    parts = urisplit(uristring)
    return parts.isabsuri()

584 

585 

def isnetpath(uristring):
    """Return :const:`True` if `uristring`, once split, is a
    network-path reference."""
    parts = urisplit(uristring)
    return parts.isnetpath()

589 

590 

def isabspath(uristring):
    """Return :const:`True` if `uristring`, once split, is an
    absolute-path reference."""
    parts = urisplit(uristring)
    return parts.isabspath()

594 

595 

def isrelpath(uristring):
    """Return :const:`True` if `uristring`, once split, is a
    relative-path reference."""
    parts = urisplit(uristring)
    return parts.isrelpath()

599 

600 

def issamedoc(uristring):
    """Return :const:`True` if `uristring`, once split, is a
    same-document reference."""
    parts = urisplit(uristring)
    return parts.issamedoc()

604 

605 

606# TBD: move compose to its own submodule? 

607 

# RFC 3986 3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
#
# Anchored with \Z rather than "$": "$" also matches just before a
# trailing newline, which would let b"http\n" pass as a valid scheme.
_SCHEME_RE = re.compile(rb"^[A-Za-z][A-Za-z0-9+.-]*\Z")

# RFC 3986 3.2: authority = [ userinfo "@" ] host [ ":" port ]
#
# The three groups are userinfo, host and port.  DOTALL together with \Z
# makes the patterns match any input in full: without them a trailing
# newline was silently dropped from the host, and an embedded newline
# produced no match at all.
_AUTHORITY_RE_BYTES = re.compile(
    rb"^(?:(.*)@)?(.*?)(?::([0-9]*))?\Z", flags=re.DOTALL
)
_AUTHORITY_RE_STR = re.compile(
    r"^(?:(.*)@)?(.*?)(?::([0-9]*))?\Z", flags=re.DOTALL
)

614 

# Characters that are passed to uriencode() as `safe` for each component,
# i.e. left unescaped when that component is composed.
_SAFE_USERINFO = SUB_DELIMS + ":"
_SAFE_HOST = SUB_DELIMS
_SAFE_PATH = SUB_DELIMS + ":@/"
# query and fragment additionally allow "?"
_SAFE_QUERY = _SAFE_PATH + "?"
_SAFE_FRAGMENT = _SAFE_QUERY

621 

622 

def _scheme(scheme):
    """Validate a scheme and return it in lowercase.

    `scheme` is matched against the module's ``_SCHEME_RE`` pattern;
    :exc:`ValueError` is raised if it does not conform.

    """
    # fullmatch() instead of match(): a "$" anchor also matches just
    # before a trailing newline, so match() could accept e.g. b"http\n"
    # and the newline would end up in the composed URI.
    if _SCHEME_RE.fullmatch(scheme) is None:
        raise ValueError("Invalid scheme component")
    return scheme.lower()

628 

629 

def _authority(userinfo, host, port, encoding):
    """Encode the given subcomponents and join them into an authority.

    Returns the authority as :class:`bytes`, or :const:`None` when no
    subcomponent contributed anything.

    """
    pieces = []

    if userinfo is not None:
        pieces += [uriencode(userinfo, _SAFE_USERINFO, encoding), b"@"]

    if isinstance(host, ipaddress.IPv6Address):
        pieces.append(b"[" + host.compressed.encode() + b"]")
    elif isinstance(host, ipaddress.IPv4Address):
        pieces.append(host.compressed.encode())
    elif host is not None:
        raw_host = host if isinstance(host, bytes) else host.encode("utf-8")
        pieces.append(_host(raw_host))

    if port is not None:
        if isinstance(port, numbers.Number):
            raw_port = str(port).encode()
        elif isinstance(port, bytes):
            raw_port = port
        else:
            raw_port = port.encode()
        pieces.append(_port(raw_port))

    if not pieces:
        return None
    return b"".join(pieces)

654 

655 

def _ip_literal(address):
    """Return `address` as a bracketed, compressed IPv6 literal.

    `address` is a string without the surrounding brackets.  The result
    is :class:`bytes`.  :exc:`ValueError` is raised if `address` carries
    an IPvFuture version flag or cannot be parsed as an IPv6 address.

    """
    # RFC 3986 3.2.2: the "v" version flag of an IPvFuture literal is
    # case-insensitive, so "V" has to be rejected the same way.
    if address[:1] in ("v", "V"):
        raise ValueError("Address mechanism not supported")
    return b"[" + ipaddress.IPv6Address(address).compressed.encode() + b"]"

661 

662 

def _host(host):
    """Encode a host given as :class:`bytes`.

    Bracketed hosts must be valid IP literals.  Anything else is first
    tried as a bare IPv6 address and, failing that, lower-cased and
    percent-encoded.

    """
    # RFC 3986 3.2.3: Although host is case-insensitive, producers and
    # normalizers should use lowercase for registered names and
    # hexadecimal addresses for the sake of uniformity, while only
    # using uppercase letters for percent-encodings.
    if host.startswith(b"[") and host.endswith(b"]"):
        return _ip_literal(host[1:-1].decode())
    # check for IPv6 addresses as returned by SplitResult.gethost()
    try:
        literal = _ip_literal(host.decode("utf-8"))
    except ValueError:
        return uriencode(host.lower(), _SAFE_HOST, "utf-8")
    return literal

675 

676 

def _port(port):
    """Return ``b":" + port`` for a port consisting of digits only, or
    ``b""`` for an empty port; raise :exc:`ValueError` otherwise."""
    # RFC 3986 3.2.3: URI producers and normalizers should omit the
    # port component and its ":" delimiter if port is empty or if its
    # value would be the same as that of the scheme's default.
    if not port:
        return b""
    # anything left after stripping the leading digits is not a digit
    if port.lstrip(b"0123456789"):
        raise ValueError("Invalid port subcomponent")
    return b":" + port

687 

688 

def _querylist(items, sep, encoding):
    """Encode an iterable of `(key, value)` pairs as a query string.

    A value of :const:`None` yields the bare key; values that are
    neither :class:`bytes` nor :class:`str` are converted with
    :func:`str` first.  The terms are joined with `sep`, which is
    itself excluded from the characters left unescaped.  Returns
    :class:`bytes`.

    """
    safe = _SAFE_QUERY.replace(sep, "")
    terms = []
    for key, value in items:
        term = uriencode(key, safe, encoding)
        if value is not None:
            if not isinstance(value, (bytes, str)):
                value = str(value)
            term = term + b"=" + uriencode(value, safe, encoding)
        terms.append(term)
    return sep.encode("ascii").join(terms)

702 

703 

def _querydict(mapping, sep, encoding):
    """Encode a mapping as a query string.

    A value that is iterable, but not :class:`bytes` or :class:`str`,
    contributes one pair per element; any other value contributes a
    single pair.

    """
    pairs = []
    for key, value in mapping.items():
        is_scalar = isinstance(value, (bytes, str)) or not isinstance(
            value, collections.abc.Iterable
        )
        if is_scalar:
            pairs.append((key, value))
        else:
            pairs.extend((key, item) for item in value)
    return _querylist(pairs, sep, encoding)

714 

715 

def uricompose(
    scheme=None,
    authority=None,
    path="",
    query=None,
    fragment=None,
    userinfo=None,
    host=None,
    port=None,
    querysep="&",
    encoding="utf-8",
):
    """Compose a URI reference string from its individual components.

    `authority` may be a string, :class:`bytes`, or a three-item
    iterable of userinfo, host and port; the `userinfo`, `host` and
    `port` arguments, when not :const:`None`, take precedence over the
    corresponding part of `authority`.  `query` may be a string,
    :class:`bytes`, a mapping, or an iterable of `(key, value)` pairs.
    Raises :exc:`TypeError` or :exc:`ValueError` for components of the
    wrong type or form.  The result is a :class:`str`.

    """

    # RFC 3986 3.1: Scheme names consist of a sequence of characters
    # beginning with a letter and followed by any combination of
    # letters, digits, plus ("+"), period ("."), or hyphen ("-").
    # Although schemes are case-insensitive, the canonical form is
    # lowercase and documents that specify schemes must do so with
    # lowercase letters.  An implementation should accept uppercase
    # letters as equivalent to lowercase in scheme names (e.g., allow
    # "HTTP" as well as "http") for the sake of robustness but should
    # only produce lowercase scheme names for consistency.
    if scheme is not None:
        scheme = _scheme(scheme if isinstance(scheme, bytes) else scheme.encode())

    # authority must be string type or three-item iterable
    if authority is None:
        parts = (None, None, None)
    elif isinstance(authority, bytes):
        parts = _AUTHORITY_RE_BYTES.match(authority).groups()
    elif isinstance(authority, str):
        parts = _AUTHORITY_RE_STR.match(authority).groups()
    elif not isinstance(authority, collections.abc.Iterable):
        raise TypeError("Invalid authority type")
    elif len(authority) != 3:
        raise ValueError("Invalid authority length")
    else:
        parts = authority
    # explicit keyword arguments override the parts of `authority`
    authority = _authority(
        parts[0] if userinfo is None else userinfo,
        parts[1] if host is None else host,
        parts[2] if port is None else port,
        encoding,
    )

    # RFC 3986 3.3: If a URI contains an authority component, then the
    # path component must either be empty or begin with a slash ("/")
    # character.  If a URI does not contain an authority component,
    # then the path cannot begin with two slash characters ("//").
    path = uriencode(path, _SAFE_PATH, encoding)
    if authority is None:
        if path.startswith(b"//"):
            raise ValueError("Invalid path without authority component")
    elif path and not path.startswith(b"/"):
        raise ValueError("Invalid path with authority component")

    # RFC 3986 4.2: A path segment that contains a colon character
    # (e.g., "this:that") cannot be used as the first segment of a
    # relative-path reference, as it would be mistaken for a scheme
    # name.  Such a segment must be preceded by a dot-segment (e.g.,
    # "./this:that") to make a relative-path reference.
    is_relative = scheme is None and authority is None and not path.startswith(b"/")
    if is_relative and b":" in path.partition(b"/")[0]:
        path = b"./" + path

    # RFC 3986 3.4: The characters slash ("/") and question mark ("?")
    # may represent data within the query component.  Beware that some
    # older, erroneous implementations may not handle such data
    # correctly when it is used as the base URI for relative
    # references (Section 5.1), apparently because they fail to
    # distinguish query data from path data when looking for
    # hierarchical separators.  However, as query components are often
    # used to carry identifying information in the form of "key=value"
    # pairs and one frequently used value is a reference to another
    # URI, it is sometimes better for usability to avoid percent-
    # encoding those characters.
    if query is not None:
        if isinstance(query, (bytes, str)):
            query = uriencode(query, _SAFE_QUERY, encoding)
        elif isinstance(query, collections.abc.Mapping):
            query = _querydict(query, querysep, encoding)
        elif isinstance(query, collections.abc.Iterable):
            query = _querylist(query, querysep, encoding)
        else:
            raise TypeError("Invalid query type")

    # RFC 3986 3.5: The characters slash ("/") and question mark ("?")
    # are allowed to represent data within the fragment identifier.
    # Beware that some older, erroneous implementations may not handle
    # this data correctly when it is used as the base URI for relative
    # references.
    if fragment is not None:
        fragment = uriencode(fragment, _SAFE_FRAGMENT, encoding)

    # return URI reference as `str`
    composed = uriunsplit((scheme, authority, path, query, fragment))
    return composed.decode()