Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/future/backports/urllib/parse.py: 40%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

553 statements  

1""" 

2Ported using Python-Future from the Python 3.3 standard library. 

3 

4Parse (absolute and relative) URLs. 

5 

6urlparse module is based upon the following RFC specifications. 

7 

8RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding 

9and L. Masinter, January 2005. 

10 

11RFC 2732 : "Format for Literal IPv6 Addresses in URL's" by R. Hinden, B. Carpenter 

12and L.Masinter, December 1999. 

13 

14RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. 

15Berners-Lee, R. Fielding, and L. Masinter, August 1998. 

16 

17RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. 

18 

19RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June 

201995. 

21 

22RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. 

23McCahill, December 1994 

24 

25RFC 3986 is considered the current standard and any future changes to 

26urlparse module should conform with it. The urlparse module is 

27currently not entirely compliant with this RFC due to de facto 

28scenarios for parsing, and for backward compatibility purposes, some 

29parsing quirks from older RFCs are retained. The test cases in 

30test_urlparse.py provide a good indicator of parsing behavior. 

31""" 

32from __future__ import absolute_import, division, unicode_literals 

33from future.builtins import bytes, chr, dict, int, range, str 

34from future.utils import raise_with_traceback 

35 

36import re 

37import sys 

38import collections 

39 

40__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", 

41 "urlsplit", "urlunsplit", "urlencode", "parse_qs", 

42 "parse_qsl", "quote", "quote_plus", "quote_from_bytes", 

43 "unquote", "unquote_plus", "unquote_to_bytes"] 

44 

# A classification of schemes ('' means apply by default)

# Schemes for which urljoin() resolves a relative reference against the base;
# for any other scheme urljoin() returns the second argument unchanged.
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp',
                 'svn', 'svn+ssh']
# Schemes that take a '//netloc' part; consulted by urlunsplit() and urljoin().
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh']
# Schemes for which urlparse() splits ';params' off the path.
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp', 'tel']

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters valid in scheme names (checked character by character in
# urlsplit() before a 'prefix:' is accepted as a scheme)
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# XXX: Consider replacing with functools.lru_cache
# urlsplit() empties the caches (via clear_cache()) once _parse_cache holds
# MAX_CACHE_SIZE entries.
MAX_CACHE_SIZE = 20
_parse_cache = {}

77 

def clear_cache():
    """Empty both module-level caches: the urlsplit() parse cache and the
    quoter cache used by quote_from_bytes()."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()

82 

83 

# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
_implicit_encoding = 'ascii'
_implicit_errors = 'strict'


def _noop(obj):
    """Identity function: the result coercion used when the input was str."""
    return obj


def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    """Result coercion for non-str input: encode *obj* back to bytes."""
    encoded = obj.encode(encoding, errors)
    return encoded


def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    """Decode each item of *args*; any falsy item becomes the empty string."""
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)

103 

def _coerce_args(*args):
    """Return the arguments as str plus a matching result-coercion function.

    The first argument decides the mode: when it is a str the arguments are
    returned untouched followed by _noop; otherwise every argument is decoded
    and _encode_result is appended.  Raises TypeError when a later, non-empty
    argument disagrees with the first about being a str.
    """
    first_is_str = isinstance(args[0], str)
    # Empty values are exempt from the check to support the "scheme=''"
    # default argument of some functions.
    mixed = any(extra and isinstance(extra, str) != first_is_str
                for extra in args[1:])
    if mixed:
        raise TypeError("Cannot mix str and non-str arguments")
    if not first_is_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)

119 

120# Result objects are more helpful than simple tuples 

class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        """Encode every field and build the bytes counterpart result."""
        return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))


class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        """Decode every field and build the str counterpart result."""
        return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))


class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element"""
    __slots__ = ()

    @property
    def username(self):
        """User name from the netloc, or None when there is no '@'."""
        return self._userinfo[0]

    @property
    def password(self):
        """Password from the netloc, or None when none was given."""
        return self._userinfo[1]

    @property
    def hostname(self):
        """Lower-cased host name, or None when the host part is empty."""
        hostname = self._hostinfo[0]
        if not hostname:
            hostname = None
        elif hostname is not None:
            hostname = hostname.lower()
        return hostname

    @property
    def port(self):
        """Port as an int, or None when absent, empty or outside 0-65535.

        A non-numeric port still raises ValueError from int().
        """
        port = self._hostinfo[1]
        if port is not None:
            port = int(port, 10)
            # Return None on an illegal port
            if not (0 <= port <= 65535):
                return None
        return port


class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """Netloc accessors for results whose fields are str."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last '@'."""
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition('@')
        if have_info:
            username, have_password, password = userinfo.partition(':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last '@'.

        The port is None both when there is no ':' and when nothing follows
        the ':' ("host:"), so the port property does not call int('').
        """
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition('@')
        _, have_open_br, bracketed = hostinfo.partition('[')
        if have_open_br:
            # Bracketed (IPv6-style) host: the port follows the closing ']'
            hostname, _, port = bracketed.partition(']')
            _, _, port = port.partition(':')
        else:
            hostname, _, port = hostinfo.partition(':')
        if not port:
            port = None
        return hostname, port


class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """Netloc accessors for results whose fields are bytes."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """Return (username, password) taken from before the last b'@'."""
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition(b'@')
        if have_info:
            username, have_password, password = userinfo.partition(b':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        """Return (hostname, port) taken from after the last b'@'.

        The port is None both when there is no b':' and when nothing follows
        it, so the port property does not call int(b'').
        """
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition(b'@')
        _, have_open_br, bracketed = hostinfo.partition(b'[')
        if have_open_br:
            # Bracketed (IPv6-style) host: the port follows the closing b']'
            hostname, _, port = bracketed.partition(b']')
            _, _, port = port.partition(b':')
        else:
            hostname, _, port = hostinfo.partition(b':')
        if not port:
            port = None
        return hostname, port

227 

228 

229from collections import namedtuple 

230 

231_DefragResultBase = namedtuple('DefragResult', 'url fragment') 

232_SplitResultBase = namedtuple('SplitResult', 'scheme netloc path query fragment') 

233_ParseResultBase = namedtuple('ParseResult', 'scheme netloc path params query fragment') 

234 

235# For backwards compatibility, alias _NetlocResultMixinStr 

236# ResultBase is no longer part of the documented API, but it is 

237# retained since deprecating it isn't worth the hassle 

238ResultBase = _NetlocResultMixinStr 

239 

240# Structured result objects for string data 

class DefragResult(_DefragResultBase, _ResultMixinStr):
    """(url, fragment) pair with str fields, as returned by urldefrag()."""
    __slots__ = ()

    def geturl(self):
        """Return the URL, with '#fragment' appended when the fragment is
        non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment

248 

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """5-field split result with str fields, as built by urlsplit()."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL using urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt

253 

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """6-field parse result with str fields, as built by urlparse()."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL using urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt

258 

259# Structured result objects for bytes data 

class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """(url, fragment) pair whose fields are bytes."""
    __slots__ = ()

    def geturl(self):
        """Return the URL, with b'#fragment' appended when the fragment is
        non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment

267 

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """5-field split result whose fields are bytes."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL using urlunsplit()."""
        rebuilt = urlunsplit(self)
        return rebuilt

272 

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """6-field parse result whose fields are bytes."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the fields into a URL using urlunparse()."""
        rebuilt = urlunparse(self)
        return rebuilt

277 

# Set up the encode/decode result pairs
def _fix_result_transcoding():
    """Link each str result class to its bytes twin and vice versa, so the
    mixins' encode()/decode() know which class to build."""
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for _decoded, _encoded in zip(str_classes, bytes_classes):
        _decoded._encoded_counterpart = _encoded
        _encoded._decoded_counterpart = _decoded


# Run once at import time, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding

291 

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, url, query, fragment = urlsplit(url, scheme,
                                                    allow_fragments)
    # Params are only split off for schemes listed in uses_params.
    params = ''
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    return _coerce_result(
        ParseResult(scheme, netloc, url, params, query, fragment))

307 

def _splitparams(url):
    """Split *url* into (path, params) at a ';'.

    When the path contains a '/', only a ';' at or after the last '/' counts
    and (url, '') is returned if there is none.  Without a '/', the first ';'
    is used; urlparse() only calls this when a ';' is present.
    """
    last_slash = url.rfind('/')
    if last_slash >= 0:
        semi = url.find(';', last_slash)
        if semi < 0:
            return url, ''
    else:
        semi = url.find(';')
    return url[:semi], url[semi + 1:]

316 

def _splitnetloc(url, start=0):
    """Return (netloc, rest): the netloc runs from *start* up to the earliest
    '/', '?' or '#' found at or after *start*, or to the end of *url*."""
    hits = [url.find(mark, start) for mark in '/?#']
    # len(url) is the fallback cut point when no delimiter is present
    cut = min([pos for pos in hits if pos >= 0] + [len(url)])
    return url[start:cut], url[cut:]

324 

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''

    # Scheme detection: a leading "<prefix>:" made only of scheme characters.
    colon = url.find(':')
    if colon > 0:
        prefix, rest = url[:colon], url[colon + 1:]
        if prefix == 'http':
            # Common case: accepted without inspecting what follows the ':'
            scheme, url = prefix.lower(), rest
        elif all(ch in scheme_chars for ch in prefix):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            if not rest or any(ch not in '0123456789' for ch in rest):
                scheme, url = prefix.lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # An IPv6 literal must carry both brackets or neither
        if (('[' in netloc) != (']' in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return _coerce_result(result)

380 

def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment, _coerce_result = (
        _coerce_args(*components))
    # Fold non-empty params back into the path, then defer to urlunsplit().
    path = "%s;%s" % (url, params) if params else url
    joined = urlunsplit((scheme, netloc, path, query, fragment))
    return _coerce_result(joined)

391 

def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
        _coerce_args(*components))
    # Emit '//netloc' when there is a netloc, or when the scheme is one that
    # uses a netloc and the path does not already start with '//'.
    wants_authority = netloc or (
        scheme and scheme in uses_netloc and url[:2] != '//')
    if wants_authority:
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    for marker, part in (('?', query), ('#', fragment)):
        if part:
            url = url + marker + part
    return _coerce_result(url)

410 

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter.

    If either argument is empty the other one is returned as-is.  *url* is
    also returned unchanged when its scheme differs from the base's or is
    not listed in uses_relative.
    """
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    # The base scheme is the default scheme when parsing the reference
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            # The reference has its own netloc: nothing comes from the base
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc
    if path[:1] == '/':
        # Absolute path: only the netloc (set above) is inherited
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    if not path and not params:
        # Empty path: reuse the base path and params, and the base query
        # too unless the reference supplies one
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))
    # Relative path: drop the last base segment and append the reference's
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    while '.' in segments:
        segments.remove('.')
    # Collapse "<name>/.." pairs one at a time, rescanning after each
    # removal; the final segment is not examined by this loop (i < n).
    while 1:
        i = 1
        n = len(segments) - 1
        while i < n:
            if (segments[i] == '..'
                and segments[i-1] not in ('', '..')):
                del segments[i-1:i+1]
                break
            i = i+1
        else:
            break
    # A trailing '..' is handled separately from the loop above
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return _coerce_result(urlunparse((scheme, netloc, '/'.join(segments),
                                      params, query, fragment)))

463 

def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' not in url:
        # No fragment marker: the URL is handed back without reparsing
        return _coerce_result(DefragResult(url, ''))
    s, n, p, a, q, frag = urlparse(url)
    defrag = urlunparse((s, n, p, a, q, ''))
    return _coerce_result(DefragResult(defrag, frag))

479 

_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-hex-digit byte string (either letter case) to the single
# byte it denotes.
_hextobyte = dict(((a + b).encode(), bytes([int(a + b, 16)]))
                  for a in _hexdig for b in _hexdig)


def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return bytes(b'')
    if isinstance(string, str):
        string = string.encode('utf-8')
    ### For Python-Future:
    # It is already a byte-string object, but force it to be newbytes here on
    # Py2:
    string = bytes(string)
    ###
    pieces = string.split(b'%')
    if len(pieces) == 1:
        return string
    out = [pieces[0]]
    for chunk in pieces[1:]:
        decoded = _hextobyte.get(chunk[:2])
        if decoded is None:
            # Not a valid %xx escape: keep the '%' and the text verbatim
            out.extend((b'%', chunk))
        else:
            out.extend((decoded, chunk[2:]))
    return bytes(b'').join(out)

512 

_asciire = re.compile('([\x00-\x7f]+)')


def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        # Attribute access only: fails early for objects that are not
        # string-like
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors
    # The capturing split alternates non-ASCII text (even indexes) with
    # ASCII runs (odd indexes); only the ASCII runs can hold %xx escapes.
    bits = _asciire.split(string)
    res = [bits[0]]
    for ascii_run, other in zip(bits[1::2], bits[2::2]):
        res.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
        res.append(other)
    return ''.join(res)

539 

def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        Returns a dict mapping each name to the list of its values, in the
        order parse_qsl() produced them.
    """
    grouped = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors):
        grouped.setdefault(name, []).append(value)
    return grouped

571 

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were  not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list of (name, value) pairs.
    """
    qs, _coerce_result = _coerce_args(qs)

    def _decode_part(text):
        # '+' stands for a space; after that, undo the %xx escapes
        spaced = text.replace('+', ' ')
        return _coerce_result(unquote(spaced, encoding=encoding,
                                      errors=errors))

    # Fields are separated by '&' or ';'
    fields = [piece for chunk in qs.split('&') for piece in chunk.split(';')]
    result = []
    for field in fields:
        if not field and not strict_parsing:
            continue
        name, equals, value = field.partition('=')
        if not equals:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field,))
            # Handle case of a control-name with no equal sign
            if not keep_blank_values:
                continue
        if value or keep_blank_values:
            result.append((_decode_part(name), _decode_part(value)))
    return result

619 

def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Pluses are converted first, so an escaped '%2B' still yields a '+'
    spaced = string.replace('+', ' ')
    return unquote(spaced, encoding, errors)

628 

# Byte values that are never percent-encoded: ASCII letters, digits, '_.-'
_ALWAYS_SAFE = frozenset(bytes(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                               b'abcdefghijklmnopqrstuvwxyz'
                               b'0123456789'
                               b'_.-'))
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Filled by quote_from_bytes(), keyed by its normalized 'safe' bytes
_safe_quoters = {}


class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).
    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE | frozenset(bytes(safe))

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<Quoter %r>" % dict(self)

    def __missing__(self, b):
        # Handle a cache miss. Store quoted string in cache and return.
        if b in self.safe:
            quoted = chr(b)
        else:
            quoted = '%{0:02X}'.format(b)
        self[b] = quoted
        return quoted

657 

def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects. encoding and
    errors must not be specified unless string is a str (TypeError
    otherwise).

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if not isinstance(string, str):
        # Already bytes-like: there is nothing to encode
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
        return quote_from_bytes(string, safe)
    if not string:
        return string
    codec = 'utf-8' if encoding is None else encoding
    policy = 'strict' if errors is None else errors
    return quote_from_bytes(string.encode(codec, policy), safe)

701 

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values. Plus signs in the original string are escaped unless
    they are included in safe. It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    str_without_space = isinstance(string, str) and ' ' not in string
    bytes_without_space = isinstance(string, bytes) and b' ' not in string
    if str_without_space or bytes_without_space:
        return quote(string, safe, encoding, errors)
    # Keep spaces unescaped through quote() so they can become '+' afterwards
    space = str(' ') if isinstance(safe, str) else bytes(b' ')
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')

718 

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.
    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3F'

    Raises TypeError when bs is neither bytes nor bytearray.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return str('')
    ### For Python-Future:
    bs = bytes(bs)
    ###
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = str(safe).encode('ascii', 'ignore')
    else:
        ### For Python-Future:
        safe = bytes(safe)
        ###
        safe = bytes([c for c in safe if c < 128])
    # Fast path: every byte is already safe, so no escaping is needed
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    # One Quoter lookup function is cached per distinct 'safe' set
    quoter = _safe_quoters.get(safe)
    if quoter is None:
        quoter = Quoter(safe).__getitem__
        _safe_quoters[safe] = quoter
    return str('').join([quoter(char) for char in bs])

746 

def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Keys and values that are bytes are quoted with quote_plus(item, safe);
    anything else is converted with str() and quoted with the safe, encoding
    and errors parameters.
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            tb = sys.exc_info()[2]
            raise_with_traceback(TypeError("not a valid non-string sequence "
                                           "or mapping object"), tb)

    def _quote_item(item):
        # bytes are quoted as-is; everything else goes through str() first
        if isinstance(item, bytes):
            return quote_plus(item, safe)
        return quote_plus(str(item), safe, encoding, errors)

    pairs = []
    for k, v in query:
        key = _quote_item(k)
        if not doseq or isinstance(v, bytes):
            pairs.append(key + '=' + _quote_item(v))
        elif isinstance(v, str):
            pairs.append(key + '=' + quote_plus(v, safe, encoding, errors))
        else:
            try:
                # Is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                pairs.append(key + '=' + _quote_item(v))
            else:
                # loop over the sequence
                for elt in v:
                    pairs.append(key + '=' + _quote_item(elt))
    return str('&').join(pairs)

824 

825# Utilities to parse URLs (most of these return None for missing parts): 

826# unwrap('<URL:type://host/path>') --> 'type://host/path' 

827# splittype('type:opaquestring') --> 'type', 'opaquestring' 

828# splithost('//host[:port]/path') --> 'host[:port]', '/path' 

829# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]' 

830# splitpasswd('user:passwd') -> 'user', 'passwd' 

831# splitport('host:port') --> 'host', 'port' 

832# splitquery('/path?query') --> '/path', 'query' 

833# splittag('/path#tag') --> '/path', 'tag' 

834# splitattr('/path;attr1=value1;attr2=value2;...') -> 

835# '/path', ['attr1=value1', 'attr2=value2', ...] 

836# splitvalue('attr=value') --> 'attr', 'value' 

837# urllib.parse.unquote('abc%20def') -> 'abc def' 

838# quote('abc def') -> 'abc%20def' 

839 

def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    Despite the name, a str input is returned as str after an ASCII
    round-trip; UnicodeError is raised when it contains non-ASCII
    characters.  Non-str input is returned unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")

852 

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    text = str(url).strip()
    # Drop one pair of enclosing angle brackets, if present
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    # Drop a leading 'URL:' tag, with or without the brackets
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text

860 

_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is lower-cased; (None, url) is returned when there is no
    leading 'type:' free of '/' and ':'.
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and kept in the module global
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    scheme = found.group(1)
    # found.end() is just past the ':' that terminates the scheme
    return scheme.lower(), url[found.end():]

874 

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Returns (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and kept in the module global
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    host_port, path = found.group(1), found.group(2)
    # A remainder such as '?query' gets a leading '/'
    if path and not path.startswith('/'):
        path = '/' + path
    return host_port, path

891 

_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; (None, host) is returned when the
    pattern does not match.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and kept in the module global
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    return found.group(1, 2)

903 

_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; (user, None) is returned when there
    is none.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use; re.S lets the password part span newlines
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)

915 

916# splittag('/path#tag') --> '/path', 'tag' 

_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When no port is present the
    result is (host, None); this includes a bare trailing ':' ("host:"),
    which is stripped from the returned host.  A non-numeric suffix after
    the last ':' is not a port and leaves host untouched.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use; '[0-9]*' also accepts an empty port
        import re
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None

928 

_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and kept in the module global
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    if not port:
        # ':' with nothing after it
        return host, None
    try:
        return host, int(port)
    except ValueError:
        return host, None

950 

_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'; (url, None) is returned when there
    is none.
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' in a plain literal is an invalid escape sequence,
        # which newer Python versions warn about at compile time.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None

962 

_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'; (url, None) is returned when there
    is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and kept in the module global
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    return found.group(1, 2)

974 

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    # The first piece is the path; whatever remains are the attributes
    path = pieces.pop(0)
    return path, pieces

980 

_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='; (attr, None) is returned when there
    is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and kept in the module global
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    return found.group(1, 2)

991 return attr, None