Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/future/backports/urllib/parse.py: 40%

"""
Ported using Python-Future from the Python 3.3 standard library.

Parse (absolute and relative) URLs.

The urlparse module is based upon the following RFC specifications.

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
and L. Masinter, January 2005.

RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden,
B. Carpenter and L. Masinter, December 1999.

RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax" by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.

RFC 2368: "The mailto URL scheme", by P. Hoffman, L. Masinter, J. Zawinski,
July 1998.

RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June
1995.

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M.
McCahill, December 1994

RFC 3986 is considered the current standard and any future changes to
the urlparse module should conform with it.  The urlparse module is
currently not entirely compliant with this RFC due to de facto
scenarios for parsing, and for backward compatibility purposes, some
parsing quirks from older RFCs are retained. The test cases in
test_urlparse.py provide a good indicator of parsing behavior.
"""

32from __future__ import absolute_import, division, unicode_literals

33from future.builtins import bytes, chr, dict, int, range, str

34from future.utils import raise_with_traceback

36import re

37import sys

38import collections

# Public names exported by ``from <module> import *``.
__all__ = [
    "urlparse",
    "urlunparse",
    "urljoin",
    "urldefrag",
    "urlsplit",
    "urlunsplit",
    "urlencode",
    "parse_qs",
    "parse_qsl",
    "quote",
    "quote_plus",
    "quote_from_bytes",
    "unquote",
    "unquote_plus",
    "unquote_to_bytes",
]

# A classification of schemes ('' means apply by default)

# Schemes for which urljoin() resolves a relative reference.
uses_relative = [
    'ftp', 'http', 'gopher', 'nntp', 'imap', 'wais', 'file', 'https',
    'shttp', 'mms', 'prospero', 'rtsp', 'rtspu', '', 'sftp', 'svn',
    'svn+ssh',
]

# Schemes consulted by urljoin() and urlunsplit() when handling "//netloc".
uses_netloc = [
    'ftp', 'http', 'gopher', 'nntp', 'telnet', 'imap', 'wais', 'file',
    'mms', 'https', 'shttp', 'snews', 'prospero', 'rtsp', 'rtspu', 'rsync',
    '', 'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
]

# Schemes for which urlparse() splits ";params" off the path.
uses_params = [
    'ftp', 'hdl', 'prospero', 'http', 'imap', 'https', 'shttp', 'rtsp',
    'rtspu', 'sip', 'sips', 'mms', '', 'sftp', 'tel',
]

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = [
    'gopher', 'hdl', 'mailto', 'news', 'telnet', 'wais', 'imap', 'snews',
    'sip', 'sips',
]
uses_query = [
    'http', 'wais', 'imap', 'https', 'shttp', 'mms', 'gopher', 'rtsp',
    'rtspu', 'sip', 'sips', '',
]
uses_fragment = [
    'ftp', 'hdl', 'http', 'gopher', 'news', 'nntp', 'wais', 'https',
    'shttp', 'snews', 'file', 'prospero', '',
]

# Characters valid in scheme names
scheme_chars = (
    'abcdefghijklmnopqrstuvwxyz'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    '0123456789'
    '+-.'
)

# XXX: Consider replacing with functools.lru_cache
# urlsplit() empties the caches once _parse_cache holds this many entries.
MAX_CACHE_SIZE = 20
_parse_cache = {}


def clear_cache():
    """Clear the parse cache and the quoters cache."""
    for cache in (_parse_cache, _safe_quoters):
        cache.clear()

# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
_implicit_encoding, _implicit_errors = 'ascii', 'strict'

def _noop(obj):
    """Identity function: hand *obj* back unchanged."""
    return obj

def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    """Return ``obj.encode(encoding, errors)``."""
    encoded = obj.encode(encoding, errors)
    return encoded

def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    """Return a tuple with every truthy item of *args* decoded.

    Falsy items (empty bytes, None, ...) are replaced by ''.
    """
    decoded = []
    for item in args:
        decoded.append(item.decode(encoding, errors) if item else '')
    return tuple(decoded)

103

def _coerce_args(*args):
    """Normalise *args* to str and pick the matching result converter.

    Returns the (possibly decoded) arguments followed by one extra item:
    ``_noop`` when the first argument is a str, ``_encode_result``
    otherwise.  Raises TypeError when str and non-str arguments are mixed.
    """
    first_is_str = isinstance(args[0], str)
    for other in args[1:]:
        # Empty values are exempt so that defaults such as scheme=''
        # can be combined with non-str input.
        if not other:
            continue
        if isinstance(other, str) != first_is_str:
            raise TypeError("Cannot mix str and non-str arguments")
    if not first_is_str:
        return _decode_args(args) + (_encode_result,)
    return args + (_noop,)

119

# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
    """Mixin that gives str-based result tuples an encode() method."""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        """Encode every field and wrap them in ``_encoded_counterpart``."""
        encoded_fields = [field.encode(encoding, errors) for field in self]
        return self._encoded_counterpart(*encoded_fields)

127

128

class _ResultMixinBytes(object):
    """Mixin that gives bytes-based result tuples a decode() method."""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        """Decode every field and wrap them in ``_decoded_counterpart``."""
        decoded_fields = [field.decode(encoding, errors) for field in self]
        return self._decoded_counterpart(*decoded_fields)

135

136

class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element

    Subclasses supply ``_userinfo`` -> (username, password) and
    ``_hostinfo`` -> (hostname, port); the properties below only
    post-process those pairs.
    """
    __slots__ = ()

    @property
    def username(self):
        """First element of ``_userinfo``."""
        return self._userinfo[0]

    @property
    def password(self):
        """Second element of ``_userinfo``."""
        return self._userinfo[1]

    @property
    def hostname(self):
        """Lower-cased host name, or None when it is missing or empty."""
        hostname = self._hostinfo[0]
        if not hostname:
            return None
        return hostname.lower()

    @property
    def port(self):
        """Port as an int, or None when absent, empty or out of range.

        A netloc such as ``host:`` yields an empty port string; that is
        reported as "no port" instead of letting int('') raise.
        A non-numeric port still raises ValueError from int().
        """
        port = self._hostinfo[1]
        if not port:
            # None (no ':' present) or ''/b'' (nothing after the ':')
            return None
        port = int(port, 10)
        # Return None on an illegal port
        if not (0 <= port <= 65535):
            return None
        return port

167

168

class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    """str flavour of the netloc helpers: splits ``self.netloc`` on demand."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """(username, password) taken from the text before the last '@'."""
        credentials, at_sign, _ = self.netloc.rpartition('@')
        if not at_sign:
            return None, None
        username, colon, password = credentials.partition(':')
        if not colon:
            password = None
        return username, password

    @property
    def _hostinfo(self):
        """(hostname, port) taken from the text after the last '@'.

        A '[' starts a bracketed host; its port, if any, follows the ']'.
        """
        host_part = self.netloc.rpartition('@')[2]
        _, open_bracket, after_bracket = host_part.partition('[')
        if open_bracket:
            hostname, _, trailer = after_bracket.partition(']')
            _, colon, port = trailer.partition(':')
        else:
            hostname, colon, port = host_part.partition(':')
        if not colon:
            port = None
        return hostname, port

197

198

class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    """bytes flavour of the netloc helpers: splits ``self.netloc`` on demand."""
    __slots__ = ()

    @property
    def _userinfo(self):
        """(username, password) taken from the bytes before the last b'@'."""
        credentials, at_sign, _ = self.netloc.rpartition(b'@')
        if not at_sign:
            return None, None
        username, colon, password = credentials.partition(b':')
        if not colon:
            password = None
        return username, password

    @property
    def _hostinfo(self):
        """(hostname, port) taken from the bytes after the last b'@'.

        A b'[' starts a bracketed host; its port, if any, follows the b']'.
        """
        host_part = self.netloc.rpartition(b'@')[2]
        _, open_bracket, after_bracket = host_part.partition(b'[')
        if open_bracket:
            hostname, _, trailer = after_bracket.partition(b']')
            _, colon, port = trailer.partition(b':')
        else:
            hostname, colon, port = host_part.partition(b':')
        if not colon:
            port = None
        return hostname, port

227

228

from collections import namedtuple

# Plain tuple layouts; the public result classes add behaviour via mixins.
_DefragResultBase = namedtuple('DefragResult', ['url', 'fragment'])
_SplitResultBase = namedtuple(
    'SplitResult', ['scheme', 'netloc', 'path', 'query', 'fragment'])
_ParseResultBase = namedtuple(
    'ParseResult',
    ['scheme', 'netloc', 'path', 'params', 'query', 'fragment'])

234

# Backwards-compatibility alias for _NetlocResultMixinStr.
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle.
ResultBase = _NetlocResultMixinStr

239

# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    """(url, fragment) result for str data."""
    __slots__ = ()

    def geturl(self):
        """Return the URL, with '#fragment' appended when it is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + '#' + self.fragment

248

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    """5-field split result for str data."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the URL by passing this tuple to urlunsplit()."""
        return urlunsplit(self)

253

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    """6-field parse result for str data."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the URL by passing this tuple to urlunparse()."""
        return urlunparse(self)

258

# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    """(url, fragment) result for bytes data."""
    __slots__ = ()

    def geturl(self):
        """Return the URL, with b'#fragment' appended when it is non-empty."""
        if not self.fragment:
            return self.url
        return self.url + b'#' + self.fragment

267

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    """5-field split result for bytes data."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the URL by passing this tuple to urlunsplit()."""
        return urlunsplit(self)

272

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    """6-field parse result for bytes data."""
    __slots__ = ()

    def geturl(self):
        """Reassemble the URL by passing this tuple to urlunparse()."""
        return urlunparse(self)

277

# Set up the encode/decode result pairs
def _fix_result_transcoding():
    """Cross-link every str result class with its bytes counterpart."""
    str_classes = (DefragResult, SplitResult, ParseResult)
    bytes_classes = (DefragResultBytes, SplitResultBytes, ParseResultBytes)
    for str_cls, bytes_cls in zip(str_classes, bytes_classes):
        str_cls._encoded_counterpart = bytes_cls
        bytes_cls._decoded_counterpart = str_cls


# Run once at import time, then drop the helper from the module namespace.
_fix_result_transcoding()
del _fix_result_transcoding

291

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(
        url, scheme, allow_fragments)
    params = ''
    # Only schemes listed in uses_params get ";params" split off the path.
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    parsed = ParseResult(scheme, netloc, path, params, query, fragment)
    return _coerce_result(parsed)

307

def _splitparams(url):
    """Split *url* into (path, params) at a ';'.

    When the path contains '/', only a ';' at or after the last '/' counts
    and (url, '') is returned if there is none.  Without any '/', the
    split happens at the first ';'.
    """
    last_slash = url.rfind('/')
    if last_slash < 0:
        # No '/': urlparse() only calls this when a ';' is present.
        cut = url.find(';')
    else:
        cut = url.find(';', last_slash)
        if cut < 0:
            return url, ''
    return url[:cut], url[cut + 1:]

316

def _splitnetloc(url, start=0):
    """Return (netloc, rest), cutting *url* at the first '/', '?' or '#'.

    The search begins at index *start*; if no delimiter is found the
    netloc runs to the end of *url* and the rest is empty.
    """
    found = [pos for pos in (url.find(ch, start) for ch in '/?#')
             if pos >= 0]
    end = min(found + [len(url)])
    return url[start:end], url[end:]

324

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    hit = _parse_cache.get(key, None)
    if hit:
        return _coerce_result(hit)
    if len(_parse_cache) >= MAX_CACHE_SIZE:  # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''

    colon = url.find(':')
    if colon > 0:
        prefix = url[:colon]
        if prefix == 'http':
            # Common case: accepted as the scheme without the checks below.
            scheme, url = prefix.lower(), url[colon + 1:]
        elif all(ch in scheme_chars for ch in prefix):
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[colon + 1:]
            if not rest or any(ch not in '0123456789' for ch in rest):
                # not a port number
                scheme, url = prefix.lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        # Exactly one of '[' / ']' present means an unbalanced bracket.
        if ('[' in netloc) != (']' in netloc):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    result = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = result
    return _coerce_result(result)

380

def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    (scheme, netloc, url, params,
     query, fragment, _coerce_result) = _coerce_args(*components)
    if params:
        # Fold the params back into the path before delegating.
        url = "%s;%s" % (url, params)
    rebuilt = urlunsplit((scheme, netloc, url, query, fragment))
    return _coerce_result(rebuilt)

391

def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    (scheme, netloc, url,
     query, fragment, _coerce_result) = _coerce_args(*components)
    wants_authority = netloc or (
        scheme and scheme in uses_netloc and url[:2] != '//')
    if wants_authority:
        # A non-empty path following "//netloc" must start with '/'.
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)

410

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base
    base, url, _coerce_result = _coerce_args(base, url)
    (bscheme, bnetloc, bpath,
     bparams, bquery, bfragment) = urlparse(base, '', allow_fragments)
    (scheme, netloc, path,
     params, query, fragment) = urlparse(url, bscheme, allow_fragments)

    def assemble(final_path):
        # Reads netloc/params/query at call time, i.e. after any of the
        # substitutions from the base URL made below.
        return _coerce_result(urlunparse((scheme, netloc, final_path,
                                          params, query, fragment)))

    # A different scheme, or one that has no relative form: url stands alone.
    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            return assemble(path)
        netloc = bnetloc
    if path[:1] == '/':
        return assemble(path)
    if not path and not params:
        # Reference without a path: inherit path/params (and query if absent).
        params = bparams
        if not query:
            query = bquery
        return assemble(bpath)

    # Merge: base path minus its last segment, followed by the relative path.
    segments = bpath.split('/')[:-1] + path.split('/')
    # XXX The stuff below is bogus in various ways...
    if segments[-1] == '.':
        segments[-1] = ''
    segments = [seg for seg in segments if seg != '.']
    # Repeatedly drop the first "<name>/.." pair found among the interior
    # segments until a full pass finds none.
    while True:
        for idx in range(1, len(segments) - 1):
            if (segments[idx] == '..'
                    and segments[idx - 1] not in ('', '..')):
                del segments[idx - 1:idx + 1]
                break
        else:
            break
    if segments == ['', '..']:
        segments[-1] = ''
    elif len(segments) >= 2 and segments[-1] == '..':
        segments[-2:] = ['']
    return assemble('/'.join(segments))

463

def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' not in url:
        return _coerce_result(DefragResult(url, ''))
    scheme, netloc, path, params, query, fragment = urlparse(url)
    stripped = urlunparse((scheme, netloc, path, params, query, ''))
    return _coerce_result(DefragResult(stripped, fragment))

479

_hexdig = '0123456789ABCDEFabcdef'
# Lookup table: two hex digits (as bytes, any letter case) -> the byte
# they encode.
_hextobyte = dict(
    (pair.encode(), bytes([int(pair, 16)]))
    for pair in (hi + lo for hi in _hexdig for lo in _hexdig)
)

483

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Attribute probe only: raises AttributeError for objects that
        # are not string-like (e.g. None).
        string.split
        return bytes(b'')
    if isinstance(string, str):
        string = string.encode('utf-8')
    ### For Python-Future:
    # It is already a byte-string object, but force it to be newbytes here on
    # Py2:
    string = bytes(string)
    ###
    chunks = string.split(b'%')
    if len(chunks) == 1:
        return string
    pieces = [chunks[0]]
    for chunk in chunks[1:]:
        decoded = _hextobyte.get(chunk[:2])
        if decoded is None:
            # Not a valid %XX escape: keep the '%' and the text verbatim.
            pieces.append(b'%')
            pieces.append(chunk)
        else:
            pieces.append(decoded)
            pieces.append(chunk[2:])
    return bytes(b'').join(pieces)

512

# Splits text into alternating non-ASCII / ASCII runs (ASCII runs captured).
_asciire = re.compile('([\x00-\x7f]+)')


def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        # Attribute probe only: fails early for non-string-like input.
        string.split
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'replace' if errors is None else errors
    # With one capture group, split() alternates
    # [other, ascii, other, ascii, ..., other].
    bits = _asciire.split(string)
    out = [bits[0]]
    for ascii_run, other in zip(bits[1::2], bits[2::2]):
        out.append(unquote_to_bytes(ascii_run).decode(encoding, errors))
        out.append(other)
    return ''.join(out)

539

def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        Returns a dict mapping each name to the list of its values, in
        order of appearance.
    """
    parsed_result = {}
    for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
                                 encoding=encoding, errors=errors):
        parsed_result.setdefault(name, []).append(value)
    return parsed_result

571

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were  not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list of (name, value) pairs.
    """
    qs, _coerce_result = _coerce_args(qs)

    def decode(text):
        # '+' means space in form data; then undo %XX escapes and convert
        # back to the caller's input type.
        text = unquote(text.replace('+', ' '),
                       encoding=encoding, errors=errors)
        return _coerce_result(text)

    # Both '&' and ';' separate fields.
    fields = []
    for amp_part in qs.split('&'):
        fields.extend(amp_part.split(';'))

    result = []
    for field in fields:
        if not field and not strict_parsing:
            continue
        name, equals, value = field.partition('=')
        if not equals:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (field,))
            # Handle case of a control-name with no equal sign
            if not keep_blank_values:
                continue
        if value or keep_blank_values:
            name = decode(name)
            value = decode(value)
            result.append((name, value))
    return result

619

def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    # Plus signs are replaced first, so an escaped '%2B' still decodes to '+'.
    return unquote(string.replace('+', ' '), encoding, errors)

628

# Byte values that are never percent-encoded: ASCII letters, digits, "_.-".
_ALWAYS_SAFE = frozenset(bytes(
    b'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_.-'
))
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
# Filled by quote_from_bytes(): normalised safe bytes -> quoting callable.
_safe_quoters = {}

635

class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).
    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(bytes(safe))

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<Quoter %r>" % dict(self)

    def __missing__(self, b):
        # Cache miss: compute the quoted form once, remember it, return it.
        if b in self.safe:
            quoted = chr(b)
        else:
            quoted = '%{0:02X}'.format(b)
        self[b] = quoted
        return quoted

657

def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects. encoding and
    errors must not be specified if string is a bytes object (TypeError).

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if not isinstance(string, str):
        # Non-str input is passed on as-is, so text options make no sense.
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
        return quote_from_bytes(string, safe)
    if not string:
        return string
    encoding = 'utf-8' if encoding is None else encoding
    errors = 'strict' if errors is None else errors
    return quote_from_bytes(string.encode(encoding, errors), safe)

701

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values. Plus signs in the original string are escaped unless
    they are included in safe. It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    if isinstance(string, str):
        has_space = ' ' in string
    elif isinstance(string, bytes):
        has_space = b' ' in string
    else:
        # Neither str nor bytes: take the general path below.
        has_space = True
    if not has_space:
        return quote(string, safe, encoding, errors)
    # Keep spaces unescaped during quoting, then turn them into '+'.
    space = str(' ') if isinstance(safe, str) else bytes(b' ')
    quoted = quote(string, safe + space, encoding, errors)
    return quoted.replace(' ', '+')

718

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.
    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3F'

    Raises TypeError if bs is neither bytes nor bytearray.
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return str('')
    ### For Python-Future:
    bs = bytes(bs)
    ###
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = str(safe).encode('ascii', 'ignore')
    else:
        ### For Python-Future:
        safe = bytes(safe)
        ###
        safe = bytes([c for c in safe if c < 128])
    # Fast path: nothing left after stripping safe bytes from the right
    # means every byte is safe.
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    quoter = _safe_quoters.get(safe)
    if quoter is None:
        # One Quoter per distinct safe set, cached via its bound __getitem__.
        quoter = Quoter(safe).__getitem__
        _safe_quoters[safe] = quoter
    return str('').join([quoter(char) for char in bs])

746

def urlencode(query, doseq=False, safe='', encoding=None, errors=None):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Keys and values may be str or bytes.  bytes are passed to quote_plus
    with safe only; everything else is converted with str() and passed
    to quote_plus together with safe, encoding and errors.
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise_with_traceback(TypeError("not a valid non-string sequence "
                                           "or mapping object"), tb)

    def encode_part(part):
        # bytes are quoted as-is; anything else is stringified first.
        if isinstance(part, bytes):
            return quote_plus(part, safe)
        return quote_plus(str(part), safe, encoding, errors)

    pairs = []
    for k, v in query:
        key = encode_part(k)
        if not doseq or isinstance(v, bytes):
            values = [encode_part(v)]
        elif isinstance(v, str):
            values = [quote_plus(v, safe, encoding, errors)]
        else:
            try:
                # Is this a sufficient test for sequence-ness?
                len(v)
            except TypeError:
                # not a sequence
                values = [encode_part(v)]
            else:
                # one "key=element" parameter per element of the sequence
                values = [encode_part(elt) for elt in v]
        pairs.extend(key + '=' + value for value in values)
    return str('&').join(pairs)

824

# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') -> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') ->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# urllib.parse.unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'

def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'.

    A str must be pure ASCII, otherwise UnicodeError is raised; any other
    object is returned unchanged.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if not isinstance(url, str):
        return url
    try:
        # Round-trip through ASCII purely as a validity check.
        return url.encode("ASCII").decode()
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")

852

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    text = str(url).strip()
    # Drop one pair of surrounding angle brackets, if present.
    if text.startswith('<') and text.endswith('>'):
        text = text[1:-1].strip()
    # Drop a leading "URL:" marker, if present.
    if text.startswith('URL:'):
        text = text[4:].strip()
    return text

860

_typeprog = None  # compiled lazily on the first splittype() call
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if not found:
        return None, url
    # The match starts at index 0, so end() is just past the ':'.
    return found.group(1).lower(), url[found.end():]

874

_hostprog = None  # compiled lazily on the first splithost() call
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if not found:
        return None, url
    host_port, path = found.group(1, 2)
    # A non-empty remainder such as '?query' is returned as '/?query'.
    if path and not path.startswith('/'):
        path = '/' + path
    return host_port, path

891

_userprog = None  # compiled lazily on the first splituser() call
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if not found:
        return None, host
    # The first group is greedy, so the split happens at the last '@'.
    return found.group(1, 2)

903

_passwdprog = None  # compiled lazily on the first splitpasswd() call
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        import re
        # re.S lets '.' match newlines inside the password part.
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if not found:
        return user, None
    return found.group(1, 2)

915

# splitport('host:port') --> 'host', 'port'
_portprog = None  # compiled lazily on the first splitport() call
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if not found:
        # No trailing ":<digits>"; the input is returned untouched.
        return host, None
    return found.group(1, 2)

928

_nportprog = None  # compiled lazily on the first splitnport() call
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if not found:
        return host, defport
    host, port = found.group(1, 2)
    try:
        # int('') raises ValueError too, so an empty port also yields None.
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport

950

_queryprog = None  # compiled lazily on the first splitquery() call
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  When *url* contains no '?' the
    result is (url, None).
    """
    global _queryprog
    if _queryprog is None:
        import re
        # Raw string: '\?' in a plain literal is an invalid escape sequence.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None

962

_tagprog = None  # compiled lazily on the first splittag() call
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if not found:
        return url, None
    # The tag group excludes '#', so the split is at the last '#'.
    return found.group(1, 2)

974

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    path, semicolon, rest = url.partition(';')
    if not semicolon:
        # No ';' at all: there are no attributes.
        return path, []
    return path, rest.split(';')

980

_valueprog = None  # compiled lazily on the first splitvalue() call
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if not found:
        return attr, None
    # The name group excludes '=', so the split is at the first '='.
    return found.group(1, 2)