"""Parse (absolute and relative) URLs.

The urlparse module is based upon the following RFC specifications:

RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding
and L. Masinter, January 2005.

RFC 2732: "Format for Literal IPv6 Addresses in URL's" by R. Hinden,
B. Carpenter and L. Masinter, December 1999.

RFC 2396: "Uniform Resource Identifiers (URI): Generic Syntax" by T.
Berners-Lee, R. Fielding, and L. Masinter, August 1998.

RFC 2368: "The mailto URL scheme" by P. Hoffman, L. Masinter, and J. Zawinski,
July 1998.

RFC 1808: "Relative Uniform Resource Locators" by R. Fielding, UC Irvine,
June 1995.

RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, and
M. McCahill, December 1994.

RFC 3986 is considered the current standard and any future changes to the
urlparse module should conform to it. The urlparse module is currently not
entirely compliant with this RFC: to cover de facto parsing scenarios and for
backward compatibility, some parsing quirks from older RFCs are retained. The
test cases in test_urlparse.py provide a good indicator of parsing behavior.
"""

import re
import sys
import collections

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "urlencode", "parse_qs",
           "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
           "unquote", "unquote_plus", "unquote_to_bytes",
           "DefragResult", "ParseResult", "SplitResult",
           "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]

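# Illustrative sketch (not part of the vendored module): typical use of the
# public API listed above, assuming the vendored file is importable as
# bleach._vendor.parse. The outputs shown are what the functions defined
# below are expected to produce.
#
#   >>> from bleach._vendor import parse
#   >>> parse.urlparse('http://example.com/a;p?q=1#frag')
#   ParseResult(scheme='http', netloc='example.com', path='/a', params='p', query='q=1', fragment='frag')
#   >>> parse.urlunparse(parse.urlparse('http://example.com/a;p?q=1#frag'))
#   'http://example.com/a;p?q=1#frag'
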
# A classification of schemes.
# The empty string classifies URLs with no scheme specified,
# being the default value returned by “urlsplit” and “urlparse”.

uses_relative = ['', 'ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', 'sftp',
                 'svn', 'svn+ssh', 'ws', 'wss']

uses_netloc = ['', 'ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
               'ws', 'wss']

uses_params = ['', 'ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', 'sftp', 'tel']

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)

non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']

uses_query = ['', 'http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips']

uses_fragment = ['', 'ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero']

# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# Unsafe bytes to be removed per WHATWG spec
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']

# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
_parse_cache = {}

def clear_cache():
    """Clear the parse cache and the quoters cache."""
    _parse_cache.clear()
    _safe_quoters.clear()

# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
_implicit_encoding = 'ascii'
_implicit_errors = 'strict'

def _noop(obj):
    return obj

def _encode_result(obj, encoding=_implicit_encoding,
                   errors=_implicit_errors):
    return obj.encode(encoding, errors)

def _decode_args(args, encoding=_implicit_encoding,
                 errors=_implicit_errors):
    return tuple(x.decode(encoding, errors) if x else '' for x in args)

def _coerce_args(*args):
    # Invokes decode if necessary to create str args
    # and returns the coerced inputs along with
    # an appropriate result coercion function
    #  - noop for str inputs
    #  - encoding function otherwise
    str_input = isinstance(args[0], str)
    for arg in args[1:]:
        # We special-case the empty string to support the
        # "scheme=''" default argument to some functions
        if arg and isinstance(arg, str) != str_input:
            raise TypeError("Cannot mix str and non-str arguments")
    if str_input:
        return args + (_noop,)
    return _decode_args(args) + (_encode_result,)

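# Illustrative sketch (not part of the vendored module): how _coerce_args
# behaves for str and bytes inputs.
#
#   >>> _coerce_args('http://example.com', '')[-1] is _noop
#   True
#   >>> _coerce_args(b'http://example.com', b'')[:2]
#   ('http://example.com', '')
#   >>> _coerce_args('http://example.com', b'ftp')
#   Traceback (most recent call last):
#       ...
#   TypeError: Cannot mix str and non-str arguments
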
# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))

class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))

class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element"""
    __slots__ = ()

    @property
    def username(self):
        return self._userinfo[0]

    @property
    def password(self):
        return self._userinfo[1]

    @property
    def hostname(self):
        hostname = self._hostinfo[0]
        if not hostname:
            return None
        # Scoped IPv6 address may have zone info, which must not be lowercased
        # like http://[fe80::822a:a8ff:fe49:470c%tESt]:1234/keys
        separator = '%' if isinstance(hostname, str) else b'%'
        hostname, percent, zone = hostname.partition(separator)
        return hostname.lower() + percent + zone

    @property
    def port(self):
        port = self._hostinfo[1]
        if port is not None:
            port = int(port, 10)
            if not (0 <= port <= 65535):
                raise ValueError("Port out of range 0-65535")
        return port

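# Illustrative sketch (not part of the vendored module): the derived netloc
# attributes exposed by these mixins, reached through urlsplit() defined
# further down. Note that the IPv6 zone id keeps its case.
#
#   >>> r = urlsplit('http://user:pw@Example.COM:8042/over/there')
#   >>> r.username, r.password, r.hostname, r.port
#   ('user', 'pw', 'example.com', 8042)
#   >>> urlsplit('http://[FE80::1%25ETH0]/').hostname
#   'fe80::1%25ETH0'
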
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    __slots__ = ()

    @property
    def _userinfo(self):
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition('@')
        if have_info:
            username, have_password, password = userinfo.partition(':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition('@')
        _, have_open_br, bracketed = hostinfo.partition('[')
        if have_open_br:
            hostname, _, port = bracketed.partition(']')
            _, _, port = port.partition(':')
        else:
            hostname, _, port = hostinfo.partition(':')
        if not port:
            port = None
        return hostname, port

class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    __slots__ = ()

    @property
    def _userinfo(self):
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition(b'@')
        if have_info:
            username, have_password, password = userinfo.partition(b':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition(b'@')
        _, have_open_br, bracketed = hostinfo.partition(b'[')
        if have_open_br:
            hostname, _, port = bracketed.partition(b']')
            _, _, port = port.partition(b':')
        else:
            hostname, _, port = hostinfo.partition(b':')
        if not port:
            port = None
        return hostname, port

from collections import namedtuple

_DefragResultBase = namedtuple('DefragResult', 'url fragment')
_SplitResultBase = namedtuple(
    'SplitResult', 'scheme netloc path query fragment')
_ParseResultBase = namedtuple(
    'ParseResult', 'scheme netloc path params query fragment')

_DefragResultBase.__doc__ = """
DefragResult(url, fragment)

A 2-tuple that contains the URL without the fragment identifier and the
fragment identifier as a separate argument.
"""

_DefragResultBase.url.__doc__ = """The URL with no fragment identifier."""

_DefragResultBase.fragment.__doc__ = """
Fragment identifier separated from the URL, which allows indirect
identification of a secondary resource by reference to a primary resource and
additional identifying information.
"""

_SplitResultBase.__doc__ = """
SplitResult(scheme, netloc, path, query, fragment)

A 5-tuple that contains the different components of a URL. Similar to
ParseResult, but does not split params.
"""

_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request."""

_SplitResultBase.netloc.__doc__ = """
Network location where the request is made to.
"""

_SplitResultBase.path.__doc__ = """
The hierarchical path, such as the path to a file to download.
"""

_SplitResultBase.query.__doc__ = """
The query component, which contains non-hierarchical data that, along with
data in the path component, identifies a resource in the scope of the URI's
scheme and network location.
"""

_SplitResultBase.fragment.__doc__ = """
Fragment identifier, which allows indirect identification of a secondary
resource by reference to a primary resource and additional identifying
information.
"""

_ParseResultBase.__doc__ = """
ParseResult(scheme, netloc, path, params, query, fragment)

A 6-tuple that contains components of a parsed URL.
"""

_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__
_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__
_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__
_ParseResultBase.params.__doc__ = """
Parameters for the last path element, used to dereference the URI in order to
provide access to perform some operation on the resource.
"""

_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__
_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__

# For backwards compatibility, alias _NetlocResultMixinStr
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle
ResultBase = _NetlocResultMixinStr

# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    __slots__ = ()
    def geturl(self):
        if self.fragment:
            return self.url + '#' + self.fragment
        else:
            return self.url

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        return urlunsplit(self)

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        return urlunparse(self)

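# Illustrative sketch (not part of the vendored module): geturl() rebuilds a
# URL from a structured result, via the urlunsplit()/urlunparse()/urldefrag()
# helpers defined further down.
#
#   >>> urlsplit('https://example.com/path?q=1#top').geturl()
#   'https://example.com/path?q=1#top'
#   >>> urldefrag('https://example.com/path#top').geturl()
#   'https://example.com/path#top'
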
# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        if self.fragment:
            return self.url + b'#' + self.fragment
        else:
            return self.url

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        return urlunsplit(self)

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        return urlunparse(self)

# Set up the encode/decode result pairs
def _fix_result_transcoding():
    _result_pairs = (
        (DefragResult, DefragResultBytes),
        (SplitResult, SplitResultBytes),
        (ParseResult, ParseResultBytes),
    )
    for _decoded, _encoded in _result_pairs:
        _decoded._encoded_counterpart = _encoded
        _encoded._decoded_counterpart = _decoded

_fix_result_transcoding()
del _fix_result_transcoding

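# Illustrative sketch (not part of the vendored module): the counterpart
# wiring above lets str results encode to their bytes twins and back.
#
#   >>> urlsplit('http://example.com/a').encode()
#   SplitResultBytes(scheme=b'http', netloc=b'example.com', path=b'/a', query=b'', fragment=b'')
#   >>> urlsplit(b'http://example.com/a').decode()
#   SplitResult(scheme='http', netloc='example.com', path='/a', query='', fragment='')
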
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    splitresult = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = splitresult
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    result = ParseResult(scheme, netloc, url, params, query, fragment)
    return _coerce_result(result)

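# Illustrative sketch (not part of the vendored module): urlparse() only
# splits off params for schemes listed in uses_params, and only from the
# last path segment.
#
#   >>> urlparse('http://example.com/a/b;type=x?q=1')
#   ParseResult(scheme='http', netloc='example.com', path='/a/b', params='type=x', query='q=1', fragment='')
#   >>> urlparse('http://example.com/a;p/b')
#   ParseResult(scheme='http', netloc='example.com', path='/a;p/b', params='', query='', fragment='')
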
def _splitparams(url):
    if '/' in url:
        i = url.find(';', url.rfind('/'))
        if i < 0:
            return url, ''
    else:
        i = url.find(';')
    return url[:i], url[i+1:]

def _splitnetloc(url, start=0):
    delim = len(url)   # position of end of domain part of url, default is end
    for c in '/?#':    # look for delimiters; the order is NOT important
        wdelim = url.find(c, start)        # find first of this delim
        if wdelim >= 0:                    # if found
            delim = min(delim, wdelim)     # use earliest delim position
    return url[start:delim], url[delim:]   # return (domain, rest)

def _checknetloc(netloc):
    if not netloc or not any(ord(c) > 127 for c in netloc):
        return
    # looking for characters like \u2100 that expand to 'a/c'
    # IDNA uses NFKC equivalence, so normalize for this check
    import unicodedata
    n = netloc.replace('@', '')   # ignore characters already included
    n = n.replace(':', '')        # but not the surrounding text
    n = n.replace('#', '')
    n = n.replace('?', '')
    netloc2 = unicodedata.normalize('NFKC', n)
    if n == netloc2:
        return
    for c in '/?#@:':
        if c in netloc2:
            raise ValueError("netloc '" + netloc + "' contains invalid " +
                             "characters under NFKC normalization")

def _remove_unsafe_bytes_from_url(url):
    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
        url = url.replace(b, "")
    return url

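# Illustrative sketch (not part of the vendored module): tab, CR and LF are
# stripped before parsing (WHATWG behavior), and netlocs whose NFKC
# normalization would introduce URL delimiters are rejected.
#
#   >>> urlsplit('http://exam\nple.com/pa\tth')
#   SplitResult(scheme='http', netloc='example.com', path='/path', query='', fragment='')
#   >>> urlsplit('http://example\u2100.com')   # '\u2100' normalizes to 'a/c'
#   Traceback (most recent call last):
#       ...
#   ValueError: netloc 'example℀.com' contains invalid characters under NFKC normalization
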
def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    url = _remove_unsafe_bytes_from_url(url)
    scheme = _remove_unsafe_bytes_from_url(scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                if (('[' in netloc and ']' not in netloc) or
                        (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            _checknetloc(netloc)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return _coerce_result(v)
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    _checknetloc(netloc)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return _coerce_result(v)

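# Illustrative sketch (not part of the vendored module): urlsplit() keeps
# params in the path, distinguishes a real scheme from a "path:port"-looking
# prefix, and caches results keyed on (url, scheme, allow_fragments, types).
#
#   >>> urlsplit('http://example.com/a;p?q=1#frag')
#   SplitResult(scheme='http', netloc='example.com', path='/a;p', query='q=1', fragment='frag')
#   >>> urlsplit('path:80')                 # all-digit "rest" => not a scheme
#   SplitResult(scheme='', netloc='', path='path:80', query='', fragment='')
#   >>> urlsplit('mailto:user@example.com')
#   SplitResult(scheme='mailto', netloc='', path='user@example.com', query='', fragment='')
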
def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment, _coerce_result = (
        _coerce_args(*components))
    if params:
        url = "%s;%s" % (url, params)
    return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))

def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
        _coerce_args(*components))
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/':
            url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)

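# Illustrative sketch (not part of the vendored module): rebuilding drops
# redundant empty delimiters, producing an equivalent (not identical) URL.
#
#   >>> urlunsplit(urlsplit('http://example.com/path?'))
#   'http://example.com/path'
#   >>> urlunsplit(('https', 'example.com', 'path', 'q=1', 'top'))
#   'https://example.com/path?q=1#top'
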
def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base

    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
        urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
        urlparse(url, bscheme, allow_fragments)

    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc

    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))

    base_parts = bpath.split('/')
    if base_parts[-1] != '':
        # the last item is not a directory, so will not be taken into account
        # in resolving the relative path
        del base_parts[-1]

    # per RFC 3986, ignore the base path entirely when the relative path is
    # absolute (starts with '/')
    if path[:1] == '/':
        segments = path.split('/')
    else:
        segments = base_parts + path.split('/')
        # filter out elements that would cause redundant slashes on re-joining
        # the resolved_path
        segments[1:-1] = filter(None, segments[1:-1])

    resolved_path = []

    for seg in segments:
        if seg == '..':
            try:
                resolved_path.pop()
            except IndexError:
                # ignore any .. segments that would otherwise cause an
                # IndexError when popped from resolved_path if resolving
                # for rfc3986
                pass
        elif seg == '.':
            continue
        else:
            resolved_path.append(seg)

    if segments[-1] in ('.', '..'):
        # do some post-processing here. if the last segment was a relative dir,
        # then we need to append the trailing '/'
        resolved_path.append('')

    return _coerce_result(urlunparse((scheme, netloc, '/'.join(
        resolved_path) or '/', params, query, fragment)))

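# Illustrative sketch (not part of the vendored module): RFC 3986-style
# relative reference resolution.
#
#   >>> urljoin('http://a/b/c/d;p?q', 'g')
#   'http://a/b/c/g'
#   >>> urljoin('http://a/b/c/d;p?q', '../g')
#   'http://a/b/g'
#   >>> urljoin('http://a/b/c/d;p?q', '//other/x')
#   'http://other/x'
#   >>> urljoin('http://a/b/c/d;p?q', '?y')
#   'http://a/b/c/d;p?y'
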
def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' in url:
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
    else:
        frag = ''
        defrag = url
    return _coerce_result(DefragResult(defrag, frag))

_hexdig = '0123456789ABCDEFabcdef'
_hextobyte = None

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    bits = string.split(b'%')
    if len(bits) == 1:
        return string
    res = [bits[0]]
    append = res.append
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    global _hextobyte
    if _hextobyte is None:
        _hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
                      for a in _hexdig for b in _hexdig}
    for item in bits[1:]:
        try:
            append(_hextobyte[item[:2]])
            append(item[2:])
        except KeyError:
            append(b'%')
            append(item)
    return b''.join(res)

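# Illustrative sketch (not part of the vendored module): percent escapes are
# decoded to raw bytes; malformed escapes are passed through untouched.
#
#   >>> unquote_to_bytes('abc%20def')
#   b'abc def'
#   >>> unquote_to_bytes('caf%C3%A9')
#   b'caf\xc3\xa9'
#   >>> unquote_to_bytes('100%zz')
#   b'100%zz'
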
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        string.split
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    bits = _asciire.split(string)
    res = [bits[0]]
    append = res.append
    for i in range(1, len(bits), 2):
        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
        append(bits[i + 1])
    return ''.join(res)

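# Illustrative sketch (not part of the vendored module): decoding escapes to
# text; with the default errors='replace' here, invalid UTF-8 sequences are
# replaced rather than raising.
#
#   >>> unquote('abc%20def')
#   'abc def'
#   >>> unquote('caf%C3%A9')
#   'café'
#   >>> unquote('%e9')          # invalid UTF-8 byte -> U+FFFD replacement
#   '�'
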
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace', max_num_fields=None,
             separator='&'):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as
            blank strings.  The default false value indicates that
            blank values are to be ignored and treated as if they were
            not included.

        strict_parsing: flag indicating what to do with parsing errors.
            If false (the default), errors are silently ignored.
            If true, errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        max_num_fields: int. If set, raise a ValueError if there are
            more than max_num_fields fields read by parse_qsl().

        separator: str. The symbol to use for separating the query arguments.
            Defaults to &.

        Returns a dictionary.
    """
    parsed_result = {}
    pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
                      encoding=encoding, errors=errors,
                      max_num_fields=max_num_fields, separator=separator)
    for name, value in pairs:
        if name in parsed_result:
            parsed_result[name].append(value)
        else:
            parsed_result[name] = [value]
    return parsed_result

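# Illustrative sketch (not part of the vendored module): repeated names are
# collected into lists; blank values are dropped unless requested.
#
#   >>> parse_qs('a=1&a=2&b=')
#   {'a': ['1', '2']}
#   >>> parse_qs('a=1&a=2&b=', keep_blank_values=True)
#   {'a': ['1', '2'], 'b': ['']}
#   >>> parse_qs('a=1;b=2', separator=';')
#   {'a': ['1'], 'b': ['2']}
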
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace', max_num_fields=None,
              separator='&'):
    """Parse a query given as a string argument.

        Arguments:

        qs: percent-encoded query string to be parsed

        keep_blank_values: flag indicating whether blank values in
            percent-encoded queries should be treated as blank strings.
            A true value indicates that blanks should be retained as blank
            strings.  The default false value indicates that blank values
            are to be ignored and treated as if they were not included.

        strict_parsing: flag indicating what to do with parsing errors. If
            false (the default), errors are silently ignored. If true,
            errors raise a ValueError exception.

        encoding and errors: specify how to decode percent-encoded sequences
            into Unicode characters, as accepted by the bytes.decode() method.

        max_num_fields: int. If set, raise a ValueError if there are
            more than max_num_fields fields read by parse_qsl().

        separator: str. The symbol to use for separating the query arguments.
            Defaults to &.

        Returns a list, as G-d intended.
    """
    qs, _coerce_result = _coerce_args(qs)

    if not separator or (not isinstance(separator, (str, bytes))):
        raise ValueError("Separator must be of type string or bytes.")

    # If max_num_fields is defined then check that the number of fields
    # is less than max_num_fields. This prevents a memory exhaustion DOS
    # attack via post bodies with many fields.
    if max_num_fields is not None:
        num_fields = 1 + qs.count(separator)
        if max_num_fields < num_fields:
            raise ValueError('Max number of fields exceeded')

    pairs = [s1 for s1 in qs.split(separator)]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = nv[0].replace('+', ' ')
            name = unquote(name, encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = unquote(value, encoding=encoding, errors=errors)
            value = _coerce_result(value)
            r.append((name, value))
    return r

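# Illustrative sketch (not part of the vendored module): pair order is
# preserved, '+' decodes to a space, and the parsing guards raise ValueError.
#
#   >>> parse_qsl('a=1&b=x+y%21')
#   [('a', '1'), ('b', 'x y!')]
#   >>> parse_qsl('a=1&junk&b=2', strict_parsing=True)
#   Traceback (most recent call last):
#       ...
#   ValueError: bad query field: 'junk'
#   >>> parse_qsl('a=1&b=2&c=3', max_num_fields=2)
#   Traceback (most recent call last):
#       ...
#   ValueError: Max number of fields exceeded
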
def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    string = string.replace('+', ' ')
    return unquote(string, encoding, errors)

_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                         b'abcdefghijklmnopqrstuvwxyz'
                         b'0123456789'
                         b'_.-')
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
_safe_quoters = {}

class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).
    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(safe)

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<%s %r>" % (self.__class__.__name__, dict(self))

    def __missing__(self, b):
        # Handle a cache miss. Store quoted string in cache and return.
        res = chr(b) if b in self.safe else '%{:02X}'.format(b)
        self[b] = res
        return res

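# Illustrative sketch (not part of the vendored module): a Quoter maps byte
# values to either the literal character (safe) or a %XX escape, memoizing
# each answer on first use.
#
#   >>> q = Quoter(b'/')
#   >>> q[ord('a')], q[ord('/')], q[ord(' ')]
#   ('a', '/', '%20')
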
def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects. encoding and errors
    must not be specified if string is a bytes object.

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if isinstance(string, str):
        if not string:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'strict'
        string = string.encode(encoding, errors)
    else:
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
    return quote_from_bytes(string, safe)

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values. Plus signs in the original string are escaped unless
    they are included in safe. It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    if ((isinstance(string, str) and ' ' not in string) or
        (isinstance(string, bytes) and b' ' not in string)):
        return quote(string, safe, encoding, errors)
    if isinstance(safe, str):
        space = ' '
    else:
        space = b' '
    string = quote(string, safe + space, encoding, errors)
    return string.replace(' ', '+')

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.
    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3F'
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = safe.encode('ascii', 'ignore')
    else:
        safe = bytes([c for c in safe if c < 128])
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
    return ''.join([quoter(char) for char in bs])

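# Illustrative sketch (not part of the vendored module): the quoting family.
# quote() leaves '/' alone by default; quote_plus() targets form encoding.
#
#   >>> quote('abc def/ghi')
#   'abc%20def/ghi'
#   >>> quote_plus('abc def/ghi')
#   'abc+def%2Fghi'
#   >>> quote('café')
#   'caf%C3%A9'
#   >>> quote_from_bytes(b'abc def?')
#   'abc%20def%3F'
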
def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
              quote_via=quote_plus):
    """Encode a dict or sequence of two-element tuples into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The components of a query arg may each be either a string or a bytes type.

    The safe, encoding, and errors parameters are passed down to the function
    specified by quote_via (encoding and errors only if a component is a str).
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(tb)

    l = []
    if not doseq:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_via(k, safe)
            else:
                k = quote_via(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_via(v, safe)
            else:
                v = quote_via(str(v), safe, encoding, errors)
            l.append(k + '=' + v)
    else:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_via(k, safe)
            else:
                k = quote_via(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_via(v, safe)
                l.append(k + '=' + v)
            elif isinstance(v, str):
                v = quote_via(v, safe, encoding, errors)
                l.append(k + '=' + v)
            else:
                try:
                    # Is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_via(str(v), safe, encoding, errors)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        if isinstance(elt, bytes):
                            elt = quote_via(elt, safe)
                        else:
                            elt = quote_via(str(elt), safe, encoding, errors)
                        l.append(k + '=' + elt)
    return '&'.join(l)

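# Illustrative sketch (not part of the vendored module): without doseq a
# list value is quoted wholesale; with doseq each element becomes its own
# field.
#
#   >>> urlencode({'q': 'x y', 'lang': 'en'})
#   'q=x+y&lang=en'
#   >>> urlencode({'k': [1, 2]})
#   'k=%5B1%2C+2%5D'
#   >>> urlencode({'k': [1, 2]}, doseq=True)
#   'k=1&k=2'
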
def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if isinstance(url, str):
        try:
            url = url.encode("ASCII").decode()
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = str(url).strip()
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:':
        url = url[4:].strip()
    return url

_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)

    match = _typeprog.match(url)
    if match:
        scheme, data = match.groups()
        return scheme.lower(), data
    return None, url

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and path[0] != '/':
            path = '/' + path
        return host_port, path
    return None, url

def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    user, delim, host = host.rpartition('@')
    return (user if delim else None), host

def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    user, delim, passwd = user.partition(':')
    return user, (passwd if delim else None)

_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None

def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    host, delim, port = host.rpartition(':')
    if not delim:
        host = port
    elif port:
        try:
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport

def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    path, delim, query = url.rpartition('?')
    if delim:
        return path, query
    return url, None

def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    path, delim, tag = url.rpartition('#')
    if delim:
        return path, tag
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    attr, delim, value = attr.partition('=')
    return attr, (value if delim else None)

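# Illustrative sketch (not part of the vendored module): the legacy split*
# helpers return None for missing pieces rather than empty strings.
#
#   >>> splittype('mailto:user@example.com')
#   ('mailto', 'user@example.com')
#   >>> splithost('//www.example.com:80/index.html')
#   ('www.example.com:80', '/index.html')
#   >>> splitport('example.com')
#   ('example.com', None)
#   >>> splitnport('example.com:8080')
#   ('example.com', 8080)
#   >>> splitquery('/path')
#   ('/path', None)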