1"""
2Ported using Python-Future from the Python 3.3 standard library.
3
4An extensible library for opening URLs using a variety of protocols
5
6The simplest way to use this module is to call the urlopen function,
7which accepts a string containing a URL or a Request object (described
below). It opens the URL and returns the results as a file-like
object; the returned object has some extra methods described below.
10
11The OpenerDirector manages a collection of Handler objects that do
12all the actual work. Each Handler implements a particular protocol or
13option. The OpenerDirector is a composite object that invokes the
14Handlers needed to open the requested URL. For example, the
15HTTPHandler performs HTTP GET and POST requests and deals with
16non-error returns. The HTTPRedirectHandler automatically deals with
17HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
18deals with digest authentication.
19
20urlopen(url, data=None) -- Basic usage is the same as original
21urllib. pass the url and optionally data to post to an HTTP URL, and
22get a file-like object back. One difference is that you can also pass
23a Request instance instead of URL. Raises a URLError (subclass of
24IOError); for HTTP errors, raises an HTTPError, which can also be
25treated as a valid response.
26
27build_opener -- Function that creates a new OpenerDirector instance.
28Will install the default handlers. Accepts one or more Handlers as
29arguments, either instances or Handler classes that it will
instantiate. If one of the arguments is a subclass of the default
handler, the argument will be installed instead of the default.
32
33install_opener -- Installs a new opener as the default opener.
34
35objects of interest:
36
37OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
38the Handler classes, while dealing with requests and responses.
39
40Request -- An object that encapsulates the state of a request. The
41state can be as simple as the URL. It can also include extra HTTP
42headers, e.g. a User-Agent.
43
44BaseHandler --
45
46internals:
47BaseHandler and parent
48_call_chain conventions
49
50Example usage:
51
52import urllib.request
53
54# set up authentication info
55authinfo = urllib.request.HTTPBasicAuthHandler()
56authinfo.add_password(realm='PDQ Application',
57 uri='https://mahler:8092/site-updates.py',
58 user='klem',
59 passwd='geheim$parole')
60
61proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
62
63# build a new opener that adds authentication and caching FTP handlers
64opener = urllib.request.build_opener(proxy_support, authinfo,
65 urllib.request.CacheFTPHandler)
66
67# install it
68urllib.request.install_opener(opener)
69
70f = urllib.request.urlopen('http://www.python.org/')
71"""
72
73# XXX issues:
74# If an authentication error handler that tries to perform
75# authentication for some reason but fails, how should the error be
76# signalled? The client needs to know the HTTP error code. But if
# the handler knows what the problem was, e.g., that it didn't know
# the hash algorithm requested in the challenge, it would be good to
79# pass that information along to the client, too.
80# ftp errors aren't handled cleanly
81# check digest against correct (i.e. non-apache) implementation
82
83# Possible extensions:
84# complex proxies XXX not sure what exactly was meant by this
85# abstract factory for opener
86
87from __future__ import absolute_import, division, print_function, unicode_literals
88from future.builtins import bytes, dict, filter, input, int, map, open, str
89from future.utils import PY2, PY3, raise_with_traceback
90
91import base64
92import bisect
93import hashlib
94import array
95
96from future.backports import email
97from future.backports.http import client as http_client
98from .error import URLError, HTTPError, ContentTooShortError
99from .parse import (
100 urlparse, urlsplit, urljoin, unwrap, quote, unquote,
101 splittype, splithost, splitport, splituser, splitpasswd,
102 splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
103from .response import addinfourl, addclosehook
104
105import io
106import os
107import posixpath
108import re
109import socket
110import sys
111import time
112import tempfile
113import contextlib
114import warnings
115
116from future.utils import PY2
117
118if PY2:
119 from collections import Iterable
120else:
121 from collections.abc import Iterable
122
# check for SSL
try:
    import ssl
    # Not available in the SSL module in Py2:
    from ssl import SSLContext
except ImportError:
    # No usable SSL support; urlopen() raises ValueError if the caller
    # requests certificate checking (cafile/capath/cadefault) without it.
    _have_ssl = False
else:
    _have_ssl = True
132
# Public API of this module; mirrors Python 3.3's urllib.request.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
    'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
    'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
148
149# used in User-Agent header sent
150__version__ = sys.version[:3]
151
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, **_3to2kwargs):
    """Open *url* (a string or a Request) and return a file-like response.

    The keyword-only options cafile, capath and cadefault (accepted via
    **_3to2kwargs for Python 2 compatibility) enable HTTPS certificate
    verification; supplying any of them builds a dedicated verifying
    opener instead of using the installed global one.
    """
    cadefault = _3to2kwargs.pop('cadefault', False)
    capath = _3to2kwargs.pop('capath', None)
    cafile = _3to2kwargs.pop('cafile', None)
    global _opener
    if cafile or capath or cadefault:
        if not _have_ssl:
            raise ValueError('SSL support not available')
        # One-off opener with a certificate-checking HTTPS handler.
        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.options |= ssl.OP_NO_SSLv2
        context.verify_mode = ssl.CERT_REQUIRED
        if cafile or capath:
            context.load_verify_locations(cafile, capath)
        else:
            context.set_default_verify_paths()
        https_handler = HTTPSHandler(context=context, check_hostname=True)
        opener = build_opener(https_handler)
    elif _opener is None:
        # First plain call: lazily build and cache the default opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
178
def install_opener(opener):
    """Install *opener* as the default OpenerDirector used by urlopen()."""
    global _opener
    _opener = opener
182
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument.  If a filename is passed, it is used as
    the temporary file location.  The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target.  The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # file:// URLs with no explicit destination need no copy at all:
        # hand back the local path together with the pseudo-headers.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Pick the destination: the caller's filename, or a kept-on-disk
        # NamedTemporaryFile remembered so urlcleanup() can delete it later.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            blocksize = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial callback before the first block is fetched.
            if reporthook:
                reporthook(blocknum, blocksize, size)

            # Copy in fixed-size chunks, reporting progress as we go.
            block = fp.read(blocksize)
            while block:
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, blocksize, size)
                block = fp.read(blocksize)

            # A short read w.r.t. Content-Length means a truncated transfer.
            if size >= 0 and read < size:
                raise ContentTooShortError(
                    "retrieval incomplete: got only %i out of %i bytes"
                    % (read, size), result)

    return result
246
def urlcleanup():
    """Delete temp files made by urlretrieve() and reset the default opener."""
    global _opener
    for path in _url_tempfiles:
        try:
            os.unlink(path)
        except EnvironmentError:
            # Best effort only -- the file may already be gone.
            pass
    # Empty the list in place (PY2-compatible alternative to list.clear()).
    del _url_tempfiles[:]
    if _opener:
        _opener = None
258
# Regex that matches a trailing ":port" on a host string; re.ASCII exists
# only on Python 3, so fall back to the default flags on Python 2.
if PY3:
    _cut_port_re = re.compile(r":\d+$", re.ASCII)
else:
    _cut_port_re = re.compile(r":\d+$")
263
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url)[1]
    if host == "":
        # The URL carried no netloc; fall back to the Host header.
        host = request.get_header("Host", "")

    # Drop at most one trailing ":port" before normalising case.
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
280
class Request(object):
    """Encapsulate the state of a single URL request.

    Holds the URL (with any #fragment split off), the optional request
    body, the headers, and the proxy/tunnel bookkeeping attributes used
    by the handler machinery.
    """

    def __init__(self, url, data=None, headers=None,
                 origin_req_host=None, unverifiable=False,
                 method=None):
        """Create a request for *url*.

        data -- request body; its presence switches get_method() to POST.
        headers -- optional mapping of initial headers.  (A None default
            replaces the earlier shared mutable ``{}`` default; existing
            callers see identical behavior.)
        origin_req_host -- RFC 2965 request-host; derived from the URL
            when not given.
        unverifiable -- True for third-party requests (e.g. redirects).
        method -- explicit HTTP method overriding the GET/POST default.
        """
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self.full_url = unwrap(url)
        self.full_url, self.fragment = splittag(self.full_url)
        self.data = data
        self.headers = {}
        self._tunnel_host = None
        if headers is not None:
            for key, value in headers.items():
                self.add_header(key, value)
        self.unredirected_hdrs = {}
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        self.method = method
        self._parse()

    def _parse(self):
        """Split full_url into .type (scheme), .host and .selector."""
        self.type, rest = splittype(self.full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        if self.method is not None:
            return self.method
        elif self.data is not None:
            return "POST"
        else:
            return "GET"

    def get_full_url(self):
        """Return the URL, re-attaching the fragment stripped at init."""
        if self.fragment:
            return '%s#%s' % (self.full_url, self.fragment)
        else:
            return self.full_url

    # Begin deprecated methods

    def add_data(self, data):
        msg = "Request.add_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        self.data = data

    def has_data(self):
        msg = "Request.has_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.data is not None

    def get_data(self):
        msg = "Request.get_data method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.data

    def get_type(self):
        msg = "Request.get_type method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.type

    def get_host(self):
        msg = "Request.get_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.host

    def get_selector(self):
        msg = "Request.get_selector method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.selector

    def is_unverifiable(self):
        msg = "Request.is_unverifiable method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.unverifiable

    def get_origin_req_host(self):
        msg = "Request.get_origin_req_host method is deprecated."
        warnings.warn(msg, DeprecationWarning, stacklevel=1)
        return self.origin_req_host

    # End deprecated methods

    def set_proxy(self, host, type):
        """Route the request through the proxy *host*.

        For https the first call only records the tunnel target (the
        request itself is unchanged, pending a CONNECT tunnel); otherwise
        the request is rewritten to use the proxy directly.
        """
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True once set_proxy() rewrote the selector to the full URL."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """Return True if the header is set (normal or unredirected)."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return a header's value, checking unredirected headers too."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def header_items(self):
        """Return all headers as a list of (name, value) pairs."""
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
401
class OpenerDirector(object):
    """Compose BaseHandler instances and route requests through them.

    Handlers register via add_handler(); each handler method's name
    (e.g. "http_open", "http_error_302", "https_response") determines
    which dispatch table it lands in and for which protocol.
    """
    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Register *handler*, indexing its methods by protocol and role."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split "protocol_condition" on the first underscore, e.g.
            # "http_error_302" -> protocol "http", condition "error_302".
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "kind" is the numeric status code when possible,
                # otherwise a symbolic name (e.g. "default").
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each chain sorted by handler_order (BaseHandler.__lt__).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """
        Accept a URL or a Request object

        Python-Future: if the URL is passed as a byte-string, decode it first.
        """
        if isinstance(fullurl, bytes):
            fullurl = fullurl.decode()
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # default_open handlers get first crack, then the protocol-specific
        # ones, and finally unknown_open as a fallback.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error_* handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2] # YUCK!  (args[2] is the HTTP status code here)
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # Nothing handled the specific code; fall back to the
            # http_error_default chain with the original arguments.
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
545
546# XXX probably also want an abstract factory that knows when it makes
547# sense to skip a superclass in favor of a subclass and when it might
548# make sense to include both
549
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def isclass(obj):
        # hasattr(obj, "__bases__") also catches Python 2 old-style classes.
        return isinstance(obj, type) or hasattr(obj, "__bases__")

    def supersedes(check, klass):
        # A caller-supplied handler replaces a default when it is a
        # subclass (given as class) or an instance of a subclass.
        if isclass(check):
            return issubclass(check, klass)
        return isinstance(check, klass)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor]
    if hasattr(http_client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Keep only the defaults that no caller-supplied handler supersedes.
    remaining = [klass for klass in default_classes
                 if not any(supersedes(check, klass) for check in handlers)]
    for klass in remaining:
        opener.add_handler(klass())

    for h in handlers:
        if isclass(h):
            h = h()
        opener.add_handler(h)
    return opener
587
class BaseHandler(object):
    """Common base class for handlers managed by an OpenerDirector."""

    # Position within the director's sorted handler chains; lower runs first.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector this handler was registered with."""
        self.parent = parent

    def close(self):
        """No-op; only exists for backwards compatibility."""
        pass

    def __lt__(self, other):
        """Sort by handler_order; order-unaware objects sort after us."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of having custom classes inserted
            # after default ones (works only for custom user classes which
            # are not aware of handler_order).
            return True
        return self.handler_order < other_order
605
606
class HTTPErrorProcessor(BaseHandler):
    """Turn non-2xx HTTP responses into calls on the opener's error chain."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Pass 2xx responses through; route the rest to parent.error()."""
        code = response.code
        msg = response.msg
        hdrs = response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response
        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
623
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort error handler: raise the error response as HTTPError."""
    def http_error_default(self, req, fp, code, msg, hdrs):
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
627
class HTTPRedirectHandler(BaseHandler):
    """Follow 301/302/303/307 responses by re-issuing the request."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # GET/HEAD may follow any of the four codes; POST only 301/302/303
        # (the new Request below has no data, so it becomes a GET).
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.
        # be conciliant with URIs containing a space
        newurl = newurl.replace(' ', '%20')
        # Drop body-describing headers: the redirected request has no body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # An empty path (e.g. "http://host") is normalised to "/".
        if not urlparts.path:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlunparse(urlparts)

        # Resolve relative redirect targets against the original URL.
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
730
731
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo (assumed to be
    username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')

    """
    scheme, remainder = splittype(proxy)
    if remainder.startswith("/"):
        # URL form: RFC 3986 (ss 3.2 and 3.3) requires '//' before the
        # authority, after which the path is empty or starts with '/'.
        if not remainder.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        end = remainder.find("/", 2)
        authority = remainder[2:end if end != -1 else None]
    else:
        # Bare authority such as 'host:port' -- no scheme applies.
        scheme = None
        authority = proxy
    userinfo, hostport = splituser(authority)
    user = password = None
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
803
class ProxyHandler(BaseHandler):
    """Route requests through proxies given as a {scheme: proxy_url} map."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Dynamically grow one "<scheme>_open" method per configured proxy.
        # The lambda's default arguments bind url/type *at loop time*,
        # sidestepping the late-binding closure pitfall.
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go via *proxy*, adding auth when configured."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            proxy_type = orig_type

        # Hosts matched by the platform's no-proxy rules connect directly.
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
845
class HTTPPasswordMgr(object):
    """Store (user, password) pairs keyed by realm and reduced URI."""

    def __init__(self):
        # Maps realm -> {(reduced_uri, ...): (user, passwd)}.
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at one URI or a sequence of URIs."""
        uris = [uri] if isinstance(uri, str) else uri
        realm_map = self.passwd.setdefault(realm, {})
        # Index under both the default-port-qualified and the raw form so
        # lookups succeed whether or not the port is spelled out.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uris)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) matching *authuri*, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                if any(self.is_suburi(uri, reduced_authuri) for uri in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # Full URI: scheme plus authority, defaulting the path to '/'.
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # Bare host or host:port.
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
908
909
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to a wildcard (None) realm."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; if nothing matches, retry with realm None."""
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is not None:
            return user, password
        return HTTPPasswordMgr.find_user_password(self, None, authuri)
918
919
class AbstractBasicAuthHandler(object):
    """Shared machinery for HTTP and proxy Basic authentication."""

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        # Default to a plain HTTPPasswordMgr when the caller supplies none.
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        self.add_password = self.passwd.add_password
        self.retried = 0

    def reset_retry_count(self):
        """Reset the retry counter (called after each auth attempt cycle)."""
        self.retried = 0

    def http_error_auth_reqed(self, authreq, host, req, headers):
        """Parse the challenge header named *authreq* and retry with creds."""
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if self.retried > 5:
            # retry sending the username:password 5 times before failing.
            raise HTTPError(req.get_full_url(), 401, "basic auth failed",
                            headers, None)
        else:
            self.retried += 1

        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                # Extract the (possibly quoted) realm from the challenge.
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"',"'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        response = self.retry_http_basic_auth(host, req, realm)
                        # A non-401 reply means the credentials worked.
                        if response and response.code != 401:
                            self.retried = 0
                        return response

    def retry_http_basic_auth(self, host, req, realm):
        """Re-send *req* with an Authorization header, or return None."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            # If we already sent exactly this header, don't loop forever.
            if req.headers.get(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None
987
988
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 401 responses by retrying with HTTP Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The full URL doubles as the lookup key for the password manager.
        response = self.http_error_auth_reqed('www-authenticate',
                                              req.full_url, req, headers)
        self.reset_retry_count()
        return response
999
1000
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Handle 407 responses by retrying with Proxy-Authorization."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo
        # component in authority.  Assume there isn't one, since
        # urllib.request does not (and should not, RFC 3986 s. 3.2.1)
        # support requests for URLs containing userinfo.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1015
1016
# Return n random bytes (used below for the digest-auth client nonce).
_randombytes = os.urandom
1019
1020
class AbstractDigestAuthHandler(object):
    """Shared implementation of HTTP digest authentication (RFC 2617).

    Mixed into HTTPDigestAuthHandler / ProxyDigestAuthHandler, which
    supply ``auth_header`` and wire this up to 401/407 responses.
    """

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" support is shaky

    def __init__(self, passwd=None):
        """*passwd* is an HTTPPasswordMgr-compatible password manager;
        a fresh one is created when omitted."""
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0
        # nonce_count / last_nonce implement the per-nonce request
        # counter ("nc") required when the server asks for qop="auth"
        # (RFC 2617 section 3.2.2).
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        # Called by the http_error_40x entry points after each request
        # cycle so the retry budget starts fresh next time.
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        """Parse the challenge found in *auth_header* and retry the
        request with Digest credentials; raise HTTPError after 5 tries."""
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # 'basic' challenges are silently ignored (presumably so
                # a Basic handler elsewhere in the chain can act on them).
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-send *req* once with an Authorization header built from
        the server challenge *auth*; None if the identical header was
        already sent (avoids an infinite retry loop)."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the value of a Digest Authorization header for *req*
        from the parsed challenge *chal*.

        Returns None when the challenge is malformed, the algorithm is
        unsupported, or no credentials are known for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            # Unsupported algorithm: we cannot answer this challenge.
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # Reusing a server nonce must bump the nc counter; a fresh
            # nonce resets it (RFC 2617 section 3.2.2).
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest primitives for *algorithm*, with
        H = None when the algorithm is not supported."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        else:
            # BUGFIX: H was previously left unbound here, so an
            # unsupported algorithm raised UnboundLocalError at the
            # return below instead of letting get_authorization() take
            # its "H is None" bail-out path.
            H = None
        # XXX MD5-sess
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
1160
1161
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # The netloc of the URL identifies the password-manager entry.
        netloc = urlparse(req.full_url).netloc
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        self.reset_retry_count()
        return response
1178
1179
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Handle 407 responses by retrying with Proxy-Authorization digests."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # For proxies the request host itself keys the credentials.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response
1191
class AbstractHTTPHandler(BaseHandler):
    """Common machinery for HTTPHandler/HTTPSHandler: request fix-up
    (do_request_) and the actual network exchange (do_open)."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        # NOTE(review): _debuglevel is stored but never forwarded to the
        # connection object in do_open() — looks like an omission; confirm.
        self._debuglevel = level

    def do_request_(self, request):
        """Fill in required headers (Content-type/length for POSTs, Host,
        and the opener's addheaders) before the request is sent.

        Returns the (mutated) request; raises URLError when no host is
        present and TypeError when POST data is a str.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes or an iterable of bytes. " \
                      "It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if not request.has_header('Content-length'):
                size = None
                try:
                    ### For Python-Future:
                    if PY2 and isinstance(data, array.array):
                        # memoryviews of arrays aren't supported
                        # in Py2.7. (e.g. memoryview(array.array('I',
                        # [1, 2, 3, 4])) raises a TypeError.)
                        # So we calculate the size manually instead:
                        size = len(data) * data.itemsize
                    ###
                    else:
                        mv = memoryview(data)
                        size = len(mv) * mv.itemsize
                except TypeError:
                    if isinstance(data, Iterable):
                        raise ValueError("Content-Length should be specified "
                                "for iterable data of type %r %r" % (type(data),
                                data))
                else:
                    request.add_unredirected_header(
                            'Content-length', '%d' % size)

        sel_host = host
        if request.has_proxy():
            # For proxied requests the Host header must name the origin
            # server taken from the selector, not the proxy.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)

        # Unredirected headers take precedence over the normal ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # Normalize header capitalization (e.g. 'content-type' ->
        # 'Content-Type') so duplicates cannot sneak in.
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # CONNECT tunnelling (https through a proxy).
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            h.request(req.get_method(), req.selector, req.data, headers)
        except socket.error as err:  # timeout error
            h.close()
            raise URLError(err)
        else:
            r = h.getresponse()
            # If the server does not send us a 'Connection: close' header,
            # HTTPConnection assumes the socket should be left open. Manually
            # mark the socket to be closed when this response object goes away.
            if h.sock:
                h.sock.close()
                h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg. It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
1312
1313
class HTTPHandler(AbstractHTTPHandler):
    """Opener handler for plain http:// URLs."""

    def http_open(self, req):
        # Delegate the whole exchange to AbstractHTTPHandler.do_open,
        # telling it which connection class to instantiate.
        connection_class = http_client.HTTPConnection
        return self.do_open(connection_class, req)

    http_request = AbstractHTTPHandler.do_request_
1320
# HTTPSHandler exists only when the http_client backport was built with
# SSL support available.
if hasattr(http_client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        # Mirrors HTTPHandler but threads the optional SSL context and
        # check_hostname flag through to HTTPSConnection.

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http_client.HTTPSConnection, req,
                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')
1337
class HTTPCookieProcessor(BaseHandler):
    """Attach stored cookies to requests and harvest Set-Cookie headers
    from responses into a CookieJar."""

    def __init__(self, cookiejar=None):
        import future.backports.http.cookiejar as http_cookiejar
        self.cookiejar = (http_cookiejar.CookieJar()
                          if cookiejar is None else cookiejar)

    def http_request(self, request):
        # Add any matching cookies we hold to the outgoing request.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Record cookies the server set so later requests can send them.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response
1355
class UnknownHandler(BaseHandler):
    """Fallback handler: any scheme nobody else claims is an error."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)
1360
def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated."""
    def _unwrap(value):
        # Strip one layer of surrounding double quotes, if present.
        if value[0] == '"' and value[-1] == '"':
            value = value[1:-1]
        return value

    pairs = (item.split('=', 1) for item in l)
    return dict((key, _unwrap(value)) for key, value in pairs)
1370
def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    current = ''
    in_escape = False
    in_quotes = False

    for ch in s:
        if in_escape:
            # Previous character was a backslash inside quotes: the
            # backslash is dropped and this character taken literally.
            current += ch
            in_escape = False
        elif in_quotes:
            if ch == '\\':
                in_escape = True
            else:
                if ch == '"':
                    in_quotes = False
                current += ch
        elif ch == ',':
            # An unquoted comma terminates the current element.
            items.append(current)
            current = ''
        else:
            if ch == '"':
                in_quotes = True
            current += ch

    # Keep any trailing element that has no terminating comma.
    if current:
        items.append(current)

    return [item.strip() for item in items]
1413
class FileHandler(BaseHandler):
    """Open file:// URLs that refer to this machine."""

    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            # BUGFIX: was "not req.host is self.get_names()" — an identity
            # test of a string against a tuple, which is always true and
            # so raised for every non-localhost authority.  A membership
            # test against this machine's names is what is intended.
            if req.host not in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost, resolved once and cached on the class
    names = None
    def get_names(self):
        """Return a tuple of IP addresses that count as 'this host'."""
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Stat and open the local file named by *req*, returning an
        addinfourl with synthesized Content-type/length headers."""
        import future.backports.email.utils as email_utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = splitport(host)
            # Serve only when there is no authority, or the (portless)
            # authority resolves to one of this machine's addresses.
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            # users shouldn't expect OSErrors coming from urlopen()
            raise URLError(exp)
        raise URLError('file not on local host')
1465
1466def _safe_gethostbyname(host):
1467 try:
1468 return socket.gethostbyname(host)
1469 except socket.gaierror:
1470 return None
1471
class FTPHandler(BaseHandler):
    """Open ftp:// URLs using a fresh, non-persistent FTP connection."""

    def ftp_open(self, req):
        """Fetch a file (or directory listing) over FTP and return it
        wrapped in an addinfourl; raises URLError on ftplib failures."""
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = splituser(host)
        if user:
            user, passwd = splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except socket.error as msg:
            raise URLError(msg)
        path, attrs = splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Binary ('I') transfer for files, directory listing ('D')
            # otherwise; a ";type=" URL attribute may override this.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            exc = URLError('ftp error: %r' % exp)
            raise_with_traceback(exc)

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # One-shot connection; CacheFTPHandler overrides this to reuse
        # connections across requests.
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)
1529
class CacheFTPHandler(FTPHandler):
    """FTPHandler variant keeping a bounded, time-limited cache of open
    FTP connections keyed by (user, host, port, path, timeout)."""
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}    # key -> live ftpwrapper connection
        self.timeout = {}  # key -> absolute expiry time
        self.soonest = 0   # earliest expiry among cached connections
        self.delay = 60    # seconds a connection stays cached
        self.max_conns = 16

    def setTimeout(self, t):
        # How long (seconds) cached connections are kept alive.
        self.delay = t

    def setMaxConns(self, m):
        # Upper bound on cache size, enforced in check_cache().
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        """Return a cached connection for this key, creating one if
        needed; the expiry time is refreshed either way."""
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        """Evict expired connections, then enforce max_conns by dropping
        the entry closest to expiry."""
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            # NOTE(review): min() raises ValueError if every entry just
            # expired; in practice connect_ftp() adds a fresh entry
            # before calling here — confirm no other caller exists.
            self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    # NOTE(review): this eviction path does not close()
                    # the dropped connection — confirm intentional.
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

    def clear_cache(self):
        """Close every cached connection and empty both tables."""
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()
1582
1583
# Code move from the old urllib module

MAXFTPCACHE = 10 # Trim the ftp cache beyond this size

# Helper for non-unix systems: Windows gets path conversions from
# nturl2path; everywhere else a URL path and a filesystem path differ
# only by percent-quoting.
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
1601
# This really consists of two pieces:
# (1) a class which handles opening of all sorts of URLs
#     (plus assorted utilities etc.)
# (2) a set of functions for parsing URLs
# XXX Should these be separated out into different modules?


# Shared module-level FTP connection cache; URLopener instances use it
# by default but may be given a private one (see URLopener.__init__).
ftpcache = {}
class URLopener(object):
    """Class to open URLs.
    This is a class rather than just a subroutine because we may need
    more than one set of global protocol-specific options.
    Note -- this is a base class for those who don't want the
    automatic handling of errors type 302 (relocated) and 401
    (authorization needed)."""

    # Temp files created by retrieve(); the class-level None keeps
    # cleanup() safe on instances whose __init__ never completed.
    __tempfiles = None

    # Default User-Agent sent with every request.
    version = "Python-urllib/%s" % __version__
1621
    # Constructor
    def __init__(self, proxies=None, **x509):
        """Set up proxies (defaulting to the environment's, via
        getproxies()) and optional x509 keywords key_file/cert_file
        used for HTTPS client authentication."""
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.
1648
    def __del__(self):
        # Make sure temp files are removed when the opener is collected.
        self.close()
1651
    def close(self):
        """Release resources: delete temporary files via cleanup()."""
        self.cleanup()
1654
    def cleanup(self):
        """Best-effort removal of temp files created by retrieve()."""
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # self.__unlink is os.unlink, captured in __init__
                    # precisely because globals may be gone by now.
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()
1668
1669 def addheader(self, *args):
1670 """Add a header to be used by the HTTP interface only
1671 e.g. u.addheader('Accept', 'sound/basic')"""
1672 self.addheaders.append(args)
1673
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(to_bytes(fullurl))
        # Percent-quote unsafe characters while leaving URL syntax intact.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to the open_<scheme> method, if any.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except HTTPError:
            raise
        except socket.error as msg:
            raise_with_traceback(IOError('socket error', msg))
1710
1711 def open_unknown(self, fullurl, data=None):
1712 """Overridable interface to open unknown URL type."""
1713 type, url = splittype(fullurl)
1714 raise IOError('url error', 'unknown url type', type)
1715
1716 def open_unknown_proxy(self, proxy, fullurl, data=None):
1717 """Overridable interface to open unknown URL type."""
1718 type, url = splittype(fullurl)
1719 raise IOError('url error', 'invalid proxy for %s' % type, proxy)
1720
    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        if filename is None and (not type or type == 'file'):
            # Local files can be answered directly, without copying.
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError as msg:
                # Fall through to the generic (copying) path below.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target name given: copy into a mkstemp file whose
                # suffix mirrors the URL's path extension.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    # reporthook(blocks transferred, block size, total size)
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result
1785
    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Tuple form (proxyhost, full-url) set up by open().
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    host = realhost

        if not host: raise IOError('http error', 'no host given')

        # user:password strings become base64 Basic credentials.
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http_client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)
1879
1880 def open_http(self, url, data=None):
1881 """Use HTTP protocol."""
1882 return self._open_generic_http(http_client.HTTPConnection, url, data)
1883
1884 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1885 """Handle http errors.
1886
1887 Derived class can override this, or provide specific handlers
1888 named http_error_DDD where DDD is the 3-digit error code."""
1889 # First check if there's a specific handler for this error
1890 name = 'http_error_%d' % errcode
1891 if hasattr(self, name):
1892 method = getattr(self, name)
1893 if data is None:
1894 result = method(url, fp, errcode, errmsg, headers)
1895 else:
1896 result = method(url, fp, errcode, errmsg, headers, data)
1897 if result: return result
1898 return self.http_error_default(url, fp, errcode, errmsg, headers)
1899
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        fp.close()
        raise HTTPError(url, errcode, errmsg, headers, None)
1904
    # HTTPS support only exists when the ssl module is available.
    if _have_ssl:
        def _https_connection(self, host):
            # Connection factory handed to _open_generic_http(); carries
            # the client certificate configured in __init__.
            return http_client.HTTPSConnection(host,
                                           key_file=self.key_file,
                                           cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)
1914
1915 def open_file(self, url):
1916 """Use local file or FTP depending on form of URL."""
1917 if not isinstance(url, str):
1918 raise URLError('file error: proxy support for file protocol currently not implemented')
1919 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
1920 raise ValueError("file:// scheme is supported only on localhost")
1921 else:
1922 return self.open_local_file(url)
1923
    def open_local_file(self, url):
        """Use local file."""
        import future.backports.email.utils as email_utils
        import mimetypes
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email_utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            # No authority component: plain local path.
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = splitport(host)
        # A portless authority resolving to this machine also counts
        # as local.
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')
1955
1956 def open_ftp(self, url):
1957 """Use FTP protocol."""
1958 if not isinstance(url, str):
1959 raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
1960 import mimetypes
1961 host, path = splithost(url)
1962 if not host: raise URLError('ftp error: no host given')
1963 host, port = splitport(host)
1964 user, host = splituser(host)
1965 if user: user, passwd = splitpasswd(user)
1966 else: passwd = None
1967 host = unquote(host)
1968 user = unquote(user or '')
1969 passwd = unquote(passwd or '')
1970 host = socket.gethostbyname(host)
1971 if not port:
1972 import ftplib
1973 port = ftplib.FTP_PORT
1974 else:
1975 port = int(port)
1976 path, attrs = splitattr(path)
1977 path = unquote(path)
1978 dirs = path.split('/')
1979 dirs, file = dirs[:-1], dirs[-1]
1980 if dirs and not dirs[0]: dirs = dirs[1:]
1981 if dirs and not dirs[0]: dirs[0] = '/'
1982 key = user, host, port, '/'.join(dirs)
1983 # XXX thread unsafe!
1984 if len(self.ftpcache) > MAXFTPCACHE:
1985 # Prune the cache, rather arbitrarily
1986 for k in self.ftpcache.keys():
1987 if k != key:
1988 v = self.ftpcache[k]
1989 del self.ftpcache[k]
1990 v.close()
1991 try:
1992 if key not in self.ftpcache:
1993 self.ftpcache[key] = \
1994 ftpwrapper(user, passwd, host, port, dirs)
1995 if not file: type = 'D'
1996 else: type = 'I'
1997 for attr in attrs:
1998 attr, value = splitvalue(attr)
1999 if attr.lower() == 'type' and \
2000 value in ('a', 'A', 'i', 'I', 'd', 'D'):
2001 type = value.upper()
2002 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
2003 mtype = mimetypes.guess_type("ftp:" + url)[0]
2004 headers = ""
2005 if mtype:
2006 headers += "Content-Type: %s\n" % mtype
2007 if retrlen is not None and retrlen >= 0:
2008 headers += "Content-Length: %d\n" % retrlen
2009 headers = email.message_from_string(headers)
2010 return addinfourl(fp, headers, "ftp:" + url)
2011 except ftperrors() as exp:
2012 raise_with_traceback(URLError('ftp error %r' % exp))
2013
2014 def open_data(self, url, data=None):
2015 """Use "data" URL."""
2016 if not isinstance(url, str):
2017 raise URLError('data error: proxy support for data protocol currently not implemented')
2018 # ignore POSTed data
2019 #
2020 # syntax of data URLs:
2021 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2022 # mediatype := [ type "/" subtype ] *( ";" parameter )
2023 # data := *urlchar
2024 # parameter := attribute "=" value
2025 try:
2026 [type, data] = url.split(',', 1)
2027 except ValueError:
2028 raise IOError('data error', 'bad data URL')
2029 if not type:
2030 type = 'text/plain;charset=US-ASCII'
2031 semi = type.rfind(';')
2032 if semi >= 0 and '=' not in type[semi:]:
2033 encoding = type[semi+1:]
2034 type = type[:semi]
2035 else:
2036 encoding = ''
2037 msg = []
2038 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
2039 time.gmtime(time.time())))
2040 msg.append('Content-type: %s' % type)
2041 if encoding == 'base64':
2042 # XXX is this encoding/decoding ok?
2043 data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
2044 else:
2045 data = unquote(data)
2046 msg.append('Content-Length: %d' % len(data))
2047 msg.append('')
2048 msg.append(data)
2049 msg = '\n'.join(msg)
2050 headers = email.message_from_string(msg)
2051 f = io.StringIO(msg)
2052 #f.fileno = None # needed for addinfourl
2053 return addinfourl(f, headers, url)
2054
2055
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Remembers (user, passwd) keyed by "realm@host"; see get_user_passwd().
        self.auth_cache = {}
        # Redirect-loop protection: tries counts redirects handled so far,
        # maxtries caps them before a synthetic 500 is reported.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            # Too many consecutive redirects: report a synthetic 500
            # instead of recursing forever.
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the target named in the Location (or URI) header."""
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No redirect target supplied: give up (returns None).
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): on the failure paths below the http_error_default
        # result is not returned and execution falls through; this mirrors
        # the upstream stdlib code -- confirm before changing.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
            retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        # Dispatch to retry_proxy_http_basic_auth / retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Re-open url after embedding fresh credentials in the HTTP proxy URL."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host part.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Re-open url after embedding fresh credentials in the HTTPS proxy URL."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host part.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Re-open url over HTTP with credentials inlined in the netloc."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Re-open url over HTTPS with credentials inlined in the netloc."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for realm@host, prompting and caching."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                # Previous credentials were rejected; forget them and re-prompt.
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print()
            return None, None
2264
2265
2266# Utility functions
2267
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The address is resolved once and cached in a module-level variable.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
2275
_thishost = None
def thishost():
    """Return the IP addresses of the current host (cached after first call)."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addrs = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        # Hostname does not resolve; fall back to the loopback entry.
        addrs = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addrs)
    return _thishost
2286
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class (lazily imported)."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
2295
_noheaders = None
def noheaders():
    """Return an empty email Message object (shared, cached instance)."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
2303
2304
2305# Utility classes
2306
class ftpwrapper(object):
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Count of file objects handed out by retrfile() and not yet closed;
        # the connection is torn down only once it drops to zero.
        self.refcount = 0
        # When True the control connection stays open after the last file
        # object is closed (see file_close()/close()).
        self.keepalive = persistent
        self.init()

    def init(self):
        # (Re)connect, authenticate, and change to the directory given at
        # construction time.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a directory listing) and return a
        (file-like object, length-or-None) pair."""
        import ftplib
        self.endtransfer()
        # Directory requests ('d'/'D') use ASCII mode; otherwise the
        # caller-supplied transfer type is used verbatim.
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Control connection likely dropped; reconnect once and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # Anything but 550 is fatal; 550 falls through to try a
                # directory listing instead.
                if str(reason)[:3] != '550':
                    raise_with_traceback(URLError('ftp error: %r' % reason))
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        ### Was:
                        # raise URLError('ftp error: %r' % reason) from reason
                        exc = URLError('ftp error: %r' % reason)
                        exc.__cause__ = reason
                        raise exc
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        self.busy = 0

    def close(self):
        # Disable keepalive; close immediately only if no readers remain.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Hook invoked when a file object from retrfile() is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
2399
2400# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scans the environment for variables named <scheme>_proxy (any case);
    this seems to be the standard convention.  If you need a different
    way, you can pass a proxies dictionary to the [Fancy]URLopener
    constructor.
    """
    return {
        name.lower()[:-6]: value
        for name, value in os.environ.items()
        if value and name.lower().endswith('_proxy')
    }
2416
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    Returns 1 to bypass the proxy, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    if no_proxy == '*':
        # '*' is the special "always bypass" marker.
        return 1
    # Compare against the host both with and without its port.
    hostonly, port = splitport(host)
    suffixes = (entry.strip() for entry in no_proxy.split(','))
    if any(s and (hostonly.endswith(s) or host.endswith(s)) for s in suffixes):
        return 1
    # otherwise, don't bypass
    return 0
2436
2437
2438# This code tests an OSX specific data structure but is testable on all
2439# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly partial) dotted-quad string into a 32-bit int;
        # missing trailing components are padded with zeros.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Numeric form of *host*, resolved lazily the first time a numeric
    # exception entry needs it.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            # Numeric entry, optionally with a /prefix-length mask.
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except socket.error:
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit mask: each written component counts as 8 bits.
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            # Convert prefix length to a right-shift amount for comparison.
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            return True

    return False
2498
2499
if sys.platform == 'darwin':
    # OS X: proxy configuration comes from SystemConfiguration, exposed by
    # the private _scproxy extension module.
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        # Environment variables take precedence over system configuration.
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        # Consult the ProxyOverride registry value to decide whether *host*
        # should bypass the proxy.
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for plain (dot-less) host names.
                if '.' not in rawHost:
                    return 1
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment