Coverage for /pythoncovmergedfiles/medio/medio/usr/lib/python3.9/urllib/request.py: 15%

1646 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:05 +0000

1"""An extensible library for opening URLs using a variety of protocols 

2 

3The simplest way to use this module is to call the urlopen function, 

4which accepts a string containing a URL or a Request object (described 

5below). It opens the URL and returns the results as a file-like 

6object; the returned object has some extra methods described below. 

7 

8The OpenerDirector manages a collection of Handler objects that do 

9all the actual work. Each Handler implements a particular protocol or 

10option. The OpenerDirector is a composite object that invokes the 

11Handlers needed to open the requested URL. For example, the 

12HTTPHandler performs HTTP GET and POST requests and deals with 

13non-error returns. The HTTPRedirectHandler automatically deals with 

14HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler 

15deals with digest authentication. 

16 

17urlopen(url, data=None) -- Basic usage is the same as original 

18urllib. pass the url and optionally data to post to an HTTP URL, and 

19get a file-like object back. One difference is that you can also pass 

20a Request instance instead of URL. Raises a URLError (subclass of 

21OSError); for HTTP errors, raises an HTTPError, which can also be 

22treated as a valid response. 

23 

24build_opener -- Function that creates a new OpenerDirector instance. 

25Will install the default handlers. Accepts one or more Handlers as 

26arguments, either instances or Handler classes that it will 

27instantiate. If one of the argument is a subclass of the default 

28handler, the argument will be installed instead of the default. 

29 

30install_opener -- Installs a new opener as the default opener. 

31 

32objects of interest: 

33 

34OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages 

35the Handler classes, while dealing with requests and responses. 

36 

37Request -- An object that encapsulates the state of a request. The 

38state can be as simple as the URL. It can also include extra HTTP 

39headers, e.g. a User-Agent. 

40 

41BaseHandler -- 

42 

43internals: 

44BaseHandler and parent 

45_call_chain conventions 

46 

47Example usage: 

48 

49import urllib.request 

50 

51# set up authentication info 

52authinfo = urllib.request.HTTPBasicAuthHandler() 

53authinfo.add_password(realm='PDQ Application', 

54 uri='https://mahler:8092/site-updates.py', 

55 user='klem', 

56 passwd='geheim$parole') 

57 

58proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) 

59 

60# build a new opener that adds authentication and caching FTP handlers 

61opener = urllib.request.build_opener(proxy_support, authinfo, 

62 urllib.request.CacheFTPHandler) 

63 

64# install it 

65urllib.request.install_opener(opener) 

66 

67f = urllib.request.urlopen('http://www.python.org/') 

68""" 

69 

70# XXX issues: 

71# If an authentication error handler that tries to perform 

72# authentication for some reason but fails, how should the error be 

73# signalled? The client needs to know the HTTP error code. But if 

74# the handler knows that the problem was, e.g., that it didn't know 

75# the hash algorithm requested in the challenge, it would be good to 

76# pass that information along to the client, too. 

77# ftp errors aren't handled cleanly 

78# check digest against correct (i.e. non-apache) implementation 

79 

80# Possible extensions: 

81# complex proxies XXX not sure what exactly was meant by this 

82# abstract factory for opener 

83 

84import base64 

85import bisect 

86import email 

87import hashlib 

88import http.client 

89import io 

90import os 

91import posixpath 

92import re 

93import socket 

94import string 

95import sys 

96import time 

97import tempfile 

98import contextlib 

99import warnings 

100 

101 

102from urllib.error import URLError, HTTPError, ContentTooShortError 

103from urllib.parse import ( 

104 urlparse, urlsplit, urljoin, unwrap, quote, unquote, 

105 _splittype, _splithost, _splitport, _splituser, _splitpasswd, 

106 _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes, 

107 unquote_to_bytes, urlunparse) 

108from urllib.response import addinfourl, addclosehook 

109 

# check for SSL
try:
    import ssl
except ImportError:
    # Without the ssl module HTTPS support is unavailable; urlopen()
    # consults this flag before creating a default SSL context.
    _have_ssl = False
else:
    _have_ssl = True

__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]

# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

137 

_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used).  This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options.  See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests.  cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files.  More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.


    This function always returns an object which can work as a
    context manager and has the properties url, headers, and status.
    See urllib.response.addinfourl for more detail on these properties.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified.  In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        # 'warnings' is imported at module level; the historical
        # function-local "import warnings" here was redundant and removed.
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        # Build a verifying SSL context from the deprecated CA arguments.
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # Lazily create and cache the default global opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)

215 

def install_opener(opener):
    """Install *opener* as the process-wide default opener used by urlopen()."""
    global _opener
    _opener = opener

219 

# Paths of NamedTemporaryFiles created by urlretrieve(); removed by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument.  If a filename is passed, it is used as
    the temporary file location.  The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target.  The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = _splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # A file:// URL with no explicit destination needs no copy at all:
        # just hand back the local path and the pseudo-headers.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Choose the destination: caller-supplied name, or a fresh
        # temporary file that urlcleanup() can delete later.
        if filename:
            tfp = open(filename, 'wb')
        else:
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            blocksize = 1024 * 8
            size = -1
            bytes_read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial progress callback before any data is read.
            if reporthook:
                reporthook(blocknum, blocksize, size)

            while block := fp.read(blocksize):
                bytes_read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, blocksize, size)

        # A short read against a declared Content-Length is an error.
        if size >= 0 and bytes_read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (bytes_read, size), result)

    return result

283 

def urlcleanup():
    """Clean up temporary files from urlretrieve calls."""
    global _opener
    for path in _url_tempfiles:
        try:
            os.unlink(path)
        except OSError:
            # Best effort: the file may already be gone.
            pass
    _url_tempfiles.clear()
    if _opener:
        _opener = None

296 

# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.
    """
    host = urlparse(request.full_url)[1]
    if not host:
        # The URL carried no network location; fall back on the Host header.
        host = request.get_header("Host", "")

    # Strip a trailing :port, then normalise case.
    host = _cut_port_re.sub("", host, 1)
    return host.lower()

314 

class Request:
    """Encapsulate the state of a single URL request: the target URL,
    optional body *data*, headers, origin host and HTTP method."""

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE: the mutable default for *headers* is safe here because the
        # dict is only iterated below, never mutated.
        self.full_url = url            # property setter: unwraps and parses
        self.headers = {}              # headers kept across redirects
        self.unredirected_hdrs = {}    # headers dropped on redirect
        self._data = None
        self.data = data               # property setter (see below)
        self._tunnel_host = None       # set by set_proxy() for https proxying
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            # Only set when supplied, so get_method() can fall back on
            # GET/POST depending on whether *data* is present.
            self.method = method

    @property
    def full_url(self):
        """The complete URL, re-joining any fragment split off by the setter."""
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        # Split off the fragment; it is re-attached only by the getter.
        self._full_url, self.fragment = _splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        """The request body, or None for a body-less (GET-style) request."""
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        self.data = None

    def _parse(self):
        """Split self._full_url into type (scheme), host and selector."""
        self.type, rest = _splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = _splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        """Method form of the full_url property (legacy interface)."""
        return self.full_url

    def set_proxy(self, host, type):
        """Route this request through the proxy at *host* with scheme *type*."""
        if self.type == 'https' and not self._tunnel_host:
            # https is proxied via CONNECT tunnelling: remember the real
            # host for the tunnel and keep type/selector unchanged.
            self._tunnel_host = self.host
        else:
            self.type = type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        """Return True if set_proxy() has rewritten the selector."""
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        """True if *header_name* is present in either header store."""
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        """Return the header value, preferring the redirectable store."""
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        """Remove *header_name* from both header stores, if present."""
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        """Return all headers as (name, value) pairs; on key clashes the
        redirectable headers take precedence over unredirected ones."""
        hdrs = {**self.unredirected_hdrs, **self.headers}
        return list(hdrs.items())

class OpenerDirector:
    """Manage a chain of handler objects and use them to open URLs.

    Handlers register methods named ``<protocol>_open``,
    ``<protocol>_request``, ``<protocol>_response`` and
    ``<protocol>_error_<kind>``; add_handler() indexes those methods
    into the lookup tables consulted by open() and error().
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers: each dict maps a protocol
        # (or error kind) to an ordered list of handlers implementing it
        self.handle_open = {}
        self.handle_error = {}
        self.process_response = {}
        self.process_request = {}

    def add_handler(self, handler):
        """Index *handler*'s protocol methods into the lookup tables."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split "<protocol>_<condition>" on the first underscore.
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<protocol>_error_<kind>": kind becomes an int when it
                # parses as one (an HTTP status code), else stays a string.
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each handler list sorted by handler_order
            # (BaseHandler.__lt__ compares on it).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (URL string or Request object) and return a response.

        Runs the protocol's request pre-processors, dispatches through
        _open(), then runs the response post-processors.
        """
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method())
        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        """Dispatch *req* through default, protocol-specific, then
        unknown-protocol openers, returning the first non-None result."""
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!  (args[2] is the HTTP status code)
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # fall back on the catch-all http_error_default handlers
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)

562 

563# XXX probably also want an abstract factory that knows when it makes 

564# sense to skip a superclass in favor of a subclass and when it might 

565# make sense to include both 

566 

def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    def _supersedes(check, klass):
        # A passed-in handler replaces default *klass* when it is that
        # class (or a subclass), whether given as a class or an instance.
        if isinstance(check, type):
            return issubclass(check, klass)
        return isinstance(check, klass)

    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Keep only the defaults that no caller-supplied handler supersedes.
    remaining = [klass for klass in default_classes
                 if not any(_supersedes(check, klass) for check in handlers)]

    for klass in remaining:
        opener.add_handler(klass())

    # Caller-supplied handlers go in last; classes are instantiated here.
    for h in handlers:
        if isinstance(h, type):
            h = h()
        opener.add_handler(h)
    return opener

602 

class BaseHandler:
    """Common base for all protocol handlers used by OpenerDirector."""

    # Position in the handler chain; lower values run earlier.
    handler_order = 500

    def add_parent(self, parent):
        """Remember the OpenerDirector that owns this handler."""
        self.parent = parent

    def close(self):
        """No-op; retained only for backwards compatibility."""
        pass

    def __lt__(self, other):
        """Order handlers by handler_order for bisect.insort."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of sorting custom classes that
            # are unaware of handler_order after the default ones.
            return True
        return self.handler_order < other_order

620 

621 

class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        """Pass 2xx responses through; route anything else to the
        director's error chain."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= code < 300:
            return response
        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response

638 

class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback handler: turn any otherwise-unhandled HTTP error into an
    HTTPError exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # HTTPError doubles as a response object, so callers catching it
        # may still read the body from *fp*.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)

642 

class HTTPRedirectHandler(BaseHandler):
    """Follow 301/302/303/307 redirects, with loop protection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only redirect GET/HEAD for all four codes, and POST for 301-303.
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # Drop body-describing headers: the redirected request carries no body.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = {k: v for k, v in req.headers.items()
                      if k.lower() not in CONTENT_HEADERS}
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen.  Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI).  Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # Ensure an authority-only URL gets a "/" path before re-joining.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
            newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1.  Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"

754 

755 

756def _parse_proxy(proxy): 

757 """Return (scheme, user, password, host/port) given a URL or an authority. 

758 

759 If a URL is supplied, it must have an authority (host:port) component. 

760 According to RFC 3986, having an authority component means the URL must 

761 have two slashes after the scheme. 

762 """ 

763 scheme, r_scheme = _splittype(proxy) 

764 if not r_scheme.startswith("/"): 

765 # authority 

766 scheme = None 

767 authority = proxy 

768 else: 

769 # URL 

770 if not r_scheme.startswith("//"): 

771 raise ValueError("proxy URL with no authority: %r" % proxy) 

772 # We have an authority, so for RFC 3986-compliant URLs (by ss 3. 

773 # and 3.3.), path is empty or starts with '/' 

774 if '@' in r_scheme: 

775 host_separator = r_scheme.find('@') 

776 end = r_scheme.find("/", host_separator) 

777 else: 

778 end = r_scheme.find("/", 2) 

779 if end == -1: 

780 end = None 

781 authority = r_scheme[2:end] 

782 userinfo, hostport = _splituser(authority) 

783 if userinfo is not None: 

784 user, password = _splitpasswd(userinfo) 

785 else: 

786 user = password = None 

787 return scheme, user, password, hostport 

788 

class ProxyHandler(BaseHandler):
    """Reroute requests through the proxies in a {scheme: proxy_url} map."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        for type, url in proxies.items():
            type = type.lower()
            # Create one <scheme>_open method per configured scheme.  The
            # lambda binds proxy/type/meth as default arguments so that each
            # closure captures the *current* loop values, not the last ones.
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*; return None to let the
        normal protocol handlers continue, or re-open when the proxy's
        scheme differs from the request's."""
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # Proxy given as a bare authority: assume the request's scheme.
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Pre-emptively attach Basic credentials for the proxy itself.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)

831 

class HTTPPasswordMgr:
    """Map (realm, URI-prefix) pairs to (user, password) credentials."""

    def __init__(self):
        # realm -> {tuple_of_reduced_uris: (user, passwd)}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for *realm* at one URI or a sequence of URIs."""
        if isinstance(uri, str):
            uri = [uri]
        realm_map = self.passwd.setdefault(realm, {})
        # Store under both the explicit-port and port-less reductions so
        # lookups match either spelling of the authority.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uri)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for the best match, or (None, None)."""
        domains = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uris, authinfo in domains.items():
                if any(self.is_suburi(uri, reduced) for uri in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # Full URI with a netloc.
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # Bare host or host:port.
            scheme = None
            authority = uri
            path = '/'
        host, port = _splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the scheme's default port explicit in the authority.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])

894 

895 

class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back on the default (None) realm."""

    def find_user_password(self, realm, authuri):
        """Look up *realm* first; if nothing matches, retry under None."""
        user, password = HTTPPasswordMgr.find_user_password(self, realm,
                                                            authuri)
        if user is None:
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password

904 

905 

class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that additionally tracks which URIs may receive
    pre-emptive (prior) authentication."""

    def __init__(self, *args, **kwargs):
        # Maps reduced (authority, path) tuples to a boolean flag.
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Store credentials and record the prior-auth flag for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        # Also register under the default (None) realm so pre-emptive
        # requests can find the credentials without knowing a realm.
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Set the prior-auth flag for one URI or a sequence of URIs."""
        uris = [uri] if isinstance(uri, str) else uri

        for default_port in (True, False):
            for u in uris:
                key = self.reduce_uri(u, default_port)
                self.authenticated[key] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the flag for the closest matching URI, else None."""
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uri, flag in self.authenticated.items():
                if self.is_suburi(uri, reduced):
                    return flag

936 

class AbstractBasicAuthHandler:
    """Common machinery for handlers answering HTTP Basic auth challenges.

    Subclasses supply ``auth_header`` (e.g. 'Authorization' or
    'Proxy-authorization') and forward the relevant HTTP error code
    (401/407) to http_error_auth_reqed().
    """

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    rx = re.compile('(?:^|,)'   # start of the string or ','
                    '[ \t]*'    # optional whitespaces
                    '([^ \t,]+)'  # scheme like "Basic"
                    '[ \t]+'    # mandatory whitespaces
                    # realm=xxx
                    # realm='xxx'
                    # realm="xxx"
                    'realm=(["\']?)([^"\']*)\\2',
                    re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        # Default to a plain HTTPPasswordMgr when the caller supplies none.
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Expose the manager's add_password directly on the handler.
        self.add_password = self.passwd.add_password

    def _parse_realm(self, header):
        # parse WWW-Authenticate header: accept multiple challenges per header
        found_challenge = False
        for mo in AbstractBasicAuthHandler.rx.finditer(header):
            scheme, quote, realm = mo.groups()
            if quote not in ['"', "'"]:
                # Unquoted realms are accepted but flagged to the user.
                warnings.warn("Basic Auth Realm was unquoted",
                              UserWarning, 3)

            yield (scheme, realm)

            found_challenge = True

        if not found_challenge:
            # No realm matched: still yield the bare scheme (or '') with
            # realm=None so the caller can report an unsupported scheme.
            if header:
                scheme = header.split()[0]
            else:
                scheme = ''
            yield (scheme, None)

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        headers = headers.get_all(authreq)
        if not headers:
            # no header found
            return

        unsupported = None
        for header in headers:
            for scheme, realm in self._parse_realm(header):
                if scheme.lower() != 'basic':
                    # Remember it so we can complain if no Basic challenge
                    # with a realm ever turns up.
                    unsupported = scheme
                    continue

                if realm is not None:
                    # Use the first matching Basic challenge.
                    # Ignore following challenges even if they use the Basic
                    # scheme.
                    return self.retry_http_basic_auth(host, req, realm)

        if unsupported is not None:
            raise ValueError("AbstractBasicAuthHandler does not "
                             "support the following scheme: %r"
                             % (scheme,))

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue the request with Basic credentials; None if unavailable."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            # The same credentials were already sent and rejected: give up
            # instead of looping forever against the server.
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        # Pre-emptive auth only applies when the password manager tracks
        # prior authentication (HTTPPasswordMgrWithPriorAuth).
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        # Record success/failure so later requests to the same URI can
        # (or stop) sending credentials pre-emptively.
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response

1045 

1046 

1047 

class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 401 responses with HTTP Basic credentials."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # The challenge arrives in the WWW-Authenticate header of the 401.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)

1057 

1058 

class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Answer 407 responses from proxies with HTTP Basic credentials."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo component in
        # authority. Assume there isn't one, since urllib.request does not (and
        # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
        # userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)

1072 

1073 

# Return n random bytes.
# (Module-level indirection so the randomness source can be replaced,
# e.g. by tests — NOTE(review): rationale assumed, confirm.)
_randombytes = os.urandom

1076 

1077 

class AbstractDigestAuthHandler:
    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        # Fall back to a plain password manager when none is given.
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        self.retried = 0       # failed attempts for the current exchange
        self.nonce_count = 0   # per-nonce request counter (the "nc" value)
        self.last_nonce = None  # most recent server nonce seen

    def reset_retry_count(self):
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # 'basic' falls through silently so a Basic handler in the
                # same chain can take over.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        # Split "Digest k1=v1, k2=..." into scheme token and challenge list.
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            # Identical credentials were already rejected: stop retrying.
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value for challenge *chal*.

        Returns None when the challenge is malformed, the hash algorithm
        is unavailable, or no credentials are known for the realm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        # A1/A2 terminology follows RFC 2617 section 3.2.2.
        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        # NOTE: As per RFC 2617, when server sends "auth,auth-int", the client could use either `auth`
        # or `auth-int` to the response back. we use `auth` to send the response back.
        if qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        elif 'auth' in qop.split(','):
            # Track the nc counter per server nonce.
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2))
            respdig = KD(H(A1), noncebit)
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) hash callables for *algorithm* per RFC 2617."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None

1222 

1223 

class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # The authority component of the request URL names the host.
        host = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              host, req, headers)
        self.reset_retry_count()
        return response

1240 

1241 

class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Answer 407 responses from proxies using Digest credentials."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # For proxies the challenge targets the request host directly.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        self.reset_retry_count()
        return response

1253 

class AbstractHTTPHandler(BaseHandler):
    """Shared request preparation and connection handling for HTTP(S)."""

    def __init__(self, debuglevel=0):
        # Passed through to the connection's set_debuglevel().
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        self._debuglevel = level

    def _get_content_length(self, request):
        # Delegate to http.client's helper, which understands bytes,
        # file-like objects and iterables of bytes.
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in the headers a well-formed HTTP request needs.

        Adds Content-type and Content-length (or Transfer-encoding) for
        requests carrying a body, the Host header, and the opener's
        default headers.  Returns the (mutated) request.
        """
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                        'Content-length', str(content_length))
                else:
                    # Body length unknown: fall back to chunked encoding.
                    request.add_unredirected_header(
                        'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector is a full URL; the Host header
            # must name the origin server, not the proxy.
            scheme, sel = _splittype(request.selector)
            sel_host, sel_path = _splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take precedence over normal headers.
        headers = dict(req.unredirected_hdrs)
        headers.update({k: v for k, v in req.headers.items()
                        if k not in headers})

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        # Normalize header names to Title-Case for the wire.
        headers = {name.title(): val for name, val in headers.items()}

        if req._tunnel_host:
            # HTTPS through a proxy: set up a CONNECT tunnel.
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Any failure (including KeyboardInterrupt) must not leak
            # the connection.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open. Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg. It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r

1370 

1371 

class HTTPHandler(AbstractHTTPHandler):
    """Opener handler for plain http:// URLs."""

    def http_open(self, req):
        # All the real work happens in AbstractHTTPHandler.do_open().
        return self.do_open(http.client.HTTPConnection, req)

    http_request = AbstractHTTPHandler.do_request_

1378 

# HTTPSConnection only exists when Python was built with ssl support.
if hasattr(http.client, 'HTTPSConnection'):

    class HTTPSHandler(AbstractHTTPHandler):
        """Opener handler for https:// URLs (requires the ssl module)."""

        def __init__(self, debuglevel=0, context=None, check_hostname=None):
            AbstractHTTPHandler.__init__(self, debuglevel)
            # Optional ssl.SSLContext and hostname-check override, passed
            # straight through to HTTPSConnection.
            self._context = context
            self._check_hostname = check_hostname

        def https_open(self, req):
            return self.do_open(http.client.HTTPSConnection, req,
                                context=self._context, check_hostname=self._check_hostname)

        https_request = AbstractHTTPHandler.do_request_

    __all__.append('HTTPSHandler')

1395 

class HTTPCookieProcessor(BaseHandler):
    """Attach cookies to outgoing requests and harvest them from responses."""

    def __init__(self, cookiejar=None):
        import http.cookiejar
        # Default to a fresh in-memory jar.
        if cookiejar is None:
            cookiejar = http.cookiejar.CookieJar()
        self.cookiejar = cookiejar

    def http_request(self, request):
        # Add a Cookie header for any stored cookies matching the request.
        self.cookiejar.add_cookie_header(request)
        return request

    def http_response(self, request, response):
        # Record cookies from any Set-Cookie headers in the response.
        self.cookiejar.extract_cookies(response, request)
        return response

    https_request = http_request
    https_response = http_response

1413 

class UnknownHandler(BaseHandler):
    """Last-resort handler that rejects URL schemes nobody else claimed."""

    def unknown_open(self, req):
        raise URLError('unknown url type: %s' % req.type)

1418 

def parse_keqv_list(l):
    """Parse list of key=value strings where keys are not duplicated.

    Surrounding double quotes are stripped from values.  Values may be
    empty ("key="); later duplicates of a key overwrite earlier ones.
    """
    parsed = {}
    for elt in l:
        k, v = elt.split('=', 1)
        # Strip a surrounding pair of double quotes.  The len() guard
        # fixes two defects of the unguarded v[0]/v[-1] form: an empty
        # value ("key=") raised IndexError, and a value of a single '"'
        # character was wrongly reduced to the empty string.
        if len(v) > 1 and v[0] == '"' and v[-1] == '"':
            v = v[1:-1]
        parsed[k] = v
    return parsed

1428 

def parse_http_list(s):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings.  A quoted-string could
    contain a comma.  A non-quoted string could have quotes in the
    middle.  Neither commas nor quotes count if they are escaped.
    Only double-quotes count, not single-quotes.
    """
    items = []
    buf = []
    in_quote = False
    escaped = False
    for ch in s:
        if escaped:
            # Previous char was a backslash inside quotes: keep this one
            # literally (the backslash itself is dropped).
            buf.append(ch)
            escaped = False
        elif in_quote:
            if ch == '\\':
                escaped = True
            else:
                if ch == '"':
                    in_quote = False
                buf.append(ch)
        elif ch == ',':
            # Unquoted comma: element boundary.
            items.append(''.join(buf))
            buf = []
        else:
            if ch == '"':
                in_quote = True
            buf.append(ch)

    # append last part
    if buf:
        items.append(''.join(buf))

    return [item.strip() for item in items]

1471 

class FileHandler(BaseHandler):
    # Use local file or FTP depending on form of URL
    def file_open(self, req):
        """Open a file:// URL; only local hosts are served directly.

        NOTE(review): when the URL names a non-localhost host that IS in
        get_names(), this falls through and returns None — confirm that
        is the intended hand-off to other handlers.
        """
        url = req.selector
        if url[:2] == '//' and url[2:3] != '/' and (req.host and
                req.host != 'localhost'):
            if not req.host in self.get_names():
                raise URLError("file:// scheme is supported only on localhost")
        else:
            return self.open_local_file(req)

    # names for the localhost
    names = None

    def get_names(self):
        # Lazily compute and cache (on the class) every address that
        # counts as "this machine".
        if FileHandler.names is None:
            try:
                FileHandler.names = tuple(
                    socket.gethostbyname_ex('localhost')[2] +
                    socket.gethostbyname_ex(socket.gethostname())[2])
            except socket.gaierror:
                FileHandler.names = (socket.gethostbyname('localhost'),)
        return FileHandler.names

    # not entirely sure what the rules are here
    def open_local_file(self, req):
        """Return an addinfourl for a local file with synthesized headers."""
        import email.utils
        import mimetypes
        host = req.host
        filename = req.selector
        localfile = url2pathname(filename)
        try:
            stats = os.stat(localfile)
            size = stats.st_size
            modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
            mtype = mimetypes.guess_type(filename)[0]
            headers = email.message_from_string(
                'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
                (mtype or 'text/plain', size, modified))
            if host:
                host, port = _splitport(host)
            # Serve the file only when no host is given, or the host
            # resolves to one of this machine's addresses (and no port
            # was specified).
            if not host or \
                (not port and _safe_gethostbyname(host) in self.get_names()):
                if host:
                    origurl = 'file://' + host + filename
                else:
                    origurl = 'file://' + filename
                return addinfourl(open(localfile, 'rb'), headers, origurl)
        except OSError as exp:
            raise URLError(exp)
        raise URLError('file not on local host')

1522 

1523def _safe_gethostbyname(host): 

1524 try: 

1525 return socket.gethostbyname(host) 

1526 except socket.gaierror: 

1527 return None 

1528 

class FTPHandler(BaseHandler):
    def ftp_open(self, req):
        """Open an ftp:// URL and return an addinfourl over the data."""
        import ftplib
        import mimetypes
        host = req.host
        if not host:
            raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        if port is None:
            port = ftplib.FTP_PORT
        else:
            port = int(port)

        # username/password handling
        user, host = _splituser(host)
        if user:
            user, passwd = _splitpasswd(user)
        else:
            passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''

        try:
            host = socket.gethostbyname(host)
        except OSError as msg:
            raise URLError(msg)
        # Split ";type=..." style attributes off the path.
        path, attrs = _splitattr(req.selector)
        dirs = path.split('/')
        dirs = list(map(unquote, dirs))
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]:
            # Drop the empty segment from a leading '/'.
            dirs = dirs[1:]
        try:
            fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
            # Default transfer type: 'I' (binary) when a file is named,
            # 'D' (directory listing) otherwise; ";type=x" overrides.
            type = file and 'I' or 'D'
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            fp, retrlen = fw.retrfile(file, type)
            headers = ""
            mtype = mimetypes.guess_type(req.full_url)[0]
            if mtype:
                headers += "Content-type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, req.full_url)
        except ftplib.all_errors as exp:
            # Re-raise any ftplib failure as URLError, keeping the
            # original traceback.
            exc = URLError('ftp error: %r' % exp)
            raise exc.with_traceback(sys.exc_info()[2])

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # One-shot connection; CacheFTPHandler overrides this to reuse them.
        return ftpwrapper(user, passwd, host, port, dirs, timeout,
                          persistent=False)

1586 

class CacheFTPHandler(FTPHandler):
    # XXX would be nice to have pluggable cache strategies
    # XXX this stuff is definitely not thread safe
    def __init__(self):
        self.cache = {}      # key -> live ftpwrapper connection
        self.timeout = {}    # key -> absolute expiry time (epoch seconds)
        self.soonest = 0     # earliest expiry among cached connections
        self.delay = 60      # seconds a connection stays cached
        self.max_conns = 16  # hard cap on cached connections

    def setTimeout(self, t):
        self.delay = t

    def setMaxConns(self, m):
        self.max_conns = m

    def connect_ftp(self, user, passwd, host, port, dirs, timeout):
        # Reuse a live connection to the same target when possible,
        # refreshing its expiry either way.
        key = user, host, port, '/'.join(dirs), timeout
        if key in self.cache:
            self.timeout[key] = time.time() + self.delay
        else:
            self.cache[key] = ftpwrapper(user, passwd, host, port,
                                         dirs, timeout)
            self.timeout[key] = time.time() + self.delay
        self.check_cache()
        return self.cache[key]

    def check_cache(self):
        # first check for old ones
        t = time.time()
        if self.soonest <= t:
            for k, v in list(self.timeout.items()):
                if v < t:
                    self.cache[k].close()
                    del self.cache[k]
                    del self.timeout[k]
            self.soonest = min(list(self.timeout.values()))

        # then check the size
        if len(self.cache) == self.max_conns:
            # Evict the connection expiring soonest to make room.
            for k, v in list(self.timeout.items()):
                if v == self.soonest:
                    del self.cache[k]
                    del self.timeout[k]
                    break
            self.soonest = min(list(self.timeout.values()))

    def clear_cache(self):
        # Close and forget every cached connection.
        for conn in self.cache.values():
            conn.close()
        self.cache.clear()
        self.timeout.clear()

1639 

class DataHandler(BaseHandler):
    """Open data: URLs as specified in RFC 2397 (ignores POSTed data)."""

    def data_open(self, req):
        # syntax:
        #   dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        #   mediatype := [ type "/" subtype ] *( ";" parameter )
        #   data      := *urlchar
        #   parameter := attribute "=" value
        url = req.full_url

        scheme, rest = url.split(":", 1)
        mediatype, payload = rest.split(",", 1)

        # even base64 encoded data URLs might be quoted so unquote in any case:
        body = unquote_to_bytes(payload)
        if mediatype.endswith(";base64"):
            body = base64.decodebytes(body)
            mediatype = mediatype[:-7]

        if not mediatype:
            mediatype = "text/plain;charset=US-ASCII"

        headers = email.message_from_string(
            "Content-type: %s\nContent-length: %d\n" % (mediatype, len(body)))

        return addinfourl(io.BytesIO(body), headers, url)

1669 

1670 

# Code move from the old urllib module

MAXFTPCACHE = 10        # Trim the ftp cache beyond this size

1674 

# Helper for non-unix systems
if os.name == 'nt':
    # Windows: delegate to the drive-letter-aware converters.
    from nturl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)

1688 

1689 

# Module-level default FTP connection cache; URLopener instances use it
# unless they assign their own (see URLopener.__init__).
ftpcache = {}

1691 

1692 

1693class URLopener: 

1694 """Class to open URLs. 

1695 This is a class rather than just a subroutine because we may need 

1696 more than one set of global protocol-specific options. 

1697 Note -- this is a base class for those who don't want the 

1698 automatic handling of errors type 302 (relocated) and 401 

1699 (authorization needed).""" 

1700 

1701 __tempfiles = None 

1702 

1703 version = "Python-urllib/%s" % __version__ 

1704 

1705 # Constructor 

    # Constructor
    def __init__(self, proxies=None, **x509):
        # URLopener is a legacy API: warn on every instantiation.
        msg = "%(class)s style of invoking requests is deprecated. " \
              "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
        warnings.warn(msg, DeprecationWarning, stacklevel=3)
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Optional client-certificate material for HTTPS (x509 keywords).
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
        self.__tempfiles = []
        self.__unlink = os.unlink  # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe. Bah.

1731 

    def __del__(self):
        # Best-effort removal of cached temp files at garbage collection.
        self.close()

1734 

    def close(self):
        # Public alias for cleanup(): remove temp files, clear caches.
        self.cleanup()

1737 

    def cleanup(self):
        # This code sometimes runs when the rest of this module
        # has already been deleted, so it can't use any globals
        # or import anything.
        if self.__tempfiles:
            for file in self.__tempfiles:
                try:
                    # self.__unlink was bound in __init__ precisely so
                    # os.unlink is reachable even at interpreter shutdown.
                    self.__unlink(file)
                except OSError:
                    pass
            del self.__tempfiles[:]
        if self.tempcache:
            self.tempcache.clear()

1751 

    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # args is the (name, value) pair, stored as a tuple.
        self.addheaders.append(args)

1756 

1757 # External interface 

    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(_to_bytes(fullurl))
        # Percent-quote unsafe characters while preserving URL syntax.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = _splittype(fullurl)
        if not urltype:
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = _splittype(proxy)
            host, selector = _splithost(proxyhost)
            url = (host, fullurl)  # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to the open_<scheme> method for this URL type.
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        # open_local_file must never be reachable via a URL-derived name.
        if not hasattr(self, name) or name == 'open_local_file':
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except (HTTPError, URLError):
            raise
        except OSError as msg:
            # Wrap other OS-level failures, keeping the original traceback.
            raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])

1793 

1794 def open_unknown(self, fullurl, data=None): 

1795 """Overridable interface to open unknown URL type.""" 

1796 type, url = _splittype(fullurl) 

1797 raise OSError('url error', 'unknown url type', type) 

1798 

1799 def open_unknown_proxy(self, proxy, fullurl, data=None): 

1800 """Overridable interface to open unknown URL type.""" 

1801 type, url = _splittype(fullurl) 

1802 raise OSError('url error', 'invalid proxy for %s' % type, proxy) 

1803 

1804 # External interface 

    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object."""
        url = unwrap(_to_bytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = _splittype(url)
        # A local file URL can be answered without copying anything.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(_splithost(url1)[1]), hdrs
            except OSError:
                # Fall through to the generic download path.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: derive a suffix from the URL path and
                # download into a tracked temporary file.
                garbage, path = _splittype(url)
                garbage, path = _splithost(path or "")
                path, garbage = _splitquery(path or "")
                path, garbage = _splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8           # copy block size
                size = -1             # expected size (-1 = unknown)
                read = 0              # bytes copied so far
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                while 1:
                    block = fp.read(bs)
                    if not block:
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError(
                "retrieval incomplete: got only %i out of %i bytes"
                % (read, size), result)

        return result

1867 

    # Each method named open_<type> knows how to open that type of URL

    def _open_generic_http(self, connection_factory, url, data):
        """Make an HTTP connection using connection_class.

        This is an internal method that should be called from
        open_http() or open_https().

        Arguments:
        - connection_factory should take a host name and return an
          HTTPConnection instance.
        - url is the url to retrieval or a host, relative-path pair.
        - data is payload for a POST request or None.
        """

        user_passwd = None
        proxy_passwd = None
        if isinstance(url, str):
            # Direct request: url is 'host/selector', where host may carry
            # 'user:password@' credentials.
            host, selector = _splithost(url)
            if host:
                user_passwd, host = _splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: url is a (proxyhost, full-url) pair.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = _splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = _splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = _splithost(rest)
                if realhost:
                    user_passwd, realhost = _splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the credentials so they
                    # are not sent in the request line.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Target host is exempt from proxying: contact it directly.
                    host = realhost

        if not host: raise OSError('http error', 'no host given')

        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd.encode()).decode('ascii')
        else:
            auth = None
        http_conn = connection_factory(host)
        headers = {}
        if proxy_auth:
            headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
        if auth:
            headers["Authorization"] = "Basic %s" % auth
        if realhost:
            headers["Host"] = realhost

        # Add Connection:close as we don't support persistent connections yet.
        # This helps in closing the socket and avoiding ResourceWarning

        headers["Connection"] = "close"

        # User-supplied headers (addheaders) override the computed ones.
        for header, value in self.addheaders:
            headers[header] = value

        if data is not None:
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            http_conn.request("POST", selector, data, headers)
        else:
            http_conn.request("GET", selector, headers=headers)

        try:
            response = http_conn.getresponse()
        except http.client.BadStatusLine:
            # something went wrong with the HTTP status line
            raise URLError("http protocol error: bad status line")

        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if 200 <= response.status < 300:
            return addinfourl(response, response.msg, "http:" + url,
                              response.status)
        else:
            # Non-2xx: hand off to the error machinery, which may still
            # return a usable response (e.g. FancyURLopener redirects).
            return self.http_error(
                url, response.fp,
                response.status, response.reason, response.msg, data)

1961 

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        Thin wrapper: delegates to _open_generic_http() with a plain
        http.client.HTTPConnection as the connection factory.
        """
        return self._open_generic_http(http.client.HTTPConnection, url, data)

1965 

1966 def http_error(self, url, fp, errcode, errmsg, headers, data=None): 

1967 """Handle http errors. 

1968 

1969 Derived class can override this, or provide specific handlers 

1970 named http_error_DDD where DDD is the 3-digit error code.""" 

1971 # First check if there's a specific handler for this error 

1972 name = 'http_error_%d' % errcode 

1973 if hasattr(self, name): 

1974 method = getattr(self, name) 

1975 if data is None: 

1976 result = method(url, fp, errcode, errmsg, headers) 

1977 else: 

1978 result = method(url, fp, errcode, errmsg, headers, data) 

1979 if result: return result 

1980 return self.http_error_default(url, fp, errcode, errmsg, headers) 

1981 

1982 def http_error_default(self, url, fp, errcode, errmsg, headers): 

1983 """Default error handler: close the connection and raise OSError.""" 

1984 fp.close() 

1985 raise HTTPError(url, errcode, errmsg, headers, None) 

1986 

    # HTTPS support is only defined when the ssl module imported successfully.
    if _have_ssl:
        def _https_connection(self, host):
            # Connection factory passed to _open_generic_http();
            # key_file/cert_file come from the constructor's optional
            # x509 keyword arguments.
            return http.client.HTTPSConnection(host,
                                               key_file=self.key_file,
                                               cert_file=self.cert_file)

        def open_https(self, url, data=None):
            """Use HTTPS protocol."""
            return self._open_generic_http(self._https_connection, url, data)

1996 

1997 def open_file(self, url): 

1998 """Use local file or FTP depending on form of URL.""" 

1999 if not isinstance(url, str): 

2000 raise URLError('file error: proxy support for file protocol currently not implemented') 

2001 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': 

2002 raise ValueError("file:// scheme is supported only on localhost") 

2003 else: 

2004 return self.open_local_file(url) 

2005 

    def open_local_file(self, url):
        """Use local file.

        Returns an addinfourl wrapping the opened file with synthesized
        HTTP-style headers (Content-Type, Content-Length, Last-modified).
        Raises URLError if the file cannot be stat'ed or the host part
        does not name this machine.
        """
        import email.utils
        import mimetypes
        host, file = _splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError as e:
            raise URLError(e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        # Fabricate response headers from the file's metadata.
        headers = email.message_from_string(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        host, port = _splitport(host)
        # A non-empty host is only allowed when it resolves to this machine
        # (and carries no port).
        if (not port
           and socket.gethostbyname(host) in ((localhost(),) + thishost())):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'), headers, urlfile)
        raise URLError('local file error: not on local host')

2037 

    def open_ftp(self, url):
        """Use FTP protocol.

        Parses credentials, host, port, path and ';type=' attributes out of
        the URL, reuses a cached ftpwrapper connection when possible, and
        returns an addinfourl over the transferred data.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = _splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = _splitport(host)
        user, host = _splituser(host)
        if user: user, passwd = _splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = _splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Normalize leading empty components: '/a/b' -> dirs ['a'],
        # '//a/b' -> dirs ['/', 'a'].
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # One cached connection per (user, host, port, directory) tuple.
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # No file name means a directory listing; otherwise binary.
            if not file: type = 'D'
            else: type = 'I'
            # ';type=a' style URL attributes may override the transfer type.
            for attr in attrs:
                attr, value = _splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            # Re-raise any ftplib error as a URLError, keeping the traceback.
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])

2095 

2096 def open_data(self, url, data=None): 

2097 """Use "data" URL.""" 

2098 if not isinstance(url, str): 

2099 raise URLError('data error: proxy support for data protocol currently not implemented') 

2100 # ignore POSTed data 

2101 # 

2102 # syntax of data URLs: 

2103 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data 

2104 # mediatype := [ type "/" subtype ] *( ";" parameter ) 

2105 # data := *urlchar 

2106 # parameter := attribute "=" value 

2107 try: 

2108 [type, data] = url.split(',', 1) 

2109 except ValueError: 

2110 raise OSError('data error', 'bad data URL') 

2111 if not type: 

2112 type = 'text/plain;charset=US-ASCII' 

2113 semi = type.rfind(';') 

2114 if semi >= 0 and '=' not in type[semi:]: 

2115 encoding = type[semi+1:] 

2116 type = type[:semi] 

2117 else: 

2118 encoding = '' 

2119 msg = [] 

2120 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 

2121 time.gmtime(time.time()))) 

2122 msg.append('Content-type: %s' % type) 

2123 if encoding == 'base64': 

2124 # XXX is this encoding/decoding ok? 

2125 data = base64.decodebytes(data.encode('ascii')).decode('latin-1') 

2126 else: 

2127 data = unquote(data) 

2128 msg.append('Content-Length: %d' % len(data)) 

2129 msg.append('') 

2130 msg.append(data) 

2131 msg = '\n'.join(msg) 

2132 headers = email.message_from_string(msg) 

2133 f = io.StringIO(msg) 

2134 #f.fileno = None # needed for addinfourl 

2135 return addinfourl(f, headers, url) 

2136 

2137 

class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps 'realm@host' -> (user, password) gathered by earlier prompts.
        self.auth_cache = {}
        self.tries = 0
        # Abort after this many consecutive redirects; 0 disables the cap.
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # Unlike the base class, hand the error body back as a response.
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                # Too many hops: surface it as a server error instead of
                # recursing indefinitely through self.open().
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            # Reset the counter once the whole redirect chain unwinds.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # Follow a redirect target from Location:/URI:; returns None when
        # neither header is present.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        # 307 may not change the request method, and the POST body cannot
        # be replayed here, so only follow it for GET.
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Each failed precondition defers to the *base class* handler,
        # which raises HTTPError, so control never continues past it.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        # Dispatch to retry_http_basic_auth or retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Mirrors http_error_401, but for the Proxy-Authenticate header.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request after embedding fresh credentials into the
        # configured http proxy URL.
        host, selector = _splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        # Strip any credentials already present in the proxy host; i is
        # also passed along as the cache-clearing flag (nonzero means the
        # previous credentials just failed).
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # Same as retry_proxy_http_basic_auth, for the https proxy entry.
        host, selector = _splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = _splittype(proxy)
        proxyhost, proxyselector = _splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with credentials embedded in the URL itself.
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # https twin of retry_http_basic_auth.
        host, selector = _splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        # Return cached credentials for realm@host, prompting when absent.
        # clear_cache is truthy when the cached value just failed and must
        # be discarded before prompting again.
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C at the prompt means "no credentials".
            print()
            return None, None

2348 

2349 

2350# Utility functions 

2351 

# Cached result of the one-time 'localhost' lookup.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    if _localhost is not None:
        return _localhost
    # Resolve once and memoize; the answer cannot change mid-process.
    _localhost = socket.gethostbyname('localhost')
    return _localhost

2359 

# Cached tuple of this machine's IP addresses.
_thishost = None
def thishost():
    """Return the IP addresses of the current host."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addrs = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        # Unresolvable hostname: fall back to whatever 'localhost' maps to.
        addrs = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addrs)
    return _thishost

2370 

# Lazily-populated alias for ftplib.all_errors (avoids importing ftplib
# unless FTP is actually used).
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors

2379 

# Shared singleton empty header set.
_noheaders = None
def noheaders():
    """Return an empty email Message object."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders

2387 

2388 

# Utility classes

class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() still open.
        self.refcount = 0
        # When False, the connection is torn down as soon as the last
        # outstanding file object is closed.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Don't leak a half-open connection if login/cwd fails.
            self.close()
            raise

    def init(self):
        # (Re)establish the FTP connection, log in, and enter self.dirs.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        # Return (file-like-object, length-or-None) for a file retrieval or
        # a directory listing; type is 'A', 'I' or 'D' (directory).
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Connection may have dropped (e.g. idle timeout): reconnect
            # once and retry the command.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file" -- fall through and try a
                # directory listing; any other permanent error is fatal.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the original directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # file_close() is hooked so refcount stays accurate.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        self.busy = 0

    def close(self):
        # Disable keep-alive and drop the connection once no handed-out
        # file objects remain.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called when a file object handed out by retrfile() is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort -- the connection may already be gone.
            pass

2484 

# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention. If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    # in order to prefer lowercase variables, process environment in
    # two passes: first matches any, second pass matches lowercase only
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
    # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
    # header from the client
    # If "proxy" is lowercase, it will still be used thanks to the next block
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    for name, value in os.environ.items():
        # The suffix test runs on the *original* name here, so only
        # variables already ending in lowercase '_proxy' take part in this
        # pass -- this is what gives lowercase settings priority.
        if name[-6:] == '_proxy':
            name = name.lower()
            if value:
                proxies[name[:-6]] = value
            else:
                # An explicitly empty lowercase setting cancels any value
                # picked up during the first pass.
                proxies.pop(name[:-6], None)
    return proxies

2516 

def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.

    """
    if proxies is None:
        proxies = getproxies_environment()
    # Without a no_proxy setting, nothing is ever bypassed.
    if 'no' not in proxies:
        return False
    no_proxy = proxies['no']
    # '*' is the special "always bypass" marker.
    if no_proxy == '*':
        return True
    host = host.lower()
    # Compare both with and without any ':port' suffix.
    hostonly, _port = _splitport(host)
    for raw in no_proxy.split(','):
        raw = raw.strip()
        if not raw:
            continue
        entry = raw.lstrip('.').lower()     # ignore leading dots
        # Exact match, or a dot-separated suffix match.
        if entry in (hostonly, host):
            return True
        suffix = '.' + entry
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return True
    return False

2550 

2551 

2552# This code tests an OSX specific data structure but is testable on all 

2553# platforms 

2554def _proxy_bypass_macosx_sysconf(host, proxy_settings): 

2555 """ 

2556 Return True iff this host shouldn't be accessed using a proxy 

2557 

2558 This function uses the MacOSX framework SystemConfiguration 

2559 to fetch the proxy information. 

2560 

2561 proxy_settings come from _scproxy._get_proxy_settings or get mocked ie: 

2562 { 'exclude_simple': bool, 

2563 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16'] 

2564 } 

2565 """ 

2566 from fnmatch import fnmatch 

2567 

2568 hostonly, port = _splitport(host) 

2569 

2570 def ip2num(ipAddr): 

2571 parts = ipAddr.split('.') 

2572 parts = list(map(int, parts)) 

2573 if len(parts) != 4: 

2574 parts = (parts + [0, 0, 0, 0])[:4] 

2575 return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] 

2576 

2577 # Check for simple host names: 

2578 if '.' not in host: 

2579 if proxy_settings['exclude_simple']: 

2580 return True 

2581 

2582 hostIP = None 

2583 

2584 for value in proxy_settings.get('exceptions', ()): 

2585 # Items in the list are strings like these: *.local, 169.254/16 

2586 if not value: continue 

2587 

2588 m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) 

2589 if m is not None: 

2590 if hostIP is None: 

2591 try: 

2592 hostIP = socket.gethostbyname(hostonly) 

2593 hostIP = ip2num(hostIP) 

2594 except OSError: 

2595 continue 

2596 

2597 base = ip2num(m.group(1)) 

2598 mask = m.group(2) 

2599 if mask is None: 

2600 mask = 8 * (m.group(1).count('.') + 1) 

2601 else: 

2602 mask = int(mask[1:]) 

2603 

2604 if mask < 0 or mask > 32: 

2605 # System libraries ignore invalid prefix lengths 

2606 continue 

2607 

2608 mask = 32 - mask 

2609 

2610 if (hostIP >> mask) == (base >> mask): 

2611 return True 

2612 

2613 elif fnmatch(host, value): 

2614 return True 

2615 

2616 return False 

2617 

2618 

# Platform-specific proxy discovery: macOS SystemConfiguration, the Windows
# registry, or plain environment variables everywhere else.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        # Environment variables take precedence over system settings.
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        return getproxies_environment() or getproxies_macosx_sysconf()


elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('(?:[^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        try:
            import winreg
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                    'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except OSError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = _splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except OSError:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except OSError:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for any dot-free host name.
                if '.' not in rawHost:
                    return 1
            # Translate the glob entry into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment