Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 20%

427 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1from __future__ import absolute_import 

2 

3import io 

4import logging 

5import sys 

6import warnings 

7import zlib 

8from contextlib import contextmanager 

9from socket import error as SocketError 

10from socket import timeout as SocketTimeout 

11 

12try: 

13 try: 

14 import brotlicffi as brotli 

15 except ImportError: 

16 import brotli 

17except ImportError: 

18 brotli = None 

19 

20from . import util 

21from ._collections import HTTPHeaderDict 

22from .connection import BaseSSLError, HTTPException 

23from .exceptions import ( 

24 BodyNotHttplibCompatible, 

25 DecodeError, 

26 HTTPError, 

27 IncompleteRead, 

28 InvalidChunkLength, 

29 InvalidHeader, 

30 ProtocolError, 

31 ReadTimeoutError, 

32 ResponseNotChunked, 

33 SSLError, 

34) 

35from .packages import six 

36from .util.response import is_fp_closed, is_response_to_head 

37 

# Module-level logger shared by all classes and helpers in this module.
log = logging.getLogger(__name__)

39 

40 

class DeflateDecoder(object):
    """Decoder for ``Content-Encoding: deflate`` bodies.

    Tolerates both properly zlib-wrapped streams and the raw DEFLATE
    streams some servers send: the first attempt assumes a zlib wrapper,
    and on failure the buffered input is replayed through a raw-DEFLATE
    decompressor.
    """

    def __init__(self):
        self._first_try = True  # still probing for the stream format?
        self._data = b""        # raw input buffered while probing
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Delegate everything else (flush, unused_data, ...) to zlib's object.
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if not self._first_try:
            # Stream format already settled: plain pass-through.
            return self._obj.decompress(data)

        # Probe phase: remember the raw input so it can be replayed if the
        # zlib-wrapped guess turns out to be wrong.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Not zlib-wrapped. Restart with a raw DEFLATE decompressor and
            # replay everything buffered so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            buffered, self._data = self._data, None
            return self.decompress(buffered)
        if out:
            # Producing output proves the zlib-wrapped guess was right.
            self._first_try = False
            self._data = None
        return out

71 

72 

class GzipDecoderState(object):
    """Tracks progress through a (possibly multi-member) gzip stream."""

    FIRST_MEMBER = 0   # still inside the initial gzip member
    OTHER_MEMBERS = 1  # at least one member fully decoded
    SWALLOW_DATA = 2   # error seen; silently discard further input


class GzipDecoder(object):
    """Decoder for ``Content-Encoding: gzip`` bodies, including streams
    made of several concatenated gzip members."""

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Anything not defined here is forwarded to the zlib decompressor.
        return getattr(self._obj, name)

    def decompress(self, data):
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                state_before_error = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover bytes mean another gzip member follows: start a
            # fresh decompressor for it.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

108 

109 

if brotli is not None:

    class BrotliDecoder(object):
        # Supports both 'brotlipy' and 'Brotli' packages since they share an
        # import name. brotlipy exposes Decompressor.decompress()/.flush();
        # Brotli exposes Decompressor.process() and has no flush.
        def __init__(self):
            self._obj = brotli.Decompressor()
            decompress = getattr(self._obj, "decompress", None)
            self.decompress = decompress if decompress is not None else self._obj.process

        def flush(self):
            flush_method = getattr(self._obj, "flush", None)
            if flush_method is not None:
                return flush_method()
            return b""

127 

128 

class MultiDecoder(object):
    """
    Decoder for a comma-separated chain of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore runs in the reverse of the listed order.
    """

    def __init__(self, modes):
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self):
        # The first listed coding was applied last, so its decoder runs
        # last and is the one whose flush matters.
        return self._decoders[0].flush()

    def decompress(self, data):
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

148 

149 

def _get_decoder(mode):
    """Return a decoder instance for a single Content-Encoding token, or a
    :class:`MultiDecoder` when *mode* is a comma-separated chain."""
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    # Anything else falls back to (zlib-or-raw) deflate handling.
    return DeflateDecoder()

161 

162 

class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    # Content-Encoding tokens this class can decode; "br" is only offered
    # when a brotli implementation was importable at module load time.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    # Status codes for which get_redirect_location() reports a redirect.
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

200 

    def __init__(
        self,
        body="",
        headers=None,
        status=0,
        version=0,
        reason=None,
        strict=0,
        preload_content=True,
        decode_content=True,
        original_response=None,
        pool=None,
        connection=None,
        msg=None,
        retries=None,
        enforce_content_length=False,
        request_method=None,
        request_url=None,
        auto_close=True,
    ):

        # Normalize headers into an HTTPHeaderDict so lookups are
        # case-insensitive regardless of what the caller handed us.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._decoder = None  # lazily created by _init_decoder()
        self._body = None  # cached/preloaded body
        self._fp = None  # file-like object the body is read from
        self._original_response = original_response
        self._fp_bytes_read = 0  # raw (wire) bytes consumed so far
        self.msg = msg
        self._request_url = request_url

        # A string/bytes body is stored directly rather than wrapped.
        if body and isinstance(body, (six.string_types, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # A file-like body is read lazily through self._fp.
        if hasattr(body, "read"):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

267 

268 def get_redirect_location(self): 

269 """ 

270 Should we redirect and where to? 

271 

272 :returns: Truthy redirect location string if we got a redirect status 

273 code and valid location. ``None`` if redirect status and no 

274 location. ``False`` if not a redirect status code. 

275 """ 

276 if self.status in self.REDIRECT_STATUSES: 

277 return self.headers.get("location") 

278 

279 return False 

280 

281 def release_conn(self): 

282 if not self._pool or not self._connection: 

283 return 

284 

285 self._pool._put_conn(self._connection) 

286 self._connection = None 

287 

288 def drain_conn(self): 

289 """ 

290 Read and discard any remaining HTTP response data in the response connection. 

291 

292 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. 

293 """ 

294 try: 

295 self.read() 

296 except (HTTPError, SocketError, BaseSSLError, HTTPException): 

297 pass 

298 

299 @property 

300 def data(self): 

301 # For backwards-compat with earlier urllib3 0.4 and earlier. 

302 if self._body: 

303 return self._body 

304 

305 if self._fp: 

306 return self.read(cache_content=True) 

307 

308 @property 

309 def connection(self): 

310 return self._connection 

311 

312 def isclosed(self): 

313 return is_fp_closed(self._fp) 

314 

315 def tell(self): 

316 """ 

317 Obtain the number of bytes pulled over the wire so far. May differ from 

318 the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` 

319 if bytes are encoded on the wire (e.g, compressed). 

320 """ 

321 return self._fp_bytes_read 

322 

323 def _init_length(self, request_method): 

324 """ 

325 Set initial length value for Response content if available. 

326 """ 

327 length = self.headers.get("content-length") 

328 

329 if length is not None: 

330 if self.chunked: 

331 # This Response will fail with an IncompleteRead if it can't be 

332 # received as chunked. This method falls back to attempt reading 

333 # the response before raising an exception. 

334 log.warning( 

335 "Received response with both Content-Length and " 

336 "Transfer-Encoding set. This is expressly forbidden " 

337 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

338 "attempting to process response as Transfer-Encoding: " 

339 "chunked." 

340 ) 

341 return None 

342 

343 try: 

344 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

345 # be sent in a single Content-Length header 

346 # (e.g. Content-Length: 42, 42). This line ensures the values 

347 # are all valid ints and that as long as the `set` length is 1, 

348 # all values are the same. Otherwise, the header is invalid. 

349 lengths = set([int(val) for val in length.split(",")]) 

350 if len(lengths) > 1: 

351 raise InvalidHeader( 

352 "Content-Length contained multiple " 

353 "unmatching values (%s)" % length 

354 ) 

355 length = lengths.pop() 

356 except ValueError: 

357 length = None 

358 else: 

359 if length < 0: 

360 length = None 

361 

362 # Convert status to int for comparison 

363 # In some cases, httplib returns a status of "_UNKNOWN" 

364 try: 

365 status = int(self.status) 

366 except ValueError: 

367 status = 0 

368 

369 # Check for responses that shouldn't include a body 

370 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

371 length = 0 

372 

373 return length 

374 

375 def _init_decoder(self): 

376 """ 

377 Set-up the _decoder attribute if necessary. 

378 """ 

379 # Note: content-encoding value should be case-insensitive, per RFC 7230 

380 # Section 3.2 

381 content_encoding = self.headers.get("content-encoding", "").lower() 

382 if self._decoder is None: 

383 if content_encoding in self.CONTENT_DECODERS: 

384 self._decoder = _get_decoder(content_encoding) 

385 elif "," in content_encoding: 

386 encodings = [ 

387 e.strip() 

388 for e in content_encoding.split(",") 

389 if e.strip() in self.CONTENT_DECODERS 

390 ] 

391 if len(encodings): 

392 self._decoder = _get_decoder(content_encoding) 

393 

394 DECODER_ERROR_CLASSES = (IOError, zlib.error) 

395 if brotli is not None: 

396 DECODER_ERROR_CLASSES += (brotli.error,) 

397 

398 def _decode(self, data, decode_content, flush_decoder): 

399 """ 

400 Decode the data passed in and potentially flush the decoder. 

401 """ 

402 if not decode_content: 

403 return data 

404 

405 try: 

406 if self._decoder: 

407 data = self._decoder.decompress(data) 

408 except self.DECODER_ERROR_CLASSES as e: 

409 content_encoding = self.headers.get("content-encoding", "").lower() 

410 raise DecodeError( 

411 "Received response with content-encoding: %s, but " 

412 "failed to decode it." % content_encoding, 

413 e, 

414 ) 

415 if flush_decoder: 

416 data += self._flush_decoder() 

417 

418 return data 

419 

420 def _flush_decoder(self): 

421 """ 

422 Flushes the decoder. Should only be called if the decoder is actually 

423 being used. 

424 """ 

425 if self._decoder: 

426 buf = self._decoder.decompress(b"") 

427 return buf + self._decoder.flush() 

428 

429 return b"" 

430 

    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        # Tracks whether the wrapped block completed without raising; the
        # finally clause uses it to decide whether the connection is tainted.
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e)

                # An SSL "read operation timed out" is just a read timeout.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError("Connection broken: %r" % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

486 

    def _fp_read(self, amt):
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2 ** 31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            # Chunked workaround path: accumulate reads of at most
            # max_chunk_amt bytes into an in-memory buffer.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2 ** 28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

534 

    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            # Fall back to the instance-wide setting chosen at construction.
            decode_content = self.decode_content

        if self._fp is None:
            # No underlying file object: nothing to read (implicitly None).
            return

        flush_decoder = False
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is None:
                # Whole-body read: the decoder must be flushed afterwards.
                flush_decoder = True
            else:
                cache_content = False
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do. However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (
                        0,
                        None,
                    ):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Account for the raw (pre-decode) bytes pulled off the wire.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data

606 

607 def stream(self, amt=2 ** 16, decode_content=None): 

608 """ 

609 A generator wrapper for the read() method. A call will block until 

610 ``amt`` bytes have been read from the connection or until the 

611 connection is closed. 

612 

613 :param amt: 

614 How much of the content to read. The generator will return up to 

615 much data per iteration, but may return less. This is particularly 

616 likely when using compressed data. However, the empty string will 

617 never be returned. 

618 

619 :param decode_content: 

620 If True, will attempt to decode the body based on the 

621 'content-encoding' header. 

622 """ 

623 if self.chunked and self.supports_chunked_reads(): 

624 for line in self.read_chunked(amt, decode_content=decode_content): 

625 yield line 

626 else: 

627 while not is_fp_closed(self._fp): 

628 data = self.read(amt=amt, decode_content=decode_content) 

629 

630 if data: 

631 yield data 

632 

633 @classmethod 

634 def from_httplib(ResponseCls, r, **response_kw): 

635 """ 

636 Given an :class:`http.client.HTTPResponse` instance ``r``, return a 

637 corresponding :class:`urllib3.response.HTTPResponse` object. 

638 

639 Remaining parameters are passed to the HTTPResponse constructor, along 

640 with ``original_response=r``. 

641 """ 

642 headers = r.msg 

643 

644 if not isinstance(headers, HTTPHeaderDict): 

645 if six.PY2: 

646 # Python 2.7 

647 headers = HTTPHeaderDict.from_httplib(headers) 

648 else: 

649 headers = HTTPHeaderDict(headers.items()) 

650 

651 # HTTPResponse objects in Python 3 don't have a .strict attribute 

652 strict = getattr(r, "strict", 0) 

653 resp = ResponseCls( 

654 body=r, 

655 headers=headers, 

656 status=r.status, 

657 version=r.version, 

658 reason=r.reason, 

659 strict=strict, 

660 original_response=r, 

661 **response_kw 

662 ) 

663 return resp 

664 

665 # Backwards-compatibility methods for http.client.HTTPResponse 

666 def getheaders(self): 

667 warnings.warn( 

668 "HTTPResponse.getheaders() is deprecated and will be removed " 

669 "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.", 

670 category=DeprecationWarning, 

671 stacklevel=2, 

672 ) 

673 return self.headers 

674 

675 def getheader(self, name, default=None): 

676 warnings.warn( 

677 "HTTPResponse.getheader() is deprecated and will be removed " 

678 "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).", 

679 category=DeprecationWarning, 

680 stacklevel=2, 

681 ) 

682 return self.headers.get(name, default) 

683 

684 # Backwards compatibility for http.cookiejar 

685 def info(self): 

686 return self.headers 

687 

688 # Overrides from io.IOBase 

689 def close(self): 

690 if not self.closed: 

691 self._fp.close() 

692 

693 if self._connection: 

694 self._connection.close() 

695 

696 if not self.auto_close: 

697 io.IOBase.close(self) 

698 

699 @property 

700 def closed(self): 

701 if not self.auto_close: 

702 return io.IOBase.closed.__get__(self) 

703 elif self._fp is None: 

704 return True 

705 elif hasattr(self._fp, "isclosed"): 

706 return self._fp.isclosed() 

707 elif hasattr(self._fp, "closed"): 

708 return self._fp.closed 

709 else: 

710 return True 

711 

712 def fileno(self): 

713 if self._fp is None: 

714 raise IOError("HTTPResponse has no file to get a fileno from") 

715 elif hasattr(self._fp, "fileno"): 

716 return self._fp.fileno() 

717 else: 

718 raise IOError( 

719 "The file-like object this HTTPResponse is wrapped " 

720 "around has no file descriptor" 

721 ) 

722 

723 def flush(self): 

724 if ( 

725 self._fp is not None 

726 and hasattr(self._fp, "flush") 

727 and not getattr(self._fp, "closed", False) 

728 ): 

729 return self._fp.flush() 

730 

731 def readable(self): 

732 # This method is required for `io` module compatibility. 

733 return True 

734 

735 def readinto(self, b): 

736 # This method is required for `io` module compatibility. 

737 temp = self.read(len(b)) 

738 if len(temp) == 0: 

739 return 0 

740 else: 

741 b[: len(temp)] = temp 

742 return len(temp) 

743 

744 def supports_chunked_reads(self): 

745 """ 

746 Checks if the underlying file-like object looks like a 

747 :class:`http.client.HTTPResponse` object. We do this by testing for 

748 the fp attribute. If it is present we assume it returns raw chunks as 

749 processed by read_chunked(). 

750 """ 

751 return hasattr(self._fp, "fp") 

752 

    def _update_chunk_length(self):
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            # Still inside a partially-consumed chunk; nothing to parse yet.
            return
        line = self._fp.fp.readline()
        # Discard any chunk extensions ("<size>;ext=val") before parsing.
        line = line.split(b";", 1)[0]
        try:
            # Chunk sizes are hexadecimal.
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line)

766 

767 def _handle_chunk(self, amt): 

768 returned_chunk = None 

769 if amt is None: 

770 chunk = self._fp._safe_read(self.chunk_left) 

771 returned_chunk = chunk 

772 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. 

773 self.chunk_left = None 

774 elif amt < self.chunk_left: 

775 value = self._fp._safe_read(amt) 

776 self.chunk_left = self.chunk_left - amt 

777 returned_chunk = value 

778 elif amt == self.chunk_left: 

779 value = self._fp._safe_read(amt) 

780 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. 

781 self.chunk_left = None 

782 returned_chunk = value 

783 else: # amt > self.chunk_left 

784 returned_chunk = self._fp._safe_read(self.chunk_left) 

785 self._fp._safe_read(2) # Toss the CRLF at the end of the chunk. 

786 self.chunk_left = None 

787 return returned_chunk 

788 

    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:
                return

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # A zero-length chunk marks the end of the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

858 

859 def geturl(self): 

860 """ 

861 Returns the URL that was the source of this response. 

862 If the request that generated this response redirected, this method 

863 will return the final redirect location. 

864 """ 

865 if self.retries is not None and len(self.retries.history): 

866 return self.retries.history[-1].redirect_location 

867 else: 

868 return self._request_url 

869 

    def __iter__(self):
        # Yield the decoded body line by line (each line keeps its b"\n"
        # terminator), buffering partial lines across stream() chunks.
        buffer = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunk = chunk.split(b"\n")
                # Complete the line started in previous chunks.
                yield b"".join(buffer) + chunk[0] + b"\n"
                # Interior pieces are whole lines of their own.
                for x in chunk[1:-1]:
                    yield x + b"\n"
                if chunk[-1]:
                    # Trailing piece had no newline yet; keep it for later.
                    buffer = [chunk[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            # The body did not end with a newline; emit the remainder.
            yield b"".join(buffer)