Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/storage/_media/requests/download.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

176 statements  

1# Copyright 2017 Google Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Support for downloading media from Google APIs.""" 

16 

17import urllib3.response # type: ignore 

18import http 

19 

20from google.cloud.storage._media import _download 

21from google.cloud.storage._media import _helpers 

22from google.cloud.storage._media.requests import _request_helpers 

23from google.cloud.storage.exceptions import DataCorruption 

24 

25_CHECKSUM_MISMATCH = """\ 

26Checksum mismatch while downloading: 

27 

28 {} 

29 

30The X-Goog-Hash header indicated an {checksum_type} checksum of: 

31 

32 {} 

33 

34but the actual {checksum_type} checksum of the downloaded contents was: 

35 

36 {} 

37""" 

38 

39_STREAM_SEEK_ERROR = """\ 

40Incomplete download for: 

41{} 

42Error writing to stream while handling a gzip-compressed file download. 

43Please restart the download. 

44""" 

45 

46_RESPONSE_HEADERS_INFO = """\ 

47The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}. 

48The download request read {} bytes of data. 

49If the download was incomplete, please check the network connection and restart the download. 

50""" 

51 

52 

class Download(_request_helpers.RequestsMixin, _download.Download):
    """Helper to manage downloading a resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c", "auto" and None. The default is "auto",
            which will try to detect if the C extension for crc32c is installed
            and fall back to md5 otherwise.
        retry (Optional[google.api_core.retry.Retry]): How to retry the
            RPC. A None value will disable retries. A
            google.api_core.retry.Retry value will enable retries, and the
            object will configure backoff and timeout options.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        The body is written to ``self._stream``, ``self._bytes_downloaded`` is
        advanced by the number of bytes written, and the running checksum is
        updated. Once the body has been read, the checksum is validated unless
        the response is a partial-content response.

        .. note::

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (~requests.Response): The HTTP response object.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ConnectionError: If the checksums disagree, fewer bytes were read
                than the ``x-goog-stored-content-length`` header reports, and
                the ``x-goog-stored-content-encoding`` header is not ``gzip``.
        """

        # Retrieve the expected checksum only once for the download request,
        # then compute and validate the checksum when the full download completes.
        # Retried requests are range requests, and there's no way to detect
        # data corruption for that byte range alone.
        if self._expected_checksum is None and self._checksum_object is None:
            # `_get_expected_checksum()` may return None even if a checksum was
            # requested, in which case it will emit an info log _MISSING_CHECKSUM.
            # If an invalid checksum type is specified, this will raise ValueError.
            expected_checksum, checksum_object = _helpers._get_expected_checksum(
                response, self._get_headers, self.media_url, checksum_type=self.checksum
            )
            # Cache both values on the instance so later calls reuse them
            # instead of recomputing from a (possibly ranged) response.
            self._expected_checksum = expected_checksum
            self._checksum_object = checksum_object
        else:
            expected_checksum = self._expected_checksum
            checksum_object = self._checksum_object

        with response:
            # NOTE: In order to handle compressed streams gracefully, we try
            #       to insert our checksum object into the decompression stream. If
            #       the stream is indeed compressed, this will delegate the checksum
            #       object to the decoder and return a _DoNothingHash here.
            local_checksum_object = _add_decoder(response.raw, checksum_object)

            # This is useful for smaller files, or when the user wants to
            # download the entire file in one go.
            if self.single_shot_download:
                content = response.raw.read(decode_content=True)
                self._stream.write(content)
                self._bytes_downloaded += len(content)
                local_checksum_object.update(content)
                # The raw stream was read directly, so flag the body as consumed
                # by hand (presumably so ``requests`` does not try to read it again).
                response._content_consumed = True
            else:
                body_iter = response.iter_content(
                    chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE,
                    decode_unicode=False,
                )
                for chunk in body_iter:
                    self._stream.write(chunk)
                    self._bytes_downloaded += len(chunk)
                    local_checksum_object.update(chunk)

        # Don't validate the checksum for partial responses.
        if (
            expected_checksum is not None
            and response.status_code != http.client.PARTIAL_CONTENT
        ):
            # Digest ``checksum_object`` rather than ``local_checksum_object``:
            # when a decoder was installed by ``_add_decoder`` it is the decoder
            # that fed ``checksum_object`` with the compressed bytes.
            actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
            if actual_checksum != expected_checksum:
                headers = self._get_headers(response)
                x_goog_encoding = headers.get("x-goog-stored-content-encoding")
                x_goog_length = headers.get("x-goog-stored-content-length")
                content_length_msg = _RESPONSE_HEADERS_INFO.format(
                    x_goog_length, x_goog_encoding, self._bytes_downloaded
                )
                if (
                    x_goog_length
                    and self._bytes_downloaded < int(x_goog_length)
                    and x_goog_encoding != "gzip"
                ):
                    # The library will attempt to trigger a retry by raising a ConnectionError, if
                    # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
                    # (b) the object is not gzip-compressed when stored in Cloud Storage.
                    raise ConnectionError(content_length_msg)
                else:
                    # Otherwise report the mismatch as data corruption, with the
                    # header/byte-count details appended to the message.
                    msg = _CHECKSUM_MISMATCH.format(
                        self.media_url,
                        expected_checksum,
                        actual_checksum,
                        checksum_type=self.checksum.upper(),
                    )
                    msg += content_length_msg
                    raise DataCorruption(response, msg)

    def consume(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream. Without a stream, the request
        is issued without ``stream=True`` and the response is only processed
        and returned.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
            Exception: If the response uses decompressive transcoding and the
                stream cannot be rewound with ``seek(0)``.
        """
        method, _, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
        }
        # Only ask the transport to stream the body when there is a stream to
        # copy it into.
        if self._stream is not None:
            request_kwargs["stream"] = True

        # Assign object generation if generation is specified in the media url.
        if self._object_generation is None:
            self._object_generation = _helpers._get_generation_from_url(self.media_url)

        # Wrap the request business logic in a function to be retried.
        def retriable_request():
            url = self.media_url

            # To restart an interrupted download, read from the offset of last byte
            # received using a range request, and set object generation query param.
            if self._bytes_downloaded > 0:
                _download.add_bytes_range(
                    (self.start or 0) + self._bytes_downloaded, self.end, self._headers
                )
                request_kwargs["headers"] = self._headers

                # Set object generation query param to ensure the same object content is requested.
                if (
                    self._object_generation is not None
                    and _helpers._get_generation_from_url(self.media_url) is None
                ):
                    query_param = {"generation": self._object_generation}
                    url = _helpers.add_query_parameters(self.media_url, query_param)

            result = transport.request(method, url, **request_kwargs)

            # If a generation hasn't been specified, and this is the first response we get, let's record the
            # generation. In future requests we'll specify the generation query param to avoid data races.
            if self._object_generation is None:
                self._object_generation = _helpers._parse_generation_header(
                    result, self._get_headers
                )

            self._process_response(result)

            # With decompressive transcoding, GCS serves back the whole file regardless of the range request,
            # thus we reset the stream position to the start of the stream.
            # See: https://cloud.google.com/storage/docs/transcoding#range
            if self._stream is not None:
                if _helpers._is_decompressive_transcoding(result, self._get_headers):
                    try:
                        self._stream.seek(0)
                    except Exception as exc:
                        msg = _STREAM_SEEK_ERROR.format(url)
                        raise Exception(msg) from exc
                    # The stream was rewound, so the byte counter starts over too.
                    self._bytes_downloaded = 0

                self._write_to_stream(result)

            return result

        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy)

281 

282 

class RawDownload(_request_helpers.RawRequestsMixin, _download.Download):
    """Helper to manage downloading a raw resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c", "auto" and None. The default is "auto",
            which will try to detect if the C extension for crc32c is installed
            and fall back to md5 otherwise.
        retry (Optional[google.api_core.retry.Retry]): How to retry the
            RPC. A None value will disable retries. A
            google.api_core.retry.Retry value will enable retries, and the
            object will configure backoff and timeout options.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        The body is read from ``response.raw`` without content decoding,
        written to ``self._stream``, counted in ``self._bytes_downloaded`` and
        fed to the checksum object. Once the body has been read, the checksum
        is validated unless the response is a partial-content response.

        .. note::

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (~requests.Response): The HTTP response object.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ConnectionError: If the checksums disagree, fewer bytes were read
                than the ``x-goog-stored-content-length`` header reports, and
                the ``x-goog-stored-content-encoding`` header is not ``gzip``.
        """
        # Retrieve the expected checksum only once for the download request,
        # then compute and validate the checksum when the full download completes.
        # Retried requests are range requests, and there's no way to detect
        # data corruption for that byte range alone.
        if self._expected_checksum is None and self._checksum_object is None:
            # `_get_expected_checksum()` may return None even if a checksum was
            # requested, in which case it will emit an info log _MISSING_CHECKSUM.
            # If an invalid checksum type is specified, this will raise ValueError.
            expected_checksum, checksum_object = _helpers._get_expected_checksum(
                response, self._get_headers, self.media_url, checksum_type=self.checksum
            )
            # Cache both values on the instance so later calls reuse them
            # instead of recomputing from a (possibly ranged) response.
            self._expected_checksum = expected_checksum
            self._checksum_object = checksum_object
        else:
            expected_checksum = self._expected_checksum
            checksum_object = self._checksum_object

        with response:
            # This is useful for smaller files, or when the user wants to
            # download the entire file in one go.
            if self.single_shot_download:
                content = response.raw.read()
                self._stream.write(content)
                self._bytes_downloaded += len(content)
                checksum_object.update(content)
            else:
                # ``decode_content=False`` keeps the bytes exactly as received,
                # so the checksum is computed over the undecoded body.
                body_iter = response.raw.stream(
                    _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
                )
                for chunk in body_iter:
                    self._stream.write(chunk)
                    self._bytes_downloaded += len(chunk)
                    checksum_object.update(chunk)
            # The raw stream was read directly, so flag the body as consumed by hand.
            # NOTE(review): assumed to apply to both branches above; confirm the
            # intended nesting against the upstream source.
            response._content_consumed = True

        # Don't validate the checksum for partial responses.
        if (
            expected_checksum is not None
            and response.status_code != http.client.PARTIAL_CONTENT
        ):
            actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())

            if actual_checksum != expected_checksum:
                headers = self._get_headers(response)
                x_goog_encoding = headers.get("x-goog-stored-content-encoding")
                x_goog_length = headers.get("x-goog-stored-content-length")
                content_length_msg = _RESPONSE_HEADERS_INFO.format(
                    x_goog_length, x_goog_encoding, self._bytes_downloaded
                )
                if (
                    x_goog_length
                    and self._bytes_downloaded < int(x_goog_length)
                    and x_goog_encoding != "gzip"
                ):
                    # The library will attempt to trigger a retry by raising a ConnectionError, if
                    # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
                    # (b) the object is not gzip-compressed when stored in Cloud Storage.
                    raise ConnectionError(content_length_msg)
                else:
                    # Otherwise report the mismatch as data corruption, with the
                    # header/byte-count details appended to the message.
                    msg = _CHECKSUM_MISMATCH.format(
                        self.media_url,
                        expected_checksum,
                        actual_checksum,
                        checksum_type=self.checksum.upper(),
                    )
                    msg += content_length_msg
                    raise DataCorruption(response, msg)

    def consume(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream. The request is always issued
        with ``stream=True``.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
            Exception: If the response uses decompressive transcoding and the
                stream cannot be rewound with ``seek(0)``.
        """
        method, _, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
            "stream": True,
        }

        # Assign object generation if generation is specified in the media url.
        if self._object_generation is None:
            self._object_generation = _helpers._get_generation_from_url(self.media_url)

        # Wrap the request business logic in a function to be retried.
        def retriable_request():
            url = self.media_url

            # To restart an interrupted download, read from the offset of last byte
            # received using a range request, and set object generation query param.
            if self._bytes_downloaded > 0:
                _download.add_bytes_range(
                    (self.start or 0) + self._bytes_downloaded, self.end, self._headers
                )
                request_kwargs["headers"] = self._headers

                # Set object generation query param to ensure the same object content is requested.
                if (
                    self._object_generation is not None
                    and _helpers._get_generation_from_url(self.media_url) is None
                ):
                    query_param = {"generation": self._object_generation}
                    url = _helpers.add_query_parameters(self.media_url, query_param)

            result = transport.request(method, url, **request_kwargs)

            # If a generation hasn't been specified, and this is the first response we get, let's record the
            # generation. In future requests we'll specify the generation query param to avoid data races.
            if self._object_generation is None:
                self._object_generation = _helpers._parse_generation_header(
                    result, self._get_headers
                )

            self._process_response(result)

            # With decompressive transcoding, GCS serves back the whole file regardless of the range request,
            # thus we reset the stream position to the start of the stream.
            # See: https://cloud.google.com/storage/docs/transcoding#range
            if self._stream is not None:
                if _helpers._is_decompressive_transcoding(result, self._get_headers):
                    try:
                        self._stream.seek(0)
                    except Exception as exc:
                        msg = _STREAM_SEEK_ERROR.format(url)
                        raise Exception(msg) from exc
                    # The stream was rewound, so the byte counter starts over too.
                    self._bytes_downloaded = 0

                self._write_to_stream(result)

            return result

        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy)

503 

504 

class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload):
    """Fetch a resource from a Google API one chunk per request.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.
        retry (Optional[google.api_core.retry.Retry]): How to retry the
            RPC. A None value will disable retries. A
            google.api_core.retry.Retry value will enable retries, and the
            object will configure backoff and timeout options.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def consume_next_chunk(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Request and process the next chunk of the resource.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ValueError: If the current download has finished.
        """
        method, url, payload, headers = self._prepare_request()

        # The payload is expected to be ``None``; it is forwarded regardless.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
        }

        # One attempt of the request; the retry helper may call it repeatedly.
        def retriable_request():
            response = transport.request(method, url, **request_kwargs)
            self._process_response(response)
            return response

        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy)

584 

585 

class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload):
    """Fetch a raw resource from a Google API one chunk per request.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.
        retry (Optional[google.api_core.retry.Retry]): How to retry the
            RPC. A None value will disable retries. A
            google.api_core.retry.Retry value will enable retries, and the
            object will configure backoff and timeout options.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def consume_next_chunk(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Request and process the next chunk of the resource.

        The request is issued with ``stream=True``.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ValueError: If the current download has finished.
        """
        method, url, payload, headers = self._prepare_request()

        # The payload is expected to be ``None``; it is forwarded regardless.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "stream": True,
            "timeout": timeout,
        }

        # One attempt of the request; the retry helper may call it repeatedly.
        def retriable_request():
            response = transport.request(method, url, **request_kwargs)
            self._process_response(response)
            return response

        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy)

666 

667 

def _add_decoder(response_raw, checksum):
    """Install a checksum-aware ``_decoder`` on a ``urllib3`` response.

    Replacing the decoder lets the checksum observe the compressed bytes
    before they are decoded.

    The decoder is replaced only when the content encoding is ``gzip``, or
    ``br`` while a brotli decoder is available.

    Args:
        response_raw (urllib3.response.HTTPResponse): The raw response for
            an HTTP request.
        checksum (object):
            A checksum which will be updated with compressed bytes.

    Returns:
        object: Either the original ``checksum`` if ``_decoder`` is not
        patched, or a ``_DoNothingHash`` if the decoder is patched, since the
        caller will no longer need to hash to decoded bytes.
    """
    content_encoding = response_raw.headers.get("content-encoding", "").lower()

    # Pick the wrapping decoder for this encoding, if there is one.
    if content_encoding == "gzip":
        decoder_cls = _GzipDecoder
    # Only activate if brotli is installed
    elif content_encoding == "br" and _BrotliDecoder:  # type: ignore
        decoder_cls = _BrotliDecoder
    else:
        # Nothing to patch: the caller keeps hashing the bytes itself.
        return checksum

    response_raw._decoder = decoder_cls(checksum)
    return _helpers._DoNothingHash()

697 

698 

class _GzipDecoder(urllib3.response.GzipDecoder):
    """``urllib3`` gzip decoder that also feeds a checksum.

    Every block of compressed bytes is handed to the checksum before it is
    decompressed, so the checksum covers the compressed payload.

    Args:
        checksum (object):
            A checksum which will be updated with compressed bytes.
    """

    def __init__(self, checksum):
        super(_GzipDecoder, self).__init__()
        self._checksum = checksum

    def decompress(self, data):
        """Hash the compressed bytes, then decompress them.

        Args:
            data (bytes): The compressed bytes to be decompressed.

        Returns:
            bytes: The decompressed bytes from ``data``.
        """
        self._checksum.update(data)
        decompressed = super(_GzipDecoder, self).decompress(data)
        return decompressed

725 

726 

# urllib3.response.BrotliDecoder is only defined when brotli is installed, so
# fall back to ``None`` when it is missing.
if not hasattr(urllib3.response, "BrotliDecoder"):  # pragma: NO COVER
    _BrotliDecoder = None  # type: ignore # pragma: NO COVER

else:

    class _BrotliDecoder:  # type: ignore
        """Proxy around ``urllib3``'s decoder for ``brotli`` encoded bytes.

        Every block of compressed bytes is handed to the checksum before it
        is decoded, so the checksum covers the compressed payload.

        Because BrotliDecoder's decompress method is dynamically created in
        urllib3, a subclass is not practical. Instead, this class holds its
        own urllib3.response.BrotliDecoder instance and forwards to it.

        Args:
            checksum (object):
                A checksum which will be updated with compressed bytes.
        """

        def __init__(self, checksum):
            self._decoder = urllib3.response.BrotliDecoder()
            self._checksum = checksum

        def decompress(self, data):
            """Hash the compressed bytes, then decompress them.

            Args:
                data (bytes): The compressed bytes to be decompressed.

            Returns:
                bytes: The decompressed bytes from ``data``.
            """
            self._checksum.update(data)
            decompressed = self._decoder.decompress(data)
            return decompressed

        def flush(self):
            """Forward ``flush`` to the wrapped decoder and return its result."""
            return self._decoder.flush()