1# Copyright 2017 Google Inc. 
    2# 
    3# Licensed under the Apache License, Version 2.0 (the "License"); 
    4# you may not use this file except in compliance with the License. 
    5# You may obtain a copy of the License at 
    6# 
    7#     http://www.apache.org/licenses/LICENSE-2.0 
    8# 
    9# Unless required by applicable law or agreed to in writing, software 
    10# distributed under the License is distributed on an "AS IS" BASIS, 
    11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
    12# See the License for the specific language governing permissions and 
    13# limitations under the License. 
    14 
    15"""Support for downloading media from Google APIs.""" 
    16 
import http
import http.client

import urllib3.response  # type: ignore

from google.cloud.storage._media import _download
from google.cloud.storage._media import _helpers
from google.cloud.storage._media.requests import _request_helpers
from google.cloud.storage.exceptions import DataCorruption
    24 
    25_CHECKSUM_MISMATCH = """\ 
    26Checksum mismatch while downloading: 
    27 
    28  {} 
    29 
    30The X-Goog-Hash header indicated an {checksum_type} checksum of: 
    31 
    32  {} 
    33 
    34but the actual {checksum_type} checksum of the downloaded contents was: 
    35 
    36  {} 
    37""" 
    38 
    39_STREAM_SEEK_ERROR = """\ 
    40Incomplete download for: 
    41{} 
    42Error writing to stream while handling a gzip-compressed file download. 
    43Please restart the download. 
    44""" 
    45 
    46_RESPONSE_HEADERS_INFO = """\ 
    47The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}. 
    48The download request read {} bytes of data. 
    49If the download was incomplete, please check the network connection and restart the download. 
    50""" 
    51 
    52 
class Download(_request_helpers.RequestsMixin, _download.Download):
    """Helper to manage downloading a resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum Optional([str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c", "auto" and None. The default is "auto",
            which will try to detect if the C extension for crc32c is installed
            and fall back to md5 otherwise.
        retry (Optional[google.api_core.retry.Retry]): How to retry the
            RPC. A None value will disable retries. A
            google.api_core.retry.Retry value will enable retries, and the
            object will configure backoff and timeout options.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        .. note:

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (~requests.Response): The HTTP response object.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
        """

        # Retrieve the expected checksum only once for the download request,
        # then compute and validate the checksum when the full download completes.
        # Retried requests are range requests, and there's no way to detect
        # data corruption for that byte range alone.
        if self._expected_checksum is None and self._checksum_object is None:
            # `_get_expected_checksum()` may return None even if a checksum was
            # requested, in which case it will emit an info log _MISSING_CHECKSUM.
            # If an invalid checksum type is specified, this will raise ValueError.
            expected_checksum, checksum_object = _helpers._get_expected_checksum(
                response, self._get_headers, self.media_url, checksum_type=self.checksum
            )
            self._expected_checksum = expected_checksum
            self._checksum_object = checksum_object
        else:
            # A retried (range) request: reuse the checksum state accumulated
            # by the earlier attempt(s) so the final digest covers all bytes.
            expected_checksum = self._expected_checksum
            checksum_object = self._checksum_object

        with response:
            # NOTE: In order to handle compressed streams gracefully, we try
            # to insert our checksum object into the decompression stream. If
            # the stream is indeed compressed, this will delegate the checksum
            # object to the decoder and return a _DoNothingHash here.
            local_checksum_object = _add_decoder(response.raw, checksum_object)

            # This is useful for smaller files, or when the user wants to
            # download the entire file in one go.
            if self.single_shot_download:
                content = response.raw.read(decode_content=True)
                self._stream.write(content)
                self._bytes_downloaded += len(content)
                local_checksum_object.update(content)
                # Mark the body as consumed so ``requests`` does not attempt
                # to read the raw stream again.
                # NOTE(review): ``_content_consumed`` is a private requests
                # attribute — verify on requests upgrades.
                response._content_consumed = True
            else:
                body_iter = response.iter_content(
                    chunk_size=_request_helpers._SINGLE_GET_CHUNK_SIZE,
                    decode_unicode=False,
                )
                for chunk in body_iter:
                    self._stream.write(chunk)
                    self._bytes_downloaded += len(chunk)
                    local_checksum_object.update(chunk)

        # Don't validate the checksum for partial responses.
        if (
            expected_checksum is not None
            and response.status_code != http.client.PARTIAL_CONTENT
        ):
            # Digest is taken from ``checksum_object`` (not the local alias):
            # if a decoder was inserted above, the decoder updated the real
            # checksum object and ``local_checksum_object`` was a no-op hash.
            actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())
            if actual_checksum != expected_checksum:
                headers = self._get_headers(response)
                x_goog_encoding = headers.get("x-goog-stored-content-encoding")
                x_goog_length = headers.get("x-goog-stored-content-length")
                content_length_msg = _RESPONSE_HEADERS_INFO.format(
                    x_goog_length, x_goog_encoding, self._bytes_downloaded
                )
                if (
                    x_goog_length
                    and self._bytes_downloaded < int(x_goog_length)
                    and x_goog_encoding != "gzip"
                ):
                    # The library will attempt to trigger a retry by raising a ConnectionError, if
                    # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
                    # (b) the object is not gzip-compressed when stored in Cloud Storage.
                    raise ConnectionError(content_length_msg)
                else:
                    msg = _CHECKSUM_MISMATCH.format(
                        self.media_url,
                        expected_checksum,
                        actual_checksum,
                        checksum_type=self.checksum.upper(),
                    )
                    msg += content_length_msg
                    raise DataCorruption(response, msg)

    def consume(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
        """
        method, _, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
        }
        if self._stream is not None:
            # Stream the response so the body can be written to ``_stream``
            # incrementally instead of being buffered in memory.
            request_kwargs["stream"] = True

        # Assign object generation if generation is specified in the media url.
        if self._object_generation is None:
            self._object_generation = _helpers._get_generation_from_url(self.media_url)

        # Wrap the request business logic in a function to be retried.
        def retriable_request():
            url = self.media_url

            # To restart an interrupted download, read from the offset of last byte
            # received using a range request, and set object generation query param.
            if self._bytes_downloaded > 0:
                _download.add_bytes_range(
                    (self.start or 0) + self._bytes_downloaded, self.end, self._headers
                )
                request_kwargs["headers"] = self._headers

                # Set object generation query param to ensure the same object content is requested.
                if (
                    self._object_generation is not None
                    and _helpers._get_generation_from_url(self.media_url) is None
                ):
                    query_param = {"generation": self._object_generation}
                    url = _helpers.add_query_parameters(self.media_url, query_param)

            result = transport.request(method, url, **request_kwargs)

            # If a generation hasn't been specified, and this is the first response we get, let's record the
            # generation. In future requests we'll specify the generation query param to avoid data races.
            if self._object_generation is None:
                self._object_generation = _helpers._parse_generation_header(
                    result, self._get_headers
                )

            self._process_response(result)

            # With decompressive transcoding, GCS serves back the whole file regardless of the range request,
            # thus we reset the stream position to the start of the stream.
            # See: https://cloud.google.com/storage/docs/transcoding#range
            if self._stream is not None:
                if _helpers._is_decompressive_transcoding(result, self._get_headers):
                    try:
                        self._stream.seek(0)
                    except Exception as exc:
                        msg = _STREAM_SEEK_ERROR.format(url)
                        raise Exception(msg) from exc
                    self._bytes_downloaded = 0

                self._write_to_stream(result)

            return result

        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy)
    281 
    282 
class RawDownload(_request_helpers.RawRequestsMixin, _download.Download):
    """Helper to manage downloading a raw resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum Optional([str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c", "auto" and None. The default is "auto",
            which will try to detect if the C extension for crc32c is installed
            and fall back to md5 otherwise.
        retry (Optional[google.api_core.retry.Retry]): How to retry the
            RPC. A None value will disable retries. A
            google.api_core.retry.Retry value will enable retries, and the
            object will configure backoff and timeout options.

            See the retry.py source code and docstrings in this package
            (google.cloud.storage.retry) for information on retry types and how
            to configure them.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def _write_to_stream(self, response):
        """Write response body to a write-able stream.

        .. note:

            This method assumes that the ``_stream`` attribute is set on the
            current download.

        Args:
            response (~requests.Response): The HTTP response object.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
        """
        # Retrieve the expected checksum only once for the download request,
        # then compute and validate the checksum when the full download completes.
        # Retried requests are range requests, and there's no way to detect
        # data corruption for that byte range alone.
        if self._expected_checksum is None and self._checksum_object is None:
            # `_get_expected_checksum()` may return None even if a checksum was
            # requested, in which case it will emit an info log _MISSING_CHECKSUM.
            # If an invalid checksum type is specified, this will raise ValueError.
            expected_checksum, checksum_object = _helpers._get_expected_checksum(
                response, self._get_headers, self.media_url, checksum_type=self.checksum
            )
            self._expected_checksum = expected_checksum
            self._checksum_object = checksum_object
        else:
            # A retried (range) request: reuse the checksum state accumulated
            # by the earlier attempt(s) so the final digest covers all bytes.
            expected_checksum = self._expected_checksum
            checksum_object = self._checksum_object

        with response:
            # This is useful for smaller files, or when the user wants to
            # download the entire file in one go.
            if self.single_shot_download:
                content = response.raw.read()
                self._stream.write(content)
                self._bytes_downloaded += len(content)
                checksum_object.update(content)
            else:
                # Stream the raw bytes with content decoding disabled, so the
                # checksum is computed over the bytes as stored.
                body_iter = response.raw.stream(
                    _request_helpers._SINGLE_GET_CHUNK_SIZE, decode_content=False
                )
                for chunk in body_iter:
                    self._stream.write(chunk)
                    self._bytes_downloaded += len(chunk)
                    checksum_object.update(chunk)
            # Mark the body as consumed so ``requests`` does not attempt to
            # read the raw stream again.
            # NOTE(review): ``_content_consumed`` is a private requests
            # attribute — verify on requests upgrades.
            response._content_consumed = True

        # Don't validate the checksum for partial responses.
        if (
            expected_checksum is not None
            and response.status_code != http.client.PARTIAL_CONTENT
        ):
            actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest())

            if actual_checksum != expected_checksum:
                headers = self._get_headers(response)
                x_goog_encoding = headers.get("x-goog-stored-content-encoding")
                x_goog_length = headers.get("x-goog-stored-content-length")
                content_length_msg = _RESPONSE_HEADERS_INFO.format(
                    x_goog_length, x_goog_encoding, self._bytes_downloaded
                )
                if (
                    x_goog_length
                    and self._bytes_downloaded < int(x_goog_length)
                    and x_goog_encoding != "gzip"
                ):
                    # The library will attempt to trigger a retry by raising a ConnectionError, if
                    # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and
                    # (b) the object is not gzip-compressed when stored in Cloud Storage.
                    raise ConnectionError(content_length_msg)
                else:
                    msg = _CHECKSUM_MISMATCH.format(
                        self.media_url,
                        expected_checksum,
                        actual_checksum,
                        checksum_type=self.checksum.upper(),
                    )
                    msg += content_length_msg
                    raise DataCorruption(response, msg)

    def consume(
        self,
        transport,
        timeout=(
            _request_helpers._DEFAULT_CONNECT_TIMEOUT,
            _request_helpers._DEFAULT_READ_TIMEOUT,
        ),
    ):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (~requests.Session): A ``requests`` object which can
                make authenticated requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            ~requests.Response: The HTTP response returned by ``transport``.

        Raises:
            ~google.cloud.storage.exceptions.DataCorruption: If the download's
                checksum doesn't agree with server-computed checksum.
            ValueError: If the current :class:`Download` has already
                finished.
        """
        method, _, payload, headers = self._prepare_request()
        # NOTE: We assume "payload is None" but pass it along anyway.
        request_kwargs = {
            "data": payload,
            "headers": headers,
            "timeout": timeout,
            # Raw downloads always stream so the body can be written to
            # ``_stream`` incrementally.
            "stream": True,
        }

        # Assign object generation if generation is specified in the media url.
        if self._object_generation is None:
            self._object_generation = _helpers._get_generation_from_url(self.media_url)

        # Wrap the request business logic in a function to be retried.
        def retriable_request():
            url = self.media_url

            # To restart an interrupted download, read from the offset of last byte
            # received using a range request, and set object generation query param.
            if self._bytes_downloaded > 0:
                _download.add_bytes_range(
                    (self.start or 0) + self._bytes_downloaded, self.end, self._headers
                )
                request_kwargs["headers"] = self._headers

                # Set object generation query param to ensure the same object content is requested.
                if (
                    self._object_generation is not None
                    and _helpers._get_generation_from_url(self.media_url) is None
                ):
                    query_param = {"generation": self._object_generation}
                    url = _helpers.add_query_parameters(self.media_url, query_param)

            result = transport.request(method, url, **request_kwargs)

            # If a generation hasn't been specified, and this is the first response we get, let's record the
            # generation. In future requests we'll specify the generation query param to avoid data races.
            if self._object_generation is None:
                self._object_generation = _helpers._parse_generation_header(
                    result, self._get_headers
                )

            self._process_response(result)

            # With decompressive transcoding, GCS serves back the whole file regardless of the range request,
            # thus we reset the stream position to the start of the stream.
            # See: https://cloud.google.com/storage/docs/transcoding#range
            if self._stream is not None:
                if _helpers._is_decompressive_transcoding(result, self._get_headers):
                    try:
                        self._stream.seek(0)
                    except Exception as exc:
                        msg = _STREAM_SEEK_ERROR.format(url)
                        raise Exception(msg) from exc
                    self._bytes_downloaded = 0

                self._write_to_stream(result)

            return result

        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy)
    503 
    504 
    505class ChunkedDownload(_request_helpers.RequestsMixin, _download.ChunkedDownload): 
    506    """Download a resource in chunks from a Google API. 
    507 
    508    Args: 
    509        media_url (str): The URL containing the media to be downloaded. 
    510        chunk_size (int): The number of bytes to be retrieved in each 
    511            request. 
    512        stream (IO[bytes]): A write-able stream (i.e. file-like object) that 
    513            will be used to concatenate chunks of the resource as they are 
    514            downloaded. 
    515        start (int): The first byte in a range to be downloaded. If not 
    516            provided, defaults to ``0``. 
    517        end (int): The last byte in a range to be downloaded. If not 
    518            provided, will download to the end of the media. 
    519        headers (Optional[Mapping[str, str]]): Extra headers that should 
    520            be sent with each request, e.g. headers for data encryption 
    521            key headers. 
    522        retry (Optional[google.api_core.retry.Retry]): How to retry the 
    523            RPC. A None value will disable retries. A 
    524            google.api_core.retry.Retry value will enable retries, and the 
    525            object will configure backoff and timeout options. 
    526 
    527            See the retry.py source code and docstrings in this package 
    528            (google.cloud.storage.retry) for information on retry types and how 
    529            to configure them. 
    530 
    531    Attributes: 
    532        media_url (str): The URL containing the media to be downloaded. 
    533        start (Optional[int]): The first byte in a range to be downloaded. 
    534        end (Optional[int]): The last byte in a range to be downloaded. 
    535        chunk_size (int): The number of bytes to be retrieved in each request. 
    536 
    537    Raises: 
    538        ValueError: If ``start`` is negative. 
    539    """ 
    540 
    541    def consume_next_chunk( 
    542        self, 
    543        transport, 
    544        timeout=( 
    545            _request_helpers._DEFAULT_CONNECT_TIMEOUT, 
    546            _request_helpers._DEFAULT_READ_TIMEOUT, 
    547        ), 
    548    ): 
    549        """Consume the next chunk of the resource to be downloaded. 
    550 
    551        Args: 
    552            transport (~requests.Session): A ``requests`` object which can 
    553                make authenticated requests. 
    554            timeout (Optional[Union[float, Tuple[float, float]]]): 
    555                The number of seconds to wait for the server response. 
    556                Depending on the retry strategy, a request may be repeated 
    557                several times using the same timeout each time. 
    558 
    559                Can also be passed as a tuple (connect_timeout, read_timeout). 
    560                See :meth:`requests.Session.request` documentation for details. 
    561 
    562        Returns: 
    563            ~requests.Response: The HTTP response returned by ``transport``. 
    564 
    565        Raises: 
    566            ValueError: If the current download has finished. 
    567        """ 
    568        method, url, payload, headers = self._prepare_request() 
    569 
    570        # Wrap the request business logic in a function to be retried. 
    571        def retriable_request(): 
    572            # NOTE: We assume "payload is None" but pass it along anyway. 
    573            result = transport.request( 
    574                method, 
    575                url, 
    576                data=payload, 
    577                headers=headers, 
    578                timeout=timeout, 
    579            ) 
    580            self._process_response(result) 
    581            return result 
    582 
    583        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) 
    584 
    585 
    586class RawChunkedDownload(_request_helpers.RawRequestsMixin, _download.ChunkedDownload): 
    587    """Download a raw resource in chunks from a Google API. 
    588 
    589    Args: 
    590        media_url (str): The URL containing the media to be downloaded. 
    591        chunk_size (int): The number of bytes to be retrieved in each 
    592            request. 
    593        stream (IO[bytes]): A write-able stream (i.e. file-like object) that 
    594            will be used to concatenate chunks of the resource as they are 
    595            downloaded. 
    596        start (int): The first byte in a range to be downloaded. If not 
    597            provided, defaults to ``0``. 
    598        end (int): The last byte in a range to be downloaded. If not 
    599            provided, will download to the end of the media. 
    600        headers (Optional[Mapping[str, str]]): Extra headers that should 
    601            be sent with each request, e.g. headers for data encryption 
    602            key headers. 
    603        retry (Optional[google.api_core.retry.Retry]): How to retry the 
    604            RPC. A None value will disable retries. A 
    605            google.api_core.retry.Retry value will enable retries, and the 
    606            object will configure backoff and timeout options. 
    607 
    608            See the retry.py source code and docstrings in this package 
    609            (google.cloud.storage.retry) for information on retry types and how 
    610            to configure them. 
    611 
    612    Attributes: 
    613        media_url (str): The URL containing the media to be downloaded. 
    614        start (Optional[int]): The first byte in a range to be downloaded. 
    615        end (Optional[int]): The last byte in a range to be downloaded. 
    616        chunk_size (int): The number of bytes to be retrieved in each request. 
    617 
    618    Raises: 
    619        ValueError: If ``start`` is negative. 
    620    """ 
    621 
    622    def consume_next_chunk( 
    623        self, 
    624        transport, 
    625        timeout=( 
    626            _request_helpers._DEFAULT_CONNECT_TIMEOUT, 
    627            _request_helpers._DEFAULT_READ_TIMEOUT, 
    628        ), 
    629    ): 
    630        """Consume the next chunk of the resource to be downloaded. 
    631 
    632        Args: 
    633            transport (~requests.Session): A ``requests`` object which can 
    634                make authenticated requests. 
    635            timeout (Optional[Union[float, Tuple[float, float]]]): 
    636                The number of seconds to wait for the server response. 
    637                Depending on the retry strategy, a request may be repeated 
    638                several times using the same timeout each time. 
    639 
    640                Can also be passed as a tuple (connect_timeout, read_timeout). 
    641                See :meth:`requests.Session.request` documentation for details. 
    642 
    643        Returns: 
    644            ~requests.Response: The HTTP response returned by ``transport``. 
    645 
    646        Raises: 
    647            ValueError: If the current download has finished. 
    648        """ 
    649        method, url, payload, headers = self._prepare_request() 
    650 
    651        # Wrap the request business logic in a function to be retried. 
    652        def retriable_request(): 
    653            # NOTE: We assume "payload is None" but pass it along anyway. 
    654            result = transport.request( 
    655                method, 
    656                url, 
    657                data=payload, 
    658                headers=headers, 
    659                stream=True, 
    660                timeout=timeout, 
    661            ) 
    662            self._process_response(result) 
    663            return result 
    664 
    665        return _request_helpers.wait_and_retry(retriable_request, self._retry_strategy) 
    666 
    667 
    668def _add_decoder(response_raw, checksum): 
    669    """Patch the ``_decoder`` on a ``urllib3`` response. 
    670 
    671    This is so that we can intercept the compressed bytes before they are 
    672    decoded. 
    673 
    674    Only patches if the content encoding is ``gzip`` or ``br``. 
    675 
    676    Args: 
    677        response_raw (urllib3.response.HTTPResponse): The raw response for 
    678            an HTTP request. 
    679        checksum (object): 
    680            A checksum which will be updated with compressed bytes. 
    681 
    682    Returns: 
    683        object: Either the original ``checksum`` if ``_decoder`` is not 
    684        patched, or a ``_DoNothingHash`` if the decoder is patched, since the 
    685        caller will no longer need to hash to decoded bytes. 
    686    """ 
    687    encoding = response_raw.headers.get("content-encoding", "").lower() 
    688    if encoding == "gzip": 
    689        response_raw._decoder = _GzipDecoder(checksum) 
    690        return _helpers._DoNothingHash() 
    691    # Only activate if brotli is installed 
    692    elif encoding == "br" and _BrotliDecoder:  # type: ignore 
    693        response_raw._decoder = _BrotliDecoder(checksum) 
    694        return _helpers._DoNothingHash() 
    695    else: 
    696        return checksum 
    697 
    698 
    699class _GzipDecoder(urllib3.response.GzipDecoder): 
    700    """Custom subclass of ``urllib3`` decoder for ``gzip``-ed bytes. 
    701 
    702    Allows a checksum function to see the compressed bytes before they are 
    703    decoded. This way the checksum of the compressed value can be computed. 
    704 
    705    Args: 
    706        checksum (object): 
    707            A checksum which will be updated with compressed bytes. 
    708    """ 
    709 
    710    def __init__(self, checksum): 
    711        super().__init__() 
    712        self._checksum = checksum 
    713 
    714    def decompress(self, data): 
    715        """Decompress the bytes. 
    716 
    717        Args: 
    718            data (bytes): The compressed bytes to be decompressed. 
    719 
    720        Returns: 
    721            bytes: The decompressed bytes from ``data``. 
    722        """ 
    723        self._checksum.update(data) 
    724        return super().decompress(data) 
    725 
    726 
    727# urllib3.response.BrotliDecoder might not exist depending on whether brotli is 
    728# installed. 
    729if hasattr(urllib3.response, "BrotliDecoder"): 
    730 
    731    class _BrotliDecoder: 
    732        """Handler for ``brotli`` encoded bytes. 
    733 
    734        Allows a checksum function to see the compressed bytes before they are 
    735        decoded. This way the checksum of the compressed value can be computed. 
    736 
    737        Because BrotliDecoder's decompress method is dynamically created in 
    738        urllib3, a subclass is not practical. Instead, this class creates a 
    739        captive urllib3.requests.BrotliDecoder instance and acts as a proxy. 
    740 
    741        Args: 
    742            checksum (object): 
    743                A checksum which will be updated with compressed bytes. 
    744        """ 
    745 
    746        def __init__(self, checksum): 
    747            self._decoder = urllib3.response.BrotliDecoder() 
    748            self._checksum = checksum 
    749 
    750        def decompress(self, data): 
    751            """Decompress the bytes. 
    752 
    753            Args: 
    754                data (bytes): The compressed bytes to be decompressed. 
    755 
    756            Returns: 
    757                bytes: The decompressed bytes from ``data``. 
    758            """ 
    759            self._checksum.update(data) 
    760            return self._decoder.decompress(data) 
    761 
    762        def flush(self): 
    763            return self._decoder.flush() 
    764 
    765else:  # pragma: NO COVER 
    766    _BrotliDecoder = None  # type: ignore # pragma: NO COVER