Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/resumable_media/_download.py: 29%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

137 statements  

1# Copyright 2017 Google Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Virtual base classes for downloading media from Google APIs.""" 

16 

17import http.client 

18import re 

19 

20from google.resumable_media import _helpers 

21from google.resumable_media import common 

22 

23 

# Parses a ``Content-Range`` response header of the form
# ``bytes {start}-{end}/{total}``; the three integers are exposed as the
# named groups ``start_byte``, ``end_byte`` and ``total_bytes``.
_CONTENT_RANGE_RE = re.compile(
    r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
    flags=re.IGNORECASE,
)
# Status codes accepted for a download response (full body or partial content).
_ACCEPTABLE_STATUS_CODES = (http.client.OK, http.client.PARTIAL_CONTENT)
# Every download request in this module uses this HTTP verb.
_GET = "GET"
# ``Content-Range`` value that, on a 416 response, identifies a zero-byte
# resource (see ``_check_for_zero_content_range``).
_ZERO_CONTENT_RANGE_HEADER = "bytes */0"

31 

32 

class DownloadBase:
    """Base class for download helpers.

    Defines core shared behavior across different download types. The
    response accessors (``_get_status_code``, ``_get_headers`` and
    ``_get_body``) are virtual here and always raise
    :class:`NotImplementedError`.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded.
        end (int): The last byte in a range to be downloaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
        self.media_url = media_url
        self._stream = stream
        self.start = start
        self.end = end
        # A caller-supplied mapping is stored as-is (not copied); a fresh
        # dict is only created when no headers were given.
        if headers is None:
            headers = {}
        self._headers = headers
        self._finished = False
        self._retry_strategy = common.RetryStrategy()

    @property
    def finished(self):
        """bool: Flag indicating if the download has completed."""
        return self._finished

    @staticmethod
    def _get_status_code(response):
        """Access the status code from an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

    @staticmethod
    def _get_headers(response):
        """Access the headers from an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

    @staticmethod
    def _get_body(response):
        """Access the response body from an HTTP response.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

104 

105 

class Download(DownloadBase):
    """Helper to manage downloading a resource from a Google API.

    "Slices" of the resource can be retrieved by specifying a range
    with ``start`` and / or ``end``. However, in typical usage, neither
    ``start`` nor ``end`` is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None.
    """

    def __init__(
        self, media_url, stream=None, start=None, end=None, headers=None, checksum="md5"
    ):
        super().__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.checksum = checksum
        # Bookkeeping initialised here; nothing in this class updates it.
        self._bytes_downloaded = 0
        self._expected_checksum = None
        self._checksum_object = None
        self._object_generation = None

    def _prepare_request(self):
        """Prepare the contents of an HTTP request.

        This is everything that must be done before a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

            * HTTP verb for the request (always GET)
            * the URL for the request
            * the body of the request (always :data:`None`)
            * headers for the request

        Raises:
            ValueError: If the current :class:`Download` has already
                finished.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError("A download can only be used once.")

        # The stored headers mapping is updated in place and then returned.
        add_bytes_range(self.start, self.end, self._headers)
        return _GET, self.media_url, None, self._headers

    def _process_response(self, response):
        """Process the response from an HTTP request.

        This is everything that must be done after a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        Args:
            response (object): The HTTP response object.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Tombstone the current Download so it cannot be used again. This
        # happens before the status check, so the instance is marked as
        # finished even if that check raises.
        self._finished = True
        _helpers.require_status_code(
            response, _ACCEPTABLE_STATUS_CODES, self._get_status_code
        )

    def consume(self, transport, timeout=None):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

212 

213 

class ChunkedDownload(DownloadBase):
    """Download a resource in chunks from a Google API.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def __init__(
        self, media_url, chunk_size, stream, start=0, end=None, headers=None
    ):
        if start < 0:
            raise ValueError(
                "On a chunked download the starting value cannot be negative."
            )
        super().__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.chunk_size = chunk_size
        self._bytes_downloaded = 0
        self._total_bytes = None
        self._invalid = False

    @property
    def bytes_downloaded(self):
        """int: Number of bytes that have been downloaded."""
        return self._bytes_downloaded

    @property
    def total_bytes(self):
        """Optional[int]: The total number of bytes to be downloaded."""
        return self._total_bytes

    @property
    def invalid(self):
        """bool: Indicates if the download is in an invalid state.

        This will occur if a call to :meth:`consume_next_chunk` fails.
        """
        return self._invalid

    def _get_byte_range(self):
        """Determines the byte range for the next request.

        Returns:
            Tuple[int, int]: The pair of begin and end byte for the next
            chunked request.
        """
        curr_start = self.start + self.bytes_downloaded
        # Byte ranges are inclusive, hence the ``- 1``.
        curr_end = curr_start + self.chunk_size - 1
        # Make sure ``curr_end`` does not exceed ``end``.
        if self.end is not None:
            curr_end = min(curr_end, self.end)
        # Make sure ``curr_end`` does not exceed ``total_bytes - 1``.
        if self.total_bytes is not None:
            curr_end = min(curr_end, self.total_bytes - 1)
        return curr_start, curr_end

    def _prepare_request(self):
        """Prepare the contents of an HTTP request.

        This is everything that must be done before a request that doesn't
        require network I/O (or other I/O). This is based on the `sans-I/O`_
        philosophy.

        .. note::

            This method will be used multiple times, so ``headers`` will
            be mutated in between requests. However, we don't make a copy
            since the same keys are being updated.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

            * HTTP verb for the request (always GET)
            * the URL for the request
            * the body of the request (always :data:`None`)
            * headers for the request

        Raises:
            ValueError: If the current download has finished.
            ValueError: If the current download is invalid.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError("Download has finished.")
        if self.invalid:
            raise ValueError("Download is invalid and cannot be re-used.")

        curr_start, curr_end = self._get_byte_range()
        add_bytes_range(curr_start, curr_end, self._headers)
        return _GET, self.media_url, None, self._headers

    def _make_invalid(self):
        """Simple setter for ``invalid``.

        This is intended to be passed along as a callback to helpers that
        raise an exception so they can mark this instance as invalid before
        raising.
        """
        self._invalid = True

    def _process_response(self, response):
        """Process the response from an HTTP request.

        This is everything that must be done after a request that doesn't
        require network I/O. This is based on the `sans-I/O`_ philosophy.

        For the time being, this **does require** some form of I/O to write
        a chunk to ``stream``. However, this will (almost) certainly not be
        network I/O.

        Updates the current state after consuming a chunk. First,
        increments ``bytes_downloaded`` by the number of bytes in the
        ``content-length`` header. When a ``transfer-encoding`` header is
        present, the count is derived from the ``content-range`` header
        instead and the body length is not checked.

        If ``total_bytes`` is already set, this assumes (but does not check)
        that we already have the correct value and doesn't bother to check
        that it agrees with the headers.

        We expect the **total** length to be in the ``content-range`` header,
        but this header is only present on requests which sent the ``range``
        header. This response header should be of the form
        ``bytes {start}-{end}/{total}`` and ``{end} - {start} + 1``
        should be the same as the ``Content-Length``.

        Args:
            response (object): The HTTP response object (need headers).

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the number
                of bytes in the body doesn't match the content length header,
                or if the ``content-range`` header is not of the form
                ``bytes {start}-{end}/{total}``.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Verify the response before updating the current instance. A 416
        # response whose content range is ``bytes */0`` is the zero-byte
        # resource case: mark the download finished without writing anything.
        if _check_for_zero_content_range(
            response, self._get_status_code, self._get_headers
        ):
            self._finished = True
            return

        # NOTE(review): presumably raises (after invoking the callback) when
        # the status code is not acceptable -- confirm against ``_helpers``.
        _helpers.require_status_code(
            response,
            _ACCEPTABLE_STATUS_CODES,
            self._get_status_code,
            callback=self._make_invalid,
        )
        headers = self._get_headers(response)
        response_body = self._get_body(response)

        start_byte, end_byte, total_bytes = get_range_info(
            response, self._get_headers, callback=self._make_invalid
        )

        transfer_encoding = headers.get("transfer-encoding")

        if transfer_encoding is None:
            content_length = _helpers.header_required(
                response,
                "content-length",
                self._get_headers,
                callback=self._make_invalid,
            )
            num_bytes = int(content_length)
            if len(response_body) != num_bytes:
                self._make_invalid()
                raise common.InvalidResponse(
                    response,
                    "Response is different size than content-length",
                    "Expected",
                    num_bytes,
                    "Received",
                    len(response_body),
                )
        else:
            # 'content-length' header not allowed with chunked encoding.
            num_bytes = end_byte - start_byte + 1

        # First update ``bytes_downloaded``.
        self._bytes_downloaded += num_bytes
        # If the end byte is past ``end`` or ``total_bytes - 1`` we are done.
        if self.end is not None and end_byte >= self.end:
            self._finished = True
        elif end_byte >= total_bytes - 1:
            self._finished = True
        # NOTE: We only use ``total_bytes`` if not already known.
        if self.total_bytes is None:
            self._total_bytes = total_bytes
        # Write the response body to the stream.
        self._stream.write(response_body)

    def consume_next_chunk(self, transport, timeout=None):
        """Consume the next chunk of the resource to be downloaded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

443 

444 

def add_bytes_range(start, end, headers):
    """Add a bytes range to a header dictionary.

    The ``headers`` mapping is modified in place; nothing is returned.

    Some possible inputs and the corresponding bytes ranges::

       >>> headers = {}
       >>> add_bytes_range(None, None, headers)
       >>> headers
       {}
       >>> add_bytes_range(500, 999, headers)
       >>> headers['range']
       'bytes=500-999'
       >>> add_bytes_range(None, 499, headers)
       >>> headers['range']
       'bytes=0-499'
       >>> add_bytes_range(-500, None, headers)
       >>> headers['range']
       'bytes=-500'
       >>> add_bytes_range(9500, None, headers)
       >>> headers['range']
       'bytes=9500-'

    Args:
        start (Optional[int]): The first byte in a range. Can be zero,
            positive, negative or :data:`None`.
        end (Optional[int]): The last byte in a range. Assumed to be
            positive.
        headers (Mapping[str, str]): A headers mapping which can have the
            bytes range added if at least one of ``start`` or ``end``
            is not :data:`None`.
    """
    if start is None and end is None:
        # No range to add; leave ``headers`` untouched.
        return

    if start is None:
        # NOTE: This assumes ``end`` is non-negative.
        bytes_range = "0-{:d}".format(end)
    elif end is None:
        if start < 0:
            # A negative start with no end yields a suffix range; the minus
            # sign comes from formatting ``start`` itself.
            bytes_range = "{:d}".format(start)
        else:
            bytes_range = "{:d}-".format(start)
    else:
        # NOTE: This is invalid if ``start < 0``.
        bytes_range = "{:d}-{:d}".format(start, end)

    headers[_helpers.RANGE_HEADER] = "bytes=" + bytes_range

494 

495 

def get_range_info(response, get_headers, callback=_helpers.do_nothing):
    """Get the start, end and total bytes from a content range header.

    Args:
        response (object): An HTTP response object.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        Tuple[int, int, int]: The start byte, end byte and total bytes.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the
            ``Content-Range`` header is not of the form
            ``bytes {start}-{end}/{total}``.
    """
    content_range = _helpers.header_required(
        response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback
    )
    match = _CONTENT_RANGE_RE.match(content_range)
    if match is None:
        # Let the caller record the failure before the exception propagates.
        callback()
        raise common.InvalidResponse(
            response,
            "Unexpected content-range header",
            content_range,
            'Expected to be of the form "bytes {start}-{end}/{total}"',
        )

    return (
        int(match.group("start_byte")),
        int(match.group("end_byte")),
        int(match.group("total_bytes")),
    )

532 

533 

def _check_for_zero_content_range(response, get_status_code, get_headers):
    """Validate if response status code is 416 and content range is zero.

    This is the special case for handling zero bytes files.

    Args:
        response (object): An HTTP response object.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.

    Returns:
        bool: True if content range total bytes is zero, false otherwise.
    """
    if get_status_code(response) != http.client.REQUESTED_RANGE_NOT_SATISFIABLE:
        # Only a 416 response can signal the zero-byte case; the headers
        # are not inspected for any other status.
        return False

    content_range = _helpers.header_required(
        response,
        _helpers.CONTENT_RANGE_HEADER,
        get_headers,
        callback=_helpers.do_nothing,
    )
    if content_range == _ZERO_CONTENT_RANGE_HEADER:
        return True
    return False