Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/resumable_media/_download.py: 29%

136 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1# Copyright 2017 Google Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Virtual base classes for downloading media from Google APIs.""" 

16 

17 

18import http.client 

19import re 

20 

21from google.resumable_media import _helpers 

22from google.resumable_media import common 

23 

24 

# Pattern for a ``Content-Range`` value of the form
# ``bytes {start}-{end}/{total}``; matching ignores letter case.
_CONTENT_RANGE_RE = re.compile(
    r"bytes (?P<start_byte>\d+)-(?P<end_byte>\d+)/(?P<total_bytes>\d+)",
    re.IGNORECASE,
)
# Status codes treated as a successful download response.
_ACCEPTABLE_STATUS_CODES = (http.client.OK, http.client.PARTIAL_CONTENT)
# HTTP verb used for every download request.
_GET = "GET"
# ``Content-Range`` value compared against on a 416 response to detect an
# empty (zero byte) resource.
_ZERO_CONTENT_RANGE_HEADER = "bytes */0"

32 

33 

class DownloadBase:
    """Common foundation for the download helpers.

    Stores the request parameters and completion state that the concrete
    download types share, and declares the response accessors that
    transport-specific subclasses are expected to supply.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded.
        end (int): The last byte in a range to be downloaded.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
    """

    def __init__(self, media_url, stream=None, start=None, end=None, headers=None):
        self.media_url = media_url
        self._stream = stream
        self.start = start
        self.end = end
        # A caller-supplied mapping is stored as-is (not copied); a fresh
        # dict is created only when nothing was passed.
        self._headers = {} if headers is None else headers
        self._finished = False
        self._retry_strategy = common.RetryStrategy()

    @property
    def finished(self):
        """bool: Whether the download has been marked as completed."""
        return self._finished

    @staticmethod
    def _get_status_code(response):
        """Read the status code out of an HTTP response.

        Placeholder to be overridden by a concrete implementation.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

    @staticmethod
    def _get_headers(response):
        """Read the headers out of an HTTP response.

        Placeholder to be overridden by a concrete implementation.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

    @staticmethod
    def _get_body(response):
        """Read the body out of an HTTP response.

        Placeholder to be overridden by a concrete implementation.

        Args:
            response (object): The HTTP response object.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

105 

106 

class Download(DownloadBase):
    """Single-request download of a resource from a Google API.

    A "slice" of the resource can be requested by giving ``start`` and / or
    ``end``; in typical usage neither is expected to be provided.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            the downloaded resource can be written to.
        start (int): The first byte in a range to be downloaded. If not
            provided, but ``end`` is provided, will download from the
            beginning to ``end`` of the media.
        end (int): The last byte in a range to be downloaded. If not
            provided, but ``start`` is provided, will download from the
            ``start`` to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with the request, e.g. headers for encrypted data.
        checksum (Optional[str]): The type of checksum to compute to verify
            the integrity of the object. The response headers must contain
            a checksum of the requested type. If the headers lack an
            appropriate checksum (for instance in the case of transcoded or
            ranged downloads where the remote service does not know the
            correct checksum) an INFO-level log will be emitted. Supported
            values are "md5", "crc32c" and None.
    """

    def __init__(
        self, media_url, stream=None, start=None, end=None, headers=None, checksum="md5"
    ):
        super().__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.checksum = checksum
        # Bookkeeping fields; this class only initializes them.
        self._bytes_downloaded = 0
        self._expected_checksum = None
        self._checksum_object = None
        self._object_generation = None

    def _prepare_request(self):
        """Build the pieces of the HTTP request without doing any I/O.

        Follows the `sans-I/O`_ philosophy: everything here happens before
        the request and needs no network (or other) I/O. The stored headers
        mapping is updated in place with the byte range (if any) and is the
        same object that gets returned.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always GET)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

        Raises:
            ValueError: If the current :class:`Download` has already
                finished.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError("A download can only be used once.")

        request_headers = self._headers
        add_bytes_range(self.start, self.end, request_headers)
        return _GET, self.media_url, None, request_headers

    def _process_response(self, response):
        """Handle the HTTP response without doing any I/O.

        Follows the `sans-I/O`_ philosophy: everything here happens after
        the request and needs no network (or other) I/O.

        Args:
            response (object): The HTTP response object.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Mark as finished *before* validating, so the instance cannot be
        # used again even when the status check below fails.
        self._finished = True
        _helpers.require_status_code(
            response, _ACCEPTABLE_STATUS_CODES, self._get_status_code
        )

    def consume(self, transport, timeout=None):
        """Consume the resource to be downloaded.

        If a ``stream`` is attached to this download, then the downloaded
        resource will be written to the stream.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

213 

214 

class ChunkedDownload(DownloadBase):
    """Download a resource from a Google API one chunk at a time.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each
            request.
        stream (IO[bytes]): A write-able stream (i.e. file-like object) that
            will be used to concatenate chunks of the resource as they are
            downloaded.
        start (int): The first byte in a range to be downloaded. If not
            provided, defaults to ``0``.
        end (int): The last byte in a range to be downloaded. If not
            provided, will download to the end of the media.
        headers (Optional[Mapping[str, str]]): Extra headers that should
            be sent with each request, e.g. headers for data encryption
            key headers.

    Attributes:
        media_url (str): The URL containing the media to be downloaded.
        start (Optional[int]): The first byte in a range to be downloaded.
        end (Optional[int]): The last byte in a range to be downloaded.
        chunk_size (int): The number of bytes to be retrieved in each request.

    Raises:
        ValueError: If ``start`` is negative.
    """

    def __init__(self, media_url, chunk_size, stream, start=0, end=None, headers=None):
        # Reject a negative start before any state is set up.
        if start < 0:
            raise ValueError(
                "On a chunked download the starting value cannot be negative."
            )
        super().__init__(
            media_url, stream=stream, start=start, end=end, headers=headers
        )
        self.chunk_size = chunk_size
        self._bytes_downloaded = 0
        self._total_bytes = None
        self._invalid = False

    @property
    def bytes_downloaded(self):
        """int: Running count of the bytes downloaded so far."""
        return self._bytes_downloaded

    @property
    def total_bytes(self):
        """Optional[int]: The total number of bytes to be downloaded.

        :data:`None` until a processed response has supplied the value.
        """
        return self._total_bytes

    @property
    def invalid(self):
        """bool: Whether the download has ended up in an invalid state.

        This will occur if a call to :meth:`consume_next_chunk` fails.
        """
        return self._invalid

    def _get_byte_range(self):
        """Work out the byte range to ask for in the next request.

        The range begins right after the bytes already downloaded and spans
        at most ``chunk_size`` bytes, clipped so that it passes neither
        ``end`` nor ``total_bytes - 1`` when those are known.

        Returns:
            Tuple[int, int]: The pair of begin and end byte for the next
            chunked request.
        """
        first_byte = self.start + self.bytes_downloaded
        # Candidate upper bounds; the smallest one wins.
        upper_bounds = [first_byte + self.chunk_size - 1]
        if self.end is not None:
            upper_bounds.append(self.end)
        if self.total_bytes is not None:
            upper_bounds.append(self.total_bytes - 1)
        return first_byte, min(upper_bounds)

    def _prepare_request(self):
        """Build the pieces of the next HTTP request without doing any I/O.

        Follows the `sans-I/O`_ philosophy: everything here happens before
        the request and needs no network (or other) I/O.

        .. note::

            This method will be used multiple times, so ``headers`` will
            be mutated in between requests. However, we don't make a copy
            since the same keys are being updated.

        Returns:
            Tuple[str, str, NoneType, Mapping[str, str]]: The quadruple

              * HTTP verb for the request (always GET)
              * the URL for the request
              * the body of the request (always :data:`None`)
              * headers for the request

        Raises:
            ValueError: If the current download has finished.
            ValueError: If the current download is invalid.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        if self.finished:
            raise ValueError("Download has finished.")
        if self.invalid:
            raise ValueError("Download is invalid and cannot be re-used.")

        first_byte, last_byte = self._get_byte_range()
        request_headers = self._headers
        add_bytes_range(first_byte, last_byte, request_headers)
        return _GET, self.media_url, None, request_headers

    def _make_invalid(self):
        """Flag this instance as invalid.

        Meant to be handed to helpers as a callback, so that they can mark
        the instance as invalid right before raising an exception.
        """
        self._invalid = True

    def _process_response(self, response):
        """Handle the HTTP response for one chunk.

        Follows the `sans-I/O`_ philosophy in that no network I/O is needed
        here. For the time being this **does require** some form of I/O to
        write the chunk to ``stream``; that will (almost) certainly not be
        network I/O.

        A 416 response whose content range is ``bytes */0`` is the
        zero-byte resource case: the download is marked finished and nothing
        else happens.

        Otherwise the chunk length is taken from the ``content-length``
        header, or from the ``content-range`` bounds when a
        ``transfer-encoding`` header is present, and ``bytes_downloaded``
        grows by that amount. The download is marked finished once the
        received end byte reaches ``end`` or ``total - 1``. ``total_bytes``
        is taken from the ``content-range`` header only when not already
        known; an existing value is not cross-checked against the headers.
        Finally the body is written to the stream.

        The ``content-range`` response header is expected to look like
        ``bytes {start}-{end}/{total}``, where ``{end} - {start} + 1``
        should equal the ``Content-Length``.

        Args:
            response (object): The HTTP response object (need headers).

        Raises:
            ~google.resumable_media.common.InvalidResponse: If the number
                of bytes in the body doesn't match the content length header.

        .. _sans-I/O: https://sans-io.readthedocs.io/
        """
        # Zero-byte resource: nothing to write, just mark completion.
        if _check_for_zero_content_range(
            response, self._get_status_code, self._get_headers
        ):
            self._finished = True
            return

        # Validate before touching any state; the helpers receive
        # ``_make_invalid`` so they can flag this instance when they raise.
        _helpers.require_status_code(
            response,
            _ACCEPTABLE_STATUS_CODES,
            self._get_status_code,
            callback=self._make_invalid,
        )
        response_headers = self._get_headers(response)
        payload = self._get_body(response)

        start_byte, end_byte, total_bytes = get_range_info(
            response, self._get_headers, callback=self._make_invalid
        )

        if response_headers.get("transfer-encoding") is not None:
            # 'content-length' header not allowed with chunked encoding,
            # so derive the size from the content range instead.
            num_bytes = end_byte - start_byte + 1
        else:
            content_length = _helpers.header_required(
                response,
                "content-length",
                self._get_headers,
                callback=self._make_invalid,
            )
            num_bytes = int(content_length)
            received = len(payload)
            if received != num_bytes:
                self._make_invalid()
                raise common.InvalidResponse(
                    response,
                    "Response is different size than content-length",
                    "Expected",
                    num_bytes,
                    "Received",
                    received,
                )

        # State updates: byte counter first, then completion, then total.
        self._bytes_downloaded += num_bytes
        reached_requested_end = self.end is not None and end_byte >= self.end
        if reached_requested_end or end_byte >= total_bytes - 1:
            self._finished = True
        # NOTE: ``total_bytes`` from the response is used only if not
        #       already known.
        if self.total_bytes is None:
            self._total_bytes = total_bytes
        # Last step: hand the chunk to the stream.
        self._stream.write(payload)

    def consume_next_chunk(self, transport, timeout=None):
        """Consume the next chunk of the resource to be downloaded.

        Args:
            transport (object): An object which can make authenticated
                requests.
            timeout (Optional[Union[float, Tuple[float, float]]]):
                The number of seconds to wait for the server response.
                Depending on the retry strategy, a request may be repeated
                several times using the same timeout each time.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Raises:
            NotImplementedError: Always, since virtual.
        """
        raise NotImplementedError("This implementation is virtual.")

444 

445 

def add_bytes_range(start, end, headers):
    """Store a ``bytes=...`` range in a headers dictionary.

    Nothing is written when both ``start`` and ``end`` are :data:`None`.
    Otherwise the range header entry of ``headers`` is set (overwriting any
    previous value). Some possible inputs and the corresponding bytes
    ranges::

       >>> headers = {}
       >>> add_bytes_range(None, None, headers)
       >>> headers
       {}
       >>> add_bytes_range(500, 999, headers)
       >>> headers['range']
       'bytes=500-999'
       >>> add_bytes_range(None, 499, headers)
       >>> headers['range']
       'bytes=0-499'
       >>> add_bytes_range(-500, None, headers)
       >>> headers['range']
       'bytes=-500'
       >>> add_bytes_range(9500, None, headers)
       >>> headers['range']
       'bytes=9500-'

    Args:
        start (Optional[int]): The first byte in a range. Can be zero,
            positive, negative or :data:`None`.
        end (Optional[int]): The last byte in a range. Assumed to be
            positive.
        headers (Mapping[str, str]): A headers mapping which can have the
            bytes range added if at least one of ``start`` or ``end``
            is not :data:`None`.
    """
    if start is None and end is None:
        # No range to add.
        return

    if start is None:
        # Only an end: range starts at byte zero.
        # NOTE: This assumes ``end`` is non-negative.
        range_spec = "0-{:d}".format(end)
    elif end is not None:
        # Both bounds given.
        # NOTE: This is invalid if ``start < 0``.
        range_spec = "{:d}-{:d}".format(start, end)
    elif start < 0:
        # Negative start with no end: emitted as-is, e.g. ``bytes=-500``.
        range_spec = "{:d}".format(start)
    else:
        # Open-ended range beginning at ``start``.
        range_spec = "{:d}-".format(start)

    headers[_helpers.RANGE_HEADER] = "bytes=" + range_spec

495 

496 

def get_range_info(response, get_headers, callback=_helpers.do_nothing):
    """Parse start, end and total bytes out of a content range header.

    Args:
        response (object): An HTTP response object.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.
        callback (Optional[Callable]): A callback that takes no arguments,
            to be executed when an exception is being raised.

    Returns:
        Tuple[int, int, int]: The start byte, end byte and total bytes.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If the
            ``Content-Range`` header is not of the form
            ``bytes {start}-{end}/{total}``.
    """
    content_range = _helpers.header_required(
        response, _helpers.CONTENT_RANGE_HEADER, get_headers, callback=callback
    )
    parsed = _CONTENT_RANGE_RE.match(content_range)
    if parsed is not None:
        return tuple(
            int(parsed.group(field))
            for field in ("start_byte", "end_byte", "total_bytes")
        )

    # Header present but malformed: notify the caller, then fail.
    callback()
    raise common.InvalidResponse(
        response,
        "Unexpected content-range header",
        content_range,
        'Expected to be of the form "bytes {start}-{end}/{total}"',
    )

533 

534 

def _check_for_zero_content_range(response, get_status_code, get_headers):
    """Tell whether a response is a 416 with a zero content range.

    This is the special case for handling zero bytes files. The headers are
    only consulted when the status code is 416.

    Args:
        response (object): An HTTP response object.
        get_status_code (Callable[Any, int]): Helper to get a status code
            from a response.
        get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers
            from an HTTP response.

    Returns:
        bool: True if content range total bytes is zero, false otherwise.
    """
    if get_status_code(response) != http.client.REQUESTED_RANGE_NOT_SATISFIABLE:
        return False

    content_range = _helpers.header_required(
        response,
        _helpers.CONTENT_RANGE_HEADER,
        get_headers,
        callback=_helpers.do_nothing,
    )
    return bool(content_range == _ZERO_CONTENT_RANGE_HEADER)