Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/storage/_helpers.py: 39%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

169 statements  

1# Copyright 2014 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Helper functions for Cloud Storage utility classes. 

16 

17These are *not* part of the API. 

18""" 

19 

20import base64 

21import datetime 

22from hashlib import md5 

23import os 

24import sys 

25from urllib.parse import urlsplit 

26from urllib.parse import urlunsplit 

27from uuid import uuid4 

28 

29from google.auth import environment_vars 

30from google.cloud.storage.constants import _DEFAULT_TIMEOUT 

31from google.cloud.storage.retry import DEFAULT_RETRY 

32from google.cloud.storage.retry import DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED 

33 

34 

# Despite the "HOST" in the variable's name, its value includes the scheme.
STORAGE_EMULATOR_ENV_VAR = "STORAGE_EMULATOR_HOST"
"""Environment variable defining host for Storage emulator."""

# The value of this variable includes the scheme as well.
_API_ENDPOINT_OVERRIDE_ENV_VAR = "API_ENDPOINT_OVERRIDE"
"""This is an experimental configuration variable. Use api_endpoint instead."""

_API_VERSION_OVERRIDE_ENV_VAR = "API_VERSION_OVERRIDE"
"""This is an experimental configuration variable used for internal testing."""

# The default storage host is the host template filled in with the default
# universe domain.
_DEFAULT_UNIVERSE_DOMAIN = "googleapis.com"
_STORAGE_HOST_TEMPLATE = "storage.{universe_domain}"
_TRUE_DEFAULT_STORAGE_HOST = _STORAGE_HOST_TEMPLATE.format(
    universe_domain=_DEFAULT_UNIVERSE_DOMAIN,
)

_DEFAULT_SCHEME = "https://"

# Read once, at import time; falls back to "v1" when no override is set.
_API_VERSION = os.environ.get(_API_VERSION_OVERRIDE_ENV_VAR, "v1")
"""API version of the default storage host"""

# Pairs of (snake_case keyword argument, HTTP header) for etag preconditions.
_ETAG_MATCH_PARAMETERS = (
    ("if_etag_match", "If-Match"),
    ("if_etag_not_match", "If-None-Match"),
)

# Pairs of (snake_case keyword argument, camelCase query parameter) for
# generation / metageneration preconditions.
_GENERATION_MATCH_PARAMETERS = (
    ("if_generation_match", "ifGenerationMatch"),
    ("if_generation_not_match", "ifGenerationNotMatch"),
    ("if_metageneration_match", "ifMetagenerationMatch"),
    ("if_metageneration_not_match", "ifMetagenerationNotMatch"),
    ("if_source_generation_match", "ifSourceGenerationMatch"),
    ("if_source_generation_not_match", "ifSourceGenerationNotMatch"),
    ("if_source_metageneration_match", "ifSourceMetagenerationMatch"),
    ("if_source_metageneration_not_match", "ifSourceMetagenerationNotMatch"),
)

# Called with no argument, _NOW() yields the current *local* date and time.
# Timezone-aware values are preferred: _NOW(_UTC) yields the current UTC
# date and time.
_NOW = datetime.datetime.now
_UTC = datetime.timezone.utc

80 

81 

def _get_storage_emulator_override():
    """Look up the Storage emulator setting in the environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the variable named by
              :data:`STORAGE_EMULATOR_ENV_VAR`, or ``None`` when it is unset.
    """
    return os.getenv(STORAGE_EMULATOR_ENV_VAR)

84 

85 

def _get_default_storage_base_url():
    """Return the base URL to use for the storage API.

    :rtype: str
    :returns: The value of the ``API_ENDPOINT_OVERRIDE`` environment variable
              when it is set; otherwise the default scheme followed by the
              default storage host.
    """
    builtin_base_url = _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST
    return os.environ.get(_API_ENDPOINT_OVERRIDE_ENV_VAR, builtin_base_url)

90 

91 

def _get_api_endpoint_override():
    """This is an experimental configuration variable. Use api_endpoint instead.

    :rtype: str or :data:`NoneType`
    :returns: The configured base URL when it differs from the built-in
              default, otherwise ``None``.
    """
    base_url = _get_default_storage_base_url()
    # A value equal to the built-in default is not treated as an override.
    if base_url == _DEFAULT_SCHEME + _TRUE_DEFAULT_STORAGE_HOST:
        return None
    return base_url

97 

98 

def _virtual_hosted_style_base_url(url, bucket, trailing_slash=False):
    """Build a virtual-hosted-style base URL for ``bucket``.

    Only the scheme and netloc of ``url`` are kept; the bucket name is
    prepended to the netloc as an extra label. Any path, query or fragment
    in ``url`` is dropped.

    Not intended for use with netlocs which include a username and password.

    :type url: str
    :param url: URL supplying the scheme and netloc.

    :type bucket: str
    :param bucket: Bucket name to prepend to the netloc.

    :type trailing_slash: bool
    :param trailing_slash: (Optional) If true, the result ends with ``/``.

    :rtype: str
    :returns: ``<scheme>://<bucket>.<netloc>``, optionally followed by ``/``.
    """
    parts = urlsplit(url)
    path = "/" if trailing_slash else ""
    return urlunsplit((parts.scheme, f"{bucket}.{parts.netloc}", path, "", ""))

111 

112 

def _use_client_cert():
    """Report whether client certificates were requested via the environment.

    :rtype: bool
    :returns: ``True`` only when ``GOOGLE_API_USE_CLIENT_CERTIFICATE`` is set
              to exactly ``"true"`` (the comparison is case-sensitive).
    """
    setting = os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE")
    return setting == "true"

115 

116 

def _get_environ_project():
    """Read the project ID from the environment.

    :rtype: str or :data:`NoneType`
    :returns: The value of the variable named by ``environment_vars.PROJECT``
              if set, else that of ``environment_vars.LEGACY_PROJECT``, else
              ``None``.
    """
    legacy_project = os.getenv(environment_vars.LEGACY_PROJECT)
    return os.getenv(environment_vars.PROJECT, legacy_project)

122 

123 

def _validate_name(name):
    """Pre-flight ``Bucket`` name validation.

    :type name: str or :data:`NoneType`
    :param name: Proposed bucket name.

    :rtype: str or :data:`NoneType`
    :returns: ``name`` if valid; ``None`` when ``name`` is ``None``.

    :raises: :exc:`ValueError` if ``name`` is empty, or if its first or last
             character is not alphanumeric.
    """
    if name is None:
        return None

    # The first and last characters must be alphanumeric. An empty name has
    # neither, so it is rejected here with the same error rather than
    # failing on the index lookup below.
    if not name or not (name[0].isalnum() and name[-1].isalnum()):
        raise ValueError("Bucket names must start and end with a number or letter.")
    return name

140 

141 

class _PropertyMixin:
    """Abstract mixin for cloud storage classes with associated properties.

    Concrete subclasses are expected to provide:
      - path
      - client
      - user_project

    :type name: str
    :param name: The name of the object. Bucket names must start and end with a
                 number or letter.
    """

    def __init__(self, name=None):
        self.name = name
        # ``_properties`` holds the field values known locally; ``_changes``
        # holds the names of fields set locally via ``_patch_property``.
        self._properties = {}
        self._changes = set()

    @property
    def path(self):
        """Abstract getter for the object path."""
        raise NotImplementedError

    @property
    def client(self):
        """Abstract getter for the object client."""
        raise NotImplementedError

    @property
    def user_project(self):
        """Abstract getter for the object user_project."""
        raise NotImplementedError

    def _require_client(self, client):
        """Pick the client to use for a request.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :rtype: :class:`google.cloud.storage.client.Client`
        :returns: The client passed in or the currently bound client.
        """
        return self.client if client is None else client

    def _encryption_headers(self):
        """Return any encryption headers needed to fetch the object.

        .. note::
           This lives on the mixin because :meth:`reload` calls it; it is
           really only relevant for :class:`~google.cloud.storage.blob.Blob`.

        :rtype: dict
        :returns: a mapping of encryption-related headers (empty here).
        """
        return {}

    @property
    def _query_params(self):
        """Default query parameters.

        A new dict is built on every access, so callers may mutate the result.
        """
        default_params = {}
        if self.user_project is not None:
            default_params["userProject"] = self.user_project
        return default_params

    def reload(
        self,
        client=None,
        projection="noAcl",
        if_etag_match=None,
        if_etag_not_match=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        soft_deleted=None,
    ):
        """Reload properties from Cloud Storage.

        The locally held properties are replaced by the response, and the
        record of local changes is cleared.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type projection: str
        :param projection: (Optional) If used, must be 'full' or 'noAcl'.
                           Defaults to ``'noAcl'``. Specifies the set of
                           properties to return.

        :type if_etag_match: Union[str, Set[str]]
        :param if_etag_match: (Optional) See :ref:`using-if-etag-match`

        :type if_etag_not_match: Union[str, Set[str]]
        :param if_etag_not_match: (Optional) See :ref:`using-if-etag-not-match`

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type soft_deleted: bool
        :param soft_deleted:
            (Optional) If True, looks for a soft-deleted object. Will only return
            the object metadata if the object exists and is in a soft-deleted state.
            :attr:`generation` is required to be set on the blob if ``soft_deleted`` is set to True.
            See: https://cloud.google.com/storage/docs/soft-delete
        """
        api_client = self._require_client(client)

        # 'acl' and related fields are handled via custom endpoints, which
        # is why the default projection requested here is 'noAcl'.
        params = self._query_params
        params["projection"] = projection

        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        if soft_deleted is not None:
            params["softDeleted"] = soft_deleted
            # A soft-delete reload needs a generation, even for targets
            # (buckets) whose default query params do not carry one.
            params["generation"] = self.generation

        request_headers = self._encryption_headers()
        _add_etag_match_headers(
            request_headers,
            if_etag_match=if_etag_match,
            if_etag_not_match=if_etag_not_match,
        )

        self._set_properties(
            api_client._get_resource(
                self.path,
                query_params=params,
                headers=request_headers,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )

    def _patch_property(self, name, value):
        """Update field of this object's properties.

        Only the given field is written; every other field is left alone,
        and the field name is recorded as changed.

        It **will not** reload the properties from the server. The behavior is
        local only and syncing occurs via :meth:`patch`.

        :type name: str
        :param name: The field name to update.

        :type value: object
        :param value: The value being updated.
        """
        self._changes.add(name)
        self._properties[name] = value

    def _set_properties(self, value):
        """Set the properties for the current object.

        :type value: dict or :class:`google.cloud.storage.batch._FutureDict`
        :param value: The properties to be set.
        """
        self._properties = value
        # Replacing the values wholesale invalidates the record of local edits.
        self._changes = set()

    def patch(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY,
        override_unlocked_retention=False,
    ):
        """Sends all changed properties in a PATCH request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        api_client = self._require_client(client)

        # 'PATCH' is documented not to work properly with 'noAcl', so the
        # full projection is always requested.
        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention

        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        # Only the fields recorded as locally changed are sent.
        changed_fields = {field: self._properties[field] for field in self._changes}

        self._set_properties(
            api_client._patch_resource(
                self.path,
                changed_fields,
                query_params=params,
                _target_object=self,
                timeout=timeout,
                retry=retry,
            )
        )

    def update(
        self,
        client=None,
        if_generation_match=None,
        if_generation_not_match=None,
        if_metageneration_match=None,
        if_metageneration_not_match=None,
        timeout=_DEFAULT_TIMEOUT,
        retry=DEFAULT_RETRY_IF_METAGENERATION_SPECIFIED,
        override_unlocked_retention=False,
    ):
        """Sends all properties in a PUT request.

        Updates the ``_properties`` with the response from the backend.

        If :attr:`user_project` is set, bills the API request to that project.

        :type client: :class:`~google.cloud.storage.client.Client` or
                      ``NoneType``
        :param client: the client to use. If not passed, falls back to the
                       ``client`` stored on the current object.

        :type if_generation_match: long
        :param if_generation_match:
            (Optional) See :ref:`using-if-generation-match`

        :type if_generation_not_match: long
        :param if_generation_not_match:
            (Optional) See :ref:`using-if-generation-not-match`

        :type if_metageneration_match: long
        :param if_metageneration_match:
            (Optional) See :ref:`using-if-metageneration-match`

        :type if_metageneration_not_match: long
        :param if_metageneration_not_match:
            (Optional) See :ref:`using-if-metageneration-not-match`

        :type timeout: float or tuple
        :param timeout:
            (Optional) The amount of time, in seconds, to wait
            for the server response.  See: :ref:`configuring_timeouts`

        :type retry: google.api_core.retry.Retry or google.cloud.storage.retry.ConditionalRetryPolicy
        :param retry:
            (Optional) How to retry the RPC. See: :ref:`configuring_retries`

        :type override_unlocked_retention: bool
        :param override_unlocked_retention:
            (Optional) override_unlocked_retention must be set to True if the operation includes
            a retention property that changes the mode from Unlocked to Locked, reduces the
            retainUntilTime, or removes the retention configuration from the object. See:
            https://cloud.google.com/storage/docs/json_api/v1/objects/patch
        """
        api_client = self._require_client(client)

        params = self._query_params
        params["projection"] = "full"
        if override_unlocked_retention:
            params["overrideUnlockedRetention"] = override_unlocked_retention

        preconditions = {
            "if_generation_match": if_generation_match,
            "if_generation_not_match": if_generation_not_match,
            "if_metageneration_match": if_metageneration_match,
            "if_metageneration_not_match": if_metageneration_not_match,
        }
        _add_generation_match_parameters(params, **preconditions)

        # Unlike ``patch``, the complete set of local properties is sent.
        self._set_properties(
            api_client._put_resource(
                self.path,
                self._properties,
                query_params=params,
                timeout=timeout,
                retry=retry,
                _target_object=self,
            )
        )

493 

494 

def _scalar_property(fieldname):
    """Create a property descriptor around the :class:`_PropertyMixin` helpers.

    :type fieldname: str
    :param fieldname: Key under which the value lives in ``_properties``.

    :rtype: property
    :returns: A property whose getter reads ``self._properties.get(fieldname)``
              and whose setter calls ``self._patch_property(fieldname, value)``.
    """

    @property
    def _accessor(self):
        """Scalar property getter."""
        return self._properties.get(fieldname)

    @_accessor.setter
    def _accessor(self, value):
        """Scalar property setter."""
        self._patch_property(fieldname, value)

    return _accessor

507 

508 

def _write_buffer_to_hash(buffer_object, hash_obj, digest_block_size=8192):
    """Feed the remaining contents of a buffer into a hash, block by block.

    Reading starts at the buffer's current position and continues until a
    read returns an empty block.

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to update a hash object.

    :type hash_obj: object that implements update
    :param hash_obj: A hash object (MD5 or CRC32-C).

    :type digest_block_size: int
    :param digest_block_size: The block size to write to the hash.
                              Defaults to 8192.
    """
    while True:
        chunk = buffer_object.read(digest_block_size)
        if len(chunk) == 0:
            # An empty read marks the end of the buffer.
            return
        hash_obj.update(chunk)

528 

529 

def _base64_md5hash(buffer_object):
    """Get MD5 hash of bytes (as base64).

    :type buffer_object: bytes buffer
    :param buffer_object: Buffer containing bytes used to compute an MD5
                          hash (as base64).

    :rtype: bytes
    :returns: A base64 encoded digest of the MD5 hash.
    """
    # ``usedforsecurity`` is only passed on Python 3.9 and later; older
    # interpreters get the plain constructor.
    md5_options = {"usedforsecurity": False} if sys.version_info >= (3, 9) else {}
    digest = md5(**md5_options)
    _write_buffer_to_hash(buffer_object, digest)
    return base64.b64encode(digest.digest())

547 

548 

def _add_etag_match_headers(headers, **match_parameters):
    """Add etag match conditions to the given headers, in place.

    For each supported keyword whose value is not ``None``, the matching
    header (``If-Match`` / ``If-None-Match``) is set. A single string is
    used as is; any other iterable of strings is joined with ``", "``.

    :type headers: dict
    :param headers: Headers dict.

    :type match_parameters: dict
    :param match_parameters: if*etag*match parameters to add.
    """
    for param_name, header_name in _ETAG_MATCH_PARAMETERS:
        etags = match_parameters.get(param_name)
        if etags is None:
            continue

        # Wrap a lone string so it is not joined character by character.
        etag_values = [etags] if isinstance(etags, str) else etags
        headers[header_name] = ", ".join(etag_values)

565 

566 

def _add_generation_match_parameters(parameters, **match_parameters):
    """Add generation match parameters into the given parameters list.

    Keywords whose value is ``None`` (or that are absent) are skipped. For a
    list, ``(camelCaseName, value)`` tuples are appended; for a dict, the
    camelCase name is used as the key.

    :type parameters: list or dict
    :param parameters: Parameters list or dict.

    :type match_parameters: dict
    :param match_parameters: if*generation*match parameters to add.

    :raises: :exc:`ValueError` if ``parameters`` is not a ``list()``
             or a ``dict()``. The container type is only checked when at
             least one match parameter has a value.
    """
    pending = [
        (camelcase_name, match_parameters[snakecase_name])
        for snakecase_name, camelcase_name in _GENERATION_MATCH_PARAMETERS
        if match_parameters.get(snakecase_name) is not None
    ]
    if not pending:
        # Nothing to add, so ``parameters`` is deliberately left unchecked.
        return

    if isinstance(parameters, list):
        for pair in pending:
            parameters.append(pair)
    elif isinstance(parameters, dict):
        for query_name, query_value in pending:
            parameters[query_name] = query_value
    else:
        raise ValueError("`parameters` argument should be a dict() or a list().")

593 

594 

def _raise_if_more_than_one_set(**kwargs):
    """Raise ``ValueError`` exception if more than one parameter was set.

    A parameter counts as set when its value is not ``None``.

    :type kwargs: dict
    :param kwargs: The mutually exclusive parameters, by name.

    :raises: :class:`~ValueError` whose message lists every name passed in
             ``kwargs`` (not only the ones that were set).
    """
    values_set = [value for value in kwargs.values() if value is not None]
    if len(values_set) <= 1:
        return

    # More than one value is set, so there are at least two names here and
    # the unpacking below always finds a final element.
    *leading_names, final_name = [f"'{name}'" for name in kwargs]
    joined_leading = ", ".join(leading_names)
    raise ValueError(f"Pass at most one of {joined_leading} and {final_name}")

612 

613 

def _bucket_bound_hostname_url(host, scheme=None):
    """Helper to build bucket bound hostname URL.

    :type host: str
    :param host: Host name.

    :type scheme: str
    :param scheme: (Optional) Web scheme. If passed, use it
                   as a scheme in the result URL.

    :rtype: str
    :returns: A bucket bound hostname URL. ``host`` is returned unchanged
              when it already has both a scheme and a netloc, or when no
              ``scheme`` was passed.
    """
    url_parts = urlsplit(host)
    if url_parts.scheme and url_parts.netloc:
        return host

    # Without a scheme there is nothing to prepend; formatting it anyway
    # would produce a bogus URL such as "None://host".
    if not scheme:
        return host

    return f"{scheme}://{host}"

632 

633 

def _get_invocation_id():
    """Build a fresh invocation identifier.

    :rtype: str
    :returns: ``"gccl-invocation-id/"`` followed by a newly generated
              random (version 4) UUID.
    """
    return f"gccl-invocation-id/{uuid4()}"

636 

637 

def _get_default_headers(
    user_agent,
    content_type="application/json; charset=UTF-8",
    x_upload_content_type=None,
    command=None,
):
    """Get the headers for a request.

    :type user_agent: str
    :param user_agent: The user-agent for requests.

    :type content_type: str
    :param content_type: (Optional) Value for the ``content-type`` header.

    :type x_upload_content_type: str
    :param x_upload_content_type:
        (Optional) Value for the ``x-upload-content-type`` header. When
        omitted or empty, ``content_type`` is used instead.

    :type command: str
    :param command:
        (Optional) Information about which interface for the operation was
        used, to be included in the X-Goog-API-Client header. Please leave
        as None unless otherwise directed.

    :rtype: dict
    :returns: The headers to be used for the request.
    """
    # The API-client header is the user agent, a fresh invocation id and,
    # only when a command is given, a command tag.
    command_suffix = f" gccl-gcs-cmd/{command}" if command else ""
    api_client_info = f"{user_agent} {_get_invocation_id()}{command_suffix}"

    upload_content_type = x_upload_content_type or content_type

    default_headers = {
        "Accept": "application/json",
        "Accept-Encoding": "gzip, deflate",
        "User-Agent": user_agent,
        "X-Goog-API-Client": api_client_info,
        "content-type": content_type,
        "x-upload-content-type": upload_content_type,
    }
    return default_headers