Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/resumable_media/_helpers.py: 33%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

137 statements  

1# Copyright 2017 Google Inc. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Shared utilities used by both downloads and uploads.""" 

16 

17from __future__ import absolute_import 

18 

19import base64 

20import hashlib 

21import logging 

22import random 

23import warnings 

24 

25from urllib.parse import parse_qs 

26from urllib.parse import urlencode 

27from urllib.parse import urlsplit 

28from urllib.parse import urlunsplit 

29 

30from google.resumable_media import common 

31 

32 

# Lower-case names of the standard HTTP headers referenced by this package.
RANGE_HEADER = "range"
CONTENT_RANGE_HEADER = "content-range"
CONTENT_ENCODING_HEADER = "content-encoding"

# Text of the ``RuntimeWarning`` issued when ``crcmod`` runs without its
# C extension.
_SLOW_CRC32C_WARNING = (
    "Currently using crcmod in pure python form. "
    "This is a slow implementation. "
    "Python 3 has a faster implementation, `google-crc32c`, "
    "which will be used if it is installed."
)

# Lower-case names of the ``x-goog-*`` response headers read by the helpers.
_GENERATION_HEADER = "x-goog-generation"
_HASH_HEADER = "x-goog-hash"
_STORED_CONTENT_ENCODING_HEADER = "x-goog-stored-content-encoding"

# Log message template: the media URL is the single positional field and
# ``checksum_type`` is supplied by keyword.
_MISSING_CHECKSUM = (
    "No {checksum_type} checksum was returned from the service while downloading {}\n"
    "(which happens for composite objects), so client-side content integrity\n"
    "checking is not being performed."
)
_LOGGER = logging.getLogger(__name__)

51 

52 

def do_nothing():
    """No-op used as the default ``callback`` argument; returns ``None``."""
    return None

55 

56 

57def header_required(response, name, get_headers, callback=do_nothing): 

58 """Checks that a specific header is in a headers dictionary. 

59 

60 Args: 

61 response (object): An HTTP response object, expected to have a 

62 ``headers`` attribute that is a ``Mapping[str, str]``. 

63 name (str): The name of a required header. 

64 get_headers (Callable[Any, Mapping[str, str]]): Helper to get headers 

65 from an HTTP response. 

66 callback (Optional[Callable]): A callback that takes no arguments, 

67 to be executed when an exception is being raised. 

68 

69 Returns: 

70 str: The desired header. 

71 

72 Raises: 

73 ~google.resumable_media.common.InvalidResponse: If the header 

74 is missing. 

75 """ 

76 headers = get_headers(response) 

77 if name not in headers: 

78 callback() 

79 raise common.InvalidResponse( 

80 response, "Response headers must contain header", name 

81 ) 

82 

83 return headers[name] 

84 

85 

86def require_status_code(response, status_codes, get_status_code, callback=do_nothing): 

87 """Require a response has a status code among a list. 

88 

89 Args: 

90 response (object): The HTTP response object. 

91 status_codes (tuple): The acceptable status codes. 

92 get_status_code (Callable[Any, int]): Helper to get a status code 

93 from a response. 

94 callback (Optional[Callable]): A callback that takes no arguments, 

95 to be executed when an exception is being raised. 

96 

97 Returns: 

98 int: The status code. 

99 

100 Raises: 

101 ~google.resumable_media.common.InvalidResponse: If the status code 

102 is not one of the values in ``status_codes``. 

103 """ 

104 status_code = get_status_code(response) 

105 if status_code not in status_codes: 

106 if status_code not in common.RETRYABLE: 

107 callback() 

108 raise common.InvalidResponse( 

109 response, 

110 "Request failed with status code", 

111 status_code, 

112 "Expected one of", 

113 *status_codes 

114 ) 

115 return status_code 

116 

117 

def calculate_retry_wait(base_wait, max_sleep, multiplier=2.0):
    """Compute the next base wait and the jittered wait for a retry.

    The base wait is multiplied by ``multiplier`` and capped at
    ``max_sleep``. A random jitter, drawn in whole milliseconds from
    0 to 1000 inclusive, is added to the capped value to produce the wait
    that should actually be applied.

    Args:
        base_wait (float): The current "base" wait time (without jitter).
        max_sleep (float): Upper bound for the new base wait.
        multiplier (float): Factor applied to ``base_wait``.

    Returns:
        Tuple[float, float]: The new base wait time, and the wait time to
        apply (the new base wait plus between 0 and 1 seconds of jitter).
    """
    capped_wait = min(multiplier * base_wait, max_sleep)
    jitter_seconds = 0.001 * random.randint(0, 1000)
    return capped_wait, capped_wait + jitter_seconds

144 

145 

def _get_crc32c_object():
    """Build a crc32c checksum object from whichever backend is importable.

    ``google_crc32c`` is tried first. If importing it fails, ``crcmod`` is
    used instead, and ``_is_fast_crcmod`` is called so that a warning is
    issued when ``crcmod`` is running without its extension.

    Returns:
        object: A ``google_crc32c.Checksum`` or a ``crcmod`` "crc-32c" object.

    Raises:
        ImportError: If neither package can be imported.
    """
    try:
        import google_crc32c  # type: ignore

        return google_crc32c.Checksum()
    except ImportError:
        try:
            import crcmod  # type: ignore

            checksum = crcmod.predefined.Crc("crc-32c")
            # Called for its side effect (the slow-implementation warning).
            _is_fast_crcmod()
            return checksum
        except ImportError:
            raise ImportError("Failed to import either `google-crc32c` or `crcmod`")

166 

167 

def _is_fast_crcmod():
    """Report whether ``crcmod`` is using its extension, warning if not.

    Reads the ``_usingExtension`` attribute of the ``crcmod.crcmod`` module
    (treated as ``False`` when absent) and issues a ``RuntimeWarning`` when
    it is falsy.

    Returns:
        The value of ``crcmod.crcmod._usingExtension``, or ``False`` if the
        attribute does not exist.
    """
    crcmod_impl = __import__(
        "crcmod.crcmod",
        globals(),
        locals(),
        fromlist=["_usingExtension"],
        level=0,
    )
    using_extension = getattr(crcmod_impl, "_usingExtension", False)
    if using_extension:
        return using_extension

    # stacklevel=2 attributes the warning to the caller of this helper.
    warnings.warn(_SLOW_CRC32C_WARNING, RuntimeWarning, stacklevel=2)
    return using_extension

181 

182 

def _get_metadata_key(checksum_type):
    """Return the metadata key for a checksum type.

    ``"md5"`` maps to ``"md5Hash"``; every other value is returned unchanged.
    """
    return "md5Hash" if checksum_type == "md5" else checksum_type

188 

189 

def prepare_checksum_digest(digest_bytestring):
    """Encode a raw checksum digest as base64 text for an HTTP header.

    Args:
        digest_bytestring (bytes): A checksum digest bytestring.

    Returns:
        str: The base64 encoding of ``digest_bytestring``.
    """
    # ``b64encode`` yields ``bytes``; headers need ``str``, so decode it.
    return base64.b64encode(digest_bytestring).decode("utf-8")

202 

203 

def _get_expected_checksum(response, get_headers, media_url, checksum_type):
    """Determine the expected checksum and a matching hasher for a download.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.
        media_url (str): The URL containing the media to be downloaded; only
            used in the log message when no checksum is found.
        checksum_type Optional(str): The checksum type to read from the
            headers, exactly as it appears there (case-sensitive). Must be
            "md5", "crc32c" or None.

    Returns:
        Tuple (Optional[str], object): The expected checksum parsed from the
        ``X-Goog-Hash`` header (``None`` when unavailable or not requested)
        and the checksum object to feed the payload into. The checksum object
        is a ``_DoNothingHash`` whenever the expected checksum is ``None``.

    Raises:
        ValueError: If ``checksum_type`` is not "md5", "crc32c" or None.
    """
    if checksum_type not in ("md5", "crc32c", None):
        raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

    # No checksum requested: nothing to expect and nothing to compute.
    if checksum_type not in ("md5", "crc32c"):
        return (None, _DoNothingHash())

    hash_header = get_headers(response).get(_HASH_HEADER)
    expected = _parse_checksum_header(
        hash_header, response, checksum_label=checksum_type
    )

    if expected is None:
        # The service sent no checksum of this type; log and skip validation.
        _LOGGER.info(
            _MISSING_CHECKSUM.format(media_url, checksum_type=checksum_type.upper())
        )
        return (expected, _DoNothingHash())

    hasher = hashlib.md5() if checksum_type == "md5" else _get_crc32c_object()
    return (expected, hasher)

244 

245 

def _get_uploaded_checksum_from_headers(response, get_headers, checksum_type):
    """Read the checksum of the requested type from the response headers.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.
        checksum_type Optional(str): The checksum type to read from the
            headers, exactly as it appears there (case-sensitive). Must be
            "md5", "crc32c" or None.

    Returns:
        Optional[str]: The checksum parsed from the ``X-Goog-Hash`` header,
        or ``None`` when ``checksum_type`` is ``None`` or the header carries
        no checksum of that type.

    Raises:
        ValueError: If ``checksum_type`` is not "md5", "crc32c" or None.
    """
    if checksum_type not in ("md5", "crc32c", None):
        raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

    # No checksum requested, so the headers are not consulted.
    if checksum_type not in ("md5", "crc32c"):
        return None

    hash_header = get_headers(response).get(_HASH_HEADER)
    return _parse_checksum_header(
        hash_header, response, checksum_label=checksum_type
    )

272 

273 

def _parse_checksum_header(header_value, response, checksum_label):
    """Parses the checksum header from an ``X-Goog-Hash`` value.

    .. _header reference: https://cloud.google.com/storage/docs/xml-api/reference-headers#xgooghash

    Expects ``header_value`` (if not :data:`None`) to be in one of the three
    following formats:

    * ``crc32c=n03x6A==``
    * ``md5=Ojk9c3dhfxgoKVVHYwFbHQ==``
    * ``crc32c=n03x6A==,md5=Ojk9c3dhfxgoKVVHYwFbHQ==``

    Whitespace around each label and value is ignored, and comma-separated
    segments that contain no ``=`` (for example the empty segment produced
    by a trailing comma or an empty header) are skipped rather than raising.

    See the `header reference`_ for more information.

    Args:
        header_value (Optional[str]): The ``X-Goog-Hash`` header from
            a download response.
        response (~requests.Response): The HTTP response object; only used
            when raising.
        checksum_label (str): The label of the header value to read, as in
            the examples above. Typically "md5" or "crc32c".

    Returns:
        Optional[str]: The expected checksum of the response, if it
        can be detected from the ``X-Goog-Hash`` header; otherwise, None.

    Raises:
        ~google.resumable_media.common.InvalidResponse: If there are
            multiple checksums of the requested type in ``header_value``.
    """
    if header_value is None:
        return None

    matches = []
    for segment in header_value.split(","):
        # Split on the first "=" only: base64 values end in "=" padding.
        name, separator, value = segment.partition("=")
        if not separator:
            # Not a ``label=value`` pair; nothing to read from it.
            continue
        # Official docs say "," is the separator, but real-world responses
        # have been seen with ", ", so tolerate surrounding whitespace.
        if name.strip() == checksum_label:
            matches.append(value.strip())

    if not matches:
        return None
    if len(matches) == 1:
        return matches[0]

    raise common.InvalidResponse(
        response,
        "X-Goog-Hash header had multiple ``{}`` values.".format(checksum_label),
        header_value,
        matches,
    )

325 

326 

def _get_checksum_object(checksum_type):
    """Create a checksum object for ``checksum_type``.

    Args:
        checksum_type (Optional[str]): "md5", "crc32c" or None.

    Returns:
        A new ``hashlib.md5`` object for "md5", the result of
        ``_get_crc32c_object()`` for "crc32c", or ``None`` for ``None``.

    Raises:
        ValueError: If ``checksum_type`` is unsupported.
    """
    if checksum_type is None:
        return None
    if checksum_type == "md5":
        return hashlib.md5()
    if checksum_type == "crc32c":
        return _get_crc32c_object()
    raise ValueError("checksum must be ``'md5'``, ``'crc32c'`` or ``None``")

340 

341 

def _parse_generation_header(response, get_headers):
    """Read the object generation from the ``X-Goog-Generation`` header.

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.

    Returns:
        Optional[int]: The header value converted with ``int``, or ``None``
        when the header is absent.
    """
    raw_generation = get_headers(response).get(_GENERATION_HEADER, None)
    return None if raw_generation is None else int(raw_generation)

360 

361 

def _get_generation_from_url(media_url):
    """Extract the ``generation`` query parameter from a media URL.

    Args:
        media_url (str): The URL containing the media to be downloaded.

    Returns:
        Optional[int]: The first ``generation`` value converted with ``int``,
        or ``None`` when the URL has no such query parameter.
    """
    query_string = urlsplit(media_url).query
    generation_values = parse_qs(query_string).get("generation", None)
    if generation_values is not None:
        # ``parse_qs`` maps each name to a list of values; use the first.
        return int(generation_values[0])
    return None

380 

381 

def add_query_parameters(media_url, query_params):
    """Add query parameters to a base url.

    Parameters already present in ``media_url`` are kept, including those
    with blank values; a name that also appears in ``query_params`` takes
    the value from ``query_params``.

    Args:
        media_url (str): The URL containing the media to be downloaded.
        query_params (dict): Names and values of the query parameters to add.
            An empty mapping or ``None`` leaves the URL untouched.

    Returns:
        str: URL with additional query strings appended.
    """
    if not query_params:
        return media_url

    scheme, netloc, path, query, frag = urlsplit(media_url)
    # keep_blank_values=True: without it ``parse_qs`` drops parameters such
    # as ``a=`` and they would be missing from the rebuilt URL.
    existing_params = parse_qs(query, keep_blank_values=True)
    merged_params = {**existing_params, **query_params}
    # doseq=True expands the value lists produced by ``parse_qs``.
    new_query = urlencode(merged_params, doseq=True)
    return urlunsplit((scheme, netloc, path, new_query, frag))

401 

402 

def _is_decompressive_transcoding(response, get_headers):
    """Tell whether the object was served decompressed.

    That is the case when the "x-goog-stored-content-encoding" header is
    "gzip" while the "content-encoding" header is not "gzip". See more at:
    https://cloud.google.com/storage/docs/transcoding#transcoding_and_gzip

    Args:
        response (~requests.Response): The HTTP response object.
        get_headers (callable: response->dict): returns response headers.

    Returns:
        bool: True if decompressive transcoding has occurred; otherwise,
        False.
    """
    response_headers = get_headers(response)
    stored_as_gzip = response_headers.get(_STORED_CONTENT_ENCODING_HEADER) == "gzip"
    if not stored_as_gzip:
        return stored_as_gzip
    # Stored gzipped: transcoding happened unless it is still served as gzip.
    return response_headers.get(CONTENT_ENCODING_HEADER) != "gzip"

418 

419 

class _DoNothingHash(object):
    """Placeholder checksum object whose ``update`` discards its input.

    Used in place of ``hashlib.md5`` or a crc32c checksum object when no
    hash needs to be computed.
    """

    def update(self, unused_chunk):
        """Accept a chunk and ignore it.

        Mirrors the ``update`` method of ``hashlib.md5`` and other checksum
        objects so this class can be passed wherever one is expected.

        Args:
            unused_chunk (bytes): A chunk of data.
        """
        return None