# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from google.api_core import exceptions
from google.api_core import retry
import google.api_core.future.polling
from google.auth import exceptions as auth_exceptions  # type: ignore
import requests.exceptions


_RETRYABLE_REASONS = frozenset(
    ["rateLimitExceeded", "backendError", "internalError", "badGateway"]
)

_UNSTRUCTURED_RETRYABLE_TYPES = (
    ConnectionError,
    exceptions.TooManyRequests,
    exceptions.InternalServerError,
    exceptions.BadGateway,
    exceptions.ServiceUnavailable,
    requests.exceptions.ChunkedEncodingError,
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
    auth_exceptions.TransportError,
)

_DEFAULT_RETRY_DEADLINE = 10.0 * 60.0  # 10 minutes

# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) retry
# until the full `_DEFAULT_RETRY_DEADLINE`. This is because the
# `jobs.getQueryResults` REST API translates a job failure into an HTTP error.
#
# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate
# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to
# the `jobs.getQueryResults` API.
#
# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of
# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry
# timeout is reached.
#
# Note: This multiple should actually be a multiple of
# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first
# call from `job_retry()` refreshes the job state without actually restarting
# the query. The second `job_retry()` actually restarts the query. For a more
# detailed explanation, see the comments where we set `restart_query_job = True`
# in `QueryJob.result()`'s inner `is_job_done()` function.
_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE)
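# With the 10 minute `_DEFAULT_RETRY_DEADLINE` above, this works out to
# 2.0 * (2.0 * 600.0) = 2400 seconds, i.e. 40 minutes.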

def _should_retry(exc):
    """Predicate for determining when to retry.

    We retry if and only if the 'reason' is one of `_RETRYABLE_REASONS`, or
    the exception carries no structured errors and is one of the
    `_UNSTRUCTURED_RETRYABLE_TYPES`.
    """
    if not hasattr(exc, "errors") or len(exc.errors) == 0:
        # Check for unstructured error returns, e.g. from GFE
        return isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES)

    reason = exc.errors[0]["reason"]
    return reason in _RETRYABLE_REASONS
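# Illustrative sketch, not part of the original module: how the predicate
# above classifies a structured API error versus an unstructured transport
# error. The exception values below are fabricated purely for demonstration.
def _example_should_retry_classification():
    structured = exceptions.InternalServerError(
        "job failed", errors=[{"reason": "backendError"}]
    )
    unstructured = requests.exceptions.ConnectionError("connection reset")
    not_retryable = exceptions.BadRequest(
        "invalid", errors=[{"reason": "invalidQuery"}]
    )
    # Structured error with a retryable reason -> retried.
    assert _should_retry(structured)
    # No structured errors attached, but a retryable transport type -> retried.
    assert _should_retry(unstructured)
    # Structured error with a non-retryable reason -> not retried.
    assert not _should_retry(not_retryable)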

DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE)
"""The default retry object.

Any method with a ``retry`` parameter will be retried automatically,
with reasonable defaults. To disable retry, pass ``retry=None``.
To modify the default retry behavior, call a ``with_XXX`` method
on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""

def _should_retry_get_job_conflict(exc):
    """Predicate for determining when to retry a jobs.get call after a conflict error.

    Sometimes we get a 404 after a Conflict. In this case, we
    have pretty high confidence that by retrying the 404, we'll
    (hopefully) eventually recover the job.
    https://github.com/googleapis/python-bigquery/issues/2134

    Note: we may be able to extend this to user-specified predicates
    after https://github.com/googleapis/python-api-core/issues/796
    to tweak existing Retry object predicates.
    """
    return isinstance(exc, exceptions.NotFound) or _should_retry(exc)


# Pick a deadline smaller than our other deadlines since we want to time out
# before those expire.
_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0
_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry(
    predicate=_should_retry_get_job_conflict,
    deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE,
)
"""Private, may be removed in future."""

# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We
# briefly had a default timeout, but even setting it at more than twice the
# theoretical server-side default timeout of 2 minutes was not enough for
# complex queries. See:
# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647
DEFAULT_TIMEOUT = None
"""The default API timeout.

This is the time to wait per request. To adjust the total wait time, set a
deadline on the retry object.
"""

job_retry_reasons = (
    "rateLimitExceeded",
    "backendError",
    "internalError",
    "jobRateLimitExceeded",
)

def _job_should_retry(exc):
    # Sometimes we have ambiguous errors, such as 'backendError' which could
    # be due to an API problem or a job problem. For these, make sure we retry
    # our is_job_done() function.
    #
    # Note: This won't restart the job unless we know for sure it's because of
    # the job status and set restart_query_job = True in that loop. This means
    # that we might end up calling this predicate twice for the same job
    # but from different paths: (1) from jobs.getQueryResults RetryError and
    # (2) from translating the job error from the body of a jobs.get response.
    #
    # Note: If we start retrying job types other than queries where we don't
    # call the problematic getQueryResults API to check the status, we need
    # to provide a different predicate, as there shouldn't be ambiguous
    # errors in those cases.
    if isinstance(exc, exceptions.RetryError):
        exc = exc.cause

    # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes
    # retriable errors make their way here. Because of the separate
    # `restart_query_job` logic to make sure we aren't restarting non-failed
    # jobs, it should be safe to continue and not totally fail our attempt at
    # waiting for the query to complete.
    if _should_retry(exc):
        return True

    if not hasattr(exc, "errors") or len(exc.errors) == 0:
        return False

    reason = exc.errors[0]["reason"]
    return reason in job_retry_reasons
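# Illustrative sketch, not part of the original module: `_job_should_retry`
# unwraps a RetryError raised by a timed-out polling loop and applies the
# reason-based check to the underlying cause. The values are fabricated.
def _example_job_should_retry_unwraps_retry_error():
    cause = exceptions.InternalServerError(
        "job failed", errors=[{"reason": "jobRateLimitExceeded"}]
    )
    wrapped = exceptions.RetryError("deadline exceeded", cause=cause)
    # `jobRateLimitExceeded` is not in `_RETRYABLE_REASONS`, but it is in
    # `job_retry_reasons`, so the job-level predicate returns True.
    assert _job_should_retry(wrapped)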

DEFAULT_JOB_RETRY = retry.Retry(
    predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE
)
"""
The default job retry object.
"""

def _query_job_insert_should_retry(exc):
    # Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes
    # we get a 404 error. In this case, if we get this far, assume that the job
    # doesn't actually exist and try again. We can't add 404 to the default
    # job_retry because that happens for errors like "this table does not
    # exist", which probably won't resolve with a retry.
    if isinstance(exc, exceptions.RetryError):
        exc = exc.cause

    if isinstance(exc, exceptions.NotFound):
        message = exc.message
        # Don't try to retry table/dataset not found, just job not found.
        # The URL contains jobs, so use whitespace to disambiguate.
        return message is not None and " job" in message.lower()

    return _job_should_retry(exc)
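# Illustrative sketch, not part of the original module: the 404 handling above
# only retries when the missing resource looks like a job, not a table or
# dataset. The error messages are fabricated for demonstration.
def _example_query_job_insert_retry_on_missing_job():
    missing_job = exceptions.NotFound("Not found: Job my-project:US.job_abc123")
    missing_table = exceptions.NotFound("Not found: Table my-project.dataset.t")
    assert _query_job_insert_should_retry(missing_job)
    assert not _query_job_insert_should_retry(missing_table)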

_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry(
    predicate=_query_job_insert_should_retry,
    # jobs.insert doesn't wait for the job to complete, so we don't need the
    # long _DEFAULT_JOB_DEADLINE for this part.
    deadline=_DEFAULT_RETRY_DEADLINE,
)
"""Private, may be removed in future."""


DEFAULT_GET_JOB_TIMEOUT = 128
"""
Default timeout for Client.get_job().
"""

POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
"""
Default value defined in google.api_core.future.polling.PollingFuture.
"""