1# Copyright 2018 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15import logging
16
17from google.api_core import exceptions
18from google.api_core import retry
19import google.api_core.future.polling
20from google.auth import exceptions as auth_exceptions # type: ignore
21import requests.exceptions
22
# Module-level logger; `_should_retry` uses it to trace unstructured errors.
_LOGGER = logging.getLogger(__name__)

# Structured error "reason" codes that `_should_retry` treats as retryable.
_RETRYABLE_REASONS = frozenset(
    {
        "rateLimitExceeded",
        "backendError",
        "internalError",
        "badGateway",
    }
)

# Exception types that `_should_retry` retries when no structured "reason"
# can be read from the error.
_UNSTRUCTURED_RETRYABLE_TYPES = (
    # Python built-in.
    ConnectionError,
    # google-api-core exception classes.
    exceptions.TooManyRequests,
    exceptions.InternalServerError,
    exceptions.BadGateway,
    exceptions.ServiceUnavailable,
    # `requests` exception classes.
    requests.exceptions.ChunkedEncodingError,
    requests.exceptions.ConnectionError,
    requests.exceptions.Timeout,
    # google-auth exception class.
    auth_exceptions.TransportError,
)
40
# Deadline, in seconds, passed to the default (non-job) retry objects in this
# module.
_DEFAULT_RETRY_DEADLINE = 10.0 * 60.0  # 10 minutes

# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) are
# retried until the full `_DEFAULT_RETRY_DEADLINE` elapses. This is because
# the `jobs.getQueryResults` REST API translates a job failure into an HTTP
# error.
#
# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate
# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to
# the `jobs.getQueryResults` API.
#
# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of
# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry
# timeout is reached.
#
# Note: This multiple should actually be a multiple of
# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first
# call from `job_retry()` refreshes the job state without actually restarting
# the query. The second `job_retry()` actually restarts the query. For a more
# detailed explanation, see the comments where we set `restart_query_job = True`
# in `QueryJob.result()`'s inner `is_job_done()` function.
_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE)  # 40 minutes
62
63
def _should_retry(exc):
    """Decide whether an API error is worth retrying.

    When the exception carries a structured error list, the decision is
    based solely on whether the first entry's ``"reason"`` is one of
    ``_RETRYABLE_REASONS``. Otherwise the decision is based on whether the
    exception is an instance of one of ``_UNSTRUCTURED_RETRYABLE_TYPES``.

    Args:
        exc: The exception raised by the failed call.

    Returns:
        bool: ``True`` if the call should be retried, ``False`` otherwise.
    """
    try:
        first_error = exc.errors[0]
        reason = first_error["reason"]
    except (AttributeError, IndexError, TypeError, KeyError):
        # No usable structured reason: the `errors` attribute is missing,
        # empty, not subscriptable, or its first entry has no "reason"
        # (e.g. gRPC exceptions). Fall back to checking the exception type.
        _LOGGER.debug("Inspecting unstructured error for retry: %r", exc)
        retryable = isinstance(exc, _UNSTRUCTURED_RETRYABLE_TYPES)
    else:
        retryable = reason in _RETRYABLE_REASONS

    return retryable
79
80
# Retry policy that uses `_should_retry` as its predicate and
# `_DEFAULT_RETRY_DEADLINE` as its deadline.
DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE)
"""The default retry object.

Any method with a ``retry`` parameter will be retried automatically,
with reasonable defaults. To disable retry, pass ``retry=None``.
To modify the default retry behavior, call a ``with_XXX`` method
on ``DEFAULT_RETRY``. For example, to change the deadline to 30 seconds,
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""
90
91
def _should_retry_get_job_conflict(exc):
    """Decide whether to retry a jobs.get call issued after a conflict error.

    A 404 is sometimes returned right after a Conflict. In that situation
    retrying the 404 is very likely to (eventually) recover the job, so
    ``NotFound`` is retried here in addition to everything that
    ``_should_retry`` already accepts.
    https://github.com/googleapis/python-bigquery/issues/2134

    Note: this may be extended to user-specified predicates once
    https://github.com/googleapis/python-api-core/issues/796 makes it
    possible to tweak the predicate of an existing Retry object.

    Args:
        exc: The exception raised by the failed call.

    Returns:
        bool: ``True`` if the call should be retried, ``False`` otherwise.
    """
    # A 404 is always retried in this context; no further inspection needed.
    if isinstance(exc, exceptions.NotFound):
        return True

    return _should_retry(exc)
105
106
# Pick a deadline smaller than our other deadlines since we want to timeout
# before those expire. This is one third of `_DEFAULT_RETRY_DEADLINE`.
_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0
# Retry policy whose predicate, `_should_retry_get_job_conflict`, also retries
# `NotFound` errors on top of what `_should_retry` accepts.
_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry(
    predicate=_should_retry_get_job_conflict,
    deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE,
)
"""Private, may be removed in future."""
115
116
# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We
# briefly had a default timeout, but even setting it at more than twice the
# theoretical server-side default timeout of 2 minutes was not enough for
# complex queries. See:
# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647
#
# `None` means this module does not set a default per-request timeout.
DEFAULT_TIMEOUT = None
"""The default API timeout.

This is the time to wait per request. To adjust the total wait time, set a
deadline on the retry object.
"""
128
# Job-level error "reason" codes that `_job_should_retry` accepts as
# retryable, in addition to whatever `_should_retry` already accepts.
job_retry_reasons = (
    "jobBackendError",
    "jobInternalError",
    "jobRateLimitExceeded",
)
134
135
def _job_should_retry(exc):
    """Decide whether a failed attempt at waiting for a query job is retried.

    A ``RetryError`` is first unwrapped to its ``cause``. The (unwrapped)
    error is retried if ``_should_retry`` accepts it, or if the ``"reason"``
    of its first structured error is one of ``job_retry_reasons``.

    Args:
        exc: The exception raised by the failed attempt.

    Returns:
        bool: ``True`` if the attempt should be retried, ``False`` otherwise
        (including when no structured reason can be read from the error).
    """
    # Sometimes we have ambiguous errors, such as 'backendError' which could
    # be due to an API problem or a job problem. For these, make sure we retry
    # our is_job_done() function.
    #
    # Note: This won't restart the job unless we know for sure it's because of
    # the job status and set restart_query_job = True in that loop. This means
    # that we might end up calling this predicate twice for the same job
    # but from different paths: (1) from jobs.getQueryResults RetryError and
    # (2) from translating the job error from the body of a jobs.get response.
    #
    # Note: If we start retrying job types other than queries where we don't
    # call the problematic getQueryResults API to check the status, we need
    # to provide a different predicate, as there shouldn't be ambiguous
    # errors in those cases.
    if isinstance(exc, exceptions.RetryError):
        exc = exc.cause

    # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes
    # retriable errors make their way here. Because of the separate
    # `restart_query_job` logic to make sure we aren't restarting non-failed
    # jobs, it should be safe to continue and not totally fail our attempt at
    # waiting for the query to complete.
    if _should_retry(exc):
        return True

    # Read the structured reason with the same guarded lookup that
    # `_should_retry` uses. A missing, empty, or non-subscriptable `errors`
    # attribute, or a first entry without a "reason" key, means "do not
    # retry"; it must not raise from inside the retry predicate, because
    # that would mask the original error.
    try:
        reason = exc.errors[0]["reason"]
    except (AttributeError, IndexError, TypeError, KeyError):
        return False

    return reason in job_retry_reasons
167
168
# Retry policy that uses `_job_should_retry` as its predicate and the longer
# `_DEFAULT_JOB_DEADLINE` (rather than `_DEFAULT_RETRY_DEADLINE`) as its
# deadline.
DEFAULT_JOB_RETRY = retry.Retry(
    predicate=_job_should_retry, deadline=_DEFAULT_JOB_DEADLINE
)
"""
The default job retry object.
"""
175
176
def _query_job_insert_should_retry(exc):
    """Decide whether inserting a query job should be retried.

    Per https://github.com/googleapis/python-bigquery/issues/2134, a 404 is
    sometimes returned for the job itself. If we get this far, assume the job
    doesn't actually exist and try again. 404 cannot simply be added to the
    default job retry because it is also returned for errors such as "this
    table does not exist", which probably won't resolve with a retry.

    Args:
        exc: The exception raised by the failed attempt.

    Returns:
        bool: ``True`` if the attempt should be retried, ``False`` otherwise.
    """
    # Look through a RetryError at the error that caused it.
    underlying = exc.cause if isinstance(exc, exceptions.RetryError) else exc

    if not isinstance(underlying, exceptions.NotFound):
        return _job_should_retry(underlying)

    text = underlying.message
    if text is None:
        return False

    # Don't try to retry table/dataset not found, just job not found.
    # The URL contains jobs, so use whitespace to disambiguate.
    return " job" in text.lower()
193
194
# Retry policy that uses `_query_job_insert_should_retry` as its predicate,
# which additionally retries "job not found" errors.
_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry(
    predicate=_query_job_insert_should_retry,
    # jobs.insert doesn't wait for the job to complete, so we don't need the
    # long _DEFAULT_JOB_DEADLINE for this part.
    deadline=_DEFAULT_RETRY_DEADLINE,
)
"""Private, may be removed in future."""
202
203
# NOTE(review): presumably expressed in seconds, like the deadlines elsewhere
# in this module — confirm against Client.get_job().
DEFAULT_GET_JOB_TIMEOUT = 128
"""
Default timeout for Client.get_job().
"""

# Module-level alias for the private `_DEFAULT_VALUE` attribute of api-core's
# `PollingFuture` class.
POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
"""
Default value defined in google.api_core.future.polling.PollingFuture.
"""