Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/client.py: 2%

885 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1# Copyright 2015 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Client for interacting with the Google BigQuery API.""" 

16 

17from __future__ import absolute_import 

18from __future__ import division 

19 

20from collections import abc as collections_abc 

21import copy 

22import datetime 

23import functools 

24import gzip 

25import io 

26import itertools 

27import json 

28import math 

29import os 

30import tempfile 

31import typing 

32from typing import ( 

33 Any, 

34 Dict, 

35 IO, 

36 Iterable, 

37 Mapping, 

38 List, 

39 Optional, 

40 Sequence, 

41 Tuple, 

42 Union, 

43) 

44import uuid 

45import warnings 

46 

47from google import resumable_media # type: ignore 

48from google.resumable_media.requests import MultipartUpload # type: ignore 

49from google.resumable_media.requests import ResumableUpload 

50 

51import google.api_core.client_options 

52import google.api_core.exceptions as core_exceptions 

53from google.api_core.iam import Policy 

54from google.api_core import page_iterator 

55from google.api_core import retry as retries 

56import google.cloud._helpers # type: ignore 

57from google.cloud import exceptions # pytype: disable=import-error 

58from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error 

59 

60try: 

61 from google.cloud.bigquery_storage_v1.services.big_query_read.client import ( 

62 DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO, 

63 ) 

64except ImportError: 

65 DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore 

66 

67 

68from google.cloud.bigquery._http import Connection 

69from google.cloud.bigquery import _job_helpers 

70from google.cloud.bigquery import _pandas_helpers 

71from google.cloud.bigquery import _versions_helpers 

72from google.cloud.bigquery import enums 

73from google.cloud.bigquery import exceptions as bq_exceptions 

74from google.cloud.bigquery import job 

75from google.cloud.bigquery._helpers import _get_sub_prop 

76from google.cloud.bigquery._helpers import _record_field_to_json 

77from google.cloud.bigquery._helpers import _str_or_none 

78from google.cloud.bigquery._helpers import _verify_job_config_type 

79from google.cloud.bigquery._helpers import _get_bigquery_host 

80from google.cloud.bigquery._helpers import _DEFAULT_HOST 

81from google.cloud.bigquery._helpers import _DEFAULT_HOST_TEMPLATE 

82from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE 

83from google.cloud.bigquery._helpers import _validate_universe 

84from google.cloud.bigquery._helpers import _get_client_universe 

85from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id 

86from google.cloud.bigquery.dataset import Dataset 

87from google.cloud.bigquery.dataset import DatasetListItem 

88from google.cloud.bigquery.dataset import DatasetReference 

89from google.cloud.bigquery.enums import AutoRowIDs 

90from google.cloud.bigquery.format_options import ParquetOptions 

91from google.cloud.bigquery.job import ( 

92 CopyJob, 

93 CopyJobConfig, 

94 ExtractJob, 

95 ExtractJobConfig, 

96 LoadJob, 

97 LoadJobConfig, 

98 QueryJob, 

99 QueryJobConfig, 

100) 

101from google.cloud.bigquery.model import Model 

102from google.cloud.bigquery.model import ModelReference 

103from google.cloud.bigquery.model import _model_arg_to_model_ref 

104from google.cloud.bigquery.opentelemetry_tracing import create_span 

105from google.cloud.bigquery.query import _QueryResults 

106from google.cloud.bigquery.retry import ( 

107 DEFAULT_JOB_RETRY, 

108 DEFAULT_RETRY, 

109 DEFAULT_TIMEOUT, 

110) 

111from google.cloud.bigquery.routine import Routine 

112from google.cloud.bigquery.routine import RoutineReference 

113from google.cloud.bigquery.schema import SchemaField 

114from google.cloud.bigquery.table import _table_arg_to_table 

115from google.cloud.bigquery.table import _table_arg_to_table_ref 

116from google.cloud.bigquery.table import Table 

117from google.cloud.bigquery.table import TableListItem 

118from google.cloud.bigquery.table import TableReference 

119from google.cloud.bigquery.table import RowIterator 

120 

# Optional dependencies; each name is ``None`` when the corresponding
# package is not installed (the try_import helpers swallow ImportError).
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
pandas = (
    _versions_helpers.PANDAS_VERSIONS.try_import()
)  # mypy check fails because pandas import is outside module, there are type: ignore comments related to this

# Aliases for the timeout parameter types accepted by client methods.
TimeoutType = Union[float, None]
ResumableTimeoutType = Union[
    None, float, Tuple[float, float]
]  # for resumable media methods

if typing.TYPE_CHECKING:  # pragma: NO COVER
    # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
    PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
    import requests  # required by api-core

# Tuning knobs for media (file) uploads.
_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
_MAX_MULTIPART_SIZE = 5 * 1024 * 1024  # 5 MB
_DEFAULT_NUM_RETRIES = 6

# URL templates for the media-upload endpoints.
_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
_GENERIC_CONTENT_TYPE = "*/*"

# Error-message / request-field templates.
_READ_LESS_THAN_SIZE = (
    "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
)
_NEED_TABLE_ARGUMENT = (
    "The table argument should be a table ID string, Table, or TableReference"
)
_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows"

# In microbenchmarks, it's been shown that even in ideal conditions (query
# finished, local data), requests to getQueryResults can take 10+ seconds.
# In less-than-ideal situations, the response can take even longer, as it must
# be able to download a full 100+ MB row in that time. Don't let the
# connection timeout before data can be downloaded.
# https://github.com/googleapis/python-bigquery/issues/438
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120

TIMEOUT_HEADER = "X-Server-Timeout"

160 

161 

class Project(object):
    """Wrapper for a BigQuery project resource returned by the API.

    Args:
        project_id (str): Opaque ID of the project
        numeric_id (int): Numeric ID of the project
        friendly_name (str): Display name of the project
    """

    def __init__(self, project_id, numeric_id, friendly_name):
        self.project_id = project_id
        self.numeric_id = numeric_id
        self.friendly_name = friendly_name

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: build a Project from its REST resource representation."""
        project_id = resource["id"]
        numeric_id = resource["numericId"]
        friendly_name = resource["friendlyName"]
        return cls(project_id, numeric_id, friendly_name)

182 

183 

184class Client(ClientWithProject): 

185 """Client to bundle configuration needed for API requests. 

186 

187 Args: 

188 project (Optional[str]): 

189 Project ID for the project which the client acts on behalf of. 

190 Will be passed when creating a dataset / job. If not passed, 

191 falls back to the default inferred from the environment. 

192 credentials (Optional[google.auth.credentials.Credentials]): 

193 The OAuth2 Credentials to use for this client. If not passed 

194 (and if no ``_http`` object is passed), falls back to the 

195 default inferred from the environment. 

196 _http (Optional[requests.Session]): 

197 HTTP object to make requests. Can be any object that 

198 defines ``request()`` with the same interface as 

199 :meth:`requests.Session.request`. If not passed, an ``_http`` 

200 object is created that is bound to the ``credentials`` for the 

201 current object. 

202 This parameter should be considered private, and could change in 

203 the future. 

204 location (Optional[str]): 

205 Default location for jobs / datasets / tables. 

206 default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): 

207 Default ``QueryJobConfig``. 

208 Will be merged into job configs passed into the ``query`` method. 

209 default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): 

210 Default ``LoadJobConfig``. 

211 Will be merged into job configs passed into the ``load_table_*`` methods. 

212 client_info (Optional[google.api_core.client_info.ClientInfo]): 

213 The client info used to send a user-agent string along with API 

214 requests. If ``None``, then default info will be used. Generally, 

215 you only need to set this if you're developing your own library 

216 or partner tool. 

217 client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]): 

218 Client options used to set user options on the client. API Endpoint 

219 should be set through client_options. 

220 

221 Raises: 

222 google.auth.exceptions.DefaultCredentialsError: 

223 Raised if ``credentials`` is not specified and the library fails 

224 to acquire default credentials. 

225 """ 

226 

227 SCOPE = ("https://www.googleapis.com/auth/cloud-platform",) # type: ignore 

228 """The scopes required for authenticating as a BigQuery consumer.""" 

229 

    def __init__(
        self,
        project=None,
        credentials=None,
        _http=None,
        location=None,
        default_query_job_config=None,
        default_load_job_config=None,
        client_info=None,
        client_options=None,
    ) -> None:
        # Resolve project / credentials / HTTP session via the shared
        # google-cloud base class.
        super(Client, self).__init__(
            project=project,
            credentials=credentials,
            client_options=client_options,
            _http=_http,
        )

        kw_args = {"client_info": client_info}
        # Endpoint resolution precedence:
        #   1. host override from the environment (via _get_bigquery_host)
        #   2. client_options.api_endpoint, when explicitly set
        #   3. non-default universe domain templated into the default host
        #   4. None -> library default host
        bq_host = _get_bigquery_host()
        kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None
        client_universe = None
        if client_options is None:
            client_options = {}
        if isinstance(client_options, dict):
            # Normalize dict-style options into a ClientOptions instance.
            client_options = google.api_core.client_options.from_dict(client_options)
        if client_options.api_endpoint:
            api_endpoint = client_options.api_endpoint
            kw_args["api_endpoint"] = api_endpoint
        else:
            client_universe = _get_client_universe(client_options)
            if client_universe != _DEFAULT_UNIVERSE:
                kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace(
                    "{UNIVERSE_DOMAIN}", client_universe
                )
        # Ensure credentials and universe are not in conflict.
        if hasattr(self, "_credentials") and client_universe is not None:
            _validate_universe(client_universe, self._credentials)

        self._connection = Connection(self, **kw_args)
        self._location = location
        # Deep copy isolates the stored default from caller-side mutation.
        self._default_load_job_config = copy.deepcopy(default_load_job_config)

        # Use property setter so validation can run.
        self.default_query_job_config = default_query_job_config

275 

276 @property 

277 def location(self): 

278 """Default location for jobs / datasets / tables.""" 

279 return self._location 

280 

281 @property 

282 def default_query_job_config(self) -> Optional[QueryJobConfig]: 

283 """Default ``QueryJobConfig`` or ``None``. 

284 

285 Will be merged into job configs passed into the ``query`` or 

286 ``query_and_wait`` methods. 

287 """ 

288 return self._default_query_job_config 

289 

290 @default_query_job_config.setter 

291 def default_query_job_config(self, value: Optional[QueryJobConfig]): 

292 if value is not None: 

293 _verify_job_config_type( 

294 value, QueryJobConfig, param_name="default_query_job_config" 

295 ) 

296 self._default_query_job_config = copy.deepcopy(value) 

297 

298 @property 

299 def default_load_job_config(self): 

300 """Default ``LoadJobConfig``. 

301 Will be merged into job configs passed into the ``load_table_*`` methods. 

302 """ 

303 return self._default_load_job_config 

304 

305 @default_load_job_config.setter 

306 def default_load_job_config(self, value: LoadJobConfig): 

307 self._default_load_job_config = copy.deepcopy(value) 

308 

309 def close(self): 

310 """Close the underlying transport objects, releasing system resources. 

311 

312 .. note:: 

313 

314 The client instance can be used for making additional requests even 

315 after closing, in which case the underlying connections are 

316 automatically re-created. 

317 """ 

318 self._http._auth_request.session.close() 

319 self._http.close() 

320 

321 def get_service_account_email( 

322 self, 

323 project: Optional[str] = None, 

324 retry: retries.Retry = DEFAULT_RETRY, 

325 timeout: TimeoutType = DEFAULT_TIMEOUT, 

326 ) -> str: 

327 """Get the email address of the project's BigQuery service account 

328 

329 Note: 

330 This is the service account that BigQuery uses to manage tables 

331 encrypted by a key in KMS. 

332 

333 Args: 

334 project (Optional[str]): 

335 Project ID to use for retreiving service account email. 

336 Defaults to the client's project. 

337 retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 

338 timeout (Optional[float]): 

339 The number of seconds to wait for the underlying HTTP transport 

340 before using ``retry``. 

341 

342 Returns: 

343 str: 

344 service account email address 

345 

346 Example: 

347 

348 >>> from google.cloud import bigquery 

349 >>> client = bigquery.Client() 

350 >>> client.get_service_account_email() 

351 my_service_account@my-project.iam.gserviceaccount.com 

352 

353 """ 

354 if project is None: 

355 project = self.project 

356 path = "/projects/%s/serviceAccount" % (project,) 

357 span_attributes = {"path": path} 

358 api_response = self._call_api( 

359 retry, 

360 span_name="BigQuery.getServiceAccountEmail", 

361 span_attributes=span_attributes, 

362 method="GET", 

363 path=path, 

364 timeout=timeout, 

365 ) 

366 return api_response["email"] 

367 

368 def list_projects( 

369 self, 

370 max_results: Optional[int] = None, 

371 page_token: Optional[str] = None, 

372 retry: retries.Retry = DEFAULT_RETRY, 

373 timeout: TimeoutType = DEFAULT_TIMEOUT, 

374 page_size: Optional[int] = None, 

375 ) -> page_iterator.Iterator: 

376 """List projects for the project associated with this client. 

377 

378 See 

379 https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list 

380 

381 Args: 

382 max_results (Optional[int]): 

383 Maximum number of projects to return. 

384 Defaults to a value set by the API. 

385 

386 page_token (Optional[str]): 

387 Token representing a cursor into the projects. If not passed, 

388 the API will return the first page of projects. The token marks 

389 the beginning of the iterator to be returned and the value of 

390 the ``page_token`` can be accessed at ``next_page_token`` of the 

391 :class:`~google.api_core.page_iterator.HTTPIterator`. 

392 

393 retry (Optional[google.api_core.retry.Retry]): How to retry the RPC. 

394 

395 timeout (Optional[float]): 

396 The number of seconds to wait for the underlying HTTP transport 

397 before using ``retry``. 

398 

399 page_size (Optional[int]): 

400 Maximum number of projects to return in each page. 

401 Defaults to a value set by the API. 

402 

403 Returns: 

404 google.api_core.page_iterator.Iterator: 

405 Iterator of :class:`~google.cloud.bigquery.client.Project` 

406 accessible to the current client. 

407 """ 

408 span_attributes = {"path": "/projects"} 

409 

410 def api_request(*args, **kwargs): 

411 return self._call_api( 

412 retry, 

413 span_name="BigQuery.listProjects", 

414 span_attributes=span_attributes, 

415 *args, 

416 timeout=timeout, 

417 **kwargs, 

418 ) 

419 

420 return page_iterator.HTTPIterator( 

421 client=self, 

422 api_request=api_request, 

423 path="/projects", 

424 item_to_value=_item_to_project, 

425 items_key="projects", 

426 page_token=page_token, 

427 max_results=max_results, 

428 page_size=page_size, 

429 ) 

430 

431 def list_datasets( 

432 self, 

433 project: Optional[str] = None, 

434 include_all: bool = False, 

435 filter: Optional[str] = None, 

436 max_results: Optional[int] = None, 

437 page_token: Optional[str] = None, 

438 retry: retries.Retry = DEFAULT_RETRY, 

439 timeout: TimeoutType = DEFAULT_TIMEOUT, 

440 page_size: Optional[int] = None, 

441 ) -> page_iterator.Iterator: 

442 """List datasets for the project associated with this client. 

443 

444 See 

445 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list 

446 

447 Args: 

448 project (Optional[str]): 

449 Project ID to use for retreiving datasets. Defaults to the 

450 client's project. 

451 include_all (Optional[bool]): 

452 True if results include hidden datasets. Defaults to False. 

453 filter (Optional[str]): 

454 An expression for filtering the results by label. 

455 For syntax, see 

456 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#body.QUERY_PARAMETERS.filter 

457 max_results (Optional[int]): 

458 Maximum number of datasets to return. 

459 page_token (Optional[str]): 

460 Token representing a cursor into the datasets. If not passed, 

461 the API will return the first page of datasets. The token marks 

462 the beginning of the iterator to be returned and the value of 

463 the ``page_token`` can be accessed at ``next_page_token`` of the 

464 :class:`~google.api_core.page_iterator.HTTPIterator`. 

465 retry (Optional[google.api_core.retry.Retry]): 

466 How to retry the RPC. 

467 timeout (Optional[float]): 

468 The number of seconds to wait for the underlying HTTP transport 

469 before using ``retry``. 

470 page_size (Optional[int]): 

471 Maximum number of datasets to return per page. 

472 

473 Returns: 

474 google.api_core.page_iterator.Iterator: 

475 Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`. 

476 associated with the project. 

477 """ 

478 extra_params: Dict[str, Any] = {} 

479 if project is None: 

480 project = self.project 

481 if include_all: 

482 extra_params["all"] = True 

483 if filter: 

484 # TODO: consider supporting a dict of label -> value for filter, 

485 # and converting it into a string here. 

486 extra_params["filter"] = filter 

487 path = "/projects/%s/datasets" % (project,) 

488 

489 span_attributes = {"path": path} 

490 

491 def api_request(*args, **kwargs): 

492 return self._call_api( 

493 retry, 

494 span_name="BigQuery.listDatasets", 

495 span_attributes=span_attributes, 

496 *args, 

497 timeout=timeout, 

498 **kwargs, 

499 ) 

500 

501 return page_iterator.HTTPIterator( 

502 client=self, 

503 api_request=api_request, 

504 path=path, 

505 item_to_value=_item_to_dataset, 

506 items_key="datasets", 

507 page_token=page_token, 

508 max_results=max_results, 

509 extra_params=extra_params, 

510 page_size=page_size, 

511 ) 

512 

513 def dataset( 

514 self, dataset_id: str, project: Optional[str] = None 

515 ) -> DatasetReference: 

516 """Deprecated: Construct a reference to a dataset. 

517 

518 .. deprecated:: 1.24.0 

519 Construct a 

520 :class:`~google.cloud.bigquery.dataset.DatasetReference` using its 

521 constructor or use a string where previously a reference object 

522 was used. 

523 

524 As of ``google-cloud-bigquery`` version 1.7.0, all client methods 

525 that take a 

526 :class:`~google.cloud.bigquery.dataset.DatasetReference` or 

527 :class:`~google.cloud.bigquery.table.TableReference` also take a 

528 string in standard SQL format, e.g. ``project.dataset_id`` or 

529 ``project.dataset_id.table_id``. 

530 

531 Args: 

532 dataset_id (str): ID of the dataset. 

533 

534 project (Optional[str]): 

535 Project ID for the dataset (defaults to the project of the client). 

536 

537 Returns: 

538 google.cloud.bigquery.dataset.DatasetReference: 

539 a new ``DatasetReference`` instance. 

540 """ 

541 if project is None: 

542 project = self.project 

543 

544 warnings.warn( 

545 "Client.dataset is deprecated and will be removed in a future version. " 

546 "Use a string like 'my_project.my_dataset' or a " 

547 "cloud.google.bigquery.DatasetReference object, instead.", 

548 PendingDeprecationWarning, 

549 stacklevel=2, 

550 ) 

551 return DatasetReference(project, dataset_id) 

552 

553 def _ensure_bqstorage_client( 

554 self, 

555 bqstorage_client: Optional[ 

556 "google.cloud.bigquery_storage.BigQueryReadClient" 

557 ] = None, 

558 client_options: Optional[google.api_core.client_options.ClientOptions] = None, 

559 client_info: Optional[ 

560 "google.api_core.gapic_v1.client_info.ClientInfo" 

561 ] = DEFAULT_BQSTORAGE_CLIENT_INFO, 

562 ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]: 

563 """Create a BigQuery Storage API client using this client's credentials. 

564 

565 Args: 

566 bqstorage_client: 

567 An existing BigQuery Storage client instance. If ``None``, a new 

568 instance is created and returned. 

569 client_options: 

570 Custom options used with a new BigQuery Storage client instance 

571 if one is created. 

572 client_info: 

573 The client info used with a new BigQuery Storage client 

574 instance if one is created. 

575 

576 Returns: 

577 A BigQuery Storage API client. 

578 """ 

579 

580 try: 

581 bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import( 

582 raise_if_error=True 

583 ) 

584 except bq_exceptions.BigQueryStorageNotFoundError: 

585 warnings.warn( 

586 "Cannot create BigQuery Storage client, the dependency " 

587 "google-cloud-bigquery-storage is not installed." 

588 ) 

589 return None 

590 except bq_exceptions.LegacyBigQueryStorageError as exc: 

591 warnings.warn( 

592 "Dependency google-cloud-bigquery-storage is outdated: " + str(exc) 

593 ) 

594 return None 

595 

596 if bqstorage_client is None: # pragma: NO COVER 

597 bqstorage_client = bigquery_storage.BigQueryReadClient( 

598 credentials=self._credentials, 

599 client_options=client_options, 

600 client_info=client_info, # type: ignore # (None is also accepted) 

601 ) 

602 

603 return bqstorage_client 

604 

605 def _dataset_from_arg(self, dataset) -> Union[Dataset, DatasetReference]: 

606 if isinstance(dataset, str): 

607 dataset = DatasetReference.from_string( 

608 dataset, default_project=self.project 

609 ) 

610 

611 if not isinstance(dataset, (Dataset, DatasetReference)): 

612 if isinstance(dataset, DatasetListItem): 

613 dataset = dataset.reference 

614 else: 

615 raise TypeError( 

616 "dataset must be a Dataset, DatasetReference, DatasetListItem," 

617 " or string" 

618 ) 

619 return dataset 

620 

621 def create_dataset( 

622 self, 

623 dataset: Union[str, Dataset, DatasetReference, DatasetListItem], 

624 exists_ok: bool = False, 

625 retry: retries.Retry = DEFAULT_RETRY, 

626 timeout: TimeoutType = DEFAULT_TIMEOUT, 

627 ) -> Dataset: 

628 """API call: create the dataset via a POST request. 

629 

630 See 

631 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert 

632 

633 Args: 

634 dataset (Union[ \ 

635 google.cloud.bigquery.dataset.Dataset, \ 

636 google.cloud.bigquery.dataset.DatasetReference, \ 

637 google.cloud.bigquery.dataset.DatasetListItem, \ 

638 str, \ 

639 ]): 

640 A :class:`~google.cloud.bigquery.dataset.Dataset` to create. 

641 If ``dataset`` is a reference, an empty dataset is created 

642 with the specified ID and client's default location. 

643 exists_ok (Optional[bool]): 

644 Defaults to ``False``. If ``True``, ignore "already exists" 

645 errors when creating the dataset. 

646 retry (Optional[google.api_core.retry.Retry]): 

647 How to retry the RPC. 

648 timeout (Optional[float]): 

649 The number of seconds to wait for the underlying HTTP transport 

650 before using ``retry``. 

651 

652 Returns: 

653 google.cloud.bigquery.dataset.Dataset: 

654 A new ``Dataset`` returned from the API. 

655 

656 Raises: 

657 google.cloud.exceptions.Conflict: 

658 If the dataset already exists. 

659 

660 Example: 

661 

662 >>> from google.cloud import bigquery 

663 >>> client = bigquery.Client() 

664 >>> dataset = bigquery.Dataset('my_project.my_dataset') 

665 >>> dataset = client.create_dataset(dataset) 

666 

667 """ 

668 dataset = self._dataset_from_arg(dataset) 

669 if isinstance(dataset, DatasetReference): 

670 dataset = Dataset(dataset) 

671 

672 path = "/projects/%s/datasets" % (dataset.project,) 

673 

674 data = dataset.to_api_repr() 

675 if data.get("location") is None and self.location is not None: 

676 data["location"] = self.location 

677 

678 try: 

679 span_attributes = {"path": path} 

680 

681 api_response = self._call_api( 

682 retry, 

683 span_name="BigQuery.createDataset", 

684 span_attributes=span_attributes, 

685 method="POST", 

686 path=path, 

687 data=data, 

688 timeout=timeout, 

689 ) 

690 return Dataset.from_api_repr(api_response) 

691 except core_exceptions.Conflict: 

692 if not exists_ok: 

693 raise 

694 return self.get_dataset(dataset.reference, retry=retry) 

695 

696 def create_routine( 

697 self, 

698 routine: Routine, 

699 exists_ok: bool = False, 

700 retry: retries.Retry = DEFAULT_RETRY, 

701 timeout: TimeoutType = DEFAULT_TIMEOUT, 

702 ) -> Routine: 

703 """[Beta] Create a routine via a POST request. 

704 

705 See 

706 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/insert 

707 

708 Args: 

709 routine (google.cloud.bigquery.routine.Routine): 

710 A :class:`~google.cloud.bigquery.routine.Routine` to create. 

711 The dataset that the routine belongs to must already exist. 

712 exists_ok (Optional[bool]): 

713 Defaults to ``False``. If ``True``, ignore "already exists" 

714 errors when creating the routine. 

715 retry (Optional[google.api_core.retry.Retry]): 

716 How to retry the RPC. 

717 timeout (Optional[float]): 

718 The number of seconds to wait for the underlying HTTP transport 

719 before using ``retry``. 

720 

721 Returns: 

722 google.cloud.bigquery.routine.Routine: 

723 A new ``Routine`` returned from the service. 

724 

725 Raises: 

726 google.cloud.exceptions.Conflict: 

727 If the routine already exists. 

728 """ 

729 reference = routine.reference 

730 path = "/projects/{}/datasets/{}/routines".format( 

731 reference.project, reference.dataset_id 

732 ) 

733 resource = routine.to_api_repr() 

734 try: 

735 span_attributes = {"path": path} 

736 api_response = self._call_api( 

737 retry, 

738 span_name="BigQuery.createRoutine", 

739 span_attributes=span_attributes, 

740 method="POST", 

741 path=path, 

742 data=resource, 

743 timeout=timeout, 

744 ) 

745 return Routine.from_api_repr(api_response) 

746 except core_exceptions.Conflict: 

747 if not exists_ok: 

748 raise 

749 return self.get_routine(routine.reference, retry=retry) 

750 

751 def create_table( 

752 self, 

753 table: Union[str, Table, TableReference, TableListItem], 

754 exists_ok: bool = False, 

755 retry: retries.Retry = DEFAULT_RETRY, 

756 timeout: TimeoutType = DEFAULT_TIMEOUT, 

757 ) -> Table: 

758 """API call: create a table via a PUT request 

759 

760 See 

761 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert 

762 

763 Args: 

764 table (Union[ \ 

765 google.cloud.bigquery.table.Table, \ 

766 google.cloud.bigquery.table.TableReference, \ 

767 google.cloud.bigquery.table.TableListItem, \ 

768 str, \ 

769 ]): 

770 A :class:`~google.cloud.bigquery.table.Table` to create. 

771 If ``table`` is a reference, an empty table is created 

772 with the specified ID. The dataset that the table belongs to 

773 must already exist. 

774 exists_ok (Optional[bool]): 

775 Defaults to ``False``. If ``True``, ignore "already exists" 

776 errors when creating the table. 

777 retry (Optional[google.api_core.retry.Retry]): 

778 How to retry the RPC. 

779 timeout (Optional[float]): 

780 The number of seconds to wait for the underlying HTTP transport 

781 before using ``retry``. 

782 

783 Returns: 

784 google.cloud.bigquery.table.Table: 

785 A new ``Table`` returned from the service. 

786 

787 Raises: 

788 google.cloud.exceptions.Conflict: 

789 If the table already exists. 

790 """ 

791 table = _table_arg_to_table(table, default_project=self.project) 

792 dataset_id = table.dataset_id 

793 path = "/projects/%s/datasets/%s/tables" % (table.project, dataset_id) 

794 data = table.to_api_repr() 

795 try: 

796 span_attributes = {"path": path, "dataset_id": dataset_id} 

797 api_response = self._call_api( 

798 retry, 

799 span_name="BigQuery.createTable", 

800 span_attributes=span_attributes, 

801 method="POST", 

802 path=path, 

803 data=data, 

804 timeout=timeout, 

805 ) 

806 return Table.from_api_repr(api_response) 

807 except core_exceptions.Conflict: 

808 if not exists_ok: 

809 raise 

810 return self.get_table(table.reference, retry=retry) 

811 

812 def _call_api( 

813 self, 

814 retry, 

815 span_name=None, 

816 span_attributes=None, 

817 job_ref=None, 

818 headers: Optional[Dict[str, str]] = None, 

819 **kwargs, 

820 ): 

821 kwargs = _add_server_timeout_header(headers, kwargs) 

822 call = functools.partial(self._connection.api_request, **kwargs) 

823 

824 if retry: 

825 call = retry(call) 

826 

827 if span_name is not None: 

828 with create_span( 

829 name=span_name, attributes=span_attributes, client=self, job_ref=job_ref 

830 ): 

831 return call() 

832 

833 return call() 

834 

835 def get_dataset( 

836 self, 

837 dataset_ref: Union[DatasetReference, str], 

838 retry: retries.Retry = DEFAULT_RETRY, 

839 timeout: TimeoutType = DEFAULT_TIMEOUT, 

840 ) -> Dataset: 

841 """Fetch the dataset referenced by ``dataset_ref`` 

842 

843 Args: 

844 dataset_ref (Union[ \ 

845 google.cloud.bigquery.dataset.DatasetReference, \ 

846 str, \ 

847 ]): 

848 A reference to the dataset to fetch from the BigQuery API. 

849 If a string is passed in, this method attempts to create a 

850 dataset reference from a string using 

851 :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`. 

852 retry (Optional[google.api_core.retry.Retry]): 

853 How to retry the RPC. 

854 timeout (Optional[float]): 

855 The number of seconds to wait for the underlying HTTP transport 

856 before using ``retry``. 

857 

858 Returns: 

859 google.cloud.bigquery.dataset.Dataset: 

860 A ``Dataset`` instance. 

861 """ 

862 if isinstance(dataset_ref, str): 

863 dataset_ref = DatasetReference.from_string( 

864 dataset_ref, default_project=self.project 

865 ) 

866 path = dataset_ref.path 

867 span_attributes = {"path": path} 

868 api_response = self._call_api( 

869 retry, 

870 span_name="BigQuery.getDataset", 

871 span_attributes=span_attributes, 

872 method="GET", 

873 path=path, 

874 timeout=timeout, 

875 ) 

876 return Dataset.from_api_repr(api_response) 

877 

878 def get_iam_policy( 

879 self, 

880 table: Union[Table, TableReference, TableListItem, str], 

881 requested_policy_version: int = 1, 

882 retry: retries.Retry = DEFAULT_RETRY, 

883 timeout: TimeoutType = DEFAULT_TIMEOUT, 

884 ) -> Policy: 

885 """Return the access control policy for a table resource. 

886 

887 Args: 

888 table (Union[ \ 

889 google.cloud.bigquery.table.Table, \ 

890 google.cloud.bigquery.table.TableReference, \ 

891 google.cloud.bigquery.table.TableListItem, \ 

892 str, \ 

893 ]): 

894 The table to get the access control policy for. 

895 If a string is passed in, this method attempts to create a 

896 table reference from a string using 

897 :func:`~google.cloud.bigquery.table.TableReference.from_string`. 

898 requested_policy_version (int): 

899 Optional. The maximum policy version that will be used to format the policy. 

900 

901 Only version ``1`` is currently supported. 

902 

903 See: https://cloud.google.com/bigquery/docs/reference/rest/v2/GetPolicyOptions 

904 retry (Optional[google.api_core.retry.Retry]): 

905 How to retry the RPC. 

906 timeout (Optional[float]): 

907 The number of seconds to wait for the underlying HTTP transport 

908 before using ``retry``. 

909 

910 Returns: 

911 google.api_core.iam.Policy: 

912 The access control policy. 

913 """ 

914 table = _table_arg_to_table_ref(table, default_project=self.project) 

915 

916 if requested_policy_version != 1: 

917 raise ValueError("only IAM policy version 1 is supported") 

918 

919 body = {"options": {"requestedPolicyVersion": 1}} 

920 

921 path = "{}:getIamPolicy".format(table.path) 

922 span_attributes = {"path": path} 

923 response = self._call_api( 

924 retry, 

925 span_name="BigQuery.getIamPolicy", 

926 span_attributes=span_attributes, 

927 method="POST", 

928 path=path, 

929 data=body, 

930 timeout=timeout, 

931 ) 

932 

933 return Policy.from_api_repr(response) 

934 

935 def set_iam_policy( 

936 self, 

937 table: Union[Table, TableReference, TableListItem, str], 

938 policy: Policy, 

939 updateMask: Optional[str] = None, 

940 retry: retries.Retry = DEFAULT_RETRY, 

941 timeout: TimeoutType = DEFAULT_TIMEOUT, 

942 *, 

943 fields: Sequence[str] = (), 

944 ) -> Policy: 

945 """Return the access control policy for a table resource. 

946 

947 Args: 

948 table (Union[ \ 

949 google.cloud.bigquery.table.Table, \ 

950 google.cloud.bigquery.table.TableReference, \ 

951 google.cloud.bigquery.table.TableListItem, \ 

952 str, \ 

953 ]): 

954 The table to get the access control policy for. 

955 If a string is passed in, this method attempts to create a 

956 table reference from a string using 

957 :func:`~google.cloud.bigquery.table.TableReference.from_string`. 

958 policy (google.api_core.iam.Policy): 

959 The access control policy to set. 

960 updateMask (Optional[str]): 

961 Mask as defined by 

962 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask 

963 

964 Incompatible with ``fields``. 

965 retry (Optional[google.api_core.retry.Retry]): 

966 How to retry the RPC. 

967 timeout (Optional[float]): 

968 The number of seconds to wait for the underlying HTTP transport 

969 before using ``retry``. 

970 fields (Sequence[str]): 

971 Which properties to set on the policy. See: 

972 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask 

973 

974 Incompatible with ``updateMask``. 

975 

976 Returns: 

977 google.api_core.iam.Policy: 

978 The updated access control policy. 

979 """ 

980 if updateMask is not None and not fields: 

981 update_mask = updateMask 

982 elif updateMask is not None and fields: 

983 raise ValueError("Cannot set both fields and updateMask") 

984 elif fields: 

985 update_mask = ",".join(fields) 

986 else: 

987 update_mask = None 

988 

989 table = _table_arg_to_table_ref(table, default_project=self.project) 

990 

991 if not isinstance(policy, (Policy)): 

992 raise TypeError("policy must be a Policy") 

993 

994 body = {"policy": policy.to_api_repr()} 

995 

996 if update_mask is not None: 

997 body["updateMask"] = update_mask 

998 

999 path = "{}:setIamPolicy".format(table.path) 

1000 span_attributes = {"path": path} 

1001 

1002 response = self._call_api( 

1003 retry, 

1004 span_name="BigQuery.setIamPolicy", 

1005 span_attributes=span_attributes, 

1006 method="POST", 

1007 path=path, 

1008 data=body, 

1009 timeout=timeout, 

1010 ) 

1011 

1012 return Policy.from_api_repr(response) 

1013 

    def test_iam_permissions(
        self,
        table: Union[Table, TableReference, TableListItem, str],
        permissions: Sequence[str],
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Dict[str, Any]:
        """Report which of the given IAM permissions the caller holds on a table.

        Args:
            table (Union[ \
                google.cloud.bigquery.table.Table, \
                google.cloud.bigquery.table.TableReference, \
                google.cloud.bigquery.table.TableListItem, \
                str, \
            ]):
                The table to check permissions against. If a string is
                passed in, this method attempts to create a table reference
                from a string using
                :func:`~google.cloud.bigquery.table.TableReference.from_string`.
            permissions (Sequence[str]):
                The permissions to test for.
            retry (Optional[google.api_core.retry.Retry]):
                How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP
                transport before using ``retry``.

        Returns:
            Dict[str, Any]:
                The raw API response; presumably contains the subset of
                ``permissions`` the caller holds — confirm against
                https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/testIamPermissions
        """
        table = _table_arg_to_table_ref(table, default_project=self.project)

        body = {"permissions": permissions}

        # ``tables.testIamPermissions`` uses the ":testIamPermissions" verb
        # appended to the table resource path.
        path = "{}:testIamPermissions".format(table.path)
        span_attributes = {"path": path}
        response = self._call_api(
            retry,
            span_name="BigQuery.testIamPermissions",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=body,
            timeout=timeout,
        )

        return response

1038 

1039 def get_model( 

1040 self, 

1041 model_ref: Union[ModelReference, str], 

1042 retry: retries.Retry = DEFAULT_RETRY, 

1043 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1044 ) -> Model: 

1045 """[Beta] Fetch the model referenced by ``model_ref``. 

1046 

1047 Args: 

1048 model_ref (Union[ \ 

1049 google.cloud.bigquery.model.ModelReference, \ 

1050 str, \ 

1051 ]): 

1052 A reference to the model to fetch from the BigQuery API. 

1053 If a string is passed in, this method attempts to create a 

1054 model reference from a string using 

1055 :func:`google.cloud.bigquery.model.ModelReference.from_string`. 

1056 retry (Optional[google.api_core.retry.Retry]): 

1057 How to retry the RPC. 

1058 timeout (Optional[float]): 

1059 The number of seconds to wait for the underlying HTTP transport 

1060 before using ``retry``. 

1061 

1062 Returns: 

1063 google.cloud.bigquery.model.Model: A ``Model`` instance. 

1064 """ 

1065 if isinstance(model_ref, str): 

1066 model_ref = ModelReference.from_string( 

1067 model_ref, default_project=self.project 

1068 ) 

1069 path = model_ref.path 

1070 span_attributes = {"path": path} 

1071 

1072 api_response = self._call_api( 

1073 retry, 

1074 span_name="BigQuery.getModel", 

1075 span_attributes=span_attributes, 

1076 method="GET", 

1077 path=path, 

1078 timeout=timeout, 

1079 ) 

1080 return Model.from_api_repr(api_response) 

1081 

1082 def get_routine( 

1083 self, 

1084 routine_ref: Union[Routine, RoutineReference, str], 

1085 retry: retries.Retry = DEFAULT_RETRY, 

1086 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1087 ) -> Routine: 

1088 """[Beta] Get the routine referenced by ``routine_ref``. 

1089 

1090 Args: 

1091 routine_ref (Union[ \ 

1092 google.cloud.bigquery.routine.Routine, \ 

1093 google.cloud.bigquery.routine.RoutineReference, \ 

1094 str, \ 

1095 ]): 

1096 A reference to the routine to fetch from the BigQuery API. If 

1097 a string is passed in, this method attempts to create a 

1098 reference from a string using 

1099 :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. 

1100 retry (Optional[google.api_core.retry.Retry]): 

1101 How to retry the API call. 

1102 timeout (Optional[float]): 

1103 The number of seconds to wait for the underlying HTTP transport 

1104 before using ``retry``. 

1105 

1106 Returns: 

1107 google.cloud.bigquery.routine.Routine: 

1108 A ``Routine`` instance. 

1109 """ 

1110 if isinstance(routine_ref, str): 

1111 routine_ref = RoutineReference.from_string( 

1112 routine_ref, default_project=self.project 

1113 ) 

1114 path = routine_ref.path 

1115 span_attributes = {"path": path} 

1116 api_response = self._call_api( 

1117 retry, 

1118 span_name="BigQuery.getRoutine", 

1119 span_attributes=span_attributes, 

1120 method="GET", 

1121 path=path, 

1122 timeout=timeout, 

1123 ) 

1124 return Routine.from_api_repr(api_response) 

1125 

1126 def get_table( 

1127 self, 

1128 table: Union[Table, TableReference, TableListItem, str], 

1129 retry: retries.Retry = DEFAULT_RETRY, 

1130 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1131 ) -> Table: 

1132 """Fetch the table referenced by ``table``. 

1133 

1134 Args: 

1135 table (Union[ \ 

1136 google.cloud.bigquery.table.Table, \ 

1137 google.cloud.bigquery.table.TableReference, \ 

1138 google.cloud.bigquery.table.TableListItem, \ 

1139 str, \ 

1140 ]): 

1141 A reference to the table to fetch from the BigQuery API. 

1142 If a string is passed in, this method attempts to create a 

1143 table reference from a string using 

1144 :func:`google.cloud.bigquery.table.TableReference.from_string`. 

1145 retry (Optional[google.api_core.retry.Retry]): 

1146 How to retry the RPC. 

1147 timeout (Optional[float]): 

1148 The number of seconds to wait for the underlying HTTP transport 

1149 before using ``retry``. 

1150 

1151 Returns: 

1152 google.cloud.bigquery.table.Table: 

1153 A ``Table`` instance. 

1154 """ 

1155 table_ref = _table_arg_to_table_ref(table, default_project=self.project) 

1156 path = table_ref.path 

1157 span_attributes = {"path": path} 

1158 api_response = self._call_api( 

1159 retry, 

1160 span_name="BigQuery.getTable", 

1161 span_attributes=span_attributes, 

1162 method="GET", 

1163 path=path, 

1164 timeout=timeout, 

1165 ) 

1166 return Table.from_api_repr(api_response) 

1167 

1168 def update_dataset( 

1169 self, 

1170 dataset: Dataset, 

1171 fields: Sequence[str], 

1172 retry: retries.Retry = DEFAULT_RETRY, 

1173 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1174 ) -> Dataset: 

1175 """Change some fields of a dataset. 

1176 

1177 Use ``fields`` to specify which fields to update. At least one field 

1178 must be provided. If a field is listed in ``fields`` and is ``None`` in 

1179 ``dataset``, it will be deleted. 

1180 

1181 If ``dataset.etag`` is not ``None``, the update will only 

1182 succeed if the dataset on the server has the same ETag. Thus 

1183 reading a dataset with ``get_dataset``, changing its fields, 

1184 and then passing it to ``update_dataset`` will ensure that the changes 

1185 will only be saved if no modifications to the dataset occurred 

1186 since the read. 

1187 

1188 Args: 

1189 dataset (google.cloud.bigquery.dataset.Dataset): 

1190 The dataset to update. 

1191 fields (Sequence[str]): 

1192 The properties of ``dataset`` to change. These are strings 

1193 corresponding to the properties of 

1194 :class:`~google.cloud.bigquery.dataset.Dataset`. 

1195 

1196 For example, to update the default expiration times, specify 

1197 both properties in the ``fields`` argument: 

1198 

1199 .. code-block:: python 

1200 

1201 bigquery_client.update_dataset( 

1202 dataset, 

1203 [ 

1204 "default_partition_expiration_ms", 

1205 "default_table_expiration_ms", 

1206 ] 

1207 ) 

1208 retry (Optional[google.api_core.retry.Retry]): 

1209 How to retry the RPC. 

1210 timeout (Optional[float]): 

1211 The number of seconds to wait for the underlying HTTP transport 

1212 before using ``retry``. 

1213 

1214 Returns: 

1215 google.cloud.bigquery.dataset.Dataset: 

1216 The modified ``Dataset`` instance. 

1217 """ 

1218 partial = dataset._build_resource(fields) 

1219 if dataset.etag is not None: 

1220 headers: Optional[Dict[str, str]] = {"If-Match": dataset.etag} 

1221 else: 

1222 headers = None 

1223 path = dataset.path 

1224 span_attributes = {"path": path, "fields": fields} 

1225 

1226 api_response = self._call_api( 

1227 retry, 

1228 span_name="BigQuery.updateDataset", 

1229 span_attributes=span_attributes, 

1230 method="PATCH", 

1231 path=path, 

1232 data=partial, 

1233 headers=headers, 

1234 timeout=timeout, 

1235 ) 

1236 return Dataset.from_api_repr(api_response) 

1237 

1238 def update_model( 

1239 self, 

1240 model: Model, 

1241 fields: Sequence[str], 

1242 retry: retries.Retry = DEFAULT_RETRY, 

1243 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1244 ) -> Model: 

1245 """[Beta] Change some fields of a model. 

1246 

1247 Use ``fields`` to specify which fields to update. At least one field 

1248 must be provided. If a field is listed in ``fields`` and is ``None`` 

1249 in ``model``, the field value will be deleted. 

1250 

1251 If ``model.etag`` is not ``None``, the update will only succeed if 

1252 the model on the server has the same ETag. Thus reading a model with 

1253 ``get_model``, changing its fields, and then passing it to 

1254 ``update_model`` will ensure that the changes will only be saved if 

1255 no modifications to the model occurred since the read. 

1256 

1257 Args: 

1258 model (google.cloud.bigquery.model.Model): The model to update. 

1259 fields (Sequence[str]): 

1260 The properties of ``model`` to change. These are strings 

1261 corresponding to the properties of 

1262 :class:`~google.cloud.bigquery.model.Model`. 

1263 

1264 For example, to update the descriptive properties of the model, 

1265 specify them in the ``fields`` argument: 

1266 

1267 .. code-block:: python 

1268 

1269 bigquery_client.update_model( 

1270 model, ["description", "friendly_name"] 

1271 ) 

1272 retry (Optional[google.api_core.retry.Retry]): 

1273 A description of how to retry the API call. 

1274 timeout (Optional[float]): 

1275 The number of seconds to wait for the underlying HTTP transport 

1276 before using ``retry``. 

1277 

1278 Returns: 

1279 google.cloud.bigquery.model.Model: 

1280 The model resource returned from the API call. 

1281 """ 

1282 partial = model._build_resource(fields) 

1283 if model.etag: 

1284 headers: Optional[Dict[str, str]] = {"If-Match": model.etag} 

1285 else: 

1286 headers = None 

1287 path = model.path 

1288 span_attributes = {"path": path, "fields": fields} 

1289 

1290 api_response = self._call_api( 

1291 retry, 

1292 span_name="BigQuery.updateModel", 

1293 span_attributes=span_attributes, 

1294 method="PATCH", 

1295 path=path, 

1296 data=partial, 

1297 headers=headers, 

1298 timeout=timeout, 

1299 ) 

1300 return Model.from_api_repr(api_response) 

1301 

1302 def update_routine( 

1303 self, 

1304 routine: Routine, 

1305 fields: Sequence[str], 

1306 retry: retries.Retry = DEFAULT_RETRY, 

1307 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1308 ) -> Routine: 

1309 """[Beta] Change some fields of a routine. 

1310 

1311 Use ``fields`` to specify which fields to update. At least one field 

1312 must be provided. If a field is listed in ``fields`` and is ``None`` 

1313 in ``routine``, the field value will be deleted. 

1314 

1315 .. warning:: 

1316 During beta, partial updates are not supported. You must provide 

1317 all fields in the resource. 

1318 

1319 If :attr:`~google.cloud.bigquery.routine.Routine.etag` is not 

1320 ``None``, the update will only succeed if the resource on the server 

1321 has the same ETag. Thus reading a routine with 

1322 :func:`~google.cloud.bigquery.client.Client.get_routine`, changing 

1323 its fields, and then passing it to this method will ensure that the 

1324 changes will only be saved if no modifications to the resource 

1325 occurred since the read. 

1326 

1327 Args: 

1328 routine (google.cloud.bigquery.routine.Routine): 

1329 The routine to update. 

1330 fields (Sequence[str]): 

1331 The fields of ``routine`` to change, spelled as the 

1332 :class:`~google.cloud.bigquery.routine.Routine` properties. 

1333 

1334 For example, to update the description property of the routine, 

1335 specify it in the ``fields`` argument: 

1336 

1337 .. code-block:: python 

1338 

1339 bigquery_client.update_routine( 

1340 routine, ["description"] 

1341 ) 

1342 retry (Optional[google.api_core.retry.Retry]): 

1343 A description of how to retry the API call. 

1344 timeout (Optional[float]): 

1345 The number of seconds to wait for the underlying HTTP transport 

1346 before using ``retry``. 

1347 

1348 Returns: 

1349 google.cloud.bigquery.routine.Routine: 

1350 The routine resource returned from the API call. 

1351 """ 

1352 partial = routine._build_resource(fields) 

1353 if routine.etag: 

1354 headers: Optional[Dict[str, str]] = {"If-Match": routine.etag} 

1355 else: 

1356 headers = None 

1357 

1358 # TODO: remove when routines update supports partial requests. 

1359 partial["routineReference"] = routine.reference.to_api_repr() 

1360 

1361 path = routine.path 

1362 span_attributes = {"path": path, "fields": fields} 

1363 

1364 api_response = self._call_api( 

1365 retry, 

1366 span_name="BigQuery.updateRoutine", 

1367 span_attributes=span_attributes, 

1368 method="PUT", 

1369 path=path, 

1370 data=partial, 

1371 headers=headers, 

1372 timeout=timeout, 

1373 ) 

1374 return Routine.from_api_repr(api_response) 

1375 

1376 def update_table( 

1377 self, 

1378 table: Table, 

1379 fields: Sequence[str], 

1380 retry: retries.Retry = DEFAULT_RETRY, 

1381 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1382 ) -> Table: 

1383 """Change some fields of a table. 

1384 

1385 Use ``fields`` to specify which fields to update. At least one field 

1386 must be provided. If a field is listed in ``fields`` and is ``None`` 

1387 in ``table``, the field value will be deleted. 

1388 

1389 If ``table.etag`` is not ``None``, the update will only succeed if 

1390 the table on the server has the same ETag. Thus reading a table with 

1391 ``get_table``, changing its fields, and then passing it to 

1392 ``update_table`` will ensure that the changes will only be saved if 

1393 no modifications to the table occurred since the read. 

1394 

1395 Args: 

1396 table (google.cloud.bigquery.table.Table): The table to update. 

1397 fields (Sequence[str]): 

1398 The fields of ``table`` to change, spelled as the 

1399 :class:`~google.cloud.bigquery.table.Table` properties. 

1400 

1401 For example, to update the descriptive properties of the table, 

1402 specify them in the ``fields`` argument: 

1403 

1404 .. code-block:: python 

1405 

1406 bigquery_client.update_table( 

1407 table, 

1408 ["description", "friendly_name"] 

1409 ) 

1410 retry (Optional[google.api_core.retry.Retry]): 

1411 A description of how to retry the API call. 

1412 timeout (Optional[float]): 

1413 The number of seconds to wait for the underlying HTTP transport 

1414 before using ``retry``. 

1415 

1416 Returns: 

1417 google.cloud.bigquery.table.Table: 

1418 The table resource returned from the API call. 

1419 """ 

1420 partial = table._build_resource(fields) 

1421 if table.etag is not None: 

1422 headers: Optional[Dict[str, str]] = {"If-Match": table.etag} 

1423 else: 

1424 headers = None 

1425 

1426 path = table.path 

1427 span_attributes = {"path": path, "fields": fields} 

1428 

1429 api_response = self._call_api( 

1430 retry, 

1431 span_name="BigQuery.updateTable", 

1432 span_attributes=span_attributes, 

1433 method="PATCH", 

1434 path=path, 

1435 data=partial, 

1436 headers=headers, 

1437 timeout=timeout, 

1438 ) 

1439 return Table.from_api_repr(api_response) 

1440 

1441 def list_models( 

1442 self, 

1443 dataset: Union[Dataset, DatasetReference, DatasetListItem, str], 

1444 max_results: Optional[int] = None, 

1445 page_token: Optional[str] = None, 

1446 retry: retries.Retry = DEFAULT_RETRY, 

1447 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1448 page_size: Optional[int] = None, 

1449 ) -> page_iterator.Iterator: 

1450 """[Beta] List models in the dataset. 

1451 

1452 See 

1453 https://cloud.google.com/bigquery/docs/reference/rest/v2/models/list 

1454 

1455 Args: 

1456 dataset (Union[ \ 

1457 google.cloud.bigquery.dataset.Dataset, \ 

1458 google.cloud.bigquery.dataset.DatasetReference, \ 

1459 google.cloud.bigquery.dataset.DatasetListItem, \ 

1460 str, \ 

1461 ]): 

1462 A reference to the dataset whose models to list from the 

1463 BigQuery API. If a string is passed in, this method attempts 

1464 to create a dataset reference from a string using 

1465 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. 

1466 max_results (Optional[int]): 

1467 Maximum number of models to return. Defaults to a 

1468 value set by the API. 

1469 page_token (Optional[str]): 

1470 Token representing a cursor into the models. If not passed, 

1471 the API will return the first page of models. The token marks 

1472 the beginning of the iterator to be returned and the value of 

1473 the ``page_token`` can be accessed at ``next_page_token`` of the 

1474 :class:`~google.api_core.page_iterator.HTTPIterator`. 

1475 retry (Optional[google.api_core.retry.Retry]): 

1476 How to retry the RPC. 

1477 timeout (Optional[float]): 

1478 The number of seconds to wait for the underlying HTTP transport 

1479 before using ``retry``. 

1480 page_size (Optional[int]): 

1481 Maximum number of models to return per page. 

1482 Defaults to a value set by the API. 

1483 

1484 Returns: 

1485 google.api_core.page_iterator.Iterator: 

1486 Iterator of 

1487 :class:`~google.cloud.bigquery.model.Model` contained 

1488 within the requested dataset. 

1489 """ 

1490 dataset = self._dataset_from_arg(dataset) 

1491 

1492 path = "%s/models" % dataset.path 

1493 span_attributes = {"path": path} 

1494 

1495 def api_request(*args, **kwargs): 

1496 return self._call_api( 

1497 retry, 

1498 span_name="BigQuery.listModels", 

1499 span_attributes=span_attributes, 

1500 *args, 

1501 timeout=timeout, 

1502 **kwargs, 

1503 ) 

1504 

1505 result = page_iterator.HTTPIterator( 

1506 client=self, 

1507 api_request=api_request, 

1508 path=path, 

1509 item_to_value=_item_to_model, 

1510 items_key="models", 

1511 page_token=page_token, 

1512 max_results=max_results, 

1513 page_size=page_size, 

1514 ) 

1515 result.dataset = dataset # type: ignore 

1516 return result 

1517 

1518 def list_routines( 

1519 self, 

1520 dataset: Union[Dataset, DatasetReference, DatasetListItem, str], 

1521 max_results: Optional[int] = None, 

1522 page_token: Optional[str] = None, 

1523 retry: retries.Retry = DEFAULT_RETRY, 

1524 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1525 page_size: Optional[int] = None, 

1526 ) -> page_iterator.Iterator: 

1527 """[Beta] List routines in the dataset. 

1528 

1529 See 

1530 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/list 

1531 

1532 Args: 

1533 dataset (Union[ \ 

1534 google.cloud.bigquery.dataset.Dataset, \ 

1535 google.cloud.bigquery.dataset.DatasetReference, \ 

1536 google.cloud.bigquery.dataset.DatasetListItem, \ 

1537 str, \ 

1538 ]): 

1539 A reference to the dataset whose routines to list from the 

1540 BigQuery API. If a string is passed in, this method attempts 

1541 to create a dataset reference from a string using 

1542 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. 

1543 max_results (Optional[int]): 

1544 Maximum number of routines to return. Defaults 

1545 to a value set by the API. 

1546 page_token (Optional[str]): 

1547 Token representing a cursor into the routines. If not passed, 

1548 the API will return the first page of routines. The token marks 

1549 the beginning of the iterator to be returned and the value of the 

1550 ``page_token`` can be accessed at ``next_page_token`` of the 

1551 :class:`~google.api_core.page_iterator.HTTPIterator`. 

1552 retry (Optional[google.api_core.retry.Retry]): 

1553 How to retry the RPC. 

1554 timeout (Optional[float]): 

1555 The number of seconds to wait for the underlying HTTP transport 

1556 before using ``retry``. 

1557 page_size (Optional[int]): 

1558 Maximum number of routines to return per page. 

1559 Defaults to a value set by the API. 

1560 

1561 Returns: 

1562 google.api_core.page_iterator.Iterator: 

1563 Iterator of all 

1564 :class:`~google.cloud.bigquery.routine.Routine`s contained 

1565 within the requested dataset, limited by ``max_results``. 

1566 """ 

1567 dataset = self._dataset_from_arg(dataset) 

1568 path = "{}/routines".format(dataset.path) 

1569 

1570 span_attributes = {"path": path} 

1571 

1572 def api_request(*args, **kwargs): 

1573 return self._call_api( 

1574 retry, 

1575 span_name="BigQuery.listRoutines", 

1576 span_attributes=span_attributes, 

1577 *args, 

1578 timeout=timeout, 

1579 **kwargs, 

1580 ) 

1581 

1582 result = page_iterator.HTTPIterator( 

1583 client=self, 

1584 api_request=api_request, 

1585 path=path, 

1586 item_to_value=_item_to_routine, 

1587 items_key="routines", 

1588 page_token=page_token, 

1589 max_results=max_results, 

1590 page_size=page_size, 

1591 ) 

1592 result.dataset = dataset # type: ignore 

1593 return result 

1594 

1595 def list_tables( 

1596 self, 

1597 dataset: Union[Dataset, DatasetReference, DatasetListItem, str], 

1598 max_results: Optional[int] = None, 

1599 page_token: Optional[str] = None, 

1600 retry: retries.Retry = DEFAULT_RETRY, 

1601 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1602 page_size: Optional[int] = None, 

1603 ) -> page_iterator.Iterator: 

1604 """List tables in the dataset. 

1605 

1606 See 

1607 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list 

1608 

1609 Args: 

1610 dataset (Union[ \ 

1611 google.cloud.bigquery.dataset.Dataset, \ 

1612 google.cloud.bigquery.dataset.DatasetReference, \ 

1613 google.cloud.bigquery.dataset.DatasetListItem, \ 

1614 str, \ 

1615 ]): 

1616 A reference to the dataset whose tables to list from the 

1617 BigQuery API. If a string is passed in, this method attempts 

1618 to create a dataset reference from a string using 

1619 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. 

1620 max_results (Optional[int]): 

1621 Maximum number of tables to return. Defaults 

1622 to a value set by the API. 

1623 page_token (Optional[str]): 

1624 Token representing a cursor into the tables. If not passed, 

1625 the API will return the first page of tables. The token marks 

1626 the beginning of the iterator to be returned and the value of 

1627 the ``page_token`` can be accessed at ``next_page_token`` of the 

1628 :class:`~google.api_core.page_iterator.HTTPIterator`. 

1629 retry (Optional[google.api_core.retry.Retry]): 

1630 How to retry the RPC. 

1631 timeout (Optional[float]): 

1632 The number of seconds to wait for the underlying HTTP transport 

1633 before using ``retry``. 

1634 page_size (Optional[int]): 

1635 Maximum number of tables to return per page. 

1636 Defaults to a value set by the API. 

1637 

1638 Returns: 

1639 google.api_core.page_iterator.Iterator: 

1640 Iterator of 

1641 :class:`~google.cloud.bigquery.table.TableListItem` contained 

1642 within the requested dataset. 

1643 """ 

1644 dataset = self._dataset_from_arg(dataset) 

1645 path = "%s/tables" % dataset.path 

1646 span_attributes = {"path": path} 

1647 

1648 def api_request(*args, **kwargs): 

1649 return self._call_api( 

1650 retry, 

1651 span_name="BigQuery.listTables", 

1652 span_attributes=span_attributes, 

1653 *args, 

1654 timeout=timeout, 

1655 **kwargs, 

1656 ) 

1657 

1658 result = page_iterator.HTTPIterator( 

1659 client=self, 

1660 api_request=api_request, 

1661 path=path, 

1662 item_to_value=_item_to_table, 

1663 items_key="tables", 

1664 page_token=page_token, 

1665 max_results=max_results, 

1666 page_size=page_size, 

1667 ) 

1668 result.dataset = dataset # type: ignore 

1669 return result 

1670 

1671 def delete_dataset( 

1672 self, 

1673 dataset: Union[Dataset, DatasetReference, DatasetListItem, str], 

1674 delete_contents: bool = False, 

1675 retry: retries.Retry = DEFAULT_RETRY, 

1676 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1677 not_found_ok: bool = False, 

1678 ) -> None: 

1679 """Delete a dataset. 

1680 

1681 See 

1682 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete 

1683 

1684 Args: 

1685 dataset (Union[ \ 

1686 google.cloud.bigquery.dataset.Dataset, \ 

1687 google.cloud.bigquery.dataset.DatasetReference, \ 

1688 google.cloud.bigquery.dataset.DatasetListItem, \ 

1689 str, \ 

1690 ]): 

1691 A reference to the dataset to delete. If a string is passed 

1692 in, this method attempts to create a dataset reference from a 

1693 string using 

1694 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`. 

1695 delete_contents (Optional[bool]): 

1696 If True, delete all the tables in the dataset. If False and 

1697 the dataset contains tables, the request will fail. 

1698 Default is False. 

1699 retry (Optional[google.api_core.retry.Retry]): 

1700 How to retry the RPC. 

1701 timeout (Optional[float]): 

1702 The number of seconds to wait for the underlying HTTP transport 

1703 before using ``retry``. 

1704 not_found_ok (Optional[bool]): 

1705 Defaults to ``False``. If ``True``, ignore "not found" errors 

1706 when deleting the dataset. 

1707 """ 

1708 dataset = self._dataset_from_arg(dataset) 

1709 params = {} 

1710 path = dataset.path 

1711 if delete_contents: 

1712 params["deleteContents"] = "true" 

1713 span_attributes = {"path": path, "deleteContents": delete_contents} 

1714 else: 

1715 span_attributes = {"path": path} 

1716 

1717 try: 

1718 self._call_api( 

1719 retry, 

1720 span_name="BigQuery.deleteDataset", 

1721 span_attributes=span_attributes, 

1722 method="DELETE", 

1723 path=path, 

1724 query_params=params, 

1725 timeout=timeout, 

1726 ) 

1727 except core_exceptions.NotFound: 

1728 if not not_found_ok: 

1729 raise 

1730 

1731 def delete_model( 

1732 self, 

1733 model: Union[Model, ModelReference, str], 

1734 retry: retries.Retry = DEFAULT_RETRY, 

1735 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1736 not_found_ok: bool = False, 

1737 ) -> None: 

1738 """[Beta] Delete a model 

1739 

1740 See 

1741 https://cloud.google.com/bigquery/docs/reference/rest/v2/models/delete 

1742 

1743 Args: 

1744 model (Union[ \ 

1745 google.cloud.bigquery.model.Model, \ 

1746 google.cloud.bigquery.model.ModelReference, \ 

1747 str, \ 

1748 ]): 

1749 A reference to the model to delete. If a string is passed in, 

1750 this method attempts to create a model reference from a 

1751 string using 

1752 :func:`google.cloud.bigquery.model.ModelReference.from_string`. 

1753 retry (Optional[google.api_core.retry.Retry]): 

1754 How to retry the RPC. 

1755 timeout (Optional[float]): 

1756 The number of seconds to wait for the underlying HTTP transport 

1757 before using ``retry``. 

1758 not_found_ok (Optional[bool]): 

1759 Defaults to ``False``. If ``True``, ignore "not found" errors 

1760 when deleting the model. 

1761 """ 

1762 if isinstance(model, str): 

1763 model = ModelReference.from_string(model, default_project=self.project) 

1764 

1765 if not isinstance(model, (Model, ModelReference)): 

1766 raise TypeError("model must be a Model or a ModelReference") 

1767 

1768 path = model.path 

1769 try: 

1770 span_attributes = {"path": path} 

1771 self._call_api( 

1772 retry, 

1773 span_name="BigQuery.deleteModel", 

1774 span_attributes=span_attributes, 

1775 method="DELETE", 

1776 path=path, 

1777 timeout=timeout, 

1778 ) 

1779 except core_exceptions.NotFound: 

1780 if not not_found_ok: 

1781 raise 

1782 

1783 def delete_job_metadata( 

1784 self, 

1785 job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob], 

1786 project: Optional[str] = None, 

1787 location: Optional[str] = None, 

1788 retry: retries.Retry = DEFAULT_RETRY, 

1789 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1790 not_found_ok: bool = False, 

1791 ): 

1792 """[Beta] Delete job metadata from job history. 

1793 

1794 Note: This does not stop a running job. Use 

1795 :func:`~google.cloud.bigquery.client.Client.cancel_job` instead. 

1796 

1797 Args: 

1798 job_id (Union[ \ 

1799 str, \ 

1800 LoadJob, \ 

1801 CopyJob, \ 

1802 ExtractJob, \ 

1803 QueryJob \ 

1804 ]): Job or job identifier. 

1805 project (Optional[str]): 

1806 ID of the project which owns the job (defaults to the client's project). 

1807 location (Optional[str]): 

1808 Location where the job was run. Ignored if ``job_id`` is a job 

1809 object. 

1810 retry (Optional[google.api_core.retry.Retry]): 

1811 How to retry the RPC. 

1812 timeout (Optional[float]): 

1813 The number of seconds to wait for the underlying HTTP transport 

1814 before using ``retry``. 

1815 not_found_ok (Optional[bool]): 

1816 Defaults to ``False``. If ``True``, ignore "not found" errors 

1817 when deleting the job. 

1818 """ 

1819 extra_params = {} 

1820 

1821 project, location, job_id = _extract_job_reference( 

1822 job_id, project=project, location=location 

1823 ) 

1824 

1825 if project is None: 

1826 project = self.project 

1827 

1828 if location is None: 

1829 location = self.location 

1830 

1831 # Location is always required for jobs.delete() 

1832 extra_params["location"] = location 

1833 

1834 path = f"/projects/{project}/jobs/{job_id}/delete" 

1835 

1836 span_attributes = {"path": path, "job_id": job_id, "location": location} 

1837 

1838 try: 

1839 self._call_api( 

1840 retry, 

1841 span_name="BigQuery.deleteJob", 

1842 span_attributes=span_attributes, 

1843 method="DELETE", 

1844 path=path, 

1845 query_params=extra_params, 

1846 timeout=timeout, 

1847 ) 

1848 except google.api_core.exceptions.NotFound: 

1849 if not not_found_ok: 

1850 raise 

1851 

1852 def delete_routine( 

1853 self, 

1854 routine: Union[Routine, RoutineReference, str], 

1855 retry: retries.Retry = DEFAULT_RETRY, 

1856 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1857 not_found_ok: bool = False, 

1858 ) -> None: 

1859 """[Beta] Delete a routine. 

1860 

1861 See 

1862 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/delete 

1863 

1864 Args: 

1865 routine (Union[ \ 

1866 google.cloud.bigquery.routine.Routine, \ 

1867 google.cloud.bigquery.routine.RoutineReference, \ 

1868 str, \ 

1869 ]): 

1870 A reference to the routine to delete. If a string is passed 

1871 in, this method attempts to create a routine reference from a 

1872 string using 

1873 :func:`google.cloud.bigquery.routine.RoutineReference.from_string`. 

1874 retry (Optional[google.api_core.retry.Retry]): 

1875 How to retry the RPC. 

1876 timeout (Optional[float]): 

1877 The number of seconds to wait for the underlying HTTP transport 

1878 before using ``retry``. 

1879 not_found_ok (Optional[bool]): 

1880 Defaults to ``False``. If ``True``, ignore "not found" errors 

1881 when deleting the routine. 

1882 """ 

1883 if isinstance(routine, str): 

1884 routine = RoutineReference.from_string( 

1885 routine, default_project=self.project 

1886 ) 

1887 path = routine.path 

1888 

1889 if not isinstance(routine, (Routine, RoutineReference)): 

1890 raise TypeError("routine must be a Routine or a RoutineReference") 

1891 

1892 try: 

1893 span_attributes = {"path": path} 

1894 self._call_api( 

1895 retry, 

1896 span_name="BigQuery.deleteRoutine", 

1897 span_attributes=span_attributes, 

1898 method="DELETE", 

1899 path=path, 

1900 timeout=timeout, 

1901 ) 

1902 except core_exceptions.NotFound: 

1903 if not not_found_ok: 

1904 raise 

1905 

1906 def delete_table( 

1907 self, 

1908 table: Union[Table, TableReference, TableListItem, str], 

1909 retry: retries.Retry = DEFAULT_RETRY, 

1910 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1911 not_found_ok: bool = False, 

1912 ) -> None: 

1913 """Delete a table 

1914 

1915 See 

1916 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete 

1917 

1918 Args: 

1919 table (Union[ \ 

1920 google.cloud.bigquery.table.Table, \ 

1921 google.cloud.bigquery.table.TableReference, \ 

1922 google.cloud.bigquery.table.TableListItem, \ 

1923 str, \ 

1924 ]): 

1925 A reference to the table to delete. If a string is passed in, 

1926 this method attempts to create a table reference from a 

1927 string using 

1928 :func:`google.cloud.bigquery.table.TableReference.from_string`. 

1929 retry (Optional[google.api_core.retry.Retry]): 

1930 How to retry the RPC. 

1931 timeout (Optional[float]): 

1932 The number of seconds to wait for the underlying HTTP transport 

1933 before using ``retry``. 

1934 not_found_ok (Optional[bool]): 

1935 Defaults to ``False``. If ``True``, ignore "not found" errors 

1936 when deleting the table. 

1937 """ 

1938 table = _table_arg_to_table_ref(table, default_project=self.project) 

1939 if not isinstance(table, TableReference): 

1940 raise TypeError("Unable to get TableReference for table '{}'".format(table)) 

1941 

1942 try: 

1943 path = table.path 

1944 span_attributes = {"path": path} 

1945 self._call_api( 

1946 retry, 

1947 span_name="BigQuery.deleteTable", 

1948 span_attributes=span_attributes, 

1949 method="DELETE", 

1950 path=path, 

1951 timeout=timeout, 

1952 ) 

1953 except core_exceptions.NotFound: 

1954 if not not_found_ok: 

1955 raise 

1956 

1957 def _get_query_results( 

1958 self, 

1959 job_id: str, 

1960 retry: retries.Retry, 

1961 project: Optional[str] = None, 

1962 timeout_ms: Optional[int] = None, 

1963 location: Optional[str] = None, 

1964 timeout: TimeoutType = DEFAULT_TIMEOUT, 

1965 ) -> _QueryResults: 

1966 """Get the query results object for a query job. 

1967 

1968 Args: 

1969 job_id (str): Name of the query job. 

1970 retry (google.api_core.retry.Retry): 

1971 How to retry the RPC. 

1972 project (Optional[str]): 

1973 Project ID for the query job (defaults to the project of the client). 

1974 timeout_ms (Optional[int]): 

1975 Number of milliseconds the the API call should wait for the query 

1976 to complete before the request times out. 

1977 location (Optional[str]): Location of the query job. 

1978 timeout (Optional[float]): 

1979 The number of seconds to wait for the underlying HTTP transport 

1980 before using ``retry``. If set, this connection timeout may be 

1981 increased to a minimum value. This prevents retries on what 

1982 would otherwise be a successful response. 

1983 

1984 Returns: 

1985 google.cloud.bigquery.query._QueryResults: 

1986 A new ``_QueryResults`` instance. 

1987 """ 

1988 

1989 extra_params: Dict[str, Any] = {"maxResults": 0} 

1990 

1991 if timeout is not None: 

1992 if not isinstance(timeout, (int, float)): 

1993 timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT 

1994 else: 

1995 timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) 

1996 

1997 if project is None: 

1998 project = self.project 

1999 

2000 if timeout_ms is not None: 

2001 extra_params["timeoutMs"] = timeout_ms 

2002 

2003 if location is None: 

2004 location = self.location 

2005 

2006 if location is not None: 

2007 extra_params["location"] = location 

2008 

2009 path = "/projects/{}/queries/{}".format(project, job_id) 

2010 

2011 # This call is typically made in a polling loop that checks whether the 

2012 # job is complete (from QueryJob.done(), called ultimately from 

2013 # QueryJob.result()). So we don't need to poll here. 

2014 span_attributes = {"path": path} 

2015 resource = self._call_api( 

2016 retry, 

2017 span_name="BigQuery.getQueryResults", 

2018 span_attributes=span_attributes, 

2019 method="GET", 

2020 path=path, 

2021 query_params=extra_params, 

2022 timeout=timeout, 

2023 ) 

2024 return _QueryResults.from_api_repr(resource) 

2025 

2026 def job_from_resource( 

2027 self, resource: dict 

2028 ) -> Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: 

2029 """Detect correct job type from resource and instantiate. 

2030 

2031 Args: 

2032 resource (Dict): one job resource from API response 

2033 

2034 Returns: 

2035 Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]: 

2036 The job instance, constructed via the resource. 

2037 """ 

2038 config = resource.get("configuration", {}) 

2039 if "load" in config: 

2040 return job.LoadJob.from_api_repr(resource, self) 

2041 elif "copy" in config: 

2042 return job.CopyJob.from_api_repr(resource, self) 

2043 elif "extract" in config: 

2044 return job.ExtractJob.from_api_repr(resource, self) 

2045 elif "query" in config: 

2046 return job.QueryJob.from_api_repr(resource, self) 

2047 return job.UnknownJob.from_api_repr(resource, self) 

2048 

    def create_job(
        self,
        job_config: dict,
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
        """Create a new job.

        Args:
            job_config (dict): configuration job representation returned from the API.
            retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            Union[ \
                google.cloud.bigquery.job.LoadJob, \
                google.cloud.bigquery.job.CopyJob, \
                google.cloud.bigquery.job.ExtractJob, \
                google.cloud.bigquery.job.QueryJob \
            ]:
                A new job instance.

        Raises:
            TypeError: If ``job_config`` contains none of the recognized
                job-type keys (``load``, ``copy``, ``extract``, ``query``).
        """

        # Each branch re-parses the raw API resource into the matching
        # job-config class, pulls out the positional arguments that the
        # corresponding client method requires, then delegates to it.
        if "load" in job_config:
            load_job_config = google.cloud.bigquery.job.LoadJobConfig.from_api_repr(
                job_config
            )
            destination = _get_sub_prop(job_config, ["load", "destinationTable"])
            source_uris = _get_sub_prop(job_config, ["load", "sourceUris"])
            destination = TableReference.from_api_repr(destination)
            return self.load_table_from_uri(
                source_uris,
                destination,
                job_config=typing.cast(LoadJobConfig, load_job_config),
                retry=retry,
                timeout=timeout,
            )
        elif "copy" in job_config:
            copy_job_config = google.cloud.bigquery.job.CopyJobConfig.from_api_repr(
                job_config
            )
            destination = _get_sub_prop(job_config, ["copy", "destinationTable"])
            destination = TableReference.from_api_repr(destination)
            return self.copy_table(
                [],  # Source table(s) already in job_config resource.
                destination,
                job_config=typing.cast(CopyJobConfig, copy_job_config),
                retry=retry,
                timeout=timeout,
            )
        elif "extract" in job_config:
            extract_job_config = (
                google.cloud.bigquery.job.ExtractJobConfig.from_api_repr(job_config)
            )
            # An extract job reads either a table or a model; prefer the
            # table when both keys are probed, falling back to the model.
            source = _get_sub_prop(job_config, ["extract", "sourceTable"])
            if source:
                source_type = "Table"
                source = TableReference.from_api_repr(source)
            else:
                source = _get_sub_prop(job_config, ["extract", "sourceModel"])
                source_type = "Model"
                source = ModelReference.from_api_repr(source)
            destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"])
            return self.extract_table(
                source,
                destination_uris,
                job_config=typing.cast(ExtractJobConfig, extract_job_config),
                retry=retry,
                timeout=timeout,
                source_type=source_type,
            )
        elif "query" in job_config:
            query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr(
                job_config
            )
            query = _get_sub_prop(job_config, ["query", "query"])
            return self.query(
                query,
                job_config=typing.cast(QueryJobConfig, query_job_config),
                retry=retry,
                timeout=timeout,
            )
        else:
            raise TypeError("Invalid job configuration received.")

2135 

2136 def get_job( 

2137 self, 

2138 job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], 

2139 project: Optional[str] = None, 

2140 location: Optional[str] = None, 

2141 retry: retries.Retry = DEFAULT_RETRY, 

2142 timeout: TimeoutType = DEFAULT_TIMEOUT, 

2143 ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: 

2144 """Fetch a job for the project associated with this client. 

2145 

2146 See 

2147 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get 

2148 

2149 Args: 

2150 job_id (Union[ \ 

2151 str, \ 

2152 job.LoadJob, \ 

2153 job.CopyJob, \ 

2154 job.ExtractJob, \ 

2155 job.QueryJob \ 

2156 ]): 

2157 Job identifier. 

2158 project (Optional[str]): 

2159 ID of the project which owns the job (defaults to the client's project). 

2160 location (Optional[str]): 

2161 Location where the job was run. Ignored if ``job_id`` is a job 

2162 object. 

2163 retry (Optional[google.api_core.retry.Retry]): 

2164 How to retry the RPC. 

2165 timeout (Optional[float]): 

2166 The number of seconds to wait for the underlying HTTP transport 

2167 before using ``retry``. 

2168 

2169 Returns: 

2170 Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]: 

2171 Job instance, based on the resource returned by the API. 

2172 """ 

2173 extra_params = {"projection": "full"} 

2174 

2175 project, location, job_id = _extract_job_reference( 

2176 job_id, project=project, location=location 

2177 ) 

2178 

2179 if project is None: 

2180 project = self.project 

2181 

2182 if location is None: 

2183 location = self.location 

2184 

2185 if location is not None: 

2186 extra_params["location"] = location 

2187 

2188 path = "/projects/{}/jobs/{}".format(project, job_id) 

2189 

2190 span_attributes = {"path": path, "job_id": job_id, "location": location} 

2191 

2192 resource = self._call_api( 

2193 retry, 

2194 span_name="BigQuery.getJob", 

2195 span_attributes=span_attributes, 

2196 method="GET", 

2197 path=path, 

2198 query_params=extra_params, 

2199 timeout=timeout, 

2200 ) 

2201 

2202 return self.job_from_resource(resource) 

2203 

2204 def cancel_job( 

2205 self, 

2206 job_id: str, 

2207 project: Optional[str] = None, 

2208 location: Optional[str] = None, 

2209 retry: retries.Retry = DEFAULT_RETRY, 

2210 timeout: TimeoutType = DEFAULT_TIMEOUT, 

2211 ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]: 

2212 """Attempt to cancel a job from a job ID. 

2213 

2214 See 

2215 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel 

2216 

2217 Args: 

2218 job_id (Union[ \ 

2219 str, \ 

2220 google.cloud.bigquery.job.LoadJob, \ 

2221 google.cloud.bigquery.job.CopyJob, \ 

2222 google.cloud.bigquery.job.ExtractJob, \ 

2223 google.cloud.bigquery.job.QueryJob \ 

2224 ]): Job identifier. 

2225 project (Optional[str]): 

2226 ID of the project which owns the job (defaults to the client's project). 

2227 location (Optional[str]): 

2228 Location where the job was run. Ignored if ``job_id`` is a job 

2229 object. 

2230 retry (Optional[google.api_core.retry.Retry]): 

2231 How to retry the RPC. 

2232 timeout (Optional[float]): 

2233 The number of seconds to wait for the underlying HTTP transport 

2234 before using ``retry``. 

2235 

2236 Returns: 

2237 Union[ \ 

2238 google.cloud.bigquery.job.LoadJob, \ 

2239 google.cloud.bigquery.job.CopyJob, \ 

2240 google.cloud.bigquery.job.ExtractJob, \ 

2241 google.cloud.bigquery.job.QueryJob, \ 

2242 ]: 

2243 Job instance, based on the resource returned by the API. 

2244 """ 

2245 extra_params = {"projection": "full"} 

2246 

2247 project, location, job_id = _extract_job_reference( 

2248 job_id, project=project, location=location 

2249 ) 

2250 

2251 if project is None: 

2252 project = self.project 

2253 

2254 if location is None: 

2255 location = self.location 

2256 

2257 if location is not None: 

2258 extra_params["location"] = location 

2259 

2260 path = "/projects/{}/jobs/{}/cancel".format(project, job_id) 

2261 

2262 span_attributes = {"path": path, "job_id": job_id, "location": location} 

2263 

2264 resource = self._call_api( 

2265 retry, 

2266 span_name="BigQuery.cancelJob", 

2267 span_attributes=span_attributes, 

2268 method="POST", 

2269 path=path, 

2270 query_params=extra_params, 

2271 timeout=timeout, 

2272 ) 

2273 

2274 job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob 

2275 

2276 return typing.cast( 

2277 Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob], 

2278 job_instance, 

2279 ) 

2280 

2281 def list_jobs( 

2282 self, 

2283 project: Optional[str] = None, 

2284 parent_job: Optional[Union[QueryJob, str]] = None, 

2285 max_results: Optional[int] = None, 

2286 page_token: Optional[str] = None, 

2287 all_users: Optional[bool] = None, 

2288 state_filter: Optional[str] = None, 

2289 retry: retries.Retry = DEFAULT_RETRY, 

2290 timeout: TimeoutType = DEFAULT_TIMEOUT, 

2291 min_creation_time: Optional[datetime.datetime] = None, 

2292 max_creation_time: Optional[datetime.datetime] = None, 

2293 page_size: Optional[int] = None, 

2294 ) -> page_iterator.Iterator: 

2295 """List jobs for the project associated with this client. 

2296 

2297 See 

2298 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list 

2299 

2300 Args: 

2301 project (Optional[str]): 

2302 Project ID to use for retreiving datasets. Defaults 

2303 to the client's project. 

2304 parent_job (Optional[Union[ \ 

2305 google.cloud.bigquery.job._AsyncJob, \ 

2306 str, \ 

2307 ]]): 

2308 If set, retrieve only child jobs of the specified parent. 

2309 max_results (Optional[int]): 

2310 Maximum number of jobs to return. 

2311 page_token (Optional[str]): 

2312 Opaque marker for the next "page" of jobs. If not 

2313 passed, the API will return the first page of jobs. The token 

2314 marks the beginning of the iterator to be returned and the 

2315 value of the ``page_token`` can be accessed at 

2316 ``next_page_token`` of 

2317 :class:`~google.api_core.page_iterator.HTTPIterator`. 

2318 all_users (Optional[bool]): 

2319 If true, include jobs owned by all users in the project. 

2320 Defaults to :data:`False`. 

2321 state_filter (Optional[str]): 

2322 If set, include only jobs matching the given state. One of: 

2323 * ``"done"`` 

2324 * ``"pending"`` 

2325 * ``"running"`` 

2326 retry (Optional[google.api_core.retry.Retry]): 

2327 How to retry the RPC. 

2328 timeout (Optional[float]): 

2329 The number of seconds to wait for the underlying HTTP transport 

2330 before using ``retry``. 

2331 min_creation_time (Optional[datetime.datetime]): 

2332 Min value for job creation time. If set, only jobs created 

2333 after or at this timestamp are returned. If the datetime has 

2334 no time zone assumes UTC time. 

2335 max_creation_time (Optional[datetime.datetime]): 

2336 Max value for job creation time. If set, only jobs created 

2337 before or at this timestamp are returned. If the datetime has 

2338 no time zone assumes UTC time. 

2339 page_size (Optional[int]): 

2340 Maximum number of jobs to return per page. 

2341 

2342 Returns: 

2343 google.api_core.page_iterator.Iterator: 

2344 Iterable of job instances. 

2345 """ 

2346 if isinstance(parent_job, job._AsyncJob): 

2347 parent_job = parent_job.job_id # pytype: disable=attribute-error 

2348 

2349 extra_params = { 

2350 "allUsers": all_users, 

2351 "stateFilter": state_filter, 

2352 "minCreationTime": _str_or_none( 

2353 google.cloud._helpers._millis_from_datetime(min_creation_time) 

2354 ), 

2355 "maxCreationTime": _str_or_none( 

2356 google.cloud._helpers._millis_from_datetime(max_creation_time) 

2357 ), 

2358 "projection": "full", 

2359 "parentJobId": parent_job, 

2360 } 

2361 

2362 extra_params = { 

2363 param: value for param, value in extra_params.items() if value is not None 

2364 } 

2365 

2366 if project is None: 

2367 project = self.project 

2368 

2369 path = "/projects/%s/jobs" % (project,) 

2370 

2371 span_attributes = {"path": path} 

2372 

2373 def api_request(*args, **kwargs): 

2374 return self._call_api( 

2375 retry, 

2376 span_name="BigQuery.listJobs", 

2377 span_attributes=span_attributes, 

2378 *args, 

2379 timeout=timeout, 

2380 **kwargs, 

2381 ) 

2382 

2383 return page_iterator.HTTPIterator( 

2384 client=self, 

2385 api_request=api_request, 

2386 path=path, 

2387 item_to_value=_item_to_job, 

2388 items_key="jobs", 

2389 page_token=page_token, 

2390 max_results=max_results, 

2391 extra_params=extra_params, 

2392 page_size=page_size, 

2393 ) 

2394 

2395 def load_table_from_uri( 

2396 self, 

2397 source_uris: Union[str, Sequence[str]], 

2398 destination: Union[Table, TableReference, TableListItem, str], 

2399 job_id: Optional[str] = None, 

2400 job_id_prefix: Optional[str] = None, 

2401 location: Optional[str] = None, 

2402 project: Optional[str] = None, 

2403 job_config: Optional[LoadJobConfig] = None, 

2404 retry: retries.Retry = DEFAULT_RETRY, 

2405 timeout: TimeoutType = DEFAULT_TIMEOUT, 

2406 ) -> job.LoadJob: 

2407 """Starts a job for loading data into a table from Cloud Storage. 

2408 

2409 See 

2410 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload 

2411 

2412 Args: 

2413 source_uris (Union[str, Sequence[str]]): 

2414 URIs of data files to be loaded; in format 

2415 ``gs://<bucket_name>/<object_name_or_glob>``. 

2416 destination (Union[ \ 

2417 google.cloud.bigquery.table.Table, \ 

2418 google.cloud.bigquery.table.TableReference, \ 

2419 google.cloud.bigquery.table.TableListItem, \ 

2420 str, \ 

2421 ]): 

2422 Table into which data is to be loaded. If a string is passed 

2423 in, this method attempts to create a table reference from a 

2424 string using 

2425 :func:`google.cloud.bigquery.table.TableReference.from_string`. 

2426 job_id (Optional[str]): Name of the job. 

2427 job_id_prefix (Optional[str]): 

2428 The user-provided prefix for a randomly generated job ID. 

2429 This parameter will be ignored if a ``job_id`` is also given. 

2430 location (Optional[str]): 

2431 Location where to run the job. Must match the location of the 

2432 destination table. 

2433 project (Optional[str]): 

2434 Project ID of the project of where to run the job. Defaults 

2435 to the client's project. 

2436 job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]): 

2437 Extra configuration options for the job. 

2438 retry (Optional[google.api_core.retry.Retry]): 

2439 How to retry the RPC. 

2440 timeout (Optional[float]): 

2441 The number of seconds to wait for the underlying HTTP transport 

2442 before using ``retry``. 

2443 

2444 Returns: 

2445 google.cloud.bigquery.job.LoadJob: A new load job. 

2446 

2447 Raises: 

2448 TypeError: 

2449 If ``job_config`` is not an instance of 

2450 :class:`~google.cloud.bigquery.job.LoadJobConfig` class. 

2451 """ 

2452 job_id = _make_job_id(job_id, job_id_prefix) 

2453 

2454 if project is None: 

2455 project = self.project 

2456 

2457 if location is None: 

2458 location = self.location 

2459 

2460 job_ref = job._JobReference(job_id, project=project, location=location) 

2461 

2462 if isinstance(source_uris, str): 

2463 source_uris = [source_uris] 

2464 

2465 destination = _table_arg_to_table_ref(destination, default_project=self.project) 

2466 

2467 if job_config is not None: 

2468 _verify_job_config_type(job_config, LoadJobConfig) 

2469 else: 

2470 job_config = job.LoadJobConfig() 

2471 

2472 new_job_config = job_config._fill_from_default(self._default_load_job_config) 

2473 

2474 load_job = job.LoadJob(job_ref, source_uris, destination, self, new_job_config) 

2475 load_job._begin(retry=retry, timeout=timeout) 

2476 

2477 return load_job 

2478 

2479 def load_table_from_file( 

2480 self, 

2481 file_obj: IO[bytes], 

2482 destination: Union[Table, TableReference, TableListItem, str], 

2483 rewind: bool = False, 

2484 size: Optional[int] = None, 

2485 num_retries: int = _DEFAULT_NUM_RETRIES, 

2486 job_id: Optional[str] = None, 

2487 job_id_prefix: Optional[str] = None, 

2488 location: Optional[str] = None, 

2489 project: Optional[str] = None, 

2490 job_config: Optional[LoadJobConfig] = None, 

2491 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, 

2492 ) -> job.LoadJob: 

2493 """Upload the contents of this table from a file-like object. 

2494 

2495 Similar to :meth:`load_table_from_uri`, this method creates, starts and 

2496 returns a :class:`~google.cloud.bigquery.job.LoadJob`. 

2497 

2498 Args: 

2499 file_obj (IO[bytes]): 

2500 A file handle opened in binary mode for reading. 

2501 destination (Union[Table, \ 

2502 TableReference, \ 

2503 TableListItem, \ 

2504 str \ 

2505 ]): 

2506 Table into which data is to be loaded. If a string is passed 

2507 in, this method attempts to create a table reference from a 

2508 string using 

2509 :func:`google.cloud.bigquery.table.TableReference.from_string`. 

2510 rewind (Optional[bool]): 

2511 If True, seek to the beginning of the file handle before 

2512 reading the file. Defaults to False. 

2513 size (Optional[int]): 

2514 The number of bytes to read from the file handle. If size is 

2515 ``None`` or large, resumable upload will be used. Otherwise, 

2516 multipart upload will be used. 

2517 num_retries (Optional[int]): Number of upload retries. Defaults to 6. 

2518 job_id (Optional[str]): Name of the job. 

2519 job_id_prefix (Optional[str]): 

2520 The user-provided prefix for a randomly generated job ID. 

2521 This parameter will be ignored if a ``job_id`` is also given. 

2522 location (Optional[str]): 

2523 Location where to run the job. Must match the location of the 

2524 destination table. 

2525 project (Optional[str]): 

2526 Project ID of the project of where to run the job. Defaults 

2527 to the client's project. 

2528 job_config (Optional[LoadJobConfig]): 

2529 Extra configuration options for the job. 

2530 timeout (Optional[float]): 

2531 The number of seconds to wait for the underlying HTTP transport 

2532 before using ``retry``. Depending on the retry strategy, a request 

2533 may be repeated several times using the same timeout each time. 

2534 Defaults to None. 

2535 

2536 Can also be passed as a tuple (connect_timeout, read_timeout). 

2537 See :meth:`requests.Session.request` documentation for details. 

2538 

2539 Returns: 

2540 google.cloud.bigquery.job.LoadJob: A new load job. 

2541 

2542 Raises: 

2543 ValueError: 

2544 If ``size`` is not passed in and can not be determined, or if 

2545 the ``file_obj`` can be detected to be a file opened in text 

2546 mode. 

2547 

2548 TypeError: 

2549 If ``job_config`` is not an instance of 

2550 :class:`~google.cloud.bigquery.job.LoadJobConfig` class. 

2551 """ 

2552 job_id = _make_job_id(job_id, job_id_prefix) 

2553 

2554 if project is None: 

2555 project = self.project 

2556 

2557 if location is None: 

2558 location = self.location 

2559 

2560 destination = _table_arg_to_table_ref(destination, default_project=self.project) 

2561 job_ref = job._JobReference(job_id, project=project, location=location) 

2562 

2563 if job_config is not None: 

2564 _verify_job_config_type(job_config, LoadJobConfig) 

2565 else: 

2566 job_config = job.LoadJobConfig() 

2567 

2568 new_job_config = job_config._fill_from_default(self._default_load_job_config) 

2569 

2570 load_job = job.LoadJob(job_ref, None, destination, self, new_job_config) 

2571 job_resource = load_job.to_api_repr() 

2572 

2573 if rewind: 

2574 file_obj.seek(0, os.SEEK_SET) 

2575 

2576 _check_mode(file_obj) 

2577 

2578 try: 

2579 if size is None or size >= _MAX_MULTIPART_SIZE: 

2580 response = self._do_resumable_upload( 

2581 file_obj, job_resource, num_retries, timeout, project=project 

2582 ) 

2583 else: 

2584 response = self._do_multipart_upload( 

2585 file_obj, job_resource, size, num_retries, timeout, project=project 

2586 ) 

2587 except resumable_media.InvalidResponse as exc: 

2588 raise exceptions.from_http_response(exc.response) 

2589 

2590 return typing.cast(LoadJob, self.job_from_resource(response.json())) 

2591 

2592 def load_table_from_dataframe( 

2593 self, 

2594 dataframe: "pandas.DataFrame", # type: ignore 

2595 destination: Union[Table, TableReference, str], 

2596 num_retries: int = _DEFAULT_NUM_RETRIES, 

2597 job_id: Optional[str] = None, 

2598 job_id_prefix: Optional[str] = None, 

2599 location: Optional[str] = None, 

2600 project: Optional[str] = None, 

2601 job_config: Optional[LoadJobConfig] = None, 

2602 parquet_compression: str = "snappy", 

2603 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, 

2604 ) -> job.LoadJob: 

2605 """Upload the contents of a table from a pandas DataFrame. 

2606 

2607 Similar to :meth:`load_table_from_uri`, this method creates, starts and 

2608 returns a :class:`~google.cloud.bigquery.job.LoadJob`. 

2609 

2610 .. note:: 

2611 

2612 REPEATED fields are NOT supported when using the CSV source format. 

2613 They are supported when using the PARQUET source format, but 

2614 due to the way they are encoded in the ``parquet`` file, 

2615 a mismatch with the existing table schema can occur, so 

2616 REPEATED fields are not properly supported when using ``pyarrow<4.0.0`` 

2617 using the parquet format. 

2618 

2619 https://github.com/googleapis/python-bigquery/issues/19 

2620 

2621 Args: 

2622 dataframe (pandas.Dataframe): 

2623 A :class:`~pandas.DataFrame` containing the data to load. 

2624 destination (Union[ \ 

2625 Table, \ 

2626 TableReference, \ 

2627 str \ 

2628 ]): 

2629 The destination table to use for loading the data. If it is an 

2630 existing table, the schema of the :class:`~pandas.DataFrame` 

2631 must match the schema of the destination table. If the table 

2632 does not yet exist, the schema is inferred from the 

2633 :class:`~pandas.DataFrame`. 

2634 

2635 If a string is passed in, this method attempts to create a 

2636 table reference from a string using 

2637 :func:`google.cloud.bigquery.table.TableReference.from_string`. 

2638 num_retries (Optional[int]): Number of upload retries. Defaults to 6. 

2639 job_id (Optional[str]): Name of the job. 

2640 job_id_prefix (Optional[str]): 

2641 The user-provided prefix for a randomly generated 

2642 job ID. This parameter will be ignored if a ``job_id`` is 

2643 also given. 

2644 location (Optional[str]): 

2645 Location where to run the job. Must match the location of the 

2646 destination table. 

2647 project (Optional[str]): 

2648 Project ID of the project of where to run the job. Defaults 

2649 to the client's project. 

2650 job_config (Optional[LoadJobConfig]): 

2651 Extra configuration options for the job. 

2652 

2653 To override the default pandas data type conversions, supply 

2654 a value for 

2655 :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with 

2656 column names matching those of the dataframe. The BigQuery 

2657 schema is used to determine the correct data type conversion. 

2658 Indexes are not loaded. 

2659 

2660 By default, this method uses the parquet source format. To 

2661 override this, supply a value for 

2662 :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format` 

2663 with the format name. Currently only 

2664 :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and 

2665 :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are 

2666 supported. 

2667 parquet_compression (Optional[str]): 

2668 [Beta] The compression method to use if intermittently 

2669 serializing ``dataframe`` to a parquet file. 

2670 Defaults to "snappy". 

2671 

2672 The argument is directly passed as the ``compression`` 

2673 argument to the underlying ``pyarrow.parquet.write_table()`` 

2674 method (the default value "snappy" gets converted to uppercase). 

2675 https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table 

2676 

2677 If the job config schema is missing, the argument is directly 

2678 passed as the ``compression`` argument to the underlying 

2679 ``DataFrame.to_parquet()`` method. 

2680 https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet 

2681 timeout (Optional[flaot]): 

2682 The number of seconds to wait for the underlying HTTP transport 

2683 before using ``retry``. Depending on the retry strategy, a request may 

2684 be repeated several times using the same timeout each time. 

2685 Defaults to None. 

2686 

2687 Can also be passed as a tuple (connect_timeout, read_timeout). 

2688 See :meth:`requests.Session.request` documentation for details. 

2689 

2690 Returns: 

2691 google.cloud.bigquery.job.LoadJob: A new load job. 

2692 

2693 Raises: 

2694 ValueError: 

2695 If a usable parquet engine cannot be found. This method 

2696 requires :mod:`pyarrow` to be installed. 

2697 TypeError: 

2698 If ``job_config`` is not an instance of 

2699 :class:`~google.cloud.bigquery.job.LoadJobConfig` class. 

2700 """ 

2701 job_id = _make_job_id(job_id, job_id_prefix) 

2702 

2703 if job_config is not None: 

2704 _verify_job_config_type(job_config, LoadJobConfig) 

2705 else: 

2706 job_config = job.LoadJobConfig() 

2707 

2708 new_job_config = job_config._fill_from_default(self._default_load_job_config) 

2709 

2710 supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET} 

2711 if new_job_config.source_format is None: 

2712 # default value 

2713 new_job_config.source_format = job.SourceFormat.PARQUET 

2714 

2715 if ( 

2716 new_job_config.source_format == job.SourceFormat.PARQUET 

2717 and new_job_config.parquet_options is None 

2718 ): 

2719 parquet_options = ParquetOptions() 

2720 # default value 

2721 parquet_options.enable_list_inference = True 

2722 new_job_config.parquet_options = parquet_options 

2723 

2724 if new_job_config.source_format not in supported_formats: 

2725 raise ValueError( 

2726 "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format( 

2727 new_job_config.source_format 

2728 ) 

2729 ) 

2730 

2731 if pyarrow is None and new_job_config.source_format == job.SourceFormat.PARQUET: 

2732 # pyarrow is now the only supported parquet engine. 

2733 raise ValueError("This method requires pyarrow to be installed") 

2734 

2735 if location is None: 

2736 location = self.location 

2737 

2738 # If table schema is not provided, we try to fetch the existing table 

2739 # schema, and check if dataframe schema is compatible with it - except 

2740 # for WRITE_TRUNCATE jobs, the existing schema does not matter then. 

2741 if ( 

2742 not new_job_config.schema 

2743 and new_job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE 

2744 ): 

2745 try: 

2746 table = self.get_table(destination) 

2747 except core_exceptions.NotFound: 

2748 pass 

2749 else: 

2750 columns_and_indexes = frozenset( 

2751 name 

2752 for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe) 

2753 ) 

2754 new_job_config.schema = [ 

2755 # Field description and policy tags are not needed to 

2756 # serialize a data frame. 

2757 SchemaField( 

2758 field.name, 

2759 field.field_type, 

2760 mode=field.mode, 

2761 fields=field.fields, 

2762 ) 

2763 # schema fields not present in the dataframe are not needed 

2764 for field in table.schema 

2765 if field.name in columns_and_indexes 

2766 ] 

2767 

2768 new_job_config.schema = _pandas_helpers.dataframe_to_bq_schema( 

2769 dataframe, new_job_config.schema 

2770 ) 

2771 

2772 if not new_job_config.schema: 

2773 # the schema could not be fully detected 

2774 warnings.warn( 

2775 "Schema could not be detected for all columns. Loading from a " 

2776 "dataframe without a schema will be deprecated in the future, " 

2777 "please provide a schema.", 

2778 PendingDeprecationWarning, 

2779 stacklevel=2, 

2780 ) 

2781 

2782 tmpfd, tmppath = tempfile.mkstemp( 

2783 suffix="_job_{}.{}".format(job_id[:8], new_job_config.source_format.lower()) 

2784 ) 

2785 os.close(tmpfd) 

2786 

2787 try: 

2788 if new_job_config.source_format == job.SourceFormat.PARQUET: 

2789 if new_job_config.schema: 

2790 if parquet_compression == "snappy": # adjust the default value 

2791 parquet_compression = parquet_compression.upper() 

2792 

2793 _pandas_helpers.dataframe_to_parquet( 

2794 dataframe, 

2795 new_job_config.schema, 

2796 tmppath, 

2797 parquet_compression=parquet_compression, 

2798 parquet_use_compliant_nested_type=True, 

2799 ) 

2800 else: 

2801 dataframe.to_parquet( 

2802 tmppath, 

2803 engine="pyarrow", 

2804 compression=parquet_compression, 

2805 **( 

2806 {"use_compliant_nested_type": True} 

2807 if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type 

2808 else {} 

2809 ), 

2810 ) 

2811 

2812 else: 

2813 dataframe.to_csv( 

2814 tmppath, 

2815 index=False, 

2816 header=False, 

2817 encoding="utf-8", 

2818 float_format="%.17g", 

2819 date_format="%Y-%m-%d %H:%M:%S.%f", 

2820 ) 

2821 

2822 with open(tmppath, "rb") as tmpfile: 

2823 file_size = os.path.getsize(tmppath) 

2824 return self.load_table_from_file( 

2825 tmpfile, 

2826 destination, 

2827 num_retries=num_retries, 

2828 rewind=True, 

2829 size=file_size, 

2830 job_id=job_id, 

2831 job_id_prefix=job_id_prefix, 

2832 location=location, 

2833 project=project, 

2834 job_config=new_job_config, 

2835 timeout=timeout, 

2836 ) 

2837 

2838 finally: 

2839 os.remove(tmppath) 

2840 

2841 def load_table_from_json( 

2842 self, 

2843 json_rows: Iterable[Dict[str, Any]], 

2844 destination: Union[Table, TableReference, TableListItem, str], 

2845 num_retries: int = _DEFAULT_NUM_RETRIES, 

2846 job_id: Optional[str] = None, 

2847 job_id_prefix: Optional[str] = None, 

2848 location: Optional[str] = None, 

2849 project: Optional[str] = None, 

2850 job_config: Optional[LoadJobConfig] = None, 

2851 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT, 

2852 ) -> job.LoadJob: 

2853 """Upload the contents of a table from a JSON string or dict. 

2854 

2855 Args: 

2856 json_rows (Iterable[Dict[str, Any]]): 

2857 Row data to be inserted. Keys must match the table schema fields 

2858 and values must be JSON-compatible representations. 

2859 

2860 .. note:: 

2861 

2862 If your data is already a newline-delimited JSON string, 

2863 it is best to wrap it into a file-like object and pass it 

2864 to :meth:`~google.cloud.bigquery.client.Client.load_table_from_file`:: 

2865 

2866 import io 

2867 from google.cloud import bigquery 

2868 

2869 data = u'{"foo": "bar"}' 

2870 data_as_file = io.StringIO(data) 

2871 

2872 client = bigquery.Client() 

2873 client.load_table_from_file(data_as_file, ...) 

2874 

2875 destination (Union[ \ 

2876 Table, \ 

2877 TableReference, \ 

2878 TableListItem, \ 

2879 str \ 

2880 ]): 

2881 Table into which data is to be loaded. If a string is passed 

2882 in, this method attempts to create a table reference from a 

2883 string using 

2884 :func:`google.cloud.bigquery.table.TableReference.from_string`. 

2885 num_retries (Optional[int]): Number of upload retries. Defaults to 6. 

2886 job_id (Optional[str]): Name of the job. 

2887 job_id_prefix (Optional[str]): 

2888 The user-provided prefix for a randomly generated job ID. 

2889 This parameter will be ignored if a ``job_id`` is also given. 

2890 location (Optional[str]): 

2891 Location where to run the job. Must match the location of the 

2892 destination table. 

2893 project (Optional[str]): 

2894 Project ID of the project of where to run the job. Defaults 

2895 to the client's project. 

2896 job_config (Optional[LoadJobConfig]): 

2897 Extra configuration options for the job. The ``source_format`` 

2898 setting is always set to 

2899 :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`. 

2900 timeout (Optional[float]): 

2901 The number of seconds to wait for the underlying HTTP transport 

2902 before using ``retry``. Depending on the retry strategy, a request may 

2903 be repeated several times using the same timeout each time. 

2904 Defaults to None. 

2905 

2906 Can also be passed as a tuple (connect_timeout, read_timeout). 

2907 See :meth:`requests.Session.request` documentation for details. 

2908 

2909 Returns: 

2910 google.cloud.bigquery.job.LoadJob: A new load job. 

2911 

2912 Raises: 

2913 TypeError: 

2914 If ``job_config`` is not an instance of 

2915 :class:`~google.cloud.bigquery.job.LoadJobConfig` class. 

2916 """ 

2917 job_id = _make_job_id(job_id, job_id_prefix) 

2918 

2919 if job_config is not None: 

2920 _verify_job_config_type(job_config, LoadJobConfig) 

2921 else: 

2922 job_config = job.LoadJobConfig() 

2923 

2924 new_job_config = job_config._fill_from_default(self._default_load_job_config) 

2925 

2926 new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON 

2927 

2928 # In specific conditions, we check if the table alread exists, and/or 

2929 # set the autodetect value for the user. For exact conditions, see table 

2930 # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297 

2931 if new_job_config.schema is None and new_job_config.autodetect is None: 

2932 if new_job_config.write_disposition in ( 

2933 job.WriteDisposition.WRITE_TRUNCATE, 

2934 job.WriteDisposition.WRITE_EMPTY, 

2935 ): 

2936 new_job_config.autodetect = True 

2937 else: 

2938 try: 

2939 self.get_table(destination) 

2940 except core_exceptions.NotFound: 

2941 new_job_config.autodetect = True 

2942 else: 

2943 new_job_config.autodetect = False 

2944 

2945 if project is None: 

2946 project = self.project 

2947 

2948 if location is None: 

2949 location = self.location 

2950 

2951 destination = _table_arg_to_table_ref(destination, default_project=self.project) 

2952 

2953 data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows) 

2954 encoded_str = data_str.encode() 

2955 data_file = io.BytesIO(encoded_str) 

2956 return self.load_table_from_file( 

2957 data_file, 

2958 destination, 

2959 size=len(encoded_str), 

2960 num_retries=num_retries, 

2961 job_id=job_id, 

2962 job_id_prefix=job_id_prefix, 

2963 location=location, 

2964 project=project, 

2965 job_config=new_job_config, 

2966 timeout=timeout, 

2967 ) 

2968 

2969 def _do_resumable_upload( 

2970 self, 

2971 stream: IO[bytes], 

2972 metadata: Mapping[str, str], 

2973 num_retries: int, 

2974 timeout: Optional[ResumableTimeoutType], 

2975 project: Optional[str] = None, 

2976 ) -> "requests.Response": 

2977 """Perform a resumable upload. 

2978 

2979 Args: 

2980 stream (IO[bytes]): A bytes IO object open for reading. 

2981 metadata (Mapping[str, str]): The metadata associated with the upload. 

2982 num_retries (int): 

2983 Number of upload retries. (Deprecated: This 

2984 argument will be removed in a future release.) 

2985 timeout (Optional[float]): 

2986 The number of seconds to wait for the underlying HTTP transport 

2987 before using ``retry``. Depending on the retry strategy, a request may 

2988 be repeated several times using the same timeout each time. 

2989 

2990 Can also be passed as a tuple (connect_timeout, read_timeout). 

2991 See :meth:`requests.Session.request` documentation for details. 

2992 project (Optional[str]): 

2993 Project ID of the project of where to run the upload. Defaults 

2994 to the client's project. 

2995 

2996 Returns: 

2997 The "200 OK" response object returned after the final chunk 

2998 is uploaded. 

2999 """ 

3000 upload, transport = self._initiate_resumable_upload( 

3001 stream, metadata, num_retries, timeout, project=project 

3002 ) 

3003 

3004 while not upload.finished: 

3005 response = upload.transmit_next_chunk(transport, timeout=timeout) 

3006 

3007 return response 

3008 

3009 def _initiate_resumable_upload( 

3010 self, 

3011 stream: IO[bytes], 

3012 metadata: Mapping[str, str], 

3013 num_retries: int, 

3014 timeout: Optional[ResumableTimeoutType], 

3015 project: Optional[str] = None, 

3016 ): 

3017 """Initiate a resumable upload. 

3018 

3019 Args: 

3020 stream (IO[bytes]): A bytes IO object open for reading. 

3021 metadata (Mapping[str, str]): The metadata associated with the upload. 

3022 num_retries (int): 

3023 Number of upload retries. (Deprecated: This 

3024 argument will be removed in a future release.) 

3025 timeout (Optional[float]): 

3026 The number of seconds to wait for the underlying HTTP transport 

3027 before using ``retry``. Depending on the retry strategy, a request may 

3028 be repeated several times using the same timeout each time. 

3029 

3030 Can also be passed as a tuple (connect_timeout, read_timeout). 

3031 See :meth:`requests.Session.request` documentation for details. 

3032 project (Optional[str]): 

3033 Project ID of the project of where to run the upload. Defaults 

3034 to the client's project. 

3035 

3036 Returns: 

3037 Tuple: 

3038 Pair of 

3039 

3040 * The :class:`~google.resumable_media.requests.ResumableUpload` 

3041 that was created 

3042 * The ``transport`` used to initiate the upload. 

3043 """ 

3044 chunk_size = _DEFAULT_CHUNKSIZE 

3045 transport = self._http 

3046 headers = _get_upload_headers(self._connection.user_agent) 

3047 

3048 if project is None: 

3049 project = self.project 

3050 # TODO: Increase the minimum version of google-cloud-core to 1.6.0 

3051 # and remove this logic. See: 

3052 # https://github.com/googleapis/python-bigquery/issues/509 

3053 hostname = ( 

3054 self._connection.API_BASE_URL 

3055 if not hasattr(self._connection, "get_api_base_url_for_mtls") 

3056 else self._connection.get_api_base_url_for_mtls() 

3057 ) 

3058 upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project) 

3059 

3060 # TODO: modify ResumableUpload to take a retry.Retry object 

3061 # that it can use for the initial RPC. 

3062 upload = ResumableUpload(upload_url, chunk_size, headers=headers) 

3063 

3064 if num_retries is not None: 

3065 upload._retry_strategy = resumable_media.RetryStrategy( 

3066 max_retries=num_retries 

3067 ) 

3068 

3069 upload.initiate( 

3070 transport, 

3071 stream, 

3072 metadata, 

3073 _GENERIC_CONTENT_TYPE, 

3074 stream_final=False, 

3075 timeout=timeout, 

3076 ) 

3077 

3078 return upload, transport 

3079 

3080 def _do_multipart_upload( 

3081 self, 

3082 stream: IO[bytes], 

3083 metadata: Mapping[str, str], 

3084 size: int, 

3085 num_retries: int, 

3086 timeout: Optional[ResumableTimeoutType], 

3087 project: Optional[str] = None, 

3088 ): 

3089 """Perform a multipart upload. 

3090 

3091 Args: 

3092 stream (IO[bytes]): A bytes IO object open for reading. 

3093 metadata (Mapping[str, str]): The metadata associated with the upload. 

3094 size (int): 

3095 The number of bytes to be uploaded (which will be read 

3096 from ``stream``). If not provided, the upload will be 

3097 concluded once ``stream`` is exhausted (or :data:`None`). 

3098 num_retries (int): 

3099 Number of upload retries. (Deprecated: This 

3100 argument will be removed in a future release.) 

3101 timeout (Optional[float]): 

3102 The number of seconds to wait for the underlying HTTP transport 

3103 before using ``retry``. Depending on the retry strategy, a request may 

3104 be repeated several times using the same timeout each time. 

3105 

3106 Can also be passed as a tuple (connect_timeout, read_timeout). 

3107 See :meth:`requests.Session.request` documentation for details. 

3108 project (Optional[str]): 

3109 Project ID of the project of where to run the upload. Defaults 

3110 to the client's project. 

3111 

3112 Returns: 

3113 requests.Response: 

3114 The "200 OK" response object returned after the multipart 

3115 upload request. 

3116 

3117 Raises: 

3118 ValueError: 

3119 if the ``stream`` has fewer than ``size`` 

3120 bytes remaining. 

3121 """ 

3122 data = stream.read(size) 

3123 if len(data) < size: 

3124 msg = _READ_LESS_THAN_SIZE.format(size, len(data)) 

3125 raise ValueError(msg) 

3126 

3127 headers = _get_upload_headers(self._connection.user_agent) 

3128 

3129 if project is None: 

3130 project = self.project 

3131 

3132 # TODO: Increase the minimum version of google-cloud-core to 1.6.0 

3133 # and remove this logic. See: 

3134 # https://github.com/googleapis/python-bigquery/issues/509 

3135 hostname = ( 

3136 self._connection.API_BASE_URL 

3137 if not hasattr(self._connection, "get_api_base_url_for_mtls") 

3138 else self._connection.get_api_base_url_for_mtls() 

3139 ) 

3140 upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project) 

3141 upload = MultipartUpload(upload_url, headers=headers) 

3142 

3143 if num_retries is not None: 

3144 upload._retry_strategy = resumable_media.RetryStrategy( 

3145 max_retries=num_retries 

3146 ) 

3147 

3148 response = upload.transmit( 

3149 self._http, data, metadata, _GENERIC_CONTENT_TYPE, timeout=timeout 

3150 ) 

3151 

3152 return response 

3153 

3154 def copy_table( 

3155 self, 

3156 sources: Union[ 

3157 Table, 

3158 TableReference, 

3159 TableListItem, 

3160 str, 

3161 Sequence[Union[Table, TableReference, TableListItem, str]], 

3162 ], 

3163 destination: Union[Table, TableReference, TableListItem, str], 

3164 job_id: Optional[str] = None, 

3165 job_id_prefix: Optional[str] = None, 

3166 location: Optional[str] = None, 

3167 project: Optional[str] = None, 

3168 job_config: Optional[CopyJobConfig] = None, 

3169 retry: retries.Retry = DEFAULT_RETRY, 

3170 timeout: TimeoutType = DEFAULT_TIMEOUT, 

3171 ) -> job.CopyJob: 

3172 """Copy one or more tables to another table. 

3173 

3174 See 

3175 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationtablecopy 

3176 

3177 Args: 

3178 sources (Union[ \ 

3179 google.cloud.bigquery.table.Table, \ 

3180 google.cloud.bigquery.table.TableReference, \ 

3181 google.cloud.bigquery.table.TableListItem, \ 

3182 str, \ 

3183 Sequence[ \ 

3184 Union[ \ 

3185 google.cloud.bigquery.table.Table, \ 

3186 google.cloud.bigquery.table.TableReference, \ 

3187 google.cloud.bigquery.table.TableListItem, \ 

3188 str, \ 

3189 ] \ 

3190 ], \ 

3191 ]): 

3192 Table or tables to be copied. 

3193 destination (Union[ \ 

3194 google.cloud.bigquery.table.Table, \ 

3195 google.cloud.bigquery.table.TableReference, \ 

3196 google.cloud.bigquery.table.TableListItem, \ 

3197 str, \ 

3198 ]): 

3199 Table into which data is to be copied. 

3200 job_id (Optional[str]): The ID of the job. 

3201 job_id_prefix (Optional[str]): 

3202 The user-provided prefix for a randomly generated job ID. 

3203 This parameter will be ignored if a ``job_id`` is also given. 

3204 location (Optional[str]): 

3205 Location where to run the job. Must match the location of any 

3206 source table as well as the destination table. 

3207 project (Optional[str]): 

3208 Project ID of the project of where to run the job. Defaults 

3209 to the client's project. 

3210 job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]): 

3211 Extra configuration options for the job. 

3212 retry (Optional[google.api_core.retry.Retry]): 

3213 How to retry the RPC. 

3214 timeout (Optional[float]): 

3215 The number of seconds to wait for the underlying HTTP transport 

3216 before using ``retry``. 

3217 

3218 Returns: 

3219 google.cloud.bigquery.job.CopyJob: A new copy job instance. 

3220 

3221 Raises: 

3222 TypeError: 

3223 If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.CopyJobConfig` 

3224 class. 

3225 """ 

3226 job_id = _make_job_id(job_id, job_id_prefix) 

3227 

3228 if project is None: 

3229 project = self.project 

3230 

3231 if location is None: 

3232 location = self.location 

3233 

3234 job_ref = job._JobReference(job_id, project=project, location=location) 

3235 

3236 # sources can be one of many different input types. (string, Table, 

3237 # TableReference, or a sequence of any of those.) Convert them all to a 

3238 # list of TableReferences. 

3239 # 

3240 # _table_arg_to_table_ref leaves lists unmodified. 

3241 sources = _table_arg_to_table_ref(sources, default_project=self.project) 

3242 

3243 if not isinstance(sources, collections_abc.Sequence): 

3244 sources = [sources] 

3245 

3246 sources = [ 

3247 _table_arg_to_table_ref(source, default_project=self.project) 

3248 for source in sources 

3249 ] 

3250 

3251 destination = _table_arg_to_table_ref(destination, default_project=self.project) 

3252 

3253 if job_config: 

3254 _verify_job_config_type(job_config, google.cloud.bigquery.job.CopyJobConfig) 

3255 job_config = copy.deepcopy(job_config) 

3256 

3257 copy_job = job.CopyJob( 

3258 job_ref, sources, destination, client=self, job_config=job_config 

3259 ) 

3260 copy_job._begin(retry=retry, timeout=timeout) 

3261 

3262 return copy_job 

3263 

3264 def extract_table( 

3265 self, 

3266 source: Union[Table, TableReference, TableListItem, Model, ModelReference, str], 

3267 destination_uris: Union[str, Sequence[str]], 

3268 job_id: Optional[str] = None, 

3269 job_id_prefix: Optional[str] = None, 

3270 location: Optional[str] = None, 

3271 project: Optional[str] = None, 

3272 job_config: Optional[ExtractJobConfig] = None, 

3273 retry: retries.Retry = DEFAULT_RETRY, 

3274 timeout: TimeoutType = DEFAULT_TIMEOUT, 

3275 source_type: str = "Table", 

3276 ) -> job.ExtractJob: 

3277 """Start a job to extract a table into Cloud Storage files. 

3278 

3279 See 

3280 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationextract 

3281 

3282 Args: 

3283 source (Union[ \ 

3284 google.cloud.bigquery.table.Table, \ 

3285 google.cloud.bigquery.table.TableReference, \ 

3286 google.cloud.bigquery.table.TableListItem, \ 

3287 google.cloud.bigquery.model.Model, \ 

3288 google.cloud.bigquery.model.ModelReference, \ 

3289 src, \ 

3290 ]): 

3291 Table or Model to be extracted. 

3292 destination_uris (Union[str, Sequence[str]]): 

3293 URIs of Cloud Storage file(s) into which table data is to be 

3294 extracted; in format 

3295 ``gs://<bucket_name>/<object_name_or_glob>``. 

3296 job_id (Optional[str]): The ID of the job. 

3297 job_id_prefix (Optional[str]): 

3298 The user-provided prefix for a randomly generated job ID. 

3299 This parameter will be ignored if a ``job_id`` is also given. 

3300 location (Optional[str]): 

3301 Location where to run the job. Must match the location of the 

3302 source table. 

3303 project (Optional[str]): 

3304 Project ID of the project of where to run the job. Defaults 

3305 to the client's project. 

3306 job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]): 

3307 Extra configuration options for the job. 

3308 retry (Optional[google.api_core.retry.Retry]): 

3309 How to retry the RPC. 

3310 timeout (Optional[float]): 

3311 The number of seconds to wait for the underlying HTTP transport 

3312 before using ``retry``. 

3313 source_type (Optional[str]): 

3314 Type of source to be extracted.``Table`` or ``Model``. Defaults to ``Table``. 

3315 Returns: 

3316 google.cloud.bigquery.job.ExtractJob: A new extract job instance. 

3317 

3318 Raises: 

3319 TypeError: 

3320 If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.ExtractJobConfig` 

3321 class. 

3322 ValueError: 

3323 If ``source_type`` is not among ``Table``,``Model``. 

3324 """ 

3325 job_id = _make_job_id(job_id, job_id_prefix) 

3326 

3327 if project is None: 

3328 project = self.project 

3329 

3330 if location is None: 

3331 location = self.location 

3332 

3333 job_ref = job._JobReference(job_id, project=project, location=location) 

3334 src = source_type.lower() 

3335 if src == "table": 

3336 source = _table_arg_to_table_ref(source, default_project=self.project) 

3337 elif src == "model": 

3338 source = _model_arg_to_model_ref(source, default_project=self.project) 

3339 else: 

3340 raise ValueError( 

3341 "Cannot pass `{}` as a ``source_type``, pass Table or Model".format( 

3342 source_type 

3343 ) 

3344 ) 

3345 

3346 if isinstance(destination_uris, str): 

3347 destination_uris = [destination_uris] 

3348 

3349 if job_config: 

3350 _verify_job_config_type( 

3351 job_config, google.cloud.bigquery.job.ExtractJobConfig 

3352 ) 

3353 job_config = copy.deepcopy(job_config) 

3354 

3355 extract_job = job.ExtractJob( 

3356 job_ref, source, destination_uris, client=self, job_config=job_config 

3357 ) 

3358 extract_job._begin(retry=retry, timeout=timeout) 

3359 

3360 return extract_job 

3361 

3362 def query( 

3363 self, 

3364 query: str, 

3365 job_config: Optional[QueryJobConfig] = None, 

3366 job_id: Optional[str] = None, 

3367 job_id_prefix: Optional[str] = None, 

3368 location: Optional[str] = None, 

3369 project: Optional[str] = None, 

3370 retry: retries.Retry = DEFAULT_RETRY, 

3371 timeout: TimeoutType = DEFAULT_TIMEOUT, 

3372 job_retry: retries.Retry = DEFAULT_JOB_RETRY, 

3373 api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT, 

3374 ) -> job.QueryJob: 

3375 """Run a SQL query. 

3376 

3377 See 

3378 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationquery 

3379 

3380 Args: 

3381 query (str): 

3382 SQL query to be executed. Defaults to the standard SQL 

3383 dialect. Use the ``job_config`` parameter to change dialects. 

3384 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): 

3385 Extra configuration options for the job. 

3386 To override any options that were previously set in 

3387 the ``default_query_job_config`` given to the 

3388 ``Client`` constructor, manually set those options to ``None``, 

3389 or whatever value is preferred. 

3390 job_id (Optional[str]): ID to use for the query job. 

3391 job_id_prefix (Optional[str]): 

3392 The prefix to use for a randomly generated job ID. This parameter 

3393 will be ignored if a ``job_id`` is also given. 

3394 location (Optional[str]): 

3395 Location where to run the job. Must match the location of the 

3396 table used in the query as well as the destination table. 

3397 project (Optional[str]): 

3398 Project ID of the project of where to run the job. Defaults 

3399 to the client's project. 

3400 retry (Optional[google.api_core.retry.Retry]): 

3401 How to retry the RPC. This only applies to making RPC 

3402 calls. It isn't used to retry failed jobs. This has 

3403 a reasonable default that should only be overridden 

3404 with care. 

3405 timeout (Optional[float]): 

3406 The number of seconds to wait for the underlying HTTP transport 

3407 before using ``retry``. 

3408 job_retry (Optional[google.api_core.retry.Retry]): 

3409 How to retry failed jobs. The default retries 

3410 rate-limit-exceeded errors. Passing ``None`` disables 

3411 job retry. 

3412 

3413 Not all jobs can be retried. If ``job_id`` is 

3414 provided, then the job returned by the query will not 

3415 be retryable, and an exception will be raised if a 

3416 non-``None`` (and non-default) value for ``job_retry`` 

3417 is also provided. 

3418 

3419 Note that errors aren't detected until ``result()`` is 

3420 called on the job returned. The ``job_retry`` 

3421 specified here becomes the default ``job_retry`` for 

3422 ``result()``, where it can also be specified. 

3423 api_method (Union[str, enums.QueryApiMethod]): 

3424 Method with which to start the query job. 

3425 

3426 See :class:`google.cloud.bigquery.enums.QueryApiMethod` for 

3427 details on the difference between the query start methods. 

3428 

3429 Returns: 

3430 google.cloud.bigquery.job.QueryJob: A new query job instance. 

3431 

3432 Raises: 

3433 TypeError: 

3434 If ``job_config`` is not an instance of 

3435 :class:`~google.cloud.bigquery.job.QueryJobConfig` 

3436 class, or if both ``job_id`` and non-``None`` non-default 

3437 ``job_retry`` are provided. 

3438 """ 

3439 job_id_given = job_id is not None 

3440 if ( 

3441 job_id_given 

3442 and job_retry is not None 

3443 and job_retry is not DEFAULT_JOB_RETRY 

3444 ): 

3445 raise TypeError( 

3446 "`job_retry` was provided, but the returned job is" 

3447 " not retryable, because a custom `job_id` was" 

3448 " provided." 

3449 ) 

3450 

3451 if job_id_given and api_method == enums.QueryApiMethod.QUERY: 

3452 raise TypeError( 

3453 "`job_id` was provided, but the 'QUERY' `api_method` was requested." 

3454 ) 

3455 

3456 if project is None: 

3457 project = self.project 

3458 

3459 if location is None: 

3460 location = self.location 

3461 

3462 if job_config is not None: 

3463 _verify_job_config_type(job_config, QueryJobConfig) 

3464 

3465 job_config = _job_helpers.job_config_with_defaults( 

3466 job_config, self._default_query_job_config 

3467 ) 

3468 

3469 # Note that we haven't modified the original job_config (or 

3470 # _default_query_job_config) up to this point. 

3471 if api_method == enums.QueryApiMethod.QUERY: 

3472 return _job_helpers.query_jobs_query( 

3473 self, 

3474 query, 

3475 job_config, 

3476 location, 

3477 project, 

3478 retry, 

3479 timeout, 

3480 job_retry, 

3481 ) 

3482 elif api_method == enums.QueryApiMethod.INSERT: 

3483 return _job_helpers.query_jobs_insert( 

3484 self, 

3485 query, 

3486 job_config, 

3487 job_id, 

3488 job_id_prefix, 

3489 location, 

3490 project, 

3491 retry, 

3492 timeout, 

3493 job_retry, 

3494 ) 

3495 else: 

3496 raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}") 

3497 

3498 def query_and_wait( 

3499 self, 

3500 query, 

3501 *, 

3502 job_config: Optional[QueryJobConfig] = None, 

3503 location: Optional[str] = None, 

3504 project: Optional[str] = None, 

3505 api_timeout: TimeoutType = DEFAULT_TIMEOUT, 

3506 wait_timeout: TimeoutType = None, 

3507 retry: retries.Retry = DEFAULT_RETRY, 

3508 job_retry: retries.Retry = DEFAULT_JOB_RETRY, 

3509 page_size: Optional[int] = None, 

3510 max_results: Optional[int] = None, 

3511 ) -> RowIterator: 

3512 """Run the query, wait for it to finish, and return the results. 

3513 

3514 While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the 

3515 ``jobs.query`` REST API, use the default ``jobCreationMode`` unless 

3516 the environment variable ``QUERY_PREVIEW_ENABLED=true``. After 

3517 ``jobCreationMode`` is GA, this method will always use 

3518 ``jobCreationMode=JOB_CREATION_OPTIONAL``. See: 

3519 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query 

3520 

3521 Args: 

3522 query (str): 

3523 SQL query to be executed. Defaults to the standard SQL 

3524 dialect. Use the ``job_config`` parameter to change dialects. 

3525 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]): 

3526 Extra configuration options for the job. 

3527 To override any options that were previously set in 

3528 the ``default_query_job_config`` given to the 

3529 ``Client`` constructor, manually set those options to ``None``, 

3530 or whatever value is preferred. 

3531 location (Optional[str]): 

3532 Location where to run the job. Must match the location of the 

3533 table used in the query as well as the destination table. 

3534 project (Optional[str]): 

3535 Project ID of the project of where to run the job. Defaults 

3536 to the client's project. 

3537 api_timeout (Optional[float]): 

3538 The number of seconds to wait for the underlying HTTP transport 

3539 before using ``retry``. 

3540 wait_timeout (Optional[float]): 

3541 The number of seconds to wait for the query to finish. If the 

3542 query doesn't finish before this timeout, the client attempts 

3543 to cancel the query. 

3544 retry (Optional[google.api_core.retry.Retry]): 

3545 How to retry the RPC. This only applies to making RPC 

3546 calls. It isn't used to retry failed jobs. This has 

3547 a reasonable default that should only be overridden 

3548 with care. 

3549 job_retry (Optional[google.api_core.retry.Retry]): 

3550 How to retry failed jobs. The default retries 

3551 rate-limit-exceeded errors. Passing ``None`` disables 

3552 job retry. Not all jobs can be retried. 

3553 page_size (Optional[int]): 

3554 The maximum number of rows in each page of results from this 

3555 request. Non-positive values are ignored. 

3556 max_results (Optional[int]): 

3557 The maximum total number of rows from this request. 

3558 

3559 Returns: 

3560 google.cloud.bigquery.table.RowIterator: 

3561 Iterator of row data 

3562 :class:`~google.cloud.bigquery.table.Row`-s. During each 

3563 page, the iterator will have the ``total_rows`` attribute 

3564 set, which counts the total number of rows **in the result 

3565 set** (this is distinct from the total number of rows in the 

3566 current page: ``iterator.page.num_items``). 

3567 

3568 If the query is a special query that produces no results, e.g. 

3569 a DDL query, an ``_EmptyRowIterator`` instance is returned. 

3570 

3571 Raises: 

3572 TypeError: 

3573 If ``job_config`` is not an instance of 

3574 :class:`~google.cloud.bigquery.job.QueryJobConfig` 

3575 class. 

3576 """ 

3577 if project is None: 

3578 project = self.project 

3579 

3580 if location is None: 

3581 location = self.location 

3582 

3583 if job_config is not None: 

3584 _verify_job_config_type(job_config, QueryJobConfig) 

3585 

3586 job_config = _job_helpers.job_config_with_defaults( 

3587 job_config, self._default_query_job_config 

3588 ) 

3589 

3590 return _job_helpers.query_and_wait( 

3591 self, 

3592 query, 

3593 job_config=job_config, 

3594 location=location, 

3595 project=project, 

3596 api_timeout=api_timeout, 

3597 wait_timeout=wait_timeout, 

3598 retry=retry, 

3599 job_retry=job_retry, 

3600 page_size=page_size, 

3601 max_results=max_results, 

3602 ) 

3603 

3604 def insert_rows( 

3605 self, 

3606 table: Union[Table, TableReference, str], 

3607 rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]], 

3608 selected_fields: Optional[Sequence[SchemaField]] = None, 

3609 **kwargs, 

3610 ) -> Sequence[Dict[str, Any]]: 

3611 """Insert rows into a table via the streaming API. 

3612 

3613 See 

3614 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll 

3615 

3616 BigQuery will reject insertAll payloads that exceed a defined limit (10MB). 

3617 Additionally, if a payload vastly exceeds this limit, the request is rejected 

3618 by the intermediate architecture, which returns a 413 (Payload Too Large) status code. 

3619 

3620 

3621 See 

3622 https://cloud.google.com/bigquery/quotas#streaming_inserts 

3623 

3624 Args: 

3625 table (Union[ \ 

3626 google.cloud.bigquery.table.Table, \ 

3627 google.cloud.bigquery.table.TableReference, \ 

3628 str, \ 

3629 ]): 

3630 The destination table for the row data, or a reference to it. 

3631 rows (Union[Sequence[Tuple], Sequence[Dict]]): 

3632 Row data to be inserted. If a list of tuples is given, each 

3633 tuple should contain data for each schema field on the 

3634 current table and in the same order as the schema fields. If 

3635 a list of dictionaries is given, the keys must include all 

3636 required fields in the schema. Keys which do not correspond 

3637 to a field in the schema are ignored. 

3638 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): 

3639 The fields to return. Required if ``table`` is a 

3640 :class:`~google.cloud.bigquery.table.TableReference`. 

3641 kwargs (dict): 

3642 Keyword arguments to 

3643 :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. 

3644 

3645 Returns: 

3646 Sequence[Mappings]: 

3647 One mapping per row with insert errors: the "index" key 

3648 identifies the row, and the "errors" key contains a list of 

3649 the mappings describing one or more problems with the row. 

3650 

3651 Raises: 

3652 ValueError: if table's schema is not set or `rows` is not a `Sequence`. 

3653 """ 

3654 if not isinstance(rows, (collections_abc.Sequence, collections_abc.Iterator)): 

3655 raise TypeError("rows argument should be a sequence of dicts or tuples") 

3656 

3657 table = _table_arg_to_table(table, default_project=self.project) 

3658 

3659 if not isinstance(table, Table): 

3660 raise TypeError(_NEED_TABLE_ARGUMENT) 

3661 

3662 schema = table.schema 

3663 

3664 # selected_fields can override the table schema. 

3665 if selected_fields is not None: 

3666 schema = selected_fields 

3667 

3668 if len(schema) == 0: 

3669 raise ValueError( 

3670 ( 

3671 "Could not determine schema for table '{}'. Call client.get_table() " 

3672 "or pass in a list of schema fields to the selected_fields argument." 

3673 ).format(table) 

3674 ) 

3675 

3676 json_rows = [_record_field_to_json(schema, row) for row in rows] 

3677 

3678 return self.insert_rows_json(table, json_rows, **kwargs) 

3679 

3680 def insert_rows_from_dataframe( 

3681 self, 

3682 table: Union[Table, TableReference, str], 

3683 dataframe, 

3684 selected_fields: Optional[Sequence[SchemaField]] = None, 

3685 chunk_size: int = 500, 

3686 **kwargs: Dict, 

3687 ) -> Sequence[Sequence[dict]]: 

3688 """Insert rows into a table from a dataframe via the streaming API. 

3689 

3690 BigQuery will reject insertAll payloads that exceed a defined limit (10MB). 

3691 Additionally, if a payload vastly exceeds this limit, the request is rejected 

3692 by the intermediate architecture, which returns a 413 (Payload Too Large) status code. 

3693 

3694 See 

3695 https://cloud.google.com/bigquery/quotas#streaming_inserts 

3696 

3697 Args: 

3698 table (Union[ \ 

3699 google.cloud.bigquery.table.Table, \ 

3700 google.cloud.bigquery.table.TableReference, \ 

3701 str, \ 

3702 ]): 

3703 The destination table for the row data, or a reference to it. 

3704 dataframe (pandas.DataFrame): 

3705 A :class:`~pandas.DataFrame` containing the data to load. Any 

3706 ``NaN`` values present in the dataframe are omitted from the 

3707 streaming API request(s). 

3708 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): 

3709 The fields to return. Required if ``table`` is a 

3710 :class:`~google.cloud.bigquery.table.TableReference`. 

3711 chunk_size (int): 

3712 The number of rows to stream in a single chunk. Must be positive. 

3713 kwargs (Dict): 

3714 Keyword arguments to 

3715 :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`. 

3716 

3717 Returns: 

3718 Sequence[Sequence[Mappings]]: 

3719 A list with insert errors for each insert chunk. Each element 

3720 is a list containing one mapping per row with insert errors: 

3721 the "index" key identifies the row, and the "errors" key 

3722 contains a list of the mappings describing one or more problems 

3723 with the row. 

3724 

3725 Raises: 

3726 ValueError: if table's schema is not set 

3727 """ 

3728 insert_results = [] 

3729 

3730 chunk_count = int(math.ceil(len(dataframe) / chunk_size)) 

3731 rows_iter = _pandas_helpers.dataframe_to_json_generator(dataframe) 

3732 

3733 for _ in range(chunk_count): 

3734 rows_chunk = itertools.islice(rows_iter, chunk_size) 

3735 result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs) 

3736 insert_results.append(result) 

3737 

3738 return insert_results 

3739 

    def insert_rows_json(
        self,
        table: Union[Table, TableReference, TableListItem, str],
        json_rows: Sequence[Mapping[str, Any]],
        row_ids: Union[
            Iterable[Optional[str]], AutoRowIDs, None
        ] = AutoRowIDs.GENERATE_UUID,
        skip_invalid_rows: Optional[bool] = None,
        ignore_unknown_values: Optional[bool] = None,
        template_suffix: Optional[str] = None,
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Sequence[dict]:
        """Insert rows into a table without applying local type conversions.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll

        BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
        Additionally, if a payload vastly exceeds this limit, the request is rejected
        by the intermediate architecture, which returns a 413 (Payload Too Large) status code.

        See
        https://cloud.google.com/bigquery/quotas#streaming_inserts

        Args:
            table (Union[ \
                google.cloud.bigquery.table.Table \
                google.cloud.bigquery.table.TableReference, \
                google.cloud.bigquery.table.TableListItem, \
                str \
            ]):
                The destination table for the row data, or a reference to it.
            json_rows (Sequence[Dict]):
                Row data to be inserted. Keys must match the table schema fields
                and values must be JSON-compatible representations.
            row_ids (Union[Iterable[str], AutoRowIDs, None]):
                Unique IDs, one per row being inserted. An ID can also be
                ``None``, indicating that an explicit insert ID should **not**
                be used for that row. If the argument is omitted altogether,
                unique IDs are created automatically.

                .. versionchanged:: 2.21.0
                    Can also be an iterable, not just a sequence, or an
                    :class:`AutoRowIDs` enum member.

                .. deprecated:: 2.21.0
                    Passing ``None`` to explicitly request autogenerating insert IDs is
                    deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead.

            skip_invalid_rows (Optional[bool]):
                Insert all valid rows of a request, even if invalid rows exist.
                The default value is ``False``, which causes the entire request
                to fail if any invalid rows exist.
            ignore_unknown_values (Optional[bool]):
                Accept rows that contain values that do not match the schema.
                The unknown values are ignored. Default is ``False``, which
                treats unknown values as errors.
            template_suffix (Optional[str]):
                Treat ``name`` as a template table and provide a suffix.
                BigQuery will create the table ``<name> + <template_suffix>``
                based on the schema of the template table. See
                https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
            retry (Optional[google.api_core.retry.Retry]):
                How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            Sequence[Mappings]:
                One mapping per row with insert errors: the "index" key
                identifies the row, and the "errors" key contains a list of
                the mappings describing one or more problems with the row.

        Raises:
            TypeError: if `json_rows` is not a `Sequence`.
        """
        if not isinstance(
            json_rows, (collections_abc.Sequence, collections_abc.Iterator)
        ):
            raise TypeError("json_rows argument should be a sequence of dicts")
        # Convert table to just a reference because unlike insert_rows,
        # insert_rows_json doesn't need the table schema. It's not doing any
        # type conversions.
        table = _table_arg_to_table_ref(table, default_project=self.project)
        # data["rows"] aliases rows_info, so appending to rows_info below
        # builds the request payload in place.
        rows_info: List[Any] = []
        data: Dict[str, Any] = {"rows": rows_info}

        if row_ids is None:
            # Legacy spelling of "autogenerate IDs": warn, then normalize to
            # the enum so the loop below only deals with two cases.
            warnings.warn(
                "Passing None for row_ids is deprecated. To explicitly request "
                "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead",
                category=DeprecationWarning,
            )
            row_ids = AutoRowIDs.GENERATE_UUID

        if not isinstance(row_ids, AutoRowIDs):
            # Caller supplied per-row IDs; they must at least be iterable.
            try:
                row_ids_iter = iter(row_ids)
            except TypeError:
                msg = "row_ids is neither an iterable nor an AutoRowIDs enum member"
                raise TypeError(msg)

        for i, row in enumerate(json_rows):
            info: Dict[str, Any] = {"json": row}

            if row_ids is AutoRowIDs.GENERATE_UUID:
                info["insertId"] = str(uuid.uuid4())
            elif row_ids is AutoRowIDs.DISABLED:
                # Explicitly no insert ID: BigQuery will not deduplicate.
                info["insertId"] = None
            else:
                # Consume the caller-supplied iterator; it must yield at least
                # one ID per row.
                try:
                    insert_id = next(row_ids_iter)
                except StopIteration:
                    msg = f"row_ids did not generate enough IDs, error at index {i}"
                    raise ValueError(msg)
                else:
                    info["insertId"] = insert_id

            rows_info.append(info)

        if skip_invalid_rows is not None:
            data["skipInvalidRows"] = skip_invalid_rows

        if ignore_unknown_values is not None:
            data["ignoreUnknownValues"] = ignore_unknown_values

        if template_suffix is not None:
            data["templateSuffix"] = template_suffix

        path = "%s/insertAll" % table.path
        # NOTE(review): retrying is safe when every row carries an insertId
        # (BigQuery deduplicates on it), but with AutoRowIDs.DISABLED the
        # insertId is None, so a retry here may duplicate rows — confirm this
        # is intended for the DISABLED case.
        span_attributes = {"path": path}
        response = self._call_api(
            retry,
            span_name="BigQuery.insertRowsJson",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=data,
            timeout=timeout,
        )
        errors = []

        # Normalize the API's insertErrors into {"index": int, "errors": [...]}.
        for error in response.get("insertErrors", ()):
            errors.append({"index": int(error["index"]), "errors": error["errors"]})

        return errors

3889 

3890 def list_partitions( 

3891 self, 

3892 table: Union[Table, TableReference, TableListItem, str], 

3893 retry: retries.Retry = DEFAULT_RETRY, 

3894 timeout: TimeoutType = DEFAULT_TIMEOUT, 

3895 ) -> Sequence[str]: 

3896 """List the partitions in a table. 

3897 

3898 Args: 

3899 table (Union[ \ 

3900 google.cloud.bigquery.table.Table, \ 

3901 google.cloud.bigquery.table.TableReference, \ 

3902 google.cloud.bigquery.table.TableListItem, \ 

3903 str, \ 

3904 ]): 

3905 The table or reference from which to get partition info 

3906 retry (Optional[google.api_core.retry.Retry]): 

3907 How to retry the RPC. 

3908 timeout (Optional[float]): 

3909 The number of seconds to wait for the underlying HTTP transport 

3910 before using ``retry``. 

3911 If multiple requests are made under the hood, ``timeout`` 

3912 applies to each individual request. 

3913 

3914 Returns: 

3915 List[str]: 

3916 A list of the partition ids present in the partitioned table 

3917 """ 

3918 table = _table_arg_to_table_ref(table, default_project=self.project) 

3919 meta_table = self.get_table( 

3920 TableReference( 

3921 DatasetReference(table.project, table.dataset_id), 

3922 "%s$__PARTITIONS_SUMMARY__" % table.table_id, 

3923 ), 

3924 retry=retry, 

3925 timeout=timeout, 

3926 ) 

3927 

3928 subset = [col for col in meta_table.schema if col.name == "partition_id"] 

3929 return [ 

3930 row[0] 

3931 for row in self.list_rows( 

3932 meta_table, selected_fields=subset, retry=retry, timeout=timeout 

3933 ) 

3934 ] 

3935 

3936 def list_rows( 

3937 self, 

3938 table: Union[Table, TableListItem, TableReference, str], 

3939 selected_fields: Optional[Sequence[SchemaField]] = None, 

3940 max_results: Optional[int] = None, 

3941 page_token: Optional[str] = None, 

3942 start_index: Optional[int] = None, 

3943 page_size: Optional[int] = None, 

3944 retry: retries.Retry = DEFAULT_RETRY, 

3945 timeout: TimeoutType = DEFAULT_TIMEOUT, 

3946 ) -> RowIterator: 

3947 """List the rows of the table. 

3948 

3949 See 

3950 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list 

3951 

3952 .. note:: 

3953 

3954 This method assumes that the provided schema is up-to-date with the 

3955 schema as defined on the back-end: if the two schemas are not 

3956 identical, the values returned may be incomplete. To ensure that the 

3957 local copy of the schema is up-to-date, call ``client.get_table``. 

3958 

3959 Args: 

3960 table (Union[ \ 

3961 google.cloud.bigquery.table.Table, \ 

3962 google.cloud.bigquery.table.TableListItem, \ 

3963 google.cloud.bigquery.table.TableReference, \ 

3964 str, \ 

3965 ]): 

3966 The table to list, or a reference to it. When the table 

3967 object does not contain a schema and ``selected_fields`` is 

3968 not supplied, this method calls ``get_table`` to fetch the 

3969 table schema. 

3970 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]): 

3971 The fields to return. If not supplied, data for all columns 

3972 are downloaded. 

3973 max_results (Optional[int]): 

3974 Maximum number of rows to return. 

3975 page_token (Optional[str]): 

3976 Token representing a cursor into the table's rows. 

3977 If not passed, the API will return the first page of the 

3978 rows. The token marks the beginning of the iterator to be 

3979 returned and the value of the ``page_token`` can be accessed 

3980 at ``next_page_token`` of the 

3981 :class:`~google.cloud.bigquery.table.RowIterator`. 

3982 start_index (Optional[int]): 

3983 The zero-based index of the starting row to read. 

3984 page_size (Optional[int]): 

3985 The maximum number of rows in each page of results from this request. 

3986 Non-positive values are ignored. Defaults to a sensible value set by the API. 

3987 retry (Optional[google.api_core.retry.Retry]): 

3988 How to retry the RPC. 

3989 timeout (Optional[float]): 

3990 The number of seconds to wait for the underlying HTTP transport 

3991 before using ``retry``. 

3992 If multiple requests are made under the hood, ``timeout`` 

3993 applies to each individual request. 

3994 

3995 Returns: 

3996 google.cloud.bigquery.table.RowIterator: 

3997 Iterator of row data 

3998 :class:`~google.cloud.bigquery.table.Row`-s. During each 

3999 page, the iterator will have the ``total_rows`` attribute 

4000 set, which counts the total number of rows **in the table** 

4001 (this is distinct from the total number of rows in the 

4002 current page: ``iterator.page.num_items``). 

4003 """ 

4004 table = _table_arg_to_table(table, default_project=self.project) 

4005 

4006 if not isinstance(table, Table): 

4007 raise TypeError(_NEED_TABLE_ARGUMENT) 

4008 

4009 schema = table.schema 

4010 

4011 # selected_fields can override the table schema. 

4012 if selected_fields is not None: 

4013 schema = selected_fields 

4014 

4015 # No schema, but no selected_fields. Assume the developer wants all 

4016 # columns, so get the table resource for them rather than failing. 

4017 elif len(schema) == 0: 

4018 table = self.get_table(table.reference, retry=retry, timeout=timeout) 

4019 schema = table.schema 

4020 

4021 params: Dict[str, Any] = {} 

4022 if selected_fields is not None: 

4023 params["selectedFields"] = ",".join(field.name for field in selected_fields) 

4024 if start_index is not None: 

4025 params["startIndex"] = start_index 

4026 

4027 params["formatOptions.useInt64Timestamp"] = True 

4028 row_iterator = RowIterator( 

4029 client=self, 

4030 api_request=functools.partial(self._call_api, retry, timeout=timeout), 

4031 path="%s/data" % (table.path,), 

4032 schema=schema, 

4033 page_token=page_token, 

4034 max_results=max_results, 

4035 page_size=page_size, 

4036 extra_params=params, 

4037 table=table, 

4038 # Pass in selected_fields separately from schema so that full 

4039 # tables can be fetched without a column filter. 

4040 selected_fields=selected_fields, 

4041 total_rows=getattr(table, "num_rows", None), 

4042 project=table.project, 

4043 location=table.location, 

4044 ) 

4045 return row_iterator 

4046 

4047 def _list_rows_from_query_results( 

4048 self, 

4049 job_id: str, 

4050 location: str, 

4051 project: str, 

4052 schema: Sequence[SchemaField], 

4053 total_rows: Optional[int] = None, 

4054 destination: Optional[Union[Table, TableReference, TableListItem, str]] = None, 

4055 max_results: Optional[int] = None, 

4056 start_index: Optional[int] = None, 

4057 page_size: Optional[int] = None, 

4058 retry: retries.Retry = DEFAULT_RETRY, 

4059 timeout: TimeoutType = DEFAULT_TIMEOUT, 

4060 query_id: Optional[str] = None, 

4061 first_page_response: Optional[Dict[str, Any]] = None, 

4062 num_dml_affected_rows: Optional[int] = None, 

4063 ) -> RowIterator: 

4064 """List the rows of a completed query. 

4065 See 

4066 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults 

4067 Args: 

4068 job_id (str): 

4069 ID of a query job. 

4070 location (str): Location of the query job. 

4071 project (str): 

4072 ID of the project where the query job was run. 

4073 schema (Sequence[google.cloud.bigquery.schema.SchemaField]): 

4074 The fields expected in these query results. Used to convert 

4075 from JSON to expected Python types. 

4076 total_rows (Optional[int]): 

4077 Total number of rows in the query results. 

4078 destination (Optional[Union[ \ 

4079 google.cloud.bigquery.table.Table, \ 

4080 google.cloud.bigquery.table.TableListItem, \ 

4081 google.cloud.bigquery.table.TableReference, \ 

4082 str, \ 

4083 ]]): 

4084 Destination table reference. Used to fetch the query results 

4085 with the BigQuery Storage API. 

4086 max_results (Optional[int]): 

4087 Maximum number of rows to return across the whole iterator. 

4088 start_index (Optional[int]): 

4089 The zero-based index of the starting row to read. 

4090 page_size (Optional[int]): 

4091 The maximum number of rows in each page of results from this request. 

4092 Non-positive values are ignored. Defaults to a sensible value set by the API. 

4093 retry (Optional[google.api_core.retry.Retry]): 

4094 How to retry the RPC. 

4095 timeout (Optional[float]): 

4096 The number of seconds to wait for the underlying HTTP transport 

4097 before using ``retry``. If set, this connection timeout may be 

4098 increased to a minimum value. This prevents retries on what 

4099 would otherwise be a successful response. 

4100 If multiple requests are made under the hood, ``timeout`` 

4101 applies to each individual request. 

4102 query_id (Optional[str]): 

4103 [Preview] ID of a completed query. This ID is auto-generated 

4104 and not guaranteed to be populated. 

4105 first_page_response (Optional[dict]): 

4106 API response for the first page of results (if available). 

4107 num_dml_affected_rows (Optional[int]): 

4108 If this RowIterator is the result of a DML query, the number of 

4109 rows that were affected. 

4110 

4111 Returns: 

4112 google.cloud.bigquery.table.RowIterator: 

4113 Iterator of row data 

4114 :class:`~google.cloud.bigquery.table.Row`-s. 

4115 """ 

4116 params: Dict[str, Any] = { 

4117 "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS, 

4118 "location": location, 

4119 } 

4120 

4121 if timeout is not None: 

4122 if not isinstance(timeout, (int, float)): 

4123 timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT 

4124 else: 

4125 timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT) 

4126 

4127 if start_index is not None: 

4128 params["startIndex"] = start_index 

4129 

4130 # We don't call jobs.query with a page size, so if the user explicitly 

4131 # requests a certain size, invalidate the cache. 

4132 if page_size is not None: 

4133 first_page_response = None 

4134 

4135 params["formatOptions.useInt64Timestamp"] = True 

4136 row_iterator = RowIterator( 

4137 client=self, 

4138 api_request=functools.partial(self._call_api, retry, timeout=timeout), 

4139 path=f"/projects/{project}/queries/{job_id}", 

4140 schema=schema, 

4141 max_results=max_results, 

4142 page_size=page_size, 

4143 table=destination, 

4144 extra_params=params, 

4145 total_rows=total_rows, 

4146 project=project, 

4147 location=location, 

4148 job_id=job_id, 

4149 query_id=query_id, 

4150 first_page_response=first_page_response, 

4151 num_dml_affected_rows=num_dml_affected_rows, 

4152 ) 

4153 return row_iterator 

4154 

4155 def _schema_from_json_file_object(self, file_obj): 

4156 """Helper function for schema_from_json that takes a 

4157 file object that describes a table schema. 

4158 

4159 Returns: 

4160 List of schema field objects. 

4161 """ 

4162 json_data = json.load(file_obj) 

4163 return [SchemaField.from_api_repr(field) for field in json_data] 

4164 

4165 def _schema_to_json_file_object(self, schema_list, file_obj): 

4166 """Helper function for schema_to_json that takes a schema list and file 

4167 object and writes the schema list to the file object with json.dump 

4168 """ 

4169 json.dump(schema_list, file_obj, indent=2, sort_keys=True) 

4170 

4171 def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]: 

4172 """Takes a file object or file path that contains json that describes 

4173 a table schema. 

4174 

4175 Returns: 

4176 List[SchemaField]: 

4177 List of :class:`~google.cloud.bigquery.schema.SchemaField` objects. 

4178 """ 

4179 if isinstance(file_or_path, io.IOBase): 

4180 return self._schema_from_json_file_object(file_or_path) 

4181 

4182 with open(file_or_path) as file_obj: 

4183 return self._schema_from_json_file_object(file_obj) 

4184 

4185 def schema_to_json( 

4186 self, schema_list: Sequence[SchemaField], destination: "PathType" 

4187 ): 

4188 """Takes a list of schema field objects. 

4189 

4190 Serializes the list of schema field objects as json to a file. 

4191 

4192 Destination is a file path or a file object. 

4193 """ 

4194 json_schema_list = [f.to_api_repr() for f in schema_list] 

4195 

4196 if isinstance(destination, io.IOBase): 

4197 return self._schema_to_json_file_object(json_schema_list, destination) 

4198