Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/client.py: 2%
885 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Client for interacting with the Google BigQuery API."""
17from __future__ import absolute_import
18from __future__ import division
20from collections import abc as collections_abc
21import copy
22import datetime
23import functools
24import gzip
25import io
26import itertools
27import json
28import math
29import os
30import tempfile
31import typing
32from typing import (
33 Any,
34 Dict,
35 IO,
36 Iterable,
37 Mapping,
38 List,
39 Optional,
40 Sequence,
41 Tuple,
42 Union,
43)
44import uuid
45import warnings
47from google import resumable_media # type: ignore
48from google.resumable_media.requests import MultipartUpload # type: ignore
49from google.resumable_media.requests import ResumableUpload
51import google.api_core.client_options
52import google.api_core.exceptions as core_exceptions
53from google.api_core.iam import Policy
54from google.api_core import page_iterator
55from google.api_core import retry as retries
56import google.cloud._helpers # type: ignore
57from google.cloud import exceptions # pytype: disable=import-error
58from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error
60try:
61 from google.cloud.bigquery_storage_v1.services.big_query_read.client import (
62 DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO,
63 )
64except ImportError:
65 DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore
68from google.cloud.bigquery._http import Connection
69from google.cloud.bigquery import _job_helpers
70from google.cloud.bigquery import _pandas_helpers
71from google.cloud.bigquery import _versions_helpers
72from google.cloud.bigquery import enums
73from google.cloud.bigquery import exceptions as bq_exceptions
74from google.cloud.bigquery import job
75from google.cloud.bigquery._helpers import _get_sub_prop
76from google.cloud.bigquery._helpers import _record_field_to_json
77from google.cloud.bigquery._helpers import _str_or_none
78from google.cloud.bigquery._helpers import _verify_job_config_type
79from google.cloud.bigquery._helpers import _get_bigquery_host
80from google.cloud.bigquery._helpers import _DEFAULT_HOST
81from google.cloud.bigquery._helpers import _DEFAULT_HOST_TEMPLATE
82from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE
83from google.cloud.bigquery._helpers import _validate_universe
84from google.cloud.bigquery._helpers import _get_client_universe
85from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id
86from google.cloud.bigquery.dataset import Dataset
87from google.cloud.bigquery.dataset import DatasetListItem
88from google.cloud.bigquery.dataset import DatasetReference
89from google.cloud.bigquery.enums import AutoRowIDs
90from google.cloud.bigquery.format_options import ParquetOptions
91from google.cloud.bigquery.job import (
92 CopyJob,
93 CopyJobConfig,
94 ExtractJob,
95 ExtractJobConfig,
96 LoadJob,
97 LoadJobConfig,
98 QueryJob,
99 QueryJobConfig,
100)
101from google.cloud.bigquery.model import Model
102from google.cloud.bigquery.model import ModelReference
103from google.cloud.bigquery.model import _model_arg_to_model_ref
104from google.cloud.bigquery.opentelemetry_tracing import create_span
105from google.cloud.bigquery.query import _QueryResults
106from google.cloud.bigquery.retry import (
107 DEFAULT_JOB_RETRY,
108 DEFAULT_RETRY,
109 DEFAULT_TIMEOUT,
110)
111from google.cloud.bigquery.routine import Routine
112from google.cloud.bigquery.routine import RoutineReference
113from google.cloud.bigquery.schema import SchemaField
114from google.cloud.bigquery.table import _table_arg_to_table
115from google.cloud.bigquery.table import _table_arg_to_table_ref
116from google.cloud.bigquery.table import Table
117from google.cloud.bigquery.table import TableListItem
118from google.cloud.bigquery.table import TableReference
119from google.cloud.bigquery.table import RowIterator
# Optional heavyweight dependencies: ``None`` when not installed.
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
pandas = (
    _versions_helpers.PANDAS_VERSIONS.try_import()
)  # mypy check fails because pandas import is outside module, there are type: ignore comments related to this

# ``None`` disables the HTTP timeout.
TimeoutType = Union[float, None]
ResumableTimeoutType = Union[
    None, float, Tuple[float, float]
]  # for resumable media methods

if typing.TYPE_CHECKING:  # pragma: NO COVER
    # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
    PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
    import requests  # required by api-core

_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
_MAX_MULTIPART_SIZE = 5 * 1024 * 1024  # 5 MB: above this, use a resumable upload
_DEFAULT_NUM_RETRIES = 6
# Upload URL templates; ``uploadType`` is completed below per upload style.
_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
_GENERIC_CONTENT_TYPE = "*/*"
# Error/help message templates used by upload and table helpers.
_READ_LESS_THAN_SIZE = (
    "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
)
_NEED_TABLE_ARGUMENT = (
    "The table argument should be a table ID string, Table, or TableReference"
)
_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows"

# In microbenchmarks, it's been shown that even in ideal conditions (query
# finished, local data), requests to getQueryResults can take 10+ seconds.
# In less-than-ideal situations, the response can take even longer, as it must
# be able to download a full 100+ MB row in that time. Don't let the
# connection timeout before data can be downloaded.
# https://github.com/googleapis/python-bigquery/issues/438
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120

TIMEOUT_HEADER = "X-Server-Timeout"
class Project(object):
    """Lightweight wrapper for a BigQuery project resource.

    Args:
        project_id (str): Opaque ID of the project

        numeric_id (int): Numeric ID of the project

        friendly_name (str): Display name of the project
    """

    def __init__(self, project_id, numeric_id, friendly_name):
        # Plain value object: attributes are stored as-is, no validation.
        self.project_id = project_id
        self.numeric_id = numeric_id
        self.friendly_name = friendly_name

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct an instance from a resource dict."""
        return cls(
            resource["id"],
            resource["numericId"],
            resource["friendlyName"],
        )
184class Client(ClientWithProject):
185 """Client to bundle configuration needed for API requests.
187 Args:
188 project (Optional[str]):
189 Project ID for the project which the client acts on behalf of.
190 Will be passed when creating a dataset / job. If not passed,
191 falls back to the default inferred from the environment.
192 credentials (Optional[google.auth.credentials.Credentials]):
193 The OAuth2 Credentials to use for this client. If not passed
194 (and if no ``_http`` object is passed), falls back to the
195 default inferred from the environment.
196 _http (Optional[requests.Session]):
197 HTTP object to make requests. Can be any object that
198 defines ``request()`` with the same interface as
199 :meth:`requests.Session.request`. If not passed, an ``_http``
200 object is created that is bound to the ``credentials`` for the
201 current object.
202 This parameter should be considered private, and could change in
203 the future.
204 location (Optional[str]):
205 Default location for jobs / datasets / tables.
206 default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
207 Default ``QueryJobConfig``.
208 Will be merged into job configs passed into the ``query`` method.
209 default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
210 Default ``LoadJobConfig``.
211 Will be merged into job configs passed into the ``load_table_*`` methods.
212 client_info (Optional[google.api_core.client_info.ClientInfo]):
213 The client info used to send a user-agent string along with API
214 requests. If ``None``, then default info will be used. Generally,
215 you only need to set this if you're developing your own library
216 or partner tool.
217 client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]):
218 Client options used to set user options on the client. API Endpoint
219 should be set through client_options.
221 Raises:
222 google.auth.exceptions.DefaultCredentialsError:
223 Raised if ``credentials`` is not specified and the library fails
224 to acquire default credentials.
225 """
227 SCOPE = ("https://www.googleapis.com/auth/cloud-platform",) # type: ignore
228 """The scopes required for authenticating as a BigQuery consumer."""
    def __init__(
        self,
        project=None,
        credentials=None,
        _http=None,
        location=None,
        default_query_job_config=None,
        default_load_job_config=None,
        client_info=None,
        client_options=None,
    ) -> None:
        """Initialize the client; see the class docstring for argument details."""
        super(Client, self).__init__(
            project=project,
            credentials=credentials,
            client_options=client_options,
            _http=_http,
        )

        kw_args = {"client_info": client_info}
        # Endpoint precedence: BIGQUERY_EMULATOR_HOST-style env override first...
        bq_host = _get_bigquery_host()
        kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None
        client_universe = None
        if client_options is None:
            client_options = {}
        if isinstance(client_options, dict):
            client_options = google.api_core.client_options.from_dict(client_options)
        # ...then an explicit client_options.api_endpoint, which wins outright...
        if client_options.api_endpoint:
            api_endpoint = client_options.api_endpoint
            kw_args["api_endpoint"] = api_endpoint
        else:
            # ...otherwise derive the endpoint from a non-default universe domain.
            client_universe = _get_client_universe(client_options)
            if client_universe != _DEFAULT_UNIVERSE:
                kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace(
                    "{UNIVERSE_DOMAIN}", client_universe
                )
        # Ensure credentials and universe are not in conflict.
        if hasattr(self, "_credentials") and client_universe is not None:
            _validate_universe(client_universe, self._credentials)

        self._connection = Connection(self, **kw_args)
        self._location = location
        self._default_load_job_config = copy.deepcopy(default_load_job_config)

        # Use property setter so validation can run.
        self.default_query_job_config = default_query_job_config
276 @property
277 def location(self):
278 """Default location for jobs / datasets / tables."""
279 return self._location
281 @property
282 def default_query_job_config(self) -> Optional[QueryJobConfig]:
283 """Default ``QueryJobConfig`` or ``None``.
285 Will be merged into job configs passed into the ``query`` or
286 ``query_and_wait`` methods.
287 """
288 return self._default_query_job_config
290 @default_query_job_config.setter
291 def default_query_job_config(self, value: Optional[QueryJobConfig]):
292 if value is not None:
293 _verify_job_config_type(
294 value, QueryJobConfig, param_name="default_query_job_config"
295 )
296 self._default_query_job_config = copy.deepcopy(value)
298 @property
299 def default_load_job_config(self):
300 """Default ``LoadJobConfig``.
301 Will be merged into job configs passed into the ``load_table_*`` methods.
302 """
303 return self._default_load_job_config
305 @default_load_job_config.setter
306 def default_load_job_config(self, value: LoadJobConfig):
307 self._default_load_job_config = copy.deepcopy(value)
309 def close(self):
310 """Close the underlying transport objects, releasing system resources.
312 .. note::
314 The client instance can be used for making additional requests even
315 after closing, in which case the underlying connections are
316 automatically re-created.
317 """
318 self._http._auth_request.session.close()
319 self._http.close()
321 def get_service_account_email(
322 self,
323 project: Optional[str] = None,
324 retry: retries.Retry = DEFAULT_RETRY,
325 timeout: TimeoutType = DEFAULT_TIMEOUT,
326 ) -> str:
327 """Get the email address of the project's BigQuery service account
329 Note:
330 This is the service account that BigQuery uses to manage tables
331 encrypted by a key in KMS.
333 Args:
334 project (Optional[str]):
335 Project ID to use for retreiving service account email.
336 Defaults to the client's project.
337 retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
338 timeout (Optional[float]):
339 The number of seconds to wait for the underlying HTTP transport
340 before using ``retry``.
342 Returns:
343 str:
344 service account email address
346 Example:
348 >>> from google.cloud import bigquery
349 >>> client = bigquery.Client()
350 >>> client.get_service_account_email()
351 my_service_account@my-project.iam.gserviceaccount.com
353 """
354 if project is None:
355 project = self.project
356 path = "/projects/%s/serviceAccount" % (project,)
357 span_attributes = {"path": path}
358 api_response = self._call_api(
359 retry,
360 span_name="BigQuery.getServiceAccountEmail",
361 span_attributes=span_attributes,
362 method="GET",
363 path=path,
364 timeout=timeout,
365 )
366 return api_response["email"]
368 def list_projects(
369 self,
370 max_results: Optional[int] = None,
371 page_token: Optional[str] = None,
372 retry: retries.Retry = DEFAULT_RETRY,
373 timeout: TimeoutType = DEFAULT_TIMEOUT,
374 page_size: Optional[int] = None,
375 ) -> page_iterator.Iterator:
376 """List projects for the project associated with this client.
378 See
379 https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list
381 Args:
382 max_results (Optional[int]):
383 Maximum number of projects to return.
384 Defaults to a value set by the API.
386 page_token (Optional[str]):
387 Token representing a cursor into the projects. If not passed,
388 the API will return the first page of projects. The token marks
389 the beginning of the iterator to be returned and the value of
390 the ``page_token`` can be accessed at ``next_page_token`` of the
391 :class:`~google.api_core.page_iterator.HTTPIterator`.
393 retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
395 timeout (Optional[float]):
396 The number of seconds to wait for the underlying HTTP transport
397 before using ``retry``.
399 page_size (Optional[int]):
400 Maximum number of projects to return in each page.
401 Defaults to a value set by the API.
403 Returns:
404 google.api_core.page_iterator.Iterator:
405 Iterator of :class:`~google.cloud.bigquery.client.Project`
406 accessible to the current client.
407 """
408 span_attributes = {"path": "/projects"}
410 def api_request(*args, **kwargs):
411 return self._call_api(
412 retry,
413 span_name="BigQuery.listProjects",
414 span_attributes=span_attributes,
415 *args,
416 timeout=timeout,
417 **kwargs,
418 )
420 return page_iterator.HTTPIterator(
421 client=self,
422 api_request=api_request,
423 path="/projects",
424 item_to_value=_item_to_project,
425 items_key="projects",
426 page_token=page_token,
427 max_results=max_results,
428 page_size=page_size,
429 )
431 def list_datasets(
432 self,
433 project: Optional[str] = None,
434 include_all: bool = False,
435 filter: Optional[str] = None,
436 max_results: Optional[int] = None,
437 page_token: Optional[str] = None,
438 retry: retries.Retry = DEFAULT_RETRY,
439 timeout: TimeoutType = DEFAULT_TIMEOUT,
440 page_size: Optional[int] = None,
441 ) -> page_iterator.Iterator:
442 """List datasets for the project associated with this client.
444 See
445 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list
447 Args:
448 project (Optional[str]):
449 Project ID to use for retreiving datasets. Defaults to the
450 client's project.
451 include_all (Optional[bool]):
452 True if results include hidden datasets. Defaults to False.
453 filter (Optional[str]):
454 An expression for filtering the results by label.
455 For syntax, see
456 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#body.QUERY_PARAMETERS.filter
457 max_results (Optional[int]):
458 Maximum number of datasets to return.
459 page_token (Optional[str]):
460 Token representing a cursor into the datasets. If not passed,
461 the API will return the first page of datasets. The token marks
462 the beginning of the iterator to be returned and the value of
463 the ``page_token`` can be accessed at ``next_page_token`` of the
464 :class:`~google.api_core.page_iterator.HTTPIterator`.
465 retry (Optional[google.api_core.retry.Retry]):
466 How to retry the RPC.
467 timeout (Optional[float]):
468 The number of seconds to wait for the underlying HTTP transport
469 before using ``retry``.
470 page_size (Optional[int]):
471 Maximum number of datasets to return per page.
473 Returns:
474 google.api_core.page_iterator.Iterator:
475 Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`.
476 associated with the project.
477 """
478 extra_params: Dict[str, Any] = {}
479 if project is None:
480 project = self.project
481 if include_all:
482 extra_params["all"] = True
483 if filter:
484 # TODO: consider supporting a dict of label -> value for filter,
485 # and converting it into a string here.
486 extra_params["filter"] = filter
487 path = "/projects/%s/datasets" % (project,)
489 span_attributes = {"path": path}
491 def api_request(*args, **kwargs):
492 return self._call_api(
493 retry,
494 span_name="BigQuery.listDatasets",
495 span_attributes=span_attributes,
496 *args,
497 timeout=timeout,
498 **kwargs,
499 )
501 return page_iterator.HTTPIterator(
502 client=self,
503 api_request=api_request,
504 path=path,
505 item_to_value=_item_to_dataset,
506 items_key="datasets",
507 page_token=page_token,
508 max_results=max_results,
509 extra_params=extra_params,
510 page_size=page_size,
511 )
513 def dataset(
514 self, dataset_id: str, project: Optional[str] = None
515 ) -> DatasetReference:
516 """Deprecated: Construct a reference to a dataset.
518 .. deprecated:: 1.24.0
519 Construct a
520 :class:`~google.cloud.bigquery.dataset.DatasetReference` using its
521 constructor or use a string where previously a reference object
522 was used.
524 As of ``google-cloud-bigquery`` version 1.7.0, all client methods
525 that take a
526 :class:`~google.cloud.bigquery.dataset.DatasetReference` or
527 :class:`~google.cloud.bigquery.table.TableReference` also take a
528 string in standard SQL format, e.g. ``project.dataset_id`` or
529 ``project.dataset_id.table_id``.
531 Args:
532 dataset_id (str): ID of the dataset.
534 project (Optional[str]):
535 Project ID for the dataset (defaults to the project of the client).
537 Returns:
538 google.cloud.bigquery.dataset.DatasetReference:
539 a new ``DatasetReference`` instance.
540 """
541 if project is None:
542 project = self.project
544 warnings.warn(
545 "Client.dataset is deprecated and will be removed in a future version. "
546 "Use a string like 'my_project.my_dataset' or a "
547 "cloud.google.bigquery.DatasetReference object, instead.",
548 PendingDeprecationWarning,
549 stacklevel=2,
550 )
551 return DatasetReference(project, dataset_id)
553 def _ensure_bqstorage_client(
554 self,
555 bqstorage_client: Optional[
556 "google.cloud.bigquery_storage.BigQueryReadClient"
557 ] = None,
558 client_options: Optional[google.api_core.client_options.ClientOptions] = None,
559 client_info: Optional[
560 "google.api_core.gapic_v1.client_info.ClientInfo"
561 ] = DEFAULT_BQSTORAGE_CLIENT_INFO,
562 ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]:
563 """Create a BigQuery Storage API client using this client's credentials.
565 Args:
566 bqstorage_client:
567 An existing BigQuery Storage client instance. If ``None``, a new
568 instance is created and returned.
569 client_options:
570 Custom options used with a new BigQuery Storage client instance
571 if one is created.
572 client_info:
573 The client info used with a new BigQuery Storage client
574 instance if one is created.
576 Returns:
577 A BigQuery Storage API client.
578 """
580 try:
581 bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import(
582 raise_if_error=True
583 )
584 except bq_exceptions.BigQueryStorageNotFoundError:
585 warnings.warn(
586 "Cannot create BigQuery Storage client, the dependency "
587 "google-cloud-bigquery-storage is not installed."
588 )
589 return None
590 except bq_exceptions.LegacyBigQueryStorageError as exc:
591 warnings.warn(
592 "Dependency google-cloud-bigquery-storage is outdated: " + str(exc)
593 )
594 return None
596 if bqstorage_client is None: # pragma: NO COVER
597 bqstorage_client = bigquery_storage.BigQueryReadClient(
598 credentials=self._credentials,
599 client_options=client_options,
600 client_info=client_info, # type: ignore # (None is also accepted)
601 )
603 return bqstorage_client
605 def _dataset_from_arg(self, dataset) -> Union[Dataset, DatasetReference]:
606 if isinstance(dataset, str):
607 dataset = DatasetReference.from_string(
608 dataset, default_project=self.project
609 )
611 if not isinstance(dataset, (Dataset, DatasetReference)):
612 if isinstance(dataset, DatasetListItem):
613 dataset = dataset.reference
614 else:
615 raise TypeError(
616 "dataset must be a Dataset, DatasetReference, DatasetListItem,"
617 " or string"
618 )
619 return dataset
621 def create_dataset(
622 self,
623 dataset: Union[str, Dataset, DatasetReference, DatasetListItem],
624 exists_ok: bool = False,
625 retry: retries.Retry = DEFAULT_RETRY,
626 timeout: TimeoutType = DEFAULT_TIMEOUT,
627 ) -> Dataset:
628 """API call: create the dataset via a POST request.
630 See
631 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert
633 Args:
634 dataset (Union[ \
635 google.cloud.bigquery.dataset.Dataset, \
636 google.cloud.bigquery.dataset.DatasetReference, \
637 google.cloud.bigquery.dataset.DatasetListItem, \
638 str, \
639 ]):
640 A :class:`~google.cloud.bigquery.dataset.Dataset` to create.
641 If ``dataset`` is a reference, an empty dataset is created
642 with the specified ID and client's default location.
643 exists_ok (Optional[bool]):
644 Defaults to ``False``. If ``True``, ignore "already exists"
645 errors when creating the dataset.
646 retry (Optional[google.api_core.retry.Retry]):
647 How to retry the RPC.
648 timeout (Optional[float]):
649 The number of seconds to wait for the underlying HTTP transport
650 before using ``retry``.
652 Returns:
653 google.cloud.bigquery.dataset.Dataset:
654 A new ``Dataset`` returned from the API.
656 Raises:
657 google.cloud.exceptions.Conflict:
658 If the dataset already exists.
660 Example:
662 >>> from google.cloud import bigquery
663 >>> client = bigquery.Client()
664 >>> dataset = bigquery.Dataset('my_project.my_dataset')
665 >>> dataset = client.create_dataset(dataset)
667 """
668 dataset = self._dataset_from_arg(dataset)
669 if isinstance(dataset, DatasetReference):
670 dataset = Dataset(dataset)
672 path = "/projects/%s/datasets" % (dataset.project,)
674 data = dataset.to_api_repr()
675 if data.get("location") is None and self.location is not None:
676 data["location"] = self.location
678 try:
679 span_attributes = {"path": path}
681 api_response = self._call_api(
682 retry,
683 span_name="BigQuery.createDataset",
684 span_attributes=span_attributes,
685 method="POST",
686 path=path,
687 data=data,
688 timeout=timeout,
689 )
690 return Dataset.from_api_repr(api_response)
691 except core_exceptions.Conflict:
692 if not exists_ok:
693 raise
694 return self.get_dataset(dataset.reference, retry=retry)
696 def create_routine(
697 self,
698 routine: Routine,
699 exists_ok: bool = False,
700 retry: retries.Retry = DEFAULT_RETRY,
701 timeout: TimeoutType = DEFAULT_TIMEOUT,
702 ) -> Routine:
703 """[Beta] Create a routine via a POST request.
705 See
706 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/insert
708 Args:
709 routine (google.cloud.bigquery.routine.Routine):
710 A :class:`~google.cloud.bigquery.routine.Routine` to create.
711 The dataset that the routine belongs to must already exist.
712 exists_ok (Optional[bool]):
713 Defaults to ``False``. If ``True``, ignore "already exists"
714 errors when creating the routine.
715 retry (Optional[google.api_core.retry.Retry]):
716 How to retry the RPC.
717 timeout (Optional[float]):
718 The number of seconds to wait for the underlying HTTP transport
719 before using ``retry``.
721 Returns:
722 google.cloud.bigquery.routine.Routine:
723 A new ``Routine`` returned from the service.
725 Raises:
726 google.cloud.exceptions.Conflict:
727 If the routine already exists.
728 """
729 reference = routine.reference
730 path = "/projects/{}/datasets/{}/routines".format(
731 reference.project, reference.dataset_id
732 )
733 resource = routine.to_api_repr()
734 try:
735 span_attributes = {"path": path}
736 api_response = self._call_api(
737 retry,
738 span_name="BigQuery.createRoutine",
739 span_attributes=span_attributes,
740 method="POST",
741 path=path,
742 data=resource,
743 timeout=timeout,
744 )
745 return Routine.from_api_repr(api_response)
746 except core_exceptions.Conflict:
747 if not exists_ok:
748 raise
749 return self.get_routine(routine.reference, retry=retry)
751 def create_table(
752 self,
753 table: Union[str, Table, TableReference, TableListItem],
754 exists_ok: bool = False,
755 retry: retries.Retry = DEFAULT_RETRY,
756 timeout: TimeoutType = DEFAULT_TIMEOUT,
757 ) -> Table:
758 """API call: create a table via a PUT request
760 See
761 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
763 Args:
764 table (Union[ \
765 google.cloud.bigquery.table.Table, \
766 google.cloud.bigquery.table.TableReference, \
767 google.cloud.bigquery.table.TableListItem, \
768 str, \
769 ]):
770 A :class:`~google.cloud.bigquery.table.Table` to create.
771 If ``table`` is a reference, an empty table is created
772 with the specified ID. The dataset that the table belongs to
773 must already exist.
774 exists_ok (Optional[bool]):
775 Defaults to ``False``. If ``True``, ignore "already exists"
776 errors when creating the table.
777 retry (Optional[google.api_core.retry.Retry]):
778 How to retry the RPC.
779 timeout (Optional[float]):
780 The number of seconds to wait for the underlying HTTP transport
781 before using ``retry``.
783 Returns:
784 google.cloud.bigquery.table.Table:
785 A new ``Table`` returned from the service.
787 Raises:
788 google.cloud.exceptions.Conflict:
789 If the table already exists.
790 """
791 table = _table_arg_to_table(table, default_project=self.project)
792 dataset_id = table.dataset_id
793 path = "/projects/%s/datasets/%s/tables" % (table.project, dataset_id)
794 data = table.to_api_repr()
795 try:
796 span_attributes = {"path": path, "dataset_id": dataset_id}
797 api_response = self._call_api(
798 retry,
799 span_name="BigQuery.createTable",
800 span_attributes=span_attributes,
801 method="POST",
802 path=path,
803 data=data,
804 timeout=timeout,
805 )
806 return Table.from_api_repr(api_response)
807 except core_exceptions.Conflict:
808 if not exists_ok:
809 raise
810 return self.get_table(table.reference, retry=retry)
812 def _call_api(
813 self,
814 retry,
815 span_name=None,
816 span_attributes=None,
817 job_ref=None,
818 headers: Optional[Dict[str, str]] = None,
819 **kwargs,
820 ):
821 kwargs = _add_server_timeout_header(headers, kwargs)
822 call = functools.partial(self._connection.api_request, **kwargs)
824 if retry:
825 call = retry(call)
827 if span_name is not None:
828 with create_span(
829 name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
830 ):
831 return call()
833 return call()
835 def get_dataset(
836 self,
837 dataset_ref: Union[DatasetReference, str],
838 retry: retries.Retry = DEFAULT_RETRY,
839 timeout: TimeoutType = DEFAULT_TIMEOUT,
840 ) -> Dataset:
841 """Fetch the dataset referenced by ``dataset_ref``
843 Args:
844 dataset_ref (Union[ \
845 google.cloud.bigquery.dataset.DatasetReference, \
846 str, \
847 ]):
848 A reference to the dataset to fetch from the BigQuery API.
849 If a string is passed in, this method attempts to create a
850 dataset reference from a string using
851 :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`.
852 retry (Optional[google.api_core.retry.Retry]):
853 How to retry the RPC.
854 timeout (Optional[float]):
855 The number of seconds to wait for the underlying HTTP transport
856 before using ``retry``.
858 Returns:
859 google.cloud.bigquery.dataset.Dataset:
860 A ``Dataset`` instance.
861 """
862 if isinstance(dataset_ref, str):
863 dataset_ref = DatasetReference.from_string(
864 dataset_ref, default_project=self.project
865 )
866 path = dataset_ref.path
867 span_attributes = {"path": path}
868 api_response = self._call_api(
869 retry,
870 span_name="BigQuery.getDataset",
871 span_attributes=span_attributes,
872 method="GET",
873 path=path,
874 timeout=timeout,
875 )
876 return Dataset.from_api_repr(api_response)
878 def get_iam_policy(
879 self,
880 table: Union[Table, TableReference, TableListItem, str],
881 requested_policy_version: int = 1,
882 retry: retries.Retry = DEFAULT_RETRY,
883 timeout: TimeoutType = DEFAULT_TIMEOUT,
884 ) -> Policy:
885 """Return the access control policy for a table resource.
887 Args:
888 table (Union[ \
889 google.cloud.bigquery.table.Table, \
890 google.cloud.bigquery.table.TableReference, \
891 google.cloud.bigquery.table.TableListItem, \
892 str, \
893 ]):
894 The table to get the access control policy for.
895 If a string is passed in, this method attempts to create a
896 table reference from a string using
897 :func:`~google.cloud.bigquery.table.TableReference.from_string`.
898 requested_policy_version (int):
899 Optional. The maximum policy version that will be used to format the policy.
901 Only version ``1`` is currently supported.
903 See: https://cloud.google.com/bigquery/docs/reference/rest/v2/GetPolicyOptions
904 retry (Optional[google.api_core.retry.Retry]):
905 How to retry the RPC.
906 timeout (Optional[float]):
907 The number of seconds to wait for the underlying HTTP transport
908 before using ``retry``.
910 Returns:
911 google.api_core.iam.Policy:
912 The access control policy.
913 """
914 table = _table_arg_to_table_ref(table, default_project=self.project)
916 if requested_policy_version != 1:
917 raise ValueError("only IAM policy version 1 is supported")
919 body = {"options": {"requestedPolicyVersion": 1}}
921 path = "{}:getIamPolicy".format(table.path)
922 span_attributes = {"path": path}
923 response = self._call_api(
924 retry,
925 span_name="BigQuery.getIamPolicy",
926 span_attributes=span_attributes,
927 method="POST",
928 path=path,
929 data=body,
930 timeout=timeout,
931 )
933 return Policy.from_api_repr(response)
935 def set_iam_policy(
936 self,
937 table: Union[Table, TableReference, TableListItem, str],
938 policy: Policy,
939 updateMask: Optional[str] = None,
940 retry: retries.Retry = DEFAULT_RETRY,
941 timeout: TimeoutType = DEFAULT_TIMEOUT,
942 *,
943 fields: Sequence[str] = (),
944 ) -> Policy:
945 """Return the access control policy for a table resource.
947 Args:
948 table (Union[ \
949 google.cloud.bigquery.table.Table, \
950 google.cloud.bigquery.table.TableReference, \
951 google.cloud.bigquery.table.TableListItem, \
952 str, \
953 ]):
954 The table to get the access control policy for.
955 If a string is passed in, this method attempts to create a
956 table reference from a string using
957 :func:`~google.cloud.bigquery.table.TableReference.from_string`.
958 policy (google.api_core.iam.Policy):
959 The access control policy to set.
960 updateMask (Optional[str]):
961 Mask as defined by
962 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask
964 Incompatible with ``fields``.
965 retry (Optional[google.api_core.retry.Retry]):
966 How to retry the RPC.
967 timeout (Optional[float]):
968 The number of seconds to wait for the underlying HTTP transport
969 before using ``retry``.
970 fields (Sequence[str]):
971 Which properties to set on the policy. See:
972 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask
974 Incompatible with ``updateMask``.
976 Returns:
977 google.api_core.iam.Policy:
978 The updated access control policy.
979 """
980 if updateMask is not None and not fields:
981 update_mask = updateMask
982 elif updateMask is not None and fields:
983 raise ValueError("Cannot set both fields and updateMask")
984 elif fields:
985 update_mask = ",".join(fields)
986 else:
987 update_mask = None
989 table = _table_arg_to_table_ref(table, default_project=self.project)
991 if not isinstance(policy, (Policy)):
992 raise TypeError("policy must be a Policy")
994 body = {"policy": policy.to_api_repr()}
996 if update_mask is not None:
997 body["updateMask"] = update_mask
999 path = "{}:setIamPolicy".format(table.path)
1000 span_attributes = {"path": path}
1002 response = self._call_api(
1003 retry,
1004 span_name="BigQuery.setIamPolicy",
1005 span_attributes=span_attributes,
1006 method="POST",
1007 path=path,
1008 data=body,
1009 timeout=timeout,
1010 )
1012 return Policy.from_api_repr(response)
    def test_iam_permissions(
        self,
        table: Union[Table, TableReference, TableListItem, str],
        permissions: Sequence[str],
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Dict[str, Any]:
        """Issue a ``tables.testIamPermissions`` request for ``table``.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/testIamPermissions

        Args:
            table (Union[ \
                google.cloud.bigquery.table.Table, \
                google.cloud.bigquery.table.TableReference, \
                google.cloud.bigquery.table.TableListItem, \
                str, \
            ]):
                The table to check permissions against. If a string is passed
                in, this method attempts to create a table reference from a
                string using
                :func:`~google.cloud.bigquery.table.TableReference.from_string`.
            permissions (Sequence[str]):
                The permissions to test for the table.
            retry (Optional[google.api_core.retry.Retry]):
                How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            Dict[str, Any]:
                The raw API response to the ``testIamPermissions`` request.
        """
        table = _table_arg_to_table_ref(table, default_project=self.project)

        body = {"permissions": permissions}

        path = "{}:testIamPermissions".format(table.path)
        span_attributes = {"path": path}
        response = self._call_api(
            retry,
            span_name="BigQuery.testIamPermissions",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=body,
            timeout=timeout,
        )

        return response
1039 def get_model(
1040 self,
1041 model_ref: Union[ModelReference, str],
1042 retry: retries.Retry = DEFAULT_RETRY,
1043 timeout: TimeoutType = DEFAULT_TIMEOUT,
1044 ) -> Model:
1045 """[Beta] Fetch the model referenced by ``model_ref``.
1047 Args:
1048 model_ref (Union[ \
1049 google.cloud.bigquery.model.ModelReference, \
1050 str, \
1051 ]):
1052 A reference to the model to fetch from the BigQuery API.
1053 If a string is passed in, this method attempts to create a
1054 model reference from a string using
1055 :func:`google.cloud.bigquery.model.ModelReference.from_string`.
1056 retry (Optional[google.api_core.retry.Retry]):
1057 How to retry the RPC.
1058 timeout (Optional[float]):
1059 The number of seconds to wait for the underlying HTTP transport
1060 before using ``retry``.
1062 Returns:
1063 google.cloud.bigquery.model.Model: A ``Model`` instance.
1064 """
1065 if isinstance(model_ref, str):
1066 model_ref = ModelReference.from_string(
1067 model_ref, default_project=self.project
1068 )
1069 path = model_ref.path
1070 span_attributes = {"path": path}
1072 api_response = self._call_api(
1073 retry,
1074 span_name="BigQuery.getModel",
1075 span_attributes=span_attributes,
1076 method="GET",
1077 path=path,
1078 timeout=timeout,
1079 )
1080 return Model.from_api_repr(api_response)
1082 def get_routine(
1083 self,
1084 routine_ref: Union[Routine, RoutineReference, str],
1085 retry: retries.Retry = DEFAULT_RETRY,
1086 timeout: TimeoutType = DEFAULT_TIMEOUT,
1087 ) -> Routine:
1088 """[Beta] Get the routine referenced by ``routine_ref``.
1090 Args:
1091 routine_ref (Union[ \
1092 google.cloud.bigquery.routine.Routine, \
1093 google.cloud.bigquery.routine.RoutineReference, \
1094 str, \
1095 ]):
1096 A reference to the routine to fetch from the BigQuery API. If
1097 a string is passed in, this method attempts to create a
1098 reference from a string using
1099 :func:`google.cloud.bigquery.routine.RoutineReference.from_string`.
1100 retry (Optional[google.api_core.retry.Retry]):
1101 How to retry the API call.
1102 timeout (Optional[float]):
1103 The number of seconds to wait for the underlying HTTP transport
1104 before using ``retry``.
1106 Returns:
1107 google.cloud.bigquery.routine.Routine:
1108 A ``Routine`` instance.
1109 """
1110 if isinstance(routine_ref, str):
1111 routine_ref = RoutineReference.from_string(
1112 routine_ref, default_project=self.project
1113 )
1114 path = routine_ref.path
1115 span_attributes = {"path": path}
1116 api_response = self._call_api(
1117 retry,
1118 span_name="BigQuery.getRoutine",
1119 span_attributes=span_attributes,
1120 method="GET",
1121 path=path,
1122 timeout=timeout,
1123 )
1124 return Routine.from_api_repr(api_response)
1126 def get_table(
1127 self,
1128 table: Union[Table, TableReference, TableListItem, str],
1129 retry: retries.Retry = DEFAULT_RETRY,
1130 timeout: TimeoutType = DEFAULT_TIMEOUT,
1131 ) -> Table:
1132 """Fetch the table referenced by ``table``.
1134 Args:
1135 table (Union[ \
1136 google.cloud.bigquery.table.Table, \
1137 google.cloud.bigquery.table.TableReference, \
1138 google.cloud.bigquery.table.TableListItem, \
1139 str, \
1140 ]):
1141 A reference to the table to fetch from the BigQuery API.
1142 If a string is passed in, this method attempts to create a
1143 table reference from a string using
1144 :func:`google.cloud.bigquery.table.TableReference.from_string`.
1145 retry (Optional[google.api_core.retry.Retry]):
1146 How to retry the RPC.
1147 timeout (Optional[float]):
1148 The number of seconds to wait for the underlying HTTP transport
1149 before using ``retry``.
1151 Returns:
1152 google.cloud.bigquery.table.Table:
1153 A ``Table`` instance.
1154 """
1155 table_ref = _table_arg_to_table_ref(table, default_project=self.project)
1156 path = table_ref.path
1157 span_attributes = {"path": path}
1158 api_response = self._call_api(
1159 retry,
1160 span_name="BigQuery.getTable",
1161 span_attributes=span_attributes,
1162 method="GET",
1163 path=path,
1164 timeout=timeout,
1165 )
1166 return Table.from_api_repr(api_response)
1168 def update_dataset(
1169 self,
1170 dataset: Dataset,
1171 fields: Sequence[str],
1172 retry: retries.Retry = DEFAULT_RETRY,
1173 timeout: TimeoutType = DEFAULT_TIMEOUT,
1174 ) -> Dataset:
1175 """Change some fields of a dataset.
1177 Use ``fields`` to specify which fields to update. At least one field
1178 must be provided. If a field is listed in ``fields`` and is ``None`` in
1179 ``dataset``, it will be deleted.
1181 If ``dataset.etag`` is not ``None``, the update will only
1182 succeed if the dataset on the server has the same ETag. Thus
1183 reading a dataset with ``get_dataset``, changing its fields,
1184 and then passing it to ``update_dataset`` will ensure that the changes
1185 will only be saved if no modifications to the dataset occurred
1186 since the read.
1188 Args:
1189 dataset (google.cloud.bigquery.dataset.Dataset):
1190 The dataset to update.
1191 fields (Sequence[str]):
1192 The properties of ``dataset`` to change. These are strings
1193 corresponding to the properties of
1194 :class:`~google.cloud.bigquery.dataset.Dataset`.
1196 For example, to update the default expiration times, specify
1197 both properties in the ``fields`` argument:
1199 .. code-block:: python
1201 bigquery_client.update_dataset(
1202 dataset,
1203 [
1204 "default_partition_expiration_ms",
1205 "default_table_expiration_ms",
1206 ]
1207 )
1208 retry (Optional[google.api_core.retry.Retry]):
1209 How to retry the RPC.
1210 timeout (Optional[float]):
1211 The number of seconds to wait for the underlying HTTP transport
1212 before using ``retry``.
1214 Returns:
1215 google.cloud.bigquery.dataset.Dataset:
1216 The modified ``Dataset`` instance.
1217 """
1218 partial = dataset._build_resource(fields)
1219 if dataset.etag is not None:
1220 headers: Optional[Dict[str, str]] = {"If-Match": dataset.etag}
1221 else:
1222 headers = None
1223 path = dataset.path
1224 span_attributes = {"path": path, "fields": fields}
1226 api_response = self._call_api(
1227 retry,
1228 span_name="BigQuery.updateDataset",
1229 span_attributes=span_attributes,
1230 method="PATCH",
1231 path=path,
1232 data=partial,
1233 headers=headers,
1234 timeout=timeout,
1235 )
1236 return Dataset.from_api_repr(api_response)
1238 def update_model(
1239 self,
1240 model: Model,
1241 fields: Sequence[str],
1242 retry: retries.Retry = DEFAULT_RETRY,
1243 timeout: TimeoutType = DEFAULT_TIMEOUT,
1244 ) -> Model:
1245 """[Beta] Change some fields of a model.
1247 Use ``fields`` to specify which fields to update. At least one field
1248 must be provided. If a field is listed in ``fields`` and is ``None``
1249 in ``model``, the field value will be deleted.
1251 If ``model.etag`` is not ``None``, the update will only succeed if
1252 the model on the server has the same ETag. Thus reading a model with
1253 ``get_model``, changing its fields, and then passing it to
1254 ``update_model`` will ensure that the changes will only be saved if
1255 no modifications to the model occurred since the read.
1257 Args:
1258 model (google.cloud.bigquery.model.Model): The model to update.
1259 fields (Sequence[str]):
1260 The properties of ``model`` to change. These are strings
1261 corresponding to the properties of
1262 :class:`~google.cloud.bigquery.model.Model`.
1264 For example, to update the descriptive properties of the model,
1265 specify them in the ``fields`` argument:
1267 .. code-block:: python
1269 bigquery_client.update_model(
1270 model, ["description", "friendly_name"]
1271 )
1272 retry (Optional[google.api_core.retry.Retry]):
1273 A description of how to retry the API call.
1274 timeout (Optional[float]):
1275 The number of seconds to wait for the underlying HTTP transport
1276 before using ``retry``.
1278 Returns:
1279 google.cloud.bigquery.model.Model:
1280 The model resource returned from the API call.
1281 """
1282 partial = model._build_resource(fields)
1283 if model.etag:
1284 headers: Optional[Dict[str, str]] = {"If-Match": model.etag}
1285 else:
1286 headers = None
1287 path = model.path
1288 span_attributes = {"path": path, "fields": fields}
1290 api_response = self._call_api(
1291 retry,
1292 span_name="BigQuery.updateModel",
1293 span_attributes=span_attributes,
1294 method="PATCH",
1295 path=path,
1296 data=partial,
1297 headers=headers,
1298 timeout=timeout,
1299 )
1300 return Model.from_api_repr(api_response)
1302 def update_routine(
1303 self,
1304 routine: Routine,
1305 fields: Sequence[str],
1306 retry: retries.Retry = DEFAULT_RETRY,
1307 timeout: TimeoutType = DEFAULT_TIMEOUT,
1308 ) -> Routine:
1309 """[Beta] Change some fields of a routine.
1311 Use ``fields`` to specify which fields to update. At least one field
1312 must be provided. If a field is listed in ``fields`` and is ``None``
1313 in ``routine``, the field value will be deleted.
1315 .. warning::
1316 During beta, partial updates are not supported. You must provide
1317 all fields in the resource.
1319 If :attr:`~google.cloud.bigquery.routine.Routine.etag` is not
1320 ``None``, the update will only succeed if the resource on the server
1321 has the same ETag. Thus reading a routine with
1322 :func:`~google.cloud.bigquery.client.Client.get_routine`, changing
1323 its fields, and then passing it to this method will ensure that the
1324 changes will only be saved if no modifications to the resource
1325 occurred since the read.
1327 Args:
1328 routine (google.cloud.bigquery.routine.Routine):
1329 The routine to update.
1330 fields (Sequence[str]):
1331 The fields of ``routine`` to change, spelled as the
1332 :class:`~google.cloud.bigquery.routine.Routine` properties.
1334 For example, to update the description property of the routine,
1335 specify it in the ``fields`` argument:
1337 .. code-block:: python
1339 bigquery_client.update_routine(
1340 routine, ["description"]
1341 )
1342 retry (Optional[google.api_core.retry.Retry]):
1343 A description of how to retry the API call.
1344 timeout (Optional[float]):
1345 The number of seconds to wait for the underlying HTTP transport
1346 before using ``retry``.
1348 Returns:
1349 google.cloud.bigquery.routine.Routine:
1350 The routine resource returned from the API call.
1351 """
1352 partial = routine._build_resource(fields)
1353 if routine.etag:
1354 headers: Optional[Dict[str, str]] = {"If-Match": routine.etag}
1355 else:
1356 headers = None
1358 # TODO: remove when routines update supports partial requests.
1359 partial["routineReference"] = routine.reference.to_api_repr()
1361 path = routine.path
1362 span_attributes = {"path": path, "fields": fields}
1364 api_response = self._call_api(
1365 retry,
1366 span_name="BigQuery.updateRoutine",
1367 span_attributes=span_attributes,
1368 method="PUT",
1369 path=path,
1370 data=partial,
1371 headers=headers,
1372 timeout=timeout,
1373 )
1374 return Routine.from_api_repr(api_response)
1376 def update_table(
1377 self,
1378 table: Table,
1379 fields: Sequence[str],
1380 retry: retries.Retry = DEFAULT_RETRY,
1381 timeout: TimeoutType = DEFAULT_TIMEOUT,
1382 ) -> Table:
1383 """Change some fields of a table.
1385 Use ``fields`` to specify which fields to update. At least one field
1386 must be provided. If a field is listed in ``fields`` and is ``None``
1387 in ``table``, the field value will be deleted.
1389 If ``table.etag`` is not ``None``, the update will only succeed if
1390 the table on the server has the same ETag. Thus reading a table with
1391 ``get_table``, changing its fields, and then passing it to
1392 ``update_table`` will ensure that the changes will only be saved if
1393 no modifications to the table occurred since the read.
1395 Args:
1396 table (google.cloud.bigquery.table.Table): The table to update.
1397 fields (Sequence[str]):
1398 The fields of ``table`` to change, spelled as the
1399 :class:`~google.cloud.bigquery.table.Table` properties.
1401 For example, to update the descriptive properties of the table,
1402 specify them in the ``fields`` argument:
1404 .. code-block:: python
1406 bigquery_client.update_table(
1407 table,
1408 ["description", "friendly_name"]
1409 )
1410 retry (Optional[google.api_core.retry.Retry]):
1411 A description of how to retry the API call.
1412 timeout (Optional[float]):
1413 The number of seconds to wait for the underlying HTTP transport
1414 before using ``retry``.
1416 Returns:
1417 google.cloud.bigquery.table.Table:
1418 The table resource returned from the API call.
1419 """
1420 partial = table._build_resource(fields)
1421 if table.etag is not None:
1422 headers: Optional[Dict[str, str]] = {"If-Match": table.etag}
1423 else:
1424 headers = None
1426 path = table.path
1427 span_attributes = {"path": path, "fields": fields}
1429 api_response = self._call_api(
1430 retry,
1431 span_name="BigQuery.updateTable",
1432 span_attributes=span_attributes,
1433 method="PATCH",
1434 path=path,
1435 data=partial,
1436 headers=headers,
1437 timeout=timeout,
1438 )
1439 return Table.from_api_repr(api_response)
1441 def list_models(
1442 self,
1443 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1444 max_results: Optional[int] = None,
1445 page_token: Optional[str] = None,
1446 retry: retries.Retry = DEFAULT_RETRY,
1447 timeout: TimeoutType = DEFAULT_TIMEOUT,
1448 page_size: Optional[int] = None,
1449 ) -> page_iterator.Iterator:
1450 """[Beta] List models in the dataset.
1452 See
1453 https://cloud.google.com/bigquery/docs/reference/rest/v2/models/list
1455 Args:
1456 dataset (Union[ \
1457 google.cloud.bigquery.dataset.Dataset, \
1458 google.cloud.bigquery.dataset.DatasetReference, \
1459 google.cloud.bigquery.dataset.DatasetListItem, \
1460 str, \
1461 ]):
1462 A reference to the dataset whose models to list from the
1463 BigQuery API. If a string is passed in, this method attempts
1464 to create a dataset reference from a string using
1465 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1466 max_results (Optional[int]):
1467 Maximum number of models to return. Defaults to a
1468 value set by the API.
1469 page_token (Optional[str]):
1470 Token representing a cursor into the models. If not passed,
1471 the API will return the first page of models. The token marks
1472 the beginning of the iterator to be returned and the value of
1473 the ``page_token`` can be accessed at ``next_page_token`` of the
1474 :class:`~google.api_core.page_iterator.HTTPIterator`.
1475 retry (Optional[google.api_core.retry.Retry]):
1476 How to retry the RPC.
1477 timeout (Optional[float]):
1478 The number of seconds to wait for the underlying HTTP transport
1479 before using ``retry``.
1480 page_size (Optional[int]):
1481 Maximum number of models to return per page.
1482 Defaults to a value set by the API.
1484 Returns:
1485 google.api_core.page_iterator.Iterator:
1486 Iterator of
1487 :class:`~google.cloud.bigquery.model.Model` contained
1488 within the requested dataset.
1489 """
1490 dataset = self._dataset_from_arg(dataset)
1492 path = "%s/models" % dataset.path
1493 span_attributes = {"path": path}
1495 def api_request(*args, **kwargs):
1496 return self._call_api(
1497 retry,
1498 span_name="BigQuery.listModels",
1499 span_attributes=span_attributes,
1500 *args,
1501 timeout=timeout,
1502 **kwargs,
1503 )
1505 result = page_iterator.HTTPIterator(
1506 client=self,
1507 api_request=api_request,
1508 path=path,
1509 item_to_value=_item_to_model,
1510 items_key="models",
1511 page_token=page_token,
1512 max_results=max_results,
1513 page_size=page_size,
1514 )
1515 result.dataset = dataset # type: ignore
1516 return result
1518 def list_routines(
1519 self,
1520 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1521 max_results: Optional[int] = None,
1522 page_token: Optional[str] = None,
1523 retry: retries.Retry = DEFAULT_RETRY,
1524 timeout: TimeoutType = DEFAULT_TIMEOUT,
1525 page_size: Optional[int] = None,
1526 ) -> page_iterator.Iterator:
1527 """[Beta] List routines in the dataset.
1529 See
1530 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/list
1532 Args:
1533 dataset (Union[ \
1534 google.cloud.bigquery.dataset.Dataset, \
1535 google.cloud.bigquery.dataset.DatasetReference, \
1536 google.cloud.bigquery.dataset.DatasetListItem, \
1537 str, \
1538 ]):
1539 A reference to the dataset whose routines to list from the
1540 BigQuery API. If a string is passed in, this method attempts
1541 to create a dataset reference from a string using
1542 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1543 max_results (Optional[int]):
1544 Maximum number of routines to return. Defaults
1545 to a value set by the API.
1546 page_token (Optional[str]):
1547 Token representing a cursor into the routines. If not passed,
1548 the API will return the first page of routines. The token marks
1549 the beginning of the iterator to be returned and the value of the
1550 ``page_token`` can be accessed at ``next_page_token`` of the
1551 :class:`~google.api_core.page_iterator.HTTPIterator`.
1552 retry (Optional[google.api_core.retry.Retry]):
1553 How to retry the RPC.
1554 timeout (Optional[float]):
1555 The number of seconds to wait for the underlying HTTP transport
1556 before using ``retry``.
1557 page_size (Optional[int]):
1558 Maximum number of routines to return per page.
1559 Defaults to a value set by the API.
1561 Returns:
1562 google.api_core.page_iterator.Iterator:
1563 Iterator of all
1564 :class:`~google.cloud.bigquery.routine.Routine`s contained
1565 within the requested dataset, limited by ``max_results``.
1566 """
1567 dataset = self._dataset_from_arg(dataset)
1568 path = "{}/routines".format(dataset.path)
1570 span_attributes = {"path": path}
1572 def api_request(*args, **kwargs):
1573 return self._call_api(
1574 retry,
1575 span_name="BigQuery.listRoutines",
1576 span_attributes=span_attributes,
1577 *args,
1578 timeout=timeout,
1579 **kwargs,
1580 )
1582 result = page_iterator.HTTPIterator(
1583 client=self,
1584 api_request=api_request,
1585 path=path,
1586 item_to_value=_item_to_routine,
1587 items_key="routines",
1588 page_token=page_token,
1589 max_results=max_results,
1590 page_size=page_size,
1591 )
1592 result.dataset = dataset # type: ignore
1593 return result
1595 def list_tables(
1596 self,
1597 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1598 max_results: Optional[int] = None,
1599 page_token: Optional[str] = None,
1600 retry: retries.Retry = DEFAULT_RETRY,
1601 timeout: TimeoutType = DEFAULT_TIMEOUT,
1602 page_size: Optional[int] = None,
1603 ) -> page_iterator.Iterator:
1604 """List tables in the dataset.
1606 See
1607 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
1609 Args:
1610 dataset (Union[ \
1611 google.cloud.bigquery.dataset.Dataset, \
1612 google.cloud.bigquery.dataset.DatasetReference, \
1613 google.cloud.bigquery.dataset.DatasetListItem, \
1614 str, \
1615 ]):
1616 A reference to the dataset whose tables to list from the
1617 BigQuery API. If a string is passed in, this method attempts
1618 to create a dataset reference from a string using
1619 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1620 max_results (Optional[int]):
1621 Maximum number of tables to return. Defaults
1622 to a value set by the API.
1623 page_token (Optional[str]):
1624 Token representing a cursor into the tables. If not passed,
1625 the API will return the first page of tables. The token marks
1626 the beginning of the iterator to be returned and the value of
1627 the ``page_token`` can be accessed at ``next_page_token`` of the
1628 :class:`~google.api_core.page_iterator.HTTPIterator`.
1629 retry (Optional[google.api_core.retry.Retry]):
1630 How to retry the RPC.
1631 timeout (Optional[float]):
1632 The number of seconds to wait for the underlying HTTP transport
1633 before using ``retry``.
1634 page_size (Optional[int]):
1635 Maximum number of tables to return per page.
1636 Defaults to a value set by the API.
1638 Returns:
1639 google.api_core.page_iterator.Iterator:
1640 Iterator of
1641 :class:`~google.cloud.bigquery.table.TableListItem` contained
1642 within the requested dataset.
1643 """
1644 dataset = self._dataset_from_arg(dataset)
1645 path = "%s/tables" % dataset.path
1646 span_attributes = {"path": path}
1648 def api_request(*args, **kwargs):
1649 return self._call_api(
1650 retry,
1651 span_name="BigQuery.listTables",
1652 span_attributes=span_attributes,
1653 *args,
1654 timeout=timeout,
1655 **kwargs,
1656 )
1658 result = page_iterator.HTTPIterator(
1659 client=self,
1660 api_request=api_request,
1661 path=path,
1662 item_to_value=_item_to_table,
1663 items_key="tables",
1664 page_token=page_token,
1665 max_results=max_results,
1666 page_size=page_size,
1667 )
1668 result.dataset = dataset # type: ignore
1669 return result
1671 def delete_dataset(
1672 self,
1673 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1674 delete_contents: bool = False,
1675 retry: retries.Retry = DEFAULT_RETRY,
1676 timeout: TimeoutType = DEFAULT_TIMEOUT,
1677 not_found_ok: bool = False,
1678 ) -> None:
1679 """Delete a dataset.
1681 See
1682 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete
1684 Args:
1685 dataset (Union[ \
1686 google.cloud.bigquery.dataset.Dataset, \
1687 google.cloud.bigquery.dataset.DatasetReference, \
1688 google.cloud.bigquery.dataset.DatasetListItem, \
1689 str, \
1690 ]):
1691 A reference to the dataset to delete. If a string is passed
1692 in, this method attempts to create a dataset reference from a
1693 string using
1694 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1695 delete_contents (Optional[bool]):
1696 If True, delete all the tables in the dataset. If False and
1697 the dataset contains tables, the request will fail.
1698 Default is False.
1699 retry (Optional[google.api_core.retry.Retry]):
1700 How to retry the RPC.
1701 timeout (Optional[float]):
1702 The number of seconds to wait for the underlying HTTP transport
1703 before using ``retry``.
1704 not_found_ok (Optional[bool]):
1705 Defaults to ``False``. If ``True``, ignore "not found" errors
1706 when deleting the dataset.
1707 """
1708 dataset = self._dataset_from_arg(dataset)
1709 params = {}
1710 path = dataset.path
1711 if delete_contents:
1712 params["deleteContents"] = "true"
1713 span_attributes = {"path": path, "deleteContents": delete_contents}
1714 else:
1715 span_attributes = {"path": path}
1717 try:
1718 self._call_api(
1719 retry,
1720 span_name="BigQuery.deleteDataset",
1721 span_attributes=span_attributes,
1722 method="DELETE",
1723 path=path,
1724 query_params=params,
1725 timeout=timeout,
1726 )
1727 except core_exceptions.NotFound:
1728 if not not_found_ok:
1729 raise
1731 def delete_model(
1732 self,
1733 model: Union[Model, ModelReference, str],
1734 retry: retries.Retry = DEFAULT_RETRY,
1735 timeout: TimeoutType = DEFAULT_TIMEOUT,
1736 not_found_ok: bool = False,
1737 ) -> None:
1738 """[Beta] Delete a model
1740 See
1741 https://cloud.google.com/bigquery/docs/reference/rest/v2/models/delete
1743 Args:
1744 model (Union[ \
1745 google.cloud.bigquery.model.Model, \
1746 google.cloud.bigquery.model.ModelReference, \
1747 str, \
1748 ]):
1749 A reference to the model to delete. If a string is passed in,
1750 this method attempts to create a model reference from a
1751 string using
1752 :func:`google.cloud.bigquery.model.ModelReference.from_string`.
1753 retry (Optional[google.api_core.retry.Retry]):
1754 How to retry the RPC.
1755 timeout (Optional[float]):
1756 The number of seconds to wait for the underlying HTTP transport
1757 before using ``retry``.
1758 not_found_ok (Optional[bool]):
1759 Defaults to ``False``. If ``True``, ignore "not found" errors
1760 when deleting the model.
1761 """
1762 if isinstance(model, str):
1763 model = ModelReference.from_string(model, default_project=self.project)
1765 if not isinstance(model, (Model, ModelReference)):
1766 raise TypeError("model must be a Model or a ModelReference")
1768 path = model.path
1769 try:
1770 span_attributes = {"path": path}
1771 self._call_api(
1772 retry,
1773 span_name="BigQuery.deleteModel",
1774 span_attributes=span_attributes,
1775 method="DELETE",
1776 path=path,
1777 timeout=timeout,
1778 )
1779 except core_exceptions.NotFound:
1780 if not not_found_ok:
1781 raise
1783 def delete_job_metadata(
1784 self,
1785 job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob],
1786 project: Optional[str] = None,
1787 location: Optional[str] = None,
1788 retry: retries.Retry = DEFAULT_RETRY,
1789 timeout: TimeoutType = DEFAULT_TIMEOUT,
1790 not_found_ok: bool = False,
1791 ):
1792 """[Beta] Delete job metadata from job history.
1794 Note: This does not stop a running job. Use
1795 :func:`~google.cloud.bigquery.client.Client.cancel_job` instead.
1797 Args:
1798 job_id (Union[ \
1799 str, \
1800 LoadJob, \
1801 CopyJob, \
1802 ExtractJob, \
1803 QueryJob \
1804 ]): Job or job identifier.
1805 project (Optional[str]):
1806 ID of the project which owns the job (defaults to the client's project).
1807 location (Optional[str]):
1808 Location where the job was run. Ignored if ``job_id`` is a job
1809 object.
1810 retry (Optional[google.api_core.retry.Retry]):
1811 How to retry the RPC.
1812 timeout (Optional[float]):
1813 The number of seconds to wait for the underlying HTTP transport
1814 before using ``retry``.
1815 not_found_ok (Optional[bool]):
1816 Defaults to ``False``. If ``True``, ignore "not found" errors
1817 when deleting the job.
1818 """
1819 extra_params = {}
1821 project, location, job_id = _extract_job_reference(
1822 job_id, project=project, location=location
1823 )
1825 if project is None:
1826 project = self.project
1828 if location is None:
1829 location = self.location
1831 # Location is always required for jobs.delete()
1832 extra_params["location"] = location
1834 path = f"/projects/{project}/jobs/{job_id}/delete"
1836 span_attributes = {"path": path, "job_id": job_id, "location": location}
1838 try:
1839 self._call_api(
1840 retry,
1841 span_name="BigQuery.deleteJob",
1842 span_attributes=span_attributes,
1843 method="DELETE",
1844 path=path,
1845 query_params=extra_params,
1846 timeout=timeout,
1847 )
1848 except google.api_core.exceptions.NotFound:
1849 if not not_found_ok:
1850 raise
1852 def delete_routine(
1853 self,
1854 routine: Union[Routine, RoutineReference, str],
1855 retry: retries.Retry = DEFAULT_RETRY,
1856 timeout: TimeoutType = DEFAULT_TIMEOUT,
1857 not_found_ok: bool = False,
1858 ) -> None:
1859 """[Beta] Delete a routine.
1861 See
1862 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/delete
1864 Args:
1865 routine (Union[ \
1866 google.cloud.bigquery.routine.Routine, \
1867 google.cloud.bigquery.routine.RoutineReference, \
1868 str, \
1869 ]):
1870 A reference to the routine to delete. If a string is passed
1871 in, this method attempts to create a routine reference from a
1872 string using
1873 :func:`google.cloud.bigquery.routine.RoutineReference.from_string`.
1874 retry (Optional[google.api_core.retry.Retry]):
1875 How to retry the RPC.
1876 timeout (Optional[float]):
1877 The number of seconds to wait for the underlying HTTP transport
1878 before using ``retry``.
1879 not_found_ok (Optional[bool]):
1880 Defaults to ``False``. If ``True``, ignore "not found" errors
1881 when deleting the routine.
1882 """
1883 if isinstance(routine, str):
1884 routine = RoutineReference.from_string(
1885 routine, default_project=self.project
1886 )
1887 path = routine.path
1889 if not isinstance(routine, (Routine, RoutineReference)):
1890 raise TypeError("routine must be a Routine or a RoutineReference")
1892 try:
1893 span_attributes = {"path": path}
1894 self._call_api(
1895 retry,
1896 span_name="BigQuery.deleteRoutine",
1897 span_attributes=span_attributes,
1898 method="DELETE",
1899 path=path,
1900 timeout=timeout,
1901 )
1902 except core_exceptions.NotFound:
1903 if not not_found_ok:
1904 raise
1906 def delete_table(
1907 self,
1908 table: Union[Table, TableReference, TableListItem, str],
1909 retry: retries.Retry = DEFAULT_RETRY,
1910 timeout: TimeoutType = DEFAULT_TIMEOUT,
1911 not_found_ok: bool = False,
1912 ) -> None:
1913 """Delete a table
1915 See
1916 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete
1918 Args:
1919 table (Union[ \
1920 google.cloud.bigquery.table.Table, \
1921 google.cloud.bigquery.table.TableReference, \
1922 google.cloud.bigquery.table.TableListItem, \
1923 str, \
1924 ]):
1925 A reference to the table to delete. If a string is passed in,
1926 this method attempts to create a table reference from a
1927 string using
1928 :func:`google.cloud.bigquery.table.TableReference.from_string`.
1929 retry (Optional[google.api_core.retry.Retry]):
1930 How to retry the RPC.
1931 timeout (Optional[float]):
1932 The number of seconds to wait for the underlying HTTP transport
1933 before using ``retry``.
1934 not_found_ok (Optional[bool]):
1935 Defaults to ``False``. If ``True``, ignore "not found" errors
1936 when deleting the table.
1937 """
1938 table = _table_arg_to_table_ref(table, default_project=self.project)
1939 if not isinstance(table, TableReference):
1940 raise TypeError("Unable to get TableReference for table '{}'".format(table))
1942 try:
1943 path = table.path
1944 span_attributes = {"path": path}
1945 self._call_api(
1946 retry,
1947 span_name="BigQuery.deleteTable",
1948 span_attributes=span_attributes,
1949 method="DELETE",
1950 path=path,
1951 timeout=timeout,
1952 )
1953 except core_exceptions.NotFound:
1954 if not not_found_ok:
1955 raise
1957 def _get_query_results(
1958 self,
1959 job_id: str,
1960 retry: retries.Retry,
1961 project: Optional[str] = None,
1962 timeout_ms: Optional[int] = None,
1963 location: Optional[str] = None,
1964 timeout: TimeoutType = DEFAULT_TIMEOUT,
1965 ) -> _QueryResults:
1966 """Get the query results object for a query job.
1968 Args:
1969 job_id (str): Name of the query job.
1970 retry (google.api_core.retry.Retry):
1971 How to retry the RPC.
1972 project (Optional[str]):
1973 Project ID for the query job (defaults to the project of the client).
1974 timeout_ms (Optional[int]):
1975 Number of milliseconds the the API call should wait for the query
1976 to complete before the request times out.
1977 location (Optional[str]): Location of the query job.
1978 timeout (Optional[float]):
1979 The number of seconds to wait for the underlying HTTP transport
1980 before using ``retry``. If set, this connection timeout may be
1981 increased to a minimum value. This prevents retries on what
1982 would otherwise be a successful response.
1984 Returns:
1985 google.cloud.bigquery.query._QueryResults:
1986 A new ``_QueryResults`` instance.
1987 """
1989 extra_params: Dict[str, Any] = {"maxResults": 0}
1991 if timeout is not None:
1992 if not isinstance(timeout, (int, float)):
1993 timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
1994 else:
1995 timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
1997 if project is None:
1998 project = self.project
2000 if timeout_ms is not None:
2001 extra_params["timeoutMs"] = timeout_ms
2003 if location is None:
2004 location = self.location
2006 if location is not None:
2007 extra_params["location"] = location
2009 path = "/projects/{}/queries/{}".format(project, job_id)
2011 # This call is typically made in a polling loop that checks whether the
2012 # job is complete (from QueryJob.done(), called ultimately from
2013 # QueryJob.result()). So we don't need to poll here.
2014 span_attributes = {"path": path}
2015 resource = self._call_api(
2016 retry,
2017 span_name="BigQuery.getQueryResults",
2018 span_attributes=span_attributes,
2019 method="GET",
2020 path=path,
2021 query_params=extra_params,
2022 timeout=timeout,
2023 )
2024 return _QueryResults.from_api_repr(resource)
2026 def job_from_resource(
2027 self, resource: dict
2028 ) -> Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]:
2029 """Detect correct job type from resource and instantiate.
2031 Args:
2032 resource (Dict): one job resource from API response
2034 Returns:
2035 Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]:
2036 The job instance, constructed via the resource.
2037 """
2038 config = resource.get("configuration", {})
2039 if "load" in config:
2040 return job.LoadJob.from_api_repr(resource, self)
2041 elif "copy" in config:
2042 return job.CopyJob.from_api_repr(resource, self)
2043 elif "extract" in config:
2044 return job.ExtractJob.from_api_repr(resource, self)
2045 elif "query" in config:
2046 return job.QueryJob.from_api_repr(resource, self)
2047 return job.UnknownJob.from_api_repr(resource, self)
    def create_job(
        self,
        job_config: dict,
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
        """Create a new job.

        Args:
            job_config (dict): configuration job representation returned from the API.
            retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            Union[ \
                google.cloud.bigquery.job.LoadJob, \
                google.cloud.bigquery.job.CopyJob, \
                google.cloud.bigquery.job.ExtractJob, \
                google.cloud.bigquery.job.QueryJob \
            ]:
                A new job instance.

        Raises:
            TypeError: If ``job_config`` contains none of the recognized
                job-type keys (``load``, ``copy``, ``extract``, ``query``).
        """

        # Dispatch on which job-type key is present in the resource dict.
        # Each branch parses the matching *JobConfig from the full resource,
        # extracts the branch-specific fields, and delegates to the
        # corresponding high-level client method.
        if "load" in job_config:
            load_job_config = google.cloud.bigquery.job.LoadJobConfig.from_api_repr(
                job_config
            )
            destination = _get_sub_prop(job_config, ["load", "destinationTable"])
            source_uris = _get_sub_prop(job_config, ["load", "sourceUris"])
            destination = TableReference.from_api_repr(destination)
            return self.load_table_from_uri(
                source_uris,
                destination,
                job_config=typing.cast(LoadJobConfig, load_job_config),
                retry=retry,
                timeout=timeout,
            )
        elif "copy" in job_config:
            copy_job_config = google.cloud.bigquery.job.CopyJobConfig.from_api_repr(
                job_config
            )
            destination = _get_sub_prop(job_config, ["copy", "destinationTable"])
            destination = TableReference.from_api_repr(destination)
            return self.copy_table(
                [],  # Source table(s) already in job_config resource.
                destination,
                job_config=typing.cast(CopyJobConfig, copy_job_config),
                retry=retry,
                timeout=timeout,
            )
        elif "extract" in job_config:
            extract_job_config = (
                google.cloud.bigquery.job.ExtractJobConfig.from_api_repr(job_config)
            )
            # Extract jobs may read from either a table or a model; prefer
            # sourceTable and fall back to sourceModel when it is absent.
            source = _get_sub_prop(job_config, ["extract", "sourceTable"])
            if source:
                source_type = "Table"
                source = TableReference.from_api_repr(source)
            else:
                source = _get_sub_prop(job_config, ["extract", "sourceModel"])
                source_type = "Model"
                source = ModelReference.from_api_repr(source)
            destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"])
            return self.extract_table(
                source,
                destination_uris,
                job_config=typing.cast(ExtractJobConfig, extract_job_config),
                retry=retry,
                timeout=timeout,
                source_type=source_type,
            )
        elif "query" in job_config:
            query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr(
                job_config
            )
            query = _get_sub_prop(job_config, ["query", "query"])
            return self.query(
                query,
                job_config=typing.cast(QueryJobConfig, query_job_config),
                retry=retry,
                timeout=timeout,
            )
        else:
            raise TypeError("Invalid job configuration received.")
2136 def get_job(
2137 self,
2138 job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
2139 project: Optional[str] = None,
2140 location: Optional[str] = None,
2141 retry: retries.Retry = DEFAULT_RETRY,
2142 timeout: TimeoutType = DEFAULT_TIMEOUT,
2143 ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
2144 """Fetch a job for the project associated with this client.
2146 See
2147 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
2149 Args:
2150 job_id (Union[ \
2151 str, \
2152 job.LoadJob, \
2153 job.CopyJob, \
2154 job.ExtractJob, \
2155 job.QueryJob \
2156 ]):
2157 Job identifier.
2158 project (Optional[str]):
2159 ID of the project which owns the job (defaults to the client's project).
2160 location (Optional[str]):
2161 Location where the job was run. Ignored if ``job_id`` is a job
2162 object.
2163 retry (Optional[google.api_core.retry.Retry]):
2164 How to retry the RPC.
2165 timeout (Optional[float]):
2166 The number of seconds to wait for the underlying HTTP transport
2167 before using ``retry``.
2169 Returns:
2170 Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
2171 Job instance, based on the resource returned by the API.
2172 """
2173 extra_params = {"projection": "full"}
2175 project, location, job_id = _extract_job_reference(
2176 job_id, project=project, location=location
2177 )
2179 if project is None:
2180 project = self.project
2182 if location is None:
2183 location = self.location
2185 if location is not None:
2186 extra_params["location"] = location
2188 path = "/projects/{}/jobs/{}".format(project, job_id)
2190 span_attributes = {"path": path, "job_id": job_id, "location": location}
2192 resource = self._call_api(
2193 retry,
2194 span_name="BigQuery.getJob",
2195 span_attributes=span_attributes,
2196 method="GET",
2197 path=path,
2198 query_params=extra_params,
2199 timeout=timeout,
2200 )
2202 return self.job_from_resource(resource)
2204 def cancel_job(
2205 self,
2206 job_id: str,
2207 project: Optional[str] = None,
2208 location: Optional[str] = None,
2209 retry: retries.Retry = DEFAULT_RETRY,
2210 timeout: TimeoutType = DEFAULT_TIMEOUT,
2211 ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
2212 """Attempt to cancel a job from a job ID.
2214 See
2215 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel
2217 Args:
2218 job_id (Union[ \
2219 str, \
2220 google.cloud.bigquery.job.LoadJob, \
2221 google.cloud.bigquery.job.CopyJob, \
2222 google.cloud.bigquery.job.ExtractJob, \
2223 google.cloud.bigquery.job.QueryJob \
2224 ]): Job identifier.
2225 project (Optional[str]):
2226 ID of the project which owns the job (defaults to the client's project).
2227 location (Optional[str]):
2228 Location where the job was run. Ignored if ``job_id`` is a job
2229 object.
2230 retry (Optional[google.api_core.retry.Retry]):
2231 How to retry the RPC.
2232 timeout (Optional[float]):
2233 The number of seconds to wait for the underlying HTTP transport
2234 before using ``retry``.
2236 Returns:
2237 Union[ \
2238 google.cloud.bigquery.job.LoadJob, \
2239 google.cloud.bigquery.job.CopyJob, \
2240 google.cloud.bigquery.job.ExtractJob, \
2241 google.cloud.bigquery.job.QueryJob, \
2242 ]:
2243 Job instance, based on the resource returned by the API.
2244 """
2245 extra_params = {"projection": "full"}
2247 project, location, job_id = _extract_job_reference(
2248 job_id, project=project, location=location
2249 )
2251 if project is None:
2252 project = self.project
2254 if location is None:
2255 location = self.location
2257 if location is not None:
2258 extra_params["location"] = location
2260 path = "/projects/{}/jobs/{}/cancel".format(project, job_id)
2262 span_attributes = {"path": path, "job_id": job_id, "location": location}
2264 resource = self._call_api(
2265 retry,
2266 span_name="BigQuery.cancelJob",
2267 span_attributes=span_attributes,
2268 method="POST",
2269 path=path,
2270 query_params=extra_params,
2271 timeout=timeout,
2272 )
2274 job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob
2276 return typing.cast(
2277 Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
2278 job_instance,
2279 )
2281 def list_jobs(
2282 self,
2283 project: Optional[str] = None,
2284 parent_job: Optional[Union[QueryJob, str]] = None,
2285 max_results: Optional[int] = None,
2286 page_token: Optional[str] = None,
2287 all_users: Optional[bool] = None,
2288 state_filter: Optional[str] = None,
2289 retry: retries.Retry = DEFAULT_RETRY,
2290 timeout: TimeoutType = DEFAULT_TIMEOUT,
2291 min_creation_time: Optional[datetime.datetime] = None,
2292 max_creation_time: Optional[datetime.datetime] = None,
2293 page_size: Optional[int] = None,
2294 ) -> page_iterator.Iterator:
2295 """List jobs for the project associated with this client.
2297 See
2298 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list
2300 Args:
2301 project (Optional[str]):
2302 Project ID to use for retreiving datasets. Defaults
2303 to the client's project.
2304 parent_job (Optional[Union[ \
2305 google.cloud.bigquery.job._AsyncJob, \
2306 str, \
2307 ]]):
2308 If set, retrieve only child jobs of the specified parent.
2309 max_results (Optional[int]):
2310 Maximum number of jobs to return.
2311 page_token (Optional[str]):
2312 Opaque marker for the next "page" of jobs. If not
2313 passed, the API will return the first page of jobs. The token
2314 marks the beginning of the iterator to be returned and the
2315 value of the ``page_token`` can be accessed at
2316 ``next_page_token`` of
2317 :class:`~google.api_core.page_iterator.HTTPIterator`.
2318 all_users (Optional[bool]):
2319 If true, include jobs owned by all users in the project.
2320 Defaults to :data:`False`.
2321 state_filter (Optional[str]):
2322 If set, include only jobs matching the given state. One of:
2323 * ``"done"``
2324 * ``"pending"``
2325 * ``"running"``
2326 retry (Optional[google.api_core.retry.Retry]):
2327 How to retry the RPC.
2328 timeout (Optional[float]):
2329 The number of seconds to wait for the underlying HTTP transport
2330 before using ``retry``.
2331 min_creation_time (Optional[datetime.datetime]):
2332 Min value for job creation time. If set, only jobs created
2333 after or at this timestamp are returned. If the datetime has
2334 no time zone assumes UTC time.
2335 max_creation_time (Optional[datetime.datetime]):
2336 Max value for job creation time. If set, only jobs created
2337 before or at this timestamp are returned. If the datetime has
2338 no time zone assumes UTC time.
2339 page_size (Optional[int]):
2340 Maximum number of jobs to return per page.
2342 Returns:
2343 google.api_core.page_iterator.Iterator:
2344 Iterable of job instances.
2345 """
2346 if isinstance(parent_job, job._AsyncJob):
2347 parent_job = parent_job.job_id # pytype: disable=attribute-error
2349 extra_params = {
2350 "allUsers": all_users,
2351 "stateFilter": state_filter,
2352 "minCreationTime": _str_or_none(
2353 google.cloud._helpers._millis_from_datetime(min_creation_time)
2354 ),
2355 "maxCreationTime": _str_or_none(
2356 google.cloud._helpers._millis_from_datetime(max_creation_time)
2357 ),
2358 "projection": "full",
2359 "parentJobId": parent_job,
2360 }
2362 extra_params = {
2363 param: value for param, value in extra_params.items() if value is not None
2364 }
2366 if project is None:
2367 project = self.project
2369 path = "/projects/%s/jobs" % (project,)
2371 span_attributes = {"path": path}
2373 def api_request(*args, **kwargs):
2374 return self._call_api(
2375 retry,
2376 span_name="BigQuery.listJobs",
2377 span_attributes=span_attributes,
2378 *args,
2379 timeout=timeout,
2380 **kwargs,
2381 )
2383 return page_iterator.HTTPIterator(
2384 client=self,
2385 api_request=api_request,
2386 path=path,
2387 item_to_value=_item_to_job,
2388 items_key="jobs",
2389 page_token=page_token,
2390 max_results=max_results,
2391 extra_params=extra_params,
2392 page_size=page_size,
2393 )
2395 def load_table_from_uri(
2396 self,
2397 source_uris: Union[str, Sequence[str]],
2398 destination: Union[Table, TableReference, TableListItem, str],
2399 job_id: Optional[str] = None,
2400 job_id_prefix: Optional[str] = None,
2401 location: Optional[str] = None,
2402 project: Optional[str] = None,
2403 job_config: Optional[LoadJobConfig] = None,
2404 retry: retries.Retry = DEFAULT_RETRY,
2405 timeout: TimeoutType = DEFAULT_TIMEOUT,
2406 ) -> job.LoadJob:
2407 """Starts a job for loading data into a table from Cloud Storage.
2409 See
2410 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload
2412 Args:
2413 source_uris (Union[str, Sequence[str]]):
2414 URIs of data files to be loaded; in format
2415 ``gs://<bucket_name>/<object_name_or_glob>``.
2416 destination (Union[ \
2417 google.cloud.bigquery.table.Table, \
2418 google.cloud.bigquery.table.TableReference, \
2419 google.cloud.bigquery.table.TableListItem, \
2420 str, \
2421 ]):
2422 Table into which data is to be loaded. If a string is passed
2423 in, this method attempts to create a table reference from a
2424 string using
2425 :func:`google.cloud.bigquery.table.TableReference.from_string`.
2426 job_id (Optional[str]): Name of the job.
2427 job_id_prefix (Optional[str]):
2428 The user-provided prefix for a randomly generated job ID.
2429 This parameter will be ignored if a ``job_id`` is also given.
2430 location (Optional[str]):
2431 Location where to run the job. Must match the location of the
2432 destination table.
2433 project (Optional[str]):
2434 Project ID of the project of where to run the job. Defaults
2435 to the client's project.
2436 job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
2437 Extra configuration options for the job.
2438 retry (Optional[google.api_core.retry.Retry]):
2439 How to retry the RPC.
2440 timeout (Optional[float]):
2441 The number of seconds to wait for the underlying HTTP transport
2442 before using ``retry``.
2444 Returns:
2445 google.cloud.bigquery.job.LoadJob: A new load job.
2447 Raises:
2448 TypeError:
2449 If ``job_config`` is not an instance of
2450 :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2451 """
2452 job_id = _make_job_id(job_id, job_id_prefix)
2454 if project is None:
2455 project = self.project
2457 if location is None:
2458 location = self.location
2460 job_ref = job._JobReference(job_id, project=project, location=location)
2462 if isinstance(source_uris, str):
2463 source_uris = [source_uris]
2465 destination = _table_arg_to_table_ref(destination, default_project=self.project)
2467 if job_config is not None:
2468 _verify_job_config_type(job_config, LoadJobConfig)
2469 else:
2470 job_config = job.LoadJobConfig()
2472 new_job_config = job_config._fill_from_default(self._default_load_job_config)
2474 load_job = job.LoadJob(job_ref, source_uris, destination, self, new_job_config)
2475 load_job._begin(retry=retry, timeout=timeout)
2477 return load_job
2479 def load_table_from_file(
2480 self,
2481 file_obj: IO[bytes],
2482 destination: Union[Table, TableReference, TableListItem, str],
2483 rewind: bool = False,
2484 size: Optional[int] = None,
2485 num_retries: int = _DEFAULT_NUM_RETRIES,
2486 job_id: Optional[str] = None,
2487 job_id_prefix: Optional[str] = None,
2488 location: Optional[str] = None,
2489 project: Optional[str] = None,
2490 job_config: Optional[LoadJobConfig] = None,
2491 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
2492 ) -> job.LoadJob:
2493 """Upload the contents of this table from a file-like object.
2495 Similar to :meth:`load_table_from_uri`, this method creates, starts and
2496 returns a :class:`~google.cloud.bigquery.job.LoadJob`.
2498 Args:
2499 file_obj (IO[bytes]):
2500 A file handle opened in binary mode for reading.
2501 destination (Union[Table, \
2502 TableReference, \
2503 TableListItem, \
2504 str \
2505 ]):
2506 Table into which data is to be loaded. If a string is passed
2507 in, this method attempts to create a table reference from a
2508 string using
2509 :func:`google.cloud.bigquery.table.TableReference.from_string`.
2510 rewind (Optional[bool]):
2511 If True, seek to the beginning of the file handle before
2512 reading the file. Defaults to False.
2513 size (Optional[int]):
2514 The number of bytes to read from the file handle. If size is
2515 ``None`` or large, resumable upload will be used. Otherwise,
2516 multipart upload will be used.
2517 num_retries (Optional[int]): Number of upload retries. Defaults to 6.
2518 job_id (Optional[str]): Name of the job.
2519 job_id_prefix (Optional[str]):
2520 The user-provided prefix for a randomly generated job ID.
2521 This parameter will be ignored if a ``job_id`` is also given.
2522 location (Optional[str]):
2523 Location where to run the job. Must match the location of the
2524 destination table.
2525 project (Optional[str]):
2526 Project ID of the project of where to run the job. Defaults
2527 to the client's project.
2528 job_config (Optional[LoadJobConfig]):
2529 Extra configuration options for the job.
2530 timeout (Optional[float]):
2531 The number of seconds to wait for the underlying HTTP transport
2532 before using ``retry``. Depending on the retry strategy, a request
2533 may be repeated several times using the same timeout each time.
2534 Defaults to None.
2536 Can also be passed as a tuple (connect_timeout, read_timeout).
2537 See :meth:`requests.Session.request` documentation for details.
2539 Returns:
2540 google.cloud.bigquery.job.LoadJob: A new load job.
2542 Raises:
2543 ValueError:
2544 If ``size`` is not passed in and can not be determined, or if
2545 the ``file_obj`` can be detected to be a file opened in text
2546 mode.
2548 TypeError:
2549 If ``job_config`` is not an instance of
2550 :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2551 """
2552 job_id = _make_job_id(job_id, job_id_prefix)
2554 if project is None:
2555 project = self.project
2557 if location is None:
2558 location = self.location
2560 destination = _table_arg_to_table_ref(destination, default_project=self.project)
2561 job_ref = job._JobReference(job_id, project=project, location=location)
2563 if job_config is not None:
2564 _verify_job_config_type(job_config, LoadJobConfig)
2565 else:
2566 job_config = job.LoadJobConfig()
2568 new_job_config = job_config._fill_from_default(self._default_load_job_config)
2570 load_job = job.LoadJob(job_ref, None, destination, self, new_job_config)
2571 job_resource = load_job.to_api_repr()
2573 if rewind:
2574 file_obj.seek(0, os.SEEK_SET)
2576 _check_mode(file_obj)
2578 try:
2579 if size is None or size >= _MAX_MULTIPART_SIZE:
2580 response = self._do_resumable_upload(
2581 file_obj, job_resource, num_retries, timeout, project=project
2582 )
2583 else:
2584 response = self._do_multipart_upload(
2585 file_obj, job_resource, size, num_retries, timeout, project=project
2586 )
2587 except resumable_media.InvalidResponse as exc:
2588 raise exceptions.from_http_response(exc.response)
2590 return typing.cast(LoadJob, self.job_from_resource(response.json()))
    def load_table_from_dataframe(
        self,
        dataframe: "pandas.DataFrame",  # type: ignore
        destination: Union[Table, TableReference, str],
        num_retries: int = _DEFAULT_NUM_RETRIES,
        job_id: Optional[str] = None,
        job_id_prefix: Optional[str] = None,
        location: Optional[str] = None,
        project: Optional[str] = None,
        job_config: Optional[LoadJobConfig] = None,
        parquet_compression: str = "snappy",
        timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
    ) -> job.LoadJob:
        """Upload the contents of a table from a pandas DataFrame.

        Similar to :meth:`load_table_from_uri`, this method creates, starts and
        returns a :class:`~google.cloud.bigquery.job.LoadJob`.

        .. note::

            REPEATED fields are NOT supported when using the CSV source format.
            They are supported when using the PARQUET source format, but
            due to the way they are encoded in the ``parquet`` file,
            a mismatch with the existing table schema can occur, so
            REPEATED fields are not properly supported when using ``pyarrow<4.0.0``
            using the parquet format.

            https://github.com/googleapis/python-bigquery/issues/19

        Args:
            dataframe (pandas.Dataframe):
                A :class:`~pandas.DataFrame` containing the data to load.
            destination (Union[ \
                Table, \
                TableReference, \
                str \
            ]):
                The destination table to use for loading the data. If it is an
                existing table, the schema of the :class:`~pandas.DataFrame`
                must match the schema of the destination table. If the table
                does not yet exist, the schema is inferred from the
                :class:`~pandas.DataFrame`.

                If a string is passed in, this method attempts to create a
                table reference from a string using
                :func:`google.cloud.bigquery.table.TableReference.from_string`.
            num_retries (Optional[int]): Number of upload retries. Defaults to 6.
            job_id (Optional[str]): Name of the job.
            job_id_prefix (Optional[str]):
                The user-provided prefix for a randomly generated
                job ID. This parameter will be ignored if a ``job_id`` is
                also given.
            location (Optional[str]):
                Location where to run the job. Must match the location of the
                destination table.
            project (Optional[str]):
                Project ID of the project of where to run the job. Defaults
                to the client's project.
            job_config (Optional[LoadJobConfig]):
                Extra configuration options for the job.

                To override the default pandas data type conversions, supply
                a value for
                :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with
                column names matching those of the dataframe. The BigQuery
                schema is used to determine the correct data type conversion.
                Indexes are not loaded.

                By default, this method uses the parquet source format. To
                override this, supply a value for
                :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format`
                with the format name. Currently only
                :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and
                :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are
                supported.
            parquet_compression (Optional[str]):
                [Beta] The compression method to use if intermittently
                serializing ``dataframe`` to a parquet file.
                Defaults to "snappy".

                The argument is directly passed as the ``compression``
                argument to the underlying ``pyarrow.parquet.write_table()``
                method (the default value "snappy" gets converted to uppercase).
                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table

                If the job config schema is missing, the argument is directly
                passed as the ``compression`` argument to the underlying
                ``DataFrame.to_parquet()`` method.
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``. Depending on the retry strategy, a request may
                be repeated several times using the same timeout each time.
                Defaults to None.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            google.cloud.bigquery.job.LoadJob: A new load job.

        Raises:
            ValueError:
                If a usable parquet engine cannot be found. This method
                requires :mod:`pyarrow` to be installed.
            TypeError:
                If ``job_config`` is not an instance of
                :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
        """
        job_id = _make_job_id(job_id, job_id_prefix)

        if job_config is not None:
            _verify_job_config_type(job_config, LoadJobConfig)
        else:
            job_config = job.LoadJobConfig()

        # Layer the client's default load-job settings under the caller's.
        new_job_config = job_config._fill_from_default(self._default_load_job_config)

        supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET}
        if new_job_config.source_format is None:
            # default value
            new_job_config.source_format = job.SourceFormat.PARQUET

        if (
            new_job_config.source_format == job.SourceFormat.PARQUET
            and new_job_config.parquet_options is None
        ):
            parquet_options = ParquetOptions()
            # default value
            parquet_options.enable_list_inference = True
            new_job_config.parquet_options = parquet_options

        if new_job_config.source_format not in supported_formats:
            raise ValueError(
                "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format(
                    new_job_config.source_format
                )
            )

        if pyarrow is None and new_job_config.source_format == job.SourceFormat.PARQUET:
            # pyarrow is now the only supported parquet engine.
            raise ValueError("This method requires pyarrow to be installed")

        if location is None:
            location = self.location

        # If table schema is not provided, we try to fetch the existing table
        # schema, and check if dataframe schema is compatible with it - except
        # for WRITE_TRUNCATE jobs, the existing schema does not matter then.
        if (
            not new_job_config.schema
            and new_job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE
        ):
            try:
                table = self.get_table(destination)
            except core_exceptions.NotFound:
                # Destination does not exist yet; schema will be inferred
                # purely from the dataframe below.
                pass
            else:
                columns_and_indexes = frozenset(
                    name
                    for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe)
                )
                new_job_config.schema = [
                    # Field description and policy tags are not needed to
                    # serialize a data frame.
                    SchemaField(
                        field.name,
                        field.field_type,
                        mode=field.mode,
                        fields=field.fields,
                    )
                    # schema fields not present in the dataframe are not needed
                    for field in table.schema
                    if field.name in columns_and_indexes
                ]

        # Fill in any columns still missing a type from the dataframe itself.
        new_job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
            dataframe, new_job_config.schema
        )

        if not new_job_config.schema:
            # the schema could not be fully detected
            warnings.warn(
                "Schema could not be detected for all columns. Loading from a "
                "dataframe without a schema will be deprecated in the future, "
                "please provide a schema.",
                PendingDeprecationWarning,
                stacklevel=2,
            )

        # Serialize to a temporary local file, then upload via
        # load_table_from_file; the file is always removed afterwards.
        tmpfd, tmppath = tempfile.mkstemp(
            suffix="_job_{}.{}".format(job_id[:8], new_job_config.source_format.lower())
        )
        os.close(tmpfd)

        try:
            if new_job_config.source_format == job.SourceFormat.PARQUET:
                if new_job_config.schema:
                    if parquet_compression == "snappy":  # adjust the default value
                        parquet_compression = parquet_compression.upper()

                    _pandas_helpers.dataframe_to_parquet(
                        dataframe,
                        new_job_config.schema,
                        tmppath,
                        parquet_compression=parquet_compression,
                        parquet_use_compliant_nested_type=True,
                    )
                else:
                    dataframe.to_parquet(
                        tmppath,
                        engine="pyarrow",
                        compression=parquet_compression,
                        **(
                            {"use_compliant_nested_type": True}
                            if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type
                            else {}
                        ),
                    )

            else:
                dataframe.to_csv(
                    tmppath,
                    index=False,
                    header=False,
                    encoding="utf-8",
                    float_format="%.17g",
                    date_format="%Y-%m-%d %H:%M:%S.%f",
                )

            with open(tmppath, "rb") as tmpfile:
                file_size = os.path.getsize(tmppath)
                return self.load_table_from_file(
                    tmpfile,
                    destination,
                    num_retries=num_retries,
                    rewind=True,
                    size=file_size,
                    job_id=job_id,
                    job_id_prefix=job_id_prefix,
                    location=location,
                    project=project,
                    job_config=new_job_config,
                    timeout=timeout,
                )

        finally:
            os.remove(tmppath)
2841 def load_table_from_json(
2842 self,
2843 json_rows: Iterable[Dict[str, Any]],
2844 destination: Union[Table, TableReference, TableListItem, str],
2845 num_retries: int = _DEFAULT_NUM_RETRIES,
2846 job_id: Optional[str] = None,
2847 job_id_prefix: Optional[str] = None,
2848 location: Optional[str] = None,
2849 project: Optional[str] = None,
2850 job_config: Optional[LoadJobConfig] = None,
2851 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
2852 ) -> job.LoadJob:
2853 """Upload the contents of a table from a JSON string or dict.
2855 Args:
2856 json_rows (Iterable[Dict[str, Any]]):
2857 Row data to be inserted. Keys must match the table schema fields
2858 and values must be JSON-compatible representations.
2860 .. note::
2862 If your data is already a newline-delimited JSON string,
2863 it is best to wrap it into a file-like object and pass it
2864 to :meth:`~google.cloud.bigquery.client.Client.load_table_from_file`::
2866 import io
2867 from google.cloud import bigquery
2869 data = u'{"foo": "bar"}'
2870 data_as_file = io.StringIO(data)
2872 client = bigquery.Client()
2873 client.load_table_from_file(data_as_file, ...)
2875 destination (Union[ \
2876 Table, \
2877 TableReference, \
2878 TableListItem, \
2879 str \
2880 ]):
2881 Table into which data is to be loaded. If a string is passed
2882 in, this method attempts to create a table reference from a
2883 string using
2884 :func:`google.cloud.bigquery.table.TableReference.from_string`.
2885 num_retries (Optional[int]): Number of upload retries. Defaults to 6.
2886 job_id (Optional[str]): Name of the job.
2887 job_id_prefix (Optional[str]):
2888 The user-provided prefix for a randomly generated job ID.
2889 This parameter will be ignored if a ``job_id`` is also given.
2890 location (Optional[str]):
2891 Location where to run the job. Must match the location of the
2892 destination table.
2893 project (Optional[str]):
2894 Project ID of the project of where to run the job. Defaults
2895 to the client's project.
2896 job_config (Optional[LoadJobConfig]):
2897 Extra configuration options for the job. The ``source_format``
2898 setting is always set to
2899 :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`.
2900 timeout (Optional[float]):
2901 The number of seconds to wait for the underlying HTTP transport
2902 before using ``retry``. Depending on the retry strategy, a request may
2903 be repeated several times using the same timeout each time.
2904 Defaults to None.
2906 Can also be passed as a tuple (connect_timeout, read_timeout).
2907 See :meth:`requests.Session.request` documentation for details.
2909 Returns:
2910 google.cloud.bigquery.job.LoadJob: A new load job.
2912 Raises:
2913 TypeError:
2914 If ``job_config`` is not an instance of
2915 :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2916 """
2917 job_id = _make_job_id(job_id, job_id_prefix)
2919 if job_config is not None:
2920 _verify_job_config_type(job_config, LoadJobConfig)
2921 else:
2922 job_config = job.LoadJobConfig()
2924 new_job_config = job_config._fill_from_default(self._default_load_job_config)
2926 new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON
2928 # In specific conditions, we check if the table alread exists, and/or
2929 # set the autodetect value for the user. For exact conditions, see table
2930 # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
2931 if new_job_config.schema is None and new_job_config.autodetect is None:
2932 if new_job_config.write_disposition in (
2933 job.WriteDisposition.WRITE_TRUNCATE,
2934 job.WriteDisposition.WRITE_EMPTY,
2935 ):
2936 new_job_config.autodetect = True
2937 else:
2938 try:
2939 self.get_table(destination)
2940 except core_exceptions.NotFound:
2941 new_job_config.autodetect = True
2942 else:
2943 new_job_config.autodetect = False
2945 if project is None:
2946 project = self.project
2948 if location is None:
2949 location = self.location
2951 destination = _table_arg_to_table_ref(destination, default_project=self.project)
2953 data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
2954 encoded_str = data_str.encode()
2955 data_file = io.BytesIO(encoded_str)
2956 return self.load_table_from_file(
2957 data_file,
2958 destination,
2959 size=len(encoded_str),
2960 num_retries=num_retries,
2961 job_id=job_id,
2962 job_id_prefix=job_id_prefix,
2963 location=location,
2964 project=project,
2965 job_config=new_job_config,
2966 timeout=timeout,
2967 )
2969 def _do_resumable_upload(
2970 self,
2971 stream: IO[bytes],
2972 metadata: Mapping[str, str],
2973 num_retries: int,
2974 timeout: Optional[ResumableTimeoutType],
2975 project: Optional[str] = None,
2976 ) -> "requests.Response":
2977 """Perform a resumable upload.
2979 Args:
2980 stream (IO[bytes]): A bytes IO object open for reading.
2981 metadata (Mapping[str, str]): The metadata associated with the upload.
2982 num_retries (int):
2983 Number of upload retries. (Deprecated: This
2984 argument will be removed in a future release.)
2985 timeout (Optional[float]):
2986 The number of seconds to wait for the underlying HTTP transport
2987 before using ``retry``. Depending on the retry strategy, a request may
2988 be repeated several times using the same timeout each time.
2990 Can also be passed as a tuple (connect_timeout, read_timeout).
2991 See :meth:`requests.Session.request` documentation for details.
2992 project (Optional[str]):
2993 Project ID of the project of where to run the upload. Defaults
2994 to the client's project.
2996 Returns:
2997 The "200 OK" response object returned after the final chunk
2998 is uploaded.
2999 """
3000 upload, transport = self._initiate_resumable_upload(
3001 stream, metadata, num_retries, timeout, project=project
3002 )
3004 while not upload.finished:
3005 response = upload.transmit_next_chunk(transport, timeout=timeout)
3007 return response
3009 def _initiate_resumable_upload(
3010 self,
3011 stream: IO[bytes],
3012 metadata: Mapping[str, str],
3013 num_retries: int,
3014 timeout: Optional[ResumableTimeoutType],
3015 project: Optional[str] = None,
3016 ):
3017 """Initiate a resumable upload.
3019 Args:
3020 stream (IO[bytes]): A bytes IO object open for reading.
3021 metadata (Mapping[str, str]): The metadata associated with the upload.
3022 num_retries (int):
3023 Number of upload retries. (Deprecated: This
3024 argument will be removed in a future release.)
3025 timeout (Optional[float]):
3026 The number of seconds to wait for the underlying HTTP transport
3027 before using ``retry``. Depending on the retry strategy, a request may
3028 be repeated several times using the same timeout each time.
3030 Can also be passed as a tuple (connect_timeout, read_timeout).
3031 See :meth:`requests.Session.request` documentation for details.
3032 project (Optional[str]):
3033 Project ID of the project of where to run the upload. Defaults
3034 to the client's project.
3036 Returns:
3037 Tuple:
3038 Pair of
3040 * The :class:`~google.resumable_media.requests.ResumableUpload`
3041 that was created
3042 * The ``transport`` used to initiate the upload.
3043 """
3044 chunk_size = _DEFAULT_CHUNKSIZE
3045 transport = self._http
3046 headers = _get_upload_headers(self._connection.user_agent)
3048 if project is None:
3049 project = self.project
3050 # TODO: Increase the minimum version of google-cloud-core to 1.6.0
3051 # and remove this logic. See:
3052 # https://github.com/googleapis/python-bigquery/issues/509
3053 hostname = (
3054 self._connection.API_BASE_URL
3055 if not hasattr(self._connection, "get_api_base_url_for_mtls")
3056 else self._connection.get_api_base_url_for_mtls()
3057 )
3058 upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project)
3060 # TODO: modify ResumableUpload to take a retry.Retry object
3061 # that it can use for the initial RPC.
3062 upload = ResumableUpload(upload_url, chunk_size, headers=headers)
3064 if num_retries is not None:
3065 upload._retry_strategy = resumable_media.RetryStrategy(
3066 max_retries=num_retries
3067 )
3069 upload.initiate(
3070 transport,
3071 stream,
3072 metadata,
3073 _GENERIC_CONTENT_TYPE,
3074 stream_final=False,
3075 timeout=timeout,
3076 )
3078 return upload, transport
3080 def _do_multipart_upload(
3081 self,
3082 stream: IO[bytes],
3083 metadata: Mapping[str, str],
3084 size: int,
3085 num_retries: int,
3086 timeout: Optional[ResumableTimeoutType],
3087 project: Optional[str] = None,
3088 ):
3089 """Perform a multipart upload.
3091 Args:
3092 stream (IO[bytes]): A bytes IO object open for reading.
3093 metadata (Mapping[str, str]): The metadata associated with the upload.
3094 size (int):
3095 The number of bytes to be uploaded (which will be read
3096 from ``stream``). If not provided, the upload will be
3097 concluded once ``stream`` is exhausted (or :data:`None`).
3098 num_retries (int):
3099 Number of upload retries. (Deprecated: This
3100 argument will be removed in a future release.)
3101 timeout (Optional[float]):
3102 The number of seconds to wait for the underlying HTTP transport
3103 before using ``retry``. Depending on the retry strategy, a request may
3104 be repeated several times using the same timeout each time.
3106 Can also be passed as a tuple (connect_timeout, read_timeout).
3107 See :meth:`requests.Session.request` documentation for details.
3108 project (Optional[str]):
3109 Project ID of the project of where to run the upload. Defaults
3110 to the client's project.
3112 Returns:
3113 requests.Response:
3114 The "200 OK" response object returned after the multipart
3115 upload request.
3117 Raises:
3118 ValueError:
3119 if the ``stream`` has fewer than ``size``
3120 bytes remaining.
3121 """
3122 data = stream.read(size)
3123 if len(data) < size:
3124 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
3125 raise ValueError(msg)
3127 headers = _get_upload_headers(self._connection.user_agent)
3129 if project is None:
3130 project = self.project
3132 # TODO: Increase the minimum version of google-cloud-core to 1.6.0
3133 # and remove this logic. See:
3134 # https://github.com/googleapis/python-bigquery/issues/509
3135 hostname = (
3136 self._connection.API_BASE_URL
3137 if not hasattr(self._connection, "get_api_base_url_for_mtls")
3138 else self._connection.get_api_base_url_for_mtls()
3139 )
3140 upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project)
3141 upload = MultipartUpload(upload_url, headers=headers)
3143 if num_retries is not None:
3144 upload._retry_strategy = resumable_media.RetryStrategy(
3145 max_retries=num_retries
3146 )
3148 response = upload.transmit(
3149 self._http, data, metadata, _GENERIC_CONTENT_TYPE, timeout=timeout
3150 )
3152 return response
3154 def copy_table(
3155 self,
3156 sources: Union[
3157 Table,
3158 TableReference,
3159 TableListItem,
3160 str,
3161 Sequence[Union[Table, TableReference, TableListItem, str]],
3162 ],
3163 destination: Union[Table, TableReference, TableListItem, str],
3164 job_id: Optional[str] = None,
3165 job_id_prefix: Optional[str] = None,
3166 location: Optional[str] = None,
3167 project: Optional[str] = None,
3168 job_config: Optional[CopyJobConfig] = None,
3169 retry: retries.Retry = DEFAULT_RETRY,
3170 timeout: TimeoutType = DEFAULT_TIMEOUT,
3171 ) -> job.CopyJob:
3172 """Copy one or more tables to another table.
3174 See
3175 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationtablecopy
3177 Args:
3178 sources (Union[ \
3179 google.cloud.bigquery.table.Table, \
3180 google.cloud.bigquery.table.TableReference, \
3181 google.cloud.bigquery.table.TableListItem, \
3182 str, \
3183 Sequence[ \
3184 Union[ \
3185 google.cloud.bigquery.table.Table, \
3186 google.cloud.bigquery.table.TableReference, \
3187 google.cloud.bigquery.table.TableListItem, \
3188 str, \
3189 ] \
3190 ], \
3191 ]):
3192 Table or tables to be copied.
3193 destination (Union[ \
3194 google.cloud.bigquery.table.Table, \
3195 google.cloud.bigquery.table.TableReference, \
3196 google.cloud.bigquery.table.TableListItem, \
3197 str, \
3198 ]):
3199 Table into which data is to be copied.
3200 job_id (Optional[str]): The ID of the job.
3201 job_id_prefix (Optional[str]):
3202 The user-provided prefix for a randomly generated job ID.
3203 This parameter will be ignored if a ``job_id`` is also given.
3204 location (Optional[str]):
3205 Location where to run the job. Must match the location of any
3206 source table as well as the destination table.
3207 project (Optional[str]):
3208 Project ID of the project of where to run the job. Defaults
3209 to the client's project.
3210 job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]):
3211 Extra configuration options for the job.
3212 retry (Optional[google.api_core.retry.Retry]):
3213 How to retry the RPC.
3214 timeout (Optional[float]):
3215 The number of seconds to wait for the underlying HTTP transport
3216 before using ``retry``.
3218 Returns:
3219 google.cloud.bigquery.job.CopyJob: A new copy job instance.
3221 Raises:
3222 TypeError:
3223 If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.CopyJobConfig`
3224 class.
3225 """
3226 job_id = _make_job_id(job_id, job_id_prefix)
3228 if project is None:
3229 project = self.project
3231 if location is None:
3232 location = self.location
3234 job_ref = job._JobReference(job_id, project=project, location=location)
3236 # sources can be one of many different input types. (string, Table,
3237 # TableReference, or a sequence of any of those.) Convert them all to a
3238 # list of TableReferences.
3239 #
3240 # _table_arg_to_table_ref leaves lists unmodified.
3241 sources = _table_arg_to_table_ref(sources, default_project=self.project)
3243 if not isinstance(sources, collections_abc.Sequence):
3244 sources = [sources]
3246 sources = [
3247 _table_arg_to_table_ref(source, default_project=self.project)
3248 for source in sources
3249 ]
3251 destination = _table_arg_to_table_ref(destination, default_project=self.project)
3253 if job_config:
3254 _verify_job_config_type(job_config, google.cloud.bigquery.job.CopyJobConfig)
3255 job_config = copy.deepcopy(job_config)
3257 copy_job = job.CopyJob(
3258 job_ref, sources, destination, client=self, job_config=job_config
3259 )
3260 copy_job._begin(retry=retry, timeout=timeout)
3262 return copy_job
3264 def extract_table(
3265 self,
3266 source: Union[Table, TableReference, TableListItem, Model, ModelReference, str],
3267 destination_uris: Union[str, Sequence[str]],
3268 job_id: Optional[str] = None,
3269 job_id_prefix: Optional[str] = None,
3270 location: Optional[str] = None,
3271 project: Optional[str] = None,
3272 job_config: Optional[ExtractJobConfig] = None,
3273 retry: retries.Retry = DEFAULT_RETRY,
3274 timeout: TimeoutType = DEFAULT_TIMEOUT,
3275 source_type: str = "Table",
3276 ) -> job.ExtractJob:
3277 """Start a job to extract a table into Cloud Storage files.
3279 See
3280 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationextract
3282 Args:
3283 source (Union[ \
3284 google.cloud.bigquery.table.Table, \
3285 google.cloud.bigquery.table.TableReference, \
3286 google.cloud.bigquery.table.TableListItem, \
3287 google.cloud.bigquery.model.Model, \
3288 google.cloud.bigquery.model.ModelReference, \
3289 src, \
3290 ]):
3291 Table or Model to be extracted.
3292 destination_uris (Union[str, Sequence[str]]):
3293 URIs of Cloud Storage file(s) into which table data is to be
3294 extracted; in format
3295 ``gs://<bucket_name>/<object_name_or_glob>``.
3296 job_id (Optional[str]): The ID of the job.
3297 job_id_prefix (Optional[str]):
3298 The user-provided prefix for a randomly generated job ID.
3299 This parameter will be ignored if a ``job_id`` is also given.
3300 location (Optional[str]):
3301 Location where to run the job. Must match the location of the
3302 source table.
3303 project (Optional[str]):
3304 Project ID of the project of where to run the job. Defaults
3305 to the client's project.
3306 job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]):
3307 Extra configuration options for the job.
3308 retry (Optional[google.api_core.retry.Retry]):
3309 How to retry the RPC.
3310 timeout (Optional[float]):
3311 The number of seconds to wait for the underlying HTTP transport
3312 before using ``retry``.
3313 source_type (Optional[str]):
3314 Type of source to be extracted.``Table`` or ``Model``. Defaults to ``Table``.
3315 Returns:
3316 google.cloud.bigquery.job.ExtractJob: A new extract job instance.
3318 Raises:
3319 TypeError:
3320 If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.ExtractJobConfig`
3321 class.
3322 ValueError:
3323 If ``source_type`` is not among ``Table``,``Model``.
3324 """
3325 job_id = _make_job_id(job_id, job_id_prefix)
3327 if project is None:
3328 project = self.project
3330 if location is None:
3331 location = self.location
3333 job_ref = job._JobReference(job_id, project=project, location=location)
3334 src = source_type.lower()
3335 if src == "table":
3336 source = _table_arg_to_table_ref(source, default_project=self.project)
3337 elif src == "model":
3338 source = _model_arg_to_model_ref(source, default_project=self.project)
3339 else:
3340 raise ValueError(
3341 "Cannot pass `{}` as a ``source_type``, pass Table or Model".format(
3342 source_type
3343 )
3344 )
3346 if isinstance(destination_uris, str):
3347 destination_uris = [destination_uris]
3349 if job_config:
3350 _verify_job_config_type(
3351 job_config, google.cloud.bigquery.job.ExtractJobConfig
3352 )
3353 job_config = copy.deepcopy(job_config)
3355 extract_job = job.ExtractJob(
3356 job_ref, source, destination_uris, client=self, job_config=job_config
3357 )
3358 extract_job._begin(retry=retry, timeout=timeout)
3360 return extract_job
3362 def query(
3363 self,
3364 query: str,
3365 job_config: Optional[QueryJobConfig] = None,
3366 job_id: Optional[str] = None,
3367 job_id_prefix: Optional[str] = None,
3368 location: Optional[str] = None,
3369 project: Optional[str] = None,
3370 retry: retries.Retry = DEFAULT_RETRY,
3371 timeout: TimeoutType = DEFAULT_TIMEOUT,
3372 job_retry: retries.Retry = DEFAULT_JOB_RETRY,
3373 api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT,
3374 ) -> job.QueryJob:
3375 """Run a SQL query.
3377 See
3378 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationquery
3380 Args:
3381 query (str):
3382 SQL query to be executed. Defaults to the standard SQL
3383 dialect. Use the ``job_config`` parameter to change dialects.
3384 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
3385 Extra configuration options for the job.
3386 To override any options that were previously set in
3387 the ``default_query_job_config`` given to the
3388 ``Client`` constructor, manually set those options to ``None``,
3389 or whatever value is preferred.
3390 job_id (Optional[str]): ID to use for the query job.
3391 job_id_prefix (Optional[str]):
3392 The prefix to use for a randomly generated job ID. This parameter
3393 will be ignored if a ``job_id`` is also given.
3394 location (Optional[str]):
3395 Location where to run the job. Must match the location of the
3396 table used in the query as well as the destination table.
3397 project (Optional[str]):
3398 Project ID of the project of where to run the job. Defaults
3399 to the client's project.
3400 retry (Optional[google.api_core.retry.Retry]):
3401 How to retry the RPC. This only applies to making RPC
3402 calls. It isn't used to retry failed jobs. This has
3403 a reasonable default that should only be overridden
3404 with care.
3405 timeout (Optional[float]):
3406 The number of seconds to wait for the underlying HTTP transport
3407 before using ``retry``.
3408 job_retry (Optional[google.api_core.retry.Retry]):
3409 How to retry failed jobs. The default retries
3410 rate-limit-exceeded errors. Passing ``None`` disables
3411 job retry.
3413 Not all jobs can be retried. If ``job_id`` is
3414 provided, then the job returned by the query will not
3415 be retryable, and an exception will be raised if a
3416 non-``None`` (and non-default) value for ``job_retry``
3417 is also provided.
3419 Note that errors aren't detected until ``result()`` is
3420 called on the job returned. The ``job_retry``
3421 specified here becomes the default ``job_retry`` for
3422 ``result()``, where it can also be specified.
3423 api_method (Union[str, enums.QueryApiMethod]):
3424 Method with which to start the query job.
3426 See :class:`google.cloud.bigquery.enums.QueryApiMethod` for
3427 details on the difference between the query start methods.
3429 Returns:
3430 google.cloud.bigquery.job.QueryJob: A new query job instance.
3432 Raises:
3433 TypeError:
3434 If ``job_config`` is not an instance of
3435 :class:`~google.cloud.bigquery.job.QueryJobConfig`
3436 class, or if both ``job_id`` and non-``None`` non-default
3437 ``job_retry`` are provided.
3438 """
3439 job_id_given = job_id is not None
3440 if (
3441 job_id_given
3442 and job_retry is not None
3443 and job_retry is not DEFAULT_JOB_RETRY
3444 ):
3445 raise TypeError(
3446 "`job_retry` was provided, but the returned job is"
3447 " not retryable, because a custom `job_id` was"
3448 " provided."
3449 )
3451 if job_id_given and api_method == enums.QueryApiMethod.QUERY:
3452 raise TypeError(
3453 "`job_id` was provided, but the 'QUERY' `api_method` was requested."
3454 )
3456 if project is None:
3457 project = self.project
3459 if location is None:
3460 location = self.location
3462 if job_config is not None:
3463 _verify_job_config_type(job_config, QueryJobConfig)
3465 job_config = _job_helpers.job_config_with_defaults(
3466 job_config, self._default_query_job_config
3467 )
3469 # Note that we haven't modified the original job_config (or
3470 # _default_query_job_config) up to this point.
3471 if api_method == enums.QueryApiMethod.QUERY:
3472 return _job_helpers.query_jobs_query(
3473 self,
3474 query,
3475 job_config,
3476 location,
3477 project,
3478 retry,
3479 timeout,
3480 job_retry,
3481 )
3482 elif api_method == enums.QueryApiMethod.INSERT:
3483 return _job_helpers.query_jobs_insert(
3484 self,
3485 query,
3486 job_config,
3487 job_id,
3488 job_id_prefix,
3489 location,
3490 project,
3491 retry,
3492 timeout,
3493 job_retry,
3494 )
3495 else:
3496 raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}")
3498 def query_and_wait(
3499 self,
3500 query,
3501 *,
3502 job_config: Optional[QueryJobConfig] = None,
3503 location: Optional[str] = None,
3504 project: Optional[str] = None,
3505 api_timeout: TimeoutType = DEFAULT_TIMEOUT,
3506 wait_timeout: TimeoutType = None,
3507 retry: retries.Retry = DEFAULT_RETRY,
3508 job_retry: retries.Retry = DEFAULT_JOB_RETRY,
3509 page_size: Optional[int] = None,
3510 max_results: Optional[int] = None,
3511 ) -> RowIterator:
3512 """Run the query, wait for it to finish, and return the results.
3514 While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the
3515 ``jobs.query`` REST API, use the default ``jobCreationMode`` unless
3516 the environment variable ``QUERY_PREVIEW_ENABLED=true``. After
3517 ``jobCreationMode`` is GA, this method will always use
3518 ``jobCreationMode=JOB_CREATION_OPTIONAL``. See:
3519 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
3521 Args:
3522 query (str):
3523 SQL query to be executed. Defaults to the standard SQL
3524 dialect. Use the ``job_config`` parameter to change dialects.
3525 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
3526 Extra configuration options for the job.
3527 To override any options that were previously set in
3528 the ``default_query_job_config`` given to the
3529 ``Client`` constructor, manually set those options to ``None``,
3530 or whatever value is preferred.
3531 location (Optional[str]):
3532 Location where to run the job. Must match the location of the
3533 table used in the query as well as the destination table.
3534 project (Optional[str]):
3535 Project ID of the project of where to run the job. Defaults
3536 to the client's project.
3537 api_timeout (Optional[float]):
3538 The number of seconds to wait for the underlying HTTP transport
3539 before using ``retry``.
3540 wait_timeout (Optional[float]):
3541 The number of seconds to wait for the query to finish. If the
3542 query doesn't finish before this timeout, the client attempts
3543 to cancel the query.
3544 retry (Optional[google.api_core.retry.Retry]):
3545 How to retry the RPC. This only applies to making RPC
3546 calls. It isn't used to retry failed jobs. This has
3547 a reasonable default that should only be overridden
3548 with care.
3549 job_retry (Optional[google.api_core.retry.Retry]):
3550 How to retry failed jobs. The default retries
3551 rate-limit-exceeded errors. Passing ``None`` disables
3552 job retry. Not all jobs can be retried.
3553 page_size (Optional[int]):
3554 The maximum number of rows in each page of results from this
3555 request. Non-positive values are ignored.
3556 max_results (Optional[int]):
3557 The maximum total number of rows from this request.
3559 Returns:
3560 google.cloud.bigquery.table.RowIterator:
3561 Iterator of row data
3562 :class:`~google.cloud.bigquery.table.Row`-s. During each
3563 page, the iterator will have the ``total_rows`` attribute
3564 set, which counts the total number of rows **in the result
3565 set** (this is distinct from the total number of rows in the
3566 current page: ``iterator.page.num_items``).
3568 If the query is a special query that produces no results, e.g.
3569 a DDL query, an ``_EmptyRowIterator`` instance is returned.
3571 Raises:
3572 TypeError:
3573 If ``job_config`` is not an instance of
3574 :class:`~google.cloud.bigquery.job.QueryJobConfig`
3575 class.
3576 """
3577 if project is None:
3578 project = self.project
3580 if location is None:
3581 location = self.location
3583 if job_config is not None:
3584 _verify_job_config_type(job_config, QueryJobConfig)
3586 job_config = _job_helpers.job_config_with_defaults(
3587 job_config, self._default_query_job_config
3588 )
3590 return _job_helpers.query_and_wait(
3591 self,
3592 query,
3593 job_config=job_config,
3594 location=location,
3595 project=project,
3596 api_timeout=api_timeout,
3597 wait_timeout=wait_timeout,
3598 retry=retry,
3599 job_retry=job_retry,
3600 page_size=page_size,
3601 max_results=max_results,
3602 )
3604 def insert_rows(
3605 self,
3606 table: Union[Table, TableReference, str],
3607 rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]],
3608 selected_fields: Optional[Sequence[SchemaField]] = None,
3609 **kwargs,
3610 ) -> Sequence[Dict[str, Any]]:
3611 """Insert rows into a table via the streaming API.
3613 See
3614 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
3616 BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
3617 Additionally, if a payload vastly exceeds this limit, the request is rejected
3618 by the intermediate architecture, which returns a 413 (Payload Too Large) status code.
3621 See
3622 https://cloud.google.com/bigquery/quotas#streaming_inserts
3624 Args:
3625 table (Union[ \
3626 google.cloud.bigquery.table.Table, \
3627 google.cloud.bigquery.table.TableReference, \
3628 str, \
3629 ]):
3630 The destination table for the row data, or a reference to it.
3631 rows (Union[Sequence[Tuple], Sequence[Dict]]):
3632 Row data to be inserted. If a list of tuples is given, each
3633 tuple should contain data for each schema field on the
3634 current table and in the same order as the schema fields. If
3635 a list of dictionaries is given, the keys must include all
3636 required fields in the schema. Keys which do not correspond
3637 to a field in the schema are ignored.
3638 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
3639 The fields to return. Required if ``table`` is a
3640 :class:`~google.cloud.bigquery.table.TableReference`.
3641 kwargs (dict):
3642 Keyword arguments to
3643 :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`.
3645 Returns:
3646 Sequence[Mappings]:
3647 One mapping per row with insert errors: the "index" key
3648 identifies the row, and the "errors" key contains a list of
3649 the mappings describing one or more problems with the row.
3651 Raises:
3652 ValueError: if table's schema is not set or `rows` is not a `Sequence`.
3653 """
3654 if not isinstance(rows, (collections_abc.Sequence, collections_abc.Iterator)):
3655 raise TypeError("rows argument should be a sequence of dicts or tuples")
3657 table = _table_arg_to_table(table, default_project=self.project)
3659 if not isinstance(table, Table):
3660 raise TypeError(_NEED_TABLE_ARGUMENT)
3662 schema = table.schema
3664 # selected_fields can override the table schema.
3665 if selected_fields is not None:
3666 schema = selected_fields
3668 if len(schema) == 0:
3669 raise ValueError(
3670 (
3671 "Could not determine schema for table '{}'. Call client.get_table() "
3672 "or pass in a list of schema fields to the selected_fields argument."
3673 ).format(table)
3674 )
3676 json_rows = [_record_field_to_json(schema, row) for row in rows]
3678 return self.insert_rows_json(table, json_rows, **kwargs)
3680 def insert_rows_from_dataframe(
3681 self,
3682 table: Union[Table, TableReference, str],
3683 dataframe,
3684 selected_fields: Optional[Sequence[SchemaField]] = None,
3685 chunk_size: int = 500,
3686 **kwargs: Dict,
3687 ) -> Sequence[Sequence[dict]]:
3688 """Insert rows into a table from a dataframe via the streaming API.
3690 BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
3691 Additionally, if a payload vastly exceeds this limit, the request is rejected
3692 by the intermediate architecture, which returns a 413 (Payload Too Large) status code.
3694 See
3695 https://cloud.google.com/bigquery/quotas#streaming_inserts
3697 Args:
3698 table (Union[ \
3699 google.cloud.bigquery.table.Table, \
3700 google.cloud.bigquery.table.TableReference, \
3701 str, \
3702 ]):
3703 The destination table for the row data, or a reference to it.
3704 dataframe (pandas.DataFrame):
3705 A :class:`~pandas.DataFrame` containing the data to load. Any
3706 ``NaN`` values present in the dataframe are omitted from the
3707 streaming API request(s).
3708 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
3709 The fields to return. Required if ``table`` is a
3710 :class:`~google.cloud.bigquery.table.TableReference`.
3711 chunk_size (int):
3712 The number of rows to stream in a single chunk. Must be positive.
3713 kwargs (Dict):
3714 Keyword arguments to
3715 :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`.
3717 Returns:
3718 Sequence[Sequence[Mappings]]:
3719 A list with insert errors for each insert chunk. Each element
3720 is a list containing one mapping per row with insert errors:
3721 the "index" key identifies the row, and the "errors" key
3722 contains a list of the mappings describing one or more problems
3723 with the row.
3725 Raises:
3726 ValueError: if table's schema is not set
3727 """
3728 insert_results = []
3730 chunk_count = int(math.ceil(len(dataframe) / chunk_size))
3731 rows_iter = _pandas_helpers.dataframe_to_json_generator(dataframe)
3733 for _ in range(chunk_count):
3734 rows_chunk = itertools.islice(rows_iter, chunk_size)
3735 result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs)
3736 insert_results.append(result)
3738 return insert_results
3740 def insert_rows_json(
3741 self,
3742 table: Union[Table, TableReference, TableListItem, str],
3743 json_rows: Sequence[Mapping[str, Any]],
3744 row_ids: Union[
3745 Iterable[Optional[str]], AutoRowIDs, None
3746 ] = AutoRowIDs.GENERATE_UUID,
3747 skip_invalid_rows: Optional[bool] = None,
3748 ignore_unknown_values: Optional[bool] = None,
3749 template_suffix: Optional[str] = None,
3750 retry: retries.Retry = DEFAULT_RETRY,
3751 timeout: TimeoutType = DEFAULT_TIMEOUT,
3752 ) -> Sequence[dict]:
3753 """Insert rows into a table without applying local type conversions.
3755 See
3756 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
3758 BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
3759 Additionally, if a payload vastly exceeds this limit, the request is rejected
3760 by the intermediate architecture, which returns a 413 (Payload Too Large) status code.
3762 See
3763 https://cloud.google.com/bigquery/quotas#streaming_inserts
3765 Args:
3766 table (Union[ \
3767 google.cloud.bigquery.table.Table \
3768 google.cloud.bigquery.table.TableReference, \
3769 google.cloud.bigquery.table.TableListItem, \
3770 str \
3771 ]):
3772 The destination table for the row data, or a reference to it.
3773 json_rows (Sequence[Dict]):
3774 Row data to be inserted. Keys must match the table schema fields
3775 and values must be JSON-compatible representations.
3776 row_ids (Union[Iterable[str], AutoRowIDs, None]):
3777 Unique IDs, one per row being inserted. An ID can also be
3778 ``None``, indicating that an explicit insert ID should **not**
3779 be used for that row. If the argument is omitted altogether,
3780 unique IDs are created automatically.
3782 .. versionchanged:: 2.21.0
3783 Can also be an iterable, not just a sequence, or an
3784 :class:`AutoRowIDs` enum member.
3786 .. deprecated:: 2.21.0
3787 Passing ``None`` to explicitly request autogenerating insert IDs is
3788 deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead.
3790 skip_invalid_rows (Optional[bool]):
3791 Insert all valid rows of a request, even if invalid rows exist.
3792 The default value is ``False``, which causes the entire request
3793 to fail if any invalid rows exist.
3794 ignore_unknown_values (Optional[bool]):
3795 Accept rows that contain values that do not match the schema.
3796 The unknown values are ignored. Default is ``False``, which
3797 treats unknown values as errors.
3798 template_suffix (Optional[str]):
3799 Treat ``name`` as a template table and provide a suffix.
3800 BigQuery will create the table ``<name> + <template_suffix>``
3801 based on the schema of the template table. See
3802 https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
3803 retry (Optional[google.api_core.retry.Retry]):
3804 How to retry the RPC.
3805 timeout (Optional[float]):
3806 The number of seconds to wait for the underlying HTTP transport
3807 before using ``retry``.
3809 Returns:
3810 Sequence[Mapping]:
3811 One mapping per row with insert errors: the "index" key
3812 identifies the row, and the "errors" key contains a list of
3813 the mappings describing one or more problems with the row.
3815 Raises:
3816 TypeError: if `json_rows` is not a `Sequence`.
3817 """
3818 if not isinstance(
3819 json_rows, (collections_abc.Sequence, collections_abc.Iterator)
3820 ):
3821 raise TypeError("json_rows argument should be a sequence of dicts")
3822 # Convert table to just a reference because unlike insert_rows,
3823 # insert_rows_json doesn't need the table schema. It's not doing any
3824 # type conversions.
3825 table = _table_arg_to_table_ref(table, default_project=self.project)
3826 rows_info: List[Any] = []
3827 data: Dict[str, Any] = {"rows": rows_info}
3829 if row_ids is None:
3830 warnings.warn(
3831 "Passing None for row_ids is deprecated. To explicitly request "
3832 "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead",
3833 category=DeprecationWarning,
3834 )
3835 row_ids = AutoRowIDs.GENERATE_UUID
3837 if not isinstance(row_ids, AutoRowIDs):
3838 try:
3839 row_ids_iter = iter(row_ids)
3840 except TypeError:
3841 msg = "row_ids is neither an iterable nor an AutoRowIDs enum member"
3842 raise TypeError(msg)
3844 for i, row in enumerate(json_rows):
3845 info: Dict[str, Any] = {"json": row}
3847 if row_ids is AutoRowIDs.GENERATE_UUID:
3848 info["insertId"] = str(uuid.uuid4())
3849 elif row_ids is AutoRowIDs.DISABLED:
3850 info["insertId"] = None
3851 else:
3852 try:
3853 insert_id = next(row_ids_iter)
3854 except StopIteration:
3855 msg = f"row_ids did not generate enough IDs, error at index {i}"
3856 raise ValueError(msg)
3857 else:
3858 info["insertId"] = insert_id
3860 rows_info.append(info)
3862 if skip_invalid_rows is not None:
3863 data["skipInvalidRows"] = skip_invalid_rows
3865 if ignore_unknown_values is not None:
3866 data["ignoreUnknownValues"] = ignore_unknown_values
3868 if template_suffix is not None:
3869 data["templateSuffix"] = template_suffix
3871 path = "%s/insertAll" % table.path
3872 # We can always retry, because every row has an insert ID.
3873 span_attributes = {"path": path}
3874 response = self._call_api(
3875 retry,
3876 span_name="BigQuery.insertRowsJson",
3877 span_attributes=span_attributes,
3878 method="POST",
3879 path=path,
3880 data=data,
3881 timeout=timeout,
3882 )
3883 errors = []
3885 for error in response.get("insertErrors", ()):
3886 errors.append({"index": int(error["index"]), "errors": error["errors"]})
3888 return errors
3890 def list_partitions(
3891 self,
3892 table: Union[Table, TableReference, TableListItem, str],
3893 retry: retries.Retry = DEFAULT_RETRY,
3894 timeout: TimeoutType = DEFAULT_TIMEOUT,
3895 ) -> Sequence[str]:
3896 """List the partitions in a table.
3898 Args:
3899 table (Union[ \
3900 google.cloud.bigquery.table.Table, \
3901 google.cloud.bigquery.table.TableReference, \
3902 google.cloud.bigquery.table.TableListItem, \
3903 str, \
3904 ]):
3905 The table or reference from which to get partition info
3906 retry (Optional[google.api_core.retry.Retry]):
3907 How to retry the RPC.
3908 timeout (Optional[float]):
3909 The number of seconds to wait for the underlying HTTP transport
3910 before using ``retry``.
3911 If multiple requests are made under the hood, ``timeout``
3912 applies to each individual request.
3914 Returns:
3915 List[str]:
3916 A list of the partition ids present in the partitioned table
3917 """
3918 table = _table_arg_to_table_ref(table, default_project=self.project)
3919 meta_table = self.get_table(
3920 TableReference(
3921 DatasetReference(table.project, table.dataset_id),
3922 "%s$__PARTITIONS_SUMMARY__" % table.table_id,
3923 ),
3924 retry=retry,
3925 timeout=timeout,
3926 )
3928 subset = [col for col in meta_table.schema if col.name == "partition_id"]
3929 return [
3930 row[0]
3931 for row in self.list_rows(
3932 meta_table, selected_fields=subset, retry=retry, timeout=timeout
3933 )
3934 ]
3936 def list_rows(
3937 self,
3938 table: Union[Table, TableListItem, TableReference, str],
3939 selected_fields: Optional[Sequence[SchemaField]] = None,
3940 max_results: Optional[int] = None,
3941 page_token: Optional[str] = None,
3942 start_index: Optional[int] = None,
3943 page_size: Optional[int] = None,
3944 retry: retries.Retry = DEFAULT_RETRY,
3945 timeout: TimeoutType = DEFAULT_TIMEOUT,
3946 ) -> RowIterator:
3947 """List the rows of the table.
3949 See
3950 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list
3952 .. note::
3954 This method assumes that the provided schema is up-to-date with the
3955 schema as defined on the back-end: if the two schemas are not
3956 identical, the values returned may be incomplete. To ensure that the
3957 local copy of the schema is up-to-date, call ``client.get_table``.
3959 Args:
3960 table (Union[ \
3961 google.cloud.bigquery.table.Table, \
3962 google.cloud.bigquery.table.TableListItem, \
3963 google.cloud.bigquery.table.TableReference, \
3964 str, \
3965 ]):
3966 The table to list, or a reference to it. When the table
3967 object does not contain a schema and ``selected_fields`` is
3968 not supplied, this method calls ``get_table`` to fetch the
3969 table schema.
3970 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
3971 The fields to return. If not supplied, data for all columns
3972 are downloaded.
3973 max_results (Optional[int]):
3974 Maximum number of rows to return.
3975 page_token (Optional[str]):
3976 Token representing a cursor into the table's rows.
3977 If not passed, the API will return the first page of the
3978 rows. The token marks the beginning of the iterator to be
3979 returned and the value of the ``page_token`` can be accessed
3980 at ``next_page_token`` of the
3981 :class:`~google.cloud.bigquery.table.RowIterator`.
3982 start_index (Optional[int]):
3983 The zero-based index of the starting row to read.
3984 page_size (Optional[int]):
3985 The maximum number of rows in each page of results from this request.
3986 Non-positive values are ignored. Defaults to a sensible value set by the API.
3987 retry (Optional[google.api_core.retry.Retry]):
3988 How to retry the RPC.
3989 timeout (Optional[float]):
3990 The number of seconds to wait for the underlying HTTP transport
3991 before using ``retry``.
3992 If multiple requests are made under the hood, ``timeout``
3993 applies to each individual request.
3995 Returns:
3996 google.cloud.bigquery.table.RowIterator:
3997 Iterator of row data
3998 :class:`~google.cloud.bigquery.table.Row`-s. During each
3999 page, the iterator will have the ``total_rows`` attribute
4000 set, which counts the total number of rows **in the table**
4001 (this is distinct from the total number of rows in the
4002 current page: ``iterator.page.num_items``).
4003 """
4004 table = _table_arg_to_table(table, default_project=self.project)
4006 if not isinstance(table, Table):
4007 raise TypeError(_NEED_TABLE_ARGUMENT)
4009 schema = table.schema
4011 # selected_fields can override the table schema.
4012 if selected_fields is not None:
4013 schema = selected_fields
4015 # No schema, but no selected_fields. Assume the developer wants all
4016 # columns, so get the table resource for them rather than failing.
4017 elif len(schema) == 0:
4018 table = self.get_table(table.reference, retry=retry, timeout=timeout)
4019 schema = table.schema
4021 params: Dict[str, Any] = {}
4022 if selected_fields is not None:
4023 params["selectedFields"] = ",".join(field.name for field in selected_fields)
4024 if start_index is not None:
4025 params["startIndex"] = start_index
4027 params["formatOptions.useInt64Timestamp"] = True
4028 row_iterator = RowIterator(
4029 client=self,
4030 api_request=functools.partial(self._call_api, retry, timeout=timeout),
4031 path="%s/data" % (table.path,),
4032 schema=schema,
4033 page_token=page_token,
4034 max_results=max_results,
4035 page_size=page_size,
4036 extra_params=params,
4037 table=table,
4038 # Pass in selected_fields separately from schema so that full
4039 # tables can be fetched without a column filter.
4040 selected_fields=selected_fields,
4041 total_rows=getattr(table, "num_rows", None),
4042 project=table.project,
4043 location=table.location,
4044 )
4045 return row_iterator
4047 def _list_rows_from_query_results(
4048 self,
4049 job_id: str,
4050 location: str,
4051 project: str,
4052 schema: Sequence[SchemaField],
4053 total_rows: Optional[int] = None,
4054 destination: Optional[Union[Table, TableReference, TableListItem, str]] = None,
4055 max_results: Optional[int] = None,
4056 start_index: Optional[int] = None,
4057 page_size: Optional[int] = None,
4058 retry: retries.Retry = DEFAULT_RETRY,
4059 timeout: TimeoutType = DEFAULT_TIMEOUT,
4060 query_id: Optional[str] = None,
4061 first_page_response: Optional[Dict[str, Any]] = None,
4062 num_dml_affected_rows: Optional[int] = None,
4063 ) -> RowIterator:
4064 """List the rows of a completed query.
4065 See
4066 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults
4067 Args:
4068 job_id (str):
4069 ID of a query job.
4070 location (str): Location of the query job.
4071 project (str):
4072 ID of the project where the query job was run.
4073 schema (Sequence[google.cloud.bigquery.schema.SchemaField]):
4074 The fields expected in these query results. Used to convert
4075 from JSON to expected Python types.
4076 total_rows (Optional[int]):
4077 Total number of rows in the query results.
4078 destination (Optional[Union[ \
4079 google.cloud.bigquery.table.Table, \
4080 google.cloud.bigquery.table.TableListItem, \
4081 google.cloud.bigquery.table.TableReference, \
4082 str, \
4083 ]]):
4084 Destination table reference. Used to fetch the query results
4085 with the BigQuery Storage API.
4086 max_results (Optional[int]):
4087 Maximum number of rows to return across the whole iterator.
4088 start_index (Optional[int]):
4089 The zero-based index of the starting row to read.
4090 page_size (Optional[int]):
4091 The maximum number of rows in each page of results from this request.
4092 Non-positive values are ignored. Defaults to a sensible value set by the API.
4093 retry (Optional[google.api_core.retry.Retry]):
4094 How to retry the RPC.
4095 timeout (Optional[float]):
4096 The number of seconds to wait for the underlying HTTP transport
4097 before using ``retry``. If set, this connection timeout may be
4098 increased to a minimum value. This prevents retries on what
4099 would otherwise be a successful response.
4100 If multiple requests are made under the hood, ``timeout``
4101 applies to each individual request.
4102 query_id (Optional[str]):
4103 [Preview] ID of a completed query. This ID is auto-generated
4104 and not guaranteed to be populated.
4105 first_page_response (Optional[dict]):
4106 API response for the first page of results (if available).
4107 num_dml_affected_rows (Optional[int]):
4108 If this RowIterator is the result of a DML query, the number of
4109 rows that were affected.
4111 Returns:
4112 google.cloud.bigquery.table.RowIterator:
4113 Iterator of row data
4114 :class:`~google.cloud.bigquery.table.Row`-s.
4115 """
4116 params: Dict[str, Any] = {
4117 "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
4118 "location": location,
4119 }
4121 if timeout is not None:
4122 if not isinstance(timeout, (int, float)):
4123 timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
4124 else:
4125 timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
4127 if start_index is not None:
4128 params["startIndex"] = start_index
4130 # We don't call jobs.query with a page size, so if the user explicitly
4131 # requests a certain size, invalidate the cache.
4132 if page_size is not None:
4133 first_page_response = None
4135 params["formatOptions.useInt64Timestamp"] = True
4136 row_iterator = RowIterator(
4137 client=self,
4138 api_request=functools.partial(self._call_api, retry, timeout=timeout),
4139 path=f"/projects/{project}/queries/{job_id}",
4140 schema=schema,
4141 max_results=max_results,
4142 page_size=page_size,
4143 table=destination,
4144 extra_params=params,
4145 total_rows=total_rows,
4146 project=project,
4147 location=location,
4148 job_id=job_id,
4149 query_id=query_id,
4150 first_page_response=first_page_response,
4151 num_dml_affected_rows=num_dml_affected_rows,
4152 )
4153 return row_iterator
4155 def _schema_from_json_file_object(self, file_obj):
4156 """Helper function for schema_from_json that takes a
4157 file object that describes a table schema.
4159 Returns:
4160 List of schema field objects.
4161 """
4162 json_data = json.load(file_obj)
4163 return [SchemaField.from_api_repr(field) for field in json_data]
4165 def _schema_to_json_file_object(self, schema_list, file_obj):
4166 """Helper function for schema_to_json that takes a schema list and file
4167 object and writes the schema list to the file object with json.dump
4168 """
4169 json.dump(schema_list, file_obj, indent=2, sort_keys=True)
4171 def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]:
4172 """Takes a file object or file path that contains json that describes
4173 a table schema.
4175 Returns:
4176 List[SchemaField]:
4177 List of :class:`~google.cloud.bigquery.schema.SchemaField` objects.
4178 """
4179 if isinstance(file_or_path, io.IOBase):
4180 return self._schema_from_json_file_object(file_or_path)
4182 with open(file_or_path) as file_obj:
4183 return self._schema_from_json_file_object(file_obj)
4185 def schema_to_json(
4186 self, schema_list: Sequence[SchemaField], destination: "PathType"
4187 ):
4188 """Takes a list of schema field objects.
4190 Serializes the list of schema field objects as json to a file.
4192 Destination is a file path or a file object.
4193 """
4194 json_schema_list = [f.to_api_repr() for f in schema_list]
4196 if isinstance(destination, io.IOBase):
4197 return self._schema_to_json_file_object(json_schema_list, destination)
4199 with open(destination, mode="w") as file_obj:
4200 return self._schema_to_json_file_object(json_schema_list, file_obj)
    def __enter__(self):
        # Support use as a context manager: ``with Client(...) as client:``.
        # No setup is needed on entry; the client itself is the managed object.
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        # On context-manager exit, release the client's resources via
        # ``close()`` (defined elsewhere on this class). Returning None means
        # any in-flight exception is propagated, not suppressed.
        self.close()
# pylint: disable=unused-argument
def _item_to_project(iterator, resource):
    """Page-iterator item converter: JSON project resource -> ``Project``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a project.

    Returns:
        google.cloud.bigquery.client.Project: The next project in the page.
    """
    project = Project.from_api_repr(resource)
    return project
4224# pylint: enable=unused-argument
def _item_to_dataset(iterator, resource):
    """Page-iterator item converter: JSON dataset resource -> ``DatasetListItem``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a dataset.

    Returns:
        google.cloud.bigquery.dataset.DatasetListItem: The next dataset in the page.
    """
    dataset_item = DatasetListItem(resource)
    return dataset_item
def _item_to_job(iterator, resource):
    """Page-iterator item converter: JSON job resource -> job instance.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use.
        resource (Dict): An item to be converted to a job.

    Returns:
        job instance: The next job in the page.
    """
    # Delegate to the owning client, which knows how to pick the concrete
    # job class for the resource.
    client = iterator.client
    return client.job_from_resource(resource)
def _item_to_model(iterator, resource):
    """Page-iterator item converter: JSON model resource -> ``Model``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a model.

    Returns:
        google.cloud.bigquery.model.Model: The next model in the page.
    """
    model = Model.from_api_repr(resource)
    return model
def _item_to_routine(iterator, resource):
    """Page-iterator item converter: JSON routine resource -> ``Routine``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a routine.

    Returns:
        google.cloud.bigquery.routine.Routine: The next routine in the page.
    """
    routine = Routine.from_api_repr(resource)
    return routine
def _item_to_table(iterator, resource):
    """Page-iterator item converter: JSON table resource -> ``TableListItem``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a table.

    Returns:
        google.cloud.bigquery.table.Table: The next table in the page.
    """
    table_item = TableListItem(resource)
    return table_item
4297def _extract_job_reference(job, project=None, location=None):
4298 """Extract fully-qualified job reference from a job-like object.
4300 Args:
4301 job_id (Union[ \
4302 str, \
4303 google.cloud.bigquery.job.LoadJob, \
4304 google.cloud.bigquery.job.CopyJob, \
4305 google.cloud.bigquery.job.ExtractJob, \
4306 google.cloud.bigquery.job.QueryJob \
4307 ]): Job identifier.
4308 project (Optional[str]):
4309 Project where the job was run. Ignored if ``job_id`` is a job
4310 object.
4311 location (Optional[str]):
4312 Location where the job was run. Ignored if ``job_id`` is a job
4313 object.
4315 Returns:
4316 Tuple[str, str, str]: ``(project, location, job_id)``
4317 """
4318 if hasattr(job, "job_id"):
4319 project = job.project
4320 job_id = job.job_id
4321 location = job.location
4322 else:
4323 job_id = job
4325 return (project, location, job_id)
4328def _check_mode(stream):
4329 """Check that a stream was opened in read-binary mode.
4331 Args:
4332 stream (IO[bytes]): A bytes IO object open for reading.
4334 Raises:
4335 ValueError:
4336 if the ``stream.mode`` is a valid attribute
4337 and is not among ``rb``, ``r+b`` or ``rb+``.
4338 """
4339 mode = getattr(stream, "mode", None)
4341 if isinstance(stream, gzip.GzipFile):
4342 if mode != gzip.READ: # pytype: disable=module-attr
4343 raise ValueError(
4344 "Cannot upload gzip files opened in write mode: use "
4345 "gzip.GzipFile(filename, mode='rb')"
4346 )
4347 else:
4348 if mode is not None and mode not in ("rb", "r+b", "rb+"):
4349 raise ValueError(
4350 "Cannot upload files opened in text mode: use "
4351 "open(filename, mode='rb') or open(filename, mode='r+b')"
4352 )
4355def _get_upload_headers(user_agent):
4356 """Get the headers for an upload request.
4358 Args:
4359 user_agent (str): The user-agent for requests.
4361 Returns:
4362 Dict: The headers to be used for the request.
4363 """
4364 return {
4365 "Accept": "application/json",
4366 "Accept-Encoding": "gzip, deflate",
4367 "User-Agent": user_agent,
4368 "content-type": "application/json; charset=UTF-8",
4369 }
4372def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs):
4373 timeout = kwargs.get("timeout")
4374 if timeout is not None:
4375 if headers is None:
4376 headers = {}
4377 headers[TIMEOUT_HEADER] = str(timeout)
4379 if headers:
4380 kwargs["headers"] = headers
4382 return kwargs