Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/client.py: 2%
885 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-20 06:09 +0000
1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Client for interacting with the Google BigQuery API."""
17from __future__ import absolute_import
18from __future__ import division
20from collections import abc as collections_abc
21import copy
22import datetime
23import functools
24import gzip
25import io
26import itertools
27import json
28import math
29import os
30import tempfile
31import typing
32from typing import (
33 Any,
34 Dict,
35 IO,
36 Iterable,
37 Mapping,
38 List,
39 Optional,
40 Sequence,
41 Tuple,
42 Union,
43)
44import uuid
45import warnings
47from google import resumable_media # type: ignore
48from google.resumable_media.requests import MultipartUpload # type: ignore
49from google.resumable_media.requests import ResumableUpload
51import google.api_core.client_options
52import google.api_core.exceptions as core_exceptions
53from google.api_core.iam import Policy
54from google.api_core import page_iterator
55from google.api_core import retry as retries
56import google.cloud._helpers # type: ignore
57from google.cloud import exceptions # pytype: disable=import-error
58from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error
60try:
61 from google.cloud.bigquery_storage_v1.services.big_query_read.client import (
62 DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO,
63 )
64except ImportError:
65 DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore
68from google.cloud.bigquery._http import Connection
69from google.cloud.bigquery import _job_helpers
70from google.cloud.bigquery import _pandas_helpers
71from google.cloud.bigquery import _versions_helpers
72from google.cloud.bigquery import enums
73from google.cloud.bigquery import exceptions as bq_exceptions
74from google.cloud.bigquery import job
75from google.cloud.bigquery._helpers import _get_sub_prop
76from google.cloud.bigquery._helpers import _record_field_to_json
77from google.cloud.bigquery._helpers import _str_or_none
78from google.cloud.bigquery._helpers import _verify_job_config_type
79from google.cloud.bigquery._helpers import _get_bigquery_host
80from google.cloud.bigquery._helpers import _DEFAULT_HOST
81from google.cloud.bigquery._helpers import _DEFAULT_HOST_TEMPLATE
82from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE
83from google.cloud.bigquery._helpers import _validate_universe
84from google.cloud.bigquery._helpers import _get_client_universe
85from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id
86from google.cloud.bigquery.dataset import Dataset
87from google.cloud.bigquery.dataset import DatasetListItem
88from google.cloud.bigquery.dataset import DatasetReference
89from google.cloud.bigquery.enums import AutoRowIDs
90from google.cloud.bigquery.format_options import ParquetOptions
91from google.cloud.bigquery.job import (
92 CopyJob,
93 CopyJobConfig,
94 ExtractJob,
95 ExtractJobConfig,
96 LoadJob,
97 LoadJobConfig,
98 QueryJob,
99 QueryJobConfig,
100)
101from google.cloud.bigquery.model import Model
102from google.cloud.bigquery.model import ModelReference
103from google.cloud.bigquery.model import _model_arg_to_model_ref
104from google.cloud.bigquery.opentelemetry_tracing import create_span
105from google.cloud.bigquery.query import _QueryResults
106from google.cloud.bigquery.retry import (
107 DEFAULT_JOB_RETRY,
108 DEFAULT_RETRY,
109 DEFAULT_TIMEOUT,
110)
111from google.cloud.bigquery.routine import Routine
112from google.cloud.bigquery.routine import RoutineReference
113from google.cloud.bigquery.schema import SchemaField
114from google.cloud.bigquery.table import _table_arg_to_table
115from google.cloud.bigquery.table import _table_arg_to_table_ref
116from google.cloud.bigquery.table import Table
117from google.cloud.bigquery.table import TableListItem
118from google.cloud.bigquery.table import TableReference
119from google.cloud.bigquery.table import RowIterator
# Optional heavyweight dependencies: ``None`` when not installed.
pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
pandas = (
    _versions_helpers.PANDAS_VERSIONS.try_import()
)  # mypy check fails because pandas import is outside module, there are type: ignore comments related to this

# ``None`` disables the HTTP timeout.
TimeoutType = Union[float, None]
ResumableTimeoutType = Union[
    None, float, Tuple[float, float]
]  # for resumable media methods

if typing.TYPE_CHECKING:  # pragma: NO COVER
    # os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
    PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
    import requests  # required by api-core

_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024  # 100 MB
_MAX_MULTIPART_SIZE = 5 * 1024 * 1024  # 5 MB: above this, use a resumable upload
_DEFAULT_NUM_RETRIES = 6
# Upload URL templates; ``uploadType`` is completed below per upload style.
_BASE_UPLOAD_TEMPLATE = "{host}/upload/bigquery/v2/projects/{project}/jobs?uploadType="
_MULTIPART_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "multipart"
_RESUMABLE_URL_TEMPLATE = _BASE_UPLOAD_TEMPLATE + "resumable"
_GENERIC_CONTENT_TYPE = "*/*"
# Error/help message templates used by upload and table helpers.
_READ_LESS_THAN_SIZE = (
    "Size {:d} was specified but the file-like object only had " "{:d} bytes remaining."
)
_NEED_TABLE_ARGUMENT = (
    "The table argument should be a table ID string, Table, or TableReference"
)
_LIST_ROWS_FROM_QUERY_RESULTS_FIELDS = "jobReference,totalRows,pageToken,rows"

# In microbenchmarks, it's been shown that even in ideal conditions (query
# finished, local data), requests to getQueryResults can take 10+ seconds.
# In less-than-ideal situations, the response can take even longer, as it must
# be able to download a full 100+ MB row in that time. Don't let the
# connection timeout before data can be downloaded.
# https://github.com/googleapis/python-bigquery/issues/438
_MIN_GET_QUERY_RESULTS_TIMEOUT = 120

TIMEOUT_HEADER = "X-Server-Timeout"
class Project(object):
    """Lightweight wrapper for a BigQuery project resource.

    Args:
        project_id (str): Opaque ID of the project

        numeric_id (int): Numeric ID of the project

        friendly_name (str): Display name of the project
    """

    def __init__(self, project_id, numeric_id, friendly_name):
        # Plain value object: attributes are stored as-is, no validation.
        self.project_id = project_id
        self.numeric_id = numeric_id
        self.friendly_name = friendly_name

    @classmethod
    def from_api_repr(cls, resource):
        """Factory: construct an instance from a resource dict."""
        return cls(
            resource["id"],
            resource["numericId"],
            resource["friendlyName"],
        )
184class Client(ClientWithProject):
185 """Client to bundle configuration needed for API requests.
187 Args:
188 project (Optional[str]):
189 Project ID for the project which the client acts on behalf of.
190 Will be passed when creating a dataset / job. If not passed,
191 falls back to the default inferred from the environment.
192 credentials (Optional[google.auth.credentials.Credentials]):
193 The OAuth2 Credentials to use for this client. If not passed
194 (and if no ``_http`` object is passed), falls back to the
195 default inferred from the environment.
196 _http (Optional[requests.Session]):
197 HTTP object to make requests. Can be any object that
198 defines ``request()`` with the same interface as
199 :meth:`requests.Session.request`. If not passed, an ``_http``
200 object is created that is bound to the ``credentials`` for the
201 current object.
202 This parameter should be considered private, and could change in
203 the future.
204 location (Optional[str]):
205 Default location for jobs / datasets / tables.
206 default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
207 Default ``QueryJobConfig``.
208 Will be merged into job configs passed into the ``query`` method.
209 default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
210 Default ``LoadJobConfig``.
211 Will be merged into job configs passed into the ``load_table_*`` methods.
212 client_info (Optional[google.api_core.client_info.ClientInfo]):
213 The client info used to send a user-agent string along with API
214 requests. If ``None``, then default info will be used. Generally,
215 you only need to set this if you're developing your own library
216 or partner tool.
217 client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]):
218 Client options used to set user options on the client. API Endpoint
219 should be set through client_options.
221 Raises:
222 google.auth.exceptions.DefaultCredentialsError:
223 Raised if ``credentials`` is not specified and the library fails
224 to acquire default credentials.
225 """
227 SCOPE = ("https://www.googleapis.com/auth/cloud-platform",) # type: ignore
228 """The scopes required for authenticating as a BigQuery consumer."""
    def __init__(
        self,
        project=None,
        credentials=None,
        _http=None,
        location=None,
        default_query_job_config=None,
        default_load_job_config=None,
        client_info=None,
        client_options=None,
    ) -> None:
        """Initialize the client; see the class docstring for argument details."""
        super(Client, self).__init__(
            project=project,
            credentials=credentials,
            client_options=client_options,
            _http=_http,
        )

        kw_args = {"client_info": client_info}
        # Endpoint precedence: BIGQUERY_EMULATOR_HOST-style env override first...
        bq_host = _get_bigquery_host()
        kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None
        client_universe = None
        if client_options is None:
            client_options = {}
        if isinstance(client_options, dict):
            client_options = google.api_core.client_options.from_dict(client_options)
        # ...then an explicit client_options.api_endpoint, which wins outright...
        if client_options.api_endpoint:
            api_endpoint = client_options.api_endpoint
            kw_args["api_endpoint"] = api_endpoint
        else:
            # ...otherwise derive the endpoint from a non-default universe domain.
            client_universe = _get_client_universe(client_options)
            if client_universe != _DEFAULT_UNIVERSE:
                kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace(
                    "{UNIVERSE_DOMAIN}", client_universe
                )
        # Ensure credentials and universe are not in conflict.
        if hasattr(self, "_credentials") and client_universe is not None:
            _validate_universe(client_universe, self._credentials)

        self._connection = Connection(self, **kw_args)
        self._location = location
        self._default_load_job_config = copy.deepcopy(default_load_job_config)

        # Use property setter so validation can run.
        self.default_query_job_config = default_query_job_config
276 @property
277 def location(self):
278 """Default location for jobs / datasets / tables."""
279 return self._location
281 @property
282 def default_query_job_config(self) -> Optional[QueryJobConfig]:
283 """Default ``QueryJobConfig`` or ``None``.
285 Will be merged into job configs passed into the ``query`` or
286 ``query_and_wait`` methods.
287 """
288 return self._default_query_job_config
290 @default_query_job_config.setter
291 def default_query_job_config(self, value: Optional[QueryJobConfig]):
292 if value is not None:
293 _verify_job_config_type(
294 value, QueryJobConfig, param_name="default_query_job_config"
295 )
296 self._default_query_job_config = copy.deepcopy(value)
298 @property
299 def default_load_job_config(self):
300 """Default ``LoadJobConfig``.
301 Will be merged into job configs passed into the ``load_table_*`` methods.
302 """
303 return self._default_load_job_config
305 @default_load_job_config.setter
306 def default_load_job_config(self, value: LoadJobConfig):
307 self._default_load_job_config = copy.deepcopy(value)
309 def close(self):
310 """Close the underlying transport objects, releasing system resources.
312 .. note::
314 The client instance can be used for making additional requests even
315 after closing, in which case the underlying connections are
316 automatically re-created.
317 """
318 self._http._auth_request.session.close()
319 self._http.close()
321 def get_service_account_email(
322 self,
323 project: Optional[str] = None,
324 retry: retries.Retry = DEFAULT_RETRY,
325 timeout: TimeoutType = DEFAULT_TIMEOUT,
326 ) -> str:
327 """Get the email address of the project's BigQuery service account
329 Note:
330 This is the service account that BigQuery uses to manage tables
331 encrypted by a key in KMS.
333 Args:
334 project (Optional[str]):
335 Project ID to use for retreiving service account email.
336 Defaults to the client's project.
337 retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
338 timeout (Optional[float]):
339 The number of seconds to wait for the underlying HTTP transport
340 before using ``retry``.
342 Returns:
343 str:
344 service account email address
346 Example:
348 >>> from google.cloud import bigquery
349 >>> client = bigquery.Client()
350 >>> client.get_service_account_email()
351 my_service_account@my-project.iam.gserviceaccount.com
353 """
354 if project is None:
355 project = self.project
356 path = "/projects/%s/serviceAccount" % (project,)
357 span_attributes = {"path": path}
358 api_response = self._call_api(
359 retry,
360 span_name="BigQuery.getServiceAccountEmail",
361 span_attributes=span_attributes,
362 method="GET",
363 path=path,
364 timeout=timeout,
365 )
366 return api_response["email"]
368 def list_projects(
369 self,
370 max_results: Optional[int] = None,
371 page_token: Optional[str] = None,
372 retry: retries.Retry = DEFAULT_RETRY,
373 timeout: TimeoutType = DEFAULT_TIMEOUT,
374 page_size: Optional[int] = None,
375 ) -> page_iterator.Iterator:
376 """List projects for the project associated with this client.
378 See
379 https://cloud.google.com/bigquery/docs/reference/rest/v2/projects/list
381 Args:
382 max_results (Optional[int]):
383 Maximum number of projects to return.
384 Defaults to a value set by the API.
386 page_token (Optional[str]):
387 Token representing a cursor into the projects. If not passed,
388 the API will return the first page of projects. The token marks
389 the beginning of the iterator to be returned and the value of
390 the ``page_token`` can be accessed at ``next_page_token`` of the
391 :class:`~google.api_core.page_iterator.HTTPIterator`.
393 retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
395 timeout (Optional[float]):
396 The number of seconds to wait for the underlying HTTP transport
397 before using ``retry``.
399 page_size (Optional[int]):
400 Maximum number of projects to return in each page.
401 Defaults to a value set by the API.
403 Returns:
404 google.api_core.page_iterator.Iterator:
405 Iterator of :class:`~google.cloud.bigquery.client.Project`
406 accessible to the current client.
407 """
408 span_attributes = {"path": "/projects"}
410 def api_request(*args, **kwargs):
411 return self._call_api(
412 retry,
413 span_name="BigQuery.listProjects",
414 span_attributes=span_attributes,
415 *args,
416 timeout=timeout,
417 **kwargs,
418 )
420 return page_iterator.HTTPIterator(
421 client=self,
422 api_request=api_request,
423 path="/projects",
424 item_to_value=_item_to_project,
425 items_key="projects",
426 page_token=page_token,
427 max_results=max_results,
428 page_size=page_size,
429 )
431 def list_datasets(
432 self,
433 project: Optional[str] = None,
434 include_all: bool = False,
435 filter: Optional[str] = None,
436 max_results: Optional[int] = None,
437 page_token: Optional[str] = None,
438 retry: retries.Retry = DEFAULT_RETRY,
439 timeout: TimeoutType = DEFAULT_TIMEOUT,
440 page_size: Optional[int] = None,
441 ) -> page_iterator.Iterator:
442 """List datasets for the project associated with this client.
444 See
445 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list
447 Args:
448 project (Optional[str]):
449 Project ID to use for retreiving datasets. Defaults to the
450 client's project.
451 include_all (Optional[bool]):
452 True if results include hidden datasets. Defaults to False.
453 filter (Optional[str]):
454 An expression for filtering the results by label.
455 For syntax, see
456 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list#body.QUERY_PARAMETERS.filter
457 max_results (Optional[int]):
458 Maximum number of datasets to return.
459 page_token (Optional[str]):
460 Token representing a cursor into the datasets. If not passed,
461 the API will return the first page of datasets. The token marks
462 the beginning of the iterator to be returned and the value of
463 the ``page_token`` can be accessed at ``next_page_token`` of the
464 :class:`~google.api_core.page_iterator.HTTPIterator`.
465 retry (Optional[google.api_core.retry.Retry]):
466 How to retry the RPC.
467 timeout (Optional[float]):
468 The number of seconds to wait for the underlying HTTP transport
469 before using ``retry``.
470 page_size (Optional[int]):
471 Maximum number of datasets to return per page.
473 Returns:
474 google.api_core.page_iterator.Iterator:
475 Iterator of :class:`~google.cloud.bigquery.dataset.DatasetListItem`.
476 associated with the project.
477 """
478 extra_params: Dict[str, Any] = {}
479 if project is None:
480 project = self.project
481 if include_all:
482 extra_params["all"] = True
483 if filter:
484 # TODO: consider supporting a dict of label -> value for filter,
485 # and converting it into a string here.
486 extra_params["filter"] = filter
487 path = "/projects/%s/datasets" % (project,)
489 span_attributes = {"path": path}
491 def api_request(*args, **kwargs):
492 return self._call_api(
493 retry,
494 span_name="BigQuery.listDatasets",
495 span_attributes=span_attributes,
496 *args,
497 timeout=timeout,
498 **kwargs,
499 )
501 return page_iterator.HTTPIterator(
502 client=self,
503 api_request=api_request,
504 path=path,
505 item_to_value=_item_to_dataset,
506 items_key="datasets",
507 page_token=page_token,
508 max_results=max_results,
509 extra_params=extra_params,
510 page_size=page_size,
511 )
513 def dataset(
514 self, dataset_id: str, project: Optional[str] = None
515 ) -> DatasetReference:
516 """Deprecated: Construct a reference to a dataset.
518 .. deprecated:: 1.24.0
519 Construct a
520 :class:`~google.cloud.bigquery.dataset.DatasetReference` using its
521 constructor or use a string where previously a reference object
522 was used.
524 As of ``google-cloud-bigquery`` version 1.7.0, all client methods
525 that take a
526 :class:`~google.cloud.bigquery.dataset.DatasetReference` or
527 :class:`~google.cloud.bigquery.table.TableReference` also take a
528 string in standard SQL format, e.g. ``project.dataset_id`` or
529 ``project.dataset_id.table_id``.
531 Args:
532 dataset_id (str): ID of the dataset.
534 project (Optional[str]):
535 Project ID for the dataset (defaults to the project of the client).
537 Returns:
538 google.cloud.bigquery.dataset.DatasetReference:
539 a new ``DatasetReference`` instance.
540 """
541 if project is None:
542 project = self.project
544 warnings.warn(
545 "Client.dataset is deprecated and will be removed in a future version. "
546 "Use a string like 'my_project.my_dataset' or a "
547 "cloud.google.bigquery.DatasetReference object, instead.",
548 PendingDeprecationWarning,
549 stacklevel=2,
550 )
551 return DatasetReference(project, dataset_id)
553 def _ensure_bqstorage_client(
554 self,
555 bqstorage_client: Optional[
556 "google.cloud.bigquery_storage.BigQueryReadClient"
557 ] = None,
558 client_options: Optional[google.api_core.client_options.ClientOptions] = None,
559 client_info: Optional[
560 "google.api_core.gapic_v1.client_info.ClientInfo"
561 ] = DEFAULT_BQSTORAGE_CLIENT_INFO,
562 ) -> Optional["google.cloud.bigquery_storage.BigQueryReadClient"]:
563 """Create a BigQuery Storage API client using this client's credentials.
565 Args:
566 bqstorage_client:
567 An existing BigQuery Storage client instance. If ``None``, a new
568 instance is created and returned.
569 client_options:
570 Custom options used with a new BigQuery Storage client instance
571 if one is created.
572 client_info:
573 The client info used with a new BigQuery Storage client
574 instance if one is created.
576 Returns:
577 A BigQuery Storage API client.
578 """
580 try:
581 bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import(
582 raise_if_error=True
583 )
584 except bq_exceptions.BigQueryStorageNotFoundError:
585 warnings.warn(
586 "Cannot create BigQuery Storage client, the dependency "
587 "google-cloud-bigquery-storage is not installed."
588 )
589 return None
590 except bq_exceptions.LegacyBigQueryStorageError as exc:
591 warnings.warn(
592 "Dependency google-cloud-bigquery-storage is outdated: " + str(exc)
593 )
594 return None
596 if bqstorage_client is None: # pragma: NO COVER
597 bqstorage_client = bigquery_storage.BigQueryReadClient(
598 credentials=self._credentials,
599 client_options=client_options,
600 client_info=client_info, # type: ignore # (None is also accepted)
601 )
603 return bqstorage_client
605 def _dataset_from_arg(self, dataset) -> Union[Dataset, DatasetReference]:
606 if isinstance(dataset, str):
607 dataset = DatasetReference.from_string(
608 dataset, default_project=self.project
609 )
611 if not isinstance(dataset, (Dataset, DatasetReference)):
612 if isinstance(dataset, DatasetListItem):
613 dataset = dataset.reference
614 else:
615 raise TypeError(
616 "dataset must be a Dataset, DatasetReference, DatasetListItem,"
617 " or string"
618 )
619 return dataset
621 def create_dataset(
622 self,
623 dataset: Union[str, Dataset, DatasetReference, DatasetListItem],
624 exists_ok: bool = False,
625 retry: retries.Retry = DEFAULT_RETRY,
626 timeout: TimeoutType = DEFAULT_TIMEOUT,
627 ) -> Dataset:
628 """API call: create the dataset via a POST request.
630 See
631 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert
633 Args:
634 dataset (Union[ \
635 google.cloud.bigquery.dataset.Dataset, \
636 google.cloud.bigquery.dataset.DatasetReference, \
637 google.cloud.bigquery.dataset.DatasetListItem, \
638 str, \
639 ]):
640 A :class:`~google.cloud.bigquery.dataset.Dataset` to create.
641 If ``dataset`` is a reference, an empty dataset is created
642 with the specified ID and client's default location.
643 exists_ok (Optional[bool]):
644 Defaults to ``False``. If ``True``, ignore "already exists"
645 errors when creating the dataset.
646 retry (Optional[google.api_core.retry.Retry]):
647 How to retry the RPC.
648 timeout (Optional[float]):
649 The number of seconds to wait for the underlying HTTP transport
650 before using ``retry``.
652 Returns:
653 google.cloud.bigquery.dataset.Dataset:
654 A new ``Dataset`` returned from the API.
656 Raises:
657 google.cloud.exceptions.Conflict:
658 If the dataset already exists.
660 Example:
662 >>> from google.cloud import bigquery
663 >>> client = bigquery.Client()
664 >>> dataset = bigquery.Dataset('my_project.my_dataset')
665 >>> dataset = client.create_dataset(dataset)
667 """
668 dataset = self._dataset_from_arg(dataset)
669 if isinstance(dataset, DatasetReference):
670 dataset = Dataset(dataset)
672 path = "/projects/%s/datasets" % (dataset.project,)
674 data = dataset.to_api_repr()
675 if data.get("location") is None and self.location is not None:
676 data["location"] = self.location
678 try:
679 span_attributes = {"path": path}
681 api_response = self._call_api(
682 retry,
683 span_name="BigQuery.createDataset",
684 span_attributes=span_attributes,
685 method="POST",
686 path=path,
687 data=data,
688 timeout=timeout,
689 )
690 return Dataset.from_api_repr(api_response)
691 except core_exceptions.Conflict:
692 if not exists_ok:
693 raise
694 return self.get_dataset(dataset.reference, retry=retry)
696 def create_routine(
697 self,
698 routine: Routine,
699 exists_ok: bool = False,
700 retry: retries.Retry = DEFAULT_RETRY,
701 timeout: TimeoutType = DEFAULT_TIMEOUT,
702 ) -> Routine:
703 """[Beta] Create a routine via a POST request.
705 See
706 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/insert
708 Args:
709 routine (google.cloud.bigquery.routine.Routine):
710 A :class:`~google.cloud.bigquery.routine.Routine` to create.
711 The dataset that the routine belongs to must already exist.
712 exists_ok (Optional[bool]):
713 Defaults to ``False``. If ``True``, ignore "already exists"
714 errors when creating the routine.
715 retry (Optional[google.api_core.retry.Retry]):
716 How to retry the RPC.
717 timeout (Optional[float]):
718 The number of seconds to wait for the underlying HTTP transport
719 before using ``retry``.
721 Returns:
722 google.cloud.bigquery.routine.Routine:
723 A new ``Routine`` returned from the service.
725 Raises:
726 google.cloud.exceptions.Conflict:
727 If the routine already exists.
728 """
729 reference = routine.reference
730 path = "/projects/{}/datasets/{}/routines".format(
731 reference.project, reference.dataset_id
732 )
733 resource = routine.to_api_repr()
734 try:
735 span_attributes = {"path": path}
736 api_response = self._call_api(
737 retry,
738 span_name="BigQuery.createRoutine",
739 span_attributes=span_attributes,
740 method="POST",
741 path=path,
742 data=resource,
743 timeout=timeout,
744 )
745 return Routine.from_api_repr(api_response)
746 except core_exceptions.Conflict:
747 if not exists_ok:
748 raise
749 return self.get_routine(routine.reference, retry=retry)
751 def create_table(
752 self,
753 table: Union[str, Table, TableReference, TableListItem],
754 exists_ok: bool = False,
755 retry: retries.Retry = DEFAULT_RETRY,
756 timeout: TimeoutType = DEFAULT_TIMEOUT,
757 ) -> Table:
758 """API call: create a table via a PUT request
760 See
761 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/insert
763 Args:
764 table (Union[ \
765 google.cloud.bigquery.table.Table, \
766 google.cloud.bigquery.table.TableReference, \
767 google.cloud.bigquery.table.TableListItem, \
768 str, \
769 ]):
770 A :class:`~google.cloud.bigquery.table.Table` to create.
771 If ``table`` is a reference, an empty table is created
772 with the specified ID. The dataset that the table belongs to
773 must already exist.
774 exists_ok (Optional[bool]):
775 Defaults to ``False``. If ``True``, ignore "already exists"
776 errors when creating the table.
777 retry (Optional[google.api_core.retry.Retry]):
778 How to retry the RPC.
779 timeout (Optional[float]):
780 The number of seconds to wait for the underlying HTTP transport
781 before using ``retry``.
783 Returns:
784 google.cloud.bigquery.table.Table:
785 A new ``Table`` returned from the service.
787 Raises:
788 google.cloud.exceptions.Conflict:
789 If the table already exists.
790 """
791 table = _table_arg_to_table(table, default_project=self.project)
792 dataset_id = table.dataset_id
793 path = "/projects/%s/datasets/%s/tables" % (table.project, dataset_id)
794 data = table.to_api_repr()
795 try:
796 span_attributes = {"path": path, "dataset_id": dataset_id}
797 api_response = self._call_api(
798 retry,
799 span_name="BigQuery.createTable",
800 span_attributes=span_attributes,
801 method="POST",
802 path=path,
803 data=data,
804 timeout=timeout,
805 )
806 return Table.from_api_repr(api_response)
807 except core_exceptions.Conflict:
808 if not exists_ok:
809 raise
810 return self.get_table(table.reference, retry=retry)
812 def _call_api(
813 self,
814 retry,
815 span_name=None,
816 span_attributes=None,
817 job_ref=None,
818 headers: Optional[Dict[str, str]] = None,
819 **kwargs,
820 ):
821 kwargs = _add_server_timeout_header(headers, kwargs)
822 call = functools.partial(self._connection.api_request, **kwargs)
824 if retry:
825 call = retry(call)
827 if span_name is not None:
828 with create_span(
829 name=span_name, attributes=span_attributes, client=self, job_ref=job_ref
830 ):
831 return call()
833 return call()
835 def get_dataset(
836 self,
837 dataset_ref: Union[DatasetReference, str],
838 retry: retries.Retry = DEFAULT_RETRY,
839 timeout: TimeoutType = DEFAULT_TIMEOUT,
840 ) -> Dataset:
841 """Fetch the dataset referenced by ``dataset_ref``
843 Args:
844 dataset_ref (Union[ \
845 google.cloud.bigquery.dataset.DatasetReference, \
846 str, \
847 ]):
848 A reference to the dataset to fetch from the BigQuery API.
849 If a string is passed in, this method attempts to create a
850 dataset reference from a string using
851 :func:`~google.cloud.bigquery.dataset.DatasetReference.from_string`.
852 retry (Optional[google.api_core.retry.Retry]):
853 How to retry the RPC.
854 timeout (Optional[float]):
855 The number of seconds to wait for the underlying HTTP transport
856 before using ``retry``.
858 Returns:
859 google.cloud.bigquery.dataset.Dataset:
860 A ``Dataset`` instance.
861 """
862 if isinstance(dataset_ref, str):
863 dataset_ref = DatasetReference.from_string(
864 dataset_ref, default_project=self.project
865 )
866 path = dataset_ref.path
867 span_attributes = {"path": path}
868 api_response = self._call_api(
869 retry,
870 span_name="BigQuery.getDataset",
871 span_attributes=span_attributes,
872 method="GET",
873 path=path,
874 timeout=timeout,
875 )
876 return Dataset.from_api_repr(api_response)
878 def get_iam_policy(
879 self,
880 table: Union[Table, TableReference, TableListItem, str],
881 requested_policy_version: int = 1,
882 retry: retries.Retry = DEFAULT_RETRY,
883 timeout: TimeoutType = DEFAULT_TIMEOUT,
884 ) -> Policy:
885 """Return the access control policy for a table resource.
887 Args:
888 table (Union[ \
889 google.cloud.bigquery.table.Table, \
890 google.cloud.bigquery.table.TableReference, \
891 google.cloud.bigquery.table.TableListItem, \
892 str, \
893 ]):
894 The table to get the access control policy for.
895 If a string is passed in, this method attempts to create a
896 table reference from a string using
897 :func:`~google.cloud.bigquery.table.TableReference.from_string`.
898 requested_policy_version (int):
899 Optional. The maximum policy version that will be used to format the policy.
901 Only version ``1`` is currently supported.
903 See: https://cloud.google.com/bigquery/docs/reference/rest/v2/GetPolicyOptions
904 retry (Optional[google.api_core.retry.Retry]):
905 How to retry the RPC.
906 timeout (Optional[float]):
907 The number of seconds to wait for the underlying HTTP transport
908 before using ``retry``.
910 Returns:
911 google.api_core.iam.Policy:
912 The access control policy.
913 """
914 table = _table_arg_to_table_ref(table, default_project=self.project)
916 if requested_policy_version != 1:
917 raise ValueError("only IAM policy version 1 is supported")
919 body = {"options": {"requestedPolicyVersion": 1}}
921 path = "{}:getIamPolicy".format(table.path)
922 span_attributes = {"path": path}
923 response = self._call_api(
924 retry,
925 span_name="BigQuery.getIamPolicy",
926 span_attributes=span_attributes,
927 method="POST",
928 path=path,
929 data=body,
930 timeout=timeout,
931 )
933 return Policy.from_api_repr(response)
935 def set_iam_policy(
936 self,
937 table: Union[Table, TableReference, TableListItem, str],
938 policy: Policy,
939 updateMask: Optional[str] = None,
940 retry: retries.Retry = DEFAULT_RETRY,
941 timeout: TimeoutType = DEFAULT_TIMEOUT,
942 *,
943 fields: Sequence[str] = (),
944 ) -> Policy:
945 """Return the access control policy for a table resource.
947 Args:
948 table (Union[ \
949 google.cloud.bigquery.table.Table, \
950 google.cloud.bigquery.table.TableReference, \
951 google.cloud.bigquery.table.TableListItem, \
952 str, \
953 ]):
954 The table to get the access control policy for.
955 If a string is passed in, this method attempts to create a
956 table reference from a string using
957 :func:`~google.cloud.bigquery.table.TableReference.from_string`.
958 policy (google.api_core.iam.Policy):
959 The access control policy to set.
960 updateMask (Optional[str]):
961 Mask as defined by
962 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask
964 Incompatible with ``fields``.
965 retry (Optional[google.api_core.retry.Retry]):
966 How to retry the RPC.
967 timeout (Optional[float]):
968 The number of seconds to wait for the underlying HTTP transport
969 before using ``retry``.
970 fields (Sequence[str]):
971 Which properties to set on the policy. See:
972 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask
974 Incompatible with ``updateMask``.
976 Returns:
977 google.api_core.iam.Policy:
978 The updated access control policy.
979 """
980 if updateMask is not None and not fields:
981 update_mask = updateMask
982 elif updateMask is not None and fields:
983 raise ValueError("Cannot set both fields and updateMask")
984 elif fields:
985 update_mask = ",".join(fields)
986 else:
987 update_mask = None
989 table = _table_arg_to_table_ref(table, default_project=self.project)
991 if not isinstance(policy, (Policy)):
992 raise TypeError("policy must be a Policy")
994 body = {"policy": policy.to_api_repr()}
996 if update_mask is not None:
997 body["updateMask"] = update_mask
999 path = "{}:setIamPolicy".format(table.path)
1000 span_attributes = {"path": path}
1002 response = self._call_api(
1003 retry,
1004 span_name="BigQuery.setIamPolicy",
1005 span_attributes=span_attributes,
1006 method="POST",
1007 path=path,
1008 data=body,
1009 timeout=timeout,
1010 )
1012 return Policy.from_api_repr(response)
    def test_iam_permissions(
        self,
        table: Union[Table, TableReference, TableListItem, str],
        permissions: Sequence[str],
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Dict[str, Any]:
        """Issue a ``tables.testIamPermissions`` request for ``table``.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/testIamPermissions

        Args:
            table (Union[ \
                google.cloud.bigquery.table.Table, \
                google.cloud.bigquery.table.TableReference, \
                google.cloud.bigquery.table.TableListItem, \
                str, \
            ]):
                The table to check permissions against. If a string is passed
                in, this method attempts to create a table reference from a
                string using
                :func:`~google.cloud.bigquery.table.TableReference.from_string`.
            permissions (Sequence[str]):
                The permissions to test for the table.
            retry (Optional[google.api_core.retry.Retry]):
                How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            Dict[str, Any]:
                The raw API response to the ``testIamPermissions`` request.
        """
        table = _table_arg_to_table_ref(table, default_project=self.project)

        body = {"permissions": permissions}

        path = "{}:testIamPermissions".format(table.path)
        span_attributes = {"path": path}
        response = self._call_api(
            retry,
            span_name="BigQuery.testIamPermissions",
            span_attributes=span_attributes,
            method="POST",
            path=path,
            data=body,
            timeout=timeout,
        )

        return response
1039 def get_model(
1040 self,
1041 model_ref: Union[ModelReference, str],
1042 retry: retries.Retry = DEFAULT_RETRY,
1043 timeout: TimeoutType = DEFAULT_TIMEOUT,
1044 ) -> Model:
1045 """[Beta] Fetch the model referenced by ``model_ref``.
1047 Args:
1048 model_ref (Union[ \
1049 google.cloud.bigquery.model.ModelReference, \
1050 str, \
1051 ]):
1052 A reference to the model to fetch from the BigQuery API.
1053 If a string is passed in, this method attempts to create a
1054 model reference from a string using
1055 :func:`google.cloud.bigquery.model.ModelReference.from_string`.
1056 retry (Optional[google.api_core.retry.Retry]):
1057 How to retry the RPC.
1058 timeout (Optional[float]):
1059 The number of seconds to wait for the underlying HTTP transport
1060 before using ``retry``.
1062 Returns:
1063 google.cloud.bigquery.model.Model: A ``Model`` instance.
1064 """
1065 if isinstance(model_ref, str):
1066 model_ref = ModelReference.from_string(
1067 model_ref, default_project=self.project
1068 )
1069 path = model_ref.path
1070 span_attributes = {"path": path}
1072 api_response = self._call_api(
1073 retry,
1074 span_name="BigQuery.getModel",
1075 span_attributes=span_attributes,
1076 method="GET",
1077 path=path,
1078 timeout=timeout,
1079 )
1080 return Model.from_api_repr(api_response)
1082 def get_routine(
1083 self,
1084 routine_ref: Union[Routine, RoutineReference, str],
1085 retry: retries.Retry = DEFAULT_RETRY,
1086 timeout: TimeoutType = DEFAULT_TIMEOUT,
1087 ) -> Routine:
1088 """[Beta] Get the routine referenced by ``routine_ref``.
1090 Args:
1091 routine_ref (Union[ \
1092 google.cloud.bigquery.routine.Routine, \
1093 google.cloud.bigquery.routine.RoutineReference, \
1094 str, \
1095 ]):
1096 A reference to the routine to fetch from the BigQuery API. If
1097 a string is passed in, this method attempts to create a
1098 reference from a string using
1099 :func:`google.cloud.bigquery.routine.RoutineReference.from_string`.
1100 retry (Optional[google.api_core.retry.Retry]):
1101 How to retry the API call.
1102 timeout (Optional[float]):
1103 The number of seconds to wait for the underlying HTTP transport
1104 before using ``retry``.
1106 Returns:
1107 google.cloud.bigquery.routine.Routine:
1108 A ``Routine`` instance.
1109 """
1110 if isinstance(routine_ref, str):
1111 routine_ref = RoutineReference.from_string(
1112 routine_ref, default_project=self.project
1113 )
1114 path = routine_ref.path
1115 span_attributes = {"path": path}
1116 api_response = self._call_api(
1117 retry,
1118 span_name="BigQuery.getRoutine",
1119 span_attributes=span_attributes,
1120 method="GET",
1121 path=path,
1122 timeout=timeout,
1123 )
1124 return Routine.from_api_repr(api_response)
1126 def get_table(
1127 self,
1128 table: Union[Table, TableReference, TableListItem, str],
1129 retry: retries.Retry = DEFAULT_RETRY,
1130 timeout: TimeoutType = DEFAULT_TIMEOUT,
1131 ) -> Table:
1132 """Fetch the table referenced by ``table``.
1134 Args:
1135 table (Union[ \
1136 google.cloud.bigquery.table.Table, \
1137 google.cloud.bigquery.table.TableReference, \
1138 google.cloud.bigquery.table.TableListItem, \
1139 str, \
1140 ]):
1141 A reference to the table to fetch from the BigQuery API.
1142 If a string is passed in, this method attempts to create a
1143 table reference from a string using
1144 :func:`google.cloud.bigquery.table.TableReference.from_string`.
1145 retry (Optional[google.api_core.retry.Retry]):
1146 How to retry the RPC.
1147 timeout (Optional[float]):
1148 The number of seconds to wait for the underlying HTTP transport
1149 before using ``retry``.
1151 Returns:
1152 google.cloud.bigquery.table.Table:
1153 A ``Table`` instance.
1154 """
1155 table_ref = _table_arg_to_table_ref(table, default_project=self.project)
1156 path = table_ref.path
1157 span_attributes = {"path": path}
1158 api_response = self._call_api(
1159 retry,
1160 span_name="BigQuery.getTable",
1161 span_attributes=span_attributes,
1162 method="GET",
1163 path=path,
1164 timeout=timeout,
1165 )
1166 return Table.from_api_repr(api_response)
1168 def update_dataset(
1169 self,
1170 dataset: Dataset,
1171 fields: Sequence[str],
1172 retry: retries.Retry = DEFAULT_RETRY,
1173 timeout: TimeoutType = DEFAULT_TIMEOUT,
1174 ) -> Dataset:
1175 """Change some fields of a dataset.
1177 Use ``fields`` to specify which fields to update. At least one field
1178 must be provided. If a field is listed in ``fields`` and is ``None`` in
1179 ``dataset``, it will be deleted.
1181 If ``dataset.etag`` is not ``None``, the update will only
1182 succeed if the dataset on the server has the same ETag. Thus
1183 reading a dataset with ``get_dataset``, changing its fields,
1184 and then passing it to ``update_dataset`` will ensure that the changes
1185 will only be saved if no modifications to the dataset occurred
1186 since the read.
1188 Args:
1189 dataset (google.cloud.bigquery.dataset.Dataset):
1190 The dataset to update.
1191 fields (Sequence[str]):
1192 The properties of ``dataset`` to change. These are strings
1193 corresponding to the properties of
1194 :class:`~google.cloud.bigquery.dataset.Dataset`.
1196 For example, to update the default expiration times, specify
1197 both properties in the ``fields`` argument:
1199 .. code-block:: python
1201 bigquery_client.update_dataset(
1202 dataset,
1203 [
1204 "default_partition_expiration_ms",
1205 "default_table_expiration_ms",
1206 ]
1207 )
1208 retry (Optional[google.api_core.retry.Retry]):
1209 How to retry the RPC.
1210 timeout (Optional[float]):
1211 The number of seconds to wait for the underlying HTTP transport
1212 before using ``retry``.
1214 Returns:
1215 google.cloud.bigquery.dataset.Dataset:
1216 The modified ``Dataset`` instance.
1217 """
1218 partial = dataset._build_resource(fields)
1219 if dataset.etag is not None:
1220 headers: Optional[Dict[str, str]] = {"If-Match": dataset.etag}
1221 else:
1222 headers = None
1223 path = dataset.path
1224 span_attributes = {"path": path, "fields": fields}
1226 api_response = self._call_api(
1227 retry,
1228 span_name="BigQuery.updateDataset",
1229 span_attributes=span_attributes,
1230 method="PATCH",
1231 path=path,
1232 data=partial,
1233 headers=headers,
1234 timeout=timeout,
1235 )
1236 return Dataset.from_api_repr(api_response)
1238 def update_model(
1239 self,
1240 model: Model,
1241 fields: Sequence[str],
1242 retry: retries.Retry = DEFAULT_RETRY,
1243 timeout: TimeoutType = DEFAULT_TIMEOUT,
1244 ) -> Model:
1245 """[Beta] Change some fields of a model.
1247 Use ``fields`` to specify which fields to update. At least one field
1248 must be provided. If a field is listed in ``fields`` and is ``None``
1249 in ``model``, the field value will be deleted.
1251 If ``model.etag`` is not ``None``, the update will only succeed if
1252 the model on the server has the same ETag. Thus reading a model with
1253 ``get_model``, changing its fields, and then passing it to
1254 ``update_model`` will ensure that the changes will only be saved if
1255 no modifications to the model occurred since the read.
1257 Args:
1258 model (google.cloud.bigquery.model.Model): The model to update.
1259 fields (Sequence[str]):
1260 The properties of ``model`` to change. These are strings
1261 corresponding to the properties of
1262 :class:`~google.cloud.bigquery.model.Model`.
1264 For example, to update the descriptive properties of the model,
1265 specify them in the ``fields`` argument:
1267 .. code-block:: python
1269 bigquery_client.update_model(
1270 model, ["description", "friendly_name"]
1271 )
1272 retry (Optional[google.api_core.retry.Retry]):
1273 A description of how to retry the API call.
1274 timeout (Optional[float]):
1275 The number of seconds to wait for the underlying HTTP transport
1276 before using ``retry``.
1278 Returns:
1279 google.cloud.bigquery.model.Model:
1280 The model resource returned from the API call.
1281 """
1282 partial = model._build_resource(fields)
1283 if model.etag:
1284 headers: Optional[Dict[str, str]] = {"If-Match": model.etag}
1285 else:
1286 headers = None
1287 path = model.path
1288 span_attributes = {"path": path, "fields": fields}
1290 api_response = self._call_api(
1291 retry,
1292 span_name="BigQuery.updateModel",
1293 span_attributes=span_attributes,
1294 method="PATCH",
1295 path=path,
1296 data=partial,
1297 headers=headers,
1298 timeout=timeout,
1299 )
1300 return Model.from_api_repr(api_response)
1302 def update_routine(
1303 self,
1304 routine: Routine,
1305 fields: Sequence[str],
1306 retry: retries.Retry = DEFAULT_RETRY,
1307 timeout: TimeoutType = DEFAULT_TIMEOUT,
1308 ) -> Routine:
1309 """[Beta] Change some fields of a routine.
1311 Use ``fields`` to specify which fields to update. At least one field
1312 must be provided. If a field is listed in ``fields`` and is ``None``
1313 in ``routine``, the field value will be deleted.
1315 .. warning::
1316 During beta, partial updates are not supported. You must provide
1317 all fields in the resource.
1319 If :attr:`~google.cloud.bigquery.routine.Routine.etag` is not
1320 ``None``, the update will only succeed if the resource on the server
1321 has the same ETag. Thus reading a routine with
1322 :func:`~google.cloud.bigquery.client.Client.get_routine`, changing
1323 its fields, and then passing it to this method will ensure that the
1324 changes will only be saved if no modifications to the resource
1325 occurred since the read.
1327 Args:
1328 routine (google.cloud.bigquery.routine.Routine):
1329 The routine to update.
1330 fields (Sequence[str]):
1331 The fields of ``routine`` to change, spelled as the
1332 :class:`~google.cloud.bigquery.routine.Routine` properties.
1334 For example, to update the description property of the routine,
1335 specify it in the ``fields`` argument:
1337 .. code-block:: python
1339 bigquery_client.update_routine(
1340 routine, ["description"]
1341 )
1342 retry (Optional[google.api_core.retry.Retry]):
1343 A description of how to retry the API call.
1344 timeout (Optional[float]):
1345 The number of seconds to wait for the underlying HTTP transport
1346 before using ``retry``.
1348 Returns:
1349 google.cloud.bigquery.routine.Routine:
1350 The routine resource returned from the API call.
1351 """
1352 partial = routine._build_resource(fields)
1353 if routine.etag:
1354 headers: Optional[Dict[str, str]] = {"If-Match": routine.etag}
1355 else:
1356 headers = None
1358 # TODO: remove when routines update supports partial requests.
1359 partial["routineReference"] = routine.reference.to_api_repr()
1361 path = routine.path
1362 span_attributes = {"path": path, "fields": fields}
1364 api_response = self._call_api(
1365 retry,
1366 span_name="BigQuery.updateRoutine",
1367 span_attributes=span_attributes,
1368 method="PUT",
1369 path=path,
1370 data=partial,
1371 headers=headers,
1372 timeout=timeout,
1373 )
1374 return Routine.from_api_repr(api_response)
1376 def update_table(
1377 self,
1378 table: Table,
1379 fields: Sequence[str],
1380 retry: retries.Retry = DEFAULT_RETRY,
1381 timeout: TimeoutType = DEFAULT_TIMEOUT,
1382 ) -> Table:
1383 """Change some fields of a table.
1385 Use ``fields`` to specify which fields to update. At least one field
1386 must be provided. If a field is listed in ``fields`` and is ``None``
1387 in ``table``, the field value will be deleted.
1389 If ``table.etag`` is not ``None``, the update will only succeed if
1390 the table on the server has the same ETag. Thus reading a table with
1391 ``get_table``, changing its fields, and then passing it to
1392 ``update_table`` will ensure that the changes will only be saved if
1393 no modifications to the table occurred since the read.
1395 Args:
1396 table (google.cloud.bigquery.table.Table): The table to update.
1397 fields (Sequence[str]):
1398 The fields of ``table`` to change, spelled as the
1399 :class:`~google.cloud.bigquery.table.Table` properties.
1401 For example, to update the descriptive properties of the table,
1402 specify them in the ``fields`` argument:
1404 .. code-block:: python
1406 bigquery_client.update_table(
1407 table,
1408 ["description", "friendly_name"]
1409 )
1410 retry (Optional[google.api_core.retry.Retry]):
1411 A description of how to retry the API call.
1412 timeout (Optional[float]):
1413 The number of seconds to wait for the underlying HTTP transport
1414 before using ``retry``.
1416 Returns:
1417 google.cloud.bigquery.table.Table:
1418 The table resource returned from the API call.
1419 """
1420 partial = table._build_resource(fields)
1421 if table.etag is not None:
1422 headers: Optional[Dict[str, str]] = {"If-Match": table.etag}
1423 else:
1424 headers = None
1426 path = table.path
1427 span_attributes = {"path": path, "fields": fields}
1429 api_response = self._call_api(
1430 retry,
1431 span_name="BigQuery.updateTable",
1432 span_attributes=span_attributes,
1433 method="PATCH",
1434 path=path,
1435 data=partial,
1436 headers=headers,
1437 timeout=timeout,
1438 )
1439 return Table.from_api_repr(api_response)
1441 def list_models(
1442 self,
1443 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1444 max_results: Optional[int] = None,
1445 page_token: Optional[str] = None,
1446 retry: retries.Retry = DEFAULT_RETRY,
1447 timeout: TimeoutType = DEFAULT_TIMEOUT,
1448 page_size: Optional[int] = None,
1449 ) -> page_iterator.Iterator:
1450 """[Beta] List models in the dataset.
1452 See
1453 https://cloud.google.com/bigquery/docs/reference/rest/v2/models/list
1455 Args:
1456 dataset (Union[ \
1457 google.cloud.bigquery.dataset.Dataset, \
1458 google.cloud.bigquery.dataset.DatasetReference, \
1459 google.cloud.bigquery.dataset.DatasetListItem, \
1460 str, \
1461 ]):
1462 A reference to the dataset whose models to list from the
1463 BigQuery API. If a string is passed in, this method attempts
1464 to create a dataset reference from a string using
1465 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1466 max_results (Optional[int]):
1467 Maximum number of models to return. Defaults to a
1468 value set by the API.
1469 page_token (Optional[str]):
1470 Token representing a cursor into the models. If not passed,
1471 the API will return the first page of models. The token marks
1472 the beginning of the iterator to be returned and the value of
1473 the ``page_token`` can be accessed at ``next_page_token`` of the
1474 :class:`~google.api_core.page_iterator.HTTPIterator`.
1475 retry (Optional[google.api_core.retry.Retry]):
1476 How to retry the RPC.
1477 timeout (Optional[float]):
1478 The number of seconds to wait for the underlying HTTP transport
1479 before using ``retry``.
1480 page_size (Optional[int]):
1481 Maximum number of models to return per page.
1482 Defaults to a value set by the API.
1484 Returns:
1485 google.api_core.page_iterator.Iterator:
1486 Iterator of
1487 :class:`~google.cloud.bigquery.model.Model` contained
1488 within the requested dataset.
1489 """
1490 dataset = self._dataset_from_arg(dataset)
1492 path = "%s/models" % dataset.path
1493 span_attributes = {"path": path}
1495 def api_request(*args, **kwargs):
1496 return self._call_api(
1497 retry,
1498 span_name="BigQuery.listModels",
1499 span_attributes=span_attributes,
1500 *args,
1501 timeout=timeout,
1502 **kwargs,
1503 )
1505 result = page_iterator.HTTPIterator(
1506 client=self,
1507 api_request=api_request,
1508 path=path,
1509 item_to_value=_item_to_model,
1510 items_key="models",
1511 page_token=page_token,
1512 max_results=max_results,
1513 page_size=page_size,
1514 )
1515 result.dataset = dataset # type: ignore
1516 return result
1518 def list_routines(
1519 self,
1520 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1521 max_results: Optional[int] = None,
1522 page_token: Optional[str] = None,
1523 retry: retries.Retry = DEFAULT_RETRY,
1524 timeout: TimeoutType = DEFAULT_TIMEOUT,
1525 page_size: Optional[int] = None,
1526 ) -> page_iterator.Iterator:
1527 """[Beta] List routines in the dataset.
1529 See
1530 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/list
1532 Args:
1533 dataset (Union[ \
1534 google.cloud.bigquery.dataset.Dataset, \
1535 google.cloud.bigquery.dataset.DatasetReference, \
1536 google.cloud.bigquery.dataset.DatasetListItem, \
1537 str, \
1538 ]):
1539 A reference to the dataset whose routines to list from the
1540 BigQuery API. If a string is passed in, this method attempts
1541 to create a dataset reference from a string using
1542 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1543 max_results (Optional[int]):
1544 Maximum number of routines to return. Defaults
1545 to a value set by the API.
1546 page_token (Optional[str]):
1547 Token representing a cursor into the routines. If not passed,
1548 the API will return the first page of routines. The token marks
1549 the beginning of the iterator to be returned and the value of the
1550 ``page_token`` can be accessed at ``next_page_token`` of the
1551 :class:`~google.api_core.page_iterator.HTTPIterator`.
1552 retry (Optional[google.api_core.retry.Retry]):
1553 How to retry the RPC.
1554 timeout (Optional[float]):
1555 The number of seconds to wait for the underlying HTTP transport
1556 before using ``retry``.
1557 page_size (Optional[int]):
1558 Maximum number of routines to return per page.
1559 Defaults to a value set by the API.
1561 Returns:
1562 google.api_core.page_iterator.Iterator:
1563 Iterator of all
1564 :class:`~google.cloud.bigquery.routine.Routine`s contained
1565 within the requested dataset, limited by ``max_results``.
1566 """
1567 dataset = self._dataset_from_arg(dataset)
1568 path = "{}/routines".format(dataset.path)
1570 span_attributes = {"path": path}
1572 def api_request(*args, **kwargs):
1573 return self._call_api(
1574 retry,
1575 span_name="BigQuery.listRoutines",
1576 span_attributes=span_attributes,
1577 *args,
1578 timeout=timeout,
1579 **kwargs,
1580 )
1582 result = page_iterator.HTTPIterator(
1583 client=self,
1584 api_request=api_request,
1585 path=path,
1586 item_to_value=_item_to_routine,
1587 items_key="routines",
1588 page_token=page_token,
1589 max_results=max_results,
1590 page_size=page_size,
1591 )
1592 result.dataset = dataset # type: ignore
1593 return result
1595 def list_tables(
1596 self,
1597 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1598 max_results: Optional[int] = None,
1599 page_token: Optional[str] = None,
1600 retry: retries.Retry = DEFAULT_RETRY,
1601 timeout: TimeoutType = DEFAULT_TIMEOUT,
1602 page_size: Optional[int] = None,
1603 ) -> page_iterator.Iterator:
1604 """List tables in the dataset.
1606 See
1607 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list
1609 Args:
1610 dataset (Union[ \
1611 google.cloud.bigquery.dataset.Dataset, \
1612 google.cloud.bigquery.dataset.DatasetReference, \
1613 google.cloud.bigquery.dataset.DatasetListItem, \
1614 str, \
1615 ]):
1616 A reference to the dataset whose tables to list from the
1617 BigQuery API. If a string is passed in, this method attempts
1618 to create a dataset reference from a string using
1619 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1620 max_results (Optional[int]):
1621 Maximum number of tables to return. Defaults
1622 to a value set by the API.
1623 page_token (Optional[str]):
1624 Token representing a cursor into the tables. If not passed,
1625 the API will return the first page of tables. The token marks
1626 the beginning of the iterator to be returned and the value of
1627 the ``page_token`` can be accessed at ``next_page_token`` of the
1628 :class:`~google.api_core.page_iterator.HTTPIterator`.
1629 retry (Optional[google.api_core.retry.Retry]):
1630 How to retry the RPC.
1631 timeout (Optional[float]):
1632 The number of seconds to wait for the underlying HTTP transport
1633 before using ``retry``.
1634 page_size (Optional[int]):
1635 Maximum number of tables to return per page.
1636 Defaults to a value set by the API.
1638 Returns:
1639 google.api_core.page_iterator.Iterator:
1640 Iterator of
1641 :class:`~google.cloud.bigquery.table.TableListItem` contained
1642 within the requested dataset.
1643 """
1644 dataset = self._dataset_from_arg(dataset)
1645 path = "%s/tables" % dataset.path
1646 span_attributes = {"path": path}
1648 def api_request(*args, **kwargs):
1649 return self._call_api(
1650 retry,
1651 span_name="BigQuery.listTables",
1652 span_attributes=span_attributes,
1653 *args,
1654 timeout=timeout,
1655 **kwargs,
1656 )
1658 result = page_iterator.HTTPIterator(
1659 client=self,
1660 api_request=api_request,
1661 path=path,
1662 item_to_value=_item_to_table,
1663 items_key="tables",
1664 page_token=page_token,
1665 max_results=max_results,
1666 page_size=page_size,
1667 )
1668 result.dataset = dataset # type: ignore
1669 return result
1671 def delete_dataset(
1672 self,
1673 dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
1674 delete_contents: bool = False,
1675 retry: retries.Retry = DEFAULT_RETRY,
1676 timeout: TimeoutType = DEFAULT_TIMEOUT,
1677 not_found_ok: bool = False,
1678 ) -> None:
1679 """Delete a dataset.
1681 See
1682 https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete
1684 Args:
1685 dataset (Union[ \
1686 google.cloud.bigquery.dataset.Dataset, \
1687 google.cloud.bigquery.dataset.DatasetReference, \
1688 google.cloud.bigquery.dataset.DatasetListItem, \
1689 str, \
1690 ]):
1691 A reference to the dataset to delete. If a string is passed
1692 in, this method attempts to create a dataset reference from a
1693 string using
1694 :func:`google.cloud.bigquery.dataset.DatasetReference.from_string`.
1695 delete_contents (Optional[bool]):
1696 If True, delete all the tables in the dataset. If False and
1697 the dataset contains tables, the request will fail.
1698 Default is False.
1699 retry (Optional[google.api_core.retry.Retry]):
1700 How to retry the RPC.
1701 timeout (Optional[float]):
1702 The number of seconds to wait for the underlying HTTP transport
1703 before using ``retry``.
1704 not_found_ok (Optional[bool]):
1705 Defaults to ``False``. If ``True``, ignore "not found" errors
1706 when deleting the dataset.
1707 """
1708 dataset = self._dataset_from_arg(dataset)
1709 params = {}
1710 path = dataset.path
1711 if delete_contents:
1712 params["deleteContents"] = "true"
1713 span_attributes = {"path": path, "deleteContents": delete_contents}
1714 else:
1715 span_attributes = {"path": path}
1717 try:
1718 self._call_api(
1719 retry,
1720 span_name="BigQuery.deleteDataset",
1721 span_attributes=span_attributes,
1722 method="DELETE",
1723 path=path,
1724 query_params=params,
1725 timeout=timeout,
1726 )
1727 except core_exceptions.NotFound:
1728 if not not_found_ok:
1729 raise
1731 def delete_model(
1732 self,
1733 model: Union[Model, ModelReference, str],
1734 retry: retries.Retry = DEFAULT_RETRY,
1735 timeout: TimeoutType = DEFAULT_TIMEOUT,
1736 not_found_ok: bool = False,
1737 ) -> None:
1738 """[Beta] Delete a model
1740 See
1741 https://cloud.google.com/bigquery/docs/reference/rest/v2/models/delete
1743 Args:
1744 model (Union[ \
1745 google.cloud.bigquery.model.Model, \
1746 google.cloud.bigquery.model.ModelReference, \
1747 str, \
1748 ]):
1749 A reference to the model to delete. If a string is passed in,
1750 this method attempts to create a model reference from a
1751 string using
1752 :func:`google.cloud.bigquery.model.ModelReference.from_string`.
1753 retry (Optional[google.api_core.retry.Retry]):
1754 How to retry the RPC.
1755 timeout (Optional[float]):
1756 The number of seconds to wait for the underlying HTTP transport
1757 before using ``retry``.
1758 not_found_ok (Optional[bool]):
1759 Defaults to ``False``. If ``True``, ignore "not found" errors
1760 when deleting the model.
1761 """
1762 if isinstance(model, str):
1763 model = ModelReference.from_string(model, default_project=self.project)
1765 if not isinstance(model, (Model, ModelReference)):
1766 raise TypeError("model must be a Model or a ModelReference")
1768 path = model.path
1769 try:
1770 span_attributes = {"path": path}
1771 self._call_api(
1772 retry,
1773 span_name="BigQuery.deleteModel",
1774 span_attributes=span_attributes,
1775 method="DELETE",
1776 path=path,
1777 timeout=timeout,
1778 )
1779 except core_exceptions.NotFound:
1780 if not not_found_ok:
1781 raise
1783 def delete_job_metadata(
1784 self,
1785 job_id: Union[str, LoadJob, CopyJob, ExtractJob, QueryJob],
1786 project: Optional[str] = None,
1787 location: Optional[str] = None,
1788 retry: retries.Retry = DEFAULT_RETRY,
1789 timeout: TimeoutType = DEFAULT_TIMEOUT,
1790 not_found_ok: bool = False,
1791 ):
1792 """[Beta] Delete job metadata from job history.
1794 Note: This does not stop a running job. Use
1795 :func:`~google.cloud.bigquery.client.Client.cancel_job` instead.
1797 Args:
1798 job_id (Union[ \
1799 str, \
1800 LoadJob, \
1801 CopyJob, \
1802 ExtractJob, \
1803 QueryJob \
1804 ]): Job or job identifier.
1805 project (Optional[str]):
1806 ID of the project which owns the job (defaults to the client's project).
1807 location (Optional[str]):
1808 Location where the job was run. Ignored if ``job_id`` is a job
1809 object.
1810 retry (Optional[google.api_core.retry.Retry]):
1811 How to retry the RPC.
1812 timeout (Optional[float]):
1813 The number of seconds to wait for the underlying HTTP transport
1814 before using ``retry``.
1815 not_found_ok (Optional[bool]):
1816 Defaults to ``False``. If ``True``, ignore "not found" errors
1817 when deleting the job.
1818 """
1819 extra_params = {}
1821 project, location, job_id = _extract_job_reference(
1822 job_id, project=project, location=location
1823 )
1825 if project is None:
1826 project = self.project
1828 if location is None:
1829 location = self.location
1831 # Location is always required for jobs.delete()
1832 extra_params["location"] = location
1834 path = f"/projects/{project}/jobs/{job_id}/delete"
1836 span_attributes = {"path": path, "job_id": job_id, "location": location}
1838 try:
1839 self._call_api(
1840 retry,
1841 span_name="BigQuery.deleteJob",
1842 span_attributes=span_attributes,
1843 method="DELETE",
1844 path=path,
1845 query_params=extra_params,
1846 timeout=timeout,
1847 )
1848 except google.api_core.exceptions.NotFound:
1849 if not not_found_ok:
1850 raise
1852 def delete_routine(
1853 self,
1854 routine: Union[Routine, RoutineReference, str],
1855 retry: retries.Retry = DEFAULT_RETRY,
1856 timeout: TimeoutType = DEFAULT_TIMEOUT,
1857 not_found_ok: bool = False,
1858 ) -> None:
1859 """[Beta] Delete a routine.
1861 See
1862 https://cloud.google.com/bigquery/docs/reference/rest/v2/routines/delete
1864 Args:
1865 routine (Union[ \
1866 google.cloud.bigquery.routine.Routine, \
1867 google.cloud.bigquery.routine.RoutineReference, \
1868 str, \
1869 ]):
1870 A reference to the routine to delete. If a string is passed
1871 in, this method attempts to create a routine reference from a
1872 string using
1873 :func:`google.cloud.bigquery.routine.RoutineReference.from_string`.
1874 retry (Optional[google.api_core.retry.Retry]):
1875 How to retry the RPC.
1876 timeout (Optional[float]):
1877 The number of seconds to wait for the underlying HTTP transport
1878 before using ``retry``.
1879 not_found_ok (Optional[bool]):
1880 Defaults to ``False``. If ``True``, ignore "not found" errors
1881 when deleting the routine.
1882 """
1883 if isinstance(routine, str):
1884 routine = RoutineReference.from_string(
1885 routine, default_project=self.project
1886 )
1887 path = routine.path
1889 if not isinstance(routine, (Routine, RoutineReference)):
1890 raise TypeError("routine must be a Routine or a RoutineReference")
1892 try:
1893 span_attributes = {"path": path}
1894 self._call_api(
1895 retry,
1896 span_name="BigQuery.deleteRoutine",
1897 span_attributes=span_attributes,
1898 method="DELETE",
1899 path=path,
1900 timeout=timeout,
1901 )
1902 except core_exceptions.NotFound:
1903 if not not_found_ok:
1904 raise
1906 def delete_table(
1907 self,
1908 table: Union[Table, TableReference, TableListItem, str],
1909 retry: retries.Retry = DEFAULT_RETRY,
1910 timeout: TimeoutType = DEFAULT_TIMEOUT,
1911 not_found_ok: bool = False,
1912 ) -> None:
1913 """Delete a table
1915 See
1916 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/delete
1918 Args:
1919 table (Union[ \
1920 google.cloud.bigquery.table.Table, \
1921 google.cloud.bigquery.table.TableReference, \
1922 google.cloud.bigquery.table.TableListItem, \
1923 str, \
1924 ]):
1925 A reference to the table to delete. If a string is passed in,
1926 this method attempts to create a table reference from a
1927 string using
1928 :func:`google.cloud.bigquery.table.TableReference.from_string`.
1929 retry (Optional[google.api_core.retry.Retry]):
1930 How to retry the RPC.
1931 timeout (Optional[float]):
1932 The number of seconds to wait for the underlying HTTP transport
1933 before using ``retry``.
1934 not_found_ok (Optional[bool]):
1935 Defaults to ``False``. If ``True``, ignore "not found" errors
1936 when deleting the table.
1937 """
1938 table = _table_arg_to_table_ref(table, default_project=self.project)
1939 if not isinstance(table, TableReference):
1940 raise TypeError("Unable to get TableReference for table '{}'".format(table))
1942 try:
1943 path = table.path
1944 span_attributes = {"path": path}
1945 self._call_api(
1946 retry,
1947 span_name="BigQuery.deleteTable",
1948 span_attributes=span_attributes,
1949 method="DELETE",
1950 path=path,
1951 timeout=timeout,
1952 )
1953 except core_exceptions.NotFound:
1954 if not not_found_ok:
1955 raise
1957 def _get_query_results(
1958 self,
1959 job_id: str,
1960 retry: retries.Retry,
1961 project: Optional[str] = None,
1962 timeout_ms: Optional[int] = None,
1963 location: Optional[str] = None,
1964 timeout: TimeoutType = DEFAULT_TIMEOUT,
1965 ) -> _QueryResults:
1966 """Get the query results object for a query job.
1968 Args:
1969 job_id (str): Name of the query job.
1970 retry (google.api_core.retry.Retry):
1971 How to retry the RPC.
1972 project (Optional[str]):
1973 Project ID for the query job (defaults to the project of the client).
1974 timeout_ms (Optional[int]):
1975 Number of milliseconds the the API call should wait for the query
1976 to complete before the request times out.
1977 location (Optional[str]): Location of the query job.
1978 timeout (Optional[float]):
1979 The number of seconds to wait for the underlying HTTP transport
1980 before using ``retry``. If set, this connection timeout may be
1981 increased to a minimum value. This prevents retries on what
1982 would otherwise be a successful response.
1984 Returns:
1985 google.cloud.bigquery.query._QueryResults:
1986 A new ``_QueryResults`` instance.
1987 """
1989 extra_params: Dict[str, Any] = {"maxResults": 0}
1991 if timeout is not None:
1992 if not isinstance(timeout, (int, float)):
1993 timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
1994 else:
1995 timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
1997 if project is None:
1998 project = self.project
2000 if timeout_ms is not None:
2001 extra_params["timeoutMs"] = timeout_ms
2003 if location is None:
2004 location = self.location
2006 if location is not None:
2007 extra_params["location"] = location
2009 path = "/projects/{}/queries/{}".format(project, job_id)
2011 # This call is typically made in a polling loop that checks whether the
2012 # job is complete (from QueryJob.done(), called ultimately from
2013 # QueryJob.result()). So we don't need to poll here.
2014 span_attributes = {"path": path}
2015 resource = self._call_api(
2016 retry,
2017 span_name="BigQuery.getQueryResults",
2018 span_attributes=span_attributes,
2019 method="GET",
2020 path=path,
2021 query_params=extra_params,
2022 timeout=timeout,
2023 )
2024 return _QueryResults.from_api_repr(resource)
2026 def job_from_resource(
2027 self, resource: dict
2028 ) -> Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]:
2029 """Detect correct job type from resource and instantiate.
2031 Args:
2032 resource (Dict): one job resource from API response
2034 Returns:
2035 Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]:
2036 The job instance, constructed via the resource.
2037 """
2038 config = resource.get("configuration", {})
2039 if "load" in config:
2040 return job.LoadJob.from_api_repr(resource, self)
2041 elif "copy" in config:
2042 return job.CopyJob.from_api_repr(resource, self)
2043 elif "extract" in config:
2044 return job.ExtractJob.from_api_repr(resource, self)
2045 elif "query" in config:
2046 return job.QueryJob.from_api_repr(resource, self)
2047 return job.UnknownJob.from_api_repr(resource, self)
    def create_job(
        self,
        job_config: dict,
        retry: retries.Retry = DEFAULT_RETRY,
        timeout: TimeoutType = DEFAULT_TIMEOUT,
    ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
        """Create a new job.

        Args:
            job_config (dict): configuration job representation returned from the API.
            retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``.

        Returns:
            Union[ \
                google.cloud.bigquery.job.LoadJob, \
                google.cloud.bigquery.job.CopyJob, \
                google.cloud.bigquery.job.ExtractJob, \
                google.cloud.bigquery.job.QueryJob \
            ]:
                A new job instance.

        Raises:
            TypeError: If ``job_config`` contains none of the recognized
                job-type keys (``load``, ``copy``, ``extract``, ``query``).
        """

        # Dispatch on which job-type key is present in the resource dict.
        # Each branch parses the matching *JobConfig from the full resource,
        # extracts the branch-specific fields, and delegates to the
        # corresponding high-level client method.
        if "load" in job_config:
            load_job_config = google.cloud.bigquery.job.LoadJobConfig.from_api_repr(
                job_config
            )
            destination = _get_sub_prop(job_config, ["load", "destinationTable"])
            source_uris = _get_sub_prop(job_config, ["load", "sourceUris"])
            destination = TableReference.from_api_repr(destination)
            return self.load_table_from_uri(
                source_uris,
                destination,
                job_config=typing.cast(LoadJobConfig, load_job_config),
                retry=retry,
                timeout=timeout,
            )
        elif "copy" in job_config:
            copy_job_config = google.cloud.bigquery.job.CopyJobConfig.from_api_repr(
                job_config
            )
            destination = _get_sub_prop(job_config, ["copy", "destinationTable"])
            destination = TableReference.from_api_repr(destination)
            return self.copy_table(
                [],  # Source table(s) already in job_config resource.
                destination,
                job_config=typing.cast(CopyJobConfig, copy_job_config),
                retry=retry,
                timeout=timeout,
            )
        elif "extract" in job_config:
            extract_job_config = (
                google.cloud.bigquery.job.ExtractJobConfig.from_api_repr(job_config)
            )
            # Extract jobs may read from either a table or a model; prefer
            # sourceTable and fall back to sourceModel when it is absent.
            source = _get_sub_prop(job_config, ["extract", "sourceTable"])
            if source:
                source_type = "Table"
                source = TableReference.from_api_repr(source)
            else:
                source = _get_sub_prop(job_config, ["extract", "sourceModel"])
                source_type = "Model"
                source = ModelReference.from_api_repr(source)
            destination_uris = _get_sub_prop(job_config, ["extract", "destinationUris"])
            return self.extract_table(
                source,
                destination_uris,
                job_config=typing.cast(ExtractJobConfig, extract_job_config),
                retry=retry,
                timeout=timeout,
                source_type=source_type,
            )
        elif "query" in job_config:
            query_job_config = google.cloud.bigquery.job.QueryJobConfig.from_api_repr(
                job_config
            )
            query = _get_sub_prop(job_config, ["query", "query"])
            return self.query(
                query,
                job_config=typing.cast(QueryJobConfig, query_job_config),
                retry=retry,
                timeout=timeout,
            )
        else:
            raise TypeError("Invalid job configuration received.")
2136 def get_job(
2137 self,
2138 job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
2139 project: Optional[str] = None,
2140 location: Optional[str] = None,
2141 retry: retries.Retry = DEFAULT_RETRY,
2142 timeout: TimeoutType = DEFAULT_TIMEOUT,
2143 ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
2144 """Fetch a job for the project associated with this client.
2146 See
2147 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
2149 Args:
2150 job_id (Union[ \
2151 str, \
2152 job.LoadJob, \
2153 job.CopyJob, \
2154 job.ExtractJob, \
2155 job.QueryJob \
2156 ]):
2157 Job identifier.
2158 project (Optional[str]):
2159 ID of the project which owns the job (defaults to the client's project).
2160 location (Optional[str]):
2161 Location where the job was run. Ignored if ``job_id`` is a job
2162 object.
2163 retry (Optional[google.api_core.retry.Retry]):
2164 How to retry the RPC.
2165 timeout (Optional[float]):
2166 The number of seconds to wait for the underlying HTTP transport
2167 before using ``retry``.
2169 Returns:
2170 Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
2171 Job instance, based on the resource returned by the API.
2172 """
2173 extra_params = {"projection": "full"}
2175 project, location, job_id = _extract_job_reference(
2176 job_id, project=project, location=location
2177 )
2179 if project is None:
2180 project = self.project
2182 if location is None:
2183 location = self.location
2185 if location is not None:
2186 extra_params["location"] = location
2188 path = "/projects/{}/jobs/{}".format(project, job_id)
2190 span_attributes = {"path": path, "job_id": job_id, "location": location}
2192 resource = self._call_api(
2193 retry,
2194 span_name="BigQuery.getJob",
2195 span_attributes=span_attributes,
2196 method="GET",
2197 path=path,
2198 query_params=extra_params,
2199 timeout=timeout,
2200 )
2202 return self.job_from_resource(resource)
2204 def cancel_job(
2205 self,
2206 job_id: str,
2207 project: Optional[str] = None,
2208 location: Optional[str] = None,
2209 retry: retries.Retry = DEFAULT_RETRY,
2210 timeout: TimeoutType = DEFAULT_TIMEOUT,
2211 ) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
2212 """Attempt to cancel a job from a job ID.
2214 See
2215 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/cancel
2217 Args:
2218 job_id (Union[ \
2219 str, \
2220 google.cloud.bigquery.job.LoadJob, \
2221 google.cloud.bigquery.job.CopyJob, \
2222 google.cloud.bigquery.job.ExtractJob, \
2223 google.cloud.bigquery.job.QueryJob \
2224 ]): Job identifier.
2225 project (Optional[str]):
2226 ID of the project which owns the job (defaults to the client's project).
2227 location (Optional[str]):
2228 Location where the job was run. Ignored if ``job_id`` is a job
2229 object.
2230 retry (Optional[google.api_core.retry.Retry]):
2231 How to retry the RPC.
2232 timeout (Optional[float]):
2233 The number of seconds to wait for the underlying HTTP transport
2234 before using ``retry``.
2236 Returns:
2237 Union[ \
2238 google.cloud.bigquery.job.LoadJob, \
2239 google.cloud.bigquery.job.CopyJob, \
2240 google.cloud.bigquery.job.ExtractJob, \
2241 google.cloud.bigquery.job.QueryJob, \
2242 ]:
2243 Job instance, based on the resource returned by the API.
2244 """
2245 extra_params = {"projection": "full"}
2247 project, location, job_id = _extract_job_reference(
2248 job_id, project=project, location=location
2249 )
2251 if project is None:
2252 project = self.project
2254 if location is None:
2255 location = self.location
2257 if location is not None:
2258 extra_params["location"] = location
2260 path = "/projects/{}/jobs/{}/cancel".format(project, job_id)
2262 span_attributes = {"path": path, "job_id": job_id, "location": location}
2264 resource = self._call_api(
2265 retry,
2266 span_name="BigQuery.cancelJob",
2267 span_attributes=span_attributes,
2268 method="POST",
2269 path=path,
2270 query_params=extra_params,
2271 timeout=timeout,
2272 )
2274 job_instance = self.job_from_resource(resource["job"]) # never an UnknownJob
2276 return typing.cast(
2277 Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
2278 job_instance,
2279 )
2281 def list_jobs(
2282 self,
2283 project: Optional[str] = None,
2284 parent_job: Optional[Union[QueryJob, str]] = None,
2285 max_results: Optional[int] = None,
2286 page_token: Optional[str] = None,
2287 all_users: Optional[bool] = None,
2288 state_filter: Optional[str] = None,
2289 retry: retries.Retry = DEFAULT_RETRY,
2290 timeout: TimeoutType = DEFAULT_TIMEOUT,
2291 min_creation_time: Optional[datetime.datetime] = None,
2292 max_creation_time: Optional[datetime.datetime] = None,
2293 page_size: Optional[int] = None,
2294 ) -> page_iterator.Iterator:
2295 """List jobs for the project associated with this client.
2297 See
2298 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/list
2300 Args:
2301 project (Optional[str]):
2302 Project ID to use for retreiving datasets. Defaults
2303 to the client's project.
2304 parent_job (Optional[Union[ \
2305 google.cloud.bigquery.job._AsyncJob, \
2306 str, \
2307 ]]):
2308 If set, retrieve only child jobs of the specified parent.
2309 max_results (Optional[int]):
2310 Maximum number of jobs to return.
2311 page_token (Optional[str]):
2312 Opaque marker for the next "page" of jobs. If not
2313 passed, the API will return the first page of jobs. The token
2314 marks the beginning of the iterator to be returned and the
2315 value of the ``page_token`` can be accessed at
2316 ``next_page_token`` of
2317 :class:`~google.api_core.page_iterator.HTTPIterator`.
2318 all_users (Optional[bool]):
2319 If true, include jobs owned by all users in the project.
2320 Defaults to :data:`False`.
2321 state_filter (Optional[str]):
2322 If set, include only jobs matching the given state. One of:
2323 * ``"done"``
2324 * ``"pending"``
2325 * ``"running"``
2326 retry (Optional[google.api_core.retry.Retry]):
2327 How to retry the RPC.
2328 timeout (Optional[float]):
2329 The number of seconds to wait for the underlying HTTP transport
2330 before using ``retry``.
2331 min_creation_time (Optional[datetime.datetime]):
2332 Min value for job creation time. If set, only jobs created
2333 after or at this timestamp are returned. If the datetime has
2334 no time zone assumes UTC time.
2335 max_creation_time (Optional[datetime.datetime]):
2336 Max value for job creation time. If set, only jobs created
2337 before or at this timestamp are returned. If the datetime has
2338 no time zone assumes UTC time.
2339 page_size (Optional[int]):
2340 Maximum number of jobs to return per page.
2342 Returns:
2343 google.api_core.page_iterator.Iterator:
2344 Iterable of job instances.
2345 """
2346 if isinstance(parent_job, job._AsyncJob):
2347 parent_job = parent_job.job_id # pytype: disable=attribute-error
2349 extra_params = {
2350 "allUsers": all_users,
2351 "stateFilter": state_filter,
2352 "minCreationTime": _str_or_none(
2353 google.cloud._helpers._millis_from_datetime(min_creation_time)
2354 ),
2355 "maxCreationTime": _str_or_none(
2356 google.cloud._helpers._millis_from_datetime(max_creation_time)
2357 ),
2358 "projection": "full",
2359 "parentJobId": parent_job,
2360 }
2362 extra_params = {
2363 param: value for param, value in extra_params.items() if value is not None
2364 }
2366 if project is None:
2367 project = self.project
2369 path = "/projects/%s/jobs" % (project,)
2371 span_attributes = {"path": path}
2373 def api_request(*args, **kwargs):
2374 return self._call_api(
2375 retry,
2376 span_name="BigQuery.listJobs",
2377 span_attributes=span_attributes,
2378 *args,
2379 timeout=timeout,
2380 **kwargs,
2381 )
2383 return page_iterator.HTTPIterator(
2384 client=self,
2385 api_request=api_request,
2386 path=path,
2387 item_to_value=_item_to_job,
2388 items_key="jobs",
2389 page_token=page_token,
2390 max_results=max_results,
2391 extra_params=extra_params,
2392 page_size=page_size,
2393 )
2395 def load_table_from_uri(
2396 self,
2397 source_uris: Union[str, Sequence[str]],
2398 destination: Union[Table, TableReference, TableListItem, str],
2399 job_id: Optional[str] = None,
2400 job_id_prefix: Optional[str] = None,
2401 location: Optional[str] = None,
2402 project: Optional[str] = None,
2403 job_config: Optional[LoadJobConfig] = None,
2404 retry: retries.Retry = DEFAULT_RETRY,
2405 timeout: TimeoutType = DEFAULT_TIMEOUT,
2406 ) -> job.LoadJob:
2407 """Starts a job for loading data into a table from Cloud Storage.
2409 See
2410 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationload
2412 Args:
2413 source_uris (Union[str, Sequence[str]]):
2414 URIs of data files to be loaded; in format
2415 ``gs://<bucket_name>/<object_name_or_glob>``.
2416 destination (Union[ \
2417 google.cloud.bigquery.table.Table, \
2418 google.cloud.bigquery.table.TableReference, \
2419 google.cloud.bigquery.table.TableListItem, \
2420 str, \
2421 ]):
2422 Table into which data is to be loaded. If a string is passed
2423 in, this method attempts to create a table reference from a
2424 string using
2425 :func:`google.cloud.bigquery.table.TableReference.from_string`.
2426 job_id (Optional[str]): Name of the job.
2427 job_id_prefix (Optional[str]):
2428 The user-provided prefix for a randomly generated job ID.
2429 This parameter will be ignored if a ``job_id`` is also given.
2430 location (Optional[str]):
2431 Location where to run the job. Must match the location of the
2432 destination table.
2433 project (Optional[str]):
2434 Project ID of the project of where to run the job. Defaults
2435 to the client's project.
2436 job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
2437 Extra configuration options for the job.
2438 retry (Optional[google.api_core.retry.Retry]):
2439 How to retry the RPC.
2440 timeout (Optional[float]):
2441 The number of seconds to wait for the underlying HTTP transport
2442 before using ``retry``.
2444 Returns:
2445 google.cloud.bigquery.job.LoadJob: A new load job.
2447 Raises:
2448 TypeError:
2449 If ``job_config`` is not an instance of
2450 :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2451 """
2452 job_id = _make_job_id(job_id, job_id_prefix)
2454 if project is None:
2455 project = self.project
2457 if location is None:
2458 location = self.location
2460 job_ref = job._JobReference(job_id, project=project, location=location)
2462 if isinstance(source_uris, str):
2463 source_uris = [source_uris]
2465 destination = _table_arg_to_table_ref(destination, default_project=self.project)
2467 if job_config is not None:
2468 _verify_job_config_type(job_config, LoadJobConfig)
2469 else:
2470 job_config = job.LoadJobConfig()
2472 new_job_config = job_config._fill_from_default(self._default_load_job_config)
2474 load_job = job.LoadJob(job_ref, source_uris, destination, self, new_job_config)
2475 load_job._begin(retry=retry, timeout=timeout)
2477 return load_job
2479 def load_table_from_file(
2480 self,
2481 file_obj: IO[bytes],
2482 destination: Union[Table, TableReference, TableListItem, str],
2483 rewind: bool = False,
2484 size: Optional[int] = None,
2485 num_retries: int = _DEFAULT_NUM_RETRIES,
2486 job_id: Optional[str] = None,
2487 job_id_prefix: Optional[str] = None,
2488 location: Optional[str] = None,
2489 project: Optional[str] = None,
2490 job_config: Optional[LoadJobConfig] = None,
2491 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
2492 ) -> job.LoadJob:
2493 """Upload the contents of this table from a file-like object.
2495 Similar to :meth:`load_table_from_uri`, this method creates, starts and
2496 returns a :class:`~google.cloud.bigquery.job.LoadJob`.
2498 Args:
2499 file_obj (IO[bytes]):
2500 A file handle opened in binary mode for reading.
2501 destination (Union[Table, \
2502 TableReference, \
2503 TableListItem, \
2504 str \
2505 ]):
2506 Table into which data is to be loaded. If a string is passed
2507 in, this method attempts to create a table reference from a
2508 string using
2509 :func:`google.cloud.bigquery.table.TableReference.from_string`.
2510 rewind (Optional[bool]):
2511 If True, seek to the beginning of the file handle before
2512 reading the file. Defaults to False.
2513 size (Optional[int]):
2514 The number of bytes to read from the file handle. If size is
2515 ``None`` or large, resumable upload will be used. Otherwise,
2516 multipart upload will be used.
2517 num_retries (Optional[int]): Number of upload retries. Defaults to 6.
2518 job_id (Optional[str]): Name of the job.
2519 job_id_prefix (Optional[str]):
2520 The user-provided prefix for a randomly generated job ID.
2521 This parameter will be ignored if a ``job_id`` is also given.
2522 location (Optional[str]):
2523 Location where to run the job. Must match the location of the
2524 destination table.
2525 project (Optional[str]):
2526 Project ID of the project of where to run the job. Defaults
2527 to the client's project.
2528 job_config (Optional[LoadJobConfig]):
2529 Extra configuration options for the job.
2530 timeout (Optional[float]):
2531 The number of seconds to wait for the underlying HTTP transport
2532 before using ``retry``. Depending on the retry strategy, a request
2533 may be repeated several times using the same timeout each time.
2534 Defaults to None.
2536 Can also be passed as a tuple (connect_timeout, read_timeout).
2537 See :meth:`requests.Session.request` documentation for details.
2539 Returns:
2540 google.cloud.bigquery.job.LoadJob: A new load job.
2542 Raises:
2543 ValueError:
2544 If ``size`` is not passed in and can not be determined, or if
2545 the ``file_obj`` can be detected to be a file opened in text
2546 mode.
2548 TypeError:
2549 If ``job_config`` is not an instance of
2550 :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2551 """
2552 job_id = _make_job_id(job_id, job_id_prefix)
2554 if project is None:
2555 project = self.project
2557 if location is None:
2558 location = self.location
2560 destination = _table_arg_to_table_ref(destination, default_project=self.project)
2561 job_ref = job._JobReference(job_id, project=project, location=location)
2563 if job_config is not None:
2564 _verify_job_config_type(job_config, LoadJobConfig)
2565 else:
2566 job_config = job.LoadJobConfig()
2568 new_job_config = job_config._fill_from_default(self._default_load_job_config)
2570 load_job = job.LoadJob(job_ref, None, destination, self, new_job_config)
2571 job_resource = load_job.to_api_repr()
2573 if rewind:
2574 file_obj.seek(0, os.SEEK_SET)
2576 _check_mode(file_obj)
2578 try:
2579 if size is None or size >= _MAX_MULTIPART_SIZE:
2580 response = self._do_resumable_upload(
2581 file_obj, job_resource, num_retries, timeout, project=project
2582 )
2583 else:
2584 response = self._do_multipart_upload(
2585 file_obj, job_resource, size, num_retries, timeout, project=project
2586 )
2587 except resumable_media.InvalidResponse as exc:
2588 raise exceptions.from_http_response(exc.response)
2590 return typing.cast(LoadJob, self.job_from_resource(response.json()))
    def load_table_from_dataframe(
        self,
        dataframe: "pandas.DataFrame",  # type: ignore
        destination: Union[Table, TableReference, str],
        num_retries: int = _DEFAULT_NUM_RETRIES,
        job_id: Optional[str] = None,
        job_id_prefix: Optional[str] = None,
        location: Optional[str] = None,
        project: Optional[str] = None,
        job_config: Optional[LoadJobConfig] = None,
        parquet_compression: str = "snappy",
        timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
    ) -> job.LoadJob:
        """Upload the contents of a table from a pandas DataFrame.

        Similar to :meth:`load_table_from_uri`, this method creates, starts and
        returns a :class:`~google.cloud.bigquery.job.LoadJob`.

        .. note::

            REPEATED fields are NOT supported when using the CSV source format.
            They are supported when using the PARQUET source format, but
            due to the way they are encoded in the ``parquet`` file,
            a mismatch with the existing table schema can occur, so
            REPEATED fields are not properly supported when using ``pyarrow<4.0.0``
            using the parquet format.

            https://github.com/googleapis/python-bigquery/issues/19

        Args:
            dataframe (pandas.Dataframe):
                A :class:`~pandas.DataFrame` containing the data to load.
            destination (Union[ \
                Table, \
                TableReference, \
                str \
            ]):
                The destination table to use for loading the data. If it is an
                existing table, the schema of the :class:`~pandas.DataFrame`
                must match the schema of the destination table. If the table
                does not yet exist, the schema is inferred from the
                :class:`~pandas.DataFrame`.

                If a string is passed in, this method attempts to create a
                table reference from a string using
                :func:`google.cloud.bigquery.table.TableReference.from_string`.
            num_retries (Optional[int]): Number of upload retries. Defaults to 6.
            job_id (Optional[str]): Name of the job.
            job_id_prefix (Optional[str]):
                The user-provided prefix for a randomly generated
                job ID. This parameter will be ignored if a ``job_id`` is
                also given.
            location (Optional[str]):
                Location where to run the job. Must match the location of the
                destination table.
            project (Optional[str]):
                Project ID of the project of where to run the job. Defaults
                to the client's project.
            job_config (Optional[LoadJobConfig]):
                Extra configuration options for the job.

                To override the default pandas data type conversions, supply
                a value for
                :attr:`~google.cloud.bigquery.job.LoadJobConfig.schema` with
                column names matching those of the dataframe. The BigQuery
                schema is used to determine the correct data type conversion.
                Indexes are not loaded.

                By default, this method uses the parquet source format. To
                override this, supply a value for
                :attr:`~google.cloud.bigquery.job.LoadJobConfig.source_format`
                with the format name. Currently only
                :attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and
                :attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are
                supported.
            parquet_compression (Optional[str]):
                [Beta] The compression method to use if intermittently
                serializing ``dataframe`` to a parquet file.
                Defaults to "snappy".

                The argument is directly passed as the ``compression``
                argument to the underlying ``pyarrow.parquet.write_table()``
                method (the default value "snappy" gets converted to uppercase).
                https://arrow.apache.org/docs/python/generated/pyarrow.parquet.write_table.html#pyarrow-parquet-write-table

                If the job config schema is missing, the argument is directly
                passed as the ``compression`` argument to the underlying
                ``DataFrame.to_parquet()`` method.
                https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
            timeout (Optional[float]):
                The number of seconds to wait for the underlying HTTP transport
                before using ``retry``. Depending on the retry strategy, a request may
                be repeated several times using the same timeout each time.
                Defaults to None.

                Can also be passed as a tuple (connect_timeout, read_timeout).
                See :meth:`requests.Session.request` documentation for details.

        Returns:
            google.cloud.bigquery.job.LoadJob: A new load job.

        Raises:
            ValueError:
                If a usable parquet engine cannot be found. This method
                requires :mod:`pyarrow` to be installed.
            TypeError:
                If ``job_config`` is not an instance of
                :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
        """
        job_id = _make_job_id(job_id, job_id_prefix)

        if job_config is not None:
            _verify_job_config_type(job_config, LoadJobConfig)
        else:
            job_config = job.LoadJobConfig()

        # Layer the client's default load-job settings under the caller's.
        new_job_config = job_config._fill_from_default(self._default_load_job_config)

        supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET}
        if new_job_config.source_format is None:
            # default value
            new_job_config.source_format = job.SourceFormat.PARQUET

        if (
            new_job_config.source_format == job.SourceFormat.PARQUET
            and new_job_config.parquet_options is None
        ):
            parquet_options = ParquetOptions()
            # default value
            parquet_options.enable_list_inference = True
            new_job_config.parquet_options = parquet_options

        if new_job_config.source_format not in supported_formats:
            raise ValueError(
                "Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format(
                    new_job_config.source_format
                )
            )

        if pyarrow is None and new_job_config.source_format == job.SourceFormat.PARQUET:
            # pyarrow is now the only supported parquet engine.
            raise ValueError("This method requires pyarrow to be installed")

        if location is None:
            location = self.location

        # If table schema is not provided, we try to fetch the existing table
        # schema, and check if dataframe schema is compatible with it - except
        # for WRITE_TRUNCATE jobs, the existing schema does not matter then.
        if (
            not new_job_config.schema
            and new_job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE
        ):
            try:
                table = self.get_table(destination)
            except core_exceptions.NotFound:
                # Destination does not exist yet; schema will be inferred
                # purely from the dataframe below.
                pass
            else:
                columns_and_indexes = frozenset(
                    name
                    for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe)
                )
                new_job_config.schema = [
                    # Field description and policy tags are not needed to
                    # serialize a data frame.
                    SchemaField(
                        field.name,
                        field.field_type,
                        mode=field.mode,
                        fields=field.fields,
                    )
                    # schema fields not present in the dataframe are not needed
                    for field in table.schema
                    if field.name in columns_and_indexes
                ]

        # Fill in any columns still missing a type from the dataframe itself.
        new_job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
            dataframe, new_job_config.schema
        )

        if not new_job_config.schema:
            # the schema could not be fully detected
            warnings.warn(
                "Schema could not be detected for all columns. Loading from a "
                "dataframe without a schema will be deprecated in the future, "
                "please provide a schema.",
                PendingDeprecationWarning,
                stacklevel=2,
            )

        # Serialize to a temporary local file, then upload via
        # load_table_from_file; the file is always removed afterwards.
        tmpfd, tmppath = tempfile.mkstemp(
            suffix="_job_{}.{}".format(job_id[:8], new_job_config.source_format.lower())
        )
        os.close(tmpfd)

        try:
            if new_job_config.source_format == job.SourceFormat.PARQUET:
                if new_job_config.schema:
                    if parquet_compression == "snappy":  # adjust the default value
                        parquet_compression = parquet_compression.upper()

                    _pandas_helpers.dataframe_to_parquet(
                        dataframe,
                        new_job_config.schema,
                        tmppath,
                        parquet_compression=parquet_compression,
                        parquet_use_compliant_nested_type=True,
                    )
                else:
                    dataframe.to_parquet(
                        tmppath,
                        engine="pyarrow",
                        compression=parquet_compression,
                        **(
                            {"use_compliant_nested_type": True}
                            if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type
                            else {}
                        ),
                    )

            else:
                dataframe.to_csv(
                    tmppath,
                    index=False,
                    header=False,
                    encoding="utf-8",
                    float_format="%.17g",
                    date_format="%Y-%m-%d %H:%M:%S.%f",
                )

            with open(tmppath, "rb") as tmpfile:
                file_size = os.path.getsize(tmppath)
                return self.load_table_from_file(
                    tmpfile,
                    destination,
                    num_retries=num_retries,
                    rewind=True,
                    size=file_size,
                    job_id=job_id,
                    job_id_prefix=job_id_prefix,
                    location=location,
                    project=project,
                    job_config=new_job_config,
                    timeout=timeout,
                )

        finally:
            os.remove(tmppath)
2841 def load_table_from_json(
2842 self,
2843 json_rows: Iterable[Dict[str, Any]],
2844 destination: Union[Table, TableReference, TableListItem, str],
2845 num_retries: int = _DEFAULT_NUM_RETRIES,
2846 job_id: Optional[str] = None,
2847 job_id_prefix: Optional[str] = None,
2848 location: Optional[str] = None,
2849 project: Optional[str] = None,
2850 job_config: Optional[LoadJobConfig] = None,
2851 timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
2852 ) -> job.LoadJob:
2853 """Upload the contents of a table from a JSON string or dict.
2855 Args:
2856 json_rows (Iterable[Dict[str, Any]]):
2857 Row data to be inserted. Keys must match the table schema fields
2858 and values must be JSON-compatible representations.
2860 .. note::
2862 If your data is already a newline-delimited JSON string,
2863 it is best to wrap it into a file-like object and pass it
2864 to :meth:`~google.cloud.bigquery.client.Client.load_table_from_file`::
2866 import io
2867 from google.cloud import bigquery
2869 data = u'{"foo": "bar"}'
2870 data_as_file = io.StringIO(data)
2872 client = bigquery.Client()
2873 client.load_table_from_file(data_as_file, ...)
2875 destination (Union[ \
2876 Table, \
2877 TableReference, \
2878 TableListItem, \
2879 str \
2880 ]):
2881 Table into which data is to be loaded. If a string is passed
2882 in, this method attempts to create a table reference from a
2883 string using
2884 :func:`google.cloud.bigquery.table.TableReference.from_string`.
2885 num_retries (Optional[int]): Number of upload retries. Defaults to 6.
2886 job_id (Optional[str]): Name of the job.
2887 job_id_prefix (Optional[str]):
2888 The user-provided prefix for a randomly generated job ID.
2889 This parameter will be ignored if a ``job_id`` is also given.
2890 location (Optional[str]):
2891 Location where to run the job. Must match the location of the
2892 destination table.
2893 project (Optional[str]):
2894 Project ID of the project of where to run the job. Defaults
2895 to the client's project.
2896 job_config (Optional[LoadJobConfig]):
2897 Extra configuration options for the job. The ``source_format``
2898 setting is always set to
2899 :attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`.
2900 timeout (Optional[float]):
2901 The number of seconds to wait for the underlying HTTP transport
2902 before using ``retry``. Depending on the retry strategy, a request may
2903 be repeated several times using the same timeout each time.
2904 Defaults to None.
2906 Can also be passed as a tuple (connect_timeout, read_timeout).
2907 See :meth:`requests.Session.request` documentation for details.
2909 Returns:
2910 google.cloud.bigquery.job.LoadJob: A new load job.
2912 Raises:
2913 TypeError:
2914 If ``job_config`` is not an instance of
2915 :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
2916 """
2917 job_id = _make_job_id(job_id, job_id_prefix)
2919 if job_config is not None:
2920 _verify_job_config_type(job_config, LoadJobConfig)
2921 else:
2922 job_config = job.LoadJobConfig()
2924 new_job_config = job_config._fill_from_default(self._default_load_job_config)
2926 new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON
2928 # In specific conditions, we check if the table alread exists, and/or
2929 # set the autodetect value for the user. For exact conditions, see table
2930 # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
2931 if new_job_config.schema is None and new_job_config.autodetect is None:
2932 if new_job_config.write_disposition in (
2933 job.WriteDisposition.WRITE_TRUNCATE,
2934 job.WriteDisposition.WRITE_EMPTY,
2935 ):
2936 new_job_config.autodetect = True
2937 else:
2938 try:
2939 self.get_table(destination)
2940 except core_exceptions.NotFound:
2941 new_job_config.autodetect = True
2942 else:
2943 new_job_config.autodetect = False
2945 if project is None:
2946 project = self.project
2948 if location is None:
2949 location = self.location
2951 destination = _table_arg_to_table_ref(destination, default_project=self.project)
2953 data_str = "\n".join(json.dumps(item, ensure_ascii=False) for item in json_rows)
2954 encoded_str = data_str.encode()
2955 data_file = io.BytesIO(encoded_str)
2956 return self.load_table_from_file(
2957 data_file,
2958 destination,
2959 size=len(encoded_str),
2960 num_retries=num_retries,
2961 job_id=job_id,
2962 job_id_prefix=job_id_prefix,
2963 location=location,
2964 project=project,
2965 job_config=new_job_config,
2966 timeout=timeout,
2967 )
2969 def _do_resumable_upload(
2970 self,
2971 stream: IO[bytes],
2972 metadata: Mapping[str, str],
2973 num_retries: int,
2974 timeout: Optional[ResumableTimeoutType],
2975 project: Optional[str] = None,
2976 ) -> "requests.Response":
2977 """Perform a resumable upload.
2979 Args:
2980 stream (IO[bytes]): A bytes IO object open for reading.
2981 metadata (Mapping[str, str]): The metadata associated with the upload.
2982 num_retries (int):
2983 Number of upload retries. (Deprecated: This
2984 argument will be removed in a future release.)
2985 timeout (Optional[float]):
2986 The number of seconds to wait for the underlying HTTP transport
2987 before using ``retry``. Depending on the retry strategy, a request may
2988 be repeated several times using the same timeout each time.
2990 Can also be passed as a tuple (connect_timeout, read_timeout).
2991 See :meth:`requests.Session.request` documentation for details.
2992 project (Optional[str]):
2993 Project ID of the project of where to run the upload. Defaults
2994 to the client's project.
2996 Returns:
2997 The "200 OK" response object returned after the final chunk
2998 is uploaded.
2999 """
3000 upload, transport = self._initiate_resumable_upload(
3001 stream, metadata, num_retries, timeout, project=project
3002 )
3004 while not upload.finished:
3005 response = upload.transmit_next_chunk(transport, timeout=timeout)
3007 return response
3009 def _initiate_resumable_upload(
3010 self,
3011 stream: IO[bytes],
3012 metadata: Mapping[str, str],
3013 num_retries: int,
3014 timeout: Optional[ResumableTimeoutType],
3015 project: Optional[str] = None,
3016 ):
3017 """Initiate a resumable upload.
3019 Args:
3020 stream (IO[bytes]): A bytes IO object open for reading.
3021 metadata (Mapping[str, str]): The metadata associated with the upload.
3022 num_retries (int):
3023 Number of upload retries. (Deprecated: This
3024 argument will be removed in a future release.)
3025 timeout (Optional[float]):
3026 The number of seconds to wait for the underlying HTTP transport
3027 before using ``retry``. Depending on the retry strategy, a request may
3028 be repeated several times using the same timeout each time.
3030 Can also be passed as a tuple (connect_timeout, read_timeout).
3031 See :meth:`requests.Session.request` documentation for details.
3032 project (Optional[str]):
3033 Project ID of the project of where to run the upload. Defaults
3034 to the client's project.
3036 Returns:
3037 Tuple:
3038 Pair of
3040 * The :class:`~google.resumable_media.requests.ResumableUpload`
3041 that was created
3042 * The ``transport`` used to initiate the upload.
3043 """
3044 chunk_size = _DEFAULT_CHUNKSIZE
3045 transport = self._http
3046 headers = _get_upload_headers(self._connection.user_agent)
3048 if project is None:
3049 project = self.project
3050 # TODO: Increase the minimum version of google-cloud-core to 1.6.0
3051 # and remove this logic. See:
3052 # https://github.com/googleapis/python-bigquery/issues/509
3053 hostname = (
3054 self._connection.API_BASE_URL
3055 if not hasattr(self._connection, "get_api_base_url_for_mtls")
3056 else self._connection.get_api_base_url_for_mtls()
3057 )
3058 upload_url = _RESUMABLE_URL_TEMPLATE.format(host=hostname, project=project)
3060 # TODO: modify ResumableUpload to take a retry.Retry object
3061 # that it can use for the initial RPC.
3062 upload = ResumableUpload(upload_url, chunk_size, headers=headers)
3064 if num_retries is not None:
3065 upload._retry_strategy = resumable_media.RetryStrategy(
3066 max_retries=num_retries
3067 )
3069 upload.initiate(
3070 transport,
3071 stream,
3072 metadata,
3073 _GENERIC_CONTENT_TYPE,
3074 stream_final=False,
3075 timeout=timeout,
3076 )
3078 return upload, transport
3080 def _do_multipart_upload(
3081 self,
3082 stream: IO[bytes],
3083 metadata: Mapping[str, str],
3084 size: int,
3085 num_retries: int,
3086 timeout: Optional[ResumableTimeoutType],
3087 project: Optional[str] = None,
3088 ):
3089 """Perform a multipart upload.
3091 Args:
3092 stream (IO[bytes]): A bytes IO object open for reading.
3093 metadata (Mapping[str, str]): The metadata associated with the upload.
3094 size (int):
3095 The number of bytes to be uploaded (which will be read
3096 from ``stream``). If not provided, the upload will be
3097 concluded once ``stream`` is exhausted (or :data:`None`).
3098 num_retries (int):
3099 Number of upload retries. (Deprecated: This
3100 argument will be removed in a future release.)
3101 timeout (Optional[float]):
3102 The number of seconds to wait for the underlying HTTP transport
3103 before using ``retry``. Depending on the retry strategy, a request may
3104 be repeated several times using the same timeout each time.
3106 Can also be passed as a tuple (connect_timeout, read_timeout).
3107 See :meth:`requests.Session.request` documentation for details.
3108 project (Optional[str]):
3109 Project ID of the project of where to run the upload. Defaults
3110 to the client's project.
3112 Returns:
3113 requests.Response:
3114 The "200 OK" response object returned after the multipart
3115 upload request.
3117 Raises:
3118 ValueError:
3119 if the ``stream`` has fewer than ``size``
3120 bytes remaining.
3121 """
3122 data = stream.read(size)
3123 if len(data) < size:
3124 msg = _READ_LESS_THAN_SIZE.format(size, len(data))
3125 raise ValueError(msg)
3127 headers = _get_upload_headers(self._connection.user_agent)
3129 if project is None:
3130 project = self.project
3132 # TODO: Increase the minimum version of google-cloud-core to 1.6.0
3133 # and remove this logic. See:
3134 # https://github.com/googleapis/python-bigquery/issues/509
3135 hostname = (
3136 self._connection.API_BASE_URL
3137 if not hasattr(self._connection, "get_api_base_url_for_mtls")
3138 else self._connection.get_api_base_url_for_mtls()
3139 )
3140 upload_url = _MULTIPART_URL_TEMPLATE.format(host=hostname, project=project)
3141 upload = MultipartUpload(upload_url, headers=headers)
3143 if num_retries is not None:
3144 upload._retry_strategy = resumable_media.RetryStrategy(
3145 max_retries=num_retries
3146 )
3148 response = upload.transmit(
3149 self._http, data, metadata, _GENERIC_CONTENT_TYPE, timeout=timeout
3150 )
3152 return response
3154 def copy_table(
3155 self,
3156 sources: Union[
3157 Table,
3158 TableReference,
3159 TableListItem,
3160 str,
3161 Sequence[Union[Table, TableReference, TableListItem, str]],
3162 ],
3163 destination: Union[Table, TableReference, TableListItem, str],
3164 job_id: Optional[str] = None,
3165 job_id_prefix: Optional[str] = None,
3166 location: Optional[str] = None,
3167 project: Optional[str] = None,
3168 job_config: Optional[CopyJobConfig] = None,
3169 retry: retries.Retry = DEFAULT_RETRY,
3170 timeout: TimeoutType = DEFAULT_TIMEOUT,
3171 ) -> job.CopyJob:
3172 """Copy one or more tables to another table.
3174 See
3175 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationtablecopy
3177 Args:
3178 sources (Union[ \
3179 google.cloud.bigquery.table.Table, \
3180 google.cloud.bigquery.table.TableReference, \
3181 google.cloud.bigquery.table.TableListItem, \
3182 str, \
3183 Sequence[ \
3184 Union[ \
3185 google.cloud.bigquery.table.Table, \
3186 google.cloud.bigquery.table.TableReference, \
3187 google.cloud.bigquery.table.TableListItem, \
3188 str, \
3189 ] \
3190 ], \
3191 ]):
3192 Table or tables to be copied.
3193 destination (Union[ \
3194 google.cloud.bigquery.table.Table, \
3195 google.cloud.bigquery.table.TableReference, \
3196 google.cloud.bigquery.table.TableListItem, \
3197 str, \
3198 ]):
3199 Table into which data is to be copied.
3200 job_id (Optional[str]): The ID of the job.
3201 job_id_prefix (Optional[str]):
3202 The user-provided prefix for a randomly generated job ID.
3203 This parameter will be ignored if a ``job_id`` is also given.
3204 location (Optional[str]):
3205 Location where to run the job. Must match the location of any
3206 source table as well as the destination table.
3207 project (Optional[str]):
3208 Project ID of the project of where to run the job. Defaults
3209 to the client's project.
3210 job_config (Optional[google.cloud.bigquery.job.CopyJobConfig]):
3211 Extra configuration options for the job.
3212 retry (Optional[google.api_core.retry.Retry]):
3213 How to retry the RPC.
3214 timeout (Optional[float]):
3215 The number of seconds to wait for the underlying HTTP transport
3216 before using ``retry``.
3218 Returns:
3219 google.cloud.bigquery.job.CopyJob: A new copy job instance.
3221 Raises:
3222 TypeError:
3223 If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.CopyJobConfig`
3224 class.
3225 """
3226 job_id = _make_job_id(job_id, job_id_prefix)
3228 if project is None:
3229 project = self.project
3231 if location is None:
3232 location = self.location
3234 job_ref = job._JobReference(job_id, project=project, location=location)
3236 # sources can be one of many different input types. (string, Table,
3237 # TableReference, or a sequence of any of those.) Convert them all to a
3238 # list of TableReferences.
3239 #
3240 # _table_arg_to_table_ref leaves lists unmodified.
3241 sources = _table_arg_to_table_ref(sources, default_project=self.project)
3243 if not isinstance(sources, collections_abc.Sequence):
3244 sources = [sources]
3246 sources = [
3247 _table_arg_to_table_ref(source, default_project=self.project)
3248 for source in sources
3249 ]
3251 destination = _table_arg_to_table_ref(destination, default_project=self.project)
3253 if job_config:
3254 _verify_job_config_type(job_config, google.cloud.bigquery.job.CopyJobConfig)
3255 job_config = copy.deepcopy(job_config)
3257 copy_job = job.CopyJob(
3258 job_ref, sources, destination, client=self, job_config=job_config
3259 )
3260 copy_job._begin(retry=retry, timeout=timeout)
3262 return copy_job
3264 def extract_table(
3265 self,
3266 source: Union[Table, TableReference, TableListItem, Model, ModelReference, str],
3267 destination_uris: Union[str, Sequence[str]],
3268 job_id: Optional[str] = None,
3269 job_id_prefix: Optional[str] = None,
3270 location: Optional[str] = None,
3271 project: Optional[str] = None,
3272 job_config: Optional[ExtractJobConfig] = None,
3273 retry: retries.Retry = DEFAULT_RETRY,
3274 timeout: TimeoutType = DEFAULT_TIMEOUT,
3275 source_type: str = "Table",
3276 ) -> job.ExtractJob:
3277 """Start a job to extract a table into Cloud Storage files.
3279 See
3280 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationextract
3282 Args:
3283 source (Union[ \
3284 google.cloud.bigquery.table.Table, \
3285 google.cloud.bigquery.table.TableReference, \
3286 google.cloud.bigquery.table.TableListItem, \
3287 google.cloud.bigquery.model.Model, \
3288 google.cloud.bigquery.model.ModelReference, \
3289 src, \
3290 ]):
3291 Table or Model to be extracted.
3292 destination_uris (Union[str, Sequence[str]]):
3293 URIs of Cloud Storage file(s) into which table data is to be
3294 extracted; in format
3295 ``gs://<bucket_name>/<object_name_or_glob>``.
3296 job_id (Optional[str]): The ID of the job.
3297 job_id_prefix (Optional[str]):
3298 The user-provided prefix for a randomly generated job ID.
3299 This parameter will be ignored if a ``job_id`` is also given.
3300 location (Optional[str]):
3301 Location where to run the job. Must match the location of the
3302 source table.
3303 project (Optional[str]):
3304 Project ID of the project of where to run the job. Defaults
3305 to the client's project.
3306 job_config (Optional[google.cloud.bigquery.job.ExtractJobConfig]):
3307 Extra configuration options for the job.
3308 retry (Optional[google.api_core.retry.Retry]):
3309 How to retry the RPC.
3310 timeout (Optional[float]):
3311 The number of seconds to wait for the underlying HTTP transport
3312 before using ``retry``.
3313 source_type (Optional[str]):
3314 Type of source to be extracted.``Table`` or ``Model``. Defaults to ``Table``.
3315 Returns:
3316 google.cloud.bigquery.job.ExtractJob: A new extract job instance.
3318 Raises:
3319 TypeError:
3320 If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.ExtractJobConfig`
3321 class.
3322 ValueError:
3323 If ``source_type`` is not among ``Table``,``Model``.
3324 """
3325 job_id = _make_job_id(job_id, job_id_prefix)
3327 if project is None:
3328 project = self.project
3330 if location is None:
3331 location = self.location
3333 job_ref = job._JobReference(job_id, project=project, location=location)
3334 src = source_type.lower()
3335 if src == "table":
3336 source = _table_arg_to_table_ref(source, default_project=self.project)
3337 elif src == "model":
3338 source = _model_arg_to_model_ref(source, default_project=self.project)
3339 else:
3340 raise ValueError(
3341 "Cannot pass `{}` as a ``source_type``, pass Table or Model".format(
3342 source_type
3343 )
3344 )
3346 if isinstance(destination_uris, str):
3347 destination_uris = [destination_uris]
3349 if job_config:
3350 _verify_job_config_type(
3351 job_config, google.cloud.bigquery.job.ExtractJobConfig
3352 )
3353 job_config = copy.deepcopy(job_config)
3355 extract_job = job.ExtractJob(
3356 job_ref, source, destination_uris, client=self, job_config=job_config
3357 )
3358 extract_job._begin(retry=retry, timeout=timeout)
3360 return extract_job
3362 def query(
3363 self,
3364 query: str,
3365 job_config: Optional[QueryJobConfig] = None,
3366 job_id: Optional[str] = None,
3367 job_id_prefix: Optional[str] = None,
3368 location: Optional[str] = None,
3369 project: Optional[str] = None,
3370 retry: retries.Retry = DEFAULT_RETRY,
3371 timeout: TimeoutType = DEFAULT_TIMEOUT,
3372 job_retry: retries.Retry = DEFAULT_JOB_RETRY,
3373 api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT,
3374 ) -> job.QueryJob:
3375 """Run a SQL query.
3377 See
3378 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#jobconfigurationquery
3380 Args:
3381 query (str):
3382 SQL query to be executed. Defaults to the standard SQL
3383 dialect. Use the ``job_config`` parameter to change dialects.
3384 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
3385 Extra configuration options for the job.
3386 To override any options that were previously set in
3387 the ``default_query_job_config`` given to the
3388 ``Client`` constructor, manually set those options to ``None``,
3389 or whatever value is preferred.
3390 job_id (Optional[str]): ID to use for the query job.
3391 job_id_prefix (Optional[str]):
3392 The prefix to use for a randomly generated job ID. This parameter
3393 will be ignored if a ``job_id`` is also given.
3394 location (Optional[str]):
3395 Location where to run the job. Must match the location of the
3396 table used in the query as well as the destination table.
3397 project (Optional[str]):
3398 Project ID of the project of where to run the job. Defaults
3399 to the client's project.
3400 retry (Optional[google.api_core.retry.Retry]):
3401 How to retry the RPC. This only applies to making RPC
3402 calls. It isn't used to retry failed jobs. This has
3403 a reasonable default that should only be overridden
3404 with care.
3405 timeout (Optional[float]):
3406 The number of seconds to wait for the underlying HTTP transport
3407 before using ``retry``.
3408 job_retry (Optional[google.api_core.retry.Retry]):
3409 How to retry failed jobs. The default retries
3410 rate-limit-exceeded errors. Passing ``None`` disables
3411 job retry.
3413 Not all jobs can be retried. If ``job_id`` is
3414 provided, then the job returned by the query will not
3415 be retryable, and an exception will be raised if a
3416 non-``None`` (and non-default) value for ``job_retry``
3417 is also provided.
3419 Note that errors aren't detected until ``result()`` is
3420 called on the job returned. The ``job_retry``
3421 specified here becomes the default ``job_retry`` for
3422 ``result()``, where it can also be specified.
3423 api_method (Union[str, enums.QueryApiMethod]):
3424 Method with which to start the query job.
3426 See :class:`google.cloud.bigquery.enums.QueryApiMethod` for
3427 details on the difference between the query start methods.
3429 Returns:
3430 google.cloud.bigquery.job.QueryJob: A new query job instance.
3432 Raises:
3433 TypeError:
3434 If ``job_config`` is not an instance of
3435 :class:`~google.cloud.bigquery.job.QueryJobConfig`
3436 class, or if both ``job_id`` and non-``None`` non-default
3437 ``job_retry`` are provided.
3438 """
3439 job_id_given = job_id is not None
3440 if (
3441 job_id_given
3442 and job_retry is not None
3443 and job_retry is not DEFAULT_JOB_RETRY
3444 ):
3445 raise TypeError(
3446 "`job_retry` was provided, but the returned job is"
3447 " not retryable, because a custom `job_id` was"
3448 " provided."
3449 )
3451 if job_id_given and api_method == enums.QueryApiMethod.QUERY:
3452 raise TypeError(
3453 "`job_id` was provided, but the 'QUERY' `api_method` was requested."
3454 )
3456 if project is None:
3457 project = self.project
3459 if location is None:
3460 location = self.location
3462 if job_config is not None:
3463 _verify_job_config_type(job_config, QueryJobConfig)
3465 job_config = _job_helpers.job_config_with_defaults(
3466 job_config, self._default_query_job_config
3467 )
3469 # Note that we haven't modified the original job_config (or
3470 # _default_query_job_config) up to this point.
3471 if api_method == enums.QueryApiMethod.QUERY:
3472 return _job_helpers.query_jobs_query(
3473 self,
3474 query,
3475 job_config,
3476 location,
3477 project,
3478 retry,
3479 timeout,
3480 job_retry,
3481 )
3482 elif api_method == enums.QueryApiMethod.INSERT:
3483 return _job_helpers.query_jobs_insert(
3484 self,
3485 query,
3486 job_config,
3487 job_id,
3488 job_id_prefix,
3489 location,
3490 project,
3491 retry,
3492 timeout,
3493 job_retry,
3494 )
3495 else:
3496 raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}")
3498 def query_and_wait(
3499 self,
3500 query,
3501 *,
3502 job_config: Optional[QueryJobConfig] = None,
3503 location: Optional[str] = None,
3504 project: Optional[str] = None,
3505 api_timeout: TimeoutType = DEFAULT_TIMEOUT,
3506 wait_timeout: TimeoutType = None,
3507 retry: retries.Retry = DEFAULT_RETRY,
3508 job_retry: retries.Retry = DEFAULT_JOB_RETRY,
3509 page_size: Optional[int] = None,
3510 max_results: Optional[int] = None,
3511 ) -> RowIterator:
3512 """Run the query, wait for it to finish, and return the results.
3514 While ``jobCreationMode=JOB_CREATION_OPTIONAL`` is in preview in the
3515 ``jobs.query`` REST API, use the default ``jobCreationMode`` unless
3516 the environment variable ``QUERY_PREVIEW_ENABLED=true``. After
3517 ``jobCreationMode`` is GA, this method will always use
3518 ``jobCreationMode=JOB_CREATION_OPTIONAL``. See:
3519 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
3521 Args:
3522 query (str):
3523 SQL query to be executed. Defaults to the standard SQL
3524 dialect. Use the ``job_config`` parameter to change dialects.
3525 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
3526 Extra configuration options for the job.
3527 To override any options that were previously set in
3528 the ``default_query_job_config`` given to the
3529 ``Client`` constructor, manually set those options to ``None``,
3530 or whatever value is preferred.
3531 location (Optional[str]):
3532 Location where to run the job. Must match the location of the
3533 table used in the query as well as the destination table.
3534 project (Optional[str]):
3535 Project ID of the project of where to run the job. Defaults
3536 to the client's project.
3537 api_timeout (Optional[float]):
3538 The number of seconds to wait for the underlying HTTP transport
3539 before using ``retry``.
3540 wait_timeout (Optional[float]):
3541 The number of seconds to wait for the query to finish. If the
3542 query doesn't finish before this timeout, the client attempts
3543 to cancel the query.
3544 retry (Optional[google.api_core.retry.Retry]):
3545 How to retry the RPC. This only applies to making RPC
3546 calls. It isn't used to retry failed jobs. This has
3547 a reasonable default that should only be overridden
3548 with care.
3549 job_retry (Optional[google.api_core.retry.Retry]):
3550 How to retry failed jobs. The default retries
3551 rate-limit-exceeded errors. Passing ``None`` disables
3552 job retry. Not all jobs can be retried.
3553 page_size (Optional[int]):
3554 The maximum number of rows in each page of results from this
3555 request. Non-positive values are ignored.
3556 max_results (Optional[int]):
3557 The maximum total number of rows from this request.
3559 Returns:
3560 google.cloud.bigquery.table.RowIterator:
3561 Iterator of row data
3562 :class:`~google.cloud.bigquery.table.Row`-s. During each
3563 page, the iterator will have the ``total_rows`` attribute
3564 set, which counts the total number of rows **in the result
3565 set** (this is distinct from the total number of rows in the
3566 current page: ``iterator.page.num_items``).
3568 If the query is a special query that produces no results, e.g.
3569 a DDL query, an ``_EmptyRowIterator`` instance is returned.
3571 Raises:
3572 TypeError:
3573 If ``job_config`` is not an instance of
3574 :class:`~google.cloud.bigquery.job.QueryJobConfig`
3575 class.
3576 """
3577 if project is None:
3578 project = self.project
3580 if location is None:
3581 location = self.location
3583 if job_config is not None:
3584 _verify_job_config_type(job_config, QueryJobConfig)
3586 job_config = _job_helpers.job_config_with_defaults(
3587 job_config, self._default_query_job_config
3588 )
3590 return _job_helpers.query_and_wait(
3591 self,
3592 query,
3593 job_config=job_config,
3594 location=location,
3595 project=project,
3596 api_timeout=api_timeout,
3597 wait_timeout=wait_timeout,
3598 retry=retry,
3599 job_retry=job_retry,
3600 page_size=page_size,
3601 max_results=max_results,
3602 )
3604 def insert_rows(
3605 self,
3606 table: Union[Table, TableReference, str],
3607 rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]],
3608 selected_fields: Optional[Sequence[SchemaField]] = None,
3609 **kwargs,
3610 ) -> Sequence[Dict[str, Any]]:
3611 """Insert rows into a table via the streaming API.
3613 See
3614 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
3616 BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
3617 Additionally, if a payload vastly exceeds this limit, the request is rejected
3618 by the intermediate architecture, which returns a 413 (Payload Too Large) status code.
3621 See
3622 https://cloud.google.com/bigquery/quotas#streaming_inserts
3624 Args:
3625 table (Union[ \
3626 google.cloud.bigquery.table.Table, \
3627 google.cloud.bigquery.table.TableReference, \
3628 str, \
3629 ]):
3630 The destination table for the row data, or a reference to it.
3631 rows (Union[Sequence[Tuple], Sequence[Dict]]):
3632 Row data to be inserted. If a list of tuples is given, each
3633 tuple should contain data for each schema field on the
3634 current table and in the same order as the schema fields. If
3635 a list of dictionaries is given, the keys must include all
3636 required fields in the schema. Keys which do not correspond
3637 to a field in the schema are ignored.
3638 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
3639 The fields to return. Required if ``table`` is a
3640 :class:`~google.cloud.bigquery.table.TableReference`.
3641 kwargs (dict):
3642 Keyword arguments to
3643 :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`.
3645 Returns:
3646 Sequence[Mappings]:
3647 One mapping per row with insert errors: the "index" key
3648 identifies the row, and the "errors" key contains a list of
3649 the mappings describing one or more problems with the row.
3651 Raises:
3652 ValueError: if table's schema is not set or `rows` is not a `Sequence`.
3653 """
3654 if not isinstance(rows, (collections_abc.Sequence, collections_abc.Iterator)):
3655 raise TypeError("rows argument should be a sequence of dicts or tuples")
3657 table = _table_arg_to_table(table, default_project=self.project)
3659 if not isinstance(table, Table):
3660 raise TypeError(_NEED_TABLE_ARGUMENT)
3662 schema = table.schema
3664 # selected_fields can override the table schema.
3665 if selected_fields is not None:
3666 schema = selected_fields
3668 if len(schema) == 0:
3669 raise ValueError(
3670 (
3671 "Could not determine schema for table '{}'. Call client.get_table() "
3672 "or pass in a list of schema fields to the selected_fields argument."
3673 ).format(table)
3674 )
3676 json_rows = [_record_field_to_json(schema, row) for row in rows]
3678 return self.insert_rows_json(table, json_rows, **kwargs)
3680 def insert_rows_from_dataframe(
3681 self,
3682 table: Union[Table, TableReference, str],
3683 dataframe,
3684 selected_fields: Optional[Sequence[SchemaField]] = None,
3685 chunk_size: int = 500,
3686 **kwargs: Dict,
3687 ) -> Sequence[Sequence[dict]]:
3688 """Insert rows into a table from a dataframe via the streaming API.
3690 BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
3691 Additionally, if a payload vastly exceeds this limit, the request is rejected
3692 by the intermediate architecture, which returns a 413 (Payload Too Large) status code.
3694 See
3695 https://cloud.google.com/bigquery/quotas#streaming_inserts
3697 Args:
3698 table (Union[ \
3699 google.cloud.bigquery.table.Table, \
3700 google.cloud.bigquery.table.TableReference, \
3701 str, \
3702 ]):
3703 The destination table for the row data, or a reference to it.
3704 dataframe (pandas.DataFrame):
3705 A :class:`~pandas.DataFrame` containing the data to load. Any
3706 ``NaN`` values present in the dataframe are omitted from the
3707 streaming API request(s).
3708 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
3709 The fields to return. Required if ``table`` is a
3710 :class:`~google.cloud.bigquery.table.TableReference`.
3711 chunk_size (int):
3712 The number of rows to stream in a single chunk. Must be positive.
3713 kwargs (Dict):
3714 Keyword arguments to
3715 :meth:`~google.cloud.bigquery.client.Client.insert_rows_json`.
3717 Returns:
3718 Sequence[Sequence[Mappings]]:
3719 A list with insert errors for each insert chunk. Each element
3720 is a list containing one mapping per row with insert errors:
3721 the "index" key identifies the row, and the "errors" key
3722 contains a list of the mappings describing one or more problems
3723 with the row.
3725 Raises:
3726 ValueError: if table's schema is not set
3727 """
3728 insert_results = []
3730 chunk_count = int(math.ceil(len(dataframe) / chunk_size))
3731 rows_iter = _pandas_helpers.dataframe_to_json_generator(dataframe)
3733 for _ in range(chunk_count):
3734 rows_chunk = itertools.islice(rows_iter, chunk_size)
3735 result = self.insert_rows(table, rows_chunk, selected_fields, **kwargs)
3736 insert_results.append(result)
3738 return insert_results
3740 def insert_rows_json(
3741 self,
3742 table: Union[Table, TableReference, TableListItem, str],
3743 json_rows: Sequence[Mapping[str, Any]],
3744 row_ids: Union[
3745 Iterable[Optional[str]], AutoRowIDs, None
3746 ] = AutoRowIDs.GENERATE_UUID,
3747 skip_invalid_rows: Optional[bool] = None,
3748 ignore_unknown_values: Optional[bool] = None,
3749 template_suffix: Optional[str] = None,
3750 retry: retries.Retry = DEFAULT_RETRY,
3751 timeout: TimeoutType = DEFAULT_TIMEOUT,
3752 ) -> Sequence[dict]:
3753 """Insert rows into a table without applying local type conversions.
3755 See
3756 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
3758 BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
3759 Additionally, if a payload vastly exceeds this limit, the request is rejected
3760 by the intermediate architecture, which returns a 413 (Payload Too Large) status code.
3762 See
3763 https://cloud.google.com/bigquery/quotas#streaming_inserts
3765 Args:
3766 table (Union[ \
3767 google.cloud.bigquery.table.Table \
3768 google.cloud.bigquery.table.TableReference, \
3769 google.cloud.bigquery.table.TableListItem, \
3770 str \
3771 ]):
3772 The destination table for the row data, or a reference to it.
3773 json_rows (Sequence[Dict]):
3774 Row data to be inserted. Keys must match the table schema fields
3775 and values must be JSON-compatible representations.
3776 row_ids (Union[Iterable[str], AutoRowIDs, None]):
3777 Unique IDs, one per row being inserted. An ID can also be
3778 ``None``, indicating that an explicit insert ID should **not**
3779 be used for that row. If the argument is omitted altogether,
3780 unique IDs are created automatically.
3782 .. versionchanged:: 2.21.0
3783 Can also be an iterable, not just a sequence, or an
3784 :class:`AutoRowIDs` enum member.
3786 .. deprecated:: 2.21.0
3787 Passing ``None`` to explicitly request autogenerating insert IDs is
3788 deprecated, use :attr:`AutoRowIDs.GENERATE_UUID` instead.
3790 skip_invalid_rows (Optional[bool]):
3791 Insert all valid rows of a request, even if invalid rows exist.
3792 The default value is ``False``, which causes the entire request
3793 to fail if any invalid rows exist.
3794 ignore_unknown_values (Optional[bool]):
3795 Accept rows that contain values that do not match the schema.
3796 The unknown values are ignored. Default is ``False``, which
3797 treats unknown values as errors.
3798 template_suffix (Optional[str]):
3799 Treat ``name`` as a template table and provide a suffix.
3800 BigQuery will create the table ``<name> + <template_suffix>``
3801 based on the schema of the template table. See
3802 https://cloud.google.com/bigquery/streaming-data-into-bigquery#template-tables
3803 retry (Optional[google.api_core.retry.Retry]):
3804 How to retry the RPC.
3805 timeout (Optional[float]):
3806 The number of seconds to wait for the underlying HTTP transport
3807 before using ``retry``.
3809 Returns:
3810 Sequence[Mapping]:
3811 One mapping per row with insert errors: the "index" key
3812 identifies the row, and the "errors" key contains a list of
3813 the mappings describing one or more problems with the row.
3815 Raises:
3816 TypeError: if `json_rows` is not a `Sequence`.
3817 """
3818 if not isinstance(
3819 json_rows, (collections_abc.Sequence, collections_abc.Iterator)
3820 ):
3821 raise TypeError("json_rows argument should be a sequence of dicts")
3822 # Convert table to just a reference because unlike insert_rows,
3823 # insert_rows_json doesn't need the table schema. It's not doing any
3824 # type conversions.
3825 table = _table_arg_to_table_ref(table, default_project=self.project)
3826 rows_info: List[Any] = []
3827 data: Dict[str, Any] = {"rows": rows_info}
3829 if row_ids is None:
3830 warnings.warn(
3831 "Passing None for row_ids is deprecated. To explicitly request "
3832 "autogenerated insert IDs, use AutoRowIDs.GENERATE_UUID instead",
3833 category=DeprecationWarning,
3834 )
3835 row_ids = AutoRowIDs.GENERATE_UUID
3837 if not isinstance(row_ids, AutoRowIDs):
3838 try:
3839 row_ids_iter = iter(row_ids)
3840 except TypeError:
3841 msg = "row_ids is neither an iterable nor an AutoRowIDs enum member"
3842 raise TypeError(msg)
3844 for i, row in enumerate(json_rows):
3845 info: Dict[str, Any] = {"json": row}
3847 if row_ids is AutoRowIDs.GENERATE_UUID:
3848 info["insertId"] = str(uuid.uuid4())
3849 elif row_ids is AutoRowIDs.DISABLED:
3850 info["insertId"] = None
3851 else:
3852 try:
3853 insert_id = next(row_ids_iter)
3854 except StopIteration:
3855 msg = f"row_ids did not generate enough IDs, error at index {i}"
3856 raise ValueError(msg)
3857 else:
3858 info["insertId"] = insert_id
3860 rows_info.append(info)
3862 if skip_invalid_rows is not None:
3863 data["skipInvalidRows"] = skip_invalid_rows
3865 if ignore_unknown_values is not None:
3866 data["ignoreUnknownValues"] = ignore_unknown_values
3868 if template_suffix is not None:
3869 data["templateSuffix"] = template_suffix
3871 path = "%s/insertAll" % table.path
3872 # We can always retry, because every row has an insert ID.
3873 span_attributes = {"path": path}
3874 response = self._call_api(
3875 retry,
3876 span_name="BigQuery.insertRowsJson",
3877 span_attributes=span_attributes,
3878 method="POST",
3879 path=path,
3880 data=data,
3881 timeout=timeout,
3882 )
3883 errors = []
3885 for error in response.get("insertErrors", ()):
3886 errors.append({"index": int(error["index"]), "errors": error["errors"]})
3888 return errors
3890 def list_partitions(
3891 self,
3892 table: Union[Table, TableReference, TableListItem, str],
3893 retry: retries.Retry = DEFAULT_RETRY,
3894 timeout: TimeoutType = DEFAULT_TIMEOUT,
3895 ) -> Sequence[str]:
3896 """List the partitions in a table.
3898 Args:
3899 table (Union[ \
3900 google.cloud.bigquery.table.Table, \
3901 google.cloud.bigquery.table.TableReference, \
3902 google.cloud.bigquery.table.TableListItem, \
3903 str, \
3904 ]):
3905 The table or reference from which to get partition info
3906 retry (Optional[google.api_core.retry.Retry]):
3907 How to retry the RPC.
3908 timeout (Optional[float]):
3909 The number of seconds to wait for the underlying HTTP transport
3910 before using ``retry``.
3911 If multiple requests are made under the hood, ``timeout``
3912 applies to each individual request.
3914 Returns:
3915 List[str]:
3916 A list of the partition ids present in the partitioned table
3917 """
3918 table = _table_arg_to_table_ref(table, default_project=self.project)
3919 meta_table = self.get_table(
3920 TableReference(
3921 DatasetReference(table.project, table.dataset_id),
3922 "%s$__PARTITIONS_SUMMARY__" % table.table_id,
3923 ),
3924 retry=retry,
3925 timeout=timeout,
3926 )
3928 subset = [col for col in meta_table.schema if col.name == "partition_id"]
3929 return [
3930 row[0]
3931 for row in self.list_rows(
3932 meta_table, selected_fields=subset, retry=retry, timeout=timeout
3933 )
3934 ]
3936 def list_rows(
3937 self,
3938 table: Union[Table, TableListItem, TableReference, str],
3939 selected_fields: Optional[Sequence[SchemaField]] = None,
3940 max_results: Optional[int] = None,
3941 page_token: Optional[str] = None,
3942 start_index: Optional[int] = None,
3943 page_size: Optional[int] = None,
3944 retry: retries.Retry = DEFAULT_RETRY,
3945 timeout: TimeoutType = DEFAULT_TIMEOUT,
3946 ) -> RowIterator:
3947 """List the rows of the table.
3949 See
3950 https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list
3952 .. note::
3954 This method assumes that the provided schema is up-to-date with the
3955 schema as defined on the back-end: if the two schemas are not
3956 identical, the values returned may be incomplete. To ensure that the
3957 local copy of the schema is up-to-date, call ``client.get_table``.
3959 Args:
3960 table (Union[ \
3961 google.cloud.bigquery.table.Table, \
3962 google.cloud.bigquery.table.TableListItem, \
3963 google.cloud.bigquery.table.TableReference, \
3964 str, \
3965 ]):
3966 The table to list, or a reference to it. When the table
3967 object does not contain a schema and ``selected_fields`` is
3968 not supplied, this method calls ``get_table`` to fetch the
3969 table schema.
3970 selected_fields (Sequence[google.cloud.bigquery.schema.SchemaField]):
3971 The fields to return. If not supplied, data for all columns
3972 are downloaded.
3973 max_results (Optional[int]):
3974 Maximum number of rows to return.
3975 page_token (Optional[str]):
3976 Token representing a cursor into the table's rows.
3977 If not passed, the API will return the first page of the
3978 rows. The token marks the beginning of the iterator to be
3979 returned and the value of the ``page_token`` can be accessed
3980 at ``next_page_token`` of the
3981 :class:`~google.cloud.bigquery.table.RowIterator`.
3982 start_index (Optional[int]):
3983 The zero-based index of the starting row to read.
3984 page_size (Optional[int]):
3985 The maximum number of rows in each page of results from this request.
3986 Non-positive values are ignored. Defaults to a sensible value set by the API.
3987 retry (Optional[google.api_core.retry.Retry]):
3988 How to retry the RPC.
3989 timeout (Optional[float]):
3990 The number of seconds to wait for the underlying HTTP transport
3991 before using ``retry``.
3992 If multiple requests are made under the hood, ``timeout``
3993 applies to each individual request.
3995 Returns:
3996 google.cloud.bigquery.table.RowIterator:
3997 Iterator of row data
3998 :class:`~google.cloud.bigquery.table.Row`-s. During each
3999 page, the iterator will have the ``total_rows`` attribute
4000 set, which counts the total number of rows **in the table**
4001 (this is distinct from the total number of rows in the
4002 current page: ``iterator.page.num_items``).
4003 """
4004 table = _table_arg_to_table(table, default_project=self.project)
4006 if not isinstance(table, Table):
4007 raise TypeError(_NEED_TABLE_ARGUMENT)
4009 schema = table.schema
4011 # selected_fields can override the table schema.
4012 if selected_fields is not None:
4013 schema = selected_fields
4015 # No schema, but no selected_fields. Assume the developer wants all
4016 # columns, so get the table resource for them rather than failing.
4017 elif len(schema) == 0:
4018 table = self.get_table(table.reference, retry=retry, timeout=timeout)
4019 schema = table.schema
4021 params: Dict[str, Any] = {}
4022 if selected_fields is not None:
4023 params["selectedFields"] = ",".join(field.name for field in selected_fields)
4024 if start_index is not None:
4025 params["startIndex"] = start_index
4027 params["formatOptions.useInt64Timestamp"] = True
4028 row_iterator = RowIterator(
4029 client=self,
4030 api_request=functools.partial(self._call_api, retry, timeout=timeout),
4031 path="%s/data" % (table.path,),
4032 schema=schema,
4033 page_token=page_token,
4034 max_results=max_results,
4035 page_size=page_size,
4036 extra_params=params,
4037 table=table,
4038 # Pass in selected_fields separately from schema so that full
4039 # tables can be fetched without a column filter.
4040 selected_fields=selected_fields,
4041 total_rows=getattr(table, "num_rows", None),
4042 project=table.project,
4043 location=table.location,
4044 )
4045 return row_iterator
4047 def _list_rows_from_query_results(
4048 self,
4049 job_id: str,
4050 location: str,
4051 project: str,
4052 schema: Sequence[SchemaField],
4053 total_rows: Optional[int] = None,
4054 destination: Optional[Union[Table, TableReference, TableListItem, str]] = None,
4055 max_results: Optional[int] = None,
4056 start_index: Optional[int] = None,
4057 page_size: Optional[int] = None,
4058 retry: retries.Retry = DEFAULT_RETRY,
4059 timeout: TimeoutType = DEFAULT_TIMEOUT,
4060 query_id: Optional[str] = None,
4061 first_page_response: Optional[Dict[str, Any]] = None,
4062 num_dml_affected_rows: Optional[int] = None,
4063 ) -> RowIterator:
4064 """List the rows of a completed query.
4065 See
4066 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults
4067 Args:
4068 job_id (str):
4069 ID of a query job.
4070 location (str): Location of the query job.
4071 project (str):
4072 ID of the project where the query job was run.
4073 schema (Sequence[google.cloud.bigquery.schema.SchemaField]):
4074 The fields expected in these query results. Used to convert
4075 from JSON to expected Python types.
4076 total_rows (Optional[int]):
4077 Total number of rows in the query results.
4078 destination (Optional[Union[ \
4079 google.cloud.bigquery.table.Table, \
4080 google.cloud.bigquery.table.TableListItem, \
4081 google.cloud.bigquery.table.TableReference, \
4082 str, \
4083 ]]):
4084 Destination table reference. Used to fetch the query results
4085 with the BigQuery Storage API.
4086 max_results (Optional[int]):
4087 Maximum number of rows to return across the whole iterator.
4088 start_index (Optional[int]):
4089 The zero-based index of the starting row to read.
4090 page_size (Optional[int]):
4091 The maximum number of rows in each page of results from this request.
4092 Non-positive values are ignored. Defaults to a sensible value set by the API.
4093 retry (Optional[google.api_core.retry.Retry]):
4094 How to retry the RPC.
4095 timeout (Optional[float]):
4096 The number of seconds to wait for the underlying HTTP transport
4097 before using ``retry``. If set, this connection timeout may be
4098 increased to a minimum value. This prevents retries on what
4099 would otherwise be a successful response.
4100 If multiple requests are made under the hood, ``timeout``
4101 applies to each individual request.
4102 query_id (Optional[str]):
4103 [Preview] ID of a completed query. This ID is auto-generated
4104 and not guaranteed to be populated.
4105 first_page_response (Optional[dict]):
4106 API response for the first page of results (if available).
4107 num_dml_affected_rows (Optional[int]):
4108 If this RowIterator is the result of a DML query, the number of
4109 rows that were affected.
4111 Returns:
4112 google.cloud.bigquery.table.RowIterator:
4113 Iterator of row data
4114 :class:`~google.cloud.bigquery.table.Row`-s.
4115 """
4116 params: Dict[str, Any] = {
4117 "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
4118 "location": location,
4119 }
4121 if timeout is not None:
4122 if not isinstance(timeout, (int, float)):
4123 timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
4124 else:
4125 timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
4127 if start_index is not None:
4128 params["startIndex"] = start_index
4130 # We don't call jobs.query with a page size, so if the user explicitly
4131 # requests a certain size, invalidate the cache.
4132 if page_size is not None:
4133 first_page_response = None
4135 params["formatOptions.useInt64Timestamp"] = True
4136 row_iterator = RowIterator(
4137 client=self,
4138 api_request=functools.partial(self._call_api, retry, timeout=timeout),
4139 path=f"/projects/{project}/queries/{job_id}",
4140 schema=schema,
4141 max_results=max_results,
4142 page_size=page_size,
4143 table=destination,
4144 extra_params=params,
4145 total_rows=total_rows,
4146 project=project,
4147 location=location,
4148 job_id=job_id,
4149 query_id=query_id,
4150 first_page_response=first_page_response,
4151 num_dml_affected_rows=num_dml_affected_rows,
4152 )
4153 return row_iterator
4155 def _schema_from_json_file_object(self, file_obj):
4156 """Helper function for schema_from_json that takes a
4157 file object that describes a table schema.
4159 Returns:
4160 List of schema field objects.
4161 """
4162 json_data = json.load(file_obj)
4163 return [SchemaField.from_api_repr(field) for field in json_data]
4165 def _schema_to_json_file_object(self, schema_list, file_obj):
4166 """Helper function for schema_to_json that takes a schema list and file
4167 object and writes the schema list to the file object with json.dump
4168 """
4169 json.dump(schema_list, file_obj, indent=2, sort_keys=True)
4171 def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]:
4172 """Takes a file object or file path that contains json that describes
4173 a table schema.
4175 Returns:
4176 List[SchemaField]:
4177 List of :class:`~google.cloud.bigquery.schema.SchemaField` objects.
4178 """
4179 if isinstance(file_or_path, io.IOBase):
4180 return self._schema_from_json_file_object(file_or_path)
4182 with open(file_or_path) as file_obj:
4183 return self._schema_from_json_file_object(file_obj)
4185 def schema_to_json(
4186 self, schema_list: Sequence[SchemaField], destination: "PathType"
4187 ):
4188 """Takes a list of schema field objects.
4190 Serializes the list of schema field objects as json to a file.
4192 Destination is a file path or a file object.
4193 """
4194 json_schema_list = [f.to_api_repr() for f in schema_list]
4196 if isinstance(destination, io.IOBase):
4197 return self._schema_to_json_file_object(json_schema_list, destination)
4199 with open(destination, mode="w") as file_obj:
4200 return self._schema_to_json_file_object(json_schema_list, file_obj)
    def __enter__(self):
        # Support use as a context manager: ``with Client(...) as client:``.
        # No setup is needed on entry; the client itself is the managed object.
        return self
    def __exit__(self, exc_type, exc_value, traceback):
        # On context-manager exit, release the client's resources via
        # ``close()`` (defined elsewhere on this class). Returning None means
        # any in-flight exception is propagated, not suppressed.
        self.close()
# pylint: disable=unused-argument
def _item_to_project(iterator, resource):
    """Page-iterator item converter: JSON project resource -> ``Project``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a project.

    Returns:
        google.cloud.bigquery.client.Project: The next project in the page.
    """
    project = Project.from_api_repr(resource)
    return project
4224# pylint: enable=unused-argument
def _item_to_dataset(iterator, resource):
    """Page-iterator item converter: JSON dataset resource -> ``DatasetListItem``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a dataset.

    Returns:
        google.cloud.bigquery.dataset.DatasetListItem: The next dataset in the page.
    """
    dataset_item = DatasetListItem(resource)
    return dataset_item
def _item_to_job(iterator, resource):
    """Page-iterator item converter: JSON job resource -> job instance.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use.
        resource (Dict): An item to be converted to a job.

    Returns:
        job instance: The next job in the page.
    """
    # Delegate to the owning client, which knows how to pick the concrete
    # job class for the resource.
    client = iterator.client
    return client.job_from_resource(resource)
def _item_to_model(iterator, resource):
    """Page-iterator item converter: JSON model resource -> ``Model``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a model.

    Returns:
        google.cloud.bigquery.model.Model: The next model in the page.
    """
    model = Model.from_api_repr(resource)
    return model
def _item_to_routine(iterator, resource):
    """Page-iterator item converter: JSON routine resource -> ``Routine``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a routine.

    Returns:
        google.cloud.bigquery.routine.Routine: The next routine in the page.
    """
    routine = Routine.from_api_repr(resource)
    return routine
def _item_to_table(iterator, resource):
    """Page-iterator item converter: JSON table resource -> ``TableListItem``.

    Args:
        iterator (google.api_core.page_iterator.Iterator):
            The iterator that is currently in use (unused).
        resource (Dict): An item to be converted to a table.

    Returns:
        google.cloud.bigquery.table.Table: The next table in the page.
    """
    table_item = TableListItem(resource)
    return table_item
4297def _extract_job_reference(job, project=None, location=None):
4298 """Extract fully-qualified job reference from a job-like object.
4300 Args:
4301 job_id (Union[ \
4302 str, \
4303 google.cloud.bigquery.job.LoadJob, \
4304 google.cloud.bigquery.job.CopyJob, \
4305 google.cloud.bigquery.job.ExtractJob, \
4306 google.cloud.bigquery.job.QueryJob \
4307 ]): Job identifier.
4308 project (Optional[str]):
4309 Project where the job was run. Ignored if ``job_id`` is a job
4310 object.
4311 location (Optional[str]):
4312 Location where the job was run. Ignored if ``job_id`` is a job
4313 object.
4315 Returns:
4316 Tuple[str, str, str]: ``(project, location, job_id)``
4317 """
4318 if hasattr(job, "job_id"):
4319 project = job.project
4320 job_id = job.job_id
4321 location = job.location
4322 else:
4323 job_id = job
4325 return (project, location, job_id)
4328def _check_mode(stream):
4329 """Check that a stream was opened in read-binary mode.
4331 Args:
4332 stream (IO[bytes]): A bytes IO object open for reading.
4334 Raises:
4335 ValueError:
4336 if the ``stream.mode`` is a valid attribute
4337 and is not among ``rb``, ``r+b`` or ``rb+``.
4338 """
4339 mode = getattr(stream, "mode", None)
4341 if isinstance(stream, gzip.GzipFile):
4342 if mode != gzip.READ: # pytype: disable=module-attr
4343 raise ValueError(
4344 "Cannot upload gzip files opened in write mode: use "
4345 "gzip.GzipFile(filename, mode='rb')"
4346 )
4347 else:
4348 if mode is not None and mode not in ("rb", "r+b", "rb+"):
4349 raise ValueError(
4350 "Cannot upload files opened in text mode: use "
4351 "open(filename, mode='rb') or open(filename, mode='r+b')"
4352 )
4355def _get_upload_headers(user_agent):
4356 """Get the headers for an upload request.
4358 Args:
4359 user_agent (str): The user-agent for requests.
4361 Returns:
4362 Dict: The headers to be used for the request.
4363 """
4364 return {
4365 "Accept": "application/json",
4366 "Accept-Encoding": "gzip, deflate",
4367 "User-Agent": user_agent,
4368 "content-type": "application/json; charset=UTF-8",
4369 }
4372def _add_server_timeout_header(headers: Optional[Dict[str, str]], kwargs):
4373 timeout = kwargs.get("timeout")
4374 if timeout is not None:
4375 if headers is None:
4376 headers = {}
4377 headers[TIMEOUT_HEADER] = str(timeout)
4379 if headers:
4380 kwargs["headers"] = headers
4382 return kwargs