Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/gbq.py: 33%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" Google BigQuery support """
2from __future__ import annotations
4from typing import (
5 TYPE_CHECKING,
6 Any,
7)
9from pandas.compat._optional import import_optional_dependency
11if TYPE_CHECKING:
12 from pandas import DataFrame
def _try_import():
    """
    Import the optional ``pandas_gbq`` module and return it.

    pandas is itself a dependency of pandas-gbq, so the import is deferred
    until first use instead of being done at module import time.

    Returns
    -------
    module
        Whatever ``import_optional_dependency("pandas_gbq", ...)`` returns.
    """
    # Hint text handed to ``import_optional_dependency`` through ``extra``.
    install_hint = (
        "pandas-gbq is required to load data from Google BigQuery. "
        "See the docs: https://pandas-gbq.readthedocs.io."
    )
    return import_optional_dependency("pandas_gbq", extra=install_hint)
def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials=None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    """
    Load data from Google BigQuery.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    See the `How to authenticate with Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide for authentication instructions.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str, optional
        Google BigQuery Account project ID. Optional when available from
        the environment.
    index_col : str, optional
        Name of result column to use for index in results DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    auth_local_webserver : bool, default True
        Use the `local webserver flow`_ instead of the `console flow`_
        when getting user credentials.

        .. _local webserver flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
        .. _console flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

        *New in version 0.2.0 of pandas-gbq*.

        .. versionchanged:: 1.5.0
           Default value is changed to ``True``. Google has deprecated the
           ``auth_local_webserver = False`` `"out of band" (copy-paste)
           flow
           <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future version.

        SQL syntax dialect to use. Value can be one of:

        ``'legacy'``
            Use BigQuery's legacy SQL dialect. For more information see
            `BigQuery Legacy SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
        ``'standard'``
            Use BigQuery's standard SQL, which is
            compliant with the SQL 2011 standard. For more information
            see `BigQuery Standard SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
    location : str, optional
        Location where the query job should run. See the `BigQuery locations
        documentation
        <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
        list of available locations. The location must match that of any
        datasets used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Use this parameter to override
        default credentials, such as to use Compute Engine
        :class:`google.auth.compute_engine.Credentials` or Service Account
        :class:`google.oauth2.service_account.Credentials` directly.

        *New in version 0.8.0 of pandas-gbq*.
    use_bqstorage_api : bool, optional
        Use the `BigQuery Storage API
        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
        download query results quickly, but at an increased cost. To use this
        API, first `enable it in the Cloud Console
        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
        You must also have the `bigquery.readsessions.create
        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
        permission on the project you are billing queries to.

        This feature requires version 0.10.0 or later of the ``pandas-gbq``
        package. It also requires the ``google-cloud-bigquery-storage`` and
        ``fastavro`` packages.

        When left as ``None`` the argument is not forwarded to pandas-gbq.
    max_results : int, optional
        If set, limit the maximum number of rows to fetch from the query
        results. When left as ``None`` the argument is not forwarded to
        pandas-gbq.

        *New in version 0.12.0 of pandas-gbq*.

        .. versionadded:: 1.1.0
    progress_bar_type : str, optional
        If set, use the `tqdm <https://tqdm.github.io/>`__ library to
        display a progress bar while the data downloads. Install the
        ``tqdm`` package to use this feature.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

        Note that this feature requires version 0.12.0 or later of the
        ``pandas-gbq`` package. And it requires the ``tqdm`` package. Slightly
        different than ``pandas-gbq``, here the default is ``None``.

    Returns
    -------
    DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
    """
    pandas_gbq = _try_import()

    # Newer keyword arguments: only populated when explicitly set by the caller.
    explicit_only = {
        "use_bqstorage_api": use_bqstorage_api,
        "max_results": max_results,
    }
    extra_kwargs: dict[str, str | bool | int | None] = {
        name: value for name, value in explicit_only.items() if value is not None
    }
    # Forwarded unconditionally, including when it is None (the default here).
    extra_kwargs["progress_bar_type"] = progress_bar_type

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **extra_kwargs,
    )
def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials=None,
) -> None:
    """
    Write a DataFrame to Google BigQuery by delegating to ``pandas_gbq.to_gbq``.

    This is a thin wrapper: ``pandas_gbq`` is imported on first use and every
    argument is handed to ``pandas_gbq.to_gbq`` unchanged. The meaning and
    validation of each option are defined by pandas-gbq, not here.

    Parameters
    ----------
    dataframe : DataFrame
        Passed as the first positional argument.
    destination_table : str
        Passed as the second positional argument.
    project_id : str, optional
    chunksize : int, optional
    reauth : bool, default False
    if_exists : str, default 'fail'
    auth_local_webserver : bool, default True
    table_schema : list of dict, optional
    location : str, optional
    progress_bar : bool, default True
    credentials : optional
        All of the above are forwarded by keyword under the same name.

    Returns
    -------
    None
        The result of ``pandas_gbq.to_gbq`` is discarded.

    See Also
    --------
    pandas_gbq.to_gbq : This function in the pandas-gbq library.
    """
    pandas_gbq = _try_import()

    # Keyword options forwarded verbatim, under the same names.
    options = {
        "project_id": project_id,
        "chunksize": chunksize,
        "reauth": reauth,
        "if_exists": if_exists,
        "auth_local_webserver": auth_local_webserver,
        "table_schema": table_schema,
        "location": location,
        "progress_bar": progress_bar,
        "credentials": credentials,
    }
    pandas_gbq.to_gbq(dataframe, destination_table, **options)