Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/gbq.py: 35%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" Google BigQuery support """
2from __future__ import annotations
4from typing import (
5 TYPE_CHECKING,
6 Any,
7)
8import warnings
10from pandas.compat._optional import import_optional_dependency
11from pandas.util._exceptions import find_stack_level
13if TYPE_CHECKING:
14 from google.auth.credentials import Credentials
16 from pandas import DataFrame
19def _try_import():
20 # since pandas is a dependency of pandas-gbq
21 # we need to import on first use
22 msg = (
23 "pandas-gbq is required to load data from Google BigQuery. "
24 "See the docs: https://pandas-gbq.readthedocs.io."
25 )
26 pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg)
27 return pandas_gbq
def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials: Credentials | None = None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    """
    Load data from Google BigQuery.

    .. deprecated:: 2.2.0

       Please use ``pandas_gbq.read_gbq`` instead.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.

    See the `How to authenticate with Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide for authentication instructions.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str, optional
        Google BigQuery Account project ID. Optional when available from
        the environment.
    index_col : str, optional
        Name of result column to use for index in results DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    auth_local_webserver : bool, default True
        Use the `local webserver flow`_ instead of the `console flow`_
        when getting user credentials.

        .. _local webserver flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server
        .. _console flow:
            https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console

        *New in version 0.2.0 of pandas-gbq*.

        .. versionchanged:: 1.5.0
           Default value is changed to ``True``. Google has deprecated the
           ``auth_local_webserver = False`` `"out of band" (copy-paste)
           flow
           <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future version.

        SQL syntax dialect to use. Value can be one of:

        ``'legacy'``
            Use BigQuery's legacy SQL dialect. For more information see
            `BigQuery Legacy SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__.
        ``'standard'``
            Use BigQuery's standard SQL, which is
            compliant with the SQL 2011 standard. For more information
            see `BigQuery Standard SQL Reference
            <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__.
    location : str, optional
        Location where the query job should run. See the `BigQuery locations
        documentation
        <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a
        list of available locations. The location must match that of any
        datasets used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing.
        For example:

            configuration = {'query': {'useQueryCache': False}}

        For more information see `BigQuery REST API Reference
        <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Use this parameter to override
        default credentials, such as to use Compute Engine
        :class:`google.auth.compute_engine.Credentials` or Service Account
        :class:`google.oauth2.service_account.Credentials` directly.

        *New in version 0.8.0 of pandas-gbq*.
    use_bqstorage_api : bool, optional
        Use the `BigQuery Storage API
        <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to
        download query results quickly, but at an increased cost. To use this
        API, first `enable it in the Cloud Console
        <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__.
        You must also have the `bigquery.readsessions.create
        <https://cloud.google.com/bigquery/docs/access-control#roles>`__
        permission on the project you are billing queries to.

        This feature requires version 0.10.0 or later of the ``pandas-gbq``
        package. It also requires the ``google-cloud-bigquery-storage`` and
        ``fastavro`` packages.

        If left as ``None`` the option is not forwarded to
        ``pandas_gbq.read_gbq`` at all, so pandas-gbq's own default applies.

    max_results : int, optional
        If set, limit the maximum number of rows to fetch from the query
        results. If left as ``None`` the option is not forwarded to
        ``pandas_gbq.read_gbq``.

    progress_bar_type : str, optional
        If set, use the `tqdm <https://tqdm.github.io/>`__ library to
        display a progress bar while the data downloads. Install the
        ``tqdm`` package to use this feature.

        Possible values of ``progress_bar_type`` include:

        ``None``
            No progress bar.
        ``'tqdm'``
            Use the :func:`tqdm.tqdm` function to print a progress bar
            to :data:`sys.stderr`.
        ``'tqdm_notebook'``
            Use the :func:`tqdm.tqdm_notebook` function to display a
            progress bar as a Jupyter notebook widget.
        ``'tqdm_gui'``
            Use the :func:`tqdm.tqdm_gui` function to display a
            progress bar as a graphical dialog box.

    Returns
    -------
    df: DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.

    Examples
    --------
    Example taken from `Google BigQuery documentation
    <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_

    >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;"
    >>> df = pd.read_gbq(sql, dialect="standard")  # doctest: +SKIP
    >>> project_id = "your-project-id"  # doctest: +SKIP
    >>> df = pd.read_gbq(sql,
    ...                  project_id=project_id,
    ...                  dialect="standard"
    ...                  )  # doctest: +SKIP
    """
    # The deprecation warning is issued before the optional import, so it is
    # emitted even when pandas-gbq is missing and the import then fails.
    warnings.warn(
        "read_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.read_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()

    kwargs: dict[str, str | bool | int | None] = {}

    # START: new kwargs.  These two are only forwarded when explicitly set,
    # so that a ``None`` here never overrides pandas-gbq's own handling.
    if use_bqstorage_api is not None:
        kwargs["use_bqstorage_api"] = use_bqstorage_api
    if max_results is not None:
        kwargs["max_results"] = max_results

    # Unlike the two options above, progress_bar_type is always forwarded,
    # including when it is None.
    kwargs["progress_bar_type"] = progress_bar_type
    # END: new kwargs

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **kwargs,
    )
def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials: Credentials | None = None,
) -> None:
    """
    Write a DataFrame to Google BigQuery via ``pandas_gbq.to_gbq``.

    This is a deprecated thin wrapper: it emits a ``FutureWarning``, imports
    pandas-gbq on first use, and forwards every argument unchanged to
    ``pandas_gbq.to_gbq``. Please use ``pandas_gbq.to_gbq`` directly instead.

    Parameters
    ----------
    dataframe : DataFrame
        Forwarded as the first positional argument.
    destination_table : str
        Forwarded as the second positional argument.
    project_id : str, optional
    chunksize : int, optional
    reauth : bool, default False
    if_exists : str, default 'fail'
    auth_local_webserver : bool, default True
    table_schema : list of dicts, optional
    location : str, optional
    progress_bar : bool, default True
    credentials : google.auth.credentials.Credentials, optional
        These are all forwarded by keyword under the same name; see
        ``pandas_gbq.to_gbq`` for the meaning of each option.

    Returns
    -------
    None
        The return value of ``pandas_gbq.to_gbq`` is discarded.

    See Also
    --------
    pandas_gbq.to_gbq : The function this wrapper delegates to.
    """
    # The deprecation warning is issued before the optional import, so it is
    # emitted even when pandas-gbq is missing and the import then fails.
    warnings.warn(
        "to_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.to_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.to_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(
        dataframe,
        destination_table,
        project_id=project_id,
        chunksize=chunksize,
        reauth=reauth,
        if_exists=if_exists,
        auth_local_webserver=auth_local_webserver,
        table_schema=table_schema,
        location=location,
        progress_bar=progress_bar,
        credentials=credentials,
    )