Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/gbq.py: 35%


1""" Google BigQuery support """ 

2from __future__ import annotations 

3 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7) 

8import warnings 

9 

10from pandas.compat._optional import import_optional_dependency 

11from pandas.util._exceptions import find_stack_level 

12 

13if TYPE_CHECKING: 

14 from google.auth.credentials import Credentials 

15 

16 from pandas import DataFrame 

17 

18 

19def _try_import(): 

20 # since pandas is a dependency of pandas-gbq 

21 # we need to import on first use 

22 msg = ( 

23 "pandas-gbq is required to load data from Google BigQuery. " 

24 "See the docs: https://pandas-gbq.readthedocs.io." 

25 ) 

26 pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) 

27 return pandas_gbq 
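# Illustrative sketch only (not part of the original module): how the lazy
# import above behaves for a caller when pandas-gbq is not installed.
# ``import_optional_dependency`` raises ImportError, and the ``extra`` text
# defined in ``msg`` is included in the error message, so users get the
# install hint. The try/except below is hypothetical usage, not pandas code.
#
#     try:
#         pandas_gbq = _try_import()
#     except ImportError as err:
#         print(err)  # message points at https://pandas-gbq.readthedocs.io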


def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials: Credentials | None = None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:

45 """ 

46 Load data from Google BigQuery. 

47 

48 .. deprecated:: 2.2.0 

49 

50 Please use ``pandas_gbq.read_gbq`` instead. 

51 

52 This function requires the `pandas-gbq package 

53 <https://pandas-gbq.readthedocs.io>`__. 

54 

55 See the `How to authenticate with Google BigQuery 

56 <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__ 

57 guide for authentication instructions. 

58 

59 Parameters 

60 ---------- 

61 query : str 

62 SQL-Like Query to return data values. 

63 project_id : str, optional 

64 Google BigQuery Account project ID. Optional when available from 

65 the environment. 

66 index_col : str, optional 

67 Name of result column to use for index in results DataFrame. 

68 col_order : list(str), optional 

69 List of BigQuery column names in the desired order for results 

70 DataFrame. 

71 reauth : bool, default False 

72 Force Google BigQuery to re-authenticate the user. This is useful 

73 if multiple accounts are used. 

74 auth_local_webserver : bool, default True 

75 Use the `local webserver flow`_ instead of the `console flow`_ 

76 when getting user credentials. 

77 

78 .. _local webserver flow: 

79 https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server 

80 .. _console flow: 

81 https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console 

82 

83 *New in version 0.2.0 of pandas-gbq*. 

84 

85 .. versionchanged:: 1.5.0 

86 Default value is changed to ``True``. Google has deprecated the 

87 ``auth_local_webserver = False`` `"out of band" (copy-paste) 

88 flow 

89 <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_. 

90 dialect : str, default 'legacy' 

91 Note: The default value is changing to 'standard' in a future version. 

92 

93 SQL syntax dialect to use. Value can be one of: 

94 

95 ``'legacy'`` 

96 Use BigQuery's legacy SQL dialect. For more information see 

97 `BigQuery Legacy SQL Reference 

98 <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__. 

99 ``'standard'`` 

100 Use BigQuery's standard SQL, which is 

101 compliant with the SQL 2011 standard. For more information 

102 see `BigQuery Standard SQL Reference 

103 <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__. 

104 location : str, optional 

105 Location where the query job should run. See the `BigQuery locations 

106 documentation 

107 <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a 

108 list of available locations. The location must match that of any 

109 datasets used in the query. 

110 

111 *New in version 0.5.0 of pandas-gbq*. 

112 configuration : dict, optional 

113 Query config parameters for job processing. 

114 For example: 

115 

116 configuration = {'query': {'useQueryCache': False}} 

117 

118 For more information see `BigQuery REST API Reference 

119 <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__. 

120 credentials : google.auth.credentials.Credentials, optional 

121 Credentials for accessing Google APIs. Use this parameter to override 

122 default credentials, such as to use Compute Engine 

123 :class:`google.auth.compute_engine.Credentials` or Service Account 

124 :class:`google.oauth2.service_account.Credentials` directly. 

125 

126 *New in version 0.8.0 of pandas-gbq*. 

127 use_bqstorage_api : bool, default False 

128 Use the `BigQuery Storage API 

129 <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to 

130 download query results quickly, but at an increased cost. To use this 

131 API, first `enable it in the Cloud Console 

132 <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__. 

133 You must also have the `bigquery.readsessions.create 

134 <https://cloud.google.com/bigquery/docs/access-control#roles>`__ 

135 permission on the project you are billing queries to. 

136 

137 This feature requires version 0.10.0 or later of the ``pandas-gbq`` 

138 package. It also requires the ``google-cloud-bigquery-storage`` and 

139 ``fastavro`` packages. 

140 

141 max_results : int, optional 

142 If set, limit the maximum number of rows to fetch from the query 

143 results. 

144 

145 progress_bar_type : Optional, str 

146 If set, use the `tqdm <https://tqdm.github.io/>`__ library to 

147 display a progress bar while the data downloads. Install the 

148 ``tqdm`` package to use this feature. 

149 

150 Possible values of ``progress_bar_type`` include: 

151 

152 ``None`` 

153 No progress bar. 

154 ``'tqdm'`` 

155 Use the :func:`tqdm.tqdm` function to print a progress bar 

156 to :data:`sys.stderr`. 

157 ``'tqdm_notebook'`` 

158 Use the :func:`tqdm.tqdm_notebook` function to display a 

159 progress bar as a Jupyter notebook widget. 

160 ``'tqdm_gui'`` 

161 Use the :func:`tqdm.tqdm_gui` function to display a 

162 progress bar as a graphical dialog box. 

163 

164 Returns 

165 ------- 

166 df: DataFrame 

167 DataFrame representing results of query. 

168 

169 See Also 

170 -------- 

171 pandas_gbq.read_gbq : This function in the pandas-gbq library. 

172 DataFrame.to_gbq : Write a DataFrame to Google BigQuery. 

173 

174 Examples 

175 -------- 

176 Example taken from `Google BigQuery documentation 

177 <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_ 

178 

179 >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;" 

180 >>> df = pd.read_gbq(sql, dialect="standard") # doctest: +SKIP 

181 >>> project_id = "your-project-id" # doctest: +SKIP 

182 >>> df = pd.read_gbq(sql, 

183 ... project_id=project_id, 

184 ... dialect="standard" 

185 ... ) # doctest: +SKIP 

186 """ 

    warnings.warn(
        "read_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.read_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()

    kwargs: dict[str, str | bool | int | None] = {}

    # START: new kwargs. Don't populate unless explicitly set.
    if use_bqstorage_api is not None:
        kwargs["use_bqstorage_api"] = use_bqstorage_api
    if max_results is not None:
        kwargs["max_results"] = max_results

    kwargs["progress_bar_type"] = progress_bar_type
    # END: new kwargs

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **kwargs,
    )
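# Migration sketch (illustrative only, not part of the original module): the
# FutureWarning above directs users to pandas-gbq directly. Assuming pandas-gbq
# is installed, the docstring example maps onto the library call like this;
# "your-project-id" is a placeholder, as in the docstring.
#
#     import pandas_gbq
#
#     sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;"
#     df = pandas_gbq.read_gbq(sql, project_id="your-project-id", dialect="standard")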


def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials: Credentials | None = None,
) -> None:
    """
    Write a DataFrame to a Google BigQuery table.

    .. deprecated:: 2.2.0

       Please use ``pandas_gbq.to_gbq`` instead.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.
    """
    warnings.warn(
        "to_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.to_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.to_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(
        dataframe,
        destination_table,
        project_id=project_id,
        chunksize=chunksize,
        reauth=reauth,
        if_exists=if_exists,
        auth_local_webserver=auth_local_webserver,
        table_schema=table_schema,
        location=location,
        progress_bar=progress_bar,
        credentials=credentials,
    )
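# Migration sketch (illustrative only, not part of the original module): the
# FutureWarning above directs users to pandas-gbq directly. Assuming pandas-gbq
# is installed, writing a frame maps onto the library call like this; the
# dataset, table, and project id below are placeholders.
#
#     import pandas as pd
#     import pandas_gbq
#
#     df = pd.DataFrame({"name": ["alice", "bob"]})
#     pandas_gbq.to_gbq(
#         df,
#         destination_table="your_dataset.your_table",
#         project_id="your-project-id",
#         if_exists="fail",
#     )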