Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/gbq.py: 33%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

21 statements  

1""" Google BigQuery support """ 

2from __future__ import annotations 

3 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7) 

8 

9from pandas.compat._optional import import_optional_dependency 

10 

11if TYPE_CHECKING: 

12 from pandas import DataFrame 

13 

14 

15def _try_import(): 

16 # since pandas is a dependency of pandas-gbq 

17 # we need to import on first use 

18 msg = ( 

19 "pandas-gbq is required to load data from Google BigQuery. " 

20 "See the docs: https://pandas-gbq.readthedocs.io." 

21 ) 

22 pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) 

23 return pandas_gbq 

24 

25 

def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials=None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:
    """
    Load data from Google BigQuery.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__. See the `How to authenticate
    with Google BigQuery
    <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__
    guide for authentication instructions.

    Parameters
    ----------
    query : str
        SQL-Like Query to return data values.
    project_id : str, optional
        Google BigQuery Account project ID. Optional when available from
        the environment.
    index_col : str, optional
        Name of result column to use for index in results DataFrame.
    col_order : list(str), optional
        List of BigQuery column names in the desired order for results
        DataFrame.
    reauth : bool, default False
        Force Google BigQuery to re-authenticate the user. This is useful
        if multiple accounts are used.
    auth_local_webserver : bool, default True
        Use the local webserver flow instead of the console flow when
        getting user credentials.

        *New in version 0.2.0 of pandas-gbq*.

        .. versionchanged:: 1.5.0
            Default value is changed to ``True``. Google has deprecated the
            ``auth_local_webserver = False`` "out of band" (copy-paste)
            flow.
    dialect : str, default 'legacy'
        Note: The default value is changing to 'standard' in a future
        version. SQL syntax dialect to use. Value can be one of
        ``'legacy'`` (BigQuery's legacy SQL dialect) or ``'standard'``
        (BigQuery's standard SQL, compliant with the SQL 2011 standard).
    location : str, optional
        Location where the query job should run. Must match that of any
        datasets used in the query.

        *New in version 0.5.0 of pandas-gbq*.
    configuration : dict, optional
        Query config parameters for job processing, e.g.
        ``{'query': {'useQueryCache': False}}``. See the BigQuery REST API
        Reference for details.
    credentials : google.auth.credentials.Credentials, optional
        Credentials for accessing Google APIs. Use this parameter to
        override default credentials.

        *New in version 0.8.0 of pandas-gbq*.
    use_bqstorage_api : bool, default False
        Use the BigQuery Storage API to download query results quickly,
        but at an increased cost. Requires pandas-gbq 0.10.0+ plus the
        ``google-cloud-bigquery-storage`` and ``fastavro`` packages.
    max_results : int, optional
        If set, limit the maximum number of rows to fetch from the query
        results.

        *New in version 0.12.0 of pandas-gbq*.

        .. versionadded:: 1.1.0
    progress_bar_type : Optional, str
        If set, use the ``tqdm`` library to display a progress bar while
        the data downloads. One of ``None``, ``'tqdm'``,
        ``'tqdm_notebook'``, or ``'tqdm_gui'``. Requires pandas-gbq
        0.12.0+ and the ``tqdm`` package. Unlike pandas-gbq, the default
        here is ``None``.

    Returns
    -------
    df: DataFrame
        DataFrame representing results of query.

    See Also
    --------
    pandas_gbq.read_gbq : This function in the pandas-gbq library.
    DataFrame.to_gbq : Write a DataFrame to Google BigQuery.
    """
    pandas_gbq = _try_import()

    # Keyword arguments added in newer pandas-gbq releases. Only forward
    # the opt-in ones when the caller set them explicitly, so older
    # pandas-gbq versions that don't know them keep working.
    extra_kwargs: dict[str, str | bool | int | None] = {
        "progress_bar_type": progress_bar_type,
    }
    if use_bqstorage_api is not None:
        extra_kwargs["use_bqstorage_api"] = use_bqstorage_api
    if max_results is not None:
        extra_kwargs["max_results"] = max_results

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **extra_kwargs,
    )

199 

200 

def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials=None,
) -> None:
    """
    Write *dataframe* to a Google BigQuery table.

    Thin wrapper that imports pandas-gbq on first use and forwards every
    argument unchanged to :func:`pandas_gbq.to_gbq`; see the pandas-gbq
    documentation for the full description of each parameter.
    """
    # Deferred import: pandas-gbq itself depends on pandas.
    _try_import().to_gbq(
        dataframe,
        destination_table,
        project_id=project_id,
        chunksize=chunksize,
        reauth=reauth,
        if_exists=if_exists,
        auth_local_webserver=auth_local_webserver,
        table_schema=table_schema,
        location=location,
        progress_bar=progress_bar,
        credentials=credentials,
    )