Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/gbq.py: 35%


1""" Google BigQuery support """ 

2from __future__ import annotations 

3 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7) 

8import warnings 

9 

10from pandas.compat._optional import import_optional_dependency 

11from pandas.util._exceptions import find_stack_level 

12 

13if TYPE_CHECKING: 

14 from google.auth.credentials import Credentials 

15 

16 from pandas import DataFrame 

17 

18 

19def _try_import(): 

20 # since pandas is a dependency of pandas-gbq 

21 # we need to import on first use 

22 msg = ( 

23 "pandas-gbq is required to load data from Google BigQuery. " 

24 "See the docs: https://pandas-gbq.readthedocs.io." 

25 ) 

26 pandas_gbq = import_optional_dependency("pandas_gbq", extra=msg) 

27 return pandas_gbq 
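# Illustrative sketch only (not part of the original module): how the lazy
# import above behaves for a caller when pandas-gbq is not installed.
# ``import_optional_dependency`` raises ImportError, and the ``extra`` text
# defined in ``msg`` is included in the error message, so users get the
# install hint. The try/except below is hypothetical usage, not pandas code.
#
#     try:
#         pandas_gbq = _try_import()
#     except ImportError as err:
#         print(err)  # message points at https://pandas-gbq.readthedocs.io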


def read_gbq(
    query: str,
    project_id: str | None = None,
    index_col: str | None = None,
    col_order: list[str] | None = None,
    reauth: bool = False,
    auth_local_webserver: bool = True,
    dialect: str | None = None,
    location: str | None = None,
    configuration: dict[str, Any] | None = None,
    credentials: Credentials | None = None,
    use_bqstorage_api: bool | None = None,
    max_results: int | None = None,
    progress_bar_type: str | None = None,
) -> DataFrame:

45 """ 

46 Load data from Google BigQuery. 

47 

48 .. deprecated:: 2.2.0 

49 

50 Please use ``pandas_gbq.read_gbq`` instead. 

51 

52 This function requires the `pandas-gbq package 

53 <https://pandas-gbq.readthedocs.io>`__. 

54 

55 See the `How to authenticate with Google BigQuery 

56 <https://pandas-gbq.readthedocs.io/en/latest/howto/authentication.html>`__ 

57 guide for authentication instructions. 

58 

59 Parameters 

60 ---------- 

61 query : str 

62 SQL-Like Query to return data values. 

63 project_id : str, optional 

64 Google BigQuery Account project ID. Optional when available from 

65 the environment. 

66 index_col : str, optional 

67 Name of result column to use for index in results DataFrame. 

68 col_order : list(str), optional 

69 List of BigQuery column names in the desired order for results 

70 DataFrame. 

71 reauth : bool, default False 

72 Force Google BigQuery to re-authenticate the user. This is useful 

73 if multiple accounts are used. 

74 auth_local_webserver : bool, default True 

75 Use the `local webserver flow`_ instead of the `console flow`_ 

76 when getting user credentials. 

77 

78 .. _local webserver flow: 

79 https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_local_server 

80 .. _console flow: 

81 https://google-auth-oauthlib.readthedocs.io/en/latest/reference/google_auth_oauthlib.flow.html#google_auth_oauthlib.flow.InstalledAppFlow.run_console 

82 

83 *New in version 0.2.0 of pandas-gbq*. 

84 

85 .. versionchanged:: 1.5.0 

86 Default value is changed to ``True``. Google has deprecated the 

87 ``auth_local_webserver = False`` `"out of band" (copy-paste) 

88 flow 

89 <https://developers.googleblog.com/2022/02/making-oauth-flows-safer.html?m=1#disallowed-oob>`_. 

90 dialect : str, default 'legacy' 

91 Note: The default value is changing to 'standard' in a future version. 

92 

93 SQL syntax dialect to use. Value can be one of: 

94 

95 ``'legacy'`` 

96 Use BigQuery's legacy SQL dialect. For more information see 

97 `BigQuery Legacy SQL Reference 

98 <https://cloud.google.com/bigquery/docs/reference/legacy-sql>`__. 

99 ``'standard'`` 

100 Use BigQuery's standard SQL, which is 

101 compliant with the SQL 2011 standard. For more information 

102 see `BigQuery Standard SQL Reference 

103 <https://cloud.google.com/bigquery/docs/reference/standard-sql/>`__. 

104 location : str, optional 

105 Location where the query job should run. See the `BigQuery locations 

106 documentation 

107 <https://cloud.google.com/bigquery/docs/dataset-locations>`__ for a 

108 list of available locations. The location must match that of any 

109 datasets used in the query. 

110 

111 *New in version 0.5.0 of pandas-gbq*. 

112 configuration : dict, optional 

113 Query config parameters for job processing. 

114 For example: 

115 

116 configuration = {'query': {'useQueryCache': False}} 

117 

118 For more information see `BigQuery REST API Reference 

119 <https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.query>`__. 

120 credentials : google.auth.credentials.Credentials, optional 

121 Credentials for accessing Google APIs. Use this parameter to override 

122 default credentials, such as to use Compute Engine 

123 :class:`google.auth.compute_engine.Credentials` or Service Account 

124 :class:`google.oauth2.service_account.Credentials` directly. 

125 

126 *New in version 0.8.0 of pandas-gbq*. 

127 use_bqstorage_api : bool, default False 

128 Use the `BigQuery Storage API 

129 <https://cloud.google.com/bigquery/docs/reference/storage/>`__ to 

130 download query results quickly, but at an increased cost. To use this 

131 API, first `enable it in the Cloud Console 

132 <https://console.cloud.google.com/apis/library/bigquerystorage.googleapis.com>`__. 

133 You must also have the `bigquery.readsessions.create 

134 <https://cloud.google.com/bigquery/docs/access-control#roles>`__ 

135 permission on the project you are billing queries to. 

136 

137 This feature requires version 0.10.0 or later of the ``pandas-gbq`` 

138 package. It also requires the ``google-cloud-bigquery-storage`` and 

139 ``fastavro`` packages. 

140 

141 max_results : int, optional 

142 If set, limit the maximum number of rows to fetch from the query 

143 results. 

144 

145 progress_bar_type : Optional, str 

146 If set, use the `tqdm <https://tqdm.github.io/>`__ library to 

147 display a progress bar while the data downloads. Install the 

148 ``tqdm`` package to use this feature. 

149 

150 Possible values of ``progress_bar_type`` include: 

151 

152 ``None`` 

153 No progress bar. 

154 ``'tqdm'`` 

155 Use the :func:`tqdm.tqdm` function to print a progress bar 

156 to :data:`sys.stderr`. 

157 ``'tqdm_notebook'`` 

158 Use the :func:`tqdm.tqdm_notebook` function to display a 

159 progress bar as a Jupyter notebook widget. 

160 ``'tqdm_gui'`` 

161 Use the :func:`tqdm.tqdm_gui` function to display a 

162 progress bar as a graphical dialog box. 

163 

164 Returns 

165 ------- 

166 df: DataFrame 

167 DataFrame representing results of query. 

168 

169 See Also 

170 -------- 

171 pandas_gbq.read_gbq : This function in the pandas-gbq library. 

172 DataFrame.to_gbq : Write a DataFrame to Google BigQuery. 

173 

174 Examples 

175 -------- 

176 Example taken from `Google BigQuery documentation 

177 <https://cloud.google.com/bigquery/docs/pandas-gbq-migration>`_ 

178 

179 >>> sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;" 

180 >>> df = pd.read_gbq(sql, dialect="standard") # doctest: +SKIP 

181 >>> project_id = "your-project-id" # doctest: +SKIP 

182 >>> df = pd.read_gbq(sql, 

183 ... project_id=project_id, 

184 ... dialect="standard" 

185 ... ) # doctest: +SKIP 

186 """ 

    warnings.warn(
        "read_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.read_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.read_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()

    kwargs: dict[str, str | bool | int | None] = {}

    # START: new kwargs. Don't populate unless explicitly set.
    if use_bqstorage_api is not None:
        kwargs["use_bqstorage_api"] = use_bqstorage_api
    if max_results is not None:
        kwargs["max_results"] = max_results

    kwargs["progress_bar_type"] = progress_bar_type
    # END: new kwargs

    return pandas_gbq.read_gbq(
        query,
        project_id=project_id,
        index_col=index_col,
        col_order=col_order,
        reauth=reauth,
        auth_local_webserver=auth_local_webserver,
        dialect=dialect,
        location=location,
        configuration=configuration,
        credentials=credentials,
        **kwargs,
    )
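# Migration sketch (illustrative only, not part of the original module): the
# FutureWarning above directs users to pandas-gbq directly. Assuming pandas-gbq
# is installed, the docstring example maps onto the library call like this;
# "your-project-id" is a placeholder, as in the docstring.
#
#     import pandas_gbq
#
#     sql = "SELECT name FROM table_name WHERE state = 'TX' LIMIT 100;"
#     df = pandas_gbq.read_gbq(sql, project_id="your-project-id", dialect="standard")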


def to_gbq(
    dataframe: DataFrame,
    destination_table: str,
    project_id: str | None = None,
    chunksize: int | None = None,
    reauth: bool = False,
    if_exists: str = "fail",
    auth_local_webserver: bool = True,
    table_schema: list[dict[str, str]] | None = None,
    location: str | None = None,
    progress_bar: bool = True,
    credentials: Credentials | None = None,
) -> None:
    """
    Write a DataFrame to a Google BigQuery table.

    .. deprecated:: 2.2.0

       Please use ``pandas_gbq.to_gbq`` instead.

    This function requires the `pandas-gbq package
    <https://pandas-gbq.readthedocs.io>`__.
    """
    warnings.warn(
        "to_gbq is deprecated and will be removed in a future version. "
        "Please use pandas_gbq.to_gbq instead: "
        "https://pandas-gbq.readthedocs.io/en/latest/api.html#pandas_gbq.to_gbq",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    pandas_gbq = _try_import()
    pandas_gbq.to_gbq(
        dataframe,
        destination_table,
        project_id=project_id,
        chunksize=chunksize,
        reauth=reauth,
        if_exists=if_exists,
        auth_local_webserver=auth_local_webserver,
        table_schema=table_schema,
        location=location,
        progress_bar=progress_bar,
        credentials=credentials,
    )
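# Migration sketch (illustrative only, not part of the original module): the
# FutureWarning above directs users to pandas-gbq directly. Assuming pandas-gbq
# is installed, writing a frame maps onto the library call like this; the
# dataset, table, and project id below are placeholders.
#
#     import pandas as pd
#     import pandas_gbq
#
#     df = pd.DataFrame({"name": ["alice", "bob"]})
#     pandas_gbq.to_gbq(
#         df,
#         destination_table="your_dataset.your_table",
#         project_id="your-project-id",
#         if_exists="fail",
#     )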