Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/table.py: 37%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1192 statements  

1# Copyright 2015 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Define API Tables.""" 

16 

17from __future__ import absolute_import 

18 

19import copy 

20import datetime 

21import functools 

22import operator 

23import typing 

24from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence 

25 

26import warnings 

27 

28try: 

29 import pandas # type: ignore 

30except ImportError: 

31 pandas = None 

32 

33try: 

34 import pyarrow # type: ignore 

35except ImportError: 

36 pyarrow = None 

37 

38try: 

39 import db_dtypes # type: ignore 

40except ImportError: 

41 db_dtypes = None 

42 

43try: 

44 import geopandas # type: ignore 

45except ImportError: 

46 geopandas = None 

47finally: 

48 _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326" 

49 

50try: 

51 import shapely # type: ignore 

52 from shapely import wkt # type: ignore 

53except ImportError: 

54 shapely = None 

55else: 

56 _read_wkt = wkt.loads 

57 

58import google.api_core.exceptions 

59from google.api_core.page_iterator import HTTPIterator 

60 

61import google.cloud._helpers # type: ignore 

62from google.cloud.bigquery import _helpers 

63from google.cloud.bigquery import _pandas_helpers 

64from google.cloud.bigquery import _versions_helpers 

65from google.cloud.bigquery import exceptions as bq_exceptions 

66from google.cloud.bigquery._tqdm_helpers import get_progress_bar 

67from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration 

68from google.cloud.bigquery.enums import DefaultPandasDTypes 

69from google.cloud.bigquery.external_config import ExternalConfig 

70from google.cloud.bigquery import schema as _schema 

71from google.cloud.bigquery.schema import _build_schema_resource 

72from google.cloud.bigquery.schema import _parse_schema_resource 

73from google.cloud.bigquery.schema import _to_schema_fields 

74from google.cloud.bigquery import external_config 

75 

76if typing.TYPE_CHECKING: # pragma: NO COVER 

77 # Unconditionally import optional dependencies again to tell pytype that 

78 # they are not None, avoiding false "no attribute" errors. 

79 import pandas 

80 import pyarrow 

81 import geopandas # type: ignore 

82 from google.cloud import bigquery_storage # type: ignore 

83 from google.cloud.bigquery.dataset import DatasetReference 

84 

85 

# Messages describing a missing optional dependency; each names the feature
# that needs the library.
_NO_GEOPANDAS_ERROR = (
    "The geopandas library is not installed, please install "
    "geopandas to use the to_geodataframe() function."
)
_NO_PYARROW_ERROR = (
    "The pyarrow library is not installed, please install "
    "pyarrow to use the to_arrow() function."
)
_NO_SHAPELY_ERROR = (
    "The shapely library is not installed, please install "
    "shapely to use the geography_as_object option."
)

# Message pointing the caller at ``client.get_table()`` to obtain a schema.
_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'

# Message for a dtype that lacks the ``__from_arrow__`` attribute.
_NO_SUPPORTED_DTYPE = (
    "The dtype cannot to be converted to a pandas ExtensionArray "
    "because the necessary `__from_arrow__` attribute is missing."
)

# Message explaining the fallback to ``object`` for RANGE columns.
_RANGE_PYARROW_WARNING = (
    "Unable to represent RANGE schema as struct using pandas ArrowDtype. Using "
    "`object` instead. To use ArrowDtype, use pandas >= 1.5 and "
    "pyarrow >= 10.0.1."
)

# How many of the total rows need to be downloaded already for us to skip
# calling the BQ Storage API?
#
# In microbenchmarks on 2024-05-21, I (tswast@) measure that at about 2 MB of
# remaining results, it's faster to use the BQ Storage Read API to download
# the results than use jobs.getQueryResults. Since we don't have a good way to
# know the remaining bytes, we estimate by remaining number of rows.
#
# Except when rows themselves are larger, I observe that a single page of
# results will be around 10 MB. Therefore, the proportion of rows already
# downloaded should be 10 (first page) / 12 (all results) or less for it to be
# worth it to make a call to jobs.getQueryResults.
ALMOST_COMPLETELY_CACHED_RATIO = 0.833333

125 

126 

def _reference_getter(table):
    """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
    this table.

    Args:
        table: Object exposing ``project``, ``dataset_id`` and ``table_id``.

    Returns:
        google.cloud.bigquery.table.TableReference: pointer to this table.
    """
    # Imported inside the function, as the other helpers in this module do.
    from google.cloud.bigquery.dataset import DatasetReference

    parent_dataset = DatasetReference(table.project, table.dataset_id)
    return TableReference(parent_dataset, table.table_id)

138 

139 

140def _view_use_legacy_sql_getter( 

141 table: Union["Table", "TableListItem"] 

142) -> Optional[bool]: 

143 """bool: Specifies whether to execute the view with Legacy or Standard SQL. 

144 

145 This boolean specifies whether to execute the view with Legacy SQL 

146 (:data:`True`) or Standard SQL (:data:`False`). The client side default is 

147 :data:`False`. The server-side default is :data:`True`. If this table is 

148 not a view, :data:`None` is returned. 

149 

150 Raises: 

151 ValueError: For invalid value types. 

152 """ 

153 

154 view: Optional[Dict[str, Any]] = table._properties.get("view") 

155 if view is not None: 

156 # The server-side default for useLegacySql is True. 

157 return view.get("useLegacySql", True) if view is not None else True 

158 # In some cases, such as in a table list no view object is present, but the 

159 # resource still represents a view. Use the type as a fallback. 

160 if table.table_type == "VIEW": 

161 # The server-side default for useLegacySql is True. 

162 return True 

163 return None # explicit return statement to appease mypy 

164 

165 

class _TableBase:
    """Common behaviour shared by the table-related classes.

    Identity (project, dataset ID, table ID) is read out of ``_properties``
    through the key paths listed in ``_PROPERTY_TO_API_FIELD``.
    """

    # Key path inside ``_properties`` for each identifying attribute.
    _PROPERTY_TO_API_FIELD: Dict[str, Union[str, List[str]]] = {
        "dataset_id": ["tableReference", "datasetId"],
        "project": ["tableReference", "projectId"],
        "table_id": ["tableReference", "tableId"],
    }

    def __init__(self):
        self._properties = {}

    @property
    def project(self) -> str:
        """Project bound to the table."""
        key_path = self._PROPERTY_TO_API_FIELD["project"]
        return _helpers._get_sub_prop(self._properties, key_path)

    @property
    def dataset_id(self) -> str:
        """ID of dataset containing the table."""
        key_path = self._PROPERTY_TO_API_FIELD["dataset_id"]
        return _helpers._get_sub_prop(self._properties, key_path)

    @property
    def table_id(self) -> str:
        """The table ID."""
        key_path = self._PROPERTY_TO_API_FIELD["table_id"]
        return _helpers._get_sub_prop(self._properties, key_path)

    @property
    def path(self) -> str:
        """URL path for the table's APIs."""
        return "/projects/{}/datasets/{}/tables/{}".format(
            self.project, self.dataset_id, self.table_id
        )

    def __eq__(self, other):
        # Let Python try the reflected comparison for unrelated types.
        if not isinstance(other, _TableBase):
            return NotImplemented
        return (
            self.project == other.project
            and self.dataset_id == other.dataset_id
            and self.table_id == other.table_id
        )

    def __hash__(self):
        # Hash the same three fields that ``__eq__`` compares.
        identity = (self.project, self.dataset_id, self.table_id)
        return hash(identity)

219 

220 

class TableReference(_TableBase):
    """Pointer to a table, made of a project ID, a dataset ID and a table ID.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference

    Args:
        dataset_ref: A pointer to the dataset
        table_id: The ID of the table
    """

    # Here the IDs are stored at the top level of ``_properties``.
    _PROPERTY_TO_API_FIELD = {
        "dataset_id": "datasetId",
        "project": "projectId",
        "table_id": "tableId",
    }

    def __init__(self, dataset_ref: "DatasetReference", table_id: str):
        self._properties = {}

        id_parts = (
            ("project", dataset_ref.project),
            ("dataset_id", dataset_ref.dataset_id),
            ("table_id", table_id),
        )
        for prop_name, prop_value in id_parts:
            _helpers._set_sub_prop(
                self._properties,
                self._PROPERTY_TO_API_FIELD[prop_name],
                prop_value,
            )

    @classmethod
    def from_string(
        cls, table_id: str, default_project: Optional[str] = None
    ) -> "TableReference":
        """Construct a table reference from table ID string.

        Args:
            table_id (str):
                A table ID in standard SQL format. If ``default_project``
                is not specified, this must include a project ID, dataset
                ID, and table ID, each separated by ``.``.
            default_project (Optional[str]):
                The project ID to use when ``table_id`` does not
                include a project ID.

        Returns:
            TableReference: Table reference parsed from ``table_id``.

        Examples:
            >>> TableReference.from_string('my-project.mydataset.mytable')
            TableRef...(DatasetRef...('my-project', 'mydataset'), 'mytable')

        Raises:
            ValueError:
                If ``table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        from google.cloud.bigquery.dataset import DatasetReference

        parsed_ids = _helpers._parse_3_part_id(
            table_id, default_project=default_project, property_name="table_id"
        )
        parsed_project, parsed_dataset, parsed_table = parsed_ids

        parent_dataset = DatasetReference(parsed_project, parsed_dataset)
        return cls(parent_dataset, parsed_table)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "TableReference":
        """Factory: construct a table reference given its API representation

        Args:
            resource (Dict[str, object]):
                Table reference representation returned from the API

        Returns:
            google.cloud.bigquery.table.TableReference:
                Table reference parsed from ``resource``.
        """
        from google.cloud.bigquery.dataset import DatasetReference

        # Read all three keys before building anything.
        project, dataset_id, table_id = (
            resource[key] for key in ("projectId", "datasetId", "tableId")
        )

        parent_dataset = DatasetReference(project, dataset_id)
        return cls(parent_dataset, table_id)

    def to_api_repr(self) -> dict:
        """Construct the API resource representation of this table reference.

        Returns:
            Dict[str, object]: Table reference represented as an API resource
        """
        # Deep copy so callers cannot mutate this reference's own state.
        resource = copy.deepcopy(self._properties)
        return resource

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Install the ``google-cloud-bigquery-storage`` package to use this
        feature.

        If the ``table_id`` contains a partition identifier (e.g.
        ``my_table$201812``) or a snapshot identifier (e.g.
        ``mytable@1234567890``), it is ignored. Use
        :class:`google.cloud.bigquery_storage.types.ReadSession.TableReadOptions`
        to filter rows by partition. Use
        :class:`google.cloud.bigquery_storage.types.ReadSession.TableModifiers`
        to select a specific snapshot to read from.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """
        # Drop the "@..." suffix first, then the "$..." suffix.
        without_snapshot = self.table_id.partition("@")[0]
        bare_table_id = without_snapshot.partition("$")[0]

        return (
            f"projects/{self.project}/datasets/{self.dataset_id}"
            f"/tables/{bare_table_id}"
        )

    def __str__(self):
        return "{}.{}.{}".format(self.project, self.dataset_id, self.table_id)

    def __repr__(self):
        from google.cloud.bigquery.dataset import DatasetReference

        parent_dataset = DatasetReference(self.project, self.dataset_id)
        return "TableReference({!r}, '{}')".format(parent_dataset, self.table_id)

360 

361 

362class Table(_TableBase): 

363 """Tables represent a set of rows whose values correspond to a schema. 

364 

365 See 

366 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource-table 

367 

368 Args: 

369 table_ref (Union[google.cloud.bigquery.table.TableReference, str]): 

370 A pointer to a table. If ``table_ref`` is a string, it must 

371 included a project ID, dataset ID, and table ID, each separated 

372 by ``.``. 

373 schema (Optional[Sequence[Union[ \ 

374 :class:`~google.cloud.bigquery.schema.SchemaField`, \ 

375 Mapping[str, Any] \ 

376 ]]]): 

377 The table's schema. If any item is a mapping, its content must be 

378 compatible with 

379 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. 

380 """ 

381 

382 _PROPERTY_TO_API_FIELD: Dict[str, Any] = { 

383 **_TableBase._PROPERTY_TO_API_FIELD, 

384 "biglake_configuration": "biglakeConfiguration", 

385 "clustering_fields": "clustering", 

386 "created": "creationTime", 

387 "description": "description", 

388 "encryption_configuration": "encryptionConfiguration", 

389 "etag": "etag", 

390 "expires": "expirationTime", 

391 "external_data_configuration": "externalDataConfiguration", 

392 "friendly_name": "friendlyName", 

393 "full_table_id": "id", 

394 "labels": "labels", 

395 "location": "location", 

396 "modified": "lastModifiedTime", 

397 "mview_enable_refresh": "materializedView", 

398 "mview_last_refresh_time": ["materializedView", "lastRefreshTime"], 

399 "mview_query": "materializedView", 

400 "mview_refresh_interval": "materializedView", 

401 "mview_allow_non_incremental_definition": "materializedView", 

402 "num_bytes": "numBytes", 

403 "num_rows": "numRows", 

404 "partition_expiration": "timePartitioning", 

405 "partitioning_type": "timePartitioning", 

406 "range_partitioning": "rangePartitioning", 

407 "time_partitioning": "timePartitioning", 

408 "schema": ["schema", "fields"], 

409 "snapshot_definition": "snapshotDefinition", 

410 "clone_definition": "cloneDefinition", 

411 "streaming_buffer": "streamingBuffer", 

412 "self_link": "selfLink", 

413 "type": "type", 

414 "view_use_legacy_sql": "view", 

415 "view_query": "view", 

416 "require_partition_filter": "requirePartitionFilter", 

417 "table_constraints": "tableConstraints", 

418 "max_staleness": "maxStaleness", 

419 "resource_tags": "resourceTags", 

420 "external_catalog_table_options": "externalCatalogTableOptions", 

421 "foreign_type_info": ["schema", "foreignTypeInfo"], 

422 } 

423 

424 def __init__(self, table_ref, schema=None) -> None: 

425 table_ref = _table_arg_to_table_ref(table_ref) 

426 self._properties: Dict[str, Any] = { 

427 "tableReference": table_ref.to_api_repr(), 

428 "labels": {}, 

429 } 

430 # Let the @property do validation. 

431 if schema is not None: 

432 self.schema = schema 

433 

434 reference = property(_reference_getter) 

435 

436 @property 

437 def biglake_configuration(self): 

438 """google.cloud.bigquery.table.BigLakeConfiguration: Configuration 

439 for managed tables for Apache Iceberg. 

440 

441 See https://cloud.google.com/bigquery/docs/iceberg-tables for more information. 

442 """ 

443 prop = self._properties.get( 

444 self._PROPERTY_TO_API_FIELD["biglake_configuration"] 

445 ) 

446 if prop is not None: 

447 prop = BigLakeConfiguration.from_api_repr(prop) 

448 return prop 

449 

450 @biglake_configuration.setter 

451 def biglake_configuration(self, value): 

452 api_repr = value 

453 if value is not None: 

454 api_repr = value.to_api_repr() 

455 self._properties[ 

456 self._PROPERTY_TO_API_FIELD["biglake_configuration"] 

457 ] = api_repr 

458 

459 @property 

460 def require_partition_filter(self): 

461 """bool: If set to true, queries over the partitioned table require a 

462 partition filter that can be used for partition elimination to be 

463 specified. 

464 """ 

465 return self._properties.get( 

466 self._PROPERTY_TO_API_FIELD["require_partition_filter"] 

467 ) 

468 

469 @require_partition_filter.setter 

470 def require_partition_filter(self, value): 

471 self._properties[ 

472 self._PROPERTY_TO_API_FIELD["require_partition_filter"] 

473 ] = value 

474 

475 @property 

476 def schema(self): 

477 """Sequence[Union[ \ 

478 :class:`~google.cloud.bigquery.schema.SchemaField`, \ 

479 Mapping[str, Any] \ 

480 ]]: 

481 Table's schema. 

482 

483 Raises: 

484 Exception: 

485 If ``schema`` is not a sequence, or if any item in the sequence 

486 is not a :class:`~google.cloud.bigquery.schema.SchemaField` 

487 instance or a compatible mapping representation of the field. 

488 

489 .. Note:: 

490 If you are referencing a schema for an external catalog table such 

491 as a Hive table, it will also be necessary to populate the foreign_type_info 

492 attribute. This is not necessary if defining the schema for a BigQuery table. 

493 

494 For details, see: 

495 https://cloud.google.com/bigquery/docs/external-tables 

496 https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets 

497 

498 """ 

499 prop = _helpers._get_sub_prop( 

500 self._properties, self._PROPERTY_TO_API_FIELD["schema"] 

501 ) 

502 if not prop: 

503 return [] 

504 else: 

505 return _parse_schema_resource(prop) 

506 

507 @schema.setter 

508 def schema(self, value): 

509 api_field = self._PROPERTY_TO_API_FIELD["schema"] 

510 

511 if value is None: 

512 _helpers._set_sub_prop( 

513 self._properties, 

514 api_field, 

515 None, 

516 ) 

517 elif isinstance(value, Sequence): 

518 value = _to_schema_fields(value) 

519 value = _build_schema_resource(value) 

520 _helpers._set_sub_prop( 

521 self._properties, 

522 api_field, 

523 value, 

524 ) 

525 else: 

526 raise TypeError("Schema must be a Sequence (e.g. a list) or None.") 

527 

528 @property 

529 def labels(self): 

530 """Dict[str, str]: Labels for the table. 

531 

532 This method always returns a dict. To change a table's labels, 

533 modify the dict, then call ``Client.update_table``. To delete a 

534 label, set its value to :data:`None` before updating. 

535 

536 Raises: 

537 ValueError: If ``value`` type is invalid. 

538 """ 

539 return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {}) 

540 

541 @labels.setter 

542 def labels(self, value): 

543 if not isinstance(value, dict): 

544 raise ValueError("Pass a dict") 

545 self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value 

546 

547 @property 

548 def encryption_configuration(self): 

549 """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom 

550 encryption configuration for the table. 

551 

552 Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None` 

553 if using default encryption. 

554 

555 See `protecting data with Cloud KMS keys 

556 <https://cloud.google.com/bigquery/docs/customer-managed-encryption>`_ 

557 in the BigQuery documentation. 

558 """ 

559 prop = self._properties.get( 

560 self._PROPERTY_TO_API_FIELD["encryption_configuration"] 

561 ) 

562 if prop is not None: 

563 prop = EncryptionConfiguration.from_api_repr(prop) 

564 return prop 

565 

566 @encryption_configuration.setter 

567 def encryption_configuration(self, value): 

568 api_repr = value 

569 if value is not None: 

570 api_repr = value.to_api_repr() 

571 self._properties[ 

572 self._PROPERTY_TO_API_FIELD["encryption_configuration"] 

573 ] = api_repr 

574 

575 @property 

576 def created(self): 

577 """Union[datetime.datetime, None]: Datetime at which the table was 

578 created (:data:`None` until set from the server). 

579 """ 

580 creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"]) 

581 if creation_time is not None: 

582 # creation_time will be in milliseconds. 

583 return google.cloud._helpers._datetime_from_microseconds( 

584 1000.0 * float(creation_time) 

585 ) 

586 

587 @property 

588 def etag(self): 

589 """Union[str, None]: ETag for the table resource (:data:`None` until 

590 set from the server). 

591 """ 

592 return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"]) 

593 

594 @property 

595 def modified(self): 

596 """Union[datetime.datetime, None]: Datetime at which the table was last 

597 modified (:data:`None` until set from the server). 

598 """ 

599 modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"]) 

600 if modified_time is not None: 

601 # modified_time will be in milliseconds. 

602 return google.cloud._helpers._datetime_from_microseconds( 

603 1000.0 * float(modified_time) 

604 ) 

605 

606 @property 

607 def num_bytes(self): 

608 """Union[int, None]: The size of the table in bytes (:data:`None` until 

609 set from the server). 

610 """ 

611 return _helpers._int_or_none( 

612 self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"]) 

613 ) 

614 

615 @property 

616 def num_rows(self): 

617 """Union[int, None]: The number of rows in the table (:data:`None` 

618 until set from the server). 

619 """ 

620 return _helpers._int_or_none( 

621 self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"]) 

622 ) 

623 

624 @property 

625 def self_link(self): 

626 """Union[str, None]: URL for the table resource (:data:`None` until set 

627 from the server). 

628 """ 

629 return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"]) 

630 

631 @property 

632 def full_table_id(self): 

633 """Union[str, None]: ID for the table (:data:`None` until set from the 

634 server). 

635 

636 In the format ``project-id:dataset_id.table_id``. 

637 """ 

638 return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"]) 

639 

640 @property 

641 def table_type(self): 

642 """Union[str, None]: The type of the table (:data:`None` until set from 

643 the server). 

644 

645 Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or 

646 ``'EXTERNAL'``. 

647 """ 

648 return self._properties.get(self._PROPERTY_TO_API_FIELD["type"]) 

649 

650 @property 

651 def range_partitioning(self): 

652 """Optional[google.cloud.bigquery.table.RangePartitioning]: 

653 Configures range-based partitioning for a table. 

654 

655 .. note:: 

656 **Beta**. The integer range partitioning feature is in a 

657 pre-release state and might change or have limited support. 

658 

659 Only specify at most one of 

660 :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or 

661 :attr:`~google.cloud.bigquery.table.Table.range_partitioning`. 

662 

663 Raises: 

664 ValueError: 

665 If the value is not 

666 :class:`~google.cloud.bigquery.table.RangePartitioning` or 

667 :data:`None`. 

668 """ 

669 resource = self._properties.get( 

670 self._PROPERTY_TO_API_FIELD["range_partitioning"] 

671 ) 

672 if resource is not None: 

673 return RangePartitioning(_properties=resource) 

674 

675 @range_partitioning.setter 

676 def range_partitioning(self, value): 

677 resource = value 

678 if isinstance(value, RangePartitioning): 

679 resource = value._properties 

680 elif value is not None: 

681 raise ValueError( 

682 "Expected value to be RangePartitioning or None, got {}.".format(value) 

683 ) 

684 self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource 

685 

686 @property 

687 def time_partitioning(self): 

688 """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based 

689 partitioning for a table. 

690 

691 Only specify at most one of 

692 :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or 

693 :attr:`~google.cloud.bigquery.table.Table.range_partitioning`. 

694 

695 Raises: 

696 ValueError: 

697 If the value is not 

698 :class:`~google.cloud.bigquery.table.TimePartitioning` or 

699 :data:`None`. 

700 """ 

701 prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"]) 

702 if prop is not None: 

703 return TimePartitioning.from_api_repr(prop) 

704 

705 @time_partitioning.setter 

706 def time_partitioning(self, value): 

707 api_repr = value 

708 if isinstance(value, TimePartitioning): 

709 api_repr = value.to_api_repr() 

710 elif value is not None: 

711 raise ValueError( 

712 "value must be google.cloud.bigquery.table.TimePartitioning " "or None" 

713 ) 

714 self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr 

715 

716 @property 

717 def partitioning_type(self): 

718 """Union[str, None]: Time partitioning of the table if it is 

719 partitioned (Defaults to :data:`None`). 

720 

721 """ 

722 warnings.warn( 

723 "This method will be deprecated in future versions. Please use " 

724 "Table.time_partitioning.type_ instead.", 

725 PendingDeprecationWarning, 

726 stacklevel=2, 

727 ) 

728 if self.time_partitioning is not None: 

729 return self.time_partitioning.type_ 

730 

731 @partitioning_type.setter 

732 def partitioning_type(self, value): 

733 warnings.warn( 

734 "This method will be deprecated in future versions. Please use " 

735 "Table.time_partitioning.type_ instead.", 

736 PendingDeprecationWarning, 

737 stacklevel=2, 

738 ) 

739 api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"] 

740 if self.time_partitioning is None: 

741 self._properties[api_field] = {} 

742 self._properties[api_field]["type"] = value 

743 

744 @property 

745 def partition_expiration(self): 

746 """Union[int, None]: Expiration time in milliseconds for a partition. 

747 

748 If :attr:`partition_expiration` is set and :attr:`type_` is 

749 not set, :attr:`type_` will default to 

750 :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. 

751 """ 

752 warnings.warn( 

753 "This method will be deprecated in future versions. Please use " 

754 "Table.time_partitioning.expiration_ms instead.", 

755 PendingDeprecationWarning, 

756 stacklevel=2, 

757 ) 

758 if self.time_partitioning is not None: 

759 return self.time_partitioning.expiration_ms 

760 

761 @partition_expiration.setter 

762 def partition_expiration(self, value): 

763 warnings.warn( 

764 "This method will be deprecated in future versions. Please use " 

765 "Table.time_partitioning.expiration_ms instead.", 

766 PendingDeprecationWarning, 

767 stacklevel=2, 

768 ) 

769 api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"] 

770 

771 if self.time_partitioning is None: 

772 self._properties[api_field] = {"type": TimePartitioningType.DAY} 

773 

774 if value is None: 

775 self._properties[api_field]["expirationMs"] = None 

776 else: 

777 self._properties[api_field]["expirationMs"] = str(value) 

778 

779 @property 

780 def clustering_fields(self): 

781 """Union[List[str], None]: Fields defining clustering for the table 

782 

783 (Defaults to :data:`None`). 

784 

785 Clustering fields are immutable after table creation. 

786 

787 .. note:: 

788 

789 BigQuery supports clustering for both partitioned and 

790 non-partitioned tables. 

791 """ 

792 prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"]) 

793 if prop is not None: 

794 return list(prop.get("fields", ())) 

795 

796 @clustering_fields.setter 

797 def clustering_fields(self, value): 

798 """Union[List[str], None]: Fields defining clustering for the table 

799 

800 (Defaults to :data:`None`). 

801 """ 

802 api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"] 

803 

804 if value is not None: 

805 prop = self._properties.setdefault(api_field, {}) 

806 prop["fields"] = value 

807 else: 

808 # In order to allow unsetting clustering fields completely, we explicitly 

809 # set this property to None (as oposed to merely removing the key). 

810 self._properties[api_field] = None 

811 

812 @property 

813 def description(self): 

814 """Union[str, None]: Description of the table (defaults to 

815 :data:`None`). 

816 

817 Raises: 

818 ValueError: For invalid value types. 

819 """ 

820 return self._properties.get(self._PROPERTY_TO_API_FIELD["description"]) 

821 

822 @description.setter 

823 def description(self, value): 

824 if not isinstance(value, str) and value is not None: 

825 raise ValueError("Pass a string, or None") 

826 self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value 

827 

828 @property 

829 def expires(self): 

830 """Union[datetime.datetime, None]: Datetime at which the table will be 

831 deleted. 

832 

833 Raises: 

834 ValueError: For invalid value types. 

835 """ 

836 expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"]) 

837 if expiration_time is not None: 

838 # expiration_time will be in milliseconds. 

839 return google.cloud._helpers._datetime_from_microseconds( 

840 1000.0 * float(expiration_time) 

841 ) 

842 

843 @expires.setter 

844 def expires(self, value): 

845 if not isinstance(value, datetime.datetime) and value is not None: 

846 raise ValueError("Pass a datetime, or None") 

847 value_ms = google.cloud._helpers._millis_from_datetime(value) 

848 self._properties[ 

849 self._PROPERTY_TO_API_FIELD["expires"] 

850 ] = _helpers._str_or_none(value_ms) 

851 

852 @property 

853 def friendly_name(self): 

854 """Union[str, None]: Title of the table (defaults to :data:`None`). 

855 

856 Raises: 

857 ValueError: For invalid value types. 

858 """ 

859 return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"]) 

860 

861 @friendly_name.setter 

862 def friendly_name(self, value): 

863 if not isinstance(value, str) and value is not None: 

864 raise ValueError("Pass a string, or None") 

865 self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value 

866 

867 @property 

868 def location(self): 

869 """Union[str, None]: Location in which the table is hosted 

870 

871 Defaults to :data:`None`. 

872 """ 

873 return self._properties.get(self._PROPERTY_TO_API_FIELD["location"]) 

874 

875 @property 

876 def view_query(self): 

877 """Union[str, None]: SQL query defining the table as a view (defaults 

878 to :data:`None`). 

879 

880 By default, the query is treated as Standard SQL. To use Legacy 

881 SQL, set :attr:`view_use_legacy_sql` to :data:`True`. 

882 

883 Raises: 

884 ValueError: For invalid value types. 

885 """ 

886 api_field = self._PROPERTY_TO_API_FIELD["view_query"] 

887 return _helpers._get_sub_prop(self._properties, [api_field, "query"]) 

888 

889 @view_query.setter 

890 def view_query(self, value): 

891 if not isinstance(value, str): 

892 raise ValueError("Pass a string") 

893 

894 api_field = self._PROPERTY_TO_API_FIELD["view_query"] 

895 _helpers._set_sub_prop(self._properties, [api_field, "query"], value) 

896 view = self._properties[api_field] 

897 # The service defaults useLegacySql to True, but this 

898 # client uses Standard SQL by default. 

899 if view.get("useLegacySql") is None: 

900 view["useLegacySql"] = False 

901 

902 @view_query.deleter 

903 def view_query(self): 

904 """Delete SQL query defining the table as a view.""" 

905 self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None) 

906 

# The getter is a module-level function (TableListItem builds its read-only
# property from the same function); the setter is attached below.
view_use_legacy_sql = property(_view_use_legacy_sql_getter)


@view_use_legacy_sql.setter  # type: ignore # (redefinition from above)
def view_use_legacy_sql(self, value):
    """Store the ``useLegacySql`` flag inside the view entry of the resource.

    Raises:
        ValueError: If ``value`` is not a ``bool``.
    """
    if not isinstance(value, bool):
        raise ValueError("Pass a boolean")

    view_key = self._PROPERTY_TO_API_FIELD["view_query"]
    view_resource = self._properties.get(view_key)
    if view_resource is None:
        # Covers both a missing entry and an entry explicitly set to None.
        view_resource = {}
        self._properties[view_key] = view_resource
    view_resource["useLegacySql"] = value

918 

@property
def mview_query(self):
    """Optional[str]: SQL query defining the table as a materialized view.

    :data:`None` when the table resource holds no materialized view query.
    Assigned values are converted with ``str()`` before being stored.
    """
    key_path = [self._PROPERTY_TO_API_FIELD["mview_query"], "query"]
    return _helpers._get_sub_prop(self._properties, key_path)


@mview_query.setter
def mview_query(self, value):
    mview_key = self._PROPERTY_TO_API_FIELD["mview_query"]
    query_text = str(value)
    _helpers._set_sub_prop(self._properties, [mview_key, "query"], query_text)


@mview_query.deleter
def mview_query(self):
    """Remove the whole materialized view entry from the resource, if present."""
    mview_key = self._PROPERTY_TO_API_FIELD["mview_query"]
    self._properties.pop(mview_key, None)

936 

@property
def mview_last_refresh_time(self):
    """Optional[datetime.datetime]: When the materialized view was last refreshed.

    :data:`None` until the value has been set from the server.
    """
    key_path = self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"]
    refresh_ms = _helpers._get_sub_prop(self._properties, key_path)
    if refresh_ms is None:
        return None
    # The resource stores milliseconds; the conversion helper takes
    # microseconds.
    refresh_us = 1000 * int(refresh_ms)
    return google.cloud._helpers._datetime_from_microseconds(refresh_us)

950 

@property
def mview_enable_refresh(self):
    """Optional[bool]: Whether the materialized view is refreshed
    automatically when the base table is updated.

    The default value is :data:`True`; this property returns :data:`None`
    when the resource has no ``enableRefresh`` entry.
    """
    key_path = [self._PROPERTY_TO_API_FIELD["mview_enable_refresh"], "enableRefresh"]
    return _helpers._get_sub_prop(self._properties, key_path)


@mview_enable_refresh.setter
def mview_enable_refresh(self, value):
    key_path = [self._PROPERTY_TO_API_FIELD["mview_enable_refresh"], "enableRefresh"]
    return _helpers._set_sub_prop(self._properties, key_path, value)

965 

@property
def mview_refresh_interval(self):
    """Optional[datetime.timedelta]: The maximum frequency at which this
    materialized view will be refreshed.

    The default value is 1800000 milliseconds (30 minutes); this property
    returns :data:`None` when the resource has no ``refreshIntervalMs``
    entry. Assigning a ``timedelta`` stores its whole number of
    milliseconds as a string; assigning :data:`None` stores :data:`None`.
    """
    key_path = [
        self._PROPERTY_TO_API_FIELD["mview_refresh_interval"],
        "refreshIntervalMs",
    ]
    interval_ms = _helpers._get_sub_prop(self._properties, key_path)
    if interval_ms is None:
        return None
    return datetime.timedelta(milliseconds=int(interval_ms))


@mview_refresh_interval.setter
def mview_refresh_interval(self, value):
    # Floor-dividing by one millisecond yields an integer millisecond count.
    interval_ms = (
        None if value is None else str(value // datetime.timedelta(milliseconds=1))
    )

    mview_key = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
    _helpers._set_sub_prop(
        self._properties, [mview_key, "refreshIntervalMs"], interval_ms
    )

992 

@property
def mview_allow_non_incremental_definition(self):
    """Optional[bool]: Declares the intention to construct a materialized
    view that isn't refreshed incrementally.

    The default value is :data:`False`; this property returns :data:`None`
    when the resource has no ``allowNonIncrementalDefinition`` entry.
    """
    mview_key = self._PROPERTY_TO_API_FIELD["mview_allow_non_incremental_definition"]
    key_path = [mview_key, "allowNonIncrementalDefinition"]
    return _helpers._get_sub_prop(self._properties, key_path)


@mview_allow_non_incremental_definition.setter
def mview_allow_non_incremental_definition(self, value):
    mview_key = self._PROPERTY_TO_API_FIELD["mview_allow_non_incremental_definition"]
    key_path = [mview_key, "allowNonIncrementalDefinition"]
    _helpers._set_sub_prop(self._properties, key_path, value)

1014 

@property
def streaming_buffer(self):
    """google.cloud.bigquery.StreamingBuffer: Information about a table's
    streaming buffer.

    A new ``StreamingBuffer`` is built from the resource on each access;
    :data:`None` when the resource has no streaming buffer entry.
    """
    raw_buffer = self._properties.get(
        self._PROPERTY_TO_API_FIELD["streaming_buffer"]
    )
    if raw_buffer is None:
        return None
    return StreamingBuffer(raw_buffer)

1023 

@property
def external_data_configuration(self):
    """Union[google.cloud.bigquery.ExternalConfig, None]: Configuration for
    an external data source (defaults to :data:`None`).

    The getter parses the stored API representation into a new
    ``ExternalConfig`` on each access; the setter stores the value's API
    representation (or :data:`None`).

    Raises:
        ValueError: When assigning something other than an
            ``ExternalConfig`` or :data:`None`.
    """
    api_field = self._PROPERTY_TO_API_FIELD["external_data_configuration"]
    raw_config = self._properties.get(api_field)
    if raw_config is None:
        return None
    return ExternalConfig.from_api_repr(raw_config)


@external_data_configuration.setter
def external_data_configuration(self, value):
    if value is not None and not isinstance(value, ExternalConfig):
        raise ValueError("Pass an ExternalConfig or None")

    serialized = None if value is None else value.to_api_repr()
    api_field = self._PROPERTY_TO_API_FIELD["external_data_configuration"]
    self._properties[api_field] = serialized

1049 

@property
def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
    """Information about the snapshot. This value is set via snapshot creation.

    Returns :data:`None` when the resource has no snapshot definition;
    otherwise the stored entry wrapped in a new ``SnapshotDefinition``.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition
    """
    api_field = self._PROPERTY_TO_API_FIELD["snapshot_definition"]
    raw_definition = self._properties.get(api_field)
    if raw_definition is None:
        return None
    return SnapshotDefinition(raw_definition)

1062 

@property
def clone_definition(self) -> Optional["CloneDefinition"]:
    """Information about the clone. This value is set via clone creation.

    Returns :data:`None` when the resource has no clone definition;
    otherwise the stored entry wrapped in a new ``CloneDefinition``.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition
    """
    api_field = self._PROPERTY_TO_API_FIELD["clone_definition"]
    raw_definition = self._properties.get(api_field)
    if raw_definition is None:
        return None
    return CloneDefinition(raw_definition)

1075 

@property
def table_constraints(self) -> Optional["TableConstraints"]:
    """Tables Primary Key and Foreign Key information.

    Returns :data:`None` when the resource has no constraints entry;
    otherwise a ``TableConstraints`` parsed from the stored entry.
    """
    api_field = self._PROPERTY_TO_API_FIELD["table_constraints"]
    raw_constraints = self._properties.get(api_field)
    if raw_constraints is None:
        return None
    return TableConstraints.from_api_repr(raw_constraints)


@table_constraints.setter
def table_constraints(self, value):
    """Tables Primary Key and Foreign Key information.

    Raises:
        ValueError: If ``value`` is neither a ``TableConstraints`` nor
            :data:`None`.
    """
    if value is not None and not isinstance(value, TableConstraints):
        raise ValueError(
            "value must be google.cloud.bigquery.table.TableConstraints or None"
        )
    # A falsy value is stored as None rather than serialized.
    serialized = value.to_api_repr() if value else None
    api_field = self._PROPERTY_TO_API_FIELD["table_constraints"]
    self._properties[api_field] = serialized

1096 

@property
def resource_tags(self):
    """Dict[str, str]: Resource tags for the table.

    Reading the property inserts an empty dict into the resource when no
    entry exists yet, so the returned dict can be modified in place.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags

    Raises:
        ValueError: When assigning a value that is neither a dict nor
            :data:`None`.
    """
    api_field = self._PROPERTY_TO_API_FIELD["resource_tags"]
    return self._properties.setdefault(api_field, {})


@resource_tags.setter
def resource_tags(self, value):
    if value is not None and not isinstance(value, dict):
        raise ValueError("resource_tags must be a dict or None")
    api_field = self._PROPERTY_TO_API_FIELD["resource_tags"]
    self._properties[api_field] = value

1112 

@property
def external_catalog_table_options(
    self,
) -> Optional[external_config.ExternalCatalogTableOptions]:
    """Options defining open source compatible datasets living in the
    BigQuery catalog. Contains metadata of open source database, schema
    or namespace represented by the current dataset.

    Returns :data:`None` when the resource has no such entry. The setter
    accepts an ``ExternalCatalogTableOptions`` (stored as its API
    representation), a ``dict`` (stored as given) or :data:`None`.
    """
    api_field = self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
    raw_options = self._properties.get(api_field)
    if raw_options is None:
        return None
    return external_config.ExternalCatalogTableOptions.from_api_repr(raw_options)


@external_catalog_table_options.setter
def external_catalog_table_options(
    self, value: Union[external_config.ExternalCatalogTableOptions, dict, None]
):
    accepted_types = (external_config.ExternalCatalogTableOptions, dict)
    checked = _helpers._isinstance_or_raise(
        value, accepted_types, none_allowed=True
    )
    if isinstance(checked, external_config.ExternalCatalogTableOptions):
        checked = checked.to_api_repr()
    api_field = self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
    self._properties[api_field] = checked

1145 

@property
def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
    """Optional. Specifies metadata of the foreign data type definition in
    field schema (TableFieldSchema.foreign_type_definition).

    The setter accepts a ``ForeignTypeInfo`` (stored as its API
    representation), a ``dict`` (stored as given) or :data:`None`.

    Returns:
        Optional[schema.ForeignTypeInfo]:
            Foreign type information, or :data:`None` if not set.

    .. Note::
        foreign_type_info is only required if you are referencing an
        external catalog such as a Hive table.
        For details, see:
        https://cloud.google.com/bigquery/docs/external-tables
        https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
    """
    key_path = self._PROPERTY_TO_API_FIELD["foreign_type_info"]
    raw_info = _helpers._get_sub_prop(self._properties, key_path)
    if raw_info is None:
        return None
    return _schema.ForeignTypeInfo.from_api_repr(raw_info)


@foreign_type_info.setter
def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
    accepted_types = (_schema.ForeignTypeInfo, dict)
    checked = _helpers._isinstance_or_raise(
        value, accepted_types, none_allowed=True
    )
    if isinstance(checked, _schema.ForeignTypeInfo):
        checked = checked.to_api_repr()
    key_path = self._PROPERTY_TO_API_FIELD["foreign_type_info"]
    _helpers._set_sub_prop(self._properties, key_path, checked)

1180 

@classmethod
def from_string(cls, full_table_id: str) -> "Table":
    """Construct a table from a fully-qualified table ID.

    Args:
        full_table_id (str):
            A fully-qualified table ID in standard SQL format. Must
            include a project ID, dataset ID, and table ID, each
            separated by ``.``.

    Returns:
        Table: Table parsed from ``full_table_id``.

    Examples:
        >>> Table.from_string('my-project.mydataset.mytable')
        Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))

    Raises:
        ValueError:
            If ``full_table_id`` is not a fully-qualified table ID in
            standard SQL format.
    """
    # Parsing is delegated to TableReference; the table wraps the result.
    reference = TableReference.from_string(full_table_id)
    return cls(reference)

1204 

@classmethod
def from_api_repr(cls, resource: dict) -> "Table":
    """Factory: construct a table given its API representation.

    Args:
        resource (Dict[str, object]):
            Table resource representation from the API. The returned
            table keeps a reference to this very dict; no copy is made.

    Returns:
        google.cloud.bigquery.table.Table: Table parsed from ``resource``.

    Raises:
        KeyError:
            If the ``resource`` lacks the key ``'tableReference'``, or if
            the ``dict`` stored within the key ``'tableReference'`` lacks
            the key ``'tableId'``.

    NOTE(review): ``'projectId'`` and ``'datasetId'`` are not checked
    explicitly here; what happens when they are missing depends on
    ``DatasetReference`` -- confirm before relying on a ``KeyError``.
    """
    from google.cloud.bigquery import dataset

    has_table_id = (
        "tableReference" in resource
        and "tableId" in resource["tableReference"]
    )
    if not has_table_id:
        raise KeyError(
            "Resource lacks required identity information:"
            '["tableReference"]["tableId"]'
        )

    field_paths = cls._PROPERTY_TO_API_FIELD
    project_id = _helpers._get_sub_prop(resource, field_paths["project"])
    table_id = _helpers._get_sub_prop(resource, field_paths["table_id"])
    dataset_id = _helpers._get_sub_prop(resource, field_paths["dataset_id"])

    parent_dataset = dataset.DatasetReference(project_id, dataset_id)
    table = cls(parent_dataset.table(table_id))
    # Replace the properties built by the constructor with the full resource.
    table._properties = resource
    return table

1247 

def to_api_repr(self) -> dict:
    """Constructs the API resource of this table.

    Returns:
        Dict[str, object]:
            Table represented as an API resource. It is a deep copy, so
            changing it does not affect this table.
    """
    resource_copy = copy.deepcopy(self._properties)
    return resource_copy

1255 

def to_bqstorage(self) -> str:
    """Construct a BigQuery Storage API representation of this table.

    The work is delegated to this table's ``reference``.

    Returns:
        str: A reference to this table in the BigQuery Storage API.
    """
    table_reference = self.reference
    return table_reference.to_bqstorage()

1263 

def _build_resource(self, filter_fields):
    """Generate a resource for ``update``.

    Delegates to ``_helpers._build_resource_from_properties`` with this
    table and ``filter_fields``.
    """
    partial_resource = _helpers._build_resource_from_properties(self, filter_fields)
    return partial_resource

1267 

def __repr__(self):
    """Return ``Table(<repr of this table's reference>)``."""
    return f"Table({self.reference!r})"

1270 

def __str__(self):
    """Return the dotted ``project.dataset_id.table_id`` form of the table."""
    return "{}.{}.{}".format(self.project, self.dataset_id, self.table_id)

1273 

@property
def max_staleness(self):
    """Union[str, None]: The maximum staleness of data that could be
    returned when the table is queried.

    Staleness is encoded as a string encoding of the SQL IntervalValue
    type. The property is optional and defaults to :data:`None`.

    Returns:
        Optional[str]: The stored staleness interval string, or
        :data:`None` when the resource has no such entry.
    """
    api_field = self._PROPERTY_TO_API_FIELD["max_staleness"]
    return self._properties.get(api_field)


@max_staleness.setter
def max_staleness(self, value):
    """Set the maximum staleness for the table.

    Only the type is checked; the interval format itself is not validated
    by this setter.

    Args:
        value (Optional[str]): A string representing the maximum
            staleness interval, or :data:`None`.

    Raises:
        ValueError: If the value is not None and not a string.
    """
    if not (value is None or isinstance(value, str)):
        raise ValueError("max_staleness must be a string or None")

    api_field = self._PROPERTY_TO_API_FIELD["max_staleness"]
    self._properties[api_field] = value

1307 

1308 

class TableListItem(_TableBase):
    """A read-only table resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the table
    properties when listing tables. Notably,
    :attr:`~google.cloud.bigquery.table.Table.schema` and
    :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for tables.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list>`_.

    Args:
        resource (Dict[str, object]):
            A table-like resource object from a table list response. A
            ``tableReference`` property is required. The dict is stored
            as-is, without copying.

    Raises:
        ValueError:
            If ``tableReference`` or one of its required members
            (``projectId``, ``datasetId``, ``tableId``) is missing
            from ``resource``.
    """

    def __init__(self, resource):
        if "tableReference" not in resource:
            raise ValueError("resource must contain a tableReference value")

        # Checked in this order so the first missing member is the one
        # reported.
        for member in ("projectId", "datasetId", "tableId"):
            if member not in resource["tableReference"]:
                raise ValueError(
                    f"resource['tableReference'] must contain a {member} value"
                )

        self._properties = resource

    @property
    def created(self):
        """Union[datetime.datetime, None]: Datetime at which the table was
        created (:data:`None` until set from the server).
        """
        created_ms = self._properties.get("creationTime")
        if created_ms is None:
            return None
        # The resource stores milliseconds; the helper takes microseconds.
        return google.cloud._helpers._datetime_from_microseconds(
            1000.0 * float(created_ms)
        )

    @property
    def expires(self):
        """Union[datetime.datetime, None]: Datetime at which the table will be
        deleted (:data:`None` when the resource has no expiration time).
        """
        expires_ms = self._properties.get("expirationTime")
        if expires_ms is None:
            return None
        # The resource stores milliseconds; the helper takes microseconds.
        return google.cloud._helpers._datetime_from_microseconds(
            1000.0 * float(expires_ms)
        )

    reference = property(_reference_getter)

    @property
    def labels(self):
        """Dict[str, str]: Labels for the table.

        This property always returns a dict (an empty one is inserted into
        the resource when none exists). To change a table's labels,
        modify the dict, then call ``Client.update_table``. To delete a
        label, set its value to :data:`None` before updating.
        """
        return self._properties.setdefault("labels", {})

    @property
    def full_table_id(self):
        """Union[str, None]: ID for the table (:data:`None` until set from the
        server).

        In the format ``project_id:dataset_id.table_id``.
        """
        return self._properties.get("id")

    @property
    def table_type(self):
        """Union[str, None]: The type of the table (:data:`None` until set from
        the server).

        Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``.
        """
        return self._properties.get("type")

    @property
    def time_partitioning(self):
        """google.cloud.bigquery.table.TimePartitioning: Configures time-based
        partitioning for a table (:data:`None` when the resource has no
        ``timePartitioning`` entry).
        """
        raw_partitioning = self._properties.get("timePartitioning")
        if raw_partitioning is None:
            return None
        return TimePartitioning.from_api_repr(raw_partitioning)

    @property
    def partitioning_type(self):
        """Union[str, None]: Time partitioning of the table if it is
        partitioned (Defaults to :data:`None`).

        Emits a ``PendingDeprecationWarning``; prefer
        ``TableListItem.time_partitioning.type_``.
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "TableListItem.time_partitioning.type_ instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        if self.time_partitioning is None:
            return None
        return self.time_partitioning.type_

    @property
    def partition_expiration(self):
        """Union[int, None]: Expiration time in milliseconds for a partition
        (:data:`None` when the table has no time partitioning).

        Emits a ``PendingDeprecationWarning``; prefer
        ``TableListItem.time_partitioning.expiration_ms``.
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "TableListItem.time_partitioning.expiration_ms instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        if self.time_partitioning is None:
            return None
        return self.time_partitioning.expiration_ms

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the table (defaults to :data:`None`)."""
        return self._properties.get("friendlyName")

    view_use_legacy_sql = property(_view_use_legacy_sql_getter)

    @property
    def clustering_fields(self):
        """Union[List[str], None]: Fields defining clustering for the table

        (Defaults to :data:`None`).

        A new list is returned on each access. Clustering fields are
        immutable after table creation.

        .. note::

           BigQuery supports clustering for both partitioned and
           non-partitioned tables.
        """
        clustering = self._properties.get("clustering")
        if clustering is None:
            return None
        return list(clustering.get("fields", ()))

    @classmethod
    def from_string(cls, full_table_id: str) -> "TableListItem":
        """Construct a table list item from a fully-qualified table ID.

        Args:
            full_table_id (str):
                A fully-qualified table ID in standard SQL format. Must
                include a project ID, dataset ID, and table ID, each
                separated by ``.``.

        Returns:
            TableListItem:
                Item whose resource contains only the ``tableReference``
                parsed from ``full_table_id``.

        Raises:
            ValueError:
                If ``full_table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        reference = TableReference.from_string(full_table_id)
        return cls({"tableReference": reference.to_api_repr()})

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """
        table_reference = self.reference
        return table_reference.to_bqstorage()

    def to_api_repr(self) -> dict:
        """Constructs the API resource of this table.

        Returns:
            Dict[str, object]:
                Table represented as an API resource (a deep copy of the
                stored resource).
        """
        resource_copy = copy.deepcopy(self._properties)
        return resource_copy

1507 

1508 

def _row_from_mapping(mapping, schema):
    """Convert a mapping to a row tuple using the schema.

    Args:
        mapping (Dict[str, object])
            Mapping of row data: must contain keys for all required fields in
            the schema. Keys which do not correspond to a field in the schema
            are ignored.
        schema (List[google.cloud.bigquery.schema.SchemaField]):
            The schema of the table destination for the rows

    Returns:
        Tuple[object]:
            Tuple whose elements are ordered according to the schema.
            A missing ``REPEATED`` field becomes ``()`` and a missing
            ``NULLABLE`` field becomes :data:`None`.

    Raises:
        ValueError: If schema is empty, or a field has an unknown mode.
        KeyError: If ``mapping`` lacks a ``REQUIRED`` field.
    """
    if len(schema) == 0:
        raise ValueError(_TABLE_HAS_NO_SCHEMA)

    def _cell(field):
        # Pick the value for one schema field according to its mode.
        mode = field.mode
        if mode == "REQUIRED":
            return mapping[field.name]
        if mode == "REPEATED":
            return mapping.get(field.name, ())
        if mode == "NULLABLE":
            return mapping.get(field.name)
        raise ValueError("Unknown field mode: {}".format(mode))

    return tuple(_cell(field) for field in schema)

1541 

1542 

class StreamingBuffer(object):
    """Information about a table's streaming buffer.

    See https://cloud.google.com/bigquery/streaming-data-into-bigquery.

    Each attribute (``estimated_bytes``, ``estimated_rows``,
    ``oldest_entry_time``) is :data:`None` when the corresponding key is
    absent from ``resource``.

    Args:
        resource (Dict[str, object]):
            streaming buffer representation returned from the API
    """

    def __init__(self, resource):
        self.estimated_bytes = (
            int(resource["estimatedBytes"]) if "estimatedBytes" in resource else None
        )
        self.estimated_rows = (
            int(resource["estimatedRows"]) if "estimatedRows" in resource else None
        )

        if "oldestEntryTime" in resource:
            # The value is multiplied by 1000 before being handed to the
            # microsecond-based conversion helper.
            oldest_us = 1000.0 * int(resource["oldestEntryTime"])
            self.oldest_entry_time = (
                google.cloud._helpers._datetime_from_microseconds(oldest_us)
            )
        else:
            self.oldest_entry_time = None

1565 

1566 

class SnapshotDefinition:
    """Information about base table and snapshot time of the snapshot.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition

    ``base_table_reference`` and ``snapshot_time`` are :data:`None` when
    the corresponding key is absent from ``resource``.

    Args:
        resource: Snapshot definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        if "baseTableReference" in resource:
            self.base_table_reference = TableReference.from_api_repr(
                resource["baseTableReference"]
            )
        else:
            self.base_table_reference = None

        if "snapshotTime" in resource:
            # Parsed from the RFC 3339 timestamp string in the resource.
            self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime(
                resource["snapshotTime"]
            )
        else:
            self.snapshot_time = None

1588 

1589 

class CloneDefinition:
    """Information about base table and clone time of the clone.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition

    ``base_table_reference`` and ``clone_time`` are :data:`None` when the
    corresponding key is absent from ``resource``.

    Args:
        resource: Clone definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        if "baseTableReference" in resource:
            self.base_table_reference = TableReference.from_api_repr(
                resource["baseTableReference"]
            )
        else:
            self.base_table_reference = None

        if "cloneTime" in resource:
            # Parsed from the RFC 3339 timestamp string in the resource.
            self.clone_time = google.cloud._helpers._rfc3339_to_datetime(
                resource["cloneTime"]
            )
        else:
            self.clone_time = None

1611 

1612 

class Row(object):
    """A BigQuery row.

    Values can be accessed by position (index), by key like a dict,
    or as properties.

    Args:
        values (Sequence[object]): The row values
        field_to_index (Dict[str, int]):
            A mapping from schema field names to indexes
    """

    # Choose unusual field names to try to avoid conflict with schema fields.
    __slots__ = ("_xxx_values", "_xxx_field_to_index")

    def __init__(self, values, field_to_index) -> None:
        self._xxx_values = values
        self._xxx_field_to_index = field_to_index

    def values(self):
        """Return the values included in this row.

        Returns:
            Sequence[object]:
                A deep copy of the row values, of length ``len(row)``.
        """
        return copy.deepcopy(self._xxx_values)

    def keys(self) -> Iterable[str]:
        """Return the keys for using a row as a dict.

        Returns:
            Iterable[str]: The keys corresponding to the columns of a row

        Examples:

            >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys())
            ['x', 'y']
        """
        return self._xxx_field_to_index.keys()

    def items(self) -> Iterable[Tuple[str, Any]]:
        """Return items as ``(key, value)`` pairs.

        Each value is yielded as a deep copy.

        Returns:
            Iterable[Tuple[str, object]]:
                The ``(key, value)`` pairs representing this row.

        Examples:

            >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items())
            [('x', 'a'), ('y', 'b')]
        """
        for key, index in self._xxx_field_to_index.items():
            yield (key, copy.deepcopy(self._xxx_values[index]))

    def get(self, key: str, default: Any = None) -> Any:
        """Return a value for key, with a default value if it does not exist.

        Args:
            key (str): The key of the column to access
            default (object):
                The default value to use if the key does not exist. (Defaults
                to :data:`None`.)

        Returns:
            object:
                The value associated with the provided key, or a default value.

        Examples:
            When the key exists, the value associated with it is returned.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x')
            'a'

            The default value is :data:`None` when the key does not exist.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') is None
            True

            The default value can be overridden with the ``default`` parameter.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
            ''

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '')
            ''
        """
        index = self._xxx_field_to_index.get(key)
        if index is None:
            return default
        return self._xxx_values[index]

    def __getattr__(self, name):
        """Resolve ``row.<field name>`` to the value of that field.

        Only called after normal attribute lookup has failed.

        Raises:
            AttributeError: If ``name`` is not a field of this row.
        """
        # Reaching here with one of our own slot names means the slot is not
        # populated yet -- e.g. ``copy`` and ``pickle`` create the instance
        # without running ``__init__`` and then probe it for ``__setstate__``.
        # Reading the slot below would re-enter this method without end
        # (RecursionError), so fail fast with a regular AttributeError.
        if name in Row.__slots__:
            raise AttributeError(name)
        value = self._xxx_field_to_index.get(name)
        if value is None:
            raise AttributeError("no row field {!r}".format(name))
        return self._xxx_values[value]

    def __len__(self):
        return len(self._xxx_values)

    def __getitem__(self, key):
        # String keys are field names; anything else (int, slice) indexes
        # the underlying values directly.
        if isinstance(key, str):
            value = self._xxx_field_to_index.get(key)
            if value is None:
                raise KeyError("no row field {!r}".format(key))
            key = value
        return self._xxx_values[key]

    def __eq__(self, other):
        if not isinstance(other, Row):
            return NotImplemented
        return (
            self._xxx_values == other._xxx_values
            and self._xxx_field_to_index == other._xxx_field_to_index
        )

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # sort field dict by value, for determinism
        items = sorted(self._xxx_field_to_index.items(), key=operator.itemgetter(1))
        f2i = "{" + ", ".join("%r: %d" % item for item in items) + "}"
        return "Row({}, {})".format(self._xxx_values, f2i)

1738 

1739 

class _NoopProgressBarQueue(object):
    """A stand-in for a queue that discards everything put into it.

    This is used when there is no progress bar to send updates to.
    """

    def put_nowait(self, item):
        """Accept ``item`` and ignore it."""
        return None

1748 

1749 

1750class RowIterator(HTTPIterator): 

1751 """A class for iterating through HTTP/JSON API row list responses. 

1752 

1753 Args: 

1754 client (Optional[google.cloud.bigquery.Client]): 

1755 The API client instance. This should always be non-`None`, except for 

1756 subclasses that do not use it, namely the ``_EmptyRowIterator``. 

1757 api_request (Callable[google.cloud._http.JSONConnection.api_request]): 

1758 The function to use to make API requests. 

1759 path (str): The method path to query for the list of items. 

1760 schema (Sequence[Union[ \ 

1761 :class:`~google.cloud.bigquery.schema.SchemaField`, \ 

1762 Mapping[str, Any] \ 

1763 ]]): 

1764 The table's schema. If any item is a mapping, its content must be 

1765 compatible with 

1766 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. 

1767 page_token (str): A token identifying a page in a result set to start 

1768 fetching results from. 

1769 max_results (Optional[int]): The maximum number of results to fetch. 

1770 page_size (Optional[int]): The maximum number of rows in each page 

1771 of results from this request. Non-positive values are ignored. 

1772 Defaults to a sensible value set by the API. 

1773 extra_params (Optional[Dict[str, object]]): 

1774 Extra query string parameters for the API call. 

1775 table (Optional[Union[ \ 

1776 google.cloud.bigquery.table.Table, \ 

1777 google.cloud.bigquery.table.TableReference, \ 

1778 ]]): 

1779 The table which these rows belong to, or a reference to it. Used to 

1780 call the BigQuery Storage API to fetch rows. 

1781 selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]): 

1782 A subset of columns to select from this table. 

1783 total_rows (Optional[int]): 

1784 Total number of rows in the table. 

1785 first_page_response (Optional[dict]): 

1786 API response for the first page of results. These are returned when 

1787 the first page is requested. 

1788 query (Optional[str]): 

1789 The query text used. 

1790 total_bytes_processed (Optional[int]): 

1791 If representing query results, the total bytes processed by the associated query. 

1792 slot_millis (Optional[int]): 

1793 If representing query results, the number of slot ms billed for the associated query. 

1794 created (Optional[datetime.datetime]): 

1795 If representing query results, the creation time of the associated query. 

1796 started (Optional[datetime.datetime]): 

1797 If representing query results, the start time of the associated query. 

1798 ended (Optional[datetime.datetime]): 

1799 If representing query results, the end time of the associated query. 

1800 """ 

1801 

def __init__(
    self,
    client,
    api_request,
    path,
    schema,
    page_token=None,
    max_results=None,
    page_size=None,
    extra_params=None,
    table=None,
    selected_fields=None,
    total_rows=None,
    first_page_response=None,
    location: Optional[str] = None,
    job_id: Optional[str] = None,
    query_id: Optional[str] = None,
    project: Optional[str] = None,
    num_dml_affected_rows: Optional[int] = None,
    query: Optional[str] = None,
    total_bytes_processed: Optional[int] = None,
    slot_millis: Optional[int] = None,
    created: Optional[datetime.datetime] = None,
    started: Optional[datetime.datetime] = None,
    ended: Optional[datetime.datetime] = None,
):
    """Configure REST paging and store the table / query metadata.

    The meaning of each argument is described in the class docstring.
    """
    # Paging options handed to the parent iterator: rows live under the
    # "rows" key and the continuation token under "pageToken".
    paging_options = {
        "item_to_value": _item_to_row,
        "items_key": "rows",
        "page_token": page_token,
        "max_results": max_results,
        "extra_params": extra_params,
        "page_start": _rows_page_start,
        "next_token": "pageToken",
    }
    super(RowIterator, self).__init__(client, api_request, path, **paging_options)

    # An empty / missing schema is normalized to an empty tuple.
    if schema:
        schema_fields = _to_schema_fields(schema)
    else:
        schema_fields = ()
    self._schema = schema_fields
    self._field_to_index = _helpers._field_to_index_mapping(schema_fields)

    # Download configuration.
    self._page_size = page_size
    self._preserve_order = False
    self._selected_fields = selected_fields
    self._table = table
    self._total_rows = total_rows
    self._first_page_response = first_page_response

    # Metadata about the query / job that produced the rows (if any).
    self._location = location
    self._job_id = job_id
    self._query_id = query_id
    self._project = project
    self._num_dml_affected_rows = num_dml_affected_rows
    self._query = query
    self._total_bytes_processed = total_bytes_processed
    self._slot_millis = slot_millis
    self._job_created = created
    self._job_started = started
    self._job_ended = ended

1860 

@property
def _billing_project(self) -> Optional[str]:
    """Project of the attached client, or ``None`` when no client is set.

    This is the GCP Project ID the BQ API will bill to (if applicable).
    """
    owner = self.client
    if owner is None:
        return None
    return owner.project

1866 

@property
def job_id(self) -> Optional[str]:
    """Identifier of the query job behind these rows (if applicable).

    The job metadata can be retrieved with
    ``job = client.get_job(rows.job_id, location=rows.location)``.
    """
    return self._job_id

1875 

@property
def location(self) -> Optional[str]:
    """Location in which the query ran (if applicable).

    See: https://cloud.google.com/bigquery/docs/locations
    """
    return self._location

1883 

@property
def num_dml_affected_rows(self) -> Optional[int]:
    """Number of rows affected, when this RowIterator is the result of a
    DML query.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows
    """
    return self._num_dml_affected_rows

1893 

@property
def project(self) -> Optional[str]:
    """ID of the GCP project these rows are read from."""
    return self._project

1898 

@property
def query_id(self) -> Optional[str]:
    """[Preview] Identifier of a completed query.

    The ID is generated automatically and may not be populated.
    """
    return self._query_id

1906 

@property
def query(self) -> Optional[str]:
    """Text of the query that was used."""
    return self._query

1911 

@property
def total_bytes_processed(self) -> Optional[int]:
    """Total bytes processed, taken from the job statistics when present."""
    return self._total_bytes_processed

1916 

@property
def slot_millis(self) -> Optional[int]:
    """Slot milliseconds the user is actually billed for."""
    return self._slot_millis

1921 

@property
def created(self) -> Optional[datetime.datetime]:
    """Creation time of the associated query, when representing query results."""
    return self._job_created

1926 

@property
def started(self) -> Optional[datetime.datetime]:
    """Start time of the associated query, when representing query results."""
    return self._job_started

1931 

@property
def ended(self) -> Optional[datetime.datetime]:
    """End time of the associated query, when representing query results."""
    return self._job_ended

1936 

def _is_almost_completely_cached(self):
    """Tell whether (nearly) all results already sit in the first page.

    Knowing this lets callers skip alternative download mechanisms.

    Returns:
        bool:
            ``True`` when the cached first page satisfies ``max_results``,
            when no further page exists, or when the cached row count
            reaches ``ALMOST_COMPLETELY_CACHED_RATIO`` of the total rows;
            ``False`` otherwise (including when no first page is cached).
    """
    first_page = getattr(self, "_first_page_response", None)
    if first_page is None:
        return False

    cached_count = len(first_page.get(self._items_key, []))

    # The cached rows alone already satisfy the requested maximum.
    limit = self.max_results
    if limit is not None and cached_count >= limit:
        return True

    # Neither the iterator nor the cached response points at another page.
    no_further_page = (
        self.next_page_token is None
        and first_page.get(self._next_token) is None
    )
    if no_further_page:
        return True

    if self._total_rows is not None:
        threshold = self._total_rows * ALMOST_COMPLETELY_CACHED_RATIO
        if cached_count >= threshold:
            return True

    return False

1965 

def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client):
    """Decide whether rows may be fetched through the BigQuery Storage API.

    Args:
        bqstorage_client:
            An existing BigQuery Storage client, or a falsy value.
        create_bqstorage_client:
            Whether the caller allows a client to be created.

    Returns:
        bool
            True if the BigQuery Storage client can be used or created.
    """
    # Nothing to use and nothing may be created.
    if not (bqstorage_client or create_bqstorage_client):
        return False

    if self._table is None:
        return False

    # A set next_page_token means the developer has already started
    # paging through results.
    if getattr(self, "next_page_token", None) is not None:
        return False

    if self._is_almost_completely_cached():
        return False

    if self.max_results is not None:
        return False

    try:
        _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
    except bq_exceptions.BigQueryStorageNotFoundError:
        warnings.warn(
            "BigQuery Storage module not found, fetch data with the REST "
            "endpoint instead."
        )
        return False
    except bq_exceptions.LegacyBigQueryStorageError as exc:
        warnings.warn(str(exc))
        return False
    else:
        return True

2004 

def _get_next_page_response(self):
    """Produce the next page, from the cached first page or the API.

    Returns:
        Dict[str, object]:
            The parsed JSON response of the next page's contents.
    """
    cached = self._first_page_response
    if cached:
        # Serve the cached first page once, trimmed to max_results.
        page = {
            self._items_key: cached.get(self._items_key, [])[: self.max_results],
        }
        if self._next_token in cached:
            page[self._next_token] = cached[self._next_token]

        self._first_page_response = None
        return page

    query_params = self._get_query_params()

    # start_index must accompany only the first page when the user gave
    # both page_size and start_index. A shallow copy is edited so that the
    # original params keep start_index for a later re-iteration.
    request_params = copy.copy(query_params)
    drop_start_index = (
        self._page_size is not None
        and self.page_number
        and "startIndex" in query_params
    )
    if drop_start_index:
        del request_params["startIndex"]

    return self.api_request(
        method=self._HTTP_METHOD, path=self.path, query_params=request_params
    )

2040 

@property
def schema(self):
    """List[google.cloud.bigquery.schema.SchemaField]: The subset of
    columns to be read from the table, returned as a new list."""
    return [*self._schema]

2046 

@property
def total_rows(self):
    """int: Total row count of the table or of the query results."""
    return self._total_rows

2051 

def _maybe_warn_max_results(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
):
    """Warn when a BQ Storage client is given while ``max_results`` is set.

    Call this helper directly from the relevant top-level public methods so
    that the warning points at the correct line in user code.

    Args:
        bqstorage_client:
            The BigQuery Storage client intended to use for downloading result rows.
    """
    if bqstorage_client is None or self.max_results is None:
        return

    warnings.warn(
        "Cannot use bqstorage_client if max_results is set, "
        "reverting to fetching data with the REST endpoint.",
        stacklevel=3,
    )

2071 

def _to_page_iterable(
    self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
    """Yield result pages from whichever download callable applies.

    ``bqstorage_download`` is called only when the BigQuery Storage API
    should be used and a client was supplied; otherwise
    ``tabledata_list_download`` is called. No client is ever created here.
    """
    use_storage_api = (
        self._should_use_bqstorage(bqstorage_client, False)
        and bqstorage_client is not None
    )
    download = bqstorage_download if use_storage_api else tabledata_list_download
    yield from download()

2084 

def to_arrow_iterable(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
    max_stream_count: Optional[int] = None,
    timeout: Optional[float] = None,
) -> Iterator["pyarrow.RecordBatch"]:
    """[Beta] Stream the table as an iterable of :class:`pyarrow.RecordBatch`.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. When supplied, rows are fetched
            with the faster BigQuery Storage API.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            Only a subset of the BigQuery Storage API capabilities is
            exposed here. For full access to all features (projections,
            filters, snapshots) use the Storage API directly.

        max_queue_size (Optional[int]):
            The maximum number of result pages held in the internal queue
            while streaming query results over the BigQuery Storage API.
            Ignored if the Storage API is not used.

            By default, the max queue size is set to the number of BQ
            Storage streams created by the server. If ``max_queue_size``
            is :data:`None`, the queue size is infinite.

        max_stream_count (Optional[int]):
            The maximum number of parallel download streams when using the
            BigQuery Storage API. Ignored if the BigQuery Storage API is
            not used.

            This setting also has no effect if the query result is
            deterministically ordered with ORDER BY, in which case the
            number of download streams is always 1.

            If set to 0 or None (the default), the number of download
            streams is determined by the BigQuery server. However, this
            behaviour can require a lot of memory to store temporary
            download results, especially with very large queries. In that
            case, setting this parameter to a value > 0 can help reduce
            system resource consumption.

        timeout (Optional[float]):
            The number of seconds to wait for the underlying download to
            complete. If ``None``, wait indefinitely.

    Returns:
        Iterator[pyarrow.RecordBatch]:
            A generator of :class:`~pyarrow.RecordBatch`.

    .. versionadded:: 2.31.0
    """
    self._maybe_warn_max_results(bqstorage_client)

    # Options forwarded only to the BigQuery Storage download path.
    storage_options = {
        "preserve_order": self._preserve_order,
        "selected_fields": self._selected_fields,
        "max_queue_size": max_queue_size,
        "max_stream_count": max_stream_count,
        "timeout": timeout,
    }
    via_storage_api = functools.partial(
        _pandas_helpers.download_arrow_bqstorage,
        self._billing_project,
        self._table,
        bqstorage_client,
        **storage_options,
    )
    via_rest = functools.partial(
        _pandas_helpers.download_arrow_row_iterator,
        iter(self.pages),
        self.schema,
        timeout=timeout,
    )
    return self._to_page_iterable(
        via_storage_api,
        via_rest,
        bqstorage_client=bqstorage_client,
    )

2165 

2166 # If changing the signature of this method, make sure to apply the same 

2167 # changes to job.QueryJob.to_arrow() 

def to_arrow(
    self,
    progress_bar_type: Optional[str] = None,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    create_bqstorage_client: bool = True,
    timeout: Optional[float] = None,
) -> "pyarrow.Table":
    """[Beta] Create a :class:`pyarrow.Table` by loading all pages of a
    table or query.

    Args:
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
              No progress bar.
            ``'tqdm'``
              Use the :func:`tqdm.tqdm` function to print a progress bar
              to :data:`sys.stdout`.
            ``'tqdm_notebook'``
              Use the :func:`tqdm.notebook.tqdm` function to display a
              progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
              Use the :func:`tqdm.tqdm_gui` function to display a
              progress bar as a graphical dialog box.
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster BigQuery
            Storage API to fetch rows from BigQuery. This API is a billable API.

            This method requires ``google-cloud-bigquery-storage`` library.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.
        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client using
            the default API settings. The BigQuery Storage API is a faster way
            to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
            for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            .. versionadded:: 1.24.0
        timeout (Optional[float]):
            The number of seconds to wait for the underlying download to complete.
            If ``None``, wait indefinitely.

    Returns:
        pyarrow.Table
            A :class:`pyarrow.Table` populated with row data and column
            headers from the query results. The column headers are derived
            from the destination table's schema.

    Raises:
        ValueError: If the :mod:`pyarrow` library cannot be imported.


    .. versionadded:: 1.17.0
    """
    if pyarrow is None:
        raise ValueError(_NO_PYARROW_ERROR)

    # Called directly from this public method so the warning's stacklevel
    # points at the caller's code.
    self._maybe_warn_max_results(bqstorage_client)

    if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client):
        create_bqstorage_client = False
        bqstorage_client = None

    # Only a client created here is closed here; a caller-supplied client
    # stays open.
    owns_bqstorage_client = False
    if not bqstorage_client and create_bqstorage_client:
        bqstorage_client = self.client._ensure_bqstorage_client()
        owns_bqstorage_client = bqstorage_client is not None

    progress_bar = None
    record_batches = []
    try:
        progress_bar = get_progress_bar(
            progress_bar_type, "Downloading", self.total_rows, "rows"
        )

        for record_batch in self.to_arrow_iterable(
            bqstorage_client=bqstorage_client, timeout=timeout
        ):
            record_batches.append(record_batch)

            if progress_bar is not None:
                # In some cases, the number of total rows is not populated
                # until the first page of rows is fetched. Update the
                # progress bar's total to keep an accurate count.
                progress_bar.total = progress_bar.total or self.total_rows
                progress_bar.update(record_batch.num_rows)
    finally:
        try:
            # Close the progress bar on success and on failure alike, so a
            # failed download does not leave it open.
            if progress_bar is not None:
                progress_bar.close()
        finally:
            if owns_bqstorage_client:
                bqstorage_client._transport.grpc_channel.close()  # type: ignore

    if record_batches and bqstorage_client is not None:
        return pyarrow.Table.from_batches(record_batches)

    # No records (not record_batches), use schema based on BigQuery schema
    # **or**
    # we used the REST API (bqstorage_client is None),
    # which doesn't add arrow extension metadata, so we let
    # `bq_to_arrow_schema` do it.
    arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema)
    return pyarrow.Table.from_batches(record_batches, schema=arrow_schema)

2280 

def to_dataframe_iterable(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    dtypes: Optional[Dict[str, Any]] = None,
    max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
    max_stream_count: Optional[int] = None,
    timeout: Optional[float] = None,
) -> Iterator["pandas.DataFrame"]:
    """Create an iterable of pandas DataFrames, to process the table as a stream.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster
            BigQuery Storage API to fetch rows from BigQuery.

            This method requires ``google-cloud-bigquery-storage`` library.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary mapping column names to pandas ``dtype``s. The
            provided ``dtype`` is used when constructing the series for
            the column specified. Otherwise, the default pandas behavior
            is used.

        max_queue_size (Optional[int]):
            The maximum number of result pages to hold in the internal queue when
            streaming query results over the BigQuery Storage API. Ignored if
            Storage API is not used.

            By default, the max queue size is set to the number of BQ Storage streams
            created by the server. If ``max_queue_size`` is :data:`None`, the queue
            size is infinite.

            .. versionadded:: 2.14.0

        max_stream_count (Optional[int]):
            The maximum number of parallel download streams when
            using BigQuery Storage API. Ignored if
            BigQuery Storage API is not used.

            This setting also has no effect if the query result
            is deterministically ordered with ORDER BY,
            in which case, the number of download streams is always 1.

            If set to 0 or None (the default), the number of download
            streams is determined by the BigQuery server. However, this
            behaviour can require a lot of memory to store temporary
            download results, especially with very large queries. In that
            case, setting this parameter value to a value > 0 can help
            reduce system resource consumption.

        timeout (Optional[float]):
            The number of seconds to wait for the underlying download to complete.
            If ``None``, wait indefinitely.

    Returns:
        Iterator[pandas.DataFrame]:
            A generator of :class:`~pandas.DataFrame`.

    Raises:
        ValueError:
            If the :mod:`pandas` library cannot be imported.
    """
    _pandas_helpers.verify_pandas_imports()

    if dtypes is None:
        dtypes = {}

    # Called directly from this public method so the warning's stacklevel
    # points at the caller's code.
    self._maybe_warn_max_results(bqstorage_client)

    column_names = [field.name for field in self._schema]
    bqstorage_download = functools.partial(
        _pandas_helpers.download_dataframe_bqstorage,
        self._billing_project,
        self._table,
        bqstorage_client,
        column_names,
        dtypes,
        preserve_order=self._preserve_order,
        selected_fields=self._selected_fields,
        max_queue_size=max_queue_size,
        max_stream_count=max_stream_count,
        timeout=timeout,
    )
    tabledata_list_download = functools.partial(
        _pandas_helpers.download_dataframe_row_iterator,
        iter(self.pages),
        self.schema,
        dtypes,
        timeout=timeout,
    )
    # _to_page_iterable picks one of the two callables and yields its pages.
    return self._to_page_iterable(
        bqstorage_download,
        tabledata_list_download,
        bqstorage_client=bqstorage_client,
    )

2379 

2380 # If changing the signature of this method, make sure to apply the same 

2381 # changes to job.QueryJob.to_dataframe() 

2382 def to_dataframe( 

2383 self, 

2384 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

2385 dtypes: Optional[Dict[str, Any]] = None, 

2386 progress_bar_type: Optional[str] = None, 

2387 create_bqstorage_client: bool = True, 

2388 geography_as_object: bool = False, 

2389 bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, 

2390 int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, 

2391 float_dtype: Union[Any, None] = None, 

2392 string_dtype: Union[Any, None] = None, 

2393 date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE, 

2394 datetime_dtype: Union[Any, None] = None, 

2395 time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE, 

2396 timestamp_dtype: Union[Any, None] = None, 

2397 range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE, 

2398 range_datetime_dtype: Union[ 

2399 Any, None 

2400 ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE, 

2401 range_timestamp_dtype: Union[ 

2402 Any, None 

2403 ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE, 

2404 timeout: Optional[float] = None, 

2405 ) -> "pandas.DataFrame": 

2406 """Create a pandas DataFrame by loading all pages of a query. 

2407 

2408 Args: 

2409 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): 

2410 A BigQuery Storage API client. If supplied, use the faster 

2411 BigQuery Storage API to fetch rows from BigQuery. 

2412 

2413 This method requires ``google-cloud-bigquery-storage`` library. 

2414 

2415 This method only exposes a subset of the capabilities of the 

2416 BigQuery Storage API. For full access to all features 

2417 (projections, filters, snapshots) use the Storage API directly. 

2418 

2419 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): 

2420 A dictionary of column names pandas ``dtype``s. The provided 

2421 ``dtype`` is used when constructing the series for the column 

2422 specified. Otherwise, the default pandas behavior is used. 

2423 progress_bar_type (Optional[str]): 

2424 If set, use the `tqdm <https://tqdm.github.io/>`_ library to 

2425 display a progress bar while the data downloads. Install the 

2426 ``tqdm`` package to use this feature. 

2427 

2428 Possible values of ``progress_bar_type`` include: 

2429 

2430 ``None`` 

2431 No progress bar. 

2432 ``'tqdm'`` 

2433 Use the :func:`tqdm.tqdm` function to print a progress bar 

2434 to :data:`sys.stdout`. 

2435 ``'tqdm_notebook'`` 

2436 Use the :func:`tqdm.notebook.tqdm` function to display a 

2437 progress bar as a Jupyter notebook widget. 

2438 ``'tqdm_gui'`` 

2439 Use the :func:`tqdm.tqdm_gui` function to display a 

2440 progress bar as a graphical dialog box. 

2441 

2442 .. versionadded:: 1.11.0 

2443 

2444 create_bqstorage_client (Optional[bool]): 

2445 If ``True`` (default), create a BigQuery Storage API client 

2446 using the default API settings. The BigQuery Storage API 

2447 is a faster way to fetch rows from BigQuery. See the 

2448 ``bqstorage_client`` parameter for more information. 

2449 

2450 This argument does nothing if ``bqstorage_client`` is supplied. 

2451 

2452 .. versionadded:: 1.24.0 

2453 

2454 geography_as_object (Optional[bool]): 

2455 If ``True``, convert GEOGRAPHY data to :mod:`shapely` 

2456 geometry objects. If ``False`` (default), don't cast 

2457 geography data to :mod:`shapely` geometry objects. 

2458 

2459 .. versionadded:: 2.24.0 

2460 

2461 bool_dtype (Optional[pandas.Series.dtype, None]): 

2462 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) 

2463 to convert BigQuery Boolean type, instead of relying on the default 

2464 ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, 

2465 then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean 

2466 type can be found at: 

2467 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type 

2468 

2469 .. versionadded:: 3.8.0 

2470 

2471 int_dtype (Optional[pandas.Series.dtype, None]): 

2472 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) 

2473 to convert BigQuery Integer types, instead of relying on the default 

2474 ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, 

2475 then the data type will be ``numpy.dtype("int64")``. A list of BigQuery 

2476 Integer types can be found at: 

2477 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types 

2478 

2479 .. versionadded:: 3.8.0 

2480 

2481 float_dtype (Optional[pandas.Series.dtype, None]): 

2482 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) 

2483 to convert BigQuery Float type, instead of relying on the default 

2484 ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, 

2485 then the data type will be ``numpy.dtype("float64")``. BigQuery Float 

2486 type can be found at: 

2487 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types 

2488 

2489 .. versionadded:: 3.8.0 

2490 

2491 string_dtype (Optional[pandas.Series.dtype, None]): 

2492 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to 

2493 convert BigQuery String type, instead of relying on the default 

2494 ``numpy.dtype("object")``. If you explicitly set the value to ``None``, 

2495 then the data type will be ``numpy.dtype("object")``. BigQuery String 

2496 type can be found at: 

2497 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type 

2498 

2499 .. versionadded:: 3.8.0 

2500 

2501 date_dtype (Optional[pandas.Series.dtype, None]): 

2502 If set, indicate a pandas ExtensionDtype (e.g. 

2503 ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date 

2504 type, instead of relying on the default ``db_dtypes.DateDtype()``. 

2505 If you explicitly set the value to ``None``, then the data type will be 

2506 ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery 

2507 Date type can be found at: 

2508 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type 

2509 

2510 .. versionadded:: 3.10.0 

2511 

2512 datetime_dtype (Optional[pandas.Series.dtype, None]): 

2513 If set, indicate a pandas ExtensionDtype (e.g. 

2514 ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime 

2515 type, instead of relying on the default ``numpy.dtype("datetime64[ns]``. 

2516 If you explicitly set the value to ``None``, then the data type will be 

2517 ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery 

2518 Datetime type can be found at: 

2519 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type 

2520 

2521 .. versionadded:: 3.10.0 

2522 

2523 time_dtype (Optional[pandas.Series.dtype, None]): 

2524 If set, indicate a pandas ExtensionDtype (e.g. 

2525 ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time 

2526 type, instead of relying on the default ``db_dtypes.TimeDtype()``. 

2527 If you explicitly set the value to ``None``, then the data type will be 

2528 ``numpy.dtype("object")``. BigQuery Time type can be found at: 

2529 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type 

2530 

2531 .. versionadded:: 3.10.0 

2532 

2533 timestamp_dtype (Optional[pandas.Series.dtype, None]): 

2534 If set, indicate a pandas ExtensionDtype (e.g. 

2535 ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp 

2536 type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``. 

2537 If you explicitly set the value to ``None``, then the data type will be 

2538 ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery 

2539 Datetime type can be found at: 

2540 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type 

2541 

2542 .. versionadded:: 3.10.0 

2543 

2544 range_date_dtype (Optional[pandas.Series.dtype, None]): 

2545 If set, indicate a pandas ExtensionDtype, such as: 

2546 

2547 .. code-block:: python 

2548 

2549 pandas.ArrowDtype(pyarrow.struct( 

2550 [("start", pyarrow.date32()), ("end", pyarrow.date32())] 

2551 )) 

2552 

2553 to convert BigQuery RANGE<DATE> type, instead of relying on 

2554 the default ``object``. If you explicitly set the value to 

2555 ``None``, the data type will be ``object``. BigQuery Range type 

2556 can be found at: 

2557 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type 

2558 

2559 .. versionadded:: 3.21.0 

2560 

2561 range_datetime_dtype (Optional[pandas.Series.dtype, None]): 

2562 If set, indicate a pandas ExtensionDtype, such as: 

2563 

2564 .. code-block:: python 

2565 

2566 pandas.ArrowDtype(pyarrow.struct( 

2567 [ 

2568 ("start", pyarrow.timestamp("us")), 

2569 ("end", pyarrow.timestamp("us")), 

2570 ] 

2571 )) 

2572 

2573 to convert BigQuery RANGE<DATETIME> type, instead of relying on 

2574 the default ``object``. If you explicitly set the value to 

2575 ``None``, the data type will be ``object``. BigQuery Range type 

2576 can be found at: 

2577 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type 

2578 

2579 .. versionadded:: 3.21.0 

2580 

2581 range_timestamp_dtype (Optional[pandas.Series.dtype, None]): 

2582 If set, indicate a pandas ExtensionDtype, such as: 

2583 

2584 .. code-block:: python 

2585 

2586 pandas.ArrowDtype(pyarrow.struct( 

2587 [ 

2588 ("start", pyarrow.timestamp("us", tz="UTC")), 

2589 ("end", pyarrow.timestamp("us", tz="UTC")), 

2590 ] 

2591 )) 

2592 

2593 to convert BigQuery RANGE<TIMESTAMP> type, instead of relying 

2594 on the default ``object``. If you explicitly set the value to 

2595 ``None``, the data type will be ``object``. BigQuery Range type 

2596 can be found at: 

2597 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type 

2598 

2599 .. versionadded:: 3.21.0 

2600 

2601 timeout (Optional[float]): 

2602 The number of seconds to wait for the underlying download to complete. 

2603 If ``None``, wait indefinitely. 

2604 

2605 Returns: 

2606 pandas.DataFrame: 

2607 A :class:`~pandas.DataFrame` populated with row data and column 

2608 headers from the query results. The column headers are derived 

2609 from the destination table's schema. 

2610 

2611 Raises: 

2612 ValueError: 

2613 If the :mod:`pandas` library cannot be imported, or 

2614 the :mod:`google.cloud.bigquery_storage_v1` module is 

2615 required but cannot be imported. Also if 

2616 `geography_as_object` is `True`, but the 

2617 :mod:`shapely` library cannot be imported. Also if 

2618 `bool_dtype`, `int_dtype` or other dtype parameters 

2619 is not supported dtype. 

2620 

2621 """ 

2622 _pandas_helpers.verify_pandas_imports() 

2623 

2624 if geography_as_object and shapely is None: 

2625 raise ValueError(_NO_SHAPELY_ERROR) 

2626 

2627 if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE: 

2628 bool_dtype = pandas.BooleanDtype() 

2629 

2630 if int_dtype is DefaultPandasDTypes.INT_DTYPE: 

2631 int_dtype = pandas.Int64Dtype() 

2632 

2633 if time_dtype is DefaultPandasDTypes.TIME_DTYPE: 

2634 time_dtype = db_dtypes.TimeDtype() 

2635 

2636 if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE: 

2637 if _versions_helpers.SUPPORTS_RANGE_PYARROW: 

2638 range_date_dtype = pandas.ArrowDtype( 

2639 pyarrow.struct( 

2640 [("start", pyarrow.date32()), ("end", pyarrow.date32())] 

2641 ) 

2642 ) 

2643 else: 

2644 warnings.warn(_RANGE_PYARROW_WARNING) 

2645 range_date_dtype = None 

2646 

2647 if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE: 

2648 if _versions_helpers.SUPPORTS_RANGE_PYARROW: 

2649 range_datetime_dtype = pandas.ArrowDtype( 

2650 pyarrow.struct( 

2651 [ 

2652 ("start", pyarrow.timestamp("us")), 

2653 ("end", pyarrow.timestamp("us")), 

2654 ] 

2655 ) 

2656 ) 

2657 else: 

2658 warnings.warn(_RANGE_PYARROW_WARNING) 

2659 range_datetime_dtype = None 

2660 

2661 if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE: 

2662 if _versions_helpers.SUPPORTS_RANGE_PYARROW: 

2663 range_timestamp_dtype = pandas.ArrowDtype( 

2664 pyarrow.struct( 

2665 [ 

2666 ("start", pyarrow.timestamp("us", tz="UTC")), 

2667 ("end", pyarrow.timestamp("us", tz="UTC")), 

2668 ] 

2669 ) 

2670 ) 

2671 else: 

2672 warnings.warn(_RANGE_PYARROW_WARNING) 

2673 range_timestamp_dtype = None 

2674 

2675 if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): 

2676 raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) 

2677 

2678 if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"): 

2679 raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE) 

2680 

2681 if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"): 

2682 raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE) 

2683 

2684 if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): 

2685 raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) 

2686 

2687 if ( 

2688 date_dtype is not None 

2689 and date_dtype is not DefaultPandasDTypes.DATE_DTYPE 

2690 and not hasattr(date_dtype, "__from_arrow__") 

2691 ): 

2692 raise ValueError("date_dtype", _NO_SUPPORTED_DTYPE) 

2693 

2694 if datetime_dtype is not None and not hasattr(datetime_dtype, "__from_arrow__"): 

2695 raise ValueError("datetime_dtype", _NO_SUPPORTED_DTYPE) 

2696 

2697 if time_dtype is not None and not hasattr(time_dtype, "__from_arrow__"): 

2698 raise ValueError("time_dtype", _NO_SUPPORTED_DTYPE) 

2699 

2700 if timestamp_dtype is not None and not hasattr( 

2701 timestamp_dtype, "__from_arrow__" 

2702 ): 

2703 raise ValueError("timestamp_dtype", _NO_SUPPORTED_DTYPE) 

2704 

2705 if dtypes is None: 

2706 dtypes = {} 

2707 

2708 self._maybe_warn_max_results(bqstorage_client) 

2709 

2710 if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client): 

2711 create_bqstorage_client = False 

2712 bqstorage_client = None 

2713 

2714 record_batch = self.to_arrow( 

2715 progress_bar_type=progress_bar_type, 

2716 bqstorage_client=bqstorage_client, 

2717 create_bqstorage_client=create_bqstorage_client, 

2718 timeout=timeout, 

2719 ) 

2720 

2721 # Default date dtype is `db_dtypes.DateDtype()` that could cause out of bounds error, 

2722 # when pyarrow converts date values to nanosecond precision. To avoid the error, we 

2723 # set the date_as_object parameter to True, if necessary. 

2724 date_as_object = False 

2725 if date_dtype is DefaultPandasDTypes.DATE_DTYPE: 

2726 date_dtype = db_dtypes.DateDtype() 

2727 date_as_object = not all( 

2728 self.__can_cast_timestamp_ns(col) 

2729 for col in record_batch 

2730 # Type can be date32 or date64 (plus units). 

2731 # See: https://arrow.apache.org/docs/python/api/datatypes.html 

2732 if pyarrow.types.is_date(col.type) 

2733 ) 

2734 

2735 timestamp_as_object = False 

2736 if datetime_dtype is None and timestamp_dtype is None: 

2737 timestamp_as_object = not all( 

2738 self.__can_cast_timestamp_ns(col) 

2739 for col in record_batch 

2740 # Type can be datetime and timestamp (plus units and time zone). 

2741 # See: https://arrow.apache.org/docs/python/api/datatypes.html 

2742 if pyarrow.types.is_timestamp(col.type) 

2743 ) 

2744 

2745 df = record_batch.to_pandas( 

2746 date_as_object=date_as_object, 

2747 timestamp_as_object=timestamp_as_object, 

2748 integer_object_nulls=True, 

2749 types_mapper=_pandas_helpers.default_types_mapper( 

2750 date_as_object=date_as_object, 

2751 bool_dtype=bool_dtype, 

2752 int_dtype=int_dtype, 

2753 float_dtype=float_dtype, 

2754 string_dtype=string_dtype, 

2755 date_dtype=date_dtype, 

2756 datetime_dtype=datetime_dtype, 

2757 time_dtype=time_dtype, 

2758 timestamp_dtype=timestamp_dtype, 

2759 range_date_dtype=range_date_dtype, 

2760 range_datetime_dtype=range_datetime_dtype, 

2761 range_timestamp_dtype=range_timestamp_dtype, 

2762 ), 

2763 ) 

2764 

2765 for column in dtypes: 

2766 df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) 

2767 

2768 if geography_as_object: 

2769 for field in self.schema: 

2770 if field.field_type.upper() == "GEOGRAPHY" and field.mode != "REPEATED": 

2771 df[field.name] = df[field.name].dropna().apply(_read_wkt) 

2772 

2773 return df 

2774 

@staticmethod
def __can_cast_timestamp_ns(column):
    """Report whether ``column`` survives a cast to nanosecond timestamps.

    Args:
        column: An object exposing ``cast``; ``to_dataframe`` passes the
            date and timestamp columns of the Arrow table it downloaded.

    Returns:
        bool: ``False`` when the cast raises ``pyarrow.lib.ArrowInvalid``,
        ``True`` when it completes.
    """
    try:
        # Only the success/failure of the cast matters; its result is dropped.
        column.cast("timestamp[ns]")
        castable = True
    except pyarrow.lib.ArrowInvalid:
        castable = False
    return castable

2783 

2784 # If changing the signature of this method, make sure to apply the same 

2785 # changes to job.QueryJob.to_geodataframe() 

def to_geodataframe(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    dtypes: Optional[Dict[str, Any]] = None,
    progress_bar_type: Optional[str] = None,
    create_bqstorage_client: bool = True,
    geography_column: Optional[str] = None,
    bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
    int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
    float_dtype: Union[Any, None] = None,
    string_dtype: Union[Any, None] = None,
    timeout: Optional[float] = None,
) -> "geopandas.GeoDataFrame":
    """Create a GeoPandas GeoDataFrame by loading all pages of a query.

    The rows are first loaded through :meth:`to_dataframe` with
    ``geography_as_object=True`` and the result is wrapped in a
    :class:`geopandas.GeoDataFrame` whose geometry is the selected
    GEOGRAPHY column.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster
            BigQuery Storage API to fetch rows from BigQuery.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary mapping column names to pandas ``dtype``s. The
            provided ``dtype`` is used when constructing the series for the
            column specified. Otherwise, the default pandas behavior is used.
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
              No progress bar.
            ``'tqdm'``
              Use the :func:`tqdm.tqdm` function to print a progress bar
              to :data:`sys.stdout`.
            ``'tqdm_notebook'``
              Use the :func:`tqdm.notebook.tqdm` function to display a
              progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
              Use the :func:`tqdm.tqdm_gui` function to display a
              progress bar as a graphical dialog box.

        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client
            using the default API settings. The BigQuery Storage API
            is a faster way to fetch rows from BigQuery. See the
            ``bqstorage_client`` parameter for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

        geography_column (Optional[str]):
            If there is more than one GEOGRAPHY column, identifies which
            one to use to construct a geopandas GeoDataFrame. This option
            can be omitted if there is only one GEOGRAPHY column.
        bool_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
            to convert BigQuery Boolean type, instead of relying on the default
            ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
            type can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
        int_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
            to convert BigQuery Integer types, instead of relying on the default
            ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
            Integer types can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
        float_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
            to convert BigQuery Float type, instead of relying on the default
            ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("float64")``. BigQuery Float
            type can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
        string_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
            convert BigQuery String type, instead of relying on the default
            ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("object")``. BigQuery String
            type can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
        timeout (Optional[float]):
            Forwarded unchanged to :meth:`to_dataframe`, which documents it
            as the number of seconds to wait for the underlying download
            to complete. If ``None``, wait indefinitely.

    Returns:
        geopandas.GeoDataFrame:
            A :class:`geopandas.GeoDataFrame` populated with row
            data and column headers from the query results. The
            column headers are derived from the destination
            table's schema.

    Raises:
        ValueError:
            If the :mod:`geopandas` library cannot be imported, or the
            :mod:`google.cloud.bigquery_storage_v1` module is
            required but cannot be imported. Also if
            ``geography_column`` does not name a GEOGRAPHY column of the
            schema, or if the schema has more than one GEOGRAPHY column
            and ``geography_column`` was not given.
        TypeError:
            If the schema has no GEOGRAPHY column at all.

    .. versionadded:: 2.24.0
    """
    if geopandas is None:
        raise ValueError(_NO_GEOPANDAS_ERROR)

    geography_columns = {
        field.name
        for field in self.schema
        if field.field_type.upper() == "GEOGRAPHY"
    }
    if not geography_columns:
        raise TypeError(
            "There must be at least one GEOGRAPHY column"
            " to create a GeoDataFrame"
        )

    if geography_column:
        if geography_column not in geography_columns:
            raise ValueError(
                f"The given geography column, {geography_column}, doesn't name"
                " a GEOGRAPHY column in the result."
            )
    elif len(geography_columns) == 1:
        # Exactly one candidate: use it as the geometry column.
        (geography_column,) = geography_columns
    else:
        raise ValueError(
            "There is more than one GEOGRAPHY column in the result. "
            "The geography_column argument must be used to specify which "
            "one to use to create a GeoDataFrame"
        )

    # geography_as_object=True makes to_dataframe parse the WKT strings into
    # shapely objects, which is what GeoDataFrame needs for its geometry.
    df = self.to_dataframe(
        bqstorage_client,
        dtypes,
        progress_bar_type,
        create_bqstorage_client,
        geography_as_object=True,
        bool_dtype=bool_dtype,
        int_dtype=int_dtype,
        float_dtype=float_dtype,
        string_dtype=string_dtype,
        timeout=timeout,
    )

    return geopandas.GeoDataFrame(
        df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column
    )

2938 

2939 

class _EmptyRowIterator(RowIterator):
    """An empty row iterator.

    This class prevents API requests when there are no rows to fetch or rows
    are impossible to fetch, such as with query results for DDL CREATE VIEW
    statements.

    Every conversion method returns an empty container of the matching type
    and ignores the arguments it accepts for signature compatibility with
    :class:`RowIterator`.
    """

    def __init__(
        self, client=None, api_request=None, path=None, schema=(), *args, **kwargs
    ):
        # NOTE(review): extra positional ``args`` are bound to the parent's
        # leading positional parameters. If those are the four names passed
        # by keyword below, a non-empty ``args`` raises ``TypeError`` --
        # confirm against ``RowIterator.__init__``.
        super().__init__(
            *args,
            client=client,
            api_request=api_request,
            path=path,
            schema=schema,
            **kwargs,
        )
        # Pin the row count to zero: there is nothing to fetch.
        self._total_rows = 0

    def to_arrow(
        self,
        progress_bar_type=None,
        bqstorage_client=None,
        create_bqstorage_client=True,
        timeout: Optional[float] = None,
    ) -> "pyarrow.Table":
        """[Beta] Create an empty :class:`pyarrow.Table`.

        Args:
            progress_bar_type (str): Ignored. Added for compatibility with RowIterator.
            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
            timeout (Optional[float]): Ignored. Added for compatibility with RowIterator.

        Returns:
            pyarrow.Table: An empty :class:`pyarrow.Table`.

        Raises:
            ValueError: If the :mod:`pyarrow` library cannot be imported.
        """
        if pyarrow is None:
            raise ValueError(_NO_PYARROW_ERROR)
        return pyarrow.Table.from_arrays(())

    def to_dataframe(
        self,
        bqstorage_client=None,
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
        geography_as_object=False,
        bool_dtype=None,
        int_dtype=None,
        float_dtype=None,
        string_dtype=None,
        date_dtype=None,
        datetime_dtype=None,
        time_dtype=None,
        timestamp_dtype=None,
        range_date_dtype=None,
        range_datetime_dtype=None,
        range_timestamp_dtype=None,
        timeout: Optional[float] = None,
    ) -> "pandas.DataFrame":
        """Create an empty dataframe.

        Args:
            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
            dtypes (Any): Ignored. Added for compatibility with RowIterator.
            progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
            geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
            bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
            int_dtype (Any): Ignored. Added for compatibility with RowIterator.
            float_dtype (Any): Ignored. Added for compatibility with RowIterator.
            string_dtype (Any): Ignored. Added for compatibility with RowIterator.
            date_dtype (Any): Ignored. Added for compatibility with RowIterator.
            datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
            time_dtype (Any): Ignored. Added for compatibility with RowIterator.
            timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
            range_date_dtype (Any): Ignored. Added for compatibility with RowIterator.
            range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
            range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
            timeout (Optional[float]): Ignored. Added for compatibility with RowIterator.

        Returns:
            pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
        """
        _pandas_helpers.verify_pandas_imports()
        return pandas.DataFrame()

    def to_geodataframe(
        self,
        bqstorage_client=None,
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
        geography_column: Optional[str] = None,
        bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
        int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
        float_dtype: Union[Any, None] = None,
        string_dtype: Union[Any, None] = None,
        timeout: Optional[float] = None,
    ) -> "geopandas.GeoDataFrame":
        """Create an empty geodataframe.

        Args:
            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
            dtypes (Any): Ignored. Added for compatibility with RowIterator.
            progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
            geography_column (str): Ignored. Added for compatibility with RowIterator.
            bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
            int_dtype (Any): Ignored. Added for compatibility with RowIterator.
            float_dtype (Any): Ignored. Added for compatibility with RowIterator.
            string_dtype (Any): Ignored. Added for compatibility with RowIterator.
            timeout (Optional[float]): Ignored. Added for compatibility with RowIterator.

        Returns:
            geopandas.GeoDataFrame: An empty :class:`geopandas.GeoDataFrame`.

        Raises:
            ValueError: If the :mod:`geopandas` library cannot be imported.
        """
        if geopandas is None:
            raise ValueError(_NO_GEOPANDAS_ERROR)

        # Since an empty GeoDataFrame has no geometry column, we do not set a
        # CRS on it, because that's deprecated.
        return geopandas.GeoDataFrame()

    def to_dataframe_iterable(
        self,
        bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
        dtypes: Optional[Dict[str, Any]] = None,
        max_queue_size: Optional[int] = None,
        max_stream_count: Optional[int] = None,
        timeout: Optional[float] = None,
    ) -> Iterator["pandas.DataFrame"]:
        """Create an iterable of pandas DataFrames, to process the table as a stream.

        .. versionadded:: 2.21.0

        Args:
            bqstorage_client:
                Ignored. Added for compatibility with RowIterator.

            dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
                Ignored. Added for compatibility with RowIterator.

            max_queue_size:
                Ignored. Added for compatibility with RowIterator.

            max_stream_count:
                Ignored. Added for compatibility with RowIterator.

            timeout (Optional[float]):
                Ignored. Added for compatibility with RowIterator.

        Returns:
            An iterator yielding a single empty :class:`~pandas.DataFrame`.

        Raises:
            ValueError:
                If the :mod:`pandas` library cannot be imported.
        """
        _pandas_helpers.verify_pandas_imports()
        return iter((pandas.DataFrame(),))

    def to_arrow_iterable(
        self,
        bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
        max_queue_size: Optional[int] = None,
        max_stream_count: Optional[int] = None,
        timeout: Optional[float] = None,
    ) -> Iterator["pyarrow.RecordBatch"]:
        """Create an iterable of pyarrow RecordBatches, to process the table as a stream.

        .. versionadded:: 2.31.0

        Args:
            bqstorage_client:
                Ignored. Added for compatibility with RowIterator.

            max_queue_size:
                Ignored. Added for compatibility with RowIterator.

            max_stream_count:
                Ignored. Added for compatibility with RowIterator.

            timeout (Optional[float]):
                Ignored. Added for compatibility with RowIterator.

        Returns:
            An iterator yielding a single empty :class:`~pyarrow.RecordBatch`.

        Raises:
            ValueError:
                If the :mod:`pyarrow` library cannot be imported.
        """
        # Same guard as to_arrow(): fail with the descriptive error instead
        # of an AttributeError on ``None`` when pyarrow is not installed.
        if pyarrow is None:
            raise ValueError(_NO_PYARROW_ERROR)
        return iter((pyarrow.record_batch([]),))

    def __iter__(self):
        # No rows: iteration ends immediately.
        return iter(())

3136 

3137 

class PartitionRange:
    """Definition of the ranges for range partitioning.

    .. note::
        **Beta**. The integer range partitioning feature is in a pre-release
        state and might change or have limited support.

    Args:
        start (Optional[int]):
            Sets the
            :attr:`~google.cloud.bigquery.table.PartitionRange.start`
            property.
        end (Optional[int]):
            Sets the
            :attr:`~google.cloud.bigquery.table.PartitionRange.end`
            property.
        interval (Optional[int]):
            Sets the
            :attr:`~google.cloud.bigquery.table.PartitionRange.interval`
            property.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource. The dict is
            stored by reference, not copied.
    """

    def __init__(self, start=None, end=None, interval=None, _properties=None) -> None:
        self._properties = {} if _properties is None else _properties

        # Route each supplied value through its property setter, in the
        # order start, end, interval.
        supplied = (("start", start), ("end", end), ("interval", interval))
        for attribute, value in supplied:
            if value is not None:
                setattr(self, attribute, value)

    @property
    def start(self):
        """int: The start of range partitioning, inclusive."""
        raw = self._properties.get("start")
        return _helpers._int_or_none(raw)

    @start.setter
    def start(self, value):
        self._properties["start"] = _helpers._str_or_none(value)

    @property
    def end(self):
        """int: The end of range partitioning, exclusive."""
        raw = self._properties.get("end")
        return _helpers._int_or_none(raw)

    @end.setter
    def end(self, value):
        self._properties["end"] = _helpers._str_or_none(value)

    @property
    def interval(self):
        """int: The width of each interval."""
        raw = self._properties.get("interval")
        return _helpers._int_or_none(raw)

    @interval.setter
    def interval(self, value):
        self._properties["interval"] = _helpers._str_or_none(value)

    def _key(self):
        # Order-independent snapshot of the stored properties, used by
        # __eq__ and __repr__.
        pairs = sorted(self._properties.items())
        return tuple(pairs)

    def __eq__(self, other):
        if isinstance(other, PartitionRange):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        rendered = ", ".join("{}={}".format(name, value) for name, value in self._key())
        return "PartitionRange({})".format(rendered)

3215 

3216 

class RangePartitioning:
    """Range-based partitioning configuration for a table.

    .. note::
        **Beta**. The integer range partitioning feature is in a pre-release
        state and might change or have limited support.

    Args:
        range_ (Optional[google.cloud.bigquery.table.PartitionRange]):
            Sets the
            :attr:`google.cloud.bigquery.table.RangePartitioning.range_`
            property.
        field (Optional[str]):
            Sets the
            :attr:`google.cloud.bigquery.table.RangePartitioning.field`
            property.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource. The dict is
            stored by reference, not copied.
    """

    def __init__(self, range_=None, field=None, _properties=None) -> None:
        self._properties: Dict[str, Any] = (
            {} if _properties is None else _properties
        )

        # Route each supplied value through its property setter.
        for attribute, value in (("range_", range_), ("field", field)):
            if value is not None:
                setattr(self, attribute, value)

    # Trailing underscore to prevent conflict with built-in range() function.
    @property
    def range_(self):
        """google.cloud.bigquery.table.PartitionRange: Defines the
        ranges for range partitioning.

        Reading this property creates an empty ``"range"`` entry in the
        underlying properties when none exists, so the returned object
        shares its state with this instance.

        Raises:
            ValueError:
                If the value is not a :class:`PartitionRange`.
        """
        return PartitionRange(_properties=self._properties.setdefault("range", {}))

    @range_.setter
    def range_(self, value):
        if isinstance(value, PartitionRange):
            # Share the dict so later edits to ``value`` are visible here.
            self._properties["range"] = value._properties
            return
        raise ValueError("Expected a PartitionRange, but got {}.".format(value))

    @property
    def field(self):
        """str: The table is partitioned by this field.

        The field must be a top-level ``NULLABLE`` / ``REQUIRED`` field. The
        only supported type is ``INTEGER`` / ``INT64``.
        """
        return self._properties.get("field")

    @field.setter
    def field(self, value):
        self._properties["field"] = value

    def _key(self):
        # Comparison/representation key: the field name and the range object.
        field_entry = ("field", self.field)
        range_entry = ("range_", self.range_)
        return (field_entry, range_entry)

    def __eq__(self, other):
        if isinstance(other, RangePartitioning):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        rendered = ", ".join(
            "{}={}".format(name, repr(value)) for name, value in self._key()
        )
        return "RangePartitioning({})".format(rendered)

3293 

3294 

class TimePartitioningType:
    """Names of the supported time partitioning granularities."""

    DAY = "DAY"
    """str: Generates one partition per day."""

    HOUR = "HOUR"
    """str: Generates one partition per hour."""

    MONTH = "MONTH"
    """str: Generates one partition per month."""

    YEAR = "YEAR"
    """str: Generates one partition per year."""

3309 

3310 

3311class TimePartitioning(object): 

3312 """Configures time-based partitioning for a table. 

3313 

3314 Args: 

3315 type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]): 

3316 Specifies the type of time partitioning to perform. Defaults to 

3317 :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. 

3318 

3319 Supported values are: 

3320 

3321 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR` 

3322 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY` 

3323 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH` 

3324 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR` 

3325 

3326 field (Optional[str]): 

3327 If set, the table is partitioned by this field. If not set, the 

3328 table is partitioned by pseudo column ``_PARTITIONTIME``. The field 

3329 must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE`` 

3330 field. Its mode must be ``NULLABLE`` or ``REQUIRED``. 

3331 

3332 See the `time-unit column-partitioned tables guide 

3333 <https://cloud.google.com/bigquery/docs/creating-column-partitions>`_ 

3334 in the BigQuery documentation. 

3335 expiration_ms(Optional[int]): 

3336 Number of milliseconds for which to keep the storage for a 

3337 partition. 

3338 require_partition_filter (Optional[bool]): 

3339 DEPRECATED: Use 

3340 :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, 

3341 instead. 

3342 """ 

3343 

3344 def __init__( 

3345 self, type_=None, field=None, expiration_ms=None, require_partition_filter=None 

3346 ) -> None: 

3347 self._properties: Dict[str, Any] = {} 

3348 if type_ is None: 

3349 self.type_ = TimePartitioningType.DAY 

3350 else: 

3351 self.type_ = type_ 

3352 if field is not None: 

3353 self.field = field 

3354 if expiration_ms is not None: 

3355 self.expiration_ms = expiration_ms 

3356 if require_partition_filter is not None: 

3357 self.require_partition_filter = require_partition_filter 

3358 

3359 @property 

3360 def type_(self): 

3361 """google.cloud.bigquery.table.TimePartitioningType: The type of time 

3362 partitioning to use. 

3363 """ 

3364 return self._properties.get("type") 

3365 

3366 @type_.setter 

3367 def type_(self, value): 

3368 self._properties["type"] = value 

3369 

3370 @property 

3371 def field(self): 

3372 """str: Field in the table to use for partitioning""" 

3373 return self._properties.get("field") 

3374 

3375 @field.setter 

3376 def field(self, value): 

3377 self._properties["field"] = value 

3378 

3379 @property 

3380 def expiration_ms(self): 

3381 """int: Number of milliseconds to keep the storage for a partition.""" 

3382 return _helpers._int_or_none(self._properties.get("expirationMs")) 

3383 

3384 @expiration_ms.setter 

3385 def expiration_ms(self, value): 

3386 if value is not None: 

3387 # Allow explicitly setting the expiration to None. 

3388 value = str(value) 

3389 self._properties["expirationMs"] = value 

3390 

3391 @property 

3392 def require_partition_filter(self): 

3393 """bool: Specifies whether partition filters are required for queries 

3394 

3395 DEPRECATED: Use 

3396 :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, 

3397 instead. 

3398 """ 

3399 warnings.warn( 

3400 ( 

3401 "TimePartitioning.require_partition_filter will be removed in " 

3402 "future versions. Please use Table.require_partition_filter " 

3403 "instead." 

3404 ), 

3405 PendingDeprecationWarning, 

3406 stacklevel=2, 

3407 ) 

3408 return self._properties.get("requirePartitionFilter") 

3409 

3410 @require_partition_filter.setter 

3411 def require_partition_filter(self, value): 

3412 warnings.warn( 

3413 ( 

3414 "TimePartitioning.require_partition_filter will be removed in " 

3415 "future versions. Please use Table.require_partition_filter " 

3416 "instead." 

3417 ), 

3418 PendingDeprecationWarning, 

3419 stacklevel=2, 

3420 ) 

3421 self._properties["requirePartitionFilter"] = value 

3422 

3423 @classmethod 

3424 def from_api_repr(cls, api_repr: dict) -> "TimePartitioning": 

3425 """Return a :class:`TimePartitioning` object deserialized from a dict. 

3426 

3427 This method creates a new ``TimePartitioning`` instance that points to 

3428 the ``api_repr`` parameter as its internal properties dict. This means 

3429 that when a ``TimePartitioning`` instance is stored as a property of 

3430 another object, any changes made at the higher level will also appear 

3431 here:: 

3432 

3433 >>> time_partitioning = TimePartitioning() 

3434 >>> table.time_partitioning = time_partitioning 

3435 >>> table.time_partitioning.field = 'timecolumn' 

3436 >>> time_partitioning.field 

3437 'timecolumn' 

3438 

3439 Args: 

3440 api_repr (Mapping[str, str]): 

3441 The serialized representation of the TimePartitioning, such as 

3442 what is output by :meth:`to_api_repr`. 

3443 

3444 Returns: 

3445 google.cloud.bigquery.table.TimePartitioning: 

3446 The ``TimePartitioning`` object. 

3447 """ 

3448 instance = cls() 

3449 instance._properties = api_repr 

3450 return instance 

3451 

def to_api_repr(self) -> dict:
    """Expose the serialized form of this ``TimePartitioning``.

    No copy is made: the dict handed back is the very object this
    instance stores its state in. When a ``TimePartitioning`` is stored
    as a property of another object, changes made at that higher level
    are therefore visible here too, and mutating the returned dict
    mutates this instance.

    Returns:
        dict:
            A dictionary representing the TimePartitioning object in
            serialized form.
    """
    shared_properties = self._properties
    return shared_properties

3466 

def _key(self):
    """Return a sorted tuple snapshot of the partitioning properties.

    The result is what ``__eq__``, ``__hash__`` and ``__repr__`` operate
    on. API-style keys are renamed to their Python spellings
    (``type`` -> ``type_``, ``requirePartitionFilter`` ->
    ``require_partition_filter``, ``expirationMs`` -> ``expiration_ms``)
    and the ``type`` and ``field`` values are replaced by their ``repr``.

    A properties dict without a ``"type"`` entry is tolerated: the type
    is then reported as ``repr(None)`` instead of raising ``KeyError``,
    so ``repr()``, ``hash()`` and ``==`` keep working on such instances.

    Returns:
        tuple: ``(name, value)`` pairs sorted by name.
    """
    # Only top-level keys are renamed, so a shallow copy is enough to
    # keep ``self._properties`` untouched.
    properties = self._properties.copy()
    # repr() is applied because the stored value may be a non built-in type.
    properties["type_"] = repr(properties.pop("type", None))
    if "field" in properties:
        # repr() is applied because the stored value may be a non built-in type.
        properties["field"] = repr(properties["field"])
    renames = (
        ("requirePartitionFilter", "require_partition_filter"),
        ("expirationMs", "expiration_ms"),
    )
    for api_name, python_name in renames:
        if api_name in properties:
            properties[python_name] = properties.pop(api_name)
    return tuple(sorted(properties.items()))

3482 

def __eq__(self, other):
    # Two instances are equal when their normalized key tuples match.
    if isinstance(other, TimePartitioning):
        return self._key() == other._key()
    # Any other type: defer to Python's fallback comparison handling.
    return NotImplemented

3487 

def __ne__(self, other):
    # Inequality is defined as the negation of the ``==`` result.
    considered_equal = self == other
    return not considered_equal

3490 

def __hash__(self):
    # Hash the same key tuple that ``__eq__`` compares.
    identity = self._key()
    return hash(identity)

3493 

def __repr__(self):
    # Render every (name, value) pair of the key as ``name=value``,
    # comma separated without spaces.
    rendered = ",".join(f"{name}={value}" for name, value in self._key())
    return f"TimePartitioning({rendered})"

3497 

3498 

class PrimaryKey:
    """Primary key constraint declared on a table.

    Args:
        columns: Names of the columns that make up the primary key.
    """

    def __init__(self, columns: List[str]):
        self.columns = columns

    def __eq__(self, other):
        # Equality is defined by the column list alone. Comparing against
        # anything that is not a PrimaryKey is treated as a caller error
        # and raises instead of returning False.
        if isinstance(other, PrimaryKey):
            return self.columns == other.columns
        raise TypeError("The value provided is not a BigQuery PrimaryKey.")

3513 

3514 

class ColumnReference:
    """One foreign-key column paired with the primary-key column it targets.

    Args:
        referencing_column: The column that composes the foreign key.
        referenced_column: The primary-key column that ``referencing_column``
            points at.
    """

    def __init__(self, referencing_column: str, referenced_column: str):
        self.referencing_column = referencing_column
        self.referenced_column = referenced_column

    def __eq__(self, other):
        # Only another ColumnReference is comparable; anything else is
        # rejected with TypeError rather than compared as unequal.
        if isinstance(other, ColumnReference):
            same_source = self.referencing_column == other.referencing_column
            return same_source and self.referenced_column == other.referenced_column
        raise TypeError("The value provided is not a BigQuery ColumnReference.")

3534 

3535 

class ForeignKey:
    """Represents a foreign key constraint on a table's columns.

    Args:
        name: Set only if the foreign key constraint is named.
        referenced_table: The table that holds the primary key and is referenced by this foreign key.
        column_references: The columns that compose the foreign key.
    """

    def __init__(
        self,
        name: str,
        referenced_table: TableReference,
        column_references: List[ColumnReference],
    ):
        self.name = name
        self.referenced_table = referenced_table
        self.column_references = column_references

    def __eq__(self, other):
        # Comparing against a non-ForeignKey is treated as a caller error.
        if not isinstance(other, ForeignKey):
            raise TypeError("The value provided is not a BigQuery ForeignKey.")
        return (
            self.name == other.name
            and self.referenced_table == other.referenced_table
            and self.column_references == other.column_references
        )

    @classmethod
    def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey":
        """Create an instance from API representation.

        Args:
            api_repr: A foreign key resource containing ``referencedTable``
                and ``columnReferences`` and, for named constraints,
                ``name``.

        Returns:
            The ``ForeignKey`` described by ``api_repr``. ``name`` is
            ``None`` when the resource carries no ``name`` entry.
        """
        return cls(
            # The name is only set for named constraints, so a resource
            # without it must not raise KeyError.
            name=api_repr.get("name"),
            referenced_table=TableReference.from_api_repr(api_repr["referencedTable"]),
            column_references=[
                ColumnReference(
                    column_reference_resource["referencingColumn"],
                    column_reference_resource["referencedColumn"],
                )
                for column_reference_resource in api_repr["columnReferences"]
            ],
        )

    def to_api_repr(self) -> Dict[str, Any]:
        """Return a dictionary representing this object.

        Returns:
            A new dict with ``name``, ``referencedTable`` (the referenced
            table's own ``to_api_repr()`` output) and ``columnReferences``
            (one ``referencingColumn``/``referencedColumn`` dict per column
            reference).
        """
        return {
            "name": self.name,
            "referencedTable": self.referenced_table.to_api_repr(),
            "columnReferences": [
                {
                    "referencingColumn": column_reference.referencing_column,
                    "referencedColumn": column_reference.referenced_column,
                }
                for column_reference in self.column_references
            ],
        }

3592 

3593 

class TableConstraints:
    """The TableConstraints defines the primary key and foreign key.

    Args:
        primary_key:
            Represents a primary key constraint on a table's columns. Present only if the table
            has a primary key. The primary key is not enforced.
        foreign_keys:
            Present only if the table has a foreign key. The foreign key is not enforced.

    """

    def __init__(
        self,
        primary_key: Optional["PrimaryKey"],
        foreign_keys: Optional[List["ForeignKey"]],
    ):
        self.primary_key = primary_key
        self.foreign_keys = foreign_keys

    @staticmethod
    def _constraints_match(mine, theirs) -> bool:
        """Compare one constraint field, treating every falsy value as absent."""
        mine = mine or None
        theirs = theirs or None
        if mine is None or theirs is None:
            # Never pass ``None`` to a constraint's own ``__eq__``: those
            # raise TypeError for foreign types instead of returning False.
            return mine is theirs
        return mine == theirs

    def __eq__(self, other):
        """Compare two sets of table constraints.

        ``None`` compares unequal. Any other non-``TableConstraints``
        value raises ``TypeError``. Missing and empty constraints (for
        example ``None`` and ``[]`` foreign keys) are considered the same
        on both sides of the comparison.
        """
        if other is None:
            return False
        if not isinstance(other, TableConstraints):
            raise TypeError("The value provided is not a BigQuery TableConstraints.")
        return self._constraints_match(
            self.primary_key, other.primary_key
        ) and self._constraints_match(self.foreign_keys, other.foreign_keys)

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints":
        """Create an instance from API representation.

        Args:
            resource: A table constraints resource; ``primaryKey`` and
                ``foreignKeys`` are each optional.

        Returns:
            The ``TableConstraints`` described by ``resource``; a missing
            entry becomes ``None`` on the instance.
        """
        primary_key = None
        if "primaryKey" in resource:
            primary_key = PrimaryKey(resource["primaryKey"]["columns"])

        foreign_keys = None
        if "foreignKeys" in resource:
            foreign_keys = [
                ForeignKey.from_api_repr(foreign_key_resource)
                for foreign_key_resource in resource["foreignKeys"]
            ]
        return cls(primary_key, foreign_keys)

    def to_api_repr(self) -> Dict[str, Any]:
        """Return a dictionary representing this object.

        Returns:
            A new dict holding ``primaryKey`` and/or ``foreignKeys``; a
            constraint that is unset or empty is left out entirely.
        """
        resource: Dict[str, Any] = {}
        if self.primary_key:
            resource["primaryKey"] = {"columns": self.primary_key.columns}
        if self.foreign_keys:
            resource["foreignKeys"] = [
                foreign_key.to_api_repr() for foreign_key in self.foreign_keys
            ]
        return resource

3646 

3647 

class BigLakeConfiguration(object):
    """Settings for managed tables for Apache Iceberg (formerly BigLake).

    All values live in a single ``_properties`` dict keyed by the API's
    camelCase field names; the public attributes are thin views over it.

    Args:
        connection_id (Optional[str]):
            The connection specifying the credentials to be used to read and write to external
            storage, such as Cloud Storage. The connection_id can have the form
            ``{project}.{location}.{connection_id}`` or
            ``projects/{project}/locations/{location}/connections/{connection_id}``.
        storage_uri (Optional[str]):
            The fully qualified location prefix of the external folder where table data is
            stored. The '*' wildcard character is not allowed. The URI should be in the
            format ``gs://bucket/path_to_table/``.
        file_format (Optional[str]):
            The file format the table data is stored in. See BigLakeFileFormat for available
            values.
        table_format (Optional[str]):
            The table format the metadata only snapshots are stored in. See BigLakeTableFormat
            for available values.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        storage_uri: Optional[str] = None,
        file_format: Optional[str] = None,
        table_format: Optional[str] = None,
        _properties: Optional[dict] = None,
    ) -> None:
        self._properties = {} if _properties is None else _properties
        # Explicit arguments go through the property setters in signature
        # order; arguments left as None do not touch ``_properties``.
        explicit_arguments = (
            ("connection_id", connection_id),
            ("storage_uri", storage_uri),
            ("file_format", file_format),
            ("table_format", table_format),
        )
        for attribute, supplied in explicit_arguments:
            if supplied is not None:
                setattr(self, attribute, supplied)

    @property
    def connection_id(self) -> Optional[str]:
        """str: The connection specifying the credentials to be used to read and write to
        external storage, such as Cloud Storage. ``None`` when unset."""
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        self._properties["connectionId"] = value

    @property
    def storage_uri(self) -> Optional[str]:
        """str: The fully qualified location prefix of the external folder where table data
        is stored. ``None`` when unset."""
        return self._properties.get("storageUri")

    @storage_uri.setter
    def storage_uri(self, value: Optional[str]):
        self._properties["storageUri"] = value

    @property
    def file_format(self) -> Optional[str]:
        """str: The file format the table data is stored in. See BigLakeFileFormat for
        available values. ``None`` when unset."""
        return self._properties.get("fileFormat")

    @file_format.setter
    def file_format(self, value: Optional[str]):
        self._properties["fileFormat"] = value

    @property
    def table_format(self) -> Optional[str]:
        """str: The table format the metadata only snapshots are stored in. See
        BigLakeTableFormat for available values. ``None`` when unset."""
        return self._properties.get("tableFormat")

    @table_format.setter
    def table_format(self, value: Optional[str]):
        self._properties["tableFormat"] = value

    def _key(self):
        # Sorted (key, value) pairs: the basis for ==, hash() and repr().
        ordered_pairs = sorted(self._properties.items())
        return tuple(ordered_pairs)

    def __eq__(self, other):
        if isinstance(other, BigLakeConfiguration):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        considered_equal = self == other
        return not considered_equal

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        rendered = ",".join(f"{name}={value}" for name, value in self._key())
        return f"BigLakeConfiguration({rendered})"

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration":
        """Factory: construct a BigLakeConfiguration given its API representation.

        The ``resource`` dict is adopted by reference rather than copied,
        so later changes to it are visible through the returned object.

        Args:
            resource:
                BigLakeConfiguration representation returned from the API

        Returns:
            BigLakeConfiguration parsed from ``resource``.
        """
        configuration = cls()
        configuration._properties = resource
        return configuration

    def to_api_repr(self) -> Dict[str, Any]:
        """Construct the API resource representation of this BigLakeConfiguration.

        Returns:
            BigLakeConfiguration represented as an API resource. This is a
            deep copy, so editing it does not affect this instance.
        """
        return copy.deepcopy(self._properties)

3772 

3773 

def _item_to_row(iterator, resource):
    """Turn one JSON row resource into a ``Row``.

    .. note::

        The iterator must already carry a ``schema`` attribute; attaching
        it after the iterator is created is the caller's job.

    Args:
        iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use.
        resource (Dict): An item to be converted to a row.

    Returns:
        google.cloud.bigquery.table.Row: The next row in the page.
    """
    cell_values = _helpers._row_tuple_from_json(resource, iterator.schema)
    return Row(cell_values, iterator._field_to_index)

3794 

3795 

def _row_iterator_page_columns(schema, response):
    """Build one lazy value generator per schema field for a page of rows.

    The column-oriented result is meant for creating a
    :class:`pandas.DataFrame` and other column-oriented data structures
    such as :class:`pyarrow.RecordBatch`. Cells are only parsed once a
    column generator is actually iterated.

    Returns:
        list: One generator per entry of ``schema``, in schema order. Each
        yields that field's parsed value for every row in
        ``response["rows"]`` (no values when the response has no rows).
    """
    page_rows = response.get("rows", [])

    def column_values(position, field):
        # ``position`` and ``field`` are bound per call, so every generator
        # keeps reading its own column even though evaluation is deferred.
        for page_row in page_rows:
            raw_cell = page_row["f"][position]["v"]
            yield _helpers.DATA_FRAME_CELL_DATA_PARSER.to_py(raw_cell, field)

    return [column_values(position, field) for position, field in enumerate(schema)]

3815 

3816 

3817# pylint: disable=unused-argument 

def _rows_page_start(iterator, page, response):
    """Attach the column view to a new page and record the total row count.

    Args:
        iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use.
        page (google.api_core.page_iterator.Page): The page that was just created.
        response (Dict): The JSON API response for a page of rows in a table.
    """
    # Give the page a lazily evaluated, column-oriented view of its rows
    # for use by data science packages.
    page._columns = _row_iterator_page_columns(iterator._schema, response)

    reported_total = response.get("totalRows")
    if reported_total is None:
        # A response without ``totalRows`` keeps the previously stored count.
        return
    iterator._total_rows = int(reported_total)

3834 

3835 

3836# pylint: enable=unused-argument 

3837 

3838 

def _table_arg_to_table_ref(value, default_project=None) -> TableReference:
    """Coerce a table ID string, ``Table`` or ``TableListItem`` to a reference.

    A string is parsed with ``TableReference.from_string`` (using
    ``default_project``); a ``Table`` or ``TableListItem`` contributes its
    ``reference``. Any other object, including a ``TableReference``, is
    returned unchanged.
    """
    candidate = value
    if isinstance(candidate, str):
        candidate = TableReference.from_string(
            candidate, default_project=default_project
        )
    if not isinstance(candidate, (Table, TableListItem)):
        return candidate
    return candidate.reference

3849 

3850 

def _table_arg_to_table(value, default_project=None) -> Table:
    """Coerce a table ID string, ``TableReference`` or ``TableListItem`` to a ``Table``.

    A string is first parsed with ``TableReference.from_string`` (using
    ``default_project``) and a ``TableReference`` is wrapped in a new
    ``Table``. A ``TableListItem`` becomes a new ``Table`` that shares the
    item's ``_properties`` dict. Any other object, including a ``Table``,
    is returned unchanged.
    """
    candidate = value
    if isinstance(candidate, str):
        candidate = TableReference.from_string(
            candidate, default_project=default_project
        )
    if isinstance(candidate, TableReference):
        candidate = Table(candidate)
    if not isinstance(candidate, TableListItem):
        return candidate

    # The new Table adopts the list item's properties dict by reference;
    # it is not copied.
    table = Table(candidate.reference)
    table._properties = candidate._properties
    return table