Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/table.py: 37%

901 statements  

coverage.py v7.2.2, created at 2023-03-26 06:07 +0000

# Copyright 2015 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Define API Tables."""

from __future__ import absolute_import

import copy
import datetime
import functools
import operator
import typing
from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
import warnings

try:
    import pandas  # type: ignore
except ImportError:  # pragma: NO COVER
    pandas = None

try:
    import pyarrow  # type: ignore
except ImportError:  # pragma: NO COVER
    pyarrow = None

try:
    import db_dtypes  # type: ignore
except ImportError:  # pragma: NO COVER
    db_dtypes = None

try:
    import geopandas  # type: ignore
except ImportError:
    geopandas = None
else:
    _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326"

try:
    import shapely  # type: ignore
    from shapely import wkt  # type: ignore
except ImportError:
    shapely = None
else:
    _read_wkt = wkt.loads
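# The optional imports above follow a common pattern: a missing dependency is
# bound to ``None`` so that feature-specific code can fail late with a clear
# error message. Illustrative sketch only, not an excerpt from this module::
#
#     if pyarrow is None:
#         raise ValueError(_NO_PYARROW_ERROR)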

import google.api_core.exceptions
from google.api_core.page_iterator import HTTPIterator

import google.cloud._helpers  # type: ignore
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import _pandas_helpers
from google.cloud.bigquery.enums import DefaultPandasDTypes
from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
from google.cloud.bigquery._tqdm_helpers import get_progress_bar
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration

if typing.TYPE_CHECKING:  # pragma: NO COVER
    # Unconditionally import optional dependencies again to tell pytype that
    # they are not None, avoiding false "no attribute" errors.
    import pandas
    import pyarrow
    import geopandas  # type: ignore
    from google.cloud import bigquery_storage  # type: ignore
    from google.cloud.bigquery.dataset import DatasetReference


_NO_GEOPANDAS_ERROR = (
    "The geopandas library is not installed, please install "
    "geopandas to use the to_geodataframe() function."
)
_NO_PYARROW_ERROR = (
    "The pyarrow library is not installed, please install "
    "pyarrow to use the to_arrow() function."
)
_NO_SHAPELY_ERROR = (
    "The shapely library is not installed, please install "
    "shapely to use the geography_as_object option."
)

_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'

_NO_SUPPORTED_DTYPE = (
    "The dtype cannot be converted to a pandas ExtensionArray "
    "because the necessary `__from_arrow__` attribute is missing."
)


def _reference_getter(table):
    """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
    this table.

    Returns:
        google.cloud.bigquery.table.TableReference: pointer to this table.
    """
    from google.cloud.bigquery import dataset

    dataset_ref = dataset.DatasetReference(table.project, table.dataset_id)
    return TableReference(dataset_ref, table.table_id)
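# ``_reference_getter`` is attached below as ``reference = property(_reference_getter)``
# on both ``Table`` and ``TableListItem``, so ``table.reference`` yields a
# ``TableReference`` built from the instance's project, dataset, and table IDs.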

def _view_use_legacy_sql_getter(table):
    """bool: Specifies whether to execute the view with Legacy or Standard SQL.

    This boolean specifies whether to execute the view with Legacy SQL
    (:data:`True`) or Standard SQL (:data:`False`). The client-side default is
    :data:`False`. The server-side default is :data:`True`. If this table is
    not a view, :data:`None` is returned.

    Raises:
        ValueError: For invalid value types.
    """
    view = table._properties.get("view")
    if view is not None:
        # The server-side default for useLegacySql is True.
        return view.get("useLegacySql", True)
    # In some cases, such as in a table list, no view object is present, but the
    # resource still represents a view. Use the type as a fallback.
    if table.table_type == "VIEW":
        # The server-side default for useLegacySql is True.
        return True
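# Illustrative outcomes of the getter above, assuming these ``_properties`` payloads:
#
#     {"view": {}}                       -> True   (server-side default)
#     {"view": {"useLegacySql": False}}  -> False
#     {"type": "VIEW"}                   -> True   (fallback on the table type)
#     {"type": "TABLE"}                  -> None   (not a view)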

class _TableBase:
    """Base class for Table-related classes with common functionality."""

    _PROPERTY_TO_API_FIELD: Dict[str, Union[str, List[str]]] = {
        "dataset_id": ["tableReference", "datasetId"],
        "project": ["tableReference", "projectId"],
        "table_id": ["tableReference", "tableId"],
    }

    def __init__(self):
        self._properties = {}

    @property
    def project(self) -> str:
        """Project bound to the table."""
        return _helpers._get_sub_prop(
            self._properties, self._PROPERTY_TO_API_FIELD["project"]
        )

    @property
    def dataset_id(self) -> str:
        """ID of dataset containing the table."""
        return _helpers._get_sub_prop(
            self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"]
        )

    @property
    def table_id(self) -> str:
        """The table ID."""
        return _helpers._get_sub_prop(
            self._properties, self._PROPERTY_TO_API_FIELD["table_id"]
        )

    @property
    def path(self) -> str:
        """URL path for the table's APIs."""
        return (
            f"/projects/{self.project}/datasets/{self.dataset_id}"
            f"/tables/{self.table_id}"
        )

    def __eq__(self, other):
        if isinstance(other, _TableBase):
            return (
                self.project == other.project
                and self.dataset_id == other.dataset_id
                and self.table_id == other.table_id
            )
        else:
            return NotImplemented

    def __hash__(self):
        return hash((self.project, self.dataset_id, self.table_id))
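# For a _TableBase-derived object pointing at ``my-project.mydataset.mytable``,
# ``path`` evaluates to ``/projects/my-project/datasets/mydataset/tables/mytable``;
# equality and hashing are based solely on the (project, dataset_id, table_id) triple.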

class TableReference(_TableBase):
    """TableReferences are pointers to tables.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference

    Args:
        dataset_ref: A pointer to the dataset
        table_id: The ID of the table
    """

    _PROPERTY_TO_API_FIELD = {
        "dataset_id": "datasetId",
        "project": "projectId",
        "table_id": "tableId",
    }

    def __init__(self, dataset_ref: "DatasetReference", table_id: str):
        self._properties = {}

        _helpers._set_sub_prop(
            self._properties,
            self._PROPERTY_TO_API_FIELD["project"],
            dataset_ref.project,
        )
        _helpers._set_sub_prop(
            self._properties,
            self._PROPERTY_TO_API_FIELD["dataset_id"],
            dataset_ref.dataset_id,
        )
        _helpers._set_sub_prop(
            self._properties,
            self._PROPERTY_TO_API_FIELD["table_id"],
            table_id,
        )

    @classmethod
    def from_string(
        cls, table_id: str, default_project: str = None
    ) -> "TableReference":
        """Construct a table reference from table ID string.

        Args:
            table_id (str):
                A table ID in standard SQL format. If ``default_project``
                is not specified, this must include a project ID, dataset
                ID, and table ID, each separated by ``.``.
            default_project (Optional[str]):
                The project ID to use when ``table_id`` does not
                include a project ID.

        Returns:
            TableReference: Table reference parsed from ``table_id``.

        Examples:
            >>> TableReference.from_string('my-project.mydataset.mytable')
            TableRef...(DatasetRef...('my-project', 'mydataset'), 'mytable')

        Raises:
            ValueError:
                If ``table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        from google.cloud.bigquery.dataset import DatasetReference

        (
            output_project_id,
            output_dataset_id,
            output_table_id,
        ) = _helpers._parse_3_part_id(
            table_id, default_project=default_project, property_name="table_id"
        )

        return cls(
            DatasetReference(output_project_id, output_dataset_id), output_table_id
        )

    @classmethod
    def from_api_repr(cls, resource: dict) -> "TableReference":
        """Factory: construct a table reference given its API representation.

        Args:
            resource (Dict[str, object]):
                Table reference representation returned from the API

        Returns:
            google.cloud.bigquery.table.TableReference:
                Table reference parsed from ``resource``.
        """
        from google.cloud.bigquery.dataset import DatasetReference

        project = resource["projectId"]
        dataset_id = resource["datasetId"]
        table_id = resource["tableId"]

        return cls(DatasetReference(project, dataset_id), table_id)

    def to_api_repr(self) -> dict:
        """Construct the API resource representation of this table reference.

        Returns:
            Dict[str, object]: Table reference represented as an API resource
        """
        return copy.deepcopy(self._properties)

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Install the ``google-cloud-bigquery-storage`` package to use this
        feature.

        If the ``table_id`` contains a partition identifier (e.g.
        ``my_table$201812``) or a snapshot identifier (e.g.
        ``mytable@1234567890``), it is ignored. Use
        :class:`google.cloud.bigquery_storage.types.ReadSession.TableReadOptions`
        to filter rows by partition. Use
        :class:`google.cloud.bigquery_storage.types.ReadSession.TableModifiers`
        to select a specific snapshot to read from.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """

        table_id, _, _ = self.table_id.partition("@")
        table_id, _, _ = table_id.partition("$")

        table_ref = (
            f"projects/{self.project}/datasets/{self.dataset_id}/tables/{table_id}"
        )
        return table_ref
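    # Sketch of the behaviour above (doctest-style, for illustration only):
    #
    #     >>> ref = TableReference.from_string("my-project.mydataset.mytable$20181225")
    #     >>> ref.to_bqstorage()
    #     'projects/my-project/datasets/mydataset/tables/mytable'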

    def __str__(self):
        return f"{self.project}.{self.dataset_id}.{self.table_id}"

    def __repr__(self):
        from google.cloud.bigquery.dataset import DatasetReference

        dataset_ref = DatasetReference(self.project, self.dataset_id)
        return f"TableReference({dataset_ref!r}, '{self.table_id}')"

class Table(_TableBase):
    """Tables represent a set of rows whose values correspond to a schema.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource-table

    Args:
        table_ref (Union[google.cloud.bigquery.table.TableReference, str]):
            A pointer to a table. If ``table_ref`` is a string, it must
            include a project ID, dataset ID, and table ID, each separated
            by ``.``.
        schema (Optional[Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]]):
            The table's schema. If any item is a mapping, its content must be
            compatible with
            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
    """

    _PROPERTY_TO_API_FIELD = {
        **_TableBase._PROPERTY_TO_API_FIELD,
        "clustering_fields": "clustering",
        "created": "creationTime",
        "description": "description",
        "encryption_configuration": "encryptionConfiguration",
        "etag": "etag",
        "expires": "expirationTime",
        "external_data_configuration": "externalDataConfiguration",
        "friendly_name": "friendlyName",
        "full_table_id": "id",
        "labels": "labels",
        "location": "location",
        "modified": "lastModifiedTime",
        "mview_enable_refresh": "materializedView",
        "mview_last_refresh_time": ["materializedView", "lastRefreshTime"],
        "mview_query": "materializedView",
        "mview_refresh_interval": "materializedView",
        "num_bytes": "numBytes",
        "num_rows": "numRows",
        "partition_expiration": "timePartitioning",
        "partitioning_type": "timePartitioning",
        "range_partitioning": "rangePartitioning",
        "time_partitioning": "timePartitioning",
        "schema": "schema",
        "snapshot_definition": "snapshotDefinition",
        "clone_definition": "cloneDefinition",
        "streaming_buffer": "streamingBuffer",
        "self_link": "selfLink",
        "type": "type",
        "view_use_legacy_sql": "view",
        "view_query": "view",
        "require_partition_filter": "requirePartitionFilter",
    }
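    # The mapping above translates Python property names to REST resource fields;
    # a string value is a top-level key, while a list value (e.g.
    # ["materializedView", "lastRefreshTime"]) addresses a nested sub-property via
    # the ``_helpers._get_sub_prop`` / ``_set_sub_prop`` utilities.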

    def __init__(self, table_ref, schema=None) -> None:
        table_ref = _table_arg_to_table_ref(table_ref)
        self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}}
        # Let the @property do validation.
        if schema is not None:
            self.schema = schema

    reference = property(_reference_getter)

    @property
    def require_partition_filter(self):
        """bool: If set to true, queries over the partitioned table must
        specify a partition filter that can be used for partition
        elimination.
        """
        return self._properties.get(
            self._PROPERTY_TO_API_FIELD["require_partition_filter"]
        )

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties[
            self._PROPERTY_TO_API_FIELD["require_partition_filter"]
        ] = value

    @property
    def schema(self):
        """Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]:
            Table's schema.

        Raises:
            Exception:
                If ``schema`` is not a sequence, or if any item in the sequence
                is not a :class:`~google.cloud.bigquery.schema.SchemaField`
                instance or a compatible mapping representation of the field.
        """
        prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
        if not prop:
            return []
        else:
            return _parse_schema_resource(prop)

    @schema.setter
    def schema(self, value):
        api_field = self._PROPERTY_TO_API_FIELD["schema"]

        if value is None:
            self._properties[api_field] = None
        else:
            value = _to_schema_fields(value)
            self._properties[api_field] = {"fields": _build_schema_resource(value)}

    @property
    def labels(self):
        """Dict[str, str]: Labels for the table.

        This method always returns a dict. To change a table's labels,
        modify the dict, then call ``Client.update_table``. To delete a
        label, set its value to :data:`None` before updating.

        Raises:
            ValueError: If ``value`` type is invalid.
        """
        return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {})

    @labels.setter
    def labels(self, value):
        if not isinstance(value, dict):
            raise ValueError("Pass a dict")
        self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value

    @property
    def encryption_configuration(self):
        """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
        encryption configuration for the table.

        Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
        if using default encryption.

        See `protecting data with Cloud KMS keys
        <https://cloud.google.com/bigquery/docs/customer-managed-encryption>`_
        in the BigQuery documentation.
        """
        prop = self._properties.get(
            self._PROPERTY_TO_API_FIELD["encryption_configuration"]
        )
        if prop is not None:
            prop = EncryptionConfiguration.from_api_repr(prop)
        return prop

    @encryption_configuration.setter
    def encryption_configuration(self, value):
        api_repr = value
        if value is not None:
            api_repr = value.to_api_repr()
        self._properties[
            self._PROPERTY_TO_API_FIELD["encryption_configuration"]
        ] = api_repr

    @property
    def created(self):
        """Union[datetime.datetime, None]: Datetime at which the table was
        created (:data:`None` until set from the server).
        """
        creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"])
        if creation_time is not None:
            # creation_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(creation_time)
            )

    @property
    def etag(self):
        """Union[str, None]: ETag for the table resource (:data:`None` until
        set from the server).
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"])

    @property
    def modified(self):
        """Union[datetime.datetime, None]: Datetime at which the table was last
        modified (:data:`None` until set from the server).
        """
        modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"])
        if modified_time is not None:
            # modified_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(modified_time)
            )

    @property
    def num_bytes(self):
        """Union[int, None]: The size of the table in bytes (:data:`None` until
        set from the server).
        """
        return _helpers._int_or_none(
            self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"])
        )

    @property
    def num_rows(self):
        """Union[int, None]: The number of rows in the table (:data:`None`
        until set from the server).
        """
        return _helpers._int_or_none(
            self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"])
        )

    @property
    def self_link(self):
        """Union[str, None]: URL for the table resource (:data:`None` until set
        from the server).
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"])

    @property
    def full_table_id(self):
        """Union[str, None]: ID for the table (:data:`None` until set from the
        server).

        In the format ``project-id:dataset_id.table_id``.
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"])

    @property
    def table_type(self):
        """Union[str, None]: The type of the table (:data:`None` until set from
        the server).

        Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or
        ``'EXTERNAL'``.
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["type"])

    @property
    def range_partitioning(self):
        """Optional[google.cloud.bigquery.table.RangePartitioning]:
        Configures range-based partitioning for a table.

        .. note::
            **Beta**. The integer range partitioning feature is in a
            pre-release state and might change or have limited support.

        Only specify at most one of
        :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
        :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.

        Raises:
            ValueError:
                If the value is not
                :class:`~google.cloud.bigquery.table.RangePartitioning` or
                :data:`None`.
        """
        resource = self._properties.get(
            self._PROPERTY_TO_API_FIELD["range_partitioning"]
        )
        if resource is not None:
            return RangePartitioning(_properties=resource)

    @range_partitioning.setter
    def range_partitioning(self, value):
        resource = value
        if isinstance(value, RangePartitioning):
            resource = value._properties
        elif value is not None:
            raise ValueError(
                "Expected value to be RangePartitioning or None, got {}.".format(value)
            )
        self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource

    @property
    def time_partitioning(self):
        """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based
        partitioning for a table.

        Only specify at most one of
        :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
        :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.

        Raises:
            ValueError:
                If the value is not
                :class:`~google.cloud.bigquery.table.TimePartitioning` or
                :data:`None`.
        """
        prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"])
        if prop is not None:
            return TimePartitioning.from_api_repr(prop)

    @time_partitioning.setter
    def time_partitioning(self, value):
        api_repr = value
        if isinstance(value, TimePartitioning):
            api_repr = value.to_api_repr()
        elif value is not None:
            raise ValueError(
                "value must be google.cloud.bigquery.table.TimePartitioning or None"
            )
        self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr

    @property
    def partitioning_type(self):
        """Union[str, None]: Time partitioning of the table if it is
        partitioned (Defaults to :data:`None`).
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "Table.time_partitioning.type_ instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        if self.time_partitioning is not None:
            return self.time_partitioning.type_

    @partitioning_type.setter
    def partitioning_type(self, value):
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "Table.time_partitioning.type_ instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"]
        if self.time_partitioning is None:
            self._properties[api_field] = {}
        self._properties[api_field]["type"] = value

    @property
    def partition_expiration(self):
        """Union[int, None]: Expiration time in milliseconds for a partition.

        If :attr:`partition_expiration` is set and :attr:`type_` is
        not set, :attr:`type_` will default to
        :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`.
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "Table.time_partitioning.expiration_ms instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        if self.time_partitioning is not None:
            return self.time_partitioning.expiration_ms

    @partition_expiration.setter
    def partition_expiration(self, value):
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "Table.time_partitioning.expiration_ms instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"]

        if self.time_partitioning is None:
            self._properties[api_field] = {"type": TimePartitioningType.DAY}
        self._properties[api_field]["expirationMs"] = str(value)
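    # Illustrative effect of the deprecated setter above, assuming no existing
    # time partitioning configuration:
    #
    #     table.partition_expiration = 86400000
    #     # -> _properties["timePartitioning"] == {"type": "DAY", "expirationMs": "86400000"}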

    @property
    def clustering_fields(self):
        """Union[List[str], None]: Fields defining clustering for the table

        (Defaults to :data:`None`).

        Clustering fields are immutable after table creation.

        .. note::

            BigQuery supports clustering for both partitioned and
            non-partitioned tables.
        """
        prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"])
        if prop is not None:
            return list(prop.get("fields", ()))

    @clustering_fields.setter
    def clustering_fields(self, value):
        """Union[List[str], None]: Fields defining clustering for the table

        (Defaults to :data:`None`).
        """
        api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"]

        if value is not None:
            prop = self._properties.setdefault(api_field, {})
            prop["fields"] = value
        else:
            # In order to allow unsetting clustering fields completely, we explicitly
            # set this property to None (as opposed to merely removing the key).
            self._properties[api_field] = None

    @property
    def description(self):
        """Union[str, None]: Description of the table (defaults to
        :data:`None`).

        Raises:
            ValueError: For invalid value types.
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["description"])

    @description.setter
    def description(self, value):
        if not isinstance(value, str) and value is not None:
            raise ValueError("Pass a string, or None")
        self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value

    @property
    def expires(self):
        """Union[datetime.datetime, None]: Datetime at which the table will be
        deleted.

        Raises:
            ValueError: For invalid value types.
        """
        expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"])
        if expiration_time is not None:
            # expiration_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(expiration_time)
            )

    @expires.setter
    def expires(self, value):
        if not isinstance(value, datetime.datetime) and value is not None:
            raise ValueError("Pass a datetime, or None")
        value_ms = google.cloud._helpers._millis_from_datetime(value)
        self._properties[
            self._PROPERTY_TO_API_FIELD["expires"]
        ] = _helpers._str_or_none(value_ms)

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the table (defaults to :data:`None`).

        Raises:
            ValueError: For invalid value types.
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"])

    @friendly_name.setter
    def friendly_name(self, value):
        if not isinstance(value, str) and value is not None:
            raise ValueError("Pass a string, or None")
        self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value

    @property
    def location(self):
        """Union[str, None]: Location in which the table is hosted

        Defaults to :data:`None`.
        """
        return self._properties.get(self._PROPERTY_TO_API_FIELD["location"])

    @property
    def view_query(self):
        """Union[str, None]: SQL query defining the table as a view (defaults
        to :data:`None`).

        By default, the query is treated as Standard SQL. To use Legacy
        SQL, set :attr:`view_use_legacy_sql` to :data:`True`.

        Raises:
            ValueError: For invalid value types.
        """
        api_field = self._PROPERTY_TO_API_FIELD["view_query"]
        return _helpers._get_sub_prop(self._properties, [api_field, "query"])

    @view_query.setter
    def view_query(self, value):
        if not isinstance(value, str):
            raise ValueError("Pass a string")

        api_field = self._PROPERTY_TO_API_FIELD["view_query"]
        _helpers._set_sub_prop(self._properties, [api_field, "query"], value)
        view = self._properties[api_field]
        # The service defaults useLegacySql to True, but this
        # client uses Standard SQL by default.
        if view.get("useLegacySql") is None:
            view["useLegacySql"] = False

    @view_query.deleter
    def view_query(self):
        """Delete SQL query defining the table as a view."""
        self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None)

    view_use_legacy_sql = property(_view_use_legacy_sql_getter)

    @view_use_legacy_sql.setter  # type: ignore  # (redefinition from above)
    def view_use_legacy_sql(self, value):
        if not isinstance(value, bool):
            raise ValueError("Pass a boolean")

        api_field = self._PROPERTY_TO_API_FIELD["view_query"]
        if self._properties.get(api_field) is None:
            self._properties[api_field] = {}
        self._properties[api_field]["useLegacySql"] = value

    @property
    def mview_query(self):
        """Optional[str]: SQL query defining the table as a materialized
        view (defaults to :data:`None`).
        """
        api_field = self._PROPERTY_TO_API_FIELD["mview_query"]
        return _helpers._get_sub_prop(self._properties, [api_field, "query"])

    @mview_query.setter
    def mview_query(self, value):
        api_field = self._PROPERTY_TO_API_FIELD["mview_query"]
        _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value))

    @mview_query.deleter
    def mview_query(self):
        """Delete SQL query defining the table as a materialized view."""
        self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None)

    @property
    def mview_last_refresh_time(self):
        """Optional[datetime.datetime]: Datetime at which the materialized view was last
        refreshed (:data:`None` until set from the server).
        """
        refresh_time = _helpers._get_sub_prop(
            self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"]
        )
        if refresh_time is not None:
            # refresh_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000 * int(refresh_time)
            )

    @property
    def mview_enable_refresh(self):
        """Optional[bool]: Enable automatic refresh of the materialized view
        when the base table is updated. The default value is :data:`True`.
        """
        api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"]
        return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"])

    @mview_enable_refresh.setter
    def mview_enable_refresh(self, value):
        api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"]
        return _helpers._set_sub_prop(
            self._properties, [api_field, "enableRefresh"], value
        )

    @property
    def mview_refresh_interval(self):
        """Optional[datetime.timedelta]: The maximum frequency at which this
        materialized view will be refreshed. The default value is 1800000
        milliseconds (30 minutes).
        """
        api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
        refresh_interval = _helpers._get_sub_prop(
            self._properties, [api_field, "refreshIntervalMs"]
        )
        if refresh_interval is not None:
            return datetime.timedelta(milliseconds=int(refresh_interval))

    @mview_refresh_interval.setter
    def mview_refresh_interval(self, value):
        if value is None:
            refresh_interval_ms = None
        else:
            refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1))

        api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
        _helpers._set_sub_prop(
            self._properties,
            [api_field, "refreshIntervalMs"],
            refresh_interval_ms,
        )
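    # Example of the timedelta-to-milliseconds conversion used above (illustrative):
    #
    #     table.mview_refresh_interval = datetime.timedelta(minutes=30)
    #     # value // datetime.timedelta(milliseconds=1) == 1800000, stored as the
    #     # string "1800000" under ["materializedView"]["refreshIntervalMs"].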

    @property
    def streaming_buffer(self):
        """google.cloud.bigquery.StreamingBuffer: Information about a table's
        streaming buffer.
        """
        sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"])
        if sb is not None:
            return StreamingBuffer(sb)

    @property
    def external_data_configuration(self):
        """Union[google.cloud.bigquery.ExternalConfig, None]: Configuration for
        an external data source (defaults to :data:`None`).

        Raises:
            ValueError: For invalid value types.
        """
        prop = self._properties.get(
            self._PROPERTY_TO_API_FIELD["external_data_configuration"]
        )
        if prop is not None:
            prop = ExternalConfig.from_api_repr(prop)
        return prop

    @external_data_configuration.setter
    def external_data_configuration(self, value):
        if not (value is None or isinstance(value, ExternalConfig)):
            raise ValueError("Pass an ExternalConfig or None")
        api_repr = value
        if value is not None:
            api_repr = value.to_api_repr()
        self._properties[
            self._PROPERTY_TO_API_FIELD["external_data_configuration"]
        ] = api_repr

    @property
    def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
        """Information about the snapshot. This value is set via snapshot creation.

        See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition
        """
        snapshot_info = self._properties.get(
            self._PROPERTY_TO_API_FIELD["snapshot_definition"]
        )
        if snapshot_info is not None:
            snapshot_info = SnapshotDefinition(snapshot_info)
        return snapshot_info

    @property
    def clone_definition(self) -> Optional["CloneDefinition"]:
        """Information about the clone. This value is set via clone creation.

        See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition
        """
        clone_info = self._properties.get(
            self._PROPERTY_TO_API_FIELD["clone_definition"]
        )
        if clone_info is not None:
            clone_info = CloneDefinition(clone_info)
        return clone_info

    @classmethod
    def from_string(cls, full_table_id: str) -> "Table":
        """Construct a table from fully-qualified table ID.

        Args:
            full_table_id (str):
                A fully-qualified table ID in standard SQL format. Must
                include a project ID, dataset ID, and table ID, each
                separated by ``.``.

        Returns:
            Table: Table parsed from ``full_table_id``.

        Examples:
            >>> Table.from_string('my-project.mydataset.mytable')
            Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))

        Raises:
            ValueError:
                If ``full_table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        return cls(TableReference.from_string(full_table_id))

    @classmethod
    def from_api_repr(cls, resource: dict) -> "Table":
        """Factory: construct a table given its API representation.

        Args:
            resource (Dict[str, object]):
                Table resource representation from the API

        Returns:
            google.cloud.bigquery.table.Table: Table parsed from ``resource``.

        Raises:
            KeyError:
                If the ``resource`` lacks the key ``'tableReference'``, or if
                the ``dict`` stored within the key ``'tableReference'`` lacks
                the keys ``'tableId'``, ``'projectId'``, or ``'datasetId'``.
        """
        from google.cloud.bigquery import dataset

        if (
            "tableReference" not in resource
            or "tableId" not in resource["tableReference"]
        ):
            raise KeyError(
                "Resource lacks required identity information:"
                '["tableReference"]["tableId"]'
            )
        project_id = _helpers._get_sub_prop(
            resource, cls._PROPERTY_TO_API_FIELD["project"]
        )
        table_id = _helpers._get_sub_prop(
            resource, cls._PROPERTY_TO_API_FIELD["table_id"]
        )
        dataset_id = _helpers._get_sub_prop(
            resource, cls._PROPERTY_TO_API_FIELD["dataset_id"]
        )
        dataset_ref = dataset.DatasetReference(project_id, dataset_id)

        table = cls(dataset_ref.table(table_id))
        table._properties = resource

        return table

    def to_api_repr(self) -> dict:
        """Constructs the API resource of this table.

        Returns:
            Dict[str, object]: Table represented as an API resource
        """
        return copy.deepcopy(self._properties)

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """
        return self.reference.to_bqstorage()

    def _build_resource(self, filter_fields):
        """Generate a resource for ``update``."""
        return _helpers._build_resource_from_properties(self, filter_fields)

    def __repr__(self):
        return "Table({})".format(repr(self.reference))

    def __str__(self):
        return f"{self.project}.{self.dataset_id}.{self.table_id}"

class TableListItem(_TableBase):
    """A read-only table resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the table
    properties when listing tables. Notably,
    :attr:`~google.cloud.bigquery.table.Table.schema` and
    :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for tables.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list>`_.


    Args:
        resource (Dict[str, object]):
            A table-like resource object from a table list response. A
            ``tableReference`` property is required.

    Raises:
        ValueError:
            If ``tableReference`` or one of its required members is missing
            from ``resource``.
    """

    def __init__(self, resource):
        if "tableReference" not in resource:
            raise ValueError("resource must contain a tableReference value")
        if "projectId" not in resource["tableReference"]:
            raise ValueError(
                "resource['tableReference'] must contain a projectId value"
            )
        if "datasetId" not in resource["tableReference"]:
            raise ValueError(
                "resource['tableReference'] must contain a datasetId value"
            )
        if "tableId" not in resource["tableReference"]:
            raise ValueError("resource['tableReference'] must contain a tableId value")

        self._properties = resource

    @property
    def created(self):
        """Union[datetime.datetime, None]: Datetime at which the table was
        created (:data:`None` until set from the server).
        """
        creation_time = self._properties.get("creationTime")
        if creation_time is not None:
            # creation_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(creation_time)
            )

    @property
    def expires(self):
        """Union[datetime.datetime, None]: Datetime at which the table will be
        deleted.
        """
        expiration_time = self._properties.get("expirationTime")
        if expiration_time is not None:
            # expiration_time will be in milliseconds.
            return google.cloud._helpers._datetime_from_microseconds(
                1000.0 * float(expiration_time)
            )

    reference = property(_reference_getter)

    @property
    def labels(self):
        """Dict[str, str]: Labels for the table.

        This method always returns a dict. To change a table's labels,
        modify the dict, then call ``Client.update_table``. To delete a
        label, set its value to :data:`None` before updating.
        """
        return self._properties.setdefault("labels", {})

    @property
    def full_table_id(self):
        """Union[str, None]: ID for the table (:data:`None` until set from the
        server).

        In the format ``project_id:dataset_id.table_id``.
        """
        return self._properties.get("id")

    @property
    def table_type(self):
        """Union[str, None]: The type of the table (:data:`None` until set from
        the server).

        Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``.
        """
        return self._properties.get("type")

    @property
    def time_partitioning(self):
        """google.cloud.bigquery.table.TimePartitioning: Configures time-based
        partitioning for a table.
        """
        prop = self._properties.get("timePartitioning")
        if prop is not None:
            return TimePartitioning.from_api_repr(prop)

    @property
    def partitioning_type(self):
        """Union[str, None]: Time partitioning of the table if it is
        partitioned (Defaults to :data:`None`).
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "TableListItem.time_partitioning.type_ instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        if self.time_partitioning is not None:
            return self.time_partitioning.type_

    @property
    def partition_expiration(self):
        """Union[int, None]: Expiration time in milliseconds for a partition.

        If this property is set and :attr:`type_` is not set, :attr:`type_`
        will default to :attr:`TimePartitioningType.DAY`.
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "TableListItem.time_partitioning.expiration_ms instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        if self.time_partitioning is not None:
            return self.time_partitioning.expiration_ms

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the table (defaults to :data:`None`)."""
        return self._properties.get("friendlyName")

    view_use_legacy_sql = property(_view_use_legacy_sql_getter)

    @property
    def clustering_fields(self):
        """Union[List[str], None]: Fields defining clustering for the table

        (Defaults to :data:`None`).

        Clustering fields are immutable after table creation.

        .. note::

            BigQuery supports clustering for both partitioned and
            non-partitioned tables.
        """
        prop = self._properties.get("clustering")
        if prop is not None:
            return list(prop.get("fields", ()))

    @classmethod
    def from_string(cls, full_table_id: str) -> "TableListItem":
        """Construct a table from fully-qualified table ID.

        Args:
            full_table_id (str):
                A fully-qualified table ID in standard SQL format. Must
                include a project ID, dataset ID, and table ID, each
                separated by ``.``.

        Returns:
            Table: Table parsed from ``full_table_id``.

        Examples:
            >>> Table.from_string('my-project.mydataset.mytable')
            Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))

        Raises:
            ValueError:
                If ``full_table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        return cls(
            {"tableReference": TableReference.from_string(full_table_id).to_api_repr()}
        )

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """
        return self.reference.to_bqstorage()

    def to_api_repr(self) -> dict:
        """Constructs the API resource of this table.

        Returns:
            Dict[str, object]: Table represented as an API resource
        """
        return copy.deepcopy(self._properties)

def _row_from_mapping(mapping, schema):
    """Convert a mapping to a row tuple using the schema.

    Args:
        mapping (Dict[str, object]):
            Mapping of row data: must contain keys for all required fields in
            the schema. Keys which do not correspond to a field in the schema
            are ignored.
        schema (List[google.cloud.bigquery.schema.SchemaField]):
            The schema of the table destination for the rows

    Returns:
        Tuple[object]:
            Tuple whose elements are ordered according to the schema.

    Raises:
        ValueError: If schema is empty.
    """
    if len(schema) == 0:
        raise ValueError(_TABLE_HAS_NO_SCHEMA)

    row = []
    for field in schema:
        if field.mode == "REQUIRED":
            row.append(mapping[field.name])
        elif field.mode == "REPEATED":
            row.append(mapping.get(field.name, ()))
        elif field.mode == "NULLABLE":
            row.append(mapping.get(field.name))
        else:
            raise ValueError("Unknown field mode: {}".format(field.mode))
    return tuple(row)
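# Illustrative sketch of ``_row_from_mapping`` (field names are hypothetical):
#
#     schema = [SchemaField("name", "STRING", mode="REQUIRED"),
#               SchemaField("age", "INTEGER", mode="NULLABLE")]
#     _row_from_mapping({"name": "Ada", "extra": 1}, schema)  # -> ('Ada', None)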

class StreamingBuffer(object):
    """Information about a table's streaming buffer.

    See https://cloud.google.com/bigquery/streaming-data-into-bigquery.

    Args:
        resource (Dict[str, object]):
            streaming buffer representation returned from the API
    """

    def __init__(self, resource):
        self.estimated_bytes = None
        if "estimatedBytes" in resource:
            self.estimated_bytes = int(resource["estimatedBytes"])
        self.estimated_rows = None
        if "estimatedRows" in resource:
            self.estimated_rows = int(resource["estimatedRows"])
        self.oldest_entry_time = None
        if "oldestEntryTime" in resource:
            self.oldest_entry_time = google.cloud._helpers._datetime_from_microseconds(
                1000.0 * int(resource["oldestEntryTime"])
            )


class SnapshotDefinition:
    """Information about base table and snapshot time of the snapshot.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition

    Args:
        resource: Snapshot definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        self.base_table_reference = None
        if "baseTableReference" in resource:
            self.base_table_reference = TableReference.from_api_repr(
                resource["baseTableReference"]
            )

        self.snapshot_time = None
        if "snapshotTime" in resource:
            self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime(
                resource["snapshotTime"]
            )


class CloneDefinition:
    """Information about base table and clone time of the clone.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition

    Args:
        resource: Clone definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        self.base_table_reference = None
        if "baseTableReference" in resource:
            self.base_table_reference = TableReference.from_api_repr(
                resource["baseTableReference"]
            )

        self.clone_time = None
        if "cloneTime" in resource:
            self.clone_time = google.cloud._helpers._rfc3339_to_datetime(
                resource["cloneTime"]
            )

class Row(object):
    """A BigQuery row.

    Values can be accessed by position (index), by key like a dict,
    or as properties.

    Args:
        values (Sequence[object]): The row values
        field_to_index (Dict[str, int]):
            A mapping from schema field names to indexes
    """

    # Choose unusual field names to try to avoid conflict with schema fields.
    __slots__ = ("_xxx_values", "_xxx_field_to_index")

    def __init__(self, values, field_to_index) -> None:
        self._xxx_values = values
        self._xxx_field_to_index = field_to_index

    def values(self):
        """Return the values included in this row.

        Returns:
            Sequence[object]: A sequence of length ``len(row)``.
        """
        return copy.deepcopy(self._xxx_values)

    def keys(self) -> Iterable[str]:
        """Return the keys for using a row as a dict.

        Returns:
            Iterable[str]: The keys corresponding to the columns of a row

        Examples:

            >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys())
            ['x', 'y']
        """
        return self._xxx_field_to_index.keys()

    def items(self) -> Iterable[Tuple[str, Any]]:
        """Return items as ``(key, value)`` pairs.

        Returns:
            Iterable[Tuple[str, object]]:
                The ``(key, value)`` pairs representing this row.

        Examples:

            >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items())
            [('x', 'a'), ('y', 'b')]
        """
        for key, index in self._xxx_field_to_index.items():
            yield (key, copy.deepcopy(self._xxx_values[index]))

    def get(self, key: str, default: Any = None) -> Any:
        """Return a value for key, with a default value if it does not exist.

        Args:
            key (str): The key of the column to access
            default (object):
                The default value to use if the key does not exist. (Defaults
                to :data:`None`.)

        Returns:
            object:
                The value associated with the provided key, or a default value.

        Examples:
            When the key exists, the value associated with it is returned.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x')
            'a'

            The default value is :data:`None` when the key does not exist.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
            None

            The default value can be overridden with the ``default`` parameter.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
            ''

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '')
            ''
        """
        index = self._xxx_field_to_index.get(key)
        if index is None:
            return default
        return self._xxx_values[index]

    def __getattr__(self, name):
        value = self._xxx_field_to_index.get(name)
        if value is None:
            raise AttributeError("no row field {!r}".format(name))
        return self._xxx_values[value]

    def __len__(self):
        return len(self._xxx_values)

    def __getitem__(self, key):
        if isinstance(key, str):
            value = self._xxx_field_to_index.get(key)
            if value is None:
                raise KeyError("no row field {!r}".format(key))
            key = value
        return self._xxx_values[key]

    def __eq__(self, other):
        if not isinstance(other, Row):
            return NotImplemented
        return (
            self._xxx_values == other._xxx_values
            and self._xxx_field_to_index == other._xxx_field_to_index
        )

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # sort field dict by value, for determinism
        items = sorted(self._xxx_field_to_index.items(), key=operator.itemgetter(1))
        f2i = "{" + ", ".join("%r: %d" % item for item in items) + "}"
        return "Row({}, {})".format(self._xxx_values, f2i)
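# Doctest-style illustration of the Row accessors defined above:
#
#     >>> row = Row(('a', 'b'), {'x': 0, 'y': 1})
#     >>> row.x, row['y'], row[0], len(row)
#     ('a', 'b', 'a', 2)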

class _NoopProgressBarQueue(object):
    """A fake Queue class that does nothing.

    This is used when there is no progress bar to send updates to.
    """

    def put_nowait(self, item):
        """Don't actually do anything with the item."""


class RowIterator(HTTPIterator):
    """A class for iterating through HTTP/JSON API row list responses.

    Args:
        client (Optional[google.cloud.bigquery.Client]):
            The API client instance. This should always be non-`None`, except for
            subclasses that do not use it, namely the ``_EmptyRowIterator``.
        api_request (Callable[google.cloud._http.JSONConnection.api_request]):
            The function to use to make API requests.
        path (str): The method path to query for the list of items.
        schema (Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]):
            The table's schema. If any item is a mapping, its content must be
            compatible with
            :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
        page_token (str): A token identifying a page in a result set to start
            fetching results from.
        max_results (Optional[int]): The maximum number of results to fetch.
        page_size (Optional[int]): The maximum number of rows in each page
            of results from this request. Non-positive values are ignored.
            Defaults to a sensible value set by the API.
        extra_params (Optional[Dict[str, object]]):
            Extra query string parameters for the API call.
        table (Optional[Union[ \
            google.cloud.bigquery.table.Table, \
            google.cloud.bigquery.table.TableReference, \
        ]]):
            The table which these rows belong to, or a reference to it. Used to
            call the BigQuery Storage API to fetch rows.
        selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]):
            A subset of columns to select from this table.
        total_rows (Optional[int]):
            Total number of rows in the table.
        first_page_response (Optional[dict]):
            API response for the first page of results. These are returned when
            the first page is requested.
    """

    def __init__(
        self,
        client,
        api_request,
        path,
        schema,
        page_token=None,
        max_results=None,
        page_size=None,
        extra_params=None,
        table=None,
        selected_fields=None,
        total_rows=None,
        first_page_response=None,
    ):
        super(RowIterator, self).__init__(
            client,
            api_request,
            path,
            item_to_value=_item_to_row,
            items_key="rows",
            page_token=page_token,
            max_results=max_results,
            extra_params=extra_params,
            page_start=_rows_page_start,
            next_token="pageToken",
        )
        schema = _to_schema_fields(schema)
        self._field_to_index = _helpers._field_to_index_mapping(schema)
        self._page_size = page_size
        self._preserve_order = False
        self._project = client.project if client is not None else None
        self._schema = schema
        self._selected_fields = selected_fields
        self._table = table
        self._total_rows = total_rows
        self._first_page_response = first_page_response

    def _is_completely_cached(self):
        """Check if all results are completely cached.

        This is useful to know, because we can avoid alternative download
        mechanisms.
        """
        if self._first_page_response is None or self.next_page_token:
            return False

        return self._first_page_response.get(self._next_token) is None

    def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
        """Returns whether the BigQuery Storage API can be used.

        Returns:
            bool
                True if the BigQuery Storage client can be used or created.
        """
        using_bqstorage_api = bqstorage_client or create_bqstorage_client
        if not using_bqstorage_api:
            return False

        if self._is_completely_cached():
            return False

        if self.max_results is not None:
            return False

        try:
            from google.cloud import bigquery_storage  # noqa: F401
        except ImportError:
            return False

        try:
            _helpers.BQ_STORAGE_VERSIONS.verify_version()
        except LegacyBigQueryStorageError as exc:
            warnings.warn(str(exc))
            return False

        return True
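    # In short, the check above falls back to the REST endpoint whenever results are
    # already fully cached, ``max_results`` is set, ``google-cloud-bigquery-storage``
    # is not importable, or only a legacy bigquery_storage version is installed.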

    def _get_next_page_response(self):
        """Requests the next page from the path provided.

        Returns:
            Dict[str, object]:
                The parsed JSON response of the next page's contents.
        """
        if self._first_page_response:
            response = self._first_page_response
            self._first_page_response = None
            return response

        params = self._get_query_params()
        if self._page_size is not None:
            if self.page_number and "startIndex" in params:
                del params["startIndex"]
            params["maxResults"] = self._page_size
        return self.api_request(
            method=self._HTTP_METHOD, path=self.path, query_params=params
        )

    @property
    def schema(self):
        """List[google.cloud.bigquery.schema.SchemaField]: The subset of
        columns to be read from the table."""
        return list(self._schema)

    @property
    def total_rows(self):
        """int: The total number of rows in the table."""
        return self._total_rows

    def _maybe_warn_max_results(
        self,
        bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
    ):
        """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set.

        This helper method should be used directly in the relevant top-level public
        methods, so that the warning is issued for the correct line in user code.

        Args:
            bqstorage_client:
                The BigQuery Storage client intended to be used for downloading
                result rows.
        """
        if bqstorage_client is not None and self.max_results is not None:
            warnings.warn(
                "Cannot use bqstorage_client if max_results is set, "
                "reverting to fetching data with the REST endpoint.",
                stacklevel=3,
            )

    def _to_page_iterable(
        self, bqstorage_download, tabledata_list_download, bqstorage_client=None
    ):
        if not self._validate_bqstorage(bqstorage_client, False):
            bqstorage_client = None

        result_pages = (
            bqstorage_download()
            if bqstorage_client is not None
            else tabledata_list_download()
        )
        yield from result_pages

    def to_arrow_iterable(
        self,
        bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
        max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
    ) -> Iterator["pyarrow.RecordBatch"]:
        """[Beta] Create an iterable of :class:`pyarrow.RecordBatch`, to process the table as a stream.

        Args:
            bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
                A BigQuery Storage API client. If supplied, use the faster
                BigQuery Storage API to fetch rows from BigQuery.

                This method requires the ``pyarrow`` and
                ``google-cloud-bigquery-storage`` libraries.

                This method only exposes a subset of the capabilities of the
                BigQuery Storage API. For full access to all features
                (projections, filters, snapshots) use the Storage API directly.

            max_queue_size (Optional[int]):
                The maximum number of result pages to hold in the internal queue when
                streaming query results over the BigQuery Storage API. Ignored if
                Storage API is not used.

                By default, the max queue size is set to the number of BQ Storage streams
                created by the server. If ``max_queue_size`` is :data:`None`, the queue
                size is infinite.

        Returns:
            pyarrow.RecordBatch:
                A generator of :class:`~pyarrow.RecordBatch`.

        .. versionadded:: 2.31.0
        """
        self._maybe_warn_max_results(bqstorage_client)

        bqstorage_download = functools.partial(
            _pandas_helpers.download_arrow_bqstorage,
            self._project,
            self._table,
            bqstorage_client,
            preserve_order=self._preserve_order,
            selected_fields=self._selected_fields,
            max_queue_size=max_queue_size,
        )
        tabledata_list_download = functools.partial(
            _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema
        )
        return self._to_page_iterable(
            bqstorage_download,
            tabledata_list_download,
            bqstorage_client=bqstorage_client,
        )

    # If changing the signature of this method, make sure to apply the same
    # changes to job.QueryJob.to_arrow()

1742 def to_arrow( 

1743 self, 

1744 progress_bar_type: Optional[str] = None, 

1745 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

1746 create_bqstorage_client: bool = True, 

1747 ) -> "pyarrow.Table": 

1748 """[Beta] Create a class:`pyarrow.Table` by loading all pages of a 

1749 table or query. 

1750 

1751 Args: 

1752 progress_bar_type (Optional[str]): 

1753 If set, use the `tqdm <https://tqdm.github.io/>`_ library to 

1754 display a progress bar while the data downloads. Install the 

1755 ``tqdm`` package to use this feature. 

1756 

1757 Possible values of ``progress_bar_type`` include: 

1758 

1759 ``None`` 

1760 No progress bar. 

1761 ``'tqdm'`` 

1762 Use the :func:`tqdm.tqdm` function to print a progress bar 

1763 to :data:`sys.stdout`. 

1764 ``'tqdm_notebook'`` 

1765 Use the :func:`tqdm.notebook.tqdm` function to display a 

1766 progress bar as a Jupyter notebook widget. 

1767 ``'tqdm_gui'`` 

1768 Use the :func:`tqdm.tqdm_gui` function to display a 

1769 progress bar as a graphical dialog box. 

1770 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): 

1771 A BigQuery Storage API client. If supplied, use the faster BigQuery 

1772 Storage API to fetch rows from BigQuery. This API is a billable API. 

1773 

1774 This method requires the ``google-cloud-bigquery-storage`` library. 

1775 

1776 This method only exposes a subset of the capabilities of the 

1777 BigQuery Storage API. For full access to all features 

1778 (projections, filters, snapshots) use the Storage API directly. 

1779 create_bqstorage_client (Optional[bool]): 

1780 If ``True`` (default), create a BigQuery Storage API client using 

1781 the default API settings. The BigQuery Storage API is a faster way 

1782 to fetch rows from BigQuery. See the ``bqstorage_client`` parameter 

1783 for more information. 

1784 

1785 This argument does nothing if ``bqstorage_client`` is supplied. 

1786 

1787 .. versionadded:: 1.24.0 

1788 

1789 Returns: 

1790 pyarrow.Table 

1791 A :class:`pyarrow.Table` populated with row data and column 

1792 headers from the query results. The column headers are derived 

1793 from the destination table's schema. 

1794 

1795 Raises: 

1796 ValueError: If the :mod:`pyarrow` library cannot be imported. 

1797 

1798 

1799 .. versionadded:: 1.17.0 

1800 """ 

1801 if pyarrow is None: 

1802 raise ValueError(_NO_PYARROW_ERROR) 

1803 

1804 self._maybe_warn_max_results(bqstorage_client) 

1805 

1806 if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): 

1807 create_bqstorage_client = False 

1808 bqstorage_client = None 

1809 

1810 owns_bqstorage_client = False 

1811 if not bqstorage_client and create_bqstorage_client: 

1812 bqstorage_client = self.client._ensure_bqstorage_client() 

1813 owns_bqstorage_client = bqstorage_client is not None 

1814 

1815 try: 

1816 progress_bar = get_progress_bar( 

1817 progress_bar_type, "Downloading", self.total_rows, "rows" 

1818 ) 

1819 

1820 record_batches = [] 

1821 for record_batch in self.to_arrow_iterable( 

1822 bqstorage_client=bqstorage_client 

1823 ): 

1824 record_batches.append(record_batch) 

1825 

1826 if progress_bar is not None: 

1827 # In some cases, the number of total rows is not populated 

1828 # until the first page of rows is fetched. Update the 

1829 # progress bar's total to keep an accurate count. 

1830 progress_bar.total = progress_bar.total or self.total_rows 

1831 progress_bar.update(record_batch.num_rows) 

1832 

1833 if progress_bar is not None: 

1834 # Indicate that the download has finished. 

1835 progress_bar.close() 

1836 finally: 

1837 if owns_bqstorage_client: 

1838 bqstorage_client._transport.grpc_channel.close() # type: ignore 

1839 

1840 if record_batches and bqstorage_client is not None: 

1841 return pyarrow.Table.from_batches(record_batches) 

1842 else: 

1843 # No records (not record_batches), use schema based on BigQuery schema 

1844 # **or** 

1845 # we used the REST API (bqstorage_client is None), 

1846 # which doesn't add arrow extension metadata, so we let 

1847 # `bq_to_arrow_schema` do it. 

1848 arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema) 

1849 return pyarrow.Table.from_batches(record_batches, schema=arrow_schema) 

1850 

1851 def to_dataframe_iterable( 

1852 self, 

1853 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

1854 dtypes: Optional[Dict[str, Any]] = None, 

1855 max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore 

1856 ) -> "pandas.DataFrame": 

1857 """Create an iterable of pandas DataFrames, to process the table as a stream. 

1858 

1859 Args: 

1860 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): 

1861 A BigQuery Storage API client. If supplied, use the faster 

1862 BigQuery Storage API to fetch rows from BigQuery. 

1863 

1864 This method requires the ``google-cloud-bigquery-storage`` library. 

1865 

1866 This method only exposes a subset of the capabilities of the 

1867 BigQuery Storage API. For full access to all features 

1868 (projections, filters, snapshots) use the Storage API directly. 

1869 

1870 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): 

1871 A dictionary of column names to pandas ``dtype``s. The provided 

1872 ``dtype`` is used when constructing the series for the column 

1873 specified. Otherwise, the default pandas behavior is used. 

1874 

1875 max_queue_size (Optional[int]): 

1876 The maximum number of result pages to hold in the internal queue when 

1877 streaming query results over the BigQuery Storage API. Ignored if 

1878 Storage API is not used. 

1879 

1880 By default, the max queue size is set to the number of BQ Storage streams 

1881 created by the server. If ``max_queue_size`` is :data:`None`, the queue 

1882 size is infinite. 

1883 

1884 .. versionadded:: 2.14.0 

1885 

1886 Returns: 

1887 pandas.DataFrame: 

1888 A generator of :class:`~pandas.DataFrame`. 

1889 

1890 Raises: 

1891 ValueError: 

1892 If the :mod:`pandas` library cannot be imported. 

1893 """ 

1894 _pandas_helpers.verify_pandas_imports() 

1895 

1896 if dtypes is None: 

1897 dtypes = {} 

1898 

1899 self._maybe_warn_max_results(bqstorage_client) 

1900 

1901 column_names = [field.name for field in self._schema] 

1902 bqstorage_download = functools.partial( 

1903 _pandas_helpers.download_dataframe_bqstorage, 

1904 self._project, 

1905 self._table, 

1906 bqstorage_client, 

1907 column_names, 

1908 dtypes, 

1909 preserve_order=self._preserve_order, 

1910 selected_fields=self._selected_fields, 

1911 max_queue_size=max_queue_size, 

1912 ) 

1913 tabledata_list_download = functools.partial( 

1914 _pandas_helpers.download_dataframe_row_iterator, 

1915 iter(self.pages), 

1916 self.schema, 

1917 dtypes, 

1918 ) 

1919 return self._to_page_iterable( 

1920 bqstorage_download, 

1921 tabledata_list_download, 

1922 bqstorage_client=bqstorage_client, 

1923 ) 

1924 

1925 # If changing the signature of this method, make sure to apply the same 

1926 # changes to job.QueryJob.to_dataframe() 

1927 def to_dataframe( 

1928 self, 

1929 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

1930 dtypes: Optional[Dict[str, Any]] = None, 

1931 progress_bar_type: Optional[str] = None, 

1932 create_bqstorage_client: bool = True, 

1933 geography_as_object: bool = False, 

1934 bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE, 

1935 int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE, 

1936 float_dtype: Union[Any, None] = None, 

1937 string_dtype: Union[Any, None] = None, 

1938 ) -> "pandas.DataFrame": 

1939 """Create a pandas DataFrame by loading all pages of a query. 

1940 

1941 Args: 

1942 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): 

1943 A BigQuery Storage API client. If supplied, use the faster 

1944 BigQuery Storage API to fetch rows from BigQuery. 

1945 

1946 This method requires the ``google-cloud-bigquery-storage`` library. 

1947 

1948 This method only exposes a subset of the capabilities of the 

1949 BigQuery Storage API. For full access to all features 

1950 (projections, filters, snapshots) use the Storage API directly. 

1951 

1952 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): 

1953 A dictionary of column names to pandas ``dtype``s. The provided 

1954 ``dtype`` is used when constructing the series for the column 

1955 specified. Otherwise, the default pandas behavior is used. 

1956 progress_bar_type (Optional[str]): 

1957 If set, use the `tqdm <https://tqdm.github.io/>`_ library to 

1958 display a progress bar while the data downloads. Install the 

1959 ``tqdm`` package to use this feature. 

1960 

1961 Possible values of ``progress_bar_type`` include: 

1962 

1963 ``None`` 

1964 No progress bar. 

1965 ``'tqdm'`` 

1966 Use the :func:`tqdm.tqdm` function to print a progress bar 

1967 to :data:`sys.stdout`. 

1968 ``'tqdm_notebook'`` 

1969 Use the :func:`tqdm.notebook.tqdm` function to display a 

1970 progress bar as a Jupyter notebook widget. 

1971 ``'tqdm_gui'`` 

1972 Use the :func:`tqdm.tqdm_gui` function to display a 

1973 progress bar as a graphical dialog box. 

1974 

1975 .. versionadded:: 1.11.0 

1976 

1977 create_bqstorage_client (Optional[bool]): 

1978 If ``True`` (default), create a BigQuery Storage API client 

1979 using the default API settings. The BigQuery Storage API 

1980 is a faster way to fetch rows from BigQuery. See the 

1981 ``bqstorage_client`` parameter for more information. 

1982 

1983 This argument does nothing if ``bqstorage_client`` is supplied. 

1984 

1985 .. versionadded:: 1.24.0 

1986 

1987 geography_as_object (Optional[bool]): 

1988 If ``True``, convert GEOGRAPHY data to :mod:`shapely` 

1989 geometry objects. If ``False`` (default), don't cast 

1990 geography data to :mod:`shapely` geometry objects. 

1991 

1992 .. versionadded:: 2.24.0 

1993 

1994 bool_dtype (Optional[pandas.Series.dtype, None]): 

1995 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``) 

1996 to convert BigQuery Boolean type, instead of relying on the default 

1997 ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``, 

1998 then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean 

1999 type can be found at: 

2000 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type 

2001 

2002 .. versionadded:: 3.7.1 

2003 

2004 int_dtype (Optional[pandas.Series.dtype, None]): 

2005 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``) 

2006 to convert BigQuery Integer types, instead of relying on the default 

2007 ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``, 

2008 then the data type will be ``numpy.dtype("int64")``. A list of BigQuery 

2009 Integer types can be found at: 

2010 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types 

2011 

2012 .. versionadded:: 3.7.1 

2013 

2014 float_dtype (Optional[pandas.Series.dtype, None]): 

2015 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``) 

2016 to convert BigQuery Float type, instead of relying on the default 

2017 ``numpy.dtype("float64")``. If you explicitly set the value to ``None``, 

2018 then the data type will be ``numpy.dtype("float64")``. BigQuery Float 

2019 type can be found at: 

2020 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types 

2021 

2022 .. versionadded:: 3.7.1 

2023 

2024 string_dtype (Optional[pandas.Series.dtype, None]): 

2025 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to 

2026 convert BigQuery String type, instead of relying on the default 

2027 ``numpy.dtype("object")``. If you explicitly set the value to ``None``, 

2028 then the data type will be ``numpy.dtype("object")``. BigQuery String 

2029 type can be found at: 

2030 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type 

2031 

2032 .. versionadded:: 3.7.1 

2033 

2034 Returns: 

2035 pandas.DataFrame: 

2036 A :class:`~pandas.DataFrame` populated with row data and column 

2037 headers from the query results. The column headers are derived 

2038 from the destination table's schema. 

2039 

2040 Raises: 

2041 ValueError: 

2042 If the :mod:`pandas` library cannot be imported, or 

2043 the :mod:`google.cloud.bigquery_storage_v1` module is 

2044 required but cannot be imported. Also if 

2045 `geography_as_object` is `True`, but the 

2046 :mod:`shapely` library cannot be imported. Also if 

2047 `bool_dtype`, `int_dtype`, or another dtype parameter 

2048 is not a supported dtype. 

2049 

2050 """ 

2051 _pandas_helpers.verify_pandas_imports() 

2052 

2053 if geography_as_object and shapely is None: 

2054 raise ValueError(_NO_SHAPELY_ERROR) 

2055 

2056 if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE: 

2057 bool_dtype = pandas.BooleanDtype() 

2058 

2059 if int_dtype is DefaultPandasDTypes.INT_DTYPE: 

2060 int_dtype = pandas.Int64Dtype() 

2061 

2062 if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"): 

2063 raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE) 

2064 

2065 if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"): 

2066 raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE) 

2067 

2068 if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"): 

2069 raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE) 

2070 

2071 if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"): 

2072 raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE) 

2073 

2074 if dtypes is None: 

2075 dtypes = {} 

2076 

2077 self._maybe_warn_max_results(bqstorage_client) 

2078 

2079 if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client): 

2080 create_bqstorage_client = False 

2081 bqstorage_client = None 

2082 

2083 record_batch = self.to_arrow( 

2084 progress_bar_type=progress_bar_type, 

2085 bqstorage_client=bqstorage_client, 

2086 create_bqstorage_client=create_bqstorage_client, 

2087 ) 

2088 

2089 # When converting date or timestamp values to nanosecond precision, the result 

2090 # can be out of pyarrow bounds. To avoid the error when converting to 

2091 # Pandas, we set the date_as_object or timestamp_as_object parameter to True, 

2092 # if necessary. 

2093 date_as_object = not all( 

2094 self.__can_cast_timestamp_ns(col) 

2095 for col in record_batch 

2096 # Type can be date32 or date64 (plus units). 

2097 # See: https://arrow.apache.org/docs/python/api/datatypes.html 

2098 if pyarrow.types.is_date(col.type) 

2099 ) 

2100 

2101 timestamp_as_object = not all( 

2102 self.__can_cast_timestamp_ns(col) 

2103 for col in record_batch 

2104 # Type can be datetime and timestamp (plus units and time zone). 

2105 # See: https://arrow.apache.org/docs/python/api/datatypes.html 

2106 if pyarrow.types.is_timestamp(col.type) 

2107 ) 

2108 

2109 if len(record_batch) > 0: 

2110 df = record_batch.to_pandas( 

2111 date_as_object=date_as_object, 

2112 timestamp_as_object=timestamp_as_object, 

2113 integer_object_nulls=True, 

2114 types_mapper=_pandas_helpers.default_types_mapper( 

2115 date_as_object=date_as_object, 

2116 bool_dtype=bool_dtype, 

2117 int_dtype=int_dtype, 

2118 float_dtype=float_dtype, 

2119 string_dtype=string_dtype, 

2120 ), 

2121 ) 

2122 else: 

2123 # Avoid "ValueError: need at least one array to concatenate" on 

2124 # older versions of pandas when converting empty RecordBatch to 

2125 # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241 

2126 df = pandas.DataFrame([], columns=record_batch.schema.names) 

2127 

2128 for column in dtypes: 

2129 df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False) 

2130 

2131 if geography_as_object: 

2132 for field in self.schema: 

2133 if field.field_type.upper() == "GEOGRAPHY" and field.mode != "REPEATED": 

2134 df[field.name] = df[field.name].dropna().apply(_read_wkt) 

2135 

2136 return df 

2137 

2138 @staticmethod 

2139 def __can_cast_timestamp_ns(column): 

2140 try: 

2141 column.cast("timestamp[ns]") 

2142 except pyarrow.lib.ArrowInvalid: 

2143 return False 

2144 else: 

2145 return True 

2146 

2147 # If changing the signature of this method, make sure to apply the same 

2148 # changes to job.QueryJob.to_geodataframe() 

2149 def to_geodataframe( 

2150 self, 

2151 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

2152 dtypes: Optional[Dict[str, Any]] = None, 

2153 progress_bar_type: Optional[str] = None, 

2154 create_bqstorage_client: bool = True, 

2155 geography_column: Optional[str] = None, 

2156 ) -> "geopandas.GeoDataFrame": 

2157 """Create a GeoPandas GeoDataFrame by loading all pages of a query. 

2158 

2159 Args: 

2160 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]): 

2161 A BigQuery Storage API client. If supplied, use the faster 

2162 BigQuery Storage API to fetch rows from BigQuery. 

2163 

2164 This method requires the ``pyarrow`` and 

2165 ``google-cloud-bigquery-storage`` libraries. 

2166 

2167 This method only exposes a subset of the capabilities of the 

2168 BigQuery Storage API. For full access to all features 

2169 (projections, filters, snapshots) use the Storage API directly. 

2170 

2171 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): 

2172 A dictionary of column names to pandas ``dtype``s. The provided 

2173 ``dtype`` is used when constructing the series for the column 

2174 specified. Otherwise, the default pandas behavior is used. 

2175 progress_bar_type (Optional[str]): 

2176 If set, use the `tqdm <https://tqdm.github.io/>`_ library to 

2177 display a progress bar while the data downloads. Install the 

2178 ``tqdm`` package to use this feature. 

2179 

2180 Possible values of ``progress_bar_type`` include: 

2181 

2182 ``None`` 

2183 No progress bar. 

2184 ``'tqdm'`` 

2185 Use the :func:`tqdm.tqdm` function to print a progress bar 

2186 to :data:`sys.stdout`. 

2187 ``'tqdm_notebook'`` 

2188 Use the :func:`tqdm.notebook.tqdm` function to display a 

2189 progress bar as a Jupyter notebook widget. 

2190 ``'tqdm_gui'`` 

2191 Use the :func:`tqdm.tqdm_gui` function to display a 

2192 progress bar as a graphical dialog box. 

2193 

2194 create_bqstorage_client (Optional[bool]): 

2195 If ``True`` (default), create a BigQuery Storage API client 

2196 using the default API settings. The BigQuery Storage API 

2197 is a faster way to fetch rows from BigQuery. See the 

2198 ``bqstorage_client`` parameter for more information. 

2199 

2200 This argument does nothing if ``bqstorage_client`` is supplied. 

2201 

2202 geography_column (Optional[str]): 

2203 If there is more than one GEOGRAPHY column, 

2204 identifies which one to use to construct a geopandas 

2205 GeoDataFrame. This option can be omitted if there's 

2206 only one GEOGRAPHY column. 

2207 

2208 Returns: 

2209 geopandas.GeoDataFrame: 

2210 A :class:`geopandas.GeoDataFrame` populated with row 

2211 data and column headers from the query results. The 

2212 column headers are derived from the destination 

2213 table's schema. 

2214 

2215 Raises: 

2216 ValueError: 

2217 If the :mod:`geopandas` library cannot be imported, or the 

2218 :mod:`google.cloud.bigquery_storage_v1` module is 

2219 required but cannot be imported. 

2220 

2221 .. versionadded:: 2.24.0 

2222 """ 

2223 if geopandas is None: 

2224 raise ValueError(_NO_GEOPANDAS_ERROR) 

2225 

2226 geography_columns = set( 

2227 field.name 

2228 for field in self.schema 

2229 if field.field_type.upper() == "GEOGRAPHY" 

2230 ) 

2231 if not geography_columns: 

2232 raise TypeError( 

2233 "There must be at least one GEOGRAPHY column" 

2234 " to create a GeoDataFrame" 

2235 ) 

2236 

2237 if geography_column: 

2238 if geography_column not in geography_columns: 

2239 raise ValueError( 

2240 f"The given geography column, {geography_column}, doesn't name" 

2241 f" a GEOGRAPHY column in the result." 

2242 ) 

2243 elif len(geography_columns) == 1: 

2244 [geography_column] = geography_columns 

2245 else: 

2246 raise ValueError( 

2247 "There is more than one GEOGRAPHY column in the result. " 

2248 "The geography_column argument must be used to specify which " 

2249 "one to use to create a GeoDataFrame" 

2250 ) 

2251 

2252 df = self.to_dataframe( 

2253 bqstorage_client, 

2254 dtypes, 

2255 progress_bar_type, 

2256 create_bqstorage_client, 

2257 geography_as_object=True, 

2258 ) 

2259 

2260 return geopandas.GeoDataFrame( 

2261 df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column 

2262 ) 

2263 
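# A minimal usage sketch (not part of this module): downloading all rows of a
# table through RowIterator.to_arrow() and to_dataframe(). It assumes a
# hypothetical google.cloud.bigquery.Client instance named `client`; the table
# ID "my_dataset.my_table" is a placeholder.
def _example_download_rows(client):
    rows = client.list_rows("my_dataset.my_table")  # returns a RowIterator

    # Materializes every page; uses the BigQuery Storage API when the
    # google-cloud-bigquery-storage extra is installed and usable.
    arrow_table = rows.to_arrow(progress_bar_type=None)

    # A fresh iterator for the DataFrame download; here we force the REST
    # tabledata.list code path instead of creating a BQ Storage client.
    df = client.list_rows("my_dataset.my_table").to_dataframe(
        create_bqstorage_client=False,
    )
    return arrow_table, df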

2264 
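# A minimal streaming sketch (not part of this module): processing results one
# pandas.DataFrame page at a time with RowIterator.to_dataframe_iterable(),
# again assuming a hypothetical `client` and a placeholder table ID.
def _example_stream_rows(client):
    total = 0
    for frame in client.list_rows("my_dataset.my_table").to_dataframe_iterable():
        # Each `frame` holds one page (or one BQ Storage stream chunk) of rows,
        # so memory usage stays bounded by the page size.
        total += len(frame)
    return total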

2265class _EmptyRowIterator(RowIterator): 

2266 """An empty row iterator. 

2267 

2268 This class prevents API requests when there are no rows to fetch or rows 

2269 are impossible to fetch, such as with query results for DDL CREATE VIEW 

2270 statements. 

2271 """ 

2272 

2273 schema = () 

2274 pages = () 

2275 total_rows = 0 

2276 

2277 def __init__( 

2278 self, client=None, api_request=None, path=None, schema=(), *args, **kwargs 

2279 ): 

2280 super().__init__( 

2281 client=client, 

2282 api_request=api_request, 

2283 path=path, 

2284 schema=schema, 

2285 *args, 

2286 **kwargs, 

2287 ) 

2288 

2289 def to_arrow( 

2290 self, 

2291 progress_bar_type=None, 

2292 bqstorage_client=None, 

2293 create_bqstorage_client=True, 

2294 ) -> "pyarrow.Table": 

2295 """[Beta] Create an empty class:`pyarrow.Table`. 

2296 

2297 Args: 

2298 progress_bar_type (str): Ignored. Added for compatibility with RowIterator. 

2299 bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. 

2300 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. 

2301 

2302 Returns: 

2303 pyarrow.Table: An empty :class:`pyarrow.Table`. 

2304 """ 

2305 if pyarrow is None: 

2306 raise ValueError(_NO_PYARROW_ERROR) 

2307 return pyarrow.Table.from_arrays(()) 

2308 

2309 def to_dataframe( 

2310 self, 

2311 bqstorage_client=None, 

2312 dtypes=None, 

2313 progress_bar_type=None, 

2314 create_bqstorage_client=True, 

2315 geography_as_object=False, 

2316 bool_dtype=None, 

2317 int_dtype=None, 

2318 float_dtype=None, 

2319 string_dtype=None, 

2320 ) -> "pandas.DataFrame": 

2321 """Create an empty dataframe. 

2322 

2323 Args: 

2324 bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. 

2325 dtypes (Any): Ignored. Added for compatibility with RowIterator. 

2326 progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. 

2327 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. 

2328 geography_as_object (bool): Ignored. Added for compatibility with RowIterator. 

2329 bool_dtype (Any): Ignored. Added for compatibility with RowIterator. 

2330 int_dtype (Any): Ignored. Added for compatibility with RowIterator. 

2331 float_dtype (Any): Ignored. Added for compatibility with RowIterator. 

2332 string_dtype (Any): Ignored. Added for compatibility with RowIterator. 

2333 

2334 Returns: 

2335 pandas.DataFrame: An empty :class:`~pandas.DataFrame`. 

2336 """ 

2337 _pandas_helpers.verify_pandas_imports() 

2338 return pandas.DataFrame() 

2339 

2340 def to_geodataframe( 

2341 self, 

2342 bqstorage_client=None, 

2343 dtypes=None, 

2344 progress_bar_type=None, 

2345 create_bqstorage_client=True, 

2346 geography_column: Optional[str] = None, 

2347 ) -> "pandas.DataFrame": 

2348 """Create an empty dataframe. 

2349 

2350 Args: 

2351 bqstorage_client (Any): Ignored. Added for compatibility with RowIterator. 

2352 dtypes (Any): Ignored. Added for compatibility with RowIterator. 

2353 progress_bar_type (Any): Ignored. Added for compatibility with RowIterator. 

2354 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator. 

2355 

2356 Returns: 

2357 geopandas.GeoDataFrame: An empty :class:`geopandas.GeoDataFrame`. 

2358 """ 

2359 if geopandas is None: 

2360 raise ValueError(_NO_GEOPANDAS_ERROR) 

2361 

2362 # Since an empty GeoDataFrame has no geometry column, we do not set a CRS on it, 

2363 # because passing a CRS without a geometry column is deprecated. 

2364 return geopandas.GeoDataFrame() 

2365 

2366 def to_dataframe_iterable( 

2367 self, 

2368 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

2369 dtypes: Optional[Dict[str, Any]] = None, 

2370 max_queue_size: Optional[int] = None, 

2371 ) -> Iterator["pandas.DataFrame"]: 

2372 """Create an iterable of pandas DataFrames, to process the table as a stream. 

2373 

2374 .. versionadded:: 2.21.0 

2375 

2376 Args: 

2377 bqstorage_client: 

2378 Ignored. Added for compatibility with RowIterator. 

2379 

2380 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]): 

2381 Ignored. Added for compatibility with RowIterator. 

2382 

2383 max_queue_size: 

2384 Ignored. Added for compatibility with RowIterator. 

2385 

2386 Returns: 

2387 An iterator yielding a single empty :class:`~pandas.DataFrame`. 

2388 

2389 Raises: 

2390 ValueError: 

2391 If the :mod:`pandas` library cannot be imported. 

2392 """ 

2393 _pandas_helpers.verify_pandas_imports() 

2394 return iter((pandas.DataFrame(),)) 

2395 

2396 def to_arrow_iterable( 

2397 self, 

2398 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None, 

2399 max_queue_size: Optional[int] = None, 

2400 ) -> Iterator["pyarrow.RecordBatch"]: 

2401 """Create an iterable of pandas DataFrames, to process the table as a stream. 

2402 

2403 .. versionadded:: 2.31.0 

2404 

2405 Args: 

2406 bqstorage_client: 

2407 Ignored. Added for compatibility with RowIterator. 

2408 

2409 max_queue_size: 

2410 Ignored. Added for compatibility with RowIterator. 

2411 

2412 Returns: 

2413 An iterator yielding a single empty :class:`~pyarrow.RecordBatch`. 

2414 """ 

2415 return iter((pyarrow.record_batch([]),)) 

2416 

2417 def __iter__(self): 

2418 return iter(()) 

2419 

2420 
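# A minimal sketch (not part of this module) of what callers observe from
# _EmptyRowIterator, e.g. for DDL statements with no result rows. It assumes
# pandas and pyarrow are installed.
def _example_empty_results():
    empty = _EmptyRowIterator()
    assert list(empty) == []               # __iter__ yields nothing
    assert len(empty.to_dataframe()) == 0  # empty pandas.DataFrame
    assert empty.to_arrow().num_rows == 0  # empty pyarrow.Table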

2421class PartitionRange(object): 

2422 """Definition of the ranges for range partitioning. 

2423 

2424 .. note:: 

2425 **Beta**. The integer range partitioning feature is in a pre-release 

2426 state and might change or have limited support. 

2427 

2428 Args: 

2429 start (Optional[int]): 

2430 Sets the 

2431 :attr:`~google.cloud.bigquery.table.PartitionRange.start` 

2432 property. 

2433 end (Optional[int]): 

2434 Sets the 

2435 :attr:`~google.cloud.bigquery.table.PartitionRange.end` 

2436 property. 

2437 interval (Optional[int]): 

2438 Sets the 

2439 :attr:`~google.cloud.bigquery.table.PartitionRange.interval` 

2440 property. 

2441 _properties (Optional[dict]): 

2442 Private. Used to construct object from API resource. 

2443 """ 

2444 

2445 def __init__(self, start=None, end=None, interval=None, _properties=None) -> None: 

2446 if _properties is None: 

2447 _properties = {} 

2448 self._properties = _properties 

2449 

2450 if start is not None: 

2451 self.start = start 

2452 if end is not None: 

2453 self.end = end 

2454 if interval is not None: 

2455 self.interval = interval 

2456 

2457 @property 

2458 def start(self): 

2459 """int: The start of range partitioning, inclusive.""" 

2460 return _helpers._int_or_none(self._properties.get("start")) 

2461 

2462 @start.setter 

2463 def start(self, value): 

2464 self._properties["start"] = _helpers._str_or_none(value) 

2465 

2466 @property 

2467 def end(self): 

2468 """int: The end of range partitioning, exclusive.""" 

2469 return _helpers._int_or_none(self._properties.get("end")) 

2470 

2471 @end.setter 

2472 def end(self, value): 

2473 self._properties["end"] = _helpers._str_or_none(value) 

2474 

2475 @property 

2476 def interval(self): 

2477 """int: The width of each interval.""" 

2478 return _helpers._int_or_none(self._properties.get("interval")) 

2479 

2480 @interval.setter 

2481 def interval(self, value): 

2482 self._properties["interval"] = _helpers._str_or_none(value) 

2483 

2484 def _key(self): 

2485 return tuple(sorted(self._properties.items())) 

2486 

2487 def __eq__(self, other): 

2488 if not isinstance(other, PartitionRange): 

2489 return NotImplemented 

2490 return self._key() == other._key() 

2491 

2492 def __ne__(self, other): 

2493 return not self == other 

2494 

2495 def __repr__(self): 

2496 key_vals = ["{}={}".format(key, val) for key, val in self._key()] 

2497 return "PartitionRange({})".format(", ".join(key_vals)) 

2498 

2499 

2500class RangePartitioning(object): 

2501 """Range-based partitioning configuration for a table. 

2502 

2503 .. note:: 

2504 **Beta**. The integer range partitioning feature is in a pre-release 

2505 state and might change or have limited support. 

2506 

2507 Args: 

2508 range_ (Optional[google.cloud.bigquery.table.PartitionRange]): 

2509 Sets the 

2510 :attr:`google.cloud.bigquery.table.RangePartitioning.range_` 

2511 property. 

2512 field (Optional[str]): 

2513 Sets the 

2514 :attr:`google.cloud.bigquery.table.RangePartitioning.field` 

2515 property. 

2516 _properties (Optional[dict]): 

2517 Private. Used to construct object from API resource. 

2518 """ 

2519 

2520 def __init__(self, range_=None, field=None, _properties=None) -> None: 

2521 if _properties is None: 

2522 _properties = {} 

2523 self._properties: Dict[str, Any] = _properties 

2524 

2525 if range_ is not None: 

2526 self.range_ = range_ 

2527 if field is not None: 

2528 self.field = field 

2529 

2530 # Trailing underscore to prevent conflict with built-in range() function. 

2531 @property 

2532 def range_(self): 

2533 """google.cloud.bigquery.table.PartitionRange: Defines the 

2534 ranges for range partitioning. 

2535 

2536 Raises: 

2537 ValueError: 

2538 If the value is not a :class:`PartitionRange`. 

2539 """ 

2540 range_properties = self._properties.setdefault("range", {}) 

2541 return PartitionRange(_properties=range_properties) 

2542 

2543 @range_.setter 

2544 def range_(self, value): 

2545 if not isinstance(value, PartitionRange): 

2546 raise ValueError("Expected a PartitionRange, but got {}.".format(value)) 

2547 self._properties["range"] = value._properties 

2548 

2549 @property 

2550 def field(self): 

2551 """str: The table is partitioned by this field. 

2552 

2553 The field must be a top-level ``NULLABLE`` / ``REQUIRED`` field. The 

2554 only supported type is ``INTEGER`` / ``INT64``. 

2555 """ 

2556 return self._properties.get("field") 

2557 

2558 @field.setter 

2559 def field(self, value): 

2560 self._properties["field"] = value 

2561 

2562 def _key(self): 

2563 return (("field", self.field), ("range_", self.range_)) 

2564 

2565 def __eq__(self, other): 

2566 if not isinstance(other, RangePartitioning): 

2567 return NotImplemented 

2568 return self._key() == other._key() 

2569 

2570 def __ne__(self, other): 

2571 return not self == other 

2572 

2573 def __repr__(self): 

2574 key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()] 

2575 return "RangePartitioning({})".format(", ".join(key_vals)) 

2576 

2577 
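# A minimal configuration sketch (not part of this module): integer range
# partitioning built from PartitionRange and RangePartitioning. The project,
# dataset, table, and column names are placeholders.
def _example_range_partitioning():
    table = Table("my-project.my_dataset.partitioned_by_id")
    table.range_partitioning = RangePartitioning(
        field="customer_id",
        range_=PartitionRange(start=0, end=100_000, interval=10),
    )
    # Values are stored as strings in the API resource but read back as ints.
    return table.range_partitioning.range_.end  # 100000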

2578class TimePartitioningType(object): 

2579 """Specifies the type of time partitioning to perform.""" 

2580 

2581 DAY = "DAY" 

2582 """str: Generates one partition per day.""" 

2583 

2584 HOUR = "HOUR" 

2585 """str: Generates one partition per hour.""" 

2586 

2587 MONTH = "MONTH" 

2588 """str: Generates one partition per month.""" 

2589 

2590 YEAR = "YEAR" 

2591 """str: Generates one partition per year.""" 

2592 

2593 

2594class TimePartitioning(object): 

2595 """Configures time-based partitioning for a table. 

2596 

2597 Args: 

2598 type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]): 

2599 Specifies the type of time partitioning to perform. Defaults to 

2600 :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`. 

2601 

2602 Supported values are: 

2603 

2604 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR` 

2605 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY` 

2606 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH` 

2607 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR` 

2608 

2609 field (Optional[str]): 

2610 If set, the table is partitioned by this field. If not set, the 

2611 table is partitioned by pseudo column ``_PARTITIONTIME``. The field 

2612 must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE`` 

2613 field. Its mode must be ``NULLABLE`` or ``REQUIRED``. 

2614 

2615 See the `time-unit column-partitioned tables guide 

2616 <https://cloud.google.com/bigquery/docs/creating-column-partitions>`_ 

2617 in the BigQuery documentation. 

2618 expiration_ms (Optional[int]): 

2619 Number of milliseconds for which to keep the storage for a 

2620 partition. 

2621 require_partition_filter (Optional[bool]): 

2622 DEPRECATED: Use 

2623 :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, 

2624 instead. 

2625 """ 

2626 

2627 def __init__( 

2628 self, type_=None, field=None, expiration_ms=None, require_partition_filter=None 

2629 ) -> None: 

2630 self._properties: Dict[str, Any] = {} 

2631 if type_ is None: 

2632 self.type_ = TimePartitioningType.DAY 

2633 else: 

2634 self.type_ = type_ 

2635 if field is not None: 

2636 self.field = field 

2637 if expiration_ms is not None: 

2638 self.expiration_ms = expiration_ms 

2639 if require_partition_filter is not None: 

2640 self.require_partition_filter = require_partition_filter 

2641 

2642 @property 

2643 def type_(self): 

2644 """google.cloud.bigquery.table.TimePartitioningType: The type of time 

2645 partitioning to use. 

2646 """ 

2647 return self._properties.get("type") 

2648 

2649 @type_.setter 

2650 def type_(self, value): 

2651 self._properties["type"] = value 

2652 

2653 @property 

2654 def field(self): 

2655 """str: Field in the table to use for partitioning""" 

2656 return self._properties.get("field") 

2657 

2658 @field.setter 

2659 def field(self, value): 

2660 self._properties["field"] = value 

2661 

2662 @property 

2663 def expiration_ms(self): 

2664 """int: Number of milliseconds to keep the storage for a partition.""" 

2665 return _helpers._int_or_none(self._properties.get("expirationMs")) 

2666 

2667 @expiration_ms.setter 

2668 def expiration_ms(self, value): 

2669 if value is not None: 

2670 # Allow explicitly setting the expiration to None. 

2671 value = str(value) 

2672 self._properties["expirationMs"] = value 

2673 

2674 @property 

2675 def require_partition_filter(self): 

2676 """bool: Specifies whether partition filters are required for queries 

2677 

2678 DEPRECATED: Use 

2679 :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`, 

2680 instead. 

2681 """ 

2682 warnings.warn( 

2683 ( 

2684 "TimePartitioning.require_partition_filter will be removed in " 

2685 "future versions. Please use Table.require_partition_filter " 

2686 "instead." 

2687 ), 

2688 PendingDeprecationWarning, 

2689 stacklevel=2, 

2690 ) 

2691 return self._properties.get("requirePartitionFilter") 

2692 

2693 @require_partition_filter.setter 

2694 def require_partition_filter(self, value): 

2695 warnings.warn( 

2696 ( 

2697 "TimePartitioning.require_partition_filter will be removed in " 

2698 "future versions. Please use Table.require_partition_filter " 

2699 "instead." 

2700 ), 

2701 PendingDeprecationWarning, 

2702 stacklevel=2, 

2703 ) 

2704 self._properties["requirePartitionFilter"] = value 

2705 

2706 @classmethod 

2707 def from_api_repr(cls, api_repr: dict) -> "TimePartitioning": 

2708 """Return a :class:`TimePartitioning` object deserialized from a dict. 

2709 

2710 This method creates a new ``TimePartitioning`` instance that points to 

2711 the ``api_repr`` parameter as its internal properties dict. This means 

2712 that when a ``TimePartitioning`` instance is stored as a property of 

2713 another object, any changes made at the higher level will also appear 

2714 here:: 

2715 

2716 >>> time_partitioning = TimePartitioning() 

2717 >>> table.time_partitioning = time_partitioning 

2718 >>> table.time_partitioning.field = 'timecolumn' 

2719 >>> time_partitioning.field 

2720 'timecolumn' 

2721 

2722 Args: 

2723 api_repr (Mapping[str, str]): 

2724 The serialized representation of the TimePartitioning, such as 

2725 what is output by :meth:`to_api_repr`. 

2726 

2727 Returns: 

2728 google.cloud.bigquery.table.TimePartitioning: 

2729 The ``TimePartitioning`` object. 

2730 """ 

2731 instance = cls() 

2732 instance._properties = api_repr 

2733 return instance 

2734 

2735 def to_api_repr(self) -> dict: 

2736 """Return a dictionary representing this object. 

2737 

2738 This method returns the properties dict of the ``TimePartitioning`` 

2739 instance rather than making a copy. This means that when a 

2740 ``TimePartitioning`` instance is stored as a property of another 

2741 object, any changes made at the higher level will also appear here. 

2742 

2743 Returns: 

2744 dict: 

2745 A dictionary representing the TimePartitioning object in 

2746 serialized form. 

2747 """ 

2748 return self._properties 

2749 

2750 def _key(self): 

2751 # Because we are only "renaming" top-level keys, a shallow copy is sufficient here. 

2752 properties = self._properties.copy() 

2753 # calling repr for non built-in type objects. 

2754 properties["type_"] = repr(properties.pop("type")) 

2755 if "field" in properties: 

2756 # calling repr for non built-in type objects. 

2757 properties["field"] = repr(properties["field"]) 

2758 if "requirePartitionFilter" in properties: 

2759 properties["require_partition_filter"] = properties.pop( 

2760 "requirePartitionFilter" 

2761 ) 

2762 if "expirationMs" in properties: 

2763 properties["expiration_ms"] = properties.pop("expirationMs") 

2764 return tuple(sorted(properties.items())) 

2765 

2766 def __eq__(self, other): 

2767 if not isinstance(other, TimePartitioning): 

2768 return NotImplemented 

2769 return self._key() == other._key() 

2770 

2771 def __ne__(self, other): 

2772 return not self == other 

2773 

2774 def __hash__(self): 

2775 return hash(self._key()) 

2776 

2777 def __repr__(self): 

2778 key_vals = ["{}={}".format(key, val) for key, val in self._key()] 

2779 return "TimePartitioning({})".format(",".join(key_vals)) 

2780 

2781 
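# A minimal configuration sketch (not part of this module): daily time
# partitioning on a placeholder TIMESTAMP column with a 90-day partition
# expiration.
def _example_time_partitioning():
    table = Table("my-project.my_dataset.events")
    table.time_partitioning = TimePartitioning(
        type_=TimePartitioningType.DAY,
        field="event_timestamp",
        expiration_ms=90 * 24 * 60 * 60 * 1000,
    )
    # to_api_repr() hands back the shared properties dict, not a copy.
    return table.time_partitioning.to_api_repr()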

2782def _item_to_row(iterator, resource): 

2783 """Convert a JSON row to the native object. 

2784 

2785 .. note:: 

2786 

2787 This assumes that the ``schema`` attribute has been 

2788 added to the iterator after being created, which 

2789 should be done by the caller. 

2790 

2791 Args: 

2792 iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. 

2793 resource (Dict): An item to be converted to a row. 

2794 

2795 Returns: 

2796 google.cloud.bigquery.table.Row: The next row in the page. 

2797 """ 

2798 return Row( 

2799 _helpers._row_tuple_from_json(resource, iterator.schema), 

2800 iterator._field_to_index, 

2801 ) 

2802 

2803 

2804def _row_iterator_page_columns(schema, response): 

2805 """Make a generator of all the columns in a page from tabledata.list. 

2806 

2807 This enables creating a :class:`pandas.DataFrame` and other 

2808 column-oriented data structures such as :class:`pyarrow.RecordBatch` 

2809 """ 

2810 columns = [] 

2811 rows = response.get("rows", []) 

2812 

2813 def get_column_data(field_index, field): 

2814 for row in rows: 

2815 yield _helpers._field_from_json(row["f"][field_index]["v"], field) 

2816 

2817 for field_index, field in enumerate(schema): 

2818 columns.append(get_column_data(field_index, field)) 

2819 

2820 return columns 

2821 

2822 
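# A minimal sketch (not part of this module) of the column-oriented view that
# _row_iterator_page_columns builds from a tabledata.list-style response. The
# schema and payload below are made-up placeholders.
def _example_page_columns():
    from google.cloud.bigquery.schema import SchemaField

    schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")]
    response = {
        "rows": [
            {"f": [{"v": "Ada"}, {"v": "36"}]},
            {"f": [{"v": "Grace"}, {"v": "42"}]},
        ]
    }
    name_col, age_col = _row_iterator_page_columns(schema, response)
    return list(name_col), list(age_col)  # (["Ada", "Grace"], [36, 42])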

2823# pylint: disable=unused-argument 

2824def _rows_page_start(iterator, page, response): 

2825 """Grab total rows when :class:`~google.cloud.iterator.Page` starts. 

2826 

2827 Args: 

2828 iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use. 

2829 page (google.api_core.page_iterator.Page): The page that was just created. 

2830 response (Dict): The JSON API response for a page of rows in a table. 

2831 """ 

2832 # Make a (lazy) copy of the page in column-oriented format for use in data 

2833 # science packages. 

2834 page._columns = _row_iterator_page_columns(iterator._schema, response) 

2835 

2836 total_rows = response.get("totalRows") 

2837 if total_rows is not None: 

2838 total_rows = int(total_rows) 

2839 iterator._total_rows = total_rows 

2840 

2841 

2842# pylint: enable=unused-argument 

2843 

2844 

2845def _table_arg_to_table_ref(value, default_project=None) -> TableReference: 

2846 """Helper to convert a string or Table to TableReference. 

2847 

2848 This function keeps TableReference and other kinds of objects unchanged. 

2849 """ 

2850 if isinstance(value, str): 

2851 value = TableReference.from_string(value, default_project=default_project) 

2852 if isinstance(value, (Table, TableListItem)): 

2853 value = value.reference 

2854 return value 

2855 

2856 

2857def _table_arg_to_table(value, default_project=None) -> Table: 

2858 """Helper to convert a string or TableReference to a Table. 

2859 

2860 This function keeps Table and other kinds of objects unchanged. 

2861 """ 

2862 if isinstance(value, str): 

2863 value = TableReference.from_string(value, default_project=default_project) 

2864 if isinstance(value, TableReference): 

2865 value = Table(value) 

2866 if isinstance(value, TableListItem): 

2867 newvalue = Table(value.reference) 

2868 newvalue._properties = value._properties 

2869 value = newvalue 

2870 

2871 return value
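# A minimal sketch (not part of this module) of how the two helpers above
# normalize user input; the table ID and project are placeholders.
def _example_table_arg_coercion():
    ref = _table_arg_to_table_ref("my_dataset.my_table", default_project="my-project")
    assert isinstance(ref, TableReference)

    table = _table_arg_to_table(ref)
    assert isinstance(table, Table)
    return table.reference == ref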