Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/table.py: 37%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

15"""Define API Tables."""

17from __future__ import absolute_import

19import copy

20import datetime

21import functools

22import operator

23import typing

24from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence

26import warnings

28try:

29 import pandas # type: ignore

30except ImportError:

31 pandas = None

33try:

34 import pyarrow # type: ignore

35except ImportError:

36 pyarrow = None

38try:

39 import db_dtypes # type: ignore

40except ImportError:

41 db_dtypes = None

43try:

44 import geopandas # type: ignore

45except ImportError:

46 geopandas = None

47finally:

48 _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326"

50try:

51 import shapely # type: ignore

52 from shapely import wkt # type: ignore

53except ImportError:

54 shapely = None

55else:

56 _read_wkt = wkt.loads

58import google.api_core.exceptions

59from google.api_core.page_iterator import HTTPIterator

61import google.cloud._helpers # type: ignore

62from google.cloud.bigquery import _helpers

63from google.cloud.bigquery import _pandas_helpers

64from google.cloud.bigquery import _versions_helpers

65from google.cloud.bigquery import exceptions as bq_exceptions

66from google.cloud.bigquery._tqdm_helpers import get_progress_bar

67from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration

68from google.cloud.bigquery.enums import DefaultPandasDTypes

69from google.cloud.bigquery.external_config import ExternalConfig

70from google.cloud.bigquery import schema as _schema

71from google.cloud.bigquery.schema import _build_schema_resource

72from google.cloud.bigquery.schema import _parse_schema_resource

73from google.cloud.bigquery.schema import _to_schema_fields

74from google.cloud.bigquery import external_config

76if typing.TYPE_CHECKING: # pragma: NO COVER

77 # Unconditionally import optional dependencies again to tell pytype that

78 # they are not None, avoiding false "no attribute" errors.

79 import pandas

80 import pyarrow

81 import geopandas # type: ignore

82 from google.cloud import bigquery_storage # type: ignore

83 from google.cloud.bigquery.dataset import DatasetReference

86_NO_GEOPANDAS_ERROR = (

87 "The geopandas library is not installed, please install "

88 "geopandas to use the to_geodataframe() function."

89)

90_NO_PYARROW_ERROR = (

91 "The pyarrow library is not installed, please install "

92 "pyarrow to use the to_arrow() function."

93)

94_NO_SHAPELY_ERROR = (

95 "The shapely library is not installed, please install "

96 "shapely to use the geography_as_object option."

97)

99_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'

100

101_NO_SUPPORTED_DTYPE = (

102 "The dtype cannot to be converted to a pandas ExtensionArray "

103 "because the necessary `__from_arrow__` attribute is missing."

104)

105

106_RANGE_PYARROW_WARNING = (

107 "Unable to represent RANGE schema as struct using pandas ArrowDtype. Using "

108 "`object` instead. To use ArrowDtype, use pandas >= 1.5 and "

109 "pyarrow >= 10.0.1."

110)

111

112# How many of the total rows need to be downloaded already for us to skip

113# calling the BQ Storage API?

114#

115# In microbenchmarks on 2024-05-21, I (tswast@) measure that at about 2 MB of

116# remaining results, it's faster to use the BQ Storage Read API to download

117# the results than use jobs.getQueryResults. Since we don't have a good way to

118# know the remaining bytes, we estimate by remaining number of rows.

119#

120# Except when rows themselves are larger, I observe that a single page of

121# results will be around 10 MB. Therefore, the proportion of rows already

122# downloaded should be 10 (first page) / 12 (all results) or less for it to be

123# worth it to make a call to jobs.getQueryResults.

124ALMOST_COMPLETELY_CACHED_RATIO = 0.833333

125

126

def _reference_getter(table):
    """Build a :class:`~google.cloud.bigquery.table.TableReference` for ``table``.

    Args:
        table: Object exposing ``project``, ``dataset_id`` and ``table_id``
            attributes.

    Returns:
        google.cloud.bigquery.table.TableReference: pointer to this table.
    """
    # Imported at call time rather than at module import time
    # (presumably to avoid a circular import -- confirm).
    from google.cloud.bigquery.dataset import DatasetReference

    parent = DatasetReference(table.project, table.dataset_id)
    return TableReference(parent, table.table_id)

138

139

def _view_use_legacy_sql_getter(
    table: Union["Table", "TableListItem"]
) -> Optional[bool]:
    """bool: Specifies whether to execute the view with Legacy or Standard SQL.

    This boolean specifies whether to execute the view with Legacy SQL
    (:data:`True`) or Standard SQL (:data:`False`). The client side default is
    :data:`False`. The server-side default is :data:`True`. If this table is
    not a view, :data:`None` is returned.

    Raises:
        ValueError: For invalid value types.
    """
    view: Optional[Dict[str, Any]] = table._properties.get("view")
    if view is not None:
        # A view resource without an explicit flag falls back to the
        # server-side default for useLegacySql, which is True.
        return view.get("useLegacySql", True)

    # In some cases, such as in a table list, no view object is present, but
    # the resource still represents a view. Use the type as a fallback, again
    # reporting the server-side default.
    if table.table_type == "VIEW":
        return True

    return None  # explicit return statement to appease mypy

164

165

class _TableBase:
    """Common functionality shared by the table-related classes.

    Instances keep their state in the ``_properties`` dict; the
    ``_PROPERTY_TO_API_FIELD`` mapping tells each property where its value
    lives inside that dict.
    """

    _PROPERTY_TO_API_FIELD: Dict[str, Union[str, List[str]]] = {
        "dataset_id": ["tableReference", "datasetId"],
        "project": ["tableReference", "projectId"],
        "table_id": ["tableReference", "tableId"],
    }

    def __init__(self):
        self._properties = {}

    @property
    def project(self) -> str:
        """Project bound to the table."""
        api_path = self._PROPERTY_TO_API_FIELD["project"]
        return _helpers._get_sub_prop(self._properties, api_path)

    @property
    def dataset_id(self) -> str:
        """ID of dataset containing the table."""
        api_path = self._PROPERTY_TO_API_FIELD["dataset_id"]
        return _helpers._get_sub_prop(self._properties, api_path)

    @property
    def table_id(self) -> str:
        """The table ID."""
        api_path = self._PROPERTY_TO_API_FIELD["table_id"]
        return _helpers._get_sub_prop(self._properties, api_path)

    @property
    def path(self) -> str:
        """URL path for the table's APIs."""
        return f"/projects/{self.project}/datasets/{self.dataset_id}/tables/{self.table_id}"

    def __eq__(self, other):
        # Anything that is not table-like is left for the other operand to
        # compare (or for Python to treat as unequal).
        if not isinstance(other, _TableBase):
            return NotImplemented
        return (
            self.project == other.project
            and self.dataset_id == other.dataset_id
            and self.table_id == other.table_id
        )

    def __hash__(self):
        # Hash over the same three identifiers that __eq__ compares.
        identity = (self.project, self.dataset_id, self.table_id)
        return hash(identity)

219

220

class TableReference(_TableBase):
    """Pointer to a table, made of a project ID, a dataset ID and a table ID.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference

    Args:
        dataset_ref: A pointer to the dataset
        table_id: The ID of the table
    """

    # Overrides the base mapping: here each identifier is stored under a
    # single key instead of a nested ["tableReference", ...] path.
    _PROPERTY_TO_API_FIELD = {
        "dataset_id": "datasetId",
        "project": "projectId",
        "table_id": "tableId",
    }

    def __init__(self, dataset_ref: "DatasetReference", table_id: str):
        self._properties = {}

        # Stored in the order project, dataset, table.
        identifiers = (
            ("project", dataset_ref.project),
            ("dataset_id", dataset_ref.dataset_id),
            ("table_id", table_id),
        )
        for prop_name, prop_value in identifiers:
            _helpers._set_sub_prop(
                self._properties,
                self._PROPERTY_TO_API_FIELD[prop_name],
                prop_value,
            )

    @classmethod
    def from_string(
        cls, table_id: str, default_project: Optional[str] = None
    ) -> "TableReference":
        """Construct a table reference from table ID string.

        Args:
            table_id (str):
                A table ID in standard SQL format. If ``default_project``
                is not specified, this must include a project ID, dataset
                ID, and table ID, each separated by ``.``.
            default_project (Optional[str]):
                The project ID to use when ``table_id`` does not
                include a project ID.

        Returns:
            TableReference: Table reference parsed from ``table_id``.

        Examples:
            >>> TableReference.from_string('my-project.mydataset.mytable')
            TableRef...(DatasetRef...('my-project', 'mydataset'), 'mytable')

        Raises:
            ValueError:
                If ``table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        from google.cloud.bigquery.dataset import DatasetReference

        project, dataset_id, bare_table_id = _helpers._parse_3_part_id(
            table_id, default_project=default_project, property_name="table_id"
        )
        parent = DatasetReference(project, dataset_id)
        return cls(parent, bare_table_id)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "TableReference":
        """Factory: construct a table reference given its API representation

        Args:
            resource (Dict[str, object]):
                Table reference representation returned from the API

        Returns:
            google.cloud.bigquery.table.TableReference:
                Table reference parsed from ``resource``.
        """
        from google.cloud.bigquery.dataset import DatasetReference

        # All three keys are required; they are read in this order before
        # anything is constructed.
        project, dataset_id, table_id = (
            resource[key] for key in ("projectId", "datasetId", "tableId")
        )
        return cls(DatasetReference(project, dataset_id), table_id)

    def to_api_repr(self) -> dict:
        """Construct the API resource representation of this table reference.

        Returns:
            Dict[str, object]: Table reference represented as an API resource
        """
        # A deep copy, so callers cannot mutate this reference's state.
        snapshot = copy.deepcopy(self._properties)
        return snapshot

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Install the ``google-cloud-bigquery-storage`` package to use this
        feature.

        If the ``table_id`` contains a partition identifier (e.g.
        ``my_table$201812``) or a snapshot identifier (e.g.
        ``mytable@1234567890``), it is ignored. Use
        :class:`google.cloud.bigquery_storage.types.ReadSession.TableReadOptions`
        to filter rows by partition. Use
        :class:`google.cloud.bigquery_storage.types.ReadSession.TableModifiers`
        to select a specific snapshot to read from.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """
        # Cut everything from the first "@" onwards, then everything from the
        # first "$" onwards in what remains.
        without_snapshot = self.table_id.partition("@")[0]
        bare_table_id = without_snapshot.partition("$")[0]

        return f"projects/{self.project}/datasets/{self.dataset_id}/tables/{bare_table_id}"

    def __str__(self):
        # Dotted "project.dataset.table" form.
        return f"{self.project}.{self.dataset_id}.{self.table_id}"

    def __repr__(self):
        from google.cloud.bigquery.dataset import DatasetReference

        # Rebuild the dataset reference so its own repr is embedded.
        parent = DatasetReference(self.project, self.dataset_id)
        return f"TableReference({parent!r}, '{self.table_id}')"

360

361

362class Table(_TableBase):

363 """Tables represent a set of rows whose values correspond to a schema.

364

365 See

366 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource-table

367

368 Args:

369 table_ref (Union[google.cloud.bigquery.table.TableReference, str]):

370 A pointer to a table. If ``table_ref`` is a string, it must

371 included a project ID, dataset ID, and table ID, each separated

372 by ``.``.

373 schema (Optional[Sequence[Union[ \

374 :class:`~google.cloud.bigquery.schema.SchemaField`, \

375 Mapping[str, Any] \

376 ]]]):

377 The table's schema. If any item is a mapping, its content must be

378 compatible with

379 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.

380 """

381

382 _PROPERTY_TO_API_FIELD: Dict[str, Any] = {

383 **_TableBase._PROPERTY_TO_API_FIELD,

384 "biglake_configuration": "biglakeConfiguration",

385 "clustering_fields": "clustering",

386 "created": "creationTime",

387 "description": "description",

388 "encryption_configuration": "encryptionConfiguration",

389 "etag": "etag",

390 "expires": "expirationTime",

391 "external_data_configuration": "externalDataConfiguration",

392 "friendly_name": "friendlyName",

393 "full_table_id": "id",

394 "labels": "labels",

395 "location": "location",

396 "modified": "lastModifiedTime",

397 "mview_enable_refresh": "materializedView",

398 "mview_last_refresh_time": ["materializedView", "lastRefreshTime"],

399 "mview_query": "materializedView",

400 "mview_refresh_interval": "materializedView",

401 "mview_allow_non_incremental_definition": "materializedView",

402 "num_bytes": "numBytes",

403 "num_rows": "numRows",

404 "partition_expiration": "timePartitioning",

405 "partitioning_type": "timePartitioning",

406 "range_partitioning": "rangePartitioning",

407 "time_partitioning": "timePartitioning",

408 "schema": ["schema", "fields"],

409 "snapshot_definition": "snapshotDefinition",

410 "clone_definition": "cloneDefinition",

411 "streaming_buffer": "streamingBuffer",

412 "self_link": "selfLink",

413 "type": "type",

414 "view_use_legacy_sql": "view",

415 "view_query": "view",

416 "require_partition_filter": "requirePartitionFilter",

417 "table_constraints": "tableConstraints",

418 "max_staleness": "maxStaleness",

419 "resource_tags": "resourceTags",

420 "external_catalog_table_options": "externalCatalogTableOptions",

421 "foreign_type_info": ["schema", "foreignTypeInfo"],

422 }

423

def __init__(self, table_ref, schema=None) -> None:
    """Initialise the table resource from a table pointer and optional schema.

    Args:
        table_ref: Pointer to the table; it is normalised with
            ``_table_arg_to_table_ref`` before use.
        schema: Optional schema. When given, it is assigned through the
            ``schema`` property so that the setter validates it.
    """
    reference = _table_arg_to_table_ref(table_ref)
    # A new resource always starts out with an empty labels mapping.
    self._properties: Dict[str, Any] = {
        "tableReference": reference.to_api_repr(),
        "labels": {},
    }
    if schema is not None:
        # Let the @property do validation.
        self.schema = schema

433

434 reference = property(_reference_getter)

435

@property
def biglake_configuration(self):
    """google.cloud.bigquery.table.BigLakeConfiguration: Configuration
    for managed tables for Apache Iceberg.

    :data:`None` is returned when no configuration is stored.

    See https://cloud.google.com/bigquery/docs/iceberg-tables for more information.
    """
    api_field = self._PROPERTY_TO_API_FIELD["biglake_configuration"]
    resource = self._properties.get(api_field)
    if resource is None:
        return None
    return BigLakeConfiguration.from_api_repr(resource)


@biglake_configuration.setter
def biglake_configuration(self, value):
    # None is stored as-is; anything else is stored in its API representation.
    api_repr = None if value is None else value.to_api_repr()
    api_field = self._PROPERTY_TO_API_FIELD["biglake_configuration"]
    self._properties[api_field] = api_repr

458

@property
def require_partition_filter(self):
    """bool: If set to true, queries over the partitioned table require a
    partition filter that can be used for partition elimination to be
    specified.
    """
    api_field = self._PROPERTY_TO_API_FIELD["require_partition_filter"]
    return self._properties.get(api_field)


@require_partition_filter.setter
def require_partition_filter(self, value):
    # The value is stored as given; no validation is done here.
    api_field = self._PROPERTY_TO_API_FIELD["require_partition_filter"]
    self._properties[api_field] = value

474

@property
def schema(self):
    """Sequence[Union[:class:`~google.cloud.bigquery.schema.SchemaField`, Mapping[str, Any]]]:
    Table's schema.

    Reading returns an empty list when no schema fields are stored.

    Raises:
        Exception:
            If ``schema`` is not a sequence, or if any item in the sequence
            is not a :class:`~google.cloud.bigquery.schema.SchemaField`
            instance or a compatible mapping representation of the field.

    .. Note::
        If you are referencing a schema for an external catalog table such
        as a Hive table, it will also be necessary to populate the foreign_type_info
        attribute. This is not necessary if defining the schema for a BigQuery table.

        For details, see:
        https://cloud.google.com/bigquery/docs/external-tables
        https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets

    """
    api_field = self._PROPERTY_TO_API_FIELD["schema"]
    fields = _helpers._get_sub_prop(self._properties, api_field)
    # A missing, None or empty field list all read back as an empty list.
    return _parse_schema_resource(fields) if fields else []


@schema.setter
def schema(self, value):
    api_field = self._PROPERTY_TO_API_FIELD["schema"]

    if value is not None and not isinstance(value, Sequence):
        raise TypeError("Schema must be a Sequence (e.g. a list) or None.")

    if value is None:
        resource = None
    else:
        # Normalise the items to schema fields, then to API form.
        resource = _build_schema_resource(_to_schema_fields(value))

    _helpers._set_sub_prop(self._properties, api_field, resource)

527

@property
def labels(self):
    """Dict[str, str]: Labels for the table.

    This method always returns a dict. To change a table's labels,
    modify the dict, then call ``Client.update_table``. To delete a
    label, set its value to :data:`None` before updating.

    Raises:
        ValueError: If ``value`` type is invalid.
    """
    api_field = self._PROPERTY_TO_API_FIELD["labels"]
    # setdefault() stores the new dict, so in-place edits made by the caller
    # end up in the resource.
    return self._properties.setdefault(api_field, {})


@labels.setter
def labels(self, value):
    if isinstance(value, dict):
        self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value
        return
    raise ValueError("Pass a dict")

546

@property
def encryption_configuration(self):
    """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
    encryption configuration for the table.

    Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
    if using default encryption.

    See `protecting data with Cloud KMS keys
    <https://cloud.google.com/bigquery/docs/customer-managed-encryption>`_
    in the BigQuery documentation.
    """
    api_field = self._PROPERTY_TO_API_FIELD["encryption_configuration"]
    resource = self._properties.get(api_field)
    if resource is None:
        return None
    return EncryptionConfiguration.from_api_repr(resource)


@encryption_configuration.setter
def encryption_configuration(self, value):
    # None is stored as-is; anything else is stored in its API representation.
    api_repr = None if value is None else value.to_api_repr()
    api_field = self._PROPERTY_TO_API_FIELD["encryption_configuration"]
    self._properties[api_field] = api_repr

574

@property
def created(self):
    """Union[datetime.datetime, None]: Datetime at which the table was
    created (:data:`None` until set from the server).
    """
    millis = self._properties.get(self._PROPERTY_TO_API_FIELD["created"])
    if millis is None:
        return None
    # The stored value is in milliseconds; the helper takes microseconds.
    return google.cloud._helpers._datetime_from_microseconds(1000.0 * float(millis))

586

@property
def etag(self):
    """Union[str, None]: ETag for the table resource (:data:`None` until
    set from the server).
    """
    api_field = self._PROPERTY_TO_API_FIELD["etag"]
    return self._properties.get(api_field)

593

@property
def modified(self):
    """Union[datetime.datetime, None]: Datetime at which the table was last
    modified (:data:`None` until set from the server).
    """
    millis = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"])
    if millis is None:
        return None
    # The stored value is in milliseconds; the helper takes microseconds.
    return google.cloud._helpers._datetime_from_microseconds(1000.0 * float(millis))

605

@property
def num_bytes(self):
    """Union[int, None]: The size of the table in bytes (:data:`None` until
    set from the server).
    """
    raw_size = self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"])
    return _helpers._int_or_none(raw_size)

614

@property
def num_rows(self):
    """Union[int, None]: The number of rows in the table (:data:`None`
    until set from the server).
    """
    raw_count = self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"])
    return _helpers._int_or_none(raw_count)

623

@property
def self_link(self):
    """Union[str, None]: URL for the table resource (:data:`None` until set
    from the server).
    """
    api_field = self._PROPERTY_TO_API_FIELD["self_link"]
    return self._properties.get(api_field)

630

@property
def full_table_id(self):
    """Union[str, None]: ID for the table (:data:`None` until set from the
    server).

    In the format ``project-id:dataset_id.table_id``.
    """
    api_field = self._PROPERTY_TO_API_FIELD["full_table_id"]
    return self._properties.get(api_field)

639

@property
def table_type(self):
    """Union[str, None]: The type of the table (:data:`None` until set from
    the server).

    Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or
    ``'EXTERNAL'``.
    """
    api_field = self._PROPERTY_TO_API_FIELD["type"]
    return self._properties.get(api_field)

649

@property
def range_partitioning(self):
    """Optional[google.cloud.bigquery.table.RangePartitioning]:
    Configures range-based partitioning for a table.

    .. note::
        **Beta**. The integer range partitioning feature is in a
        pre-release state and might change or have limited support.

    Only specify at most one of
    :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
    :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.

    Raises:
        ValueError:
            If the value is not
            :class:`~google.cloud.bigquery.table.RangePartitioning` or
            :data:`None`.
    """
    api_field = self._PROPERTY_TO_API_FIELD["range_partitioning"]
    stored = self._properties.get(api_field)
    if stored is None:
        return None
    # The wrapper is built directly around the stored resource dict.
    return RangePartitioning(_properties=stored)


@range_partitioning.setter
def range_partitioning(self, value):
    if value is not None and not isinstance(value, RangePartitioning):
        raise ValueError(
            "Expected value to be RangePartitioning or None, got {}.".format(value)
        )
    # Store the wrapper's underlying dict itself (not a copy), or None.
    resource = None if value is None else value._properties
    self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource

685

@property
def time_partitioning(self):
    """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based
    partitioning for a table.

    Only specify at most one of
    :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
    :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.

    Raises:
        ValueError:
            If the value is not
            :class:`~google.cloud.bigquery.table.TimePartitioning` or
            :data:`None`.
    """
    api_field = self._PROPERTY_TO_API_FIELD["time_partitioning"]
    stored = self._properties.get(api_field)
    if stored is None:
        return None
    return TimePartitioning.from_api_repr(stored)


@time_partitioning.setter
def time_partitioning(self, value):
    if value is not None and not isinstance(value, TimePartitioning):
        raise ValueError(
            "value must be google.cloud.bigquery.table.TimePartitioning or None"
        )
    # None is stored as-is; a TimePartitioning is stored in API form.
    api_repr = None if value is None else value.to_api_repr()
    self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr

715

@property
def partitioning_type(self):
    """Union[str, None]: Time partitioning of the table if it is
    partitioned (Defaults to :data:`None`).

    Emits a :class:`PendingDeprecationWarning` on every access.
    """
    warnings.warn(
        "This method will be deprecated in future versions. Please use "
        "Table.time_partitioning.type_ instead.",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    partitioning = self.time_partitioning
    if partitioning is None:
        return None
    return partitioning.type_


@partitioning_type.setter
def partitioning_type(self, value):
    warnings.warn(
        "This method will be deprecated in future versions. Please use "
        "Table.time_partitioning.type_ instead.",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"]
    # Start from an empty time-partitioning resource when none exists yet.
    if self.time_partitioning is None:
        self._properties[api_field] = {}
    partitioning_resource = self._properties[api_field]
    partitioning_resource["type"] = value

743

@property
def partition_expiration(self):
    """Union[int, None]: Expiration time in milliseconds for a partition.

    If :attr:`partition_expiration` is set and :attr:`type_` is
    not set, :attr:`type_` will default to
    :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`.

    Emits a :class:`PendingDeprecationWarning` on every access.
    """
    warnings.warn(
        "This method will be deprecated in future versions. Please use "
        "Table.time_partitioning.expiration_ms instead.",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    partitioning = self.time_partitioning
    if partitioning is None:
        return None
    return partitioning.expiration_ms


@partition_expiration.setter
def partition_expiration(self, value):
    warnings.warn(
        "This method will be deprecated in future versions. Please use "
        "Table.time_partitioning.expiration_ms instead.",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"]

    # Without an existing time-partitioning resource, create one that uses
    # DAY partitioning.
    if self.time_partitioning is None:
        self._properties[api_field] = {"type": TimePartitioningType.DAY}

    # The expiration is stored as a string, or None to clear it.
    expiration = None if value is None else str(value)
    self._properties[api_field]["expirationMs"] = expiration

778

@property
def clustering_fields(self):
    """Union[List[str], None]: Fields defining clustering for the table

    (Defaults to :data:`None`).

    Clustering fields are immutable after table creation.

    .. note::

       BigQuery supports clustering for both partitioned and
       non-partitioned tables.
    """
    clustering = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"])
    if clustering is None:
        return None
    # A fresh list is returned, so mutating it does not touch the resource.
    return list(clustering.get("fields", ()))


@clustering_fields.setter
def clustering_fields(self, value):
    api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"]

    if value is None:
        # In order to allow unsetting clustering fields completely, we
        # explicitly set this property to None (as opposed to merely
        # removing the key).
        self._properties[api_field] = None
        return

    clustering = self._properties.get(api_field)
    if clustering is None:
        # Covers both a missing key and an explicit None left behind by a
        # previous unset. dict.setdefault() would hand back that stored None
        # and the item assignment below would raise TypeError.
        clustering = self._properties[api_field] = {}
    clustering["fields"] = value

811

@property
def description(self):
    """Union[str, None]: Description of the table (defaults to
    :data:`None`).

    Raises:
        ValueError: For invalid value types.
    """
    api_field = self._PROPERTY_TO_API_FIELD["description"]
    return self._properties.get(api_field)


@description.setter
def description(self, value):
    # Only a string or None is accepted.
    if value is not None and not isinstance(value, str):
        raise ValueError("Pass a string, or None")
    api_field = self._PROPERTY_TO_API_FIELD["description"]
    self._properties[api_field] = value

827

@property
def expires(self):
    """Union[datetime.datetime, None]: Datetime at which the table will be
    deleted.

    Raises:
        ValueError: For invalid value types.
    """
    millis = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"])
    if millis is None:
        return None
    # The stored value is in milliseconds; the helper takes microseconds.
    return google.cloud._helpers._datetime_from_microseconds(1000.0 * float(millis))


@expires.setter
def expires(self, value):
    # Only a datetime or None is accepted.
    if value is not None and not isinstance(value, datetime.datetime):
        raise ValueError("Pass a datetime, or None")
    millis = google.cloud._helpers._millis_from_datetime(value)
    api_field = self._PROPERTY_TO_API_FIELD["expires"]
    self._properties[api_field] = _helpers._str_or_none(millis)

851

@property
def friendly_name(self):
    """Union[str, None]: Title of the table (defaults to :data:`None`).

    Raises:
        ValueError: For invalid value types.
    """
    api_field = self._PROPERTY_TO_API_FIELD["friendly_name"]
    return self._properties.get(api_field)


@friendly_name.setter
def friendly_name(self, value):
    # Only a string or None is accepted.
    if value is not None and not isinstance(value, str):
        raise ValueError("Pass a string, or None")
    api_field = self._PROPERTY_TO_API_FIELD["friendly_name"]
    self._properties[api_field] = value

866

@property
def location(self):
    """Union[str, None]: Location in which the table is hosted

    Defaults to :data:`None`.
    """
    api_field = self._PROPERTY_TO_API_FIELD["location"]
    return self._properties.get(api_field)

874

@property
def view_query(self):
    """Union[str, None]: SQL query defining the table as a view (defaults
    to :data:`None`).

    By default, the query is treated as Standard SQL. To use Legacy
    SQL, set :attr:`view_use_legacy_sql` to :data:`True`.

    Raises:
        ValueError: For invalid value types.
    """
    query_path = [self._PROPERTY_TO_API_FIELD["view_query"], "query"]
    return _helpers._get_sub_prop(self._properties, query_path)


@view_query.setter
def view_query(self, value):
    if not isinstance(value, str):
        raise ValueError("Pass a string")

    view_key = self._PROPERTY_TO_API_FIELD["view_query"]
    _helpers._set_sub_prop(self._properties, [view_key, "query"], value)

    # The service defaults useLegacySql to True, but this client uses
    # Standard SQL by default. A flag that is already set to True or False
    # is left untouched.
    view_resource = self._properties[view_key]
    if view_resource.get("useLegacySql") is None:
        view_resource["useLegacySql"] = False


@view_query.deleter
def view_query(self):
    """Delete SQL query defining the table as a view."""
    # Drops the whole view resource; a missing key is not an error.
    self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None)

906

# Reading is delegated to the module-level getter.
view_use_legacy_sql = property(_view_use_legacy_sql_getter)


@view_use_legacy_sql.setter  # type: ignore # (redefinition from above)
def view_use_legacy_sql(self, value):
    # Only real booleans are accepted.
    if not isinstance(value, bool):
        raise ValueError("Pass a boolean")

    # Look the API field up under this property's own key. It maps to the
    # same "view" resource as "view_query", so the stored result is the same.
    api_field = self._PROPERTY_TO_API_FIELD["view_use_legacy_sql"]
    # Create the view resource when it is missing or explicitly None.
    if self._properties.get(api_field) is None:
        self._properties[api_field] = {}
    self._properties[api_field]["useLegacySql"] = value

918

@property
def mview_query(self):
    """Optional[str]: SQL query defining the table as a materialized
    view (defaults to :data:`None`).
    """
    query_path = [self._PROPERTY_TO_API_FIELD["mview_query"], "query"]
    return _helpers._get_sub_prop(self._properties, query_path)


@mview_query.setter
def mview_query(self, value):
    # NOTE(review): the value always goes through str(), so assigning None
    # stores the text "None" -- confirm this is intended.
    query_path = [self._PROPERTY_TO_API_FIELD["mview_query"], "query"]
    _helpers._set_sub_prop(self._properties, query_path, str(value))


@mview_query.deleter
def mview_query(self):
    """Delete SQL query defining the table as a materialized view."""
    # Drops the whole materialized-view resource; a missing key is not an
    # error.
    self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None)

936

@property
def mview_last_refresh_time(self):
    """Optional[datetime.datetime]: Datetime at which the materialized view was last
    refreshed (:data:`None` until set from the server).
    """
    api_path = self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"]
    refresh_millis = _helpers._get_sub_prop(self._properties, api_path)
    if refresh_millis is None:
        return None
    # The stored value is in milliseconds; the helper takes microseconds.
    return google.cloud._helpers._datetime_from_microseconds(1000 * int(refresh_millis))

950

@property
def mview_enable_refresh(self):
    """Optional[bool]: Whether the materialized view is refreshed
    automatically when the base table is updated. The default value is
    :data:`True`.
    """
    path = [self._PROPERTY_TO_API_FIELD["mview_enable_refresh"], "enableRefresh"]
    return _helpers._get_sub_prop(self._properties, path)

@mview_enable_refresh.setter
def mview_enable_refresh(self, value):
    """Store ``value`` under the ``enableRefresh`` sub-property."""
    path = [self._PROPERTY_TO_API_FIELD["mview_enable_refresh"], "enableRefresh"]
    return _helpers._set_sub_prop(self._properties, path, value)

965

@property
def mview_refresh_interval(self):
    """Optional[datetime.timedelta]: The maximum frequency at which this
    materialized view will be refreshed. The default value is 1800000
    milliseconds (30 minutes).
    """
    path = [self._PROPERTY_TO_API_FIELD["mview_refresh_interval"], "refreshIntervalMs"]
    millis = _helpers._get_sub_prop(self._properties, path)
    if millis is None:
        return None
    return datetime.timedelta(milliseconds=int(millis))

@mview_refresh_interval.setter
def mview_refresh_interval(self, value):
    """Store the refresh interval as a string of whole milliseconds.

    Args:
        value (Optional[datetime.timedelta]):
            The interval; :data:`None` is stored unchanged.
    """
    one_millisecond = datetime.timedelta(milliseconds=1)
    # Floor-dividing two timedeltas yields an int count of milliseconds.
    interval_ms = None if value is None else str(value // one_millisecond)

    path = [self._PROPERTY_TO_API_FIELD["mview_refresh_interval"], "refreshIntervalMs"]
    _helpers._set_sub_prop(self._properties, path, interval_ms)

992

@property
def mview_allow_non_incremental_definition(self):
    """Optional[bool]: Declares the intention to construct a materialized
    view that isn't refreshed incrementally.
    The default value is :data:`False`.
    """
    path = [
        self._PROPERTY_TO_API_FIELD["mview_allow_non_incremental_definition"],
        "allowNonIncrementalDefinition",
    ]
    return _helpers._get_sub_prop(self._properties, path)

@mview_allow_non_incremental_definition.setter
def mview_allow_non_incremental_definition(self, value):
    """Store ``value`` under ``allowNonIncrementalDefinition``."""
    path = [
        self._PROPERTY_TO_API_FIELD["mview_allow_non_incremental_definition"],
        "allowNonIncrementalDefinition",
    ]
    _helpers._set_sub_prop(self._properties, path, value)

1014

@property
def streaming_buffer(self):
    """google.cloud.bigquery.StreamingBuffer: Information about a table's
    streaming buffer, or :data:`None` when the resource has none.
    """
    raw = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"])
    if raw is None:
        return None
    return StreamingBuffer(raw)

1023

@property
def external_data_configuration(self):
    """Union[google.cloud.bigquery.ExternalConfig, None]: Configuration for
    an external data source (defaults to :data:`None`).

    Raises:
        ValueError: For invalid value types.
    """
    key = self._PROPERTY_TO_API_FIELD["external_data_configuration"]
    raw = self._properties.get(key)
    if raw is None:
        return None
    return ExternalConfig.from_api_repr(raw)

@external_data_configuration.setter
def external_data_configuration(self, value):
    """Store the API representation of ``value`` (or :data:`None`)."""
    if value is None:
        api_repr = None
    elif isinstance(value, ExternalConfig):
        api_repr = value.to_api_repr()
    else:
        raise ValueError("Pass an ExternalConfig or None")
    key = self._PROPERTY_TO_API_FIELD["external_data_configuration"]
    self._properties[key] = api_repr

1049

@property
def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
    """Information about the snapshot. This value is set via snapshot creation.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition
    """
    raw = self._properties.get(self._PROPERTY_TO_API_FIELD["snapshot_definition"])
    return None if raw is None else SnapshotDefinition(raw)

1062

@property
def clone_definition(self) -> Optional["CloneDefinition"]:
    """Information about the clone. This value is set via clone creation.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition
    """
    raw = self._properties.get(self._PROPERTY_TO_API_FIELD["clone_definition"])
    return None if raw is None else CloneDefinition(raw)

1075

@property
def table_constraints(self) -> Optional["TableConstraints"]:
    """Tables Primary Key and Foreign Key information.

    Returns:
        Optional[TableConstraints]:
            Constraints parsed from the stored API representation, or
            :data:`None` if nothing is stored.
    """
    raw = self._properties.get(self._PROPERTY_TO_API_FIELD["table_constraints"])
    if raw is None:
        return None
    return TableConstraints.from_api_repr(raw)

@table_constraints.setter
def table_constraints(self, value):
    """Store the table's Primary Key and Foreign Key information.

    Args:
        value (Optional[TableConstraints]):
            Constraints to store, or :data:`None` to store no constraints.

    Raises:
        ValueError: If ``value`` is neither a ``TableConstraints`` nor
            :data:`None`.
    """
    if value is not None and not isinstance(value, TableConstraints):
        raise ValueError(
            "value must be google.cloud.bigquery.table.TableConstraints or None"
        )
    # Test for None explicitly rather than truthiness, so a valid
    # constraints object is always serialized.
    api_repr = None if value is None else value.to_api_repr()
    self._properties[self._PROPERTY_TO_API_FIELD["table_constraints"]] = api_repr

1096

@property
def resource_tags(self):
    """Dict[str, str]: Resource tags for the table.

    If no entry exists yet, an empty dict is stored and returned.

    See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags
    """
    key = self._PROPERTY_TO_API_FIELD["resource_tags"]
    return self._properties.setdefault(key, {})

@resource_tags.setter
def resource_tags(self, value):
    """Replace the resource tags.

    Raises:
        ValueError: If ``value`` is neither a dict nor :data:`None`.
    """
    if value is not None and not isinstance(value, dict):
        raise ValueError("resource_tags must be a dict or None")
    key = self._PROPERTY_TO_API_FIELD["resource_tags"]
    self._properties[key] = value

1112

@property
def external_catalog_table_options(
    self,
) -> Optional[external_config.ExternalCatalogTableOptions]:
    """Options defining open source compatible datasets living in the
    BigQuery catalog. Contains metadata of open source database, schema
    or namespace represented by the current dataset."""

    key = self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
    raw = self._properties.get(key)
    if raw is None:
        return None
    return external_config.ExternalCatalogTableOptions.from_api_repr(raw)

@external_catalog_table_options.setter
def external_catalog_table_options(
    self, value: Union[external_config.ExternalCatalogTableOptions, dict, None]
):
    """Store the options; a dict or :data:`None` is stored as given."""
    value = _helpers._isinstance_or_raise(
        value,
        (external_config.ExternalCatalogTableOptions, dict),
        none_allowed=True,
    )
    # Option objects are converted to their API form before storing.
    if isinstance(value, external_config.ExternalCatalogTableOptions):
        value = value.to_api_repr()
    key = self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
    self._properties[key] = value

1145

@property
def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
    """Optional. Specifies metadata of the foreign data type definition in
    field schema (TableFieldSchema.foreign_type_definition).

    Returns:
        Optional[schema.ForeignTypeInfo]:
            Foreign type information, or :data:`None` if not set.

    .. Note::
        foreign_type_info is only required if you are referencing an
        external catalog such as a Hive table.
        For details, see:
        https://cloud.google.com/bigquery/docs/external-tables
        https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
    """

    api_field = self._PROPERTY_TO_API_FIELD["foreign_type_info"]
    raw = _helpers._get_sub_prop(self._properties, api_field)
    if raw is None:
        return None
    return _schema.ForeignTypeInfo.from_api_repr(raw)

@foreign_type_info.setter
def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
    """Store the foreign type info; a dict or :data:`None` is stored as given."""
    value = _helpers._isinstance_or_raise(
        value,
        (_schema.ForeignTypeInfo, dict),
        none_allowed=True,
    )
    if isinstance(value, _schema.ForeignTypeInfo):
        value = value.to_api_repr()
    api_field = self._PROPERTY_TO_API_FIELD["foreign_type_info"]
    _helpers._set_sub_prop(self._properties, api_field, value)

1180

@classmethod
def from_string(cls, full_table_id: str) -> "Table":
    """Construct a table from fully-qualified table ID.

    Args:
        full_table_id (str):
            A fully-qualified table ID in standard SQL format. Must
            include a project ID, dataset ID, and table ID, each
            separated by ``.``.

    Returns:
        Table: Table parsed from ``full_table_id``.

    Examples:
        >>> Table.from_string('my-project.mydataset.mytable')
        Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))

    Raises:
        ValueError:
            If ``full_table_id`` is not a fully-qualified table ID in
            standard SQL format.
    """
    table_ref = TableReference.from_string(full_table_id)
    return cls(table_ref)

1204

@classmethod
def from_api_repr(cls, resource: dict) -> "Table":
    """Factory: construct a table given its API representation.

    The returned table keeps a reference to ``resource`` itself as its
    properties; the dict is not copied.

    Args:
        resource (Dict[str, object]):
            Table resource representation from the API

    Returns:
        google.cloud.bigquery.table.Table: Table parsed from ``resource``.

    Raises:
        KeyError:
            If the ``resource`` lacks the key ``'tableReference'``, or if
            the ``dict`` stored within the key ``'tableReference'`` lacks
            the key ``'tableId'``.
    """
    from google.cloud.bigquery import dataset

    has_identity = (
        "tableReference" in resource and "tableId" in resource["tableReference"]
    )
    if not has_identity:
        raise KeyError(
            "Resource lacks required identity information:"
            '["tableReference"]["tableId"]'
        )

    api_fields = cls._PROPERTY_TO_API_FIELD
    project_id = _helpers._get_sub_prop(resource, api_fields["project"])
    table_id = _helpers._get_sub_prop(resource, api_fields["table_id"])
    dataset_id = _helpers._get_sub_prop(resource, api_fields["dataset_id"])

    table_ref = dataset.DatasetReference(project_id, dataset_id).table(table_id)
    table = cls(table_ref)
    # Replace the freshly built properties with the caller's resource.
    table._properties = resource
    return table

1247

def to_api_repr(self) -> dict:
    """Constructs the API resource of this table.

    Returns:
        Dict[str, object]:
            A deep copy of the table's properties, so changes to the
            result do not affect this table.
    """
    resource = copy.deepcopy(self._properties)
    return resource

1255

def to_bqstorage(self) -> str:
    """Construct a BigQuery Storage API representation of this table.

    Delegates to the table's reference.

    Returns:
        str: A reference to this table in the BigQuery Storage API.
    """
    table_ref = self.reference
    return table_ref.to_bqstorage()

1263

def _build_resource(self, filter_fields):
    """Generate a resource for ``update`` from the given ``filter_fields``."""
    resource = _helpers._build_resource_from_properties(self, filter_fields)
    return resource

1267

def __repr__(self):
    # Wrap the reference's own repr in "Table(...)".
    return f"Table({self.reference!r})"

1270

def __str__(self):
    # Dotted "project.dataset.table" form.
    return "{}.{}.{}".format(self.project, self.dataset_id, self.table_id)

1273

@property
def max_staleness(self):
    """Union[str, None]: The maximum staleness of data that could be
    returned when the table is queried.

    Staleness is encoded as a string encoding of the SQL IntervalValue
    type. This property is optional and defaults to :data:`None`.

    Returns:
        Optional[str]: The stored staleness interval string, if any.
    """
    key = self._PROPERTY_TO_API_FIELD["max_staleness"]
    return self._properties.get(key)

@max_staleness.setter
def max_staleness(self, value):
    """Set the maximum staleness for the table.

    Only the type is checked here; the string's format is not validated.

    Args:
        value (Optional[str]): The staleness interval string, or
            :data:`None`.

    Raises:
        ValueError: If the value is not None and not a string.
    """
    if not (value is None or isinstance(value, str)):
        raise ValueError("max_staleness must be a string or None")

    key = self._PROPERTY_TO_API_FIELD["max_staleness"]
    self._properties[key] = value

1307

1308

class TableListItem(_TableBase):
    """A read-only table resource from a list operation.

    For performance reasons, the BigQuery API only includes some of the table
    properties when listing tables. Notably,
    :attr:`~google.cloud.bigquery.table.Table.schema` and
    :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing.

    For a full list of the properties that the BigQuery API returns, see the
    `REST documentation for tables.list
    <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list>`_.

    Args:
        resource (Dict[str, object]):
            A table-like resource object from a table list response. A
            ``tableReference`` property is required. The dict is stored as
            given, not copied.

    Raises:
        ValueError:
            If ``tableReference`` or one of its required members is missing
            from ``resource``.
    """

    def __init__(self, resource):
        if "tableReference" not in resource:
            raise ValueError("resource must contain a tableReference value")

        table_ref = resource["tableReference"]
        # Checked in this order so the first missing member is reported.
        for required_key in ("projectId", "datasetId", "tableId"):
            if required_key not in table_ref:
                raise ValueError(
                    "resource['tableReference'] must contain a {} value".format(
                        required_key
                    )
                )

        self._properties = resource

    @property
    def created(self):
        """Union[datetime.datetime, None]: Datetime at which the table was
        created (:data:`None` until set from the server).
        """
        millis = self._properties.get("creationTime")
        if millis is None:
            return None
        # The stored value is in milliseconds; the helper takes microseconds.
        return google.cloud._helpers._datetime_from_microseconds(
            float(millis) * 1000.0
        )

    @property
    def expires(self):
        """Union[datetime.datetime, None]: Datetime at which the table will be
        deleted.
        """
        millis = self._properties.get("expirationTime")
        if millis is None:
            return None
        # The stored value is in milliseconds; the helper takes microseconds.
        return google.cloud._helpers._datetime_from_microseconds(
            float(millis) * 1000.0
        )

    reference = property(_reference_getter)

    @property
    def labels(self):
        """Dict[str, str]: Labels for the table.

        This method always returns a dict. To change a table's labels,
        modify the dict, then call ``Client.update_table``. To delete a
        label, set its value to :data:`None` before updating.
        """
        return self._properties.setdefault("labels", {})

    @property
    def full_table_id(self):
        """Union[str, None]: ID for the table (:data:`None` until set from the
        server).

        In the format ``project_id:dataset_id.table_id``.
        """
        return self._properties.get("id")

    @property
    def table_type(self):
        """Union[str, None]: The type of the table (:data:`None` until set from
        the server).

        Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``.
        """
        return self._properties.get("type")

    @property
    def time_partitioning(self):
        """google.cloud.bigquery.table.TimePartitioning: Configures time-based
        partitioning for a table (:data:`None` when the resource has no
        ``timePartitioning`` entry).
        """
        raw = self._properties.get("timePartitioning")
        if raw is None:
            return None
        return TimePartitioning.from_api_repr(raw)

    @property
    def partitioning_type(self):
        """Union[str, None]: Time partitioning of the table if it is
        partitioned (Defaults to :data:`None`).
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "TableListItem.time_partitioning.type_ instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        partitioning = self.time_partitioning
        if partitioning is None:
            return None
        return partitioning.type_

    @property
    def partition_expiration(self):
        """Union[int, None]: Expiration time in milliseconds for a partition.

        If this property is set and :attr:`type_` is not set, :attr:`type_`
        will default to :attr:`TimePartitioningType.DAY`.
        """
        warnings.warn(
            "This method will be deprecated in future versions. Please use "
            "TableListItem.time_partitioning.expiration_ms instead.",
            PendingDeprecationWarning,
            stacklevel=2,
        )
        partitioning = self.time_partitioning
        if partitioning is None:
            return None
        return partitioning.expiration_ms

    @property
    def friendly_name(self):
        """Union[str, None]: Title of the table (defaults to :data:`None`)."""
        return self._properties.get("friendlyName")

    view_use_legacy_sql = property(_view_use_legacy_sql_getter)

    @property
    def clustering_fields(self):
        """Union[List[str], None]: Fields defining clustering for the table

        (Defaults to :data:`None`).

        Clustering fields are immutable after table creation.

        .. note::

           BigQuery supports clustering for both partitioned and
           non-partitioned tables.
        """
        clustering = self._properties.get("clustering")
        if clustering is None:
            return None
        # A fresh list is returned, so callers cannot mutate the stored one.
        return list(clustering.get("fields", ()))

    @classmethod
    def from_string(cls, full_table_id: str) -> "TableListItem":
        """Construct a table list item from a fully-qualified table ID.

        Args:
            full_table_id (str):
                A fully-qualified table ID in standard SQL format. Must
                include a project ID, dataset ID, and table ID, each
                separated by ``.``.

        Returns:
            TableListItem:
                Item whose only property is the ``tableReference`` parsed
                from ``full_table_id``.

        Raises:
            ValueError:
                If ``full_table_id`` is not a fully-qualified table ID in
                standard SQL format.
        """
        table_ref = TableReference.from_string(full_table_id)
        return cls({"tableReference": table_ref.to_api_repr()})

    def to_bqstorage(self) -> str:
        """Construct a BigQuery Storage API representation of this table.

        Returns:
            str: A reference to this table in the BigQuery Storage API.
        """
        return self.reference.to_bqstorage()

    def to_api_repr(self) -> dict:
        """Constructs the API resource of this table

        Returns:
            Dict[str, object]: Table represented as an API resource
        """
        return copy.deepcopy(self._properties)

1507

1508

1509def _row_from_mapping(mapping, schema):

1510 """Convert a mapping to a row tuple using the schema.

1511

1512 Args:

1513 mapping (Dict[str, object])

1514 Mapping of row data: must contain keys for all required fields in

1515 the schema. Keys which do not correspond to a field in the schema

1516 are ignored.

1517 schema (List[google.cloud.bigquery.schema.SchemaField]):

1518 The schema of the table destination for the rows

1519

1520 Returns:

1521 Tuple[object]:

1522 Tuple whose elements are ordered according to the schema.

1523

1524 Raises:

1525 ValueError: If schema is empty.

1526 """

1527 if len(schema) == 0:

1528 raise ValueError(_TABLE_HAS_NO_SCHEMA)

1529

1530 row = []

1531 for field in schema:

1532 if field.mode == "REQUIRED":

1533 row.append(mapping[field.name])

1534 elif field.mode == "REPEATED":

1535 row.append(mapping.get(field.name, ()))

1536 elif field.mode == "NULLABLE":

1537 row.append(mapping.get(field.name))

1538 else:

1539 raise ValueError("Unknown field mode: {}".format(field.mode))

1540 return tuple(row)

1541

1542

class StreamingBuffer(object):
    """Information about a table's streaming buffer.

    See https://cloud.google.com/bigquery/streaming-data-into-bigquery.

    Each attribute is :data:`None` when its key is absent from ``resource``.

    Args:
        resource (Dict[str, object]):
            streaming buffer representation returned from the API
    """

    def __init__(self, resource):
        self.estimated_bytes = (
            int(resource["estimatedBytes"]) if "estimatedBytes" in resource else None
        )
        self.estimated_rows = (
            int(resource["estimatedRows"]) if "estimatedRows" in resource else None
        )
        if "oldestEntryTime" in resource:
            # Scale by 1000 because the helper takes microseconds.
            self.oldest_entry_time = google.cloud._helpers._datetime_from_microseconds(
                1000.0 * int(resource["oldestEntryTime"])
            )
        else:
            self.oldest_entry_time = None

1565

1566

class SnapshotDefinition:
    """Information about base table and snapshot time of the snapshot.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition

    Each attribute is :data:`None` when its key is absent from ``resource``.

    Args:
        resource: Snapshot definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        if "baseTableReference" in resource:
            base_ref = TableReference.from_api_repr(resource["baseTableReference"])
        else:
            base_ref = None
        self.base_table_reference = base_ref

        if "snapshotTime" in resource:
            taken_at = google.cloud._helpers._rfc3339_to_datetime(
                resource["snapshotTime"]
            )
        else:
            taken_at = None
        self.snapshot_time = taken_at

1588

1589

class CloneDefinition:
    """Information about base table and clone time of the clone.

    See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition

    Each attribute is :data:`None` when its key is absent from ``resource``.

    Args:
        resource: Clone definition representation returned from the API.
    """

    def __init__(self, resource: Dict[str, Any]):
        if "baseTableReference" in resource:
            base_ref = TableReference.from_api_repr(resource["baseTableReference"])
        else:
            base_ref = None
        self.base_table_reference = base_ref

        if "cloneTime" in resource:
            cloned_at = google.cloud._helpers._rfc3339_to_datetime(
                resource["cloneTime"]
            )
        else:
            cloned_at = None
        self.clone_time = cloned_at

1611

1612

class Row(object):
    """A BigQuery row.

    Values can be accessed by position (index), by key like a dict,
    or as properties.

    Args:
        values (Sequence[object]): The row values
        field_to_index (Dict[str, int]):
            A mapping from schema field names to indexes
    """

    # Choose unusual field names to try to avoid conflict with schema fields.
    __slots__ = ("_xxx_values", "_xxx_field_to_index")

    def __init__(self, values, field_to_index) -> None:
        self._xxx_values = values
        self._xxx_field_to_index = field_to_index

    def values(self):
        """Return the values included in this row.

        Returns:
            Sequence[object]: A deep copy of the row's values, of length
            ``len(row)``.
        """
        return copy.deepcopy(self._xxx_values)

    def keys(self) -> Iterable[str]:
        """Return the keys for using a row as a dict.

        Returns:
            Iterable[str]: The keys corresponding to the columns of a row

        Examples:

            >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys())
            ['x', 'y']
        """
        return self._xxx_field_to_index.keys()

    def items(self) -> Iterable[Tuple[str, Any]]:
        """Return items as ``(key, value)`` pairs.

        Returns:
            Iterable[Tuple[str, object]]:
                The ``(key, value)`` pairs representing this row. Each
                value is a deep copy of the stored value.

        Examples:

            >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items())
            [('x', 'a'), ('y', 'b')]
        """
        for key, index in self._xxx_field_to_index.items():
            yield (key, copy.deepcopy(self._xxx_values[index]))

    def get(self, key: str, default: Any = None) -> Any:
        """Return a value for key, with a default value if it does not exist.

        Args:
            key (str): The key of the column to access
            default (object):
                The default value to use if the key does not exist. (Defaults
                to :data:`None`.)

        Returns:
            object:
                The value associated with the provided key, or a default value.

        Examples:
            When the key exists, the value associated with it is returned.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x')
            'a'

            The default value is :data:`None` when the key does not exist.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z') is None
            True

            The default value can be overridden with the ``default`` parameter.

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
            ''

            >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '')
            ''
        """
        index = self._xxx_field_to_index.get(key)
        if index is None:
            return default
        return self._xxx_values[index]

    def __getattr__(self, name):
        """Resolve ``row.<field>`` for names not found by normal lookup.

        Raises:
            AttributeError: If ``name`` is not a field of this row.
        """
        # This hook only runs after normal lookup fails. If the missing name
        # is one of our own slots, the instance is not initialised yet (for
        # example while copy/pickle probes a freshly created object for
        # ``__setstate__``). Reading the slot below would re-enter this
        # method without bound, so fail fast instead.
        if name in Row.__slots__:
            raise AttributeError(name)
        value = self._xxx_field_to_index.get(name)
        if value is None:
            raise AttributeError("no row field {!r}".format(name))
        return self._xxx_values[value]

    def __len__(self):
        return len(self._xxx_values)

    def __getitem__(self, key):
        """Return a value by field name (``str``) or by position/slice.

        Raises:
            KeyError: If ``key`` is a string that is not a field name.
        """
        if isinstance(key, str):
            value = self._xxx_field_to_index.get(key)
            if value is None:
                raise KeyError("no row field {!r}".format(key))
            key = value
        return self._xxx_values[key]

    def __eq__(self, other):
        if not isinstance(other, Row):
            return NotImplemented
        return (
            self._xxx_values == other._xxx_values
            and self._xxx_field_to_index == other._xxx_field_to_index
        )

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # sort field dict by value, for determinism
        items = sorted(self._xxx_field_to_index.items(), key=operator.itemgetter(1))
        f2i = "{" + ", ".join("%r: %d" % item for item in items) + "}"
        return "Row({}, {})".format(self._xxx_values, f2i)

1738

1739

1740class _NoopProgressBarQueue(object):

1741 """A fake Queue class that does nothing.

1742

1743 This is used when there is no progress bar to send updates to.

1744 """

1745

1746 def put_nowait(self, item):

1747 """Don't actually do anything with the item."""

1748

1749

1750class RowIterator(HTTPIterator):

1751 """A class for iterating through HTTP/JSON API row list responses.

1752

1753 Args:

1754 client (Optional[google.cloud.bigquery.Client]):

1755 The API client instance. This should always be non-`None`, except for

1756 subclasses that do not use it, namely the ``_EmptyRowIterator``.

1757 api_request (Callable[google.cloud._http.JSONConnection.api_request]):

1758 The function to use to make API requests.

1759 path (str): The method path to query for the list of items.

1760 schema (Sequence[Union[ \

1761 :class:`~google.cloud.bigquery.schema.SchemaField`, \

1762 Mapping[str, Any] \

1763 ]]):

1764 The table's schema. If any item is a mapping, its content must be

1765 compatible with

1766 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.

1767 page_token (str): A token identifying a page in a result set to start

1768 fetching results from.

1769 max_results (Optional[int]): The maximum number of results to fetch.

1770 page_size (Optional[int]): The maximum number of rows in each page

1771 of results from this request. Non-positive values are ignored.

1772 Defaults to a sensible value set by the API.

1773 extra_params (Optional[Dict[str, object]]):

1774 Extra query string parameters for the API call.

1775 table (Optional[Union[ \

1776 google.cloud.bigquery.table.Table, \

1777 google.cloud.bigquery.table.TableReference, \

1778 ]]):

1779 The table which these rows belong to, or a reference to it. Used to

1780 call the BigQuery Storage API to fetch rows.

1781 selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]):

1782 A subset of columns to select from this table.

1783 total_rows (Optional[int]):

1784 Total number of rows in the table.

1785 first_page_response (Optional[dict]):

1786 API response for the first page of results. These are returned when

1787 the first page is requested.

1788 query (Optional[str]):

1789 The query text used.

1790 total_bytes_processed (Optional[int]):

1791 If representing query results, the total bytes processed by the associated query.

1792 slot_millis (Optional[int]):

1793 If representing query results, the number of slot ms billed for the associated query.

1794 created (Optional[datetime.datetime]):

1795 If representing query results, the creation time of the associated query.

1796 started (Optional[datetime.datetime]):

1797 If representing query results, the start time of the associated query.

1798 ended (Optional[datetime.datetime]):

1799 If representing query results, the end time of the associated query.

1800 """

1801

def __init__(
    self,
    client,
    api_request,
    path,
    schema,
    page_token=None,
    max_results=None,
    page_size=None,
    extra_params=None,
    table=None,
    selected_fields=None,
    total_rows=None,
    first_page_response=None,
    location: Optional[str] = None,
    job_id: Optional[str] = None,
    query_id: Optional[str] = None,
    project: Optional[str] = None,
    num_dml_affected_rows: Optional[int] = None,
    query: Optional[str] = None,
    total_bytes_processed: Optional[int] = None,
    slot_millis: Optional[int] = None,
    created: Optional[datetime.datetime] = None,
    started: Optional[datetime.datetime] = None,
    ended: Optional[datetime.datetime] = None,
):
    # Paging is handled by the HTTP iterator base class.
    super(RowIterator, self).__init__(
        client,
        api_request,
        path,
        item_to_value=_item_to_row,
        items_key="rows",
        page_token=page_token,
        max_results=max_results,
        extra_params=extra_params,
        page_start=_rows_page_start,
        next_token="pageToken",
    )

    # Schema: a falsy schema becomes an empty tuple.
    fields = _to_schema_fields(schema) if schema else ()
    self._schema = fields
    self._field_to_index = _helpers._field_to_index_mapping(fields)

    # Paging and table context.
    self._page_size = page_size
    self._preserve_order = False
    self._selected_fields = selected_fields
    self._table = table
    self._total_rows = total_rows
    self._first_page_response = first_page_response

    # Metadata about the originating query/job, if any.
    self._location = location
    self._job_id = job_id
    self._query_id = query_id
    self._project = project
    self._num_dml_affected_rows = num_dml_affected_rows
    self._query = query
    self._total_bytes_processed = total_bytes_processed
    self._slot_millis = slot_millis
    self._job_created = created
    self._job_started = started
    self._job_ended = ended

1860

@property
def _billing_project(self) -> Optional[str]:
    """Project ID of the attached client, i.e. the GCP project the BQ API
    bills to, or ``None`` when there is no client."""
    owner = self.client
    if owner is None:
        return None
    return owner.project

1866

@property
def job_id(self) -> Optional[str]:
    """Optional[str]: ID of the query job, when there is one.

    The job metadata can be retrieved with
    ``job = client.get_job(rows.job_id, location=rows.location)``.
    """
    return self._job_id

1875

@property
def location(self) -> Optional[str]:
    """Optional[str]: Location in which the query ran, when applicable.

    See: https://cloud.google.com/bigquery/docs/locations
    """
    return self._location

1883

@property
def num_dml_affected_rows(self) -> Optional[int]:
    """Optional[int]: Number of rows affected, when this iterator holds the
    result of a DML query.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows
    """
    return self._num_dml_affected_rows

1893

@property
def project(self) -> Optional[str]:
    """Optional[str]: ID of the GCP project the rows are read from."""
    return self._project

1898

@property
def query_id(self) -> Optional[str]:
    """Optional[str]: [Preview] ID of a completed query.

    The ID is generated automatically and may not be populated.
    """
    return self._query_id

1906

@property
def query(self) -> Optional[str]:
    """Optional[str]: Text of the query that was used."""
    return self._query

1911

@property
def total_bytes_processed(self) -> Optional[int]:
    """Optional[int]: Total bytes processed, taken from the job statistics
    when present."""
    return self._total_bytes_processed

1916

@property
def slot_millis(self) -> Optional[int]:
    """Optional[int]: Slot milliseconds the user is actually billed for."""
    return self._slot_millis

1921

@property
def created(self) -> Optional[datetime.datetime]:
    """Optional[datetime.datetime]: Creation time of the associated query,
    when these rows represent query results."""
    return self._job_created

1926

@property
def started(self) -> Optional[datetime.datetime]:
    """Optional[datetime.datetime]: Start time of the associated query,
    when these rows represent query results."""
    return self._job_started

1931

@property
def ended(self) -> Optional[datetime.datetime]:
    """Optional[datetime.datetime]: End time of the associated query, when
    these rows represent query results."""
    return self._job_ended

1936

def _is_almost_completely_cached(self):
    """Tell whether the cached first page holds (nearly) all of the results.

    Knowing this lets callers skip alternative download mechanisms.
    """
    first_page = getattr(self, "_first_page_response", None)
    if first_page is None:
        # Nothing was cached up front.
        return False

    cached_count = len(first_page.get(self._items_key, []))

    # The cached page already satisfies the caller's row limit.
    limit = self.max_results
    if limit is not None and cached_count >= limit:
        return True

    # Neither the iterator nor the cached page points at a further page.
    if self.next_page_token is None and first_page.get(self._next_token) is None:
        return True

    # Otherwise compare against the expected total, when it is known.
    expected_total = self._total_rows
    if expected_total is None:
        return False
    return cached_count >= expected_total * ALMOST_COMPLETELY_CACHED_RATIO

1965

def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client):
    """Decide whether rows can be fetched with the BigQuery Storage API.

    Returns:
        bool
            True if the BigQuery Storage client can be used or created.
    """
    # Neither a client was supplied nor may one be created.
    if not (bqstorage_client or create_bqstorage_client):
        return False

    if self._table is None:
        return False

    # A set next_page_token means the developer has already started paging
    # through the results.
    if getattr(self, "next_page_token", None) is not None:
        return False

    if self._is_almost_completely_cached():
        return False

    if self.max_results is not None:
        return False

    try:
        _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
    except bq_exceptions.BigQueryStorageNotFoundError:
        message = (
            "BigQuery Storage module not found, fetch data with the REST "
            "endpoint instead."
        )
        warnings.warn(message)
        return False
    except bq_exceptions.LegacyBigQueryStorageError as exc:
        warnings.warn(str(exc))
        return False
    else:
        return True

2004

def _get_next_page_response(self):
    """Fetch the next page, serving a cached first page before hitting the API.

    Returns:
        Dict[str, object]:
            The parsed JSON response of the next page's contents.
    """
    cached = self._first_page_response
    if cached:
        # Serve the cached page exactly once, trimmed to max_results.
        page = {
            self._items_key: cached.get(self._items_key, [])[: self.max_results],
        }
        if self._next_token in cached:
            page[self._next_token] = cached[self._next_token]

        self._first_page_response = None
        return page

    query_params = self._get_query_params()

    # start_index must be sent for the first page only when the user gave
    # both page_size and start_index. The key is removed from a shallow copy
    # so the original params keep start_index for a later re-iteration.
    request_params = copy.copy(query_params)
    drop_start_index = (
        self._page_size is not None
        and self.page_number
        and "startIndex" in query_params
    )
    if drop_start_index:
        del request_params["startIndex"]

    return self.api_request(
        method=self._HTTP_METHOD, path=self.path, query_params=request_params
    )

2040

@property
def schema(self):
    """List[google.cloud.bigquery.schema.SchemaField]: Columns to be read
    from the table, returned as a new list on every access."""
    return [*self._schema]

2046

@property
def total_rows(self):
    """int: Total row count of the table or of the query results."""
    return self._total_rows

2051

def _maybe_warn_max_results(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
):
    """Warn when a BQ Storage client is given together with ``max_results``.

    Call this helper directly from the relevant top-level public methods, so
    that the warning is attributed to the right line in user code.

    Args:
        bqstorage_client:
            The BigQuery Storage client intended to use for downloading result rows.
    """
    # Nothing to report unless both the client and max_results are set.
    if bqstorage_client is None or self.max_results is None:
        return

    warnings.warn(
        "Cannot use bqstorage_client if max_results is set, "
        "reverting to fetching data with the REST endpoint.",
        stacklevel=3,
    )

2071

def _to_page_iterable(
    self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
    """Yield result pages from whichever download callable applies.

    ``bqstorage_download`` is used only when a client was supplied and
    ``_should_use_bqstorage`` accepts it; otherwise ``tabledata_list_download``
    is used.
    """
    # The eligibility check always runs, even when no client was supplied.
    storage_allowed = self._should_use_bqstorage(bqstorage_client, False)

    if storage_allowed and bqstorage_client is not None:
        download = bqstorage_download
    else:
        download = tabledata_list_download

    yield from download()

2084

def to_arrow_iterable(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
    max_stream_count: Optional[int] = None,
) -> Iterator["pyarrow.RecordBatch"]:
    """[Beta] Stream the table as :class:`pyarrow.RecordBatch` objects.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            Client for the BigQuery Storage API. When given, rows are
            fetched through the faster BigQuery Storage API.

            Requires the ``pyarrow`` and ``google-cloud-bigquery-storage``
            libraries.

            Only a subset of the BigQuery Storage API is exposed here; use
            the Storage API directly for its full feature set
            (projections, filters, snapshots).

        max_queue_size (Optional[int]):
            Upper bound on the number of result pages kept in the internal
            queue while streaming query results over the BigQuery Storage
            API. Has no effect when the Storage API is not used.

            The default sizes the queue to the number of BQ Storage streams
            created by the server. :data:`None` makes the queue unbounded.

        max_stream_count (Optional[int]):
            Upper bound on the number of parallel download streams when the
            BigQuery Storage API is used; ignored otherwise.

            Also without effect when the query result is deterministically
            ordered with ORDER BY, since exactly one download stream is
            used in that case.

            With 0 or None (the default) the BigQuery server chooses the
            number of streams. That can take a lot of memory for temporary
            download results, especially with very large queries; a value
            > 0 helps reduce system resource consumption in that case.

    Returns:
        pyarrow.RecordBatch:
            A generator of :class:`~pyarrow.RecordBatch`.

    .. versionadded:: 2.31.0
    """
    self._maybe_warn_max_results(bqstorage_client)

    # Storage API path: the download is only bound here, not started.
    billing_project = self._billing_project
    source_table = self._table
    storage_download = functools.partial(
        _pandas_helpers.download_arrow_bqstorage,
        billing_project,
        source_table,
        bqstorage_client,
        preserve_order=self._preserve_order,
        selected_fields=self._selected_fields,
        max_queue_size=max_queue_size,
        max_stream_count=max_stream_count,
    )

    # REST path: convert the pages of this iterator.
    rest_pages = iter(self.pages)
    rest_download = functools.partial(
        _pandas_helpers.download_arrow_row_iterator, rest_pages, self.schema
    )

    return self._to_page_iterable(
        storage_download,
        rest_download,
        bqstorage_client=bqstorage_client,
    )

2156

2157 # If changing the signature of this method, make sure to apply the same

2158 # changes to job.QueryJob.to_arrow()

def to_arrow(
    self,
    progress_bar_type: Optional[str] = None,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    create_bqstorage_client: bool = True,
) -> "pyarrow.Table":
    """[Beta] Create a :class:`pyarrow.Table` by loading all pages of a
    table or query.

    Args:
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            display a progress bar while the data downloads. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
              No progress bar.
            ``'tqdm'``
              Use the :func:`tqdm.tqdm` function to print a progress bar
              to :data:`sys.stdout`.
            ``'tqdm_notebook'``
              Use the :func:`tqdm.notebook.tqdm` function to display a
              progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
              Use the :func:`tqdm.tqdm_gui` function to display a
              progress bar as a graphical dialog box.
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster BigQuery
            Storage API to fetch rows from BigQuery. This API is a billable API.

            This method requires ``google-cloud-bigquery-storage`` library.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.
        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client using
            the default API settings. The BigQuery Storage API is a faster way
            to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
            for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

            .. versionadded:: 1.24.0

    Returns:
        pyarrow.Table
            A :class:`pyarrow.Table` populated with row data and column
            headers from the query results. The column headers are derived
            from the destination table's schema.

    Raises:
        ValueError: If the :mod:`pyarrow` library cannot be imported.


    .. versionadded:: 1.17.0
    """
    if pyarrow is None:
        raise ValueError(_NO_PYARROW_ERROR)

    self._maybe_warn_max_results(bqstorage_client)

    # When the Storage API cannot be used, drop any supplied client and do
    # not create one, so the REST endpoint is used below.
    if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client):
        create_bqstorage_client = False
        bqstorage_client = None

    # A client created here is owned by this call and must be closed by it.
    owns_bqstorage_client = False
    if not bqstorage_client and create_bqstorage_client:
        bqstorage_client = self.client._ensure_bqstorage_client()
        owns_bqstorage_client = bqstorage_client is not None

    # Bound before the ``try`` so the cleanup code can always refer to them.
    progress_bar = None
    record_batches = []
    try:
        progress_bar = get_progress_bar(
            progress_bar_type, "Downloading", self.total_rows, "rows"
        )

        for record_batch in self.to_arrow_iterable(
            bqstorage_client=bqstorage_client
        ):
            record_batches.append(record_batch)

            if progress_bar is not None:
                # In some cases, the number of total rows is not populated
                # until the first page of rows is fetched. Update the
                # progress bar's total to keep an accurate count.
                progress_bar.total = progress_bar.total or self.total_rows
                progress_bar.update(record_batch.num_rows)
    finally:
        try:
            if progress_bar is not None:
                # Close the bar on success AND on failure, so an aborted
                # download does not leave an open progress bar behind.
                progress_bar.close()
        finally:
            # Runs even if closing the progress bar fails, so the channel
            # of a client created above is never leaked.
            if owns_bqstorage_client:
                bqstorage_client._transport.grpc_channel.close()  # type: ignore

    if record_batches and bqstorage_client is not None:
        return pyarrow.Table.from_batches(record_batches)
    else:
        # No records (not record_batches), use schema based on BigQuery schema
        # **or**
        # we used the REST API (bqstorage_client is None),
        # which doesn't add arrow extension metadata, so we let
        # `bq_to_arrow_schema` do it.
        arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema)
        return pyarrow.Table.from_batches(record_batches, schema=arrow_schema)

2267

def to_dataframe_iterable(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    dtypes: Optional[Dict[str, Any]] = None,
    max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT,  # type: ignore
    max_stream_count: Optional[int] = None,
) -> Iterator["pandas.DataFrame"]:
    """Create an iterable of pandas DataFrames, to process the table as a stream.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. If supplied, use the faster
            BigQuery Storage API to fetch rows from BigQuery.

            This method requires ``google-cloud-bigquery-storage`` library.

            This method only exposes a subset of the capabilities of the
            BigQuery Storage API. For full access to all features
            (projections, filters, snapshots) use the Storage API directly.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A dictionary mapping column names to pandas ``dtype`` values.
            The provided ``dtype`` is used when constructing the series for
            the column specified. Otherwise, the default pandas behavior is
            used.

        max_queue_size (Optional[int]):
            The maximum number of result pages to hold in the internal queue when
            streaming query results over the BigQuery Storage API. Ignored if
            Storage API is not used.

            By default, the max queue size is set to the number of BQ Storage streams
            created by the server. If ``max_queue_size`` is :data:`None`, the queue
            size is infinite.

            .. versionadded:: 2.14.0

        max_stream_count (Optional[int]):
            The maximum number of parallel download streams when
            using BigQuery Storage API. Ignored if
            BigQuery Storage API is not used.

            This setting also has no effect if the query result
            is deterministically ordered with ORDER BY,
            in which case, the number of download streams is always 1.

            If set to 0 or None (the default), the number of download
            streams is determined by the BigQuery server. However, this
            behaviour can require a lot of memory to store temporary
            download results, especially with very large queries. In that
            case, setting this parameter to a value > 0 can help
            reduce system resource consumption.

    Returns:
        Iterator[pandas.DataFrame]:
            A generator of :class:`~pandas.DataFrame`.

    Raises:
        ValueError:
            If the :mod:`pandas` library cannot be imported.
    """
    _pandas_helpers.verify_pandas_imports()

    if dtypes is None:
        dtypes = {}

    self._maybe_warn_max_results(bqstorage_client)

    column_names = [field.name for field in self._schema]

    # Storage API path: the download is only bound here, not started.
    bqstorage_download = functools.partial(
        _pandas_helpers.download_dataframe_bqstorage,
        self._billing_project,
        self._table,
        bqstorage_client,
        column_names,
        dtypes,
        preserve_order=self._preserve_order,
        selected_fields=self._selected_fields,
        max_queue_size=max_queue_size,
        max_stream_count=max_stream_count,
    )

    # REST path: convert the pages of this iterator.
    tabledata_list_download = functools.partial(
        _pandas_helpers.download_dataframe_row_iterator,
        iter(self.pages),
        self.schema,
        dtypes,
    )

    # _to_page_iterable is a generator function, so a generator of
    # DataFrames is returned rather than a single DataFrame.
    return self._to_page_iterable(
        bqstorage_download,
        tabledata_list_download,
        bqstorage_client=bqstorage_client,
    )

2359

2360 # If changing the signature of this method, make sure to apply the same

2361 # changes to job.QueryJob.to_dataframe()

2362 def to_dataframe(

2363 self,

2364 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,

2365 dtypes: Optional[Dict[str, Any]] = None,

2366 progress_bar_type: Optional[str] = None,

2367 create_bqstorage_client: bool = True,

2368 geography_as_object: bool = False,

2369 bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,

2370 int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,

2371 float_dtype: Union[Any, None] = None,

2372 string_dtype: Union[Any, None] = None,

2373 date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE,

2374 datetime_dtype: Union[Any, None] = None,

2375 time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE,

2376 timestamp_dtype: Union[Any, None] = None,

2377 range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE,

2378 range_datetime_dtype: Union[

2379 Any, None

2380 ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE,

2381 range_timestamp_dtype: Union[

2382 Any, None

2383 ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE,

2384 ) -> "pandas.DataFrame":

2385 """Create a pandas DataFrame by loading all pages of a query.

2386

2387 Args:

2388 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):

2389 A BigQuery Storage API client. If supplied, use the faster

2390 BigQuery Storage API to fetch rows from BigQuery.

2391

2392 This method requires ``google-cloud-bigquery-storage`` library.

2393

2394 This method only exposes a subset of the capabilities of the

2395 BigQuery Storage API. For full access to all features

2396 (projections, filters, snapshots) use the Storage API directly.

2397

2398 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):

2399 A dictionary of column names pandas ``dtype``s. The provided

2400 ``dtype`` is used when constructing the series for the column

2401 specified. Otherwise, the default pandas behavior is used.

2402 progress_bar_type (Optional[str]):

2403 If set, use the `tqdm <https://tqdm.github.io/>`_ library to

2404 display a progress bar while the data downloads. Install the

2405 ``tqdm`` package to use this feature.

2406

2407 Possible values of ``progress_bar_type`` include:

2408

2409 ``None``

2410 No progress bar.

2411 ``'tqdm'``

2412 Use the :func:`tqdm.tqdm` function to print a progress bar

2413 to :data:`sys.stdout`.

2414 ``'tqdm_notebook'``

2415 Use the :func:`tqdm.notebook.tqdm` function to display a

2416 progress bar as a Jupyter notebook widget.

2417 ``'tqdm_gui'``

2418 Use the :func:`tqdm.tqdm_gui` function to display a

2419 progress bar as a graphical dialog box.

2420

2421 .. versionadded:: 1.11.0

2422

2423 create_bqstorage_client (Optional[bool]):

2424 If ``True`` (default), create a BigQuery Storage API client

2425 using the default API settings. The BigQuery Storage API

2426 is a faster way to fetch rows from BigQuery. See the

2427 ``bqstorage_client`` parameter for more information.

2428

2429 This argument does nothing if ``bqstorage_client`` is supplied.

2430

2431 .. versionadded:: 1.24.0

2432

2433 geography_as_object (Optional[bool]):

2434 If ``True``, convert GEOGRAPHY data to :mod:`shapely`

2435 geometry objects. If ``False`` (default), don't cast

2436 geography data to :mod:`shapely` geometry objects.

2437

2438 .. versionadded:: 2.24.0

2439

2440 bool_dtype (Optional[pandas.Series.dtype, None]):

2441 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)

2442 to convert BigQuery Boolean type, instead of relying on the default

2443 ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,

2444 then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean

2445 type can be found at:

2446 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type

2447

2448 .. versionadded:: 3.8.0

2449

2450 int_dtype (Optional[pandas.Series.dtype, None]):

2451 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)

2452 to convert BigQuery Integer types, instead of relying on the default

2453 ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,

2454 then the data type will be ``numpy.dtype("int64")``. A list of BigQuery

2455 Integer types can be found at:

2456 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types

2457

2458 .. versionadded:: 3.8.0

2459

2460 float_dtype (Optional[pandas.Series.dtype, None]):

2461 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)

2462 to convert BigQuery Float type, instead of relying on the default

2463 ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,

2464 then the data type will be ``numpy.dtype("float64")``. BigQuery Float

2465 type can be found at:

2466 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types

2467

2468 .. versionadded:: 3.8.0

2469

2470 string_dtype (Optional[pandas.Series.dtype, None]):

2471 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to

2472 convert BigQuery String type, instead of relying on the default

2473 ``numpy.dtype("object")``. If you explicitly set the value to ``None``,

2474 then the data type will be ``numpy.dtype("object")``. BigQuery String

2475 type can be found at:

2476 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type

2477

2478 .. versionadded:: 3.8.0

2479

2480 date_dtype (Optional[pandas.Series.dtype, None]):

2481 If set, indicate a pandas ExtensionDtype (e.g.

2482 ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date

2483 type, instead of relying on the default ``db_dtypes.DateDtype()``.

2484 If you explicitly set the value to ``None``, then the data type will be

2485 ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery

2486 Date type can be found at:

2487 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type

2488

2489 .. versionadded:: 3.10.0

2490

2491 datetime_dtype (Optional[pandas.Series.dtype, None]):

2492 If set, indicate a pandas ExtensionDtype (e.g.

2493 ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime

2494 type, instead of relying on the default ``numpy.dtype("datetime64[ns]``.

2495 If you explicitly set the value to ``None``, then the data type will be

2496 ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery

2497 Datetime type can be found at:

2498 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type

2499

2500 .. versionadded:: 3.10.0

2501

2502 time_dtype (Optional[pandas.Series.dtype, None]):

2503 If set, indicate a pandas ExtensionDtype (e.g.

2504 ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time

2505 type, instead of relying on the default ``db_dtypes.TimeDtype()``.

2506 If you explicitly set the value to ``None``, then the data type will be

2507 ``numpy.dtype("object")``. BigQuery Time type can be found at:

2508 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type

2509

2510 .. versionadded:: 3.10.0

2511

2512 timestamp_dtype (Optional[pandas.Series.dtype, None]):

2513 If set, indicate a pandas ExtensionDtype (e.g.

2514 ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp

2515 type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``.

2516 If you explicitly set the value to ``None``, then the data type will be

2517 ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery

2518 Datetime type can be found at:

2519 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type

2520

2521 .. versionadded:: 3.10.0

2522

2523 range_date_dtype (Optional[pandas.Series.dtype, None]):

2524 If set, indicate a pandas ExtensionDtype, such as:

2525

2526 .. code-block:: python

2527

2528 pandas.ArrowDtype(pyarrow.struct(

2529 [("start", pyarrow.date32()), ("end", pyarrow.date32())]

2530 ))

2531

2532 to convert BigQuery RANGE<DATE> type, instead of relying on

2533 the default ``object``. If you explicitly set the value to

2534 ``None``, the data type will be ``object``. BigQuery Range type

2535 can be found at:

2536 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type

2537

2538 .. versionadded:: 3.21.0

2539

2540 range_datetime_dtype (Optional[pandas.Series.dtype, None]):

2541 If set, indicate a pandas ExtensionDtype, such as:

2542

2543 .. code-block:: python

2544

2545 pandas.ArrowDtype(pyarrow.struct(

2546 [

2547 ("start", pyarrow.timestamp("us")),

2548 ("end", pyarrow.timestamp("us")),

2549 ]

2550 ))

2551

2552 to convert BigQuery RANGE<DATETIME> type, instead of relying on

2553 the default ``object``. If you explicitly set the value to

2554 ``None``, the data type will be ``object``. BigQuery Range type

2555 can be found at:

2556 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type

2557

2558 .. versionadded:: 3.21.0

2559

2560 range_timestamp_dtype (Optional[pandas.Series.dtype, None]):

2561 If set, indicate a pandas ExtensionDtype, such as:

2562

2563 .. code-block:: python

2564

2565 pandas.ArrowDtype(pyarrow.struct(

2566 [

2567 ("start", pyarrow.timestamp("us", tz="UTC")),

2568 ("end", pyarrow.timestamp("us", tz="UTC")),

2569 ]

2570 ))

2571

2572 to convert BigQuery RANGE<TIMESTAMP> type, instead of relying

2573 on the default ``object``. If you explicitly set the value to

2574 ``None``, the data type will be ``object``. BigQuery Range type

2575 can be found at:

2576 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type

2577

2578 .. versionadded:: 3.21.0

2579

2580 Returns:

2581 pandas.DataFrame:

2582 A :class:`~pandas.DataFrame` populated with row data and column

2583 headers from the query results. The column headers are derived

2584 from the destination table's schema.

2585

2586 Raises:

2587 ValueError:

2588 If the :mod:`pandas` library cannot be imported, or

2589 the :mod:`google.cloud.bigquery_storage_v1` module is

2590 required but cannot be imported. Also if

2591 `geography_as_object` is `True`, but the

2592 :mod:`shapely` library cannot be imported. Also if

2593 `bool_dtype`, `int_dtype` or other dtype parameters

2594 is not supported dtype.

2595

2596 """

2597 _pandas_helpers.verify_pandas_imports()

2598

2599 if geography_as_object and shapely is None:

2600 raise ValueError(_NO_SHAPELY_ERROR)

2601

2602 if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE:

2603 bool_dtype = pandas.BooleanDtype()

2604

2605 if int_dtype is DefaultPandasDTypes.INT_DTYPE:

2606 int_dtype = pandas.Int64Dtype()

2607

2608 if time_dtype is DefaultPandasDTypes.TIME_DTYPE:

2609 time_dtype = db_dtypes.TimeDtype()

2610

2611 if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE:

2612 if _versions_helpers.SUPPORTS_RANGE_PYARROW:

2613 range_date_dtype = pandas.ArrowDtype(

2614 pyarrow.struct(

2615 [("start", pyarrow.date32()), ("end", pyarrow.date32())]

2616 )

2617 )

2618 else:

2619 warnings.warn(_RANGE_PYARROW_WARNING)

2620 range_date_dtype = None

2621

2622 if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE:

2623 if _versions_helpers.SUPPORTS_RANGE_PYARROW:

2624 range_datetime_dtype = pandas.ArrowDtype(

2625 pyarrow.struct(

2626 [

2627 ("start", pyarrow.timestamp("us")),

2628 ("end", pyarrow.timestamp("us")),

2629 ]

2630 )

2631 )

2632 else:

2633 warnings.warn(_RANGE_PYARROW_WARNING)

2634 range_datetime_dtype = None

2635

2636 if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE:

2637 if _versions_helpers.SUPPORTS_RANGE_PYARROW:

2638 range_timestamp_dtype = pandas.ArrowDtype(

2639 pyarrow.struct(

2640 [

2641 ("start", pyarrow.timestamp("us", tz="UTC")),

2642 ("end", pyarrow.timestamp("us", tz="UTC")),

2643 ]

2644 )

2645 )

2646 else:

2647 warnings.warn(_RANGE_PYARROW_WARNING)

2648 range_timestamp_dtype = None

2649

2650 if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"):

2651 raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE)

2652

2653 if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"):

2654 raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE)

2655

2656 if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"):

2657 raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE)

2658

2659 if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"):

2660 raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE)

2661

2662 if (

2663 date_dtype is not None

2664 and date_dtype is not DefaultPandasDTypes.DATE_DTYPE

2665 and not hasattr(date_dtype, "__from_arrow__")

2666 ):

2667 raise ValueError("date_dtype", _NO_SUPPORTED_DTYPE)

2668

2669 if datetime_dtype is not None and not hasattr(datetime_dtype, "__from_arrow__"):

2670 raise ValueError("datetime_dtype", _NO_SUPPORTED_DTYPE)

2671

2672 if time_dtype is not None and not hasattr(time_dtype, "__from_arrow__"):

2673 raise ValueError("time_dtype", _NO_SUPPORTED_DTYPE)

2674

2675 if timestamp_dtype is not None and not hasattr(

2676 timestamp_dtype, "__from_arrow__"

2677 ):

2678 raise ValueError("timestamp_dtype", _NO_SUPPORTED_DTYPE)

2679

2680 if dtypes is None:

2681 dtypes = {}

2682

2683 self._maybe_warn_max_results(bqstorage_client)

2684

2685 if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client):

2686 create_bqstorage_client = False

2687 bqstorage_client = None

2688

2689 record_batch = self.to_arrow(

2690 progress_bar_type=progress_bar_type,

2691 bqstorage_client=bqstorage_client,

2692 create_bqstorage_client=create_bqstorage_client,

2693 )

2694

2695 # Default date dtype is `db_dtypes.DateDtype()` that could cause out of bounds error,

2696 # when pyarrow converts date values to nanosecond precision. To avoid the error, we

2697 # set the date_as_object parameter to True, if necessary.

2698 date_as_object = False

2699 if date_dtype is DefaultPandasDTypes.DATE_DTYPE:

2700 date_dtype = db_dtypes.DateDtype()

2701 date_as_object = not all(

2702 self.__can_cast_timestamp_ns(col)

2703 for col in record_batch

2704 # Type can be date32 or date64 (plus units).

2705 # See: https://arrow.apache.org/docs/python/api/datatypes.html

2706 if pyarrow.types.is_date(col.type)

2707 )

2708

2709 timestamp_as_object = False

2710 if datetime_dtype is None and timestamp_dtype is None:

2711 timestamp_as_object = not all(

2712 self.__can_cast_timestamp_ns(col)

2713 for col in record_batch

2714 # Type can be datetime and timestamp (plus units and time zone).

2715 # See: https://arrow.apache.org/docs/python/api/datatypes.html

2716 if pyarrow.types.is_timestamp(col.type)

2717 )

2718

2719 df = record_batch.to_pandas(

2720 date_as_object=date_as_object,

2721 timestamp_as_object=timestamp_as_object,

2722 integer_object_nulls=True,

2723 types_mapper=_pandas_helpers.default_types_mapper(

2724 date_as_object=date_as_object,

2725 bool_dtype=bool_dtype,

2726 int_dtype=int_dtype,

2727 float_dtype=float_dtype,

2728 string_dtype=string_dtype,

2729 date_dtype=date_dtype,

2730 datetime_dtype=datetime_dtype,

2731 time_dtype=time_dtype,

2732 timestamp_dtype=timestamp_dtype,

2733 range_date_dtype=range_date_dtype,

2734 range_datetime_dtype=range_datetime_dtype,

2735 range_timestamp_dtype=range_timestamp_dtype,

2736 ),

2737 )

2738

2739 for column in dtypes:

2740 df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False)

2741

2742 if geography_as_object:

2743 for field in self.schema:

2744 if field.field_type.upper() == "GEOGRAPHY" and field.mode != "REPEATED":

2745 df[field.name] = df[field.name].dropna().apply(_read_wkt)

2746

2747 return df

2748

@staticmethod
def __can_cast_timestamp_ns(column):
    """Report whether ``column`` can be cast to ``timestamp[ns]``.

    Args:
        column: An object exposing a pyarrow-style ``cast()`` method.

    Returns:
        bool: ``False`` when the cast raises
        ``pyarrow.lib.ArrowInvalid``, ``True`` when it completes.
    """
    try:
        # Only the outcome matters; the cast result itself is discarded.
        column.cast("timestamp[ns]")
        return True
    except pyarrow.lib.ArrowInvalid:
        return False

2757

2758 # If changing the signature of this method, make sure to apply the same

2759 # changes to job.QueryJob.to_geodataframe()

def to_geodataframe(
    self,
    bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
    dtypes: Optional[Dict[str, Any]] = None,
    progress_bar_type: Optional[str] = None,
    create_bqstorage_client: bool = True,
    geography_column: Optional[str] = None,
    bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
    int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
    float_dtype: Union[Any, None] = None,
    string_dtype: Union[Any, None] = None,
) -> "geopandas.GeoDataFrame":
    """Load all pages of a query into a GeoPandas GeoDataFrame.

    Args:
        bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
            A BigQuery Storage API client. When given, rows are fetched
            through the faster BigQuery Storage API.

            This method requires the ``pyarrow`` and
            ``google-cloud-bigquery-storage`` libraries.

            Only a subset of the BigQuery Storage API is exposed here.
            For the full feature set (projections, filters, snapshots)
            use the Storage API directly.

        dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
            A mapping of column names to pandas ``dtype``s. The given
            ``dtype`` is used when building the series for that column;
            other columns get the default pandas behavior.
        progress_bar_type (Optional[str]):
            If set, use the `tqdm <https://tqdm.github.io/>`_ library to
            show a progress bar during the download. Install the
            ``tqdm`` package to use this feature.

            Possible values of ``progress_bar_type`` include:

            ``None``
              No progress bar.
            ``'tqdm'``
              Use the :func:`tqdm.tqdm` function to print a progress bar
              to :data:`sys.stdout`.
            ``'tqdm_notebook'``
              Use the :func:`tqdm.notebook.tqdm` function to display a
              progress bar as a Jupyter notebook widget.
            ``'tqdm_gui'``
              Use the :func:`tqdm.tqdm_gui` function to display a
              progress bar as a graphical dialog box.

        create_bqstorage_client (Optional[bool]):
            If ``True`` (default), create a BigQuery Storage API client
            with the default API settings. The BigQuery Storage API
            is a faster way to fetch rows from BigQuery. See the
            ``bqstorage_client`` parameter for more information.

            This argument does nothing if ``bqstorage_client`` is supplied.

        geography_column (Optional[str]):
            Names the GEOGRAPHY column used as the geometry of the
            GeoDataFrame. Required when the result has more than one
            GEOGRAPHY column; may be omitted when there is exactly one.
        bool_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
            to convert BigQuery Boolean type, instead of relying on the default
            ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
            type can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
        int_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
            to convert BigQuery Integer types, instead of relying on the default
            ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
            Integer types can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
        float_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
            to convert BigQuery Float type, instead of relying on the default
            ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("float64")``. BigQuery Float
            type can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
        string_dtype (Optional[pandas.Series.dtype, None]):
            If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
            convert BigQuery String type, instead of relying on the default
            ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
            then the data type will be ``numpy.dtype("object")``. BigQuery String
            type can be found at:
            https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type

    Returns:
        geopandas.GeoDataFrame:
            A :class:`geopandas.GeoDataFrame` populated with row
            data and column headers from the query results. The
            column headers are derived from the destination
            table's schema.

    Raises:
        ValueError:
            If the :mod:`geopandas` library cannot be imported, or the
            :mod:`google.cloud.bigquery_storage_v1` module is
            required but cannot be imported. Also raised when
            ``geography_column`` does not name a GEOGRAPHY column, or
            when it is omitted and the result has several of them.
        TypeError:
            If the schema contains no GEOGRAPHY column at all.

    .. versionadded:: 2.24.0
    """
    if geopandas is None:
        raise ValueError(_NO_GEOPANDAS_ERROR)

    geo_names = {
        schema_field.name
        for schema_field in self.schema
        if schema_field.field_type.upper() == "GEOGRAPHY"
    }
    if not geo_names:
        raise TypeError(
            "There must be at least one GEOGRAPHY column"
            " to create a GeoDataFrame"
        )

    if not geography_column:
        # No explicit choice: only unambiguous when exactly one candidate exists.
        if len(geo_names) != 1:
            raise ValueError(
                "There is more than one GEOGRAPHY column in the result. "
                "The geography_column argument must be used to specify which "
                "one to use to create a GeoDataFrame"
            )
        (geography_column,) = geo_names
    elif geography_column not in geo_names:
        raise ValueError(
            f"The given geography column, {geography_column}, doesn't name"
            " a GEOGRAPHY column in the result."
        )

    # geography_as_object=True asks to_dataframe() to parse the WKT values
    # of GEOGRAPHY columns into geometry objects.
    frame = self.to_dataframe(
        bqstorage_client,
        dtypes,
        progress_bar_type,
        create_bqstorage_client,
        geography_as_object=True,
        bool_dtype=bool_dtype,
        int_dtype=int_dtype,
        float_dtype=float_dtype,
        string_dtype=string_dtype,
    )

    return geopandas.GeoDataFrame(
        frame, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column
    )

2910

2911

class _EmptyRowIterator(RowIterator):
    """An empty row iterator.

    This class prevents API requests when there are no rows to fetch or rows
    are impossible to fetch, such as with query results for DDL CREATE VIEW
    statements.
    """

    # Class-level overrides: there are never any pages or rows to report.
    pages = ()
    total_rows = 0

    def __init__(
        self, client=None, api_request=None, path=None, schema=(), *args, **kwargs
    ):
        """Initialize the parent iterator with all-optional arguments.

        Every argument defaults to an "empty" value so the iterator can be
        built without a client or request function.
        """
        super().__init__(
            client=client,
            api_request=api_request,
            path=path,
            schema=schema,
            *args,
            **kwargs,
        )

    def to_arrow(
        self,
        progress_bar_type=None,
        bqstorage_client=None,
        create_bqstorage_client=True,
    ) -> "pyarrow.Table":
        """[Beta] Create an empty :class:`pyarrow.Table`.

        Args:
            progress_bar_type (str): Ignored. Added for compatibility with RowIterator.
            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.

        Returns:
            pyarrow.Table: An empty :class:`pyarrow.Table`.

        Raises:
            ValueError: If the :mod:`pyarrow` library cannot be imported.
        """
        if pyarrow is None:
            raise ValueError(_NO_PYARROW_ERROR)
        return pyarrow.Table.from_arrays(())

    def to_dataframe(
        self,
        bqstorage_client=None,
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
        geography_as_object=False,
        bool_dtype=None,
        int_dtype=None,
        float_dtype=None,
        string_dtype=None,
        date_dtype=None,
        datetime_dtype=None,
        time_dtype=None,
        timestamp_dtype=None,
        range_date_dtype=None,
        range_datetime_dtype=None,
        range_timestamp_dtype=None,
    ) -> "pandas.DataFrame":
        """Create an empty dataframe.

        Args:
            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
            dtypes (Any): Ignored. Added for compatibility with RowIterator.
            progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
            geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
            bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
            int_dtype (Any): Ignored. Added for compatibility with RowIterator.
            float_dtype (Any): Ignored. Added for compatibility with RowIterator.
            string_dtype (Any): Ignored. Added for compatibility with RowIterator.
            date_dtype (Any): Ignored. Added for compatibility with RowIterator.
            datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
            time_dtype (Any): Ignored. Added for compatibility with RowIterator.
            timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
            range_date_dtype (Any): Ignored. Added for compatibility with RowIterator.
            range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
            range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.

        Returns:
            pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
        """
        _pandas_helpers.verify_pandas_imports()
        return pandas.DataFrame()

    def to_geodataframe(
        self,
        bqstorage_client=None,
        dtypes=None,
        progress_bar_type=None,
        create_bqstorage_client=True,
        geography_column: Optional[str] = None,
        bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
        int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
        float_dtype: Union[Any, None] = None,
        string_dtype: Union[Any, None] = None,
    ) -> "geopandas.GeoDataFrame":
        """Create an empty GeoDataFrame.

        Args:
            bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
            dtypes (Any): Ignored. Added for compatibility with RowIterator.
            progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
            create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
            geography_column (str): Ignored. Added for compatibility with RowIterator.
            bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
            int_dtype (Any): Ignored. Added for compatibility with RowIterator.
            float_dtype (Any): Ignored. Added for compatibility with RowIterator.
            string_dtype (Any): Ignored. Added for compatibility with RowIterator.

        Returns:
            geopandas.GeoDataFrame: An empty :class:`geopandas.GeoDataFrame`.

        Raises:
            ValueError: If the :mod:`geopandas` library cannot be imported.
        """
        if geopandas is None:
            raise ValueError(_NO_GEOPANDAS_ERROR)

        # Since an empty GeoDataFrame has no geometry column, we do not set a
        # CRS on it, because that's deprecated.
        return geopandas.GeoDataFrame()

    def to_dataframe_iterable(
        self,
        bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
        dtypes: Optional[Dict[str, Any]] = None,
        max_queue_size: Optional[int] = None,
        max_stream_count: Optional[int] = None,
    ) -> Iterator["pandas.DataFrame"]:
        """Create an iterable of pandas DataFrames, to process the table as a stream.

        .. versionadded:: 2.21.0

        Args:
            bqstorage_client:
                Ignored. Added for compatibility with RowIterator.

            dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
                Ignored. Added for compatibility with RowIterator.

            max_queue_size:
                Ignored. Added for compatibility with RowIterator.

            max_stream_count:
                Ignored. Added for compatibility with RowIterator.

        Returns:
            An iterator yielding a single empty :class:`~pandas.DataFrame`.

        Raises:
            ValueError:
                If the :mod:`pandas` library cannot be imported.
        """
        _pandas_helpers.verify_pandas_imports()
        return iter((pandas.DataFrame(),))

    def to_arrow_iterable(
        self,
        bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
        max_queue_size: Optional[int] = None,
        max_stream_count: Optional[int] = None,
    ) -> Iterator["pyarrow.RecordBatch"]:
        """Create an iterable of pyarrow RecordBatches, to process the table as a stream.

        .. versionadded:: 2.31.0

        Args:
            bqstorage_client:
                Ignored. Added for compatibility with RowIterator.

            max_queue_size:
                Ignored. Added for compatibility with RowIterator.

            max_stream_count:
                Ignored. Added for compatibility with RowIterator.

        Returns:
            An iterator yielding a single empty :class:`~pyarrow.RecordBatch`.

        Raises:
            ValueError:
                If the :mod:`pyarrow` library cannot be imported.
        """
        # Same guard as to_arrow(): without it a missing pyarrow surfaces as
        # an opaque AttributeError on ``None`` instead of the install hint.
        if pyarrow is None:
            raise ValueError(_NO_PYARROW_ERROR)
        return iter((pyarrow.record_batch([]),))

    def __iter__(self):
        """Return an iterator that yields no rows."""
        return iter(())

3096

3097

class PartitionRange(object):
    """Bounds and step size used by range partitioning.

    .. note::
        **Beta**. The integer range partitioning feature is in a pre-release
        state and might change or have limited support.

    Args:
        start (Optional[int]):
            Sets the
            :attr:`~google.cloud.bigquery.table.PartitionRange.start`
            property.
        end (Optional[int]):
            Sets the
            :attr:`~google.cloud.bigquery.table.PartitionRange.end`
            property.
        interval (Optional[int]):
            Sets the
            :attr:`~google.cloud.bigquery.table.PartitionRange.interval`
            property.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource.
    """

    def __init__(self, start=None, end=None, interval=None, _properties=None) -> None:
        # A caller-supplied dict is adopted as-is (not copied), so it stays
        # shared with whoever passed it in.
        self._properties = {} if _properties is None else _properties

        # Only explicitly supplied bounds go through their property setters.
        for attr_name, attr_value in (
            ("start", start),
            ("end", end),
            ("interval", interval),
        ):
            if attr_value is not None:
                setattr(self, attr_name, attr_value)

    @property
    def start(self):
        """int: The start of range partitioning, inclusive."""
        raw_start = self._properties.get("start")
        return _helpers._int_or_none(raw_start)

    @start.setter
    def start(self, value):
        self._properties["start"] = _helpers._str_or_none(value)

    @property
    def end(self):
        """int: The end of range partitioning, exclusive."""
        raw_end = self._properties.get("end")
        return _helpers._int_or_none(raw_end)

    @end.setter
    def end(self, value):
        self._properties["end"] = _helpers._str_or_none(value)

    @property
    def interval(self):
        """int: The width of each interval."""
        raw_interval = self._properties.get("interval")
        return _helpers._int_or_none(raw_interval)

    @interval.setter
    def interval(self, value):
        self._properties["interval"] = _helpers._str_or_none(value)

    def _key(self):
        """Return the stored properties as a sorted tuple of ``(key, value)`` pairs."""
        pairs = list(self._properties.items())
        pairs.sort()
        return tuple(pairs)

    def __eq__(self, other):
        if isinstance(other, PartitionRange):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        rendered = ", ".join(f"{name}={value}" for name, value in self._key())
        return f"PartitionRange({rendered})"

3175

3176

class RangePartitioning(object):
    """Configuration of range-based partitioning for a table.

    .. note::
        **Beta**. The integer range partitioning feature is in a pre-release
        state and might change or have limited support.

    Args:
        range_ (Optional[google.cloud.bigquery.table.PartitionRange]):
            Sets the
            :attr:`google.cloud.bigquery.table.RangePartitioning.range_`
            property.
        field (Optional[str]):
            Sets the
            :attr:`google.cloud.bigquery.table.RangePartitioning.field`
            property.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource.
    """

    def __init__(self, range_=None, field=None, _properties=None) -> None:
        # A caller-supplied dict is adopted as-is (not copied).
        self._properties: Dict[str, Any] = (
            {} if _properties is None else _properties
        )

        if range_ is not None:
            self.range_ = range_
        if field is not None:
            self.field = field

    # Trailing underscore to prevent conflict with built-in range() function.
    @property
    def range_(self):
        """google.cloud.bigquery.table.PartitionRange: Defines the
        ranges for range partitioning.

        Raises:
            ValueError:
                If the value is not a :class:`PartitionRange`.
        """
        # setdefault() stores an empty dict on first access, so the returned
        # PartitionRange writes through to this object's own properties.
        backing = self._properties.setdefault("range", {})
        return PartitionRange(_properties=backing)

    @range_.setter
    def range_(self, value):
        if isinstance(value, PartitionRange):
            self._properties["range"] = value._properties
            return
        raise ValueError("Expected a PartitionRange, but got {}.".format(value))

    @property
    def field(self):
        """str: The table is partitioned by this field.

        The field must be a top-level ``NULLABLE`` / ``REQUIRED`` field. The
        only supported type is ``INTEGER`` / ``INT64``.
        """
        return self._properties.get("field")

    @field.setter
    def field(self, value):
        self._properties["field"] = value

    def _key(self):
        """Return the ``(name, value)`` pairs used for equality and repr."""
        field_pair = ("field", self.field)
        range_pair = ("range_", self.range_)
        return (field_pair, range_pair)

    def __eq__(self, other):
        if isinstance(other, RangePartitioning):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __repr__(self):
        rendered = ", ".join(f"{name}={value!r}" for name, value in self._key())
        return f"RangePartitioning({rendered})"

3253

3254

class TimePartitioningType(object):
    """Names the granularity at which time partitions are generated."""

    HOUR = "HOUR"
    """str: One partition is generated per hour."""

    DAY = "DAY"
    """str: One partition is generated per day."""

    MONTH = "MONTH"
    """str: One partition is generated per month."""

    YEAR = "YEAR"
    """str: One partition is generated per year."""

3269

3270

class TimePartitioning(object):
    """Configures time-based partitioning for a table.

    Args:
        type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]):
            Specifies the type of time partitioning to perform. Defaults to
            :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`.

            Supported values are:

            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR`
            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`
            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH`
            * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR`

        field (Optional[str]):
            If set, the table is partitioned by this field. If not set, the
            table is partitioned by pseudo column ``_PARTITIONTIME``. The field
            must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE``
            field. Its mode must be ``NULLABLE`` or ``REQUIRED``.

            See the `time-unit column-partitioned tables guide
            <https://cloud.google.com/bigquery/docs/creating-column-partitions>`_
            in the BigQuery documentation.
        expiration_ms(Optional[int]):
            Number of milliseconds for which to keep the storage for a
            partition.
        require_partition_filter (Optional[bool]):
            DEPRECATED: Use
            :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
            instead.
    """

    def __init__(
        self, type_=None, field=None, expiration_ms=None, require_partition_filter=None
    ) -> None:
        self._properties: Dict[str, Any] = {}
        # The type is always stored; DAY is the default granularity.
        self.type_ = TimePartitioningType.DAY if type_ is None else type_
        if field is not None:
            self.field = field
        if expiration_ms is not None:
            self.expiration_ms = expiration_ms
        if require_partition_filter is not None:
            # Goes through the deprecated setter, which emits a warning.
            self.require_partition_filter = require_partition_filter

    @property
    def type_(self):
        """google.cloud.bigquery.table.TimePartitioningType: The type of time
        partitioning to use.
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def field(self):
        """str: Field in the table to use for partitioning"""
        return self._properties.get("field")

    @field.setter
    def field(self, value):
        self._properties["field"] = value

    @property
    def expiration_ms(self):
        """int: Number of milliseconds to keep the storage for a partition."""
        return _helpers._int_or_none(self._properties.get("expirationMs"))

    @expiration_ms.setter
    def expiration_ms(self, value):
        if value is not None:
            # Allow explicitly setting the expiration to None.
            value = str(value)
        self._properties["expirationMs"] = value

    @property
    def require_partition_filter(self):
        """bool: Specifies whether partition filters are required for queries

        DEPRECATED: Use
        :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
        instead.
        """
        warnings.warn(
            (
                "TimePartitioning.require_partition_filter will be removed in "
                "future versions. Please use Table.require_partition_filter "
                "instead."
            ),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        warnings.warn(
            (
                "TimePartitioning.require_partition_filter will be removed in "
                "future versions. Please use Table.require_partition_filter "
                "instead."
            ),
            PendingDeprecationWarning,
            stacklevel=2,
        )
        self._properties["requirePartitionFilter"] = value

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "TimePartitioning":
        """Return a :class:`TimePartitioning` object deserialized from a dict.

        This method creates a new ``TimePartitioning`` instance that points to
        the ``api_repr`` parameter as its internal properties dict. This means
        that when a ``TimePartitioning`` instance is stored as a property of
        another object, any changes made at the higher level will also appear
        here::

            >>> time_partitioning = TimePartitioning()
            >>> table.time_partitioning = time_partitioning
            >>> table.time_partitioning.field = 'timecolumn'
            >>> time_partitioning.field
            'timecolumn'

        Args:
            api_repr (Mapping[str, str]):
                The serialized representation of the TimePartitioning, such as
                what is output by :meth:`to_api_repr`.

        Returns:
            google.cloud.bigquery.table.TimePartitioning:
                The ``TimePartitioning`` object.
        """
        instance = cls()
        # Adopt the caller's dict rather than copying it (see docstring).
        instance._properties = api_repr
        return instance

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this object.

        This method returns the properties dict of the ``TimePartitioning``
        instance rather than making a copy. This means that when a
        ``TimePartitioning`` instance is stored as a property of another
        object, any changes made at the higher level will also appear here.

        Returns:
            dict:
                A dictionary representing the TimePartitioning object in
                serialized form.
        """
        return self._properties

    def _key(self):
        """Return a sorted tuple of ``(name, value)`` pairs for eq/hash/repr.

        API-style keys are renamed to their Python attribute names. A
        missing ``"type"`` entry is treated as ``None``, matching the
        :attr:`type_` getter.
        """
        # because we are only "renaming" top level keys shallow copy is sufficient here.
        properties = self._properties.copy()
        # calling repr for non built-in type objects.
        # The default keeps repr()/==/hash() usable when the adopted API
        # resource carries no "type" entry, instead of raising KeyError.
        properties["type_"] = repr(properties.pop("type", None))
        if "field" in properties:
            # calling repr for non built-in type objects.
            properties["field"] = repr(properties["field"])
        if "requirePartitionFilter" in properties:
            properties["require_partition_filter"] = properties.pop(
                "requirePartitionFilter"
            )
        if "expirationMs" in properties:
            properties["expiration_ms"] = properties.pop("expirationMs")
        return tuple(sorted(properties.items()))

    def __eq__(self, other):
        if not isinstance(other, TimePartitioning):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        key_vals = ["{}={}".format(key, val) for key, val in self._key()]
        return "TimePartitioning({})".format(",".join(key_vals))

3457

3458

class PrimaryKey:
    """The primary key constraint declared on a table's columns.

    Args:
        columns: The columns that make up the primary key constraint.
    """

    def __init__(self, columns: List[str]):
        self.columns = columns

    def __eq__(self, other):
        # Comparing against anything but another PrimaryKey is an error,
        # not merely "unequal".
        if isinstance(other, PrimaryKey):
            return self.columns == other.columns
        raise TypeError("The value provided is not a BigQuery PrimaryKey.")

3473

3474

class ColumnReference:
    """Pairs a foreign key column with the primary key column it points at.

    Args:
        referencing_column: The column that composes the foreign key.
        referenced_column: The primary key column referenced by
            ``referencing_column``.
    """

    def __init__(self, referencing_column: str, referenced_column: str):
        self.referencing_column = referencing_column
        self.referenced_column = referenced_column

    def __eq__(self, other):
        # Comparing against anything but another ColumnReference is an
        # error, not merely "unequal".
        if isinstance(other, ColumnReference):
            same_referencing = self.referencing_column == other.referencing_column
            same_referenced = self.referenced_column == other.referenced_column
            return same_referencing and same_referenced
        raise TypeError("The value provided is not a BigQuery ColumnReference.")

3494

3495

class ForeignKey:
    """Represents a foreign key constraint on a table's columns.

    Args:
        name: Set only if the foreign key constraint is named.
        referenced_table: The table that holds the primary key and is referenced by this foreign key.
        column_references: The columns that compose the foreign key.
    """

    def __init__(
        self,
        name: Optional[str],
        referenced_table: TableReference,
        column_references: List[ColumnReference],
    ):
        self.name = name
        self.referenced_table = referenced_table
        self.column_references = column_references

    def __eq__(self, other):
        # Comparing against anything but another ForeignKey is an error,
        # not merely "unequal".
        if not isinstance(other, ForeignKey):
            raise TypeError("The value provided is not a BigQuery ForeignKey.")
        return (
            self.name == other.name
            and self.referenced_table == other.referenced_table
            and self.column_references == other.column_references
        )

    @classmethod
    def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey":
        """Create an instance from API representation.

        Args:
            api_repr: Mapping with ``"referencedTable"`` and
                ``"columnReferences"`` entries and an optional ``"name"``.

        Returns:
            The ``ForeignKey`` built from ``api_repr``. ``name`` is ``None``
            when the resource carries no ``"name"`` entry.
        """
        return cls(
            # The name is set only for named constraints, so it may be
            # absent; indexing it directly would raise KeyError.
            name=api_repr.get("name"),
            referenced_table=TableReference.from_api_repr(api_repr["referencedTable"]),
            column_references=[
                ColumnReference(
                    column_reference_resource["referencingColumn"],
                    column_reference_resource["referencedColumn"],
                )
                for column_reference_resource in api_repr["columnReferences"]
            ],
        )

    def to_api_repr(self) -> Dict[str, Any]:
        """Return a dictionary representing this object."""
        return {
            "name": self.name,
            "referencedTable": self.referenced_table.to_api_repr(),
            "columnReferences": [
                {
                    "referencingColumn": column_reference.referencing_column,
                    "referencedColumn": column_reference.referenced_column,
                }
                for column_reference in self.column_references
            ],
        }

3552

3553

class TableConstraints:
    """The TableConstraints defines the primary key and foreign key.

    Args:
        primary_key:
            Represents a primary key constraint on a table's columns. Present only if the table
            has a primary key. The primary key is not enforced.
        foreign_keys:
            Present only if the table has a foreign key. The foreign key is not enforced.

    """

    def __init__(
        self,
        primary_key: Optional[PrimaryKey],
        foreign_keys: Optional[List[ForeignKey]],
    ):
        self.primary_key = primary_key
        self.foreign_keys = foreign_keys

    def __eq__(self, other):
        """Compare two constraint sets by primary key and foreign keys.

        Returns:
            bool: ``False`` when ``other`` is ``None``; otherwise ``True``
            only if both the primary keys and the foreign keys match. A
            missing (``None``) and an empty foreign key list are treated
            as equal, mirroring :meth:`to_api_repr`, which serializes
            both the same way.

        Raises:
            TypeError: If ``other`` is neither ``None`` nor a
                ``TableConstraints``.
        """
        if other is None:
            # None is accepted by the type check but has no attributes to
            # compare; it is simply not equal.
            return False
        if not isinstance(other, TableConstraints):
            raise TypeError("The value provided is not a BigQuery TableConstraints.")

        mine, theirs = self.primary_key, other.primary_key
        if mine is None or theirs is None:
            # PrimaryKey.__eq__ raises TypeError for non-PrimaryKey operands,
            # so an absent key must be handled before delegating to it.
            primary_keys_match = mine is theirs
        else:
            primary_keys_match = bool(mine == theirs)

        foreign_keys_match = bool(
            (self.foreign_keys or []) == (other.foreign_keys or [])
        )
        return primary_keys_match and foreign_keys_match

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints":
        """Create an instance from API representation.

        Args:
            resource: Mapping that may contain ``"primaryKey"`` and
                ``"foreignKeys"`` entries; absent entries become ``None``.
        """
        primary_key = None
        if "primaryKey" in resource:
            primary_key = PrimaryKey(resource["primaryKey"]["columns"])

        foreign_keys = None
        if "foreignKeys" in resource:
            foreign_keys = [
                ForeignKey.from_api_repr(foreign_key_resource)
                for foreign_key_resource in resource["foreignKeys"]
            ]
        return cls(primary_key, foreign_keys)

    def to_api_repr(self) -> Dict[str, Any]:
        """Return a dictionary representing this object.

        Entries for a missing primary key or a missing/empty foreign key
        list are omitted.
        """
        resource: Dict[str, Any] = {}
        if self.primary_key:
            resource["primaryKey"] = {"columns": self.primary_key.columns}
        if self.foreign_keys:
            resource["foreignKeys"] = [
                foreign_key.to_api_repr() for foreign_key in self.foreign_keys
            ]
        return resource

3606

3607

class BigLakeConfiguration(object):
    """Settings for managed tables for Apache Iceberg (formerly known as
    BigLake).

    Args:
        connection_id (Optional[str]):
            Connection that supplies the credentials used to read and write
            external storage such as Cloud Storage. Accepted forms are
            ``{project}.{location}.{connection_id}`` and
            ``projects/{project}/locations/{location}/connections/{connection_id}``.
        storage_uri (Optional[str]):
            Fully qualified location prefix of the external folder holding the
            table data, in the format ``gs://bucket/path_to_table/``. The '*'
            wildcard character is not allowed.
        file_format (Optional[str]):
            File format the table data is stored in. See BigLakeFileFormat for
            available values.
        table_format (Optional[str]):
            Table format the metadata only snapshots are stored in. See
            BigLakeTableFormat for available values.
        _properties (Optional[dict]):
            Private. Used to construct object from API resource.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        storage_uri: Optional[str] = None,
        file_format: Optional[str] = None,
        table_format: Optional[str] = None,
        _properties: Optional[dict] = None,
    ) -> None:
        self._properties = {} if _properties is None else _properties

        # Only explicitly supplied values are written, each through its
        # property setter and in declaration order.
        supplied = (
            ("connection_id", connection_id),
            ("storage_uri", storage_uri),
            ("file_format", file_format),
            ("table_format", table_format),
        )
        for attribute, candidate in supplied:
            if candidate is not None:
                setattr(self, attribute, candidate)

    @property
    def connection_id(self) -> Optional[str]:
        """str: Connection providing the credentials used to read and write
        external storage, such as Cloud Storage."""
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        self._properties["connectionId"] = value

    @property
    def storage_uri(self) -> Optional[str]:
        """str: Fully qualified location prefix of the external folder where
        the table data is stored."""
        return self._properties.get("storageUri")

    @storage_uri.setter
    def storage_uri(self, value: Optional[str]):
        self._properties["storageUri"] = value

    @property
    def file_format(self) -> Optional[str]:
        """str: File format the table data is stored in. See BigLakeFileFormat
        for available values."""
        return self._properties.get("fileFormat")

    @file_format.setter
    def file_format(self, value: Optional[str]):
        self._properties["fileFormat"] = value

    @property
    def table_format(self) -> Optional[str]:
        """str: Table format the metadata only snapshots are stored in. See
        BigLakeTableFormat for available values."""
        return self._properties.get("tableFormat")

    @table_format.setter
    def table_format(self, value: Optional[str]):
        self._properties["tableFormat"] = value

    def _key(self):
        # Sorted (key, value) pairs of the underlying properties; this tuple
        # backs equality, hashing and repr.
        pairs = list(self._properties.items())
        pairs.sort()
        return tuple(pairs)

    def __eq__(self, other):
        if isinstance(other, BigLakeConfiguration):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        rendered = ",".join(f"{key}={val}" for key, val in self._key())
        return f"BigLakeConfiguration({rendered})"

    @classmethod
    def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration":
        """Factory: build a BigLakeConfiguration from its API representation.

        Args:
            resource:
                BigLakeConfiguration representation returned from the API

        Returns:
            BigLakeConfiguration backed by ``resource``.
        """
        instance = cls()
        # The resource is adopted as-is (not copied) as the backing store.
        instance._properties = resource
        return instance

    def to_api_repr(self) -> Dict[str, Any]:
        """Build the API resource representation of this BigLakeConfiguration.

        Returns:
            A deep copy of the underlying properties, as an API resource.
        """
        return copy.deepcopy(self._properties)

3732

3733

def _item_to_row(iterator, resource):
    """Turn one JSON row resource into a native row object.

    .. note::

        The iterator is expected to already carry its ``schema``
        attribute; attaching it after creation is the caller's job.

    Args:
        iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use.
        resource (Dict): An item to be converted to a row.

    Returns:
        google.cloud.bigquery.table.Row: The next row in the page.
    """
    cell_values = _helpers._row_tuple_from_json(resource, iterator.schema)
    return Row(cell_values, iterator._field_to_index)

3754

3755

3756def _row_iterator_page_columns(schema, response):

3757 """Make a generator of all the columns in a page from tabledata.list.

3758

3759 This enables creating a :class:`pandas.DataFrame` and other

3760 column-oriented data structures such as :class:`pyarrow.RecordBatch`

3761 """

3762 columns = []

3763 rows = response.get("rows", [])

3764

3765 def get_column_data(field_index, field):

3766 for row in rows:

3767 yield _helpers.DATA_FRAME_CELL_DATA_PARSER.to_py(

3768 row["f"][field_index]["v"], field

3769 )

3770

3771 for field_index, field in enumerate(schema):

3772 columns.append(get_column_data(field_index, field))

3773

3774 return columns

3775

3776

3777# pylint: disable=unused-argument

def _rows_page_start(iterator, page, response):
    """Record column data and the total row count when a
    :class:`~google.cloud.iterator.Page` starts.

    Args:
        iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use.
        page (google.api_core.page_iterator.Page): The page that was just created.
        response (Dict): The JSON API response for a page of rows in a table.
    """
    # Attach a (lazy) column-oriented copy of the page for data science
    # packages.
    page._columns = _row_iterator_page_columns(iterator._schema, response)

    reported_total = response.get("totalRows")
    if reported_total is None:
        # Keep the previously known total when this response omits it.
        return
    iterator._total_rows = int(reported_total)

3794

3795

3796# pylint: enable=unused-argument

3797

3798

def _table_arg_to_table_ref(value, default_project=None) -> TableReference:
    """Coerce a string, Table or TableListItem into a TableReference.

    TableReference instances and any other kind of object are returned
    unchanged.
    """
    candidate = value
    if isinstance(candidate, str):
        candidate = TableReference.from_string(
            candidate, default_project=default_project
        )
    if isinstance(candidate, (Table, TableListItem)):
        return candidate.reference
    return candidate

3809

3810

def _table_arg_to_table(value, default_project=None) -> Table:
    """Coerce a string, TableReference or TableListItem into a Table.

    Table instances and any other kind of object are returned unchanged.
    """
    candidate = value
    if isinstance(candidate, str):
        candidate = TableReference.from_string(
            candidate, default_project=default_project
        )
    if isinstance(candidate, TableReference):
        candidate = Table(candidate)
    if isinstance(candidate, TableListItem):
        # The new Table shares the list item's properties dict rather than
        # copying it.
        converted = Table(candidate.reference)
        converted._properties = candidate._properties
        return converted

    return candidate