Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/table.py: 37%
901 statements, coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Define API Tables."""
17from __future__ import absolute_import
19import copy
20import datetime
21import functools
22import operator
23import typing
24from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
25import warnings
27try:
28 import pandas # type: ignore
29except ImportError: # pragma: NO COVER
30 pandas = None
32try:
33 import pyarrow # type: ignore
34except ImportError: # pragma: NO COVER
35 pyarrow = None
37try:
38 import db_dtypes # type: ignore
39except ImportError: # pragma: NO COVER
40 db_dtypes = None
42try:
43 import geopandas # type: ignore
44except ImportError:
45 geopandas = None
46else:
47 _COORDINATE_REFERENCE_SYSTEM = "EPSG:4326"
49try:
50 import shapely # type: ignore
51 from shapely import wkt # type: ignore
52except ImportError:
53 shapely = None
54else:
55 _read_wkt = wkt.loads
57import google.api_core.exceptions
58from google.api_core.page_iterator import HTTPIterator
60import google.cloud._helpers # type: ignore
61from google.cloud.bigquery import _helpers
62from google.cloud.bigquery import _pandas_helpers
63from google.cloud.bigquery.enums import DefaultPandasDTypes
64from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
65from google.cloud.bigquery.schema import _build_schema_resource
66from google.cloud.bigquery.schema import _parse_schema_resource
67from google.cloud.bigquery.schema import _to_schema_fields
68from google.cloud.bigquery._tqdm_helpers import get_progress_bar
69from google.cloud.bigquery.external_config import ExternalConfig
70from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
72if typing.TYPE_CHECKING: # pragma: NO COVER
73 # Unconditionally import optional dependencies again to tell pytype that
74 # they are not None, avoiding false "no attribute" errors.
75 import pandas
76 import pyarrow
77 import geopandas # type: ignore
78 from google.cloud import bigquery_storage # type: ignore
79 from google.cloud.bigquery.dataset import DatasetReference
82_NO_GEOPANDAS_ERROR = (
83 "The geopandas library is not installed, please install "
84 "geopandas to use the to_geodataframe() function."
85)
86_NO_PYARROW_ERROR = (
87 "The pyarrow library is not installed, please install "
88 "pyarrow to use the to_arrow() function."
89)
90_NO_SHAPELY_ERROR = (
91 "The shapely library is not installed, please install "
92 "shapely to use the geography_as_object option."
93)
95_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'
97_NO_SUPPORTED_DTYPE = (
98 "The dtype cannot to be converted to a pandas ExtensionArray "
99 "because the necessary `__from_arrow__` attribute is missing."
100)
103def _reference_getter(table):
104 """A :class:`~google.cloud.bigquery.table.TableReference` pointing to
105 this table.
107 Returns:
108 google.cloud.bigquery.table.TableReference: pointer to this table.
109 """
110 from google.cloud.bigquery import dataset
112 dataset_ref = dataset.DatasetReference(table.project, table.dataset_id)
113 return TableReference(dataset_ref, table.table_id)
116def _view_use_legacy_sql_getter(table):
117 """bool: Specifies whether to execute the view with Legacy or Standard SQL.
119 This boolean specifies whether to execute the view with Legacy SQL
120 (:data:`True`) or Standard SQL (:data:`False`). The client side default is
121 :data:`False`. The server-side default is :data:`True`. If this table is
122 not a view, :data:`None` is returned.
124 Raises:
125 ValueError: For invalid value types.
126 """
127 view = table._properties.get("view")
128 if view is not None:
129 # The server-side default for useLegacySql is True.
130 return view.get("useLegacySql", True)
131 # In some cases, such as in a table list, no view object is present, but the
132 # resource still represents a view. Use the type as a fallback.
133 if table.table_type == "VIEW":
134 # The server-side default for useLegacySql is True.
135 return True
138class _TableBase:
139 """Base class for Table-related classes with common functionality."""
141 _PROPERTY_TO_API_FIELD: Dict[str, Union[str, List[str]]] = {
142 "dataset_id": ["tableReference", "datasetId"],
143 "project": ["tableReference", "projectId"],
144 "table_id": ["tableReference", "tableId"],
145 }
147 def __init__(self):
148 self._properties = {}
150 @property
151 def project(self) -> str:
152 """Project bound to the table."""
153 return _helpers._get_sub_prop(
154 self._properties, self._PROPERTY_TO_API_FIELD["project"]
155 )
157 @property
158 def dataset_id(self) -> str:
159 """ID of dataset containing the table."""
160 return _helpers._get_sub_prop(
161 self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"]
162 )
164 @property
165 def table_id(self) -> str:
166 """The table ID."""
167 return _helpers._get_sub_prop(
168 self._properties, self._PROPERTY_TO_API_FIELD["table_id"]
169 )
171 @property
172 def path(self) -> str:
173 """URL path for the table's APIs."""
174 return (
175 f"/projects/{self.project}/datasets/{self.dataset_id}"
176 f"/tables/{self.table_id}"
177 )
179 def __eq__(self, other):
180 if isinstance(other, _TableBase):
181 return (
182 self.project == other.project
183 and self.dataset_id == other.dataset_id
184 and self.table_id == other.table_id
185 )
186 else:
187 return NotImplemented
189 def __hash__(self):
190 return hash((self.project, self.dataset_id, self.table_id))
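# Illustrative sketch, not part of the library source: equality and hashing for
# _TableBase subclasses depend only on (project, dataset_id, table_id), so a
# Table and a TableReference that point at the same table compare equal.
#
#   >>> ref = TableReference.from_string("my-project.mydataset.mytable")
#   >>> Table(ref) == ref
#   True
#   >>> len({ref, Table(ref)})
#   1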
193class TableReference(_TableBase):
194 """TableReferences are pointers to tables.
196 See
197 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#tablereference
199 Args:
200 dataset_ref: A pointer to the dataset
201 table_id: The ID of the table
202 """
204 _PROPERTY_TO_API_FIELD = {
205 "dataset_id": "datasetId",
206 "project": "projectId",
207 "table_id": "tableId",
208 }
210 def __init__(self, dataset_ref: "DatasetReference", table_id: str):
211 self._properties = {}
213 _helpers._set_sub_prop(
214 self._properties,
215 self._PROPERTY_TO_API_FIELD["project"],
216 dataset_ref.project,
217 )
218 _helpers._set_sub_prop(
219 self._properties,
220 self._PROPERTY_TO_API_FIELD["dataset_id"],
221 dataset_ref.dataset_id,
222 )
223 _helpers._set_sub_prop(
224 self._properties,
225 self._PROPERTY_TO_API_FIELD["table_id"],
226 table_id,
227 )
229 @classmethod
230 def from_string(
231 cls, table_id: str, default_project: str = None
232 ) -> "TableReference":
233 """Construct a table reference from table ID string.
235 Args:
236 table_id (str):
237 A table ID in standard SQL format. If ``default_project``
238 is not specified, this must include a project ID, dataset
239 ID, and table ID, each separated by ``.``.
240 default_project (Optional[str]):
241 The project ID to use when ``table_id`` does not
242 include a project ID.
244 Returns:
245 TableReference: Table reference parsed from ``table_id``.
247 Examples:
248 >>> TableReference.from_string('my-project.mydataset.mytable')
249 TableRef...(DatasetRef...('my-project', 'mydataset'), 'mytable')
251 Raises:
252 ValueError:
253 If ``table_id`` is not a fully-qualified table ID in
254 standard SQL format.
255 """
256 from google.cloud.bigquery.dataset import DatasetReference
258 (
259 output_project_id,
260 output_dataset_id,
261 output_table_id,
262 ) = _helpers._parse_3_part_id(
263 table_id, default_project=default_project, property_name="table_id"
264 )
266 return cls(
267 DatasetReference(output_project_id, output_dataset_id), output_table_id
268 )
270 @classmethod
271 def from_api_repr(cls, resource: dict) -> "TableReference":
272 """Factory: construct a table reference given its API representation
274 Args:
275 resource (Dict[str, object]):
276 Table reference representation returned from the API
278 Returns:
279 google.cloud.bigquery.table.TableReference:
280 Table reference parsed from ``resource``.
281 """
282 from google.cloud.bigquery.dataset import DatasetReference
284 project = resource["projectId"]
285 dataset_id = resource["datasetId"]
286 table_id = resource["tableId"]
288 return cls(DatasetReference(project, dataset_id), table_id)
290 def to_api_repr(self) -> dict:
291 """Construct the API resource representation of this table reference.
293 Returns:
294 Dict[str, object]: Table reference represented as an API resource
295 """
296 return copy.deepcopy(self._properties)
298 def to_bqstorage(self) -> str:
299 """Construct a BigQuery Storage API representation of this table.
301 Install the ``google-cloud-bigquery-storage`` package to use this
302 feature.
304 If the ``table_id`` contains a partition identifier (e.g.
305 ``my_table$201812``) or a snapshot identifier (e.g.
306 ``mytable@1234567890``), it is ignored. Use
307 :class:`google.cloud.bigquery_storage.types.ReadSession.TableReadOptions`
308 to filter rows by partition. Use
309 :class:`google.cloud.bigquery_storage.types.ReadSession.TableModifiers`
310 to select a specific snapshot to read from.
312 Returns:
313 str: A reference to this table in the BigQuery Storage API.
314 """
316 table_id, _, _ = self.table_id.partition("@")
317 table_id, _, _ = table_id.partition("$")
319 table_ref = (
320 f"projects/{self.project}/datasets/{self.dataset_id}/tables/{table_id}"
321 )
322 return table_ref
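# Illustrative sketch, not part of the library source: partition and snapshot
# decorators in the table ID are dropped from the BigQuery Storage path.
#
#   >>> ref = TableReference.from_string("my-project.mydataset.mytable$20181231")
#   >>> ref.to_bqstorage()
#   'projects/my-project/datasets/mydataset/tables/mytable'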
324 def __str__(self):
325 return f"{self.project}.{self.dataset_id}.{self.table_id}"
327 def __repr__(self):
328 from google.cloud.bigquery.dataset import DatasetReference
330 dataset_ref = DatasetReference(self.project, self.dataset_id)
331 return f"TableReference({dataset_ref!r}, '{self.table_id}')"
334class Table(_TableBase):
335 """Tables represent a set of rows whose values correspond to a schema.
337 See
338 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource-table
340 Args:
341 table_ref (Union[google.cloud.bigquery.table.TableReference, str]):
342 A pointer to a table. If ``table_ref`` is a string, it must
343 include a project ID, dataset ID, and table ID, each separated
344 by ``.``.
345 schema (Optional[Sequence[Union[ \
346 :class:`~google.cloud.bigquery.schema.SchemaField`, \
347 Mapping[str, Any] \
348 ]]]):
349 The table's schema. If any item is a mapping, its content must be
350 compatible with
351 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
352 """
354 _PROPERTY_TO_API_FIELD = {
355 **_TableBase._PROPERTY_TO_API_FIELD,
356 "clustering_fields": "clustering",
357 "created": "creationTime",
358 "description": "description",
359 "encryption_configuration": "encryptionConfiguration",
360 "etag": "etag",
361 "expires": "expirationTime",
362 "external_data_configuration": "externalDataConfiguration",
363 "friendly_name": "friendlyName",
364 "full_table_id": "id",
365 "labels": "labels",
366 "location": "location",
367 "modified": "lastModifiedTime",
368 "mview_enable_refresh": "materializedView",
369 "mview_last_refresh_time": ["materializedView", "lastRefreshTime"],
370 "mview_query": "materializedView",
371 "mview_refresh_interval": "materializedView",
372 "num_bytes": "numBytes",
373 "num_rows": "numRows",
374 "partition_expiration": "timePartitioning",
375 "partitioning_type": "timePartitioning",
376 "range_partitioning": "rangePartitioning",
377 "time_partitioning": "timePartitioning",
378 "schema": "schema",
379 "snapshot_definition": "snapshotDefinition",
380 "clone_definition": "cloneDefinition",
381 "streaming_buffer": "streamingBuffer",
382 "self_link": "selfLink",
383 "time_partitioning": "timePartitioning",
384 "type": "type",
385 "view_use_legacy_sql": "view",
386 "view_query": "view",
387 "require_partition_filter": "requirePartitionFilter",
388 }
390 def __init__(self, table_ref, schema=None) -> None:
391 table_ref = _table_arg_to_table_ref(table_ref)
392 self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}}
393 # Let the @property do validation.
394 if schema is not None:
395 self.schema = schema
397 reference = property(_reference_getter)
399 @property
400 def require_partition_filter(self):
401 """bool: If set to true, queries over the partitioned table require a
402 partition filter that can be used for partition elimination to be
403 specified.
404 """
405 return self._properties.get(
406 self._PROPERTY_TO_API_FIELD["require_partition_filter"]
407 )
409 @require_partition_filter.setter
410 def require_partition_filter(self, value):
411 self._properties[
412 self._PROPERTY_TO_API_FIELD["require_partition_filter"]
413 ] = value
415 @property
416 def schema(self):
417 """Sequence[Union[ \
418 :class:`~google.cloud.bigquery.schema.SchemaField`, \
419 Mapping[str, Any] \
420 ]]:
421 Table's schema.
423 Raises:
424 Exception:
425 If ``schema`` is not a sequence, or if any item in the sequence
426 is not a :class:`~google.cloud.bigquery.schema.SchemaField`
427 instance or a compatible mapping representation of the field.
428 """
429 prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
430 if not prop:
431 return []
432 else:
433 return _parse_schema_resource(prop)
435 @schema.setter
436 def schema(self, value):
437 api_field = self._PROPERTY_TO_API_FIELD["schema"]
439 if value is None:
440 self._properties[api_field] = None
441 else:
442 value = _to_schema_fields(value)
443 self._properties[api_field] = {"fields": _build_schema_resource(value)}
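# Illustrative sketch, not part of the library source: the schema setter accepts
# SchemaField objects as well as mappings compatible with
# SchemaField.from_api_repr(); "my-project.mydataset.mytable" is a placeholder.
#
#   >>> from google.cloud.bigquery import SchemaField
#   >>> table = Table("my-project.mydataset.mytable")
#   >>> table.schema = [
#   ...     SchemaField("full_name", "STRING", mode="REQUIRED"),
#   ...     {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
#   ... ]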
445 @property
446 def labels(self):
447 """Dict[str, str]: Labels for the table.
449 This method always returns a dict. To change a table's labels,
450 modify the dict, then call ``Client.update_table``. To delete a
451 label, set its value to :data:`None` before updating.
453 Raises:
454 ValueError: If ``value`` type is invalid.
455 """
456 return self._properties.setdefault(self._PROPERTY_TO_API_FIELD["labels"], {})
458 @labels.setter
459 def labels(self, value):
460 if not isinstance(value, dict):
461 raise ValueError("Pass a dict")
462 self._properties[self._PROPERTY_TO_API_FIELD["labels"]] = value
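# Illustrative sketch, not part of the library source: labels are changed by
# mutating the dict and then calling Client.update_table() (a Client instance
# is assumed); setting a label's value to None deletes it on update.
#
#   >>> table.labels["env"] = "dev"
#   >>> table.labels["obsolete"] = None
#   >>> # table = client.update_table(table, ["labels"])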
464 @property
465 def encryption_configuration(self):
466 """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
467 encryption configuration for the table.
469 Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`
470 if using default encryption.
472 See `protecting data with Cloud KMS keys
473 <https://cloud.google.com/bigquery/docs/customer-managed-encryption>`_
474 in the BigQuery documentation.
475 """
476 prop = self._properties.get(
477 self._PROPERTY_TO_API_FIELD["encryption_configuration"]
478 )
479 if prop is not None:
480 prop = EncryptionConfiguration.from_api_repr(prop)
481 return prop
483 @encryption_configuration.setter
484 def encryption_configuration(self, value):
485 api_repr = value
486 if value is not None:
487 api_repr = value.to_api_repr()
488 self._properties[
489 self._PROPERTY_TO_API_FIELD["encryption_configuration"]
490 ] = api_repr
492 @property
493 def created(self):
494 """Union[datetime.datetime, None]: Datetime at which the table was
495 created (:data:`None` until set from the server).
496 """
497 creation_time = self._properties.get(self._PROPERTY_TO_API_FIELD["created"])
498 if creation_time is not None:
499 # creation_time will be in milliseconds.
500 return google.cloud._helpers._datetime_from_microseconds(
501 1000.0 * float(creation_time)
502 )
504 @property
505 def etag(self):
506 """Union[str, None]: ETag for the table resource (:data:`None` until
507 set from the server).
508 """
509 return self._properties.get(self._PROPERTY_TO_API_FIELD["etag"])
511 @property
512 def modified(self):
513 """Union[datetime.datetime, None]: Datetime at which the table was last
514 modified (:data:`None` until set from the server).
515 """
516 modified_time = self._properties.get(self._PROPERTY_TO_API_FIELD["modified"])
517 if modified_time is not None:
518 # modified_time will be in milliseconds.
519 return google.cloud._helpers._datetime_from_microseconds(
520 1000.0 * float(modified_time)
521 )
523 @property
524 def num_bytes(self):
525 """Union[int, None]: The size of the table in bytes (:data:`None` until
526 set from the server).
527 """
528 return _helpers._int_or_none(
529 self._properties.get(self._PROPERTY_TO_API_FIELD["num_bytes"])
530 )
532 @property
533 def num_rows(self):
534 """Union[int, None]: The number of rows in the table (:data:`None`
535 until set from the server).
536 """
537 return _helpers._int_or_none(
538 self._properties.get(self._PROPERTY_TO_API_FIELD["num_rows"])
539 )
541 @property
542 def self_link(self):
543 """Union[str, None]: URL for the table resource (:data:`None` until set
544 from the server).
545 """
546 return self._properties.get(self._PROPERTY_TO_API_FIELD["self_link"])
548 @property
549 def full_table_id(self):
550 """Union[str, None]: ID for the table (:data:`None` until set from the
551 server).
553 In the format ``project-id:dataset_id.table_id``.
554 """
555 return self._properties.get(self._PROPERTY_TO_API_FIELD["full_table_id"])
557 @property
558 def table_type(self):
559 """Union[str, None]: The type of the table (:data:`None` until set from
560 the server).
562 Possible values are ``'TABLE'``, ``'VIEW'``, ``'MATERIALIZED_VIEW'`` or
563 ``'EXTERNAL'``.
564 """
565 return self._properties.get(self._PROPERTY_TO_API_FIELD["type"])
567 @property
568 def range_partitioning(self):
569 """Optional[google.cloud.bigquery.table.RangePartitioning]:
570 Configures range-based partitioning for a table.
572 .. note::
573 **Beta**. The integer range partitioning feature is in a
574 pre-release state and might change or have limited support.
576 Only specify at most one of
577 :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
578 :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.
580 Raises:
581 ValueError:
582 If the value is not
583 :class:`~google.cloud.bigquery.table.RangePartitioning` or
584 :data:`None`.
585 """
586 resource = self._properties.get(
587 self._PROPERTY_TO_API_FIELD["range_partitioning"]
588 )
589 if resource is not None:
590 return RangePartitioning(_properties=resource)
592 @range_partitioning.setter
593 def range_partitioning(self, value):
594 resource = value
595 if isinstance(value, RangePartitioning):
596 resource = value._properties
597 elif value is not None:
598 raise ValueError(
599 "Expected value to be RangePartitioning or None, got {}.".format(value)
600 )
601 self._properties[self._PROPERTY_TO_API_FIELD["range_partitioning"]] = resource
603 @property
604 def time_partitioning(self):
605 """Optional[google.cloud.bigquery.table.TimePartitioning]: Configures time-based
606 partitioning for a table.
608 Only specify at most one of
609 :attr:`~google.cloud.bigquery.table.Table.time_partitioning` or
610 :attr:`~google.cloud.bigquery.table.Table.range_partitioning`.
612 Raises:
613 ValueError:
614 If the value is not
615 :class:`~google.cloud.bigquery.table.TimePartitioning` or
616 :data:`None`.
617 """
618 prop = self._properties.get(self._PROPERTY_TO_API_FIELD["time_partitioning"])
619 if prop is not None:
620 return TimePartitioning.from_api_repr(prop)
622 @time_partitioning.setter
623 def time_partitioning(self, value):
624 api_repr = value
625 if isinstance(value, TimePartitioning):
626 api_repr = value.to_api_repr()
627 elif value is not None:
628 raise ValueError(
629 "value must be google.cloud.bigquery.table.TimePartitioning " "or None"
630 )
631 self._properties[self._PROPERTY_TO_API_FIELD["time_partitioning"]] = api_repr
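# Illustrative sketch, not part of the library source: daily time partitioning
# on a column, with a 90-day partition expiration; "transaction_date" is a
# placeholder column name.
#
#   >>> from google.cloud.bigquery import TimePartitioning, TimePartitioningType
#   >>> table.time_partitioning = TimePartitioning(
#   ...     type_=TimePartitioningType.DAY,
#   ...     field="transaction_date",
#   ...     expiration_ms=90 * 24 * 60 * 60 * 1000,
#   ... )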
633 @property
634 def partitioning_type(self):
635 """Union[str, None]: Time partitioning of the table if it is
636 partitioned (Defaults to :data:`None`).
638 """
639 warnings.warn(
640 "This method will be deprecated in future versions. Please use "
641 "Table.time_partitioning.type_ instead.",
642 PendingDeprecationWarning,
643 stacklevel=2,
644 )
645 if self.time_partitioning is not None:
646 return self.time_partitioning.type_
648 @partitioning_type.setter
649 def partitioning_type(self, value):
650 warnings.warn(
651 "This method will be deprecated in future versions. Please use "
652 "Table.time_partitioning.type_ instead.",
653 PendingDeprecationWarning,
654 stacklevel=2,
655 )
656 api_field = self._PROPERTY_TO_API_FIELD["partitioning_type"]
657 if self.time_partitioning is None:
658 self._properties[api_field] = {}
659 self._properties[api_field]["type"] = value
661 @property
662 def partition_expiration(self):
663 """Union[int, None]: Expiration time in milliseconds for a partition.
665 If :attr:`partition_expiration` is set and :attr:`type_` is
666 not set, :attr:`type_` will default to
667 :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`.
668 """
669 warnings.warn(
670 "This method will be deprecated in future versions. Please use "
671 "Table.time_partitioning.expiration_ms instead.",
672 PendingDeprecationWarning,
673 stacklevel=2,
674 )
675 if self.time_partitioning is not None:
676 return self.time_partitioning.expiration_ms
678 @partition_expiration.setter
679 def partition_expiration(self, value):
680 warnings.warn(
681 "This method will be deprecated in future versions. Please use "
682 "Table.time_partitioning.expiration_ms instead.",
683 PendingDeprecationWarning,
684 stacklevel=2,
685 )
686 api_field = self._PROPERTY_TO_API_FIELD["partition_expiration"]
688 if self.time_partitioning is None:
689 self._properties[api_field] = {"type": TimePartitioningType.DAY}
690 self._properties[api_field]["expirationMs"] = str(value)
692 @property
693 def clustering_fields(self):
694 """Union[List[str], None]: Fields defining clustering for the table
696 (Defaults to :data:`None`).
698 Clustering fields are immutable after table creation.
700 .. note::
702 BigQuery supports clustering for both partitioned and
703 non-partitioned tables.
704 """
705 prop = self._properties.get(self._PROPERTY_TO_API_FIELD["clustering_fields"])
706 if prop is not None:
707 return list(prop.get("fields", ()))
709 @clustering_fields.setter
710 def clustering_fields(self, value):
711 """Union[List[str], None]: Fields defining clustering for the table
713 (Defaults to :data:`None`).
714 """
715 api_field = self._PROPERTY_TO_API_FIELD["clustering_fields"]
717 if value is not None:
718 prop = self._properties.setdefault(api_field, {})
719 prop["fields"] = value
720 else:
721 # In order to allow unsetting clustering fields completely, we explicitly
722 # set this property to None (as opposed to merely removing the key).
723 self._properties[api_field] = None
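# Illustrative sketch, not part of the library source: clustering fields are
# assigned as a list of column names and cleared by assigning None.
#
#   >>> table.clustering_fields = ["customer_id", "transaction_date"]
#   >>> table.clustering_fields = None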
725 @property
726 def description(self):
727 """Union[str, None]: Description of the table (defaults to
728 :data:`None`).
730 Raises:
731 ValueError: For invalid value types.
732 """
733 return self._properties.get(self._PROPERTY_TO_API_FIELD["description"])
735 @description.setter
736 def description(self, value):
737 if not isinstance(value, str) and value is not None:
738 raise ValueError("Pass a string, or None")
739 self._properties[self._PROPERTY_TO_API_FIELD["description"]] = value
741 @property
742 def expires(self):
743 """Union[datetime.datetime, None]: Datetime at which the table will be
744 deleted.
746 Raises:
747 ValueError: For invalid value types.
748 """
749 expiration_time = self._properties.get(self._PROPERTY_TO_API_FIELD["expires"])
750 if expiration_time is not None:
751 # expiration_time will be in milliseconds.
752 return google.cloud._helpers._datetime_from_microseconds(
753 1000.0 * float(expiration_time)
754 )
756 @expires.setter
757 def expires(self, value):
758 if not isinstance(value, datetime.datetime) and value is not None:
759 raise ValueError("Pass a datetime, or None")
760 value_ms = google.cloud._helpers._millis_from_datetime(value)
761 self._properties[
762 self._PROPERTY_TO_API_FIELD["expires"]
763 ] = _helpers._str_or_none(value_ms)
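# Illustrative sketch, not part of the library source: setting an absolute
# expiration time; a timezone-aware datetime avoids ambiguity.
#
#   >>> import datetime
#   >>> table.expires = datetime.datetime.now(
#   ...     tz=datetime.timezone.utc
#   ... ) + datetime.timedelta(days=7)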
765 @property
766 def friendly_name(self):
767 """Union[str, None]: Title of the table (defaults to :data:`None`).
769 Raises:
770 ValueError: For invalid value types.
771 """
772 return self._properties.get(self._PROPERTY_TO_API_FIELD["friendly_name"])
774 @friendly_name.setter
775 def friendly_name(self, value):
776 if not isinstance(value, str) and value is not None:
777 raise ValueError("Pass a string, or None")
778 self._properties[self._PROPERTY_TO_API_FIELD["friendly_name"]] = value
780 @property
781 def location(self):
782 """Union[str, None]: Location in which the table is hosted
784 Defaults to :data:`None`.
785 """
786 return self._properties.get(self._PROPERTY_TO_API_FIELD["location"])
788 @property
789 def view_query(self):
790 """Union[str, None]: SQL query defining the table as a view (defaults
791 to :data:`None`).
793 By default, the query is treated as Standard SQL. To use Legacy
794 SQL, set :attr:`view_use_legacy_sql` to :data:`True`.
796 Raises:
797 ValueError: For invalid value types.
798 """
799 api_field = self._PROPERTY_TO_API_FIELD["view_query"]
800 return _helpers._get_sub_prop(self._properties, [api_field, "query"])
802 @view_query.setter
803 def view_query(self, value):
804 if not isinstance(value, str):
805 raise ValueError("Pass a string")
807 api_field = self._PROPERTY_TO_API_FIELD["view_query"]
808 _helpers._set_sub_prop(self._properties, [api_field, "query"], value)
809 view = self._properties[api_field]
810 # The service defaults useLegacySql to True, but this
811 # client uses Standard SQL by default.
812 if view.get("useLegacySql") is None:
813 view["useLegacySql"] = False
815 @view_query.deleter
816 def view_query(self):
817 """Delete SQL query defining the table as a view."""
818 self._properties.pop(self._PROPERTY_TO_API_FIELD["view_query"], None)
820 view_use_legacy_sql = property(_view_use_legacy_sql_getter)
822 @view_use_legacy_sql.setter # type: ignore # (redefinition from above)
823 def view_use_legacy_sql(self, value):
824 if not isinstance(value, bool):
825 raise ValueError("Pass a boolean")
827 api_field = self._PROPERTY_TO_API_FIELD["view_query"]
828 if self._properties.get(api_field) is None:
829 self._properties[api_field] = {}
830 self._properties[api_field]["useLegacySql"] = value
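# Illustrative sketch, not part of the library source: defining a view; setting
# view_query defaults useLegacySql to False on the client side.
#
#   >>> view = Table("my-project.mydataset.myview")
#   >>> view.view_query = "SELECT name, age FROM `my-project.mydataset.mytable`"
#   >>> view.view_use_legacy_sql
#   False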
832 @property
833 def mview_query(self):
834 """Optional[str]: SQL query defining the table as a materialized
835 view (defaults to :data:`None`).
836 """
837 api_field = self._PROPERTY_TO_API_FIELD["mview_query"]
838 return _helpers._get_sub_prop(self._properties, [api_field, "query"])
840 @mview_query.setter
841 def mview_query(self, value):
842 api_field = self._PROPERTY_TO_API_FIELD["mview_query"]
843 _helpers._set_sub_prop(self._properties, [api_field, "query"], str(value))
845 @mview_query.deleter
846 def mview_query(self):
847 """Delete SQL query defining the table as a materialized view."""
848 self._properties.pop(self._PROPERTY_TO_API_FIELD["mview_query"], None)
850 @property
851 def mview_last_refresh_time(self):
852 """Optional[datetime.datetime]: Datetime at which the materialized view was last
853 refreshed (:data:`None` until set from the server).
854 """
855 refresh_time = _helpers._get_sub_prop(
856 self._properties, self._PROPERTY_TO_API_FIELD["mview_last_refresh_time"]
857 )
858 if refresh_time is not None:
859 # refresh_time will be in milliseconds.
860 return google.cloud._helpers._datetime_from_microseconds(
861 1000 * int(refresh_time)
862 )
864 @property
865 def mview_enable_refresh(self):
866 """Optional[bool]: Enable automatic refresh of the materialized view
867 when the base table is updated. The default value is :data:`True`.
868 """
869 api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"]
870 return _helpers._get_sub_prop(self._properties, [api_field, "enableRefresh"])
872 @mview_enable_refresh.setter
873 def mview_enable_refresh(self, value):
874 api_field = self._PROPERTY_TO_API_FIELD["mview_enable_refresh"]
875 return _helpers._set_sub_prop(
876 self._properties, [api_field, "enableRefresh"], value
877 )
879 @property
880 def mview_refresh_interval(self):
881 """Optional[datetime.timedelta]: The maximum frequency at which this
882 materialized view will be refreshed. The default value is 1800000
883 milliseconds (30 minutes).
884 """
885 api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
886 refresh_interval = _helpers._get_sub_prop(
887 self._properties, [api_field, "refreshIntervalMs"]
888 )
889 if refresh_interval is not None:
890 return datetime.timedelta(milliseconds=int(refresh_interval))
892 @mview_refresh_interval.setter
893 def mview_refresh_interval(self, value):
894 if value is None:
895 refresh_interval_ms = None
896 else:
897 refresh_interval_ms = str(value // datetime.timedelta(milliseconds=1))
899 api_field = self._PROPERTY_TO_API_FIELD["mview_refresh_interval"]
900 _helpers._set_sub_prop(
901 self._properties,
902 [api_field, "refreshIntervalMs"],
903 refresh_interval_ms,
904 )
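# Illustrative sketch, not part of the library source: configuring a
# materialized view with automatic refresh every hour; table and column names
# are placeholders.
#
#   >>> import datetime
#   >>> mview = Table("my-project.mydataset.my_mview")
#   >>> mview.mview_query = (
#   ...     "SELECT customer_id, SUM(amount) AS total "
#   ...     "FROM `my-project.mydataset.sales` GROUP BY customer_id"
#   ... )
#   >>> mview.mview_enable_refresh = True
#   >>> mview.mview_refresh_interval = datetime.timedelta(hours=1)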
906 @property
907 def streaming_buffer(self):
908 """google.cloud.bigquery.StreamingBuffer: Information about a table's
909 streaming buffer.
910 """
911 sb = self._properties.get(self._PROPERTY_TO_API_FIELD["streaming_buffer"])
912 if sb is not None:
913 return StreamingBuffer(sb)
915 @property
916 def external_data_configuration(self):
917 """Union[google.cloud.bigquery.ExternalConfig, None]: Configuration for
918 an external data source (defaults to :data:`None`).
920 Raises:
921 ValueError: For invalid value types.
922 """
923 prop = self._properties.get(
924 self._PROPERTY_TO_API_FIELD["external_data_configuration"]
925 )
926 if prop is not None:
927 prop = ExternalConfig.from_api_repr(prop)
928 return prop
930 @external_data_configuration.setter
931 def external_data_configuration(self, value):
932 if not (value is None or isinstance(value, ExternalConfig)):
933 raise ValueError("Pass an ExternalConfig or None")
934 api_repr = value
935 if value is not None:
936 api_repr = value.to_api_repr()
937 self._properties[
938 self._PROPERTY_TO_API_FIELD["external_data_configuration"]
939 ] = api_repr
941 @property
942 def snapshot_definition(self) -> Optional["SnapshotDefinition"]:
943 """Information about the snapshot. This value is set via snapshot creation.
945 See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.snapshot_definition
946 """
947 snapshot_info = self._properties.get(
948 self._PROPERTY_TO_API_FIELD["snapshot_definition"]
949 )
950 if snapshot_info is not None:
951 snapshot_info = SnapshotDefinition(snapshot_info)
952 return snapshot_info
954 @property
955 def clone_definition(self) -> Optional["CloneDefinition"]:
956 """Information about the clone. This value is set via clone creation.
958 See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.clone_definition
959 """
960 clone_info = self._properties.get(
961 self._PROPERTY_TO_API_FIELD["clone_definition"]
962 )
963 if clone_info is not None:
964 clone_info = CloneDefinition(clone_info)
965 return clone_info
967 @classmethod
968 def from_string(cls, full_table_id: str) -> "Table":
969 """Construct a table from fully-qualified table ID.
971 Args:
972 full_table_id (str):
973 A fully-qualified table ID in standard SQL format. Must
974 include a project ID, dataset ID, and table ID, each
975 separated by ``.``.
977 Returns:
978 Table: Table parsed from ``full_table_id``.
980 Examples:
981 >>> Table.from_string('my-project.mydataset.mytable')
982 Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))
984 Raises:
985 ValueError:
986 If ``full_table_id`` is not a fully-qualified table ID in
987 standard SQL format.
988 """
989 return cls(TableReference.from_string(full_table_id))
991 @classmethod
992 def from_api_repr(cls, resource: dict) -> "Table":
993 """Factory: construct a table given its API representation
995 Args:
996 resource (Dict[str, object]):
997 Table resource representation from the API
999 Returns:
1000 google.cloud.bigquery.table.Table: Table parsed from ``resource``.
1002 Raises:
1003 KeyError:
1004 If the ``resource`` lacks the key ``'tableReference'``, or if
1005 the ``dict`` stored within the key ``'tableReference'`` lacks
1006 the keys ``'tableId'``, ``'projectId'``, or ``'datasetId'``.
1007 """
1008 from google.cloud.bigquery import dataset
1010 if (
1011 "tableReference" not in resource
1012 or "tableId" not in resource["tableReference"]
1013 ):
1014 raise KeyError(
1015 "Resource lacks required identity information:"
1016 '["tableReference"]["tableId"]'
1017 )
1018 project_id = _helpers._get_sub_prop(
1019 resource, cls._PROPERTY_TO_API_FIELD["project"]
1020 )
1021 table_id = _helpers._get_sub_prop(
1022 resource, cls._PROPERTY_TO_API_FIELD["table_id"]
1023 )
1024 dataset_id = _helpers._get_sub_prop(
1025 resource, cls._PROPERTY_TO_API_FIELD["dataset_id"]
1026 )
1027 dataset_ref = dataset.DatasetReference(project_id, dataset_id)
1029 table = cls(dataset_ref.table(table_id))
1030 table._properties = resource
1032 return table
1034 def to_api_repr(self) -> dict:
1035 """Constructs the API resource of this table
1037 Returns:
1038 Dict[str, object]: Table represented as an API resource
1039 """
1040 return copy.deepcopy(self._properties)
1042 def to_bqstorage(self) -> str:
1043 """Construct a BigQuery Storage API representation of this table.
1045 Returns:
1046 str: A reference to this table in the BigQuery Storage API.
1047 """
1048 return self.reference.to_bqstorage()
1050 def _build_resource(self, filter_fields):
1051 """Generate a resource for ``update``."""
1052 return _helpers._build_resource_from_properties(self, filter_fields)
1054 def __repr__(self):
1055 return "Table({})".format(repr(self.reference))
1057 def __str__(self):
1058 return f"{self.project}.{self.dataset_id}.{self.table_id}"
1061class TableListItem(_TableBase):
1062 """A read-only table resource from a list operation.
1064 For performance reasons, the BigQuery API only includes some of the table
1065 properties when listing tables. Notably,
1066 :attr:`~google.cloud.bigquery.table.Table.schema` and
1067 :attr:`~google.cloud.bigquery.table.Table.num_rows` are missing.
1069 For a full list of the properties that the BigQuery API returns, see the
1070 `REST documentation for tables.list
1071 <https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list>`_.
1074 Args:
1075 resource (Dict[str, object]):
1076 A table-like resource object from a table list response. A
1077 ``tableReference`` property is required.
1079 Raises:
1080 ValueError:
1081 If ``tableReference`` or one of its required members is missing
1082 from ``resource``.
1083 """
1085 def __init__(self, resource):
1086 if "tableReference" not in resource:
1087 raise ValueError("resource must contain a tableReference value")
1088 if "projectId" not in resource["tableReference"]:
1089 raise ValueError(
1090 "resource['tableReference'] must contain a projectId value"
1091 )
1092 if "datasetId" not in resource["tableReference"]:
1093 raise ValueError(
1094 "resource['tableReference'] must contain a datasetId value"
1095 )
1096 if "tableId" not in resource["tableReference"]:
1097 raise ValueError("resource['tableReference'] must contain a tableId value")
1099 self._properties = resource
1101 @property
1102 def created(self):
1103 """Union[datetime.datetime, None]: Datetime at which the table was
1104 created (:data:`None` until set from the server).
1105 """
1106 creation_time = self._properties.get("creationTime")
1107 if creation_time is not None:
1108 # creation_time will be in milliseconds.
1109 return google.cloud._helpers._datetime_from_microseconds(
1110 1000.0 * float(creation_time)
1111 )
1113 @property
1114 def expires(self):
1115 """Union[datetime.datetime, None]: Datetime at which the table will be
1116 deleted.
1117 """
1118 expiration_time = self._properties.get("expirationTime")
1119 if expiration_time is not None:
1120 # expiration_time will be in milliseconds.
1121 return google.cloud._helpers._datetime_from_microseconds(
1122 1000.0 * float(expiration_time)
1123 )
1125 reference = property(_reference_getter)
1127 @property
1128 def labels(self):
1129 """Dict[str, str]: Labels for the table.
1131 This method always returns a dict. To change a table's labels,
1132 modify the dict, then call ``Client.update_table``. To delete a
1133 label, set its value to :data:`None` before updating.
1134 """
1135 return self._properties.setdefault("labels", {})
1137 @property
1138 def full_table_id(self):
1139 """Union[str, None]: ID for the table (:data:`None` until set from the
1140 server).
1142 In the format ``project_id:dataset_id.table_id``.
1143 """
1144 return self._properties.get("id")
1146 @property
1147 def table_type(self):
1148 """Union[str, None]: The type of the table (:data:`None` until set from
1149 the server).
1151 Possible values are ``'TABLE'``, ``'VIEW'``, or ``'EXTERNAL'``.
1152 """
1153 return self._properties.get("type")
1155 @property
1156 def time_partitioning(self):
1157 """google.cloud.bigquery.table.TimePartitioning: Configures time-based
1158 partitioning for a table.
1159 """
1160 prop = self._properties.get("timePartitioning")
1161 if prop is not None:
1162 return TimePartitioning.from_api_repr(prop)
1164 @property
1165 def partitioning_type(self):
1166 """Union[str, None]: Time partitioning of the table if it is
1167 partitioned (Defaults to :data:`None`).
1168 """
1169 warnings.warn(
1170 "This method will be deprecated in future versions. Please use "
1171 "TableListItem.time_partitioning.type_ instead.",
1172 PendingDeprecationWarning,
1173 stacklevel=2,
1174 )
1175 if self.time_partitioning is not None:
1176 return self.time_partitioning.type_
1178 @property
1179 def partition_expiration(self):
1180 """Union[int, None]: Expiration time in milliseconds for a partition.
1182 If this property is set and :attr:`type_` is not set, :attr:`type_`
1183 will default to :attr:`TimePartitioningType.DAY`.
1184 """
1185 warnings.warn(
1186 "This method will be deprecated in future versions. Please use "
1187 "TableListItem.time_partitioning.expiration_ms instead.",
1188 PendingDeprecationWarning,
1189 stacklevel=2,
1190 )
1191 if self.time_partitioning is not None:
1192 return self.time_partitioning.expiration_ms
1194 @property
1195 def friendly_name(self):
1196 """Union[str, None]: Title of the table (defaults to :data:`None`)."""
1197 return self._properties.get("friendlyName")
1199 view_use_legacy_sql = property(_view_use_legacy_sql_getter)
1201 @property
1202 def clustering_fields(self):
1203 """Union[List[str], None]: Fields defining clustering for the table
1205 (Defaults to :data:`None`).
1207 Clustering fields are immutable after table creation.
1209 .. note::
1211 BigQuery supports clustering for both partitioned and
1212 non-partitioned tables.
1213 """
1214 prop = self._properties.get("clustering")
1215 if prop is not None:
1216 return list(prop.get("fields", ()))
1218 @classmethod
1219 def from_string(cls, full_table_id: str) -> "TableListItem":
1220 """Construct a table from fully-qualified table ID.
1222 Args:
1223 full_table_id (str):
1224 A fully-qualified table ID in standard SQL format. Must
1225 include a project ID, dataset ID, and table ID, each
1226 separated by ``.``.
1228 Returns:
1229 Table: Table parsed from ``full_table_id``.
1231 Examples:
1232 >>> Table.from_string('my-project.mydataset.mytable')
1233 Table(TableRef...(D...('my-project', 'mydataset'), 'mytable'))
1235 Raises:
1236 ValueError:
1237 If ``full_table_id`` is not a fully-qualified table ID in
1238 standard SQL format.
1239 """
1240 return cls(
1241 {"tableReference": TableReference.from_string(full_table_id).to_api_repr()}
1242 )
1244 def to_bqstorage(self) -> str:
1245 """Construct a BigQuery Storage API representation of this table.
1247 Returns:
1248 str: A reference to this table in the BigQuery Storage API.
1249 """
1250 return self.reference.to_bqstorage()
1252 def to_api_repr(self) -> dict:
1253 """Constructs the API resource of this table
1255 Returns:
1256 Dict[str, object]: Table represented as an API resource
1257 """
1258 return copy.deepcopy(self._properties)
1261def _row_from_mapping(mapping, schema):
1262 """Convert a mapping to a row tuple using the schema.
1264 Args:
1265 mapping (Dict[str, object]):
1266 Mapping of row data: must contain keys for all required fields in
1267 the schema. Keys which do not correspond to a field in the schema
1268 are ignored.
1269 schema (List[google.cloud.bigquery.schema.SchemaField]):
1270 The schema of the table destination for the rows
1272 Returns:
1273 Tuple[object]:
1274 Tuple whose elements are ordered according to the schema.
1276 Raises:
1277 ValueError: If schema is empty.
1278 """
1279 if len(schema) == 0:
1280 raise ValueError(_TABLE_HAS_NO_SCHEMA)
1282 row = []
1283 for field in schema:
1284 if field.mode == "REQUIRED":
1285 row.append(mapping[field.name])
1286 elif field.mode == "REPEATED":
1287 row.append(mapping.get(field.name, ()))
1288 elif field.mode == "NULLABLE":
1289 row.append(mapping.get(field.name))
1290 else:
1291 raise ValueError("Unknown field mode: {}".format(field.mode))
1292 return tuple(row)
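# Illustrative sketch, not part of the library source: ordering a mapping's
# values according to a schema.
#
#   >>> from google.cloud.bigquery import SchemaField
#   >>> schema = [
#   ...     SchemaField("full_name", "STRING", mode="REQUIRED"),
#   ...     SchemaField("age", "INTEGER", mode="NULLABLE"),
#   ... ]
#   >>> _row_from_mapping({"full_name": "Phred", "age": 33}, schema)
#   ('Phred', 33)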
1295class StreamingBuffer(object):
1296 """Information about a table's streaming buffer.
1298 See https://cloud.google.com/bigquery/streaming-data-into-bigquery.
1300 Args:
1301 resource (Dict[str, object]):
1302 streaming buffer representation returned from the API
1303 """
1305 def __init__(self, resource):
1306 self.estimated_bytes = None
1307 if "estimatedBytes" in resource:
1308 self.estimated_bytes = int(resource["estimatedBytes"])
1309 self.estimated_rows = None
1310 if "estimatedRows" in resource:
1311 self.estimated_rows = int(resource["estimatedRows"])
1312 self.oldest_entry_time = None
1313 if "oldestEntryTime" in resource:
1314 self.oldest_entry_time = google.cloud._helpers._datetime_from_microseconds(
1315 1000.0 * int(resource["oldestEntryTime"])
1316 )
1319class SnapshotDefinition:
1320 """Information about base table and snapshot time of the snapshot.
1322 See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#snapshotdefinition
1324 Args:
1325 resource: Snapshot definition representation returned from the API.
1326 """
1328 def __init__(self, resource: Dict[str, Any]):
1329 self.base_table_reference = None
1330 if "baseTableReference" in resource:
1331 self.base_table_reference = TableReference.from_api_repr(
1332 resource["baseTableReference"]
1333 )
1335 self.snapshot_time = None
1336 if "snapshotTime" in resource:
1337 self.snapshot_time = google.cloud._helpers._rfc3339_to_datetime(
1338 resource["snapshotTime"]
1339 )
1342class CloneDefinition:
1343 """Information about base table and clone time of the clone.
1345 See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#clonedefinition
1347 Args:
1348 resource: Clone definition representation returned from the API.
1349 """
1351 def __init__(self, resource: Dict[str, Any]):
1352 self.base_table_reference = None
1353 if "baseTableReference" in resource:
1354 self.base_table_reference = TableReference.from_api_repr(
1355 resource["baseTableReference"]
1356 )
1358 self.clone_time = None
1359 if "cloneTime" in resource:
1360 self.clone_time = google.cloud._helpers._rfc3339_to_datetime(
1361 resource["cloneTime"]
1362 )
1365class Row(object):
1366 """A BigQuery row.
1368 Values can be accessed by position (index), by key like a dict,
1369 or as properties.
1371 Args:
1372 values (Sequence[object]): The row values
1373 field_to_index (Dict[str, int]):
1374 A mapping from schema field names to indexes
1375 """
1377 # Choose unusual field names to try to avoid conflict with schema fields.
1378 __slots__ = ("_xxx_values", "_xxx_field_to_index")
1380 def __init__(self, values, field_to_index) -> None:
1381 self._xxx_values = values
1382 self._xxx_field_to_index = field_to_index
1384 def values(self):
1385 """Return the values included in this row.
1387 Returns:
1388 Sequence[object]: A sequence of length ``len(row)``.
1389 """
1390 return copy.deepcopy(self._xxx_values)
1392 def keys(self) -> Iterable[str]:
1393 """Return the keys for using a row as a dict.
1395 Returns:
1396 Iterable[str]: The keys corresponding to the columns of a row
1398 Examples:
1400 >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).keys())
1401 ['x', 'y']
1402 """
1403 return self._xxx_field_to_index.keys()
1405 def items(self) -> Iterable[Tuple[str, Any]]:
1406 """Return items as ``(key, value)`` pairs.
1408 Returns:
1409 Iterable[Tuple[str, object]]:
1410 The ``(key, value)`` pairs representing this row.
1412 Examples:
1414 >>> list(Row(('a', 'b'), {'x': 0, 'y': 1}).items())
1415 [('x', 'a'), ('y', 'b')]
1416 """
1417 for key, index in self._xxx_field_to_index.items():
1418 yield (key, copy.deepcopy(self._xxx_values[index]))
1420 def get(self, key: str, default: Any = None) -> Any:
1421 """Return a value for key, with a default value if it does not exist.
1423 Args:
1424 key (str): The key of the column to access
1425 default (object):
1426 The default value to use if the key does not exist. (Defaults
1427 to :data:`None`.)
1429 Returns:
1430 object:
1431 The value associated with the provided key, or a default value.
1433 Examples:
1434 When the key exists, the value associated with it is returned.
1436 >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('x')
1437 'a'
1439 The default value is :data:`None` when the key does not exist.
1441 >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
1442 None
1444 The default value can be overridden with the ``default`` parameter.
1446 >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
1447 ''
1449 >>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', default = '')
1450 ''
1451 """
1452 index = self._xxx_field_to_index.get(key)
1453 if index is None:
1454 return default
1455 return self._xxx_values[index]
1457 def __getattr__(self, name):
1458 value = self._xxx_field_to_index.get(name)
1459 if value is None:
1460 raise AttributeError("no row field {!r}".format(name))
1461 return self._xxx_values[value]
1463 def __len__(self):
1464 return len(self._xxx_values)
1466 def __getitem__(self, key):
1467 if isinstance(key, str):
1468 value = self._xxx_field_to_index.get(key)
1469 if value is None:
1470 raise KeyError("no row field {!r}".format(key))
1471 key = value
1472 return self._xxx_values[key]
1474 def __eq__(self, other):
1475 if not isinstance(other, Row):
1476 return NotImplemented
1477 return (
1478 self._xxx_values == other._xxx_values
1479 and self._xxx_field_to_index == other._xxx_field_to_index
1480 )
1482 def __ne__(self, other):
1483 return not self == other
1485 def __repr__(self):
1486 # sort field dict by value, for determinism
1487 items = sorted(self._xxx_field_to_index.items(), key=operator.itemgetter(1))
1488 f2i = "{" + ", ".join("%r: %d" % item for item in items) + "}"
1489 return "Row({}, {})".format(self._xxx_values, f2i)
1492class _NoopProgressBarQueue(object):
1493 """A fake Queue class that does nothing.
1495 This is used when there is no progress bar to send updates to.
1496 """
1498 def put_nowait(self, item):
1499 """Don't actually do anything with the item."""
1502class RowIterator(HTTPIterator):
1503 """A class for iterating through HTTP/JSON API row list responses.
1505 Args:
1506 client (Optional[google.cloud.bigquery.Client]):
1507 The API client instance. This should always be non-`None`, except for
1508 subclasses that do not use it, namely the ``_EmptyRowIterator``.
1509 api_request (Callable[google.cloud._http.JSONConnection.api_request]):
1510 The function to use to make API requests.
1511 path (str): The method path to query for the list of items.
1512 schema (Sequence[Union[ \
1513 :class:`~google.cloud.bigquery.schema.SchemaField`, \
1514 Mapping[str, Any] \
1515 ]]):
1516 The table's schema. If any item is a mapping, its content must be
1517 compatible with
1518 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
1519 page_token (str): A token identifying a page in a result set to start
1520 fetching results from.
1521 max_results (Optional[int]): The maximum number of results to fetch.
1522 page_size (Optional[int]): The maximum number of rows in each page
1523 of results from this request. Non-positive values are ignored.
1524 Defaults to a sensible value set by the API.
1525 extra_params (Optional[Dict[str, object]]):
1526 Extra query string parameters for the API call.
1527 table (Optional[Union[ \
1528 google.cloud.bigquery.table.Table, \
1529 google.cloud.bigquery.table.TableReference, \
1530 ]]):
1531 The table which these rows belong to, or a reference to it. Used to
1532 call the BigQuery Storage API to fetch rows.
1533 selected_fields (Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]):
1534 A subset of columns to select from this table.
1535 total_rows (Optional[int]):
1536 Total number of rows in the table.
1537 first_page_response (Optional[dict]):
1538 API response for the first page of results. These are returned when
1539 the first page is requested.
1540 """
1542 def __init__(
1543 self,
1544 client,
1545 api_request,
1546 path,
1547 schema,
1548 page_token=None,
1549 max_results=None,
1550 page_size=None,
1551 extra_params=None,
1552 table=None,
1553 selected_fields=None,
1554 total_rows=None,
1555 first_page_response=None,
1556 ):
1557 super(RowIterator, self).__init__(
1558 client,
1559 api_request,
1560 path,
1561 item_to_value=_item_to_row,
1562 items_key="rows",
1563 page_token=page_token,
1564 max_results=max_results,
1565 extra_params=extra_params,
1566 page_start=_rows_page_start,
1567 next_token="pageToken",
1568 )
1569 schema = _to_schema_fields(schema)
1570 self._field_to_index = _helpers._field_to_index_mapping(schema)
1571 self._page_size = page_size
1572 self._preserve_order = False
1573 self._project = client.project if client is not None else None
1574 self._schema = schema
1575 self._selected_fields = selected_fields
1576 self._table = table
1577 self._total_rows = total_rows
1578 self._first_page_response = first_page_response
1580 def _is_completely_cached(self):
1581 """Check if all results are completely cached.
1583 This is useful to know, because we can avoid alternative download
1584 mechanisms.
1585 """
1586 if self._first_page_response is None or self.next_page_token:
1587 return False
1589 return self._first_page_response.get(self._next_token) is None
1591 def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
1592 """Returns if the BigQuery Storage API can be used.
1594 Returns:
1595 bool
1596 True if the BigQuery Storage client can be used or created.
1597 """
1598 using_bqstorage_api = bqstorage_client or create_bqstorage_client
1599 if not using_bqstorage_api:
1600 return False
1602 if self._is_completely_cached():
1603 return False
1605 if self.max_results is not None:
1606 return False
1608 try:
1609 from google.cloud import bigquery_storage # noqa: F401
1610 except ImportError:
1611 return False
1613 try:
1614 _helpers.BQ_STORAGE_VERSIONS.verify_version()
1615 except LegacyBigQueryStorageError as exc:
1616 warnings.warn(str(exc))
1617 return False
1619 return True
1621 def _get_next_page_response(self):
1622 """Requests the next page from the path provided.
1624 Returns:
1625 Dict[str, object]:
1626 The parsed JSON response of the next page's contents.
1627 """
1628 if self._first_page_response:
1629 response = self._first_page_response
1630 self._first_page_response = None
1631 return response
1633 params = self._get_query_params()
1634 if self._page_size is not None:
1635 if self.page_number and "startIndex" in params:
1636 del params["startIndex"]
1637 params["maxResults"] = self._page_size
1638 return self.api_request(
1639 method=self._HTTP_METHOD, path=self.path, query_params=params
1640 )
1642 @property
1643 def schema(self):
1644 """List[google.cloud.bigquery.schema.SchemaField]: The subset of
1645 columns to be read from the table."""
1646 return list(self._schema)
1648 @property
1649 def total_rows(self):
1650 """int: The total number of rows in the table."""
1651 return self._total_rows
1653 def _maybe_warn_max_results(
1654 self,
1655 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"],
1656 ):
1657 """Issue a warning if BQ Storage client is not ``None`` with ``max_results`` set.
1659 This helper method should be used directly in the relevant top-level public
1660 methods, so that the warning is issued for the correct line in user code.
1662 Args:
1663 bqstorage_client:
1664 The BigQuery Storage client intended to use for downloading result rows.
1665 """
1666 if bqstorage_client is not None and self.max_results is not None:
1667 warnings.warn(
1668 "Cannot use bqstorage_client if max_results is set, "
1669 "reverting to fetching data with the REST endpoint.",
1670 stacklevel=3,
1671 )
1673 def _to_page_iterable(
1674 self, bqstorage_download, tabledata_list_download, bqstorage_client=None
1675 ):
1676 if not self._validate_bqstorage(bqstorage_client, False):
1677 bqstorage_client = None
1679 result_pages = (
1680 bqstorage_download()
1681 if bqstorage_client is not None
1682 else tabledata_list_download()
1683 )
1684 yield from result_pages
1686 def to_arrow_iterable(
1687 self,
1688 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
1689 max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore
1690 ) -> Iterator["pyarrow.RecordBatch"]:
1691 """[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream.
1693 Args:
1694 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
1695 A BigQuery Storage API client. If supplied, use the faster
1696 BigQuery Storage API to fetch rows from BigQuery.
1698 This method requires the ``pyarrow`` and
1699 ``google-cloud-bigquery-storage`` libraries.
1701 This method only exposes a subset of the capabilities of the
1702 BigQuery Storage API. For full access to all features
1703 (projections, filters, snapshots) use the Storage API directly.
1705 max_queue_size (Optional[int]):
1706 The maximum number of result pages to hold in the internal queue when
1707 streaming query results over the BigQuery Storage API. Ignored if
1708 Storage API is not used.
1710 By default, the max queue size is set to the number of BQ Storage streams
1711 created by the server. If ``max_queue_size`` is :data:`None`, the queue
1712 size is infinite.
1714 Returns:
1715 Iterator[pyarrow.RecordBatch]:
1716 A generator of :class:`~pyarrow.RecordBatch`.
1718 .. versionadded:: 2.31.0
1719 """
1720 self._maybe_warn_max_results(bqstorage_client)
1722 bqstorage_download = functools.partial(
1723 _pandas_helpers.download_arrow_bqstorage,
1724 self._project,
1725 self._table,
1726 bqstorage_client,
1727 preserve_order=self._preserve_order,
1728 selected_fields=self._selected_fields,
1729 max_queue_size=max_queue_size,
1730 )
1731 tabledata_list_download = functools.partial(
1732 _pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema
1733 )
1734 return self._to_page_iterable(
1735 bqstorage_download,
1736 tabledata_list_download,
1737 bqstorage_client=bqstorage_client,
1738 )
1740 # If changing the signature of this method, make sure to apply the same
1741 # changes to job.QueryJob.to_arrow()
1742 def to_arrow(
1743 self,
1744 progress_bar_type: str = None,
1745 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
1746 create_bqstorage_client: bool = True,
1747 ) -> "pyarrow.Table":
1748 """[Beta] Create a class:`pyarrow.Table` by loading all pages of a
1749 table or query.
1751 Args:
1752 progress_bar_type (Optional[str]):
1753 If set, use the `tqdm <https://tqdm.github.io/>`_ library to
1754 display a progress bar while the data downloads. Install the
1755 ``tqdm`` package to use this feature.
1757 Possible values of ``progress_bar_type`` include:
1759 ``None``
1760 No progress bar.
1761 ``'tqdm'``
1762 Use the :func:`tqdm.tqdm` function to print a progress bar
1763 to :data:`sys.stdout`.
1764 ``'tqdm_notebook'``
1765 Use the :func:`tqdm.notebook.tqdm` function to display a
1766 progress bar as a Jupyter notebook widget.
1767 ``'tqdm_gui'``
1768 Use the :func:`tqdm.tqdm_gui` function to display a
1769 progress bar as a graphical dialog box.
1770 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
1771 A BigQuery Storage API client. If supplied, use the faster BigQuery
1772 Storage API to fetch rows from BigQuery. This API is a billable API.
1774 This method requires the ``google-cloud-bigquery-storage`` library.
1776 This method only exposes a subset of the capabilities of the
1777 BigQuery Storage API. For full access to all features
1778 (projections, filters, snapshots) use the Storage API directly.
1779 create_bqstorage_client (Optional[bool]):
1780 If ``True`` (default), create a BigQuery Storage API client using
1781 the default API settings. The BigQuery Storage API is a faster way
1782 to fetch rows from BigQuery. See the ``bqstorage_client`` parameter
1783 for more information.
1785 This argument does nothing if ``bqstorage_client`` is supplied.
1787 .. versionadded:: 1.24.0
1789 Returns:
1790 pyarrow.Table
1791 A :class:`pyarrow.Table` populated with row data and column
1792 headers from the query results. The column headers are derived
1793 from the destination table's schema.
1795 Raises:
1796 ValueError: If the :mod:`pyarrow` library cannot be imported.
1799 .. versionadded:: 1.17.0
1800 """
1801 if pyarrow is None:
1802 raise ValueError(_NO_PYARROW_ERROR)
1804 self._maybe_warn_max_results(bqstorage_client)
1806 if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
1807 create_bqstorage_client = False
1808 bqstorage_client = None
1810 owns_bqstorage_client = False
1811 if not bqstorage_client and create_bqstorage_client:
1812 bqstorage_client = self.client._ensure_bqstorage_client()
1813 owns_bqstorage_client = bqstorage_client is not None
1815 try:
1816 progress_bar = get_progress_bar(
1817 progress_bar_type, "Downloading", self.total_rows, "rows"
1818 )
1820 record_batches = []
1821 for record_batch in self.to_arrow_iterable(
1822 bqstorage_client=bqstorage_client
1823 ):
1824 record_batches.append(record_batch)
1826 if progress_bar is not None:
1827 # In some cases, the number of total rows is not populated
1828 # until the first page of rows is fetched. Update the
1829 # progress bar's total to keep an accurate count.
1830 progress_bar.total = progress_bar.total or self.total_rows
1831 progress_bar.update(record_batch.num_rows)
1833 if progress_bar is not None:
1834 # Indicate that the download has finished.
1835 progress_bar.close()
1836 finally:
1837 if owns_bqstorage_client:
1838 bqstorage_client._transport.grpc_channel.close() # type: ignore
1840 if record_batches and bqstorage_client is not None:
1841 return pyarrow.Table.from_batches(record_batches)
1842 else:
1843 # Either there are no records (record_batches is empty), so we build
1844 # the schema from the BigQuery schema,
1845 # **or** we used the REST API (bqstorage_client is None),
1846 # which doesn't add arrow extension metadata, so we let
1847 # `bq_to_arrow_schema` do it.
1848 arrow_schema = _pandas_helpers.bq_to_arrow_schema(self._schema)
1849 return pyarrow.Table.from_batches(record_batches, schema=arrow_schema)
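# A minimal sketch of to_arrow(), assuming pyarrow is installed; the table ID
# is a placeholder and tqdm is only needed if a progress bar is requested.
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     rows = client.list_rows("my-project.my_dataset.my_table")
#     arrow_table = rows.to_arrow(progress_bar_type="tqdm")
#     print(arrow_table.schema)
#     print(arrow_table.num_rows)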
1851 def to_dataframe_iterable(
1852 self,
1853 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
1854 dtypes: Dict[str, Any] = None,
1855 max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore
1856 ) -> "pandas.DataFrame":
1857 """Create an iterable of pandas DataFrames, to process the table as a stream.
1859 Args:
1860 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
1861 A BigQuery Storage API client. If supplied, use the faster
1862 BigQuery Storage API to fetch rows from BigQuery.
1864 This method requires the ``google-cloud-bigquery-storage`` library.
1866 This method only exposes a subset of the capabilities of the
1867 BigQuery Storage API. For full access to all features
1868 (projections, filters, snapshots) use the Storage API directly.
1870 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
1871 A dictionary of column names and pandas ``dtype``s. The provided
1872 ``dtype`` is used when constructing the series for the column
1873 specified. Otherwise, the default pandas behavior is used.
1875 max_queue_size (Optional[int]):
1876 The maximum number of result pages to hold in the internal queue when
1877 streaming query results over the BigQuery Storage API. Ignored if
1878 Storage API is not used.
1880 By default, the max queue size is set to the number of BQ Storage streams
1881 created by the server. If ``max_queue_size`` is :data:`None`, the queue
1882 size is infinite.
1884 .. versionadded:: 2.14.0
1886 Returns:
1887 pandas.DataFrame:
1888 A generator of :class:`~pandas.DataFrame`.
1890 Raises:
1891 ValueError:
1892 If the :mod:`pandas` library cannot be imported.
1893 """
1894 _pandas_helpers.verify_pandas_imports()
1896 if dtypes is None:
1897 dtypes = {}
1899 self._maybe_warn_max_results(bqstorage_client)
1901 column_names = [field.name for field in self._schema]
1902 bqstorage_download = functools.partial(
1903 _pandas_helpers.download_dataframe_bqstorage,
1904 self._project,
1905 self._table,
1906 bqstorage_client,
1907 column_names,
1908 dtypes,
1909 preserve_order=self._preserve_order,
1910 selected_fields=self._selected_fields,
1911 max_queue_size=max_queue_size,
1912 )
1913 tabledata_list_download = functools.partial(
1914 _pandas_helpers.download_dataframe_row_iterator,
1915 iter(self.pages),
1916 self.schema,
1917 dtypes,
1918 )
1919 return self._to_page_iterable(
1920 bqstorage_download,
1921 tabledata_list_download,
1922 bqstorage_client=bqstorage_client,
1923 )
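# A minimal sketch of to_dataframe_iterable(), streaming pages as DataFrames;
# the table ID is a placeholder and the "age" dtype override is illustrative.
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     rows = client.list_rows("my-project.my_dataset.my_table")
#     for df in rows.to_dataframe_iterable(dtypes={"age": "Int64"}):
#         # Each df holds one page of rows, so the whole table never needs to
#         # be resident in memory at once.
#         print(len(df.index))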
1925 # If changing the signature of this method, make sure to apply the same
1926 # changes to job.QueryJob.to_dataframe()
1927 def to_dataframe(
1928 self,
1929 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
1930 dtypes: Dict[str, Any] = None,
1931 progress_bar_type: str = None,
1932 create_bqstorage_client: bool = True,
1933 geography_as_object: bool = False,
1934 bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
1935 int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
1936 float_dtype: Union[Any, None] = None,
1937 string_dtype: Union[Any, None] = None,
1938 ) -> "pandas.DataFrame":
1939 """Create a pandas DataFrame by loading all pages of a query.
1941 Args:
1942 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
1943 A BigQuery Storage API client. If supplied, use the faster
1944 BigQuery Storage API to fetch rows from BigQuery.
1946 This method requires the ``google-cloud-bigquery-storage`` library.
1948 This method only exposes a subset of the capabilities of the
1949 BigQuery Storage API. For full access to all features
1950 (projections, filters, snapshots) use the Storage API directly.
1952 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
1953 A dictionary of column names and pandas ``dtype``s. The provided
1954 ``dtype`` is used when constructing the series for the column
1955 specified. Otherwise, the default pandas behavior is used.
1956 progress_bar_type (Optional[str]):
1957 If set, use the `tqdm <https://tqdm.github.io/>`_ library to
1958 display a progress bar while the data downloads. Install the
1959 ``tqdm`` package to use this feature.
1961 Possible values of ``progress_bar_type`` include:
1963 ``None``
1964 No progress bar.
1965 ``'tqdm'``
1966 Use the :func:`tqdm.tqdm` function to print a progress bar
1967 to :data:`sys.stdout`.
1968 ``'tqdm_notebook'``
1969 Use the :func:`tqdm.notebook.tqdm` function to display a
1970 progress bar as a Jupyter notebook widget.
1971 ``'tqdm_gui'``
1972 Use the :func:`tqdm.tqdm_gui` function to display a
1973 progress bar as a graphical dialog box.
1975 .. versionadded:: 1.11.0
1977 create_bqstorage_client (Optional[bool]):
1978 If ``True`` (default), create a BigQuery Storage API client
1979 using the default API settings. The BigQuery Storage API
1980 is a faster way to fetch rows from BigQuery. See the
1981 ``bqstorage_client`` parameter for more information.
1983 This argument does nothing if ``bqstorage_client`` is supplied.
1985 .. versionadded:: 1.24.0
1987 geography_as_object (Optional[bool]):
1988 If ``True``, convert GEOGRAPHY data to :mod:`shapely`
1989 geometry objects. If ``False`` (default), don't cast
1990 geography data to :mod:`shapely` geometry objects.
1992 .. versionadded:: 2.24.0
1994 bool_dtype (Optional[pandas.Series.dtype, None]):
1995 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
1996 to convert the BigQuery Boolean type, instead of relying on the default
1997 ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
1998 then the data type will be ``numpy.dtype("bool")``. The BigQuery Boolean
1999 type is documented at:
2000 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
2002 .. versionadded:: 3.7.1
2004 int_dtype (Optional[pandas.Series.dtype, None]):
2005 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
2006 to convert BigQuery Integer types, instead of relying on the default
2007 ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
2008 then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
2009 Integer types can be found at:
2010 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
2012 .. versionadded:: 3.7.1
2014 float_dtype (Optional[pandas.Series.dtype, None]):
2015 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
2016 to convert the BigQuery Float type, instead of relying on the default
2017 ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
2018 then the data type will be ``numpy.dtype("float64")``. The BigQuery Float
2019 type is documented at:
2020 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
2022 .. versionadded:: 3.7.1
2024 string_dtype (Optional[pandas.Series.dtype, None]):
2025 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
2026 convert the BigQuery String type, instead of relying on the default
2027 ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
2028 then the data type will be ``numpy.dtype("object")``. The BigQuery String
2029 type is documented at:
2030 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
2032 .. versionadded:: 3.7.1
2034 Returns:
2035 pandas.DataFrame:
2036 A :class:`~pandas.DataFrame` populated with row data and column
2037 headers from the query results. The column headers are derived
2038 from the destination table's schema.
2040 Raises:
2041 ValueError:
2042 If the :mod:`pandas` library cannot be imported, or
2043 the :mod:`google.cloud.bigquery_storage_v1` module is
2044 required but cannot be imported. Also if
2045 `geography_as_object` is `True`, but the
2046 :mod:`shapely` library cannot be imported. Also if
2047 `bool_dtype`, `int_dtype`, or another dtype parameter
2048 is not a supported dtype.
2050 """
2051 _pandas_helpers.verify_pandas_imports()
2053 if geography_as_object and shapely is None:
2054 raise ValueError(_NO_SHAPELY_ERROR)
2056 if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE:
2057 bool_dtype = pandas.BooleanDtype()
2059 if int_dtype is DefaultPandasDTypes.INT_DTYPE:
2060 int_dtype = pandas.Int64Dtype()
2062 if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"):
2063 raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE)
2065 if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"):
2066 raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE)
2068 if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"):
2069 raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE)
2071 if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"):
2072 raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE)
2074 if dtypes is None:
2075 dtypes = {}
2077 self._maybe_warn_max_results(bqstorage_client)
2079 if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
2080 create_bqstorage_client = False
2081 bqstorage_client = None
2083 record_batch = self.to_arrow(
2084 progress_bar_type=progress_bar_type,
2085 bqstorage_client=bqstorage_client,
2086 create_bqstorage_client=create_bqstorage_client,
2087 )
2089 # When converting date or timestamp values to nanosecond precision, the result
2090 # can be out of pyarrow bounds. To avoid the error when converting to
2091 # Pandas, we set the date_as_object or timestamp_as_object parameter to True,
2092 # if necessary.
2093 date_as_object = not all(
2094 self.__can_cast_timestamp_ns(col)
2095 for col in record_batch
2096 # Type can be date32 or date64 (plus units).
2097 # See: https://arrow.apache.org/docs/python/api/datatypes.html
2098 if pyarrow.types.is_date(col.type)
2099 )
2101 timestamp_as_object = not all(
2102 self.__can_cast_timestamp_ns(col)
2103 for col in record_batch
2104 # Type can be datetime and timestamp (plus units and time zone).
2105 # See: https://arrow.apache.org/docs/python/api/datatypes.html
2106 if pyarrow.types.is_timestamp(col.type)
2107 )
2109 if len(record_batch) > 0:
2110 df = record_batch.to_pandas(
2111 date_as_object=date_as_object,
2112 timestamp_as_object=timestamp_as_object,
2113 integer_object_nulls=True,
2114 types_mapper=_pandas_helpers.default_types_mapper(
2115 date_as_object=date_as_object,
2116 bool_dtype=bool_dtype,
2117 int_dtype=int_dtype,
2118 float_dtype=float_dtype,
2119 string_dtype=string_dtype,
2120 ),
2121 )
2122 else:
2123 # Avoid "ValueError: need at least one array to concatenate" on
2124 # older versions of pandas when converting empty RecordBatch to
2125 # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241
2126 df = pandas.DataFrame([], columns=record_batch.schema.names)
2128 for column in dtypes:
2129 df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False)
2131 if geography_as_object:
2132 for field in self.schema:
2133 if field.field_type.upper() == "GEOGRAPHY" and field.mode != "REPEATED":
2134 df[field.name] = df[field.name].dropna().apply(_read_wkt)
2136 return df
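# A minimal sketch of to_dataframe(), assuming pandas is installed; the table
# ID is a placeholder, and the explicit dtype arguments are optional (the
# defaults described in the docstring apply otherwise).
#
#     import pandas
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     rows = client.list_rows("my-project.my_dataset.my_table")
#     df = rows.to_dataframe(
#         create_bqstorage_client=False,  # force the REST API download path
#         bool_dtype=pandas.BooleanDtype(),
#         int_dtype=pandas.Int64Dtype(),
#     )
#     print(df.dtypes)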
2138 @staticmethod
2139 def __can_cast_timestamp_ns(column):
2140 try:
2141 column.cast("timestamp[ns]")
2142 except pyarrow.lib.ArrowInvalid:
2143 return False
2144 else:
2145 return True
2147 # If changing the signature of this method, make sure to apply the same
2148 # changes to job.QueryJob.to_geodataframe()
2149 def to_geodataframe(
2150 self,
2151 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
2152 dtypes: Dict[str, Any] = None,
2153 progress_bar_type: str = None,
2154 create_bqstorage_client: bool = True,
2155 geography_column: Optional[str] = None,
2156 ) -> "geopandas.GeoDataFrame":
2157 """Create a GeoPandas GeoDataFrame by loading all pages of a query.
2159 Args:
2160 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):
2161 A BigQuery Storage API client. If supplied, use the faster
2162 BigQuery Storage API to fetch rows from BigQuery.
2164 This method requires the ``pyarrow`` and
2165 ``google-cloud-bigquery-storage`` libraries.
2167 This method only exposes a subset of the capabilities of the
2168 BigQuery Storage API. For full access to all features
2169 (projections, filters, snapshots) use the Storage API directly.
2171 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
2172 A dictionary of column names and pandas ``dtype``s. The provided
2173 ``dtype`` is used when constructing the series for the column
2174 specified. Otherwise, the default pandas behavior is used.
2175 progress_bar_type (Optional[str]):
2176 If set, use the `tqdm <https://tqdm.github.io/>`_ library to
2177 display a progress bar while the data downloads. Install the
2178 ``tqdm`` package to use this feature.
2180 Possible values of ``progress_bar_type`` include:
2182 ``None``
2183 No progress bar.
2184 ``'tqdm'``
2185 Use the :func:`tqdm.tqdm` function to print a progress bar
2186 to :data:`sys.stdout`.
2187 ``'tqdm_notebook'``
2188 Use the :func:`tqdm.notebook.tqdm` function to display a
2189 progress bar as a Jupyter notebook widget.
2190 ``'tqdm_gui'``
2191 Use the :func:`tqdm.tqdm_gui` function to display a
2192 progress bar as a graphical dialog box.
2194 create_bqstorage_client (Optional[bool]):
2195 If ``True`` (default), create a BigQuery Storage API client
2196 using the default API settings. The BigQuery Storage API
2197 is a faster way to fetch rows from BigQuery. See the
2198 ``bqstorage_client`` parameter for more information.
2200 This argument does nothing if ``bqstorage_client`` is supplied.
2202 geography_column (Optional[str]):
2203 If there is more than one GEOGRAPHY column,
2204 identifies which one to use to construct a geopandas
2205 GeoDataFrame. This option can be omitted if there's
2206 only one GEOGRAPHY column.
2208 Returns:
2209 geopandas.GeoDataFrame:
2210 A :class:`geopandas.GeoDataFrame` populated with row
2211 data and column headers from the query results. The
2212 column headers are derived from the destination
2213 table's schema.
2215 Raises:
2216 ValueError:
2217 If the :mod:`geopandas` library cannot be imported, or the
2218 :mod:`google.cloud.bigquery_storage_v1` module is
2219 required but cannot be imported.
2221 .. versionadded:: 2.24.0
2222 """
2223 if geopandas is None:
2224 raise ValueError(_NO_GEOPANDAS_ERROR)
2226 geography_columns = set(
2227 field.name
2228 for field in self.schema
2229 if field.field_type.upper() == "GEOGRAPHY"
2230 )
2231 if not geography_columns:
2232 raise TypeError(
2233 "There must be at least one GEOGRAPHY column"
2234 " to create a GeoDataFrame"
2235 )
2237 if geography_column:
2238 if geography_column not in geography_columns:
2239 raise ValueError(
2240 f"The given geography column, {geography_column}, doesn't name"
2241 f" a GEOGRAPHY column in the result."
2242 )
2243 elif len(geography_columns) == 1:
2244 [geography_column] = geography_columns
2245 else:
2246 raise ValueError(
2247 "There is more than one GEOGRAPHY column in the result. "
2248 "The geography_column argument must be used to specify which "
2249 "one to use to create a GeoDataFrame"
2250 )
2252 df = self.to_dataframe(
2253 bqstorage_client,
2254 dtypes,
2255 progress_bar_type,
2256 create_bqstorage_client,
2257 geography_as_object=True,
2258 )
2260 return geopandas.GeoDataFrame(
2261 df, crs=_COORDINATE_REFERENCE_SYSTEM, geometry=geography_column
2262 )
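# A minimal sketch of to_geodataframe(), assuming geopandas and shapely are
# installed; the query is a placeholder and "location" is assumed to be a
# GEOGRAPHY column in the result.
#
#     from google.cloud import bigquery
#
#     client = bigquery.Client()
#     rows = client.query(
#         "SELECT name, location FROM my_dataset.places"
#     ).result()
#     gdf = rows.to_geodataframe(geography_column="location")
#     print(gdf.crs)  # EPSG:4326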
2265class _EmptyRowIterator(RowIterator):
2266 """An empty row iterator.
2268 This class prevents API requests when there are no rows to fetch or rows
2269 are impossible to fetch, such as with query results for DDL CREATE VIEW
2270 statements.
2271 """
2273 schema = ()
2274 pages = ()
2275 total_rows = 0
2277 def __init__(
2278 self, client=None, api_request=None, path=None, schema=(), *args, **kwargs
2279 ):
2280 super().__init__(
2281 client=client,
2282 api_request=api_request,
2283 path=path,
2284 schema=schema,
2285 *args,
2286 **kwargs,
2287 )
2289 def to_arrow(
2290 self,
2291 progress_bar_type=None,
2292 bqstorage_client=None,
2293 create_bqstorage_client=True,
2294 ) -> "pyarrow.Table":
2295 """[Beta] Create an empty class:`pyarrow.Table`.
2297 Args:
2298 progress_bar_type (str): Ignored. Added for compatibility with RowIterator.
2299 bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
2300 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
2302 Returns:
2303 pyarrow.Table: An empty :class:`pyarrow.Table`.
2304 """
2305 if pyarrow is None:
2306 raise ValueError(_NO_PYARROW_ERROR)
2307 return pyarrow.Table.from_arrays(())
2309 def to_dataframe(
2310 self,
2311 bqstorage_client=None,
2312 dtypes=None,
2313 progress_bar_type=None,
2314 create_bqstorage_client=True,
2315 geography_as_object=False,
2316 bool_dtype=None,
2317 int_dtype=None,
2318 float_dtype=None,
2319 string_dtype=None,
2320 ) -> "pandas.DataFrame":
2321 """Create an empty dataframe.
2323 Args:
2324 bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
2325 dtypes (Any): Ignored. Added for compatibility with RowIterator.
2326 progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
2327 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
2328 geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
2329 bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
2330 int_dtype (Any): Ignored. Added for compatibility with RowIterator.
2331 float_dtype (Any): Ignored. Added for compatibility with RowIterator.
2332 string_dtype (Any): Ignored. Added for compatibility with RowIterator.
2334 Returns:
2335 pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
2336 """
2337 _pandas_helpers.verify_pandas_imports()
2338 return pandas.DataFrame()
2340 def to_geodataframe(
2341 self,
2342 bqstorage_client=None,
2343 dtypes=None,
2344 progress_bar_type=None,
2345 create_bqstorage_client=True,
2346 geography_column: Optional[str] = None,
2347 ) -> "pandas.DataFrame":
2348 """Create an empty dataframe.
2350 Args:
2351 bqstorage_client (Any): Ignored. Added for compatibility with RowIterator.
2352 dtypes (Any): Ignored. Added for compatibility with RowIterator.
2353 progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
2354 create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
2356 Returns:
2357 geopandas.GeoDataFrame: An empty :class:`~geopandas.GeoDataFrame`.
2358 """
2359 if geopandas is None:
2360 raise ValueError(_NO_GEOPANDAS_ERROR)
2362 # Since an empty GeoDataFrame has no geometry column, we do not set a CRS
2363 # on it, because that's deprecated.
2364 return geopandas.GeoDataFrame()
2366 def to_dataframe_iterable(
2367 self,
2368 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
2369 dtypes: Optional[Dict[str, Any]] = None,
2370 max_queue_size: Optional[int] = None,
2371 ) -> Iterator["pandas.DataFrame"]:
2372 """Create an iterable of pandas DataFrames, to process the table as a stream.
2374 .. versionadded:: 2.21.0
2376 Args:
2377 bqstorage_client:
2378 Ignored. Added for compatibility with RowIterator.
2380 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):
2381 Ignored. Added for compatibility with RowIterator.
2383 max_queue_size:
2384 Ignored. Added for compatibility with RowIterator.
2386 Returns:
2387 An iterator yielding a single empty :class:`~pandas.DataFrame`.
2389 Raises:
2390 ValueError:
2391 If the :mod:`pandas` library cannot be imported.
2392 """
2393 _pandas_helpers.verify_pandas_imports()
2394 return iter((pandas.DataFrame(),))
2396 def to_arrow_iterable(
2397 self,
2398 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
2399 max_queue_size: Optional[int] = None,
2400 ) -> Iterator["pyarrow.RecordBatch"]:
2401 """Create an iterable of pandas DataFrames, to process the table as a stream.
2403 .. versionadded:: 2.31.0
2405 Args:
2406 bqstorage_client:
2407 Ignored. Added for compatibility with RowIterator.
2409 max_queue_size:
2410 Ignored. Added for compatibility with RowIterator.
2412 Returns:
2413 An iterator yielding a single empty :class:`~pyarrow.RecordBatch`.
2414 """
2415 return iter((pyarrow.record_batch([]),))
2417 def __iter__(self):
2418 return iter(())
2421class PartitionRange(object):
2422 """Definition of the ranges for range partitioning.
2424 .. note::
2425 **Beta**. The integer range partitioning feature is in a pre-release
2426 state and might change or have limited support.
2428 Args:
2429 start (Optional[int]):
2430 Sets the
2431 :attr:`~google.cloud.bigquery.table.PartitionRange.start`
2432 property.
2433 end (Optional[int]):
2434 Sets the
2435 :attr:`~google.cloud.bigquery.table.PartitionRange.end`
2436 property.
2437 interval (Optional[int]):
2438 Sets the
2439 :attr:`~google.cloud.bigquery.table.PartitionRange.interval`
2440 property.
2441 _properties (Optional[dict]):
2442 Private. Used to construct object from API resource.
2443 """
2445 def __init__(self, start=None, end=None, interval=None, _properties=None) -> None:
2446 if _properties is None:
2447 _properties = {}
2448 self._properties = _properties
2450 if start is not None:
2451 self.start = start
2452 if end is not None:
2453 self.end = end
2454 if interval is not None:
2455 self.interval = interval
2457 @property
2458 def start(self):
2459 """int: The start of range partitioning, inclusive."""
2460 return _helpers._int_or_none(self._properties.get("start"))
2462 @start.setter
2463 def start(self, value):
2464 self._properties["start"] = _helpers._str_or_none(value)
2466 @property
2467 def end(self):
2468 """int: The end of range partitioning, exclusive."""
2469 return _helpers._int_or_none(self._properties.get("end"))
2471 @end.setter
2472 def end(self, value):
2473 self._properties["end"] = _helpers._str_or_none(value)
2475 @property
2476 def interval(self):
2477 """int: The width of each interval."""
2478 return _helpers._int_or_none(self._properties.get("interval"))
2480 @interval.setter
2481 def interval(self, value):
2482 self._properties["interval"] = _helpers._str_or_none(value)
2484 def _key(self):
2485 return tuple(sorted(self._properties.items()))
2487 def __eq__(self, other):
2488 if not isinstance(other, PartitionRange):
2489 return NotImplemented
2490 return self._key() == other._key()
2492 def __ne__(self, other):
2493 return not self == other
2495 def __repr__(self):
2496 key_vals = ["{}={}".format(key, val) for key, val in self._key()]
2497 return "PartitionRange({})".format(", ".join(key_vals))
2500class RangePartitioning(object):
2501 """Range-based partitioning configuration for a table.
2503 .. note::
2504 **Beta**. The integer range partitioning feature is in a pre-release
2505 state and might change or have limited support.
2507 Args:
2508 range_ (Optional[google.cloud.bigquery.table.PartitionRange]):
2509 Sets the
2510 :attr:`google.cloud.bigquery.table.RangePartitioning.range_`
2511 property.
2512 field (Optional[str]):
2513 Sets the
2514 :attr:`google.cloud.bigquery.table.RangePartitioning.field`
2515 property.
2516 _properties (Optional[dict]):
2517 Private. Used to construct object from API resource.
2518 """
2520 def __init__(self, range_=None, field=None, _properties=None) -> None:
2521 if _properties is None:
2522 _properties = {}
2523 self._properties: Dict[str, Any] = _properties
2525 if range_ is not None:
2526 self.range_ = range_
2527 if field is not None:
2528 self.field = field
2530 # Trailing underscore to prevent conflict with built-in range() function.
2531 @property
2532 def range_(self):
2533 """google.cloud.bigquery.table.PartitionRange: Defines the
2534 ranges for range partitioning.
2536 Raises:
2537 ValueError:
2538 If the value is not a :class:`PartitionRange`.
2539 """
2540 range_properties = self._properties.setdefault("range", {})
2541 return PartitionRange(_properties=range_properties)
2543 @range_.setter
2544 def range_(self, value):
2545 if not isinstance(value, PartitionRange):
2546 raise ValueError("Expected a PartitionRange, but got {}.".format(value))
2547 self._properties["range"] = value._properties
2549 @property
2550 def field(self):
2551 """str: The table is partitioned by this field.
2553 The field must be a top-level ``NULLABLE`` / ``REQUIRED`` field. The
2554 only supported type is ``INTEGER`` / ``INT64``.
2555 """
2556 return self._properties.get("field")
2558 @field.setter
2559 def field(self, value):
2560 self._properties["field"] = value
2562 def _key(self):
2563 return (("field", self.field), ("range_", self.range_))
2565 def __eq__(self, other):
2566 if not isinstance(other, RangePartitioning):
2567 return NotImplemented
2568 return self._key() == other._key()
2570 def __ne__(self, other):
2571 return not self == other
2573 def __repr__(self):
2574 key_vals = ["{}={}".format(key, repr(val)) for key, val in self._key()]
2575 return "RangePartitioning({})".format(", ".join(key_vals))
2578class TimePartitioningType(object):
2579 """Specifies the type of time partitioning to perform."""
2581 DAY = "DAY"
2582 """str: Generates one partition per day."""
2584 HOUR = "HOUR"
2585 """str: Generates one partition per hour."""
2587 MONTH = "MONTH"
2588 """str: Generates one partition per month."""
2590 YEAR = "YEAR"
2591 """str: Generates one partition per year."""
2594class TimePartitioning(object):
2595 """Configures time-based partitioning for a table.
2597 Args:
2598 type_ (Optional[google.cloud.bigquery.table.TimePartitioningType]):
2599 Specifies the type of time partitioning to perform. Defaults to
2600 :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`.
2602 Supported values are:
2604 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.HOUR`
2605 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.DAY`
2606 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.MONTH`
2607 * :attr:`~google.cloud.bigquery.table.TimePartitioningType.YEAR`
2609 field (Optional[str]):
2610 If set, the table is partitioned by this field. If not set, the
2611 table is partitioned by pseudo column ``_PARTITIONTIME``. The field
2612 must be a top-level ``TIMESTAMP``, ``DATETIME``, or ``DATE``
2613 field. Its mode must be ``NULLABLE`` or ``REQUIRED``.
2615 See the `time-unit column-partitioned tables guide
2616 <https://cloud.google.com/bigquery/docs/creating-column-partitions>`_
2617 in the BigQuery documentation.
2618 expiration_ms (Optional[int]):
2619 Number of milliseconds for which to keep the storage for a
2620 partition.
2621 require_partition_filter (Optional[bool]):
2622 DEPRECATED: Use
2623 :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
2624 instead.
2625 """
2627 def __init__(
2628 self, type_=None, field=None, expiration_ms=None, require_partition_filter=None
2629 ) -> None:
2630 self._properties: Dict[str, Any] = {}
2631 if type_ is None:
2632 self.type_ = TimePartitioningType.DAY
2633 else:
2634 self.type_ = type_
2635 if field is not None:
2636 self.field = field
2637 if expiration_ms is not None:
2638 self.expiration_ms = expiration_ms
2639 if require_partition_filter is not None:
2640 self.require_partition_filter = require_partition_filter
2642 @property
2643 def type_(self):
2644 """google.cloud.bigquery.table.TimePartitioningType: The type of time
2645 partitioning to use.
2646 """
2647 return self._properties.get("type")
2649 @type_.setter
2650 def type_(self, value):
2651 self._properties["type"] = value
2653 @property
2654 def field(self):
2655 """str: Field in the table to use for partitioning"""
2656 return self._properties.get("field")
2658 @field.setter
2659 def field(self, value):
2660 self._properties["field"] = value
2662 @property
2663 def expiration_ms(self):
2664 """int: Number of milliseconds to keep the storage for a partition."""
2665 return _helpers._int_or_none(self._properties.get("expirationMs"))
2667 @expiration_ms.setter
2668 def expiration_ms(self, value):
2669 if value is not None:
2670 # Allow explicitly setting the expiration to None.
2671 value = str(value)
2672 self._properties["expirationMs"] = value
2674 @property
2675 def require_partition_filter(self):
2676 """bool: Specifies whether partition filters are required for queries
2678 DEPRECATED: Use
2679 :attr:`~google.cloud.bigquery.table.Table.require_partition_filter`,
2680 instead.
2681 """
2682 warnings.warn(
2683 (
2684 "TimePartitioning.require_partition_filter will be removed in "
2685 "future versions. Please use Table.require_partition_filter "
2686 "instead."
2687 ),
2688 PendingDeprecationWarning,
2689 stacklevel=2,
2690 )
2691 return self._properties.get("requirePartitionFilter")
2693 @require_partition_filter.setter
2694 def require_partition_filter(self, value):
2695 warnings.warn(
2696 (
2697 "TimePartitioning.require_partition_filter will be removed in "
2698 "future versions. Please use Table.require_partition_filter "
2699 "instead."
2700 ),
2701 PendingDeprecationWarning,
2702 stacklevel=2,
2703 )
2704 self._properties["requirePartitionFilter"] = value
2706 @classmethod
2707 def from_api_repr(cls, api_repr: dict) -> "TimePartitioning":
2708 """Return a :class:`TimePartitioning` object deserialized from a dict.
2710 This method creates a new ``TimePartitioning`` instance that points to
2711 the ``api_repr`` parameter as its internal properties dict. This means
2712 that when a ``TimePartitioning`` instance is stored as a property of
2713 another object, any changes made at the higher level will also appear
2714 here::
2716 >>> time_partitioning = TimePartitioning()
2717 >>> table.time_partitioning = time_partitioning
2718 >>> table.time_partitioning.field = 'timecolumn'
2719 >>> time_partitioning.field
2720 'timecolumn'
2722 Args:
2723 api_repr (Mapping[str, str]):
2724 The serialized representation of the TimePartitioning, such as
2725 what is output by :meth:`to_api_repr`.
2727 Returns:
2728 google.cloud.bigquery.table.TimePartitioning:
2729 The ``TimePartitioning`` object.
2730 """
2731 instance = cls()
2732 instance._properties = api_repr
2733 return instance
2735 def to_api_repr(self) -> dict:
2736 """Return a dictionary representing this object.
2738 This method returns the properties dict of the ``TimePartitioning``
2739 instance rather than making a copy. This means that when a
2740 ``TimePartitioning`` instance is stored as a property of another
2741 object, any changes made at the higher level will also appear here.
2743 Returns:
2744 dict:
2745 A dictionary representing the TimePartitioning object in
2746 serialized form.
2747 """
2748 return self._properties
2750 def _key(self):
2751 # Because we are only "renaming" top-level keys, a shallow copy is sufficient here.
2752 properties = self._properties.copy()
2753 # calling repr for non built-in type objects.
2754 properties["type_"] = repr(properties.pop("type"))
2755 if "field" in properties:
2756 # calling repr for non built-in type objects.
2757 properties["field"] = repr(properties["field"])
2758 if "requirePartitionFilter" in properties:
2759 properties["require_partition_filter"] = properties.pop(
2760 "requirePartitionFilter"
2761 )
2762 if "expirationMs" in properties:
2763 properties["expiration_ms"] = properties.pop("expirationMs")
2764 return tuple(sorted(properties.items()))
2766 def __eq__(self, other):
2767 if not isinstance(other, TimePartitioning):
2768 return NotImplemented
2769 return self._key() == other._key()
2771 def __ne__(self, other):
2772 return not self == other
2774 def __hash__(self):
2775 return hash(self._key())
2777 def __repr__(self):
2778 key_vals = ["{}={}".format(key, val) for key, val in self._key()]
2779 return "TimePartitioning({})".format(",".join(key_vals))
2782def _item_to_row(iterator, resource):
2783 """Convert a JSON row to the native object.
2785 .. note::
2787 This assumes that the ``schema`` attribute has been
2788 added to the iterator after being created, which
2789 should be done by the caller.
2791 Args:
2792 iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use.
2793 resource (Dict): An item to be converted to a row.
2795 Returns:
2796 google.cloud.bigquery.table.Row: The next row in the page.
2797 """
2798 return Row(
2799 _helpers._row_tuple_from_json(resource, iterator.schema),
2800 iterator._field_to_index,
2801 )
2804def _row_iterator_page_columns(schema, response):
2805 """Make a generator of all the columns in a page from tabledata.list.
2807 This enables creating a :class:`pandas.DataFrame` and other
2808 column-oriented data structures such as :class:`pyarrow.RecordBatch`
2809 """
2810 columns = []
2811 rows = response.get("rows", [])
2813 def get_column_data(field_index, field):
2814 for row in rows:
2815 yield _helpers._field_from_json(row["f"][field_index]["v"], field)
2817 for field_index, field in enumerate(schema):
2818 columns.append(get_column_data(field_index, field))
2820 return columns
2823# pylint: disable=unused-argument
2824def _rows_page_start(iterator, page, response):
2825 """Grab total rows when :class:`~google.cloud.iterator.Page` starts.
2827 Args:
2828 iterator (google.api_core.page_iterator.Iterator): The iterator that is currently in use.
2829 page (google.api_core.page_iterator.Page): The page that was just created.
2830 response (Dict): The JSON API response for a page of rows in a table.
2831 """
2832 # Make a (lazy) copy of the page in column-oriented format for use in data
2833 # science packages.
2834 page._columns = _row_iterator_page_columns(iterator._schema, response)
2836 total_rows = response.get("totalRows")
2837 if total_rows is not None:
2838 total_rows = int(total_rows)
2839 iterator._total_rows = total_rows
2842# pylint: enable=unused-argument
2845def _table_arg_to_table_ref(value, default_project=None) -> TableReference:
2846 """Helper to convert a string or Table to TableReference.
2848 This function keeps TableReference and other kinds of objects unchanged.
2849 """
2850 if isinstance(value, str):
2851 value = TableReference.from_string(value, default_project=default_project)
2852 if isinstance(value, (Table, TableListItem)):
2853 value = value.reference
2854 return value
2857def _table_arg_to_table(value, default_project=None) -> Table:
2858 """Helper to convert a string or TableReference to a Table.
2860 This function keeps Table and other kinds of objects unchanged.
2861 """
2862 if isinstance(value, str):
2863 value = TableReference.from_string(value, default_project=default_project)
2864 if isinstance(value, TableReference):
2865 value = Table(value)
2866 if isinstance(value, TableListItem):
2867 newvalue = Table(value.reference)
2868 newvalue._properties = value._properties
2869 value = newvalue
2871 return value