Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/job/query.py: 48%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

15"""Classes for query jobs."""

17import concurrent.futures

18import copy

19import re

20import typing

21from typing import Any, Dict, Iterable, List, Optional, Union

23from google.api_core import exceptions

24from google.api_core.future import polling as polling_future

25import requests

27from google.cloud.bigquery.dataset import Dataset

28from google.cloud.bigquery.dataset import DatasetListItem

29from google.cloud.bigquery.dataset import DatasetReference

30from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration

31from google.cloud.bigquery.enums import KeyResultStatementKind, DefaultPandasDTypes

32from google.cloud.bigquery.external_config import ExternalConfig

33from google.cloud.bigquery import _helpers

34from google.cloud.bigquery.query import (

35 _query_param_from_api_repr,

36 ArrayQueryParameter,

37 ConnectionProperty,

38 ScalarQueryParameter,

39 StructQueryParameter,

40 UDFResource,

41)

42from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY

43from google.cloud.bigquery.routine import RoutineReference

44from google.cloud.bigquery.schema import SchemaField

45from google.cloud.bigquery.table import _EmptyRowIterator

46from google.cloud.bigquery.table import RangePartitioning

47from google.cloud.bigquery.table import _table_arg_to_table_ref

48from google.cloud.bigquery.table import TableReference

49from google.cloud.bigquery.table import TimePartitioning

50from google.cloud.bigquery._tqdm_helpers import wait_for_query

52from google.cloud.bigquery.job.base import _AsyncJob

53from google.cloud.bigquery.job.base import _JobConfig

54from google.cloud.bigquery.job.base import _JobReference

56try:

57 import pandas # type: ignore

58except ImportError: # pragma: NO COVER

59 pandas = None

61if typing.TYPE_CHECKING: # pragma: NO COVER

62 # Assumption: type checks are only used by library developers and CI environments

63 # that have all optional dependencies installed, thus no conditional imports.

64 import pandas # type: ignore

65 import geopandas # type: ignore

66 import pyarrow # type: ignore

67 from google.api_core import retry as retries

68 from google.cloud import bigquery_storage

69 from google.cloud.bigquery.client import Client

70 from google.cloud.bigquery.table import RowIterator

73_CONTAINS_ORDER_BY = re.compile(r"ORDER\s+BY", re.IGNORECASE)

74_EXCEPTION_FOOTER_TEMPLATE = "{message}\n\nLocation: {location}\nJob ID: {job_id}\n"

75_TIMEOUT_BUFFER_SECS = 0.1

78def _contains_order_by(query):

79 """Do we need to preserve the order of the query results?

81 This function has known false positives, such as with ordered window

82 functions:

84 .. code-block:: sql

86 SELECT SUM(x) OVER (

87 window_name

88 PARTITION BY...

89 ORDER BY...

90 window_frame_clause)

91 FROM ...

93 This false positive failure case means the behavior will be correct, but

94 downloading results with the BigQuery Storage API may be slower than it

95 otherwise would. This is preferable to the false negative case, where

96 results are expected to be in order but are not (due to parallel reads).

97 """

98 return query and _CONTAINS_ORDER_BY.search(query)

100

101def _from_api_repr_query_parameters(resource):

102 return [_query_param_from_api_repr(mapping) for mapping in resource]

103

104

105def _to_api_repr_query_parameters(value):

106 return [query_parameter.to_api_repr() for query_parameter in value]

107

108

109def _from_api_repr_udf_resources(resource):

110 udf_resources = []

111 for udf_mapping in resource:

112 for udf_type, udf_value in udf_mapping.items():

113 udf_resources.append(UDFResource(udf_type, udf_value))

114 return udf_resources

115

116

117def _to_api_repr_udf_resources(value):

118 return [{udf_resource.udf_type: udf_resource.value} for udf_resource in value]

119

120

121def _from_api_repr_table_defs(resource):

122 return {k: ExternalConfig.from_api_repr(v) for k, v in resource.items()}

123

124

125def _to_api_repr_table_defs(value):

126 return {k: ExternalConfig.to_api_repr(v) for k, v in value.items()}

127

128

129class BiEngineReason(typing.NamedTuple):

130 """Reason for BI Engine acceleration failure

131

132 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginereason

133 """

134

135 code: str = "CODE_UNSPECIFIED"

136

137 reason: str = ""

138

139 @classmethod

140 def from_api_repr(cls, reason: Dict[str, str]) -> "BiEngineReason":

141 return cls(reason.get("code", "CODE_UNSPECIFIED"), reason.get("message", ""))

142

143

144class BiEngineStats(typing.NamedTuple):

145 """Statistics for a BI Engine query

146

147 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#bienginestatistics

148 """

149

150 mode: str = "ACCELERATION_MODE_UNSPECIFIED"

151 """ Specifies which mode of BI Engine acceleration was performed (if any)

152 """

153

154 reasons: List[BiEngineReason] = []

155 """ Contains explanatory messages in case of DISABLED / PARTIAL acceleration

156 """

157

158 @classmethod

159 def from_api_repr(cls, stats: Dict[str, Any]) -> "BiEngineStats":

160 mode = stats.get("biEngineMode", "ACCELERATION_MODE_UNSPECIFIED")

161 reasons = [

162 BiEngineReason.from_api_repr(r) for r in stats.get("biEngineReasons", [])

163 ]

164 return cls(mode, reasons)

165

166

167class DmlStats(typing.NamedTuple):

168 """Detailed statistics for DML statements.

169

170 https://cloud.google.com/bigquery/docs/reference/rest/v2/DmlStats

171 """

172

173 inserted_row_count: int = 0

174 """Number of inserted rows. Populated by DML INSERT and MERGE statements."""

175

176 deleted_row_count: int = 0

177 """Number of deleted rows. populated by DML DELETE, MERGE and TRUNCATE statements.

178 """

179

180 updated_row_count: int = 0

181 """Number of updated rows. Populated by DML UPDATE and MERGE statements."""

182

183 @classmethod

184 def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats":

185 # NOTE: The field order here must match the order of fields set at the

186 # class level.

187 api_fields = ("insertedRowCount", "deletedRowCount", "updatedRowCount")

188

189 args = (

190 int(stats.get(api_field, default_val))

191 for api_field, default_val in zip(api_fields, cls.__new__.__defaults__) # type: ignore

192 )

193 return cls(*args)

194

195

196class ScriptOptions:

197 """Options controlling the execution of scripts.

198

199 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ScriptOptions

200 """

201

202 def __init__(

203 self,

204 statement_timeout_ms: Optional[int] = None,

205 statement_byte_budget: Optional[int] = None,

206 key_result_statement: Optional[KeyResultStatementKind] = None,

207 ):

208 self._properties: Dict[str, Any] = {}

209 self.statement_timeout_ms = statement_timeout_ms

210 self.statement_byte_budget = statement_byte_budget

211 self.key_result_statement = key_result_statement

212

213 @classmethod

214 def from_api_repr(cls, resource: Dict[str, Any]) -> "ScriptOptions":

215 """Factory: construct instance from the JSON repr.

216

217 Args:

218 resource(Dict[str: Any]):

219 ScriptOptions representation returned from API.

220

221 Returns:

222 google.cloud.bigquery.ScriptOptions:

223 ScriptOptions sample parsed from ``resource``.

224 """

225 entry = cls()

226 entry._properties = copy.deepcopy(resource)

227 return entry

228

229 def to_api_repr(self) -> Dict[str, Any]:

230 """Construct the API resource representation."""

231 return copy.deepcopy(self._properties)

232

233 @property

234 def statement_timeout_ms(self) -> Union[int, None]:

235 """Timeout period for each statement in a script."""

236 return _helpers._int_or_none(self._properties.get("statementTimeoutMs"))

237

238 @statement_timeout_ms.setter

239 def statement_timeout_ms(self, value: Union[int, None]):

240 new_value = None if value is None else str(value)

241 self._properties["statementTimeoutMs"] = new_value

242

243 @property

244 def statement_byte_budget(self) -> Union[int, None]:

245 """Limit on the number of bytes billed per statement.

246

247 Exceeding this budget results in an error.

248 """

249 return _helpers._int_or_none(self._properties.get("statementByteBudget"))

250

251 @statement_byte_budget.setter

252 def statement_byte_budget(self, value: Union[int, None]):

253 new_value = None if value is None else str(value)

254 self._properties["statementByteBudget"] = new_value

255

256 @property

257 def key_result_statement(self) -> Union[KeyResultStatementKind, None]:

258 """Determines which statement in the script represents the "key result".

259

260 This is used to populate the schema and query results of the script job.

261 Default is ``KeyResultStatementKind.LAST``.

262 """

263 return self._properties.get("keyResultStatement")

264

265 @key_result_statement.setter

266 def key_result_statement(self, value: Union[KeyResultStatementKind, None]):

267 self._properties["keyResultStatement"] = value

268

269

270class QueryJobConfig(_JobConfig):

271 """Configuration options for query jobs.

272

273 All properties in this class are optional. Values which are :data:`None` ->

274 server defaults. Set properties on the constructed configuration by using

275 the property name as the name of a keyword argument.

276 """

277

278 def __init__(self, **kwargs) -> None:

279 super(QueryJobConfig, self).__init__("query", **kwargs)

280

281 @property

282 def destination_encryption_configuration(self):

283 """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom

284 encryption configuration for the destination table.

285

286 Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`

287 if using default encryption.

288

289 See

290 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_encryption_configuration

291 """

292 prop = self._get_sub_prop("destinationEncryptionConfiguration")

293 if prop is not None:

294 prop = EncryptionConfiguration.from_api_repr(prop)

295 return prop

296

297 @destination_encryption_configuration.setter

298 def destination_encryption_configuration(self, value):

299 api_repr = value

300 if value is not None:

301 api_repr = value.to_api_repr()

302 self._set_sub_prop("destinationEncryptionConfiguration", api_repr)

303

304 @property

305 def allow_large_results(self):

306 """bool: Allow large query results tables (legacy SQL, only)

307

308 See

309 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.allow_large_results

310 """

311 return self._get_sub_prop("allowLargeResults")

312

313 @allow_large_results.setter

314 def allow_large_results(self, value):

315 self._set_sub_prop("allowLargeResults", value)

316

317 @property

318 def connection_properties(self) -> List[ConnectionProperty]:

319 """Connection properties.

320

321 See

322 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.connection_properties

323

324 .. versionadded:: 2.29.0

325 """

326 resource = self._get_sub_prop("connectionProperties", [])

327 return [ConnectionProperty.from_api_repr(prop) for prop in resource]

328

329 @connection_properties.setter

330 def connection_properties(self, value: Iterable[ConnectionProperty]):

331 self._set_sub_prop(

332 "connectionProperties",

333 [prop.to_api_repr() for prop in value],

334 )

335

336 @property

337 def create_disposition(self):

338 """google.cloud.bigquery.job.CreateDisposition: Specifies behavior

339 for creating tables.

340

341 See

342 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_disposition

343 """

344 return self._get_sub_prop("createDisposition")

345

346 @create_disposition.setter

347 def create_disposition(self, value):

348 self._set_sub_prop("createDisposition", value)

349

350 @property

351 def create_session(self) -> Optional[bool]:

352 """[Preview] If :data:`True`, creates a new session, where

353 :attr:`~google.cloud.bigquery.job.QueryJob.session_info` will contain a

354 random server generated session id.

355

356 If :data:`False`, runs query with an existing ``session_id`` passed in

357 :attr:`~google.cloud.bigquery.job.QueryJobConfig.connection_properties`,

358 otherwise runs query in non-session mode.

359

360 See

361 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.create_session

362

363 .. versionadded:: 2.29.0

364 """

365 return self._get_sub_prop("createSession")

366

367 @create_session.setter

368 def create_session(self, value: Optional[bool]):

369 self._set_sub_prop("createSession", value)

370

371 @property

372 def default_dataset(self):

373 """google.cloud.bigquery.dataset.DatasetReference: the default dataset

374 to use for unqualified table names in the query or :data:`None` if not

375 set.

376

377 The ``default_dataset`` setter accepts:

378

379 - a :class:`~google.cloud.bigquery.dataset.Dataset`, or

380 - a :class:`~google.cloud.bigquery.dataset.DatasetReference`, or

381 - a :class:`str` of the fully-qualified dataset ID in standard SQL

382 format. The value must included a project ID and dataset ID

383 separated by ``.``. For example: ``your-project.your_dataset``.

384

385 See

386 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.default_dataset

387 """

388 prop = self._get_sub_prop("defaultDataset")

389 if prop is not None:

390 prop = DatasetReference.from_api_repr(prop)

391 return prop

392

393 @default_dataset.setter

394 def default_dataset(self, value):

395 if value is None:

396 self._set_sub_prop("defaultDataset", None)

397 return

398

399 if isinstance(value, str):

400 value = DatasetReference.from_string(value)

401

402 if isinstance(value, (Dataset, DatasetListItem)):

403 value = value.reference

404

405 resource = value.to_api_repr()

406 self._set_sub_prop("defaultDataset", resource)

407

408 @property

409 def destination(self):

410 """google.cloud.bigquery.table.TableReference: table where results are

411 written or :data:`None` if not set.

412

413 The ``destination`` setter accepts:

414

415 - a :class:`~google.cloud.bigquery.table.Table`, or

416 - a :class:`~google.cloud.bigquery.table.TableReference`, or

417 - a :class:`str` of the fully-qualified table ID in standard SQL

418 format. The value must included a project ID, dataset ID, and table

419 ID, each separated by ``.``. For example:

420 ``your-project.your_dataset.your_table``.

421

422 See

423 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table

424 """

425 prop = self._get_sub_prop("destinationTable")

426 if prop is not None:

427 prop = TableReference.from_api_repr(prop)

428 return prop

429

430 @destination.setter

431 def destination(self, value):

432 if value is None:

433 self._set_sub_prop("destinationTable", None)

434 return

435

436 value = _table_arg_to_table_ref(value)

437 resource = value.to_api_repr()

438 self._set_sub_prop("destinationTable", resource)

439

440 @property

441 def dry_run(self):

442 """bool: :data:`True` if this query should be a dry run to estimate

443 costs.

444

445 See

446 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.dry_run

447 """

448 return self._properties.get("dryRun")

449

450 @dry_run.setter

451 def dry_run(self, value):

452 self._properties["dryRun"] = value

453

454 @property

455 def flatten_results(self):

456 """bool: Flatten nested/repeated fields in results. (Legacy SQL only)

457

458 See

459 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.flatten_results

460 """

461 return self._get_sub_prop("flattenResults")

462

463 @flatten_results.setter

464 def flatten_results(self, value):

465 self._set_sub_prop("flattenResults", value)

466

467 @property

468 def maximum_billing_tier(self):

469 """int: Deprecated. Changes the billing tier to allow high-compute

470 queries.

471

472 See

473 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_billing_tier

474 """

475 return self._get_sub_prop("maximumBillingTier")

476

477 @maximum_billing_tier.setter

478 def maximum_billing_tier(self, value):

479 self._set_sub_prop("maximumBillingTier", value)

480

481 @property

482 def maximum_bytes_billed(self):

483 """int: Maximum bytes to be billed for this job or :data:`None` if not set.

484

485 See

486 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.maximum_bytes_billed

487 """

488 return _helpers._int_or_none(self._get_sub_prop("maximumBytesBilled"))

489

490 @maximum_bytes_billed.setter

491 def maximum_bytes_billed(self, value):

492 self._set_sub_prop("maximumBytesBilled", str(value))

493

494 @property

495 def priority(self):

496 """google.cloud.bigquery.job.QueryPriority: Priority of the query.

497

498 See

499 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.priority

500 """

501 return self._get_sub_prop("priority")

502

503 @priority.setter

504 def priority(self, value):

505 self._set_sub_prop("priority", value)

506

507 @property

508 def query_parameters(self):

509 """List[Union[google.cloud.bigquery.query.ArrayQueryParameter, \

510 google.cloud.bigquery.query.ScalarQueryParameter, \

511 google.cloud.bigquery.query.StructQueryParameter]]: list of parameters

512 for parameterized query (empty by default)

513

514 See:

515 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query_parameters

516 """

517 prop = self._get_sub_prop("queryParameters", default=[])

518 return _from_api_repr_query_parameters(prop)

519

520 @query_parameters.setter

521 def query_parameters(self, values):

522 self._set_sub_prop("queryParameters", _to_api_repr_query_parameters(values))

523

524 @property

525 def range_partitioning(self):

526 """Optional[google.cloud.bigquery.table.RangePartitioning]:

527 Configures range-based partitioning for destination table.

528

529 .. note::

530 **Beta**. The integer range partitioning feature is in a

531 pre-release state and might change or have limited support.

532

533 Only specify at most one of

534 :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or

535 :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.

536

537 Raises:

538 ValueError:

539 If the value is not

540 :class:`~google.cloud.bigquery.table.RangePartitioning` or

541 :data:`None`.

542 """

543 resource = self._get_sub_prop("rangePartitioning")

544 if resource is not None:

545 return RangePartitioning(_properties=resource)

546

547 @range_partitioning.setter

548 def range_partitioning(self, value):

549 resource = value

550 if isinstance(value, RangePartitioning):

551 resource = value._properties

552 elif value is not None:

553 raise ValueError(

554 "Expected value to be RangePartitioning or None, got {}.".format(value)

555 )

556 self._set_sub_prop("rangePartitioning", resource)

557

558 @property

559 def udf_resources(self):

560 """List[google.cloud.bigquery.query.UDFResource]: user

561 defined function resources (empty by default)

562

563 See:

564 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.user_defined_function_resources

565 """

566 prop = self._get_sub_prop("userDefinedFunctionResources", default=[])

567 return _from_api_repr_udf_resources(prop)

568

569 @udf_resources.setter

570 def udf_resources(self, values):

571 self._set_sub_prop(

572 "userDefinedFunctionResources", _to_api_repr_udf_resources(values)

573 )

574

575 @property

576 def use_legacy_sql(self):

577 """bool: Use legacy SQL syntax.

578

579 See

580 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_legacy_sql

581 """

582 return self._get_sub_prop("useLegacySql")

583

584 @use_legacy_sql.setter

585 def use_legacy_sql(self, value):

586 self._set_sub_prop("useLegacySql", value)

587

588 @property

589 def use_query_cache(self):

590 """bool: Look for the query result in the cache.

591

592 See

593 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.use_query_cache

594 """

595 return self._get_sub_prop("useQueryCache")

596

597 @use_query_cache.setter

598 def use_query_cache(self, value):

599 self._set_sub_prop("useQueryCache", value)

600

601 @property

602 def write_disposition(self):

603 """google.cloud.bigquery.job.WriteDisposition: Action that occurs if

604 the destination table already exists.

605

606 See

607 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.write_disposition

608 """

609 return self._get_sub_prop("writeDisposition")

610

611 @write_disposition.setter

612 def write_disposition(self, value):

613 self._set_sub_prop("writeDisposition", value)

614

615 @property

616 def table_definitions(self):

617 """Dict[str, google.cloud.bigquery.external_config.ExternalConfig]:

618 Definitions for external tables or :data:`None` if not set.

619

620 See

621 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.external_table_definitions

622 """

623 prop = self._get_sub_prop("tableDefinitions")

624 if prop is not None:

625 prop = _from_api_repr_table_defs(prop)

626 return prop

627

628 @table_definitions.setter

629 def table_definitions(self, values):

630 self._set_sub_prop("tableDefinitions", _to_api_repr_table_defs(values))

631

632 @property

633 def time_partitioning(self):

634 """Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies

635 time-based partitioning for the destination table.

636

637 Only specify at most one of

638 :attr:`~google.cloud.bigquery.job.LoadJobConfig.time_partitioning` or

639 :attr:`~google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.

640

641 Raises:

642 ValueError:

643 If the value is not

644 :class:`~google.cloud.bigquery.table.TimePartitioning` or

645 :data:`None`.

646 """

647 prop = self._get_sub_prop("timePartitioning")

648 if prop is not None:

649 prop = TimePartitioning.from_api_repr(prop)

650 return prop

651

652 @time_partitioning.setter

653 def time_partitioning(self, value):

654 api_repr = value

655 if value is not None:

656 api_repr = value.to_api_repr()

657 self._set_sub_prop("timePartitioning", api_repr)

658

659 @property

660 def clustering_fields(self):

661 """Optional[List[str]]: Fields defining clustering for the table

662

663 (Defaults to :data:`None`).

664

665 Clustering fields are immutable after table creation.

666

667 .. note::

668

669 BigQuery supports clustering for both partitioned and

670 non-partitioned tables.

671 """

672 prop = self._get_sub_prop("clustering")

673 if prop is not None:

674 return list(prop.get("fields", ()))

675

676 @clustering_fields.setter

677 def clustering_fields(self, value):

678 """Optional[List[str]]: Fields defining clustering for the table

679

680 (Defaults to :data:`None`).

681 """

682 if value is not None:

683 self._set_sub_prop("clustering", {"fields": value})

684 else:

685 self._del_sub_prop("clustering")

686

687 @property

688 def schema_update_options(self):

689 """List[google.cloud.bigquery.job.SchemaUpdateOption]: Specifies

690 updates to the destination table schema to allow as a side effect of

691 the query job.

692 """

693 return self._get_sub_prop("schemaUpdateOptions")

694

695 @schema_update_options.setter

696 def schema_update_options(self, values):

697 self._set_sub_prop("schemaUpdateOptions", values)

698

699 @property

700 def script_options(self) -> ScriptOptions:

701 """Options controlling the execution of scripts.

702

703 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#scriptoptions

704 """

705 prop = self._get_sub_prop("scriptOptions")

706 if prop is not None:

707 prop = ScriptOptions.from_api_repr(prop)

708 return prop

709

710 @script_options.setter

711 def script_options(self, value: Union[ScriptOptions, None]):

712 new_value = None if value is None else value.to_api_repr()

713 self._set_sub_prop("scriptOptions", new_value)

714

715 def to_api_repr(self) -> dict:

716 """Build an API representation of the query job config.

717

718 Returns:

719 Dict: A dictionary in the format used by the BigQuery API.

720 """

721 resource = copy.deepcopy(self._properties)

722

723 # Query parameters have an addition property associated with them

724 # to indicate if the query is using named or positional parameters.

725 query_parameters = resource["query"].get("queryParameters")

726 if query_parameters:

727 if query_parameters[0].get("name") is None:

728 resource["query"]["parameterMode"] = "POSITIONAL"

729 else:

730 resource["query"]["parameterMode"] = "NAMED"

731

732 return resource

733

734

735class QueryJob(_AsyncJob):

736 """Asynchronous job: query tables.

737

738 Args:

739 job_id (str): the job's ID, within the project belonging to ``client``.

740

741 query (str): SQL query string.

742

743 client (google.cloud.bigquery.client.Client):

744 A client which holds credentials and project configuration

745 for the dataset (which requires a project).

746

747 job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):

748 Extra configuration options for the query job.

749 """

750

751 _JOB_TYPE = "query"

752 _UDF_KEY = "userDefinedFunctionResources"

753 _CONFIG_CLASS = QueryJobConfig

754

755 def __init__(self, job_id, query, client, job_config=None):

756 super(QueryJob, self).__init__(job_id, client)

757

758 if job_config is not None:

759 self._properties["configuration"] = job_config._properties

760 if self.configuration.use_legacy_sql is None:

761 self.configuration.use_legacy_sql = False

762

763 if query:

764 _helpers._set_sub_prop(

765 self._properties, ["configuration", "query", "query"], query

766 )

767

768 self._query_results = None

769 self._done_timeout = None

770 self._transport_timeout = None

771

772 @property

773 def allow_large_results(self):

774 """See

775 :attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`.

776 """

777 return self.configuration.allow_large_results

778

779 @property

780 def configuration(self) -> QueryJobConfig:

781 """The configuration for this query job."""

782 return typing.cast(QueryJobConfig, super().configuration)

783

784 @property

785 def connection_properties(self) -> List[ConnectionProperty]:

786 """See

787 :attr:`google.cloud.bigquery.job.QueryJobConfig.connection_properties`.

788

789 .. versionadded:: 2.29.0

790 """

791 return self.configuration.connection_properties

792

793 @property

794 def create_disposition(self):

795 """See

796 :attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`.

797 """

798 return self.configuration.create_disposition

799

800 @property

801 def create_session(self) -> Optional[bool]:

802 """See

803 :attr:`google.cloud.bigquery.job.QueryJobConfig.create_session`.

804

805 .. versionadded:: 2.29.0

806 """

807 return self.configuration.create_session

808

809 @property

810 def default_dataset(self):

811 """See

812 :attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`.

813 """

814 return self.configuration.default_dataset

815

816 @property

817 def destination(self):

818 """See

819 :attr:`google.cloud.bigquery.job.QueryJobConfig.destination`.

820 """

821 return self.configuration.destination

822

823 @property

824 def destination_encryption_configuration(self):

825 """google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom

826 encryption configuration for the destination table.

827

828 Custom encryption configuration (e.g., Cloud KMS keys) or :data:`None`

829 if using default encryption.

830

831 See

832 :attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`.

833 """

834 return self.configuration.destination_encryption_configuration

835

836 @property

837 def dry_run(self):

838 """See

839 :attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`.

840 """

841 return self.configuration.dry_run

842

843 @property

844 def flatten_results(self):

845 """See

846 :attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`.

847 """

848 return self.configuration.flatten_results

849

850 @property

851 def priority(self):

852 """See

853 :attr:`google.cloud.bigquery.job.QueryJobConfig.priority`.

854 """

855 return self.configuration.priority

856

857 @property

858 def query(self):

859 """str: The query text used in this query job.

860

861 See:

862 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.query

863 """

864 return _helpers._get_sub_prop(

865 self._properties, ["configuration", "query", "query"]

866 )

867

868 @property

869 def query_parameters(self):

870 """See

871 :attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`.

872 """

873 return self.configuration.query_parameters

874

875 @property

876 def udf_resources(self):

877 """See

878 :attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`.

879 """

880 return self.configuration.udf_resources

881

882 @property

883 def use_legacy_sql(self):

884 """See

885 :attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`.

886 """

887 return self.configuration.use_legacy_sql

888

889 @property

890 def use_query_cache(self):

891 """See

892 :attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`.

893 """

894 return self.configuration.use_query_cache

895

896 @property

897 def write_disposition(self):

898 """See

899 :attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`.

900 """

901 return self.configuration.write_disposition

902

903 @property

904 def maximum_billing_tier(self):

905 """See

906 :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`.

907 """

908 return self.configuration.maximum_billing_tier

909

910 @property

911 def maximum_bytes_billed(self):

912 """See

913 :attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`.

914 """

915 return self.configuration.maximum_bytes_billed

916

917 @property

918 def range_partitioning(self):

919 """See

920 :attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`.

921 """

922 return self.configuration.range_partitioning

923

924 @property

925 def table_definitions(self):

926 """See

927 :attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`.

928 """

929 return self.configuration.table_definitions

930

931 @property

932 def time_partitioning(self):

933 """See

934 :attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`.

935 """

936 return self.configuration.time_partitioning

937

938 @property

939 def clustering_fields(self):

940 """See

941 :attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`.

942 """

943 return self.configuration.clustering_fields

944

945 @property

946 def schema_update_options(self):

947 """See

948 :attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`.

949 """

950 return self.configuration.schema_update_options

951

952 def to_api_repr(self):

953 """Generate a resource for :meth:`_begin`."""

954 # Use to_api_repr to allow for some configuration properties to be set

955 # automatically.

956 configuration = self.configuration.to_api_repr()

957 return {

958 "jobReference": self._properties["jobReference"],

959 "configuration": configuration,

960 }

961

962 @classmethod

963 def from_api_repr(cls, resource: dict, client: "Client") -> "QueryJob":

964 """Factory: construct a job given its API representation

965

966 Args:

967 resource (Dict): dataset job representation returned from the API

968

969 client (google.cloud.bigquery.client.Client):

970 Client which holds credentials and project

971 configuration for the dataset.

972

973 Returns:

974 google.cloud.bigquery.job.QueryJob: Job parsed from ``resource``.

975 """

976 job_ref_properties = resource.setdefault(

977 "jobReference", {"projectId": client.project, "jobId": None}

978 )

979 job_ref = _JobReference._from_api_repr(job_ref_properties)

980 job = cls(job_ref, None, client=client)

981 job._set_properties(resource)

982 return job

983

984 @property

985 def query_plan(self):

986 """Return query plan from job statistics, if present.

987

988 See:

989 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.query_plan

990

991 Returns:

992 List[google.cloud.bigquery.job.QueryPlanEntry]:

993 mappings describing the query plan, or an empty list

994 if the query has not yet completed.

995 """

996 plan_entries = self._job_statistics().get("queryPlan", ())

997 return [QueryPlanEntry.from_api_repr(entry) for entry in plan_entries]

998

999 @property

1000 def schema(self) -> Optional[List[SchemaField]]:

1001 """The schema of the results.

1002

1003 Present only for successful dry run of non-legacy SQL queries.

1004 """

1005 resource = self._job_statistics().get("schema")

1006 if resource is None:

1007 return None

1008 fields = resource.get("fields", [])

1009 return [SchemaField.from_api_repr(field) for field in fields]

1010

1011 @property

1012 def timeline(self):

1013 """List(TimelineEntry): Return the query execution timeline

1014 from job statistics.

1015 """

1016 raw = self._job_statistics().get("timeline", ())

1017 return [TimelineEntry.from_api_repr(entry) for entry in raw]

1018

1019 @property

1020 def total_bytes_processed(self):

1021 """Return total bytes processed from job statistics, if present.

1022

1023 See:

1024 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_processed

1025

1026 Returns:

1027 Optional[int]:

1028 Total bytes processed by the job, or None if job is not

1029 yet complete.

1030 """

1031 result = self._job_statistics().get("totalBytesProcessed")

1032 if result is not None:

1033 result = int(result)

1034 return result

1035

1036 @property

1037 def total_bytes_billed(self):

1038 """Return total bytes billed from job statistics, if present.

1039

1040 See:

1041 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.total_bytes_billed

1042

1043 Returns:

1044 Optional[int]:

1045 Total bytes processed by the job, or None if job is not

1046 yet complete.

1047 """

1048 result = self._job_statistics().get("totalBytesBilled")

1049 if result is not None:

1050 result = int(result)

1051 return result

1052

1053 @property

1054 def billing_tier(self):

1055 """Return billing tier from job statistics, if present.

1056

1057 See:

1058 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.billing_tier

1059

1060 Returns:

1061 Optional[int]:

1062 Billing tier used by the job, or None if job is not

1063 yet complete.

1064 """

1065 return self._job_statistics().get("billingTier")

1066

1067 @property

1068 def cache_hit(self):

1069 """Return whether or not query results were served from cache.

1070

1071 See:

1072 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.cache_hit

1073

1074 Returns:

1075 Optional[bool]:

1076 whether the query results were returned from cache, or None

1077 if job is not yet complete.

1078 """

1079 return self._job_statistics().get("cacheHit")

1080

1081 @property

1082 def ddl_operation_performed(self):

1083 """Optional[str]: Return the DDL operation performed.

1084

1085 See:

1086 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_operation_performed

1087

1088 """

1089 return self._job_statistics().get("ddlOperationPerformed")

1090

1091 @property

1092 def ddl_target_routine(self):

1093 """Optional[google.cloud.bigquery.routine.RoutineReference]: Return the DDL target routine, present

1094 for CREATE/DROP FUNCTION/PROCEDURE queries.

1095

1096 See:

1097 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_routine

1098 """

1099 prop = self._job_statistics().get("ddlTargetRoutine")

1100 if prop is not None:

1101 prop = RoutineReference.from_api_repr(prop)

1102 return prop

1103

1104 @property

1105 def ddl_target_table(self):

1106 """Optional[google.cloud.bigquery.table.TableReference]: Return the DDL target table, present

1107 for CREATE/DROP TABLE/VIEW queries.

1108

1109 See:

1110 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.ddl_target_table

1111 """

1112 prop = self._job_statistics().get("ddlTargetTable")

1113 if prop is not None:

1114 prop = TableReference.from_api_repr(prop)

1115 return prop

1116

1117 @property

1118 def num_dml_affected_rows(self) -> Optional[int]:

1119 """Return the number of DML rows affected by the job.

1120

1121 See:

1122 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.num_dml_affected_rows

1123

1124 Returns:

1125 Optional[int]:

1126 number of DML rows affected by the job, or None if job is not

1127 yet complete.

1128 """

1129 result = self._job_statistics().get("numDmlAffectedRows")

1130 if result is not None:

1131 result = int(result)

1132 return result

1133

1134 @property

1135 def slot_millis(self):

1136 """Union[int, None]: Slot-milliseconds used by this query job."""

1137 return _helpers._int_or_none(self._job_statistics().get("totalSlotMs"))

1138

1139 @property

1140 def statement_type(self):

1141 """Return statement type from job statistics, if present.

1142

1143 See:

1144 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.statement_type

1145

1146 Returns:

1147 Optional[str]:

1148 type of statement used by the job, or None if job is not

1149 yet complete.

1150 """

1151 return self._job_statistics().get("statementType")

1152

1153 @property

1154 def referenced_tables(self):

1155 """Return referenced tables from job statistics, if present.

1156

1157 See:

1158 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_tables

1159

1160 Returns:

1161 List[Dict]:

1162 mappings describing the query plan, or an empty list

1163 if the query has not yet completed.

1164 """

1165 tables = []

1166 datasets_by_project_name = {}

1167

1168 for table in self._job_statistics().get("referencedTables", ()):

1169

1170 t_project = table["projectId"]

1171

1172 ds_id = table["datasetId"]

1173 t_dataset = datasets_by_project_name.get((t_project, ds_id))

1174 if t_dataset is None:

1175 t_dataset = DatasetReference(t_project, ds_id)

1176 datasets_by_project_name[(t_project, ds_id)] = t_dataset

1177

1178 t_name = table["tableId"]

1179 tables.append(t_dataset.table(t_name))

1180

1181 return tables

1182

1183 @property

1184 def undeclared_query_parameters(self):

1185 """Return undeclared query parameters from job statistics, if present.

1186

1187 See:

1188 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.undeclared_query_parameters

1189

1190 Returns:

1191 List[Union[ \

1192 google.cloud.bigquery.query.ArrayQueryParameter, \

1193 google.cloud.bigquery.query.ScalarQueryParameter, \

1194 google.cloud.bigquery.query.StructQueryParameter \

1195 ]]:

1196 Undeclared parameters, or an empty list if the query has

1197 not yet completed.

1198 """

1199 parameters = []

1200 undeclared = self._job_statistics().get("undeclaredQueryParameters", ())

1201

1202 for parameter in undeclared:

1203 p_type = parameter["parameterType"]

1204

1205 if "arrayType" in p_type:

1206 klass = ArrayQueryParameter

1207 elif "structTypes" in p_type:

1208 klass = StructQueryParameter

1209 else:

1210 klass = ScalarQueryParameter

1211

1212 parameters.append(klass.from_api_repr(parameter))

1213

1214 return parameters

1215

1216 @property

1217 def estimated_bytes_processed(self):

1218 """Return the estimated number of bytes processed by the query.

1219

1220 See:

1221 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.estimated_bytes_processed

1222

1223 Returns:

1224 Optional[int]:

1225 number of DML rows affected by the job, or None if job is not

1226 yet complete.

1227 """

1228 result = self._job_statistics().get("estimatedBytesProcessed")

1229 if result is not None:

1230 result = int(result)

1231 return result

1232

1233 @property

1234 def dml_stats(self) -> Optional[DmlStats]:

1235 stats = self._job_statistics().get("dmlStats")

1236 if stats is None:

1237 return None

1238 else:

1239 return DmlStats.from_api_repr(stats)

1240

1241 @property

1242 def bi_engine_stats(self) -> Optional[BiEngineStats]:

1243 stats = self._job_statistics().get("biEngineStatistics")

1244

1245 if stats is None:

1246 return None

1247 else:

1248 return BiEngineStats.from_api_repr(stats)

1249

1250 def _blocking_poll(self, timeout=None, **kwargs):

1251 self._done_timeout = timeout

1252 self._transport_timeout = timeout

1253 super(QueryJob, self)._blocking_poll(timeout=timeout, **kwargs)

1254

1255 @staticmethod

1256 def _format_for_exception(message: str, query: str):

1257 """Format a query for the output in exception message.

1258

1259 Args:

1260 message (str): The original exception message.

1261 query (str): The SQL query to format.

1262

1263 Returns:

1264 str: A formatted query text.

1265 """

1266 template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}"

1267

1268 lines = query.splitlines() if query is not None else [""]

1269 max_line_len = max(len(line) for line in lines)

1270

1271 header = "-----Query Job SQL Follows-----"

1272 header = "{:^{total_width}}".format(header, total_width=max_line_len + 5)

1273

1274 # Print out a "ruler" above and below the SQL so we can judge columns.

1275 # Left pad for the line numbers (4 digits plus ":").

1276 ruler = " |" + " . |" * (max_line_len // 10)

1277

1278 # Put line numbers next to the SQL.

1279 body = "\n".join(

1280 "{:4}:{}".format(n, line) for n, line in enumerate(lines, start=1)

1281 )

1282

1283 return template.format(message=message, header=header, ruler=ruler, body=body)

1284

1285 def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):

1286 """API call: begin the job via a POST request

1287

1288 See

1289 https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert

1290

1291 Args:

1292 client (Optional[google.cloud.bigquery.client.Client]):

1293 The client to use. If not passed, falls back to the ``client``

1294 associated with the job object or``NoneType``.

1295 retry (Optional[google.api_core.retry.Retry]):

1296 How to retry the RPC.

1297 timeout (Optional[float]):

1298 The number of seconds to wait for the underlying HTTP transport

1299 before using ``retry``.

1300

1301 Raises:

1302 ValueError: If the job has already begun.

1303 """

1304

1305 try:

1306 super(QueryJob, self)._begin(client=client, retry=retry, timeout=timeout)

1307 except exceptions.GoogleAPICallError as exc:

1308 exc.message = _EXCEPTION_FOOTER_TEMPLATE.format(

1309 message=exc.message, location=self.location, job_id=self.job_id

1310 )

1311 exc.debug_message = self._format_for_exception(exc.message, self.query)

1312 exc.query_job = self

1313 raise

1314

1315 def _reload_query_results(

1316 self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None

1317 ):

1318 """Refresh the cached query results.

1319

1320 Args:

1321 retry (Optional[google.api_core.retry.Retry]):

1322 How to retry the call that retrieves query results.

1323 timeout (Optional[float]):

1324 The number of seconds to wait for the underlying HTTP transport

1325 before using ``retry``.

1326 """

1327 if self._query_results and self._query_results.complete:

1328 return

1329

1330 # Since the API to getQueryResults can hang up to the timeout value

1331 # (default of 10 seconds), set the timeout parameter to ensure that

1332 # the timeout from the futures API is respected. See:

1333 # https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135

1334 timeout_ms = None

1335 if self._done_timeout is not None:

1336 # Subtract a buffer for context switching, network latency, etc.

1337 api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS

1338 api_timeout = max(min(api_timeout, 10), 0)

1339 self._done_timeout -= api_timeout

1340 self._done_timeout = max(0, self._done_timeout)

1341 timeout_ms = int(api_timeout * 1000)

1342

1343 # If an explicit timeout is not given, fall back to the transport timeout

1344 # stored in _blocking_poll() in the process of polling for job completion.

1345 transport_timeout = timeout if timeout is not None else self._transport_timeout

1346

1347 self._query_results = self._client._get_query_results(

1348 self.job_id,

1349 retry,

1350 project=self.project,

1351 timeout_ms=timeout_ms,

1352 location=self.location,

1353 timeout=transport_timeout,

1354 )

1355

1356 def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None):

1357 """Check if the query has finished running and raise if it's not.

1358

1359 If the query has finished, also reload the job itself.

1360 """

1361 # If an explicit timeout is not given, fall back to the transport timeout

1362 # stored in _blocking_poll() in the process of polling for job completion.

1363 transport_timeout = timeout if timeout is not None else self._transport_timeout

1364

1365 try:

1366 self._reload_query_results(retry=retry, timeout=transport_timeout)

1367 except exceptions.GoogleAPIError as exc:

1368 # Reloading also updates error details on self, thus no need for an

1369 # explicit self.set_exception() call if reloading succeeds.

1370 try:

1371 self.reload(retry=retry, timeout=transport_timeout)

1372 except exceptions.GoogleAPIError:

1373 # Use the query results reload exception, as it generally contains

1374 # much more useful error information.

1375 self.set_exception(exc)

1376 finally:

1377 return

1378

1379 # Only reload the job once we know the query is complete.

1380 # This will ensure that fields such as the destination table are

1381 # correctly populated.

1382 if not self._query_results.complete:

1383 raise polling_future._OperationNotComplete()

1384 else:

1385 try:

1386 self.reload(retry=retry, timeout=transport_timeout)

1387 except exceptions.GoogleAPIError as exc:

1388 self.set_exception(exc)

1389

1390 def result( # type: ignore # (complaints about the overloaded signature)

1391 self,

1392 page_size: Optional[int] = None,

1393 max_results: Optional[int] = None,

1394 retry: "retries.Retry" = DEFAULT_RETRY,

1395 timeout: float = None,

1396 start_index: Optional[int] = None,

1397 job_retry: "retries.Retry" = DEFAULT_JOB_RETRY,

1398 ) -> Union["RowIterator", _EmptyRowIterator]:

1399 """Start the job and wait for it to complete and get the result.

1400

1401 Args:

1402 page_size (Optional[int]):

1403 The maximum number of rows in each page of results from this

1404 request. Non-positive values are ignored.

1405 max_results (Optional[int]):

1406 The maximum total number of rows from this request.

1407 retry (Optional[google.api_core.retry.Retry]):

1408 How to retry the call that retrieves rows. This only

1409 applies to making RPC calls. It isn't used to retry

1410 failed jobs. This has a reasonable default that

1411 should only be overridden with care. If the job state

1412 is ``DONE``, retrying is aborted early even if the

1413 results are not available, as this will not change

1414 anymore.

1415 timeout (Optional[float]):

1416 The number of seconds to wait for the underlying HTTP transport

1417 before using ``retry``.

1418 If multiple requests are made under the hood, ``timeout``

1419 applies to each individual request.

1420 start_index (Optional[int]):

1421 The zero-based index of the starting row to read.

1422 job_retry (Optional[google.api_core.retry.Retry]):

1423 How to retry failed jobs. The default retries

1424 rate-limit-exceeded errors. Passing ``None`` disables

1425 job retry.

1426

1427 Not all jobs can be retried. If ``job_id`` was

1428 provided to the query that created this job, then the

1429 job returned by the query will not be retryable, and

1430 an exception will be raised if non-``None``

1431 non-default ``job_retry`` is also provided.

1432

1433 Returns:

1434 google.cloud.bigquery.table.RowIterator:

1435 Iterator of row data

1436 :class:`~google.cloud.bigquery.table.Row`-s. During each

1437 page, the iterator will have the ``total_rows`` attribute

1438 set, which counts the total number of rows **in the result

1439 set** (this is distinct from the total number of rows in the

1440 current page: ``iterator.page.num_items``).

1441

1442 If the query is a special query that produces no results, e.g.

1443 a DDL query, an ``_EmptyRowIterator`` instance is returned.

1444

1445 Raises:

1446 google.cloud.exceptions.GoogleAPICallError:

1447 If the job failed and retries aren't successful.

1448 concurrent.futures.TimeoutError:

1449 If the job did not complete in the given timeout.

1450 TypeError:

1451 If Non-``None`` and non-default ``job_retry`` is

1452 provided and the job is not retryable.

1453 """

1454 if self.dry_run:

1455 return _EmptyRowIterator()

1456 try:

1457 retry_do_query = getattr(self, "_retry_do_query", None)

1458 if retry_do_query is not None:

1459 if job_retry is DEFAULT_JOB_RETRY:

1460 job_retry = self._job_retry # type: ignore

1461 else:

1462 if job_retry is not None and job_retry is not DEFAULT_JOB_RETRY:

1463 raise TypeError(

1464 "`job_retry` was provided, but this job is"

1465 " not retryable, because a custom `job_id` was"

1466 " provided to the query that created this job."

1467 )

1468

1469 first = True

1470

1471 def do_get_result():

1472 nonlocal first

1473

1474 if first:

1475 first = False

1476 else:

1477 # Note that we won't get here if retry_do_query is

1478 # None, because we won't use a retry.

1479

1480 # The orinal job is failed. Create a new one.

1481 job = retry_do_query()

1482

1483 # If it's already failed, we might as well stop:

1484 if job.done() and job.exception() is not None:

1485 raise job.exception()

1486

1487 # Become the new job:

1488 self.__dict__.clear()

1489 self.__dict__.update(job.__dict__)

1490

1491 # This shouldn't be necessary, because once we have a good

1492 # job, it should stay good,and we shouldn't have to retry.

1493 # But let's be paranoid. :)

1494 self._retry_do_query = retry_do_query

1495 self._job_retry = job_retry

1496

1497 super(QueryJob, self).result(retry=retry, timeout=timeout)

1498

1499 # Since the job could already be "done" (e.g. got a finished job

1500 # via client.get_job), the superclass call to done() might not

1501 # set the self._query_results cache.

1502 self._reload_query_results(retry=retry, timeout=timeout)

1503

1504 if retry_do_query is not None and job_retry is not None:

1505 do_get_result = job_retry(do_get_result)

1506

1507 do_get_result()

1508

1509 except exceptions.GoogleAPICallError as exc:

1510 exc.message = _EXCEPTION_FOOTER_TEMPLATE.format(

1511 message=exc.message, location=self.location, job_id=self.job_id

1512 )

1513 exc.debug_message = self._format_for_exception(exc.message, self.query) # type: ignore

1514 exc.query_job = self # type: ignore

1515 raise

1516 except requests.exceptions.Timeout as exc:

1517 raise concurrent.futures.TimeoutError from exc

1518

1519 # If the query job is complete but there are no query results, this was

1520 # special job, such as a DDL query. Return an empty result set to

1521 # indicate success and avoid calling tabledata.list on a table which

1522 # can't be read (such as a view table).

1523 if self._query_results.total_rows is None:

1524 return _EmptyRowIterator()

1525

1526 rows = self._client._list_rows_from_query_results(

1527 self.job_id,

1528 self.location,

1529 self.project,

1530 self._query_results.schema,

1531 total_rows=self._query_results.total_rows,

1532 destination=self.destination,

1533 page_size=page_size,

1534 max_results=max_results,

1535 start_index=start_index,

1536 retry=retry,

1537 timeout=timeout,

1538 )

1539 rows._preserve_order = _contains_order_by(self.query)

1540 return rows

1541

1542 # If changing the signature of this method, make sure to apply the same

1543 # changes to table.RowIterator.to_arrow(), except for the max_results parameter

1544 # that should only exist here in the QueryJob method.

1545 def to_arrow(

1546 self,

1547 progress_bar_type: str = None,

1548 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,

1549 create_bqstorage_client: bool = True,

1550 max_results: Optional[int] = None,

1551 ) -> "pyarrow.Table":

1552 """[Beta] Create a class:`pyarrow.Table` by loading all pages of a

1553 table or query.

1554

1555 Args:

1556 progress_bar_type (Optional[str]):

1557 If set, use the `tqdm <https://tqdm.github.io/>`_ library to

1558 display a progress bar while the data downloads. Install the

1559 ``tqdm`` package to use this feature.

1560

1561 Possible values of ``progress_bar_type`` include:

1562

1563 ``None``

1564 No progress bar.

1565 ``'tqdm'``

1566 Use the :func:`tqdm.tqdm` function to print a progress bar

1567 to :data:`sys.stdout`.

1568 ``'tqdm_notebook'``

1569 Use the :func:`tqdm.notebook.tqdm` function to display a

1570 progress bar as a Jupyter notebook widget.

1571 ``'tqdm_gui'``

1572 Use the :func:`tqdm.tqdm_gui` function to display a

1573 progress bar as a graphical dialog box.

1574 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):

1575 A BigQuery Storage API client. If supplied, use the faster

1576 BigQuery Storage API to fetch rows from BigQuery. This API

1577 is a billable API.

1578

1579 This method requires ``google-cloud-bigquery-storage`` library.

1580

1581 Reading from a specific partition or snapshot is not

1582 currently supported by this method.

1583 create_bqstorage_client (Optional[bool]):

1584 If ``True`` (default), create a BigQuery Storage API client

1585 using the default API settings. The BigQuery Storage API

1586 is a faster way to fetch rows from BigQuery. See the

1587 ``bqstorage_client`` parameter for more information.

1588

1589 This argument does nothing if ``bqstorage_client`` is supplied.

1590

1591 .. versionadded:: 1.24.0

1592

1593 max_results (Optional[int]):

1594 Maximum number of rows to include in the result. No limit by default.

1595

1596 .. versionadded:: 2.21.0

1597

1598 Returns:

1599 pyarrow.Table

1600 A :class:`pyarrow.Table` populated with row data and column

1601 headers from the query results. The column headers are derived

1602 from the destination table's schema.

1603

1604 Raises:

1605 ValueError:

1606 If the :mod:`pyarrow` library cannot be imported.

1607

1608 .. versionadded:: 1.17.0

1609 """

1610 query_result = wait_for_query(self, progress_bar_type, max_results=max_results)

1611 return query_result.to_arrow(

1612 progress_bar_type=progress_bar_type,

1613 bqstorage_client=bqstorage_client,

1614 create_bqstorage_client=create_bqstorage_client,

1615 )

1616

1617 # If changing the signature of this method, make sure to apply the same

1618 # changes to table.RowIterator.to_dataframe(), except for the max_results parameter

1619 # that should only exist here in the QueryJob method.

1620 def to_dataframe(

1621 self,

1622 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,

1623 dtypes: Dict[str, Any] = None,

1624 progress_bar_type: str = None,

1625 create_bqstorage_client: bool = True,

1626 max_results: Optional[int] = None,

1627 geography_as_object: bool = False,

1628 bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,

1629 int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,

1630 float_dtype: Union[Any, None] = None,

1631 string_dtype: Union[Any, None] = None,

1632 ) -> "pandas.DataFrame":

1633 """Return a pandas DataFrame from a QueryJob

1634

1635 Args:

1636 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):

1637 A BigQuery Storage API client. If supplied, use the faster

1638 BigQuery Storage API to fetch rows from BigQuery. This

1639 API is a billable API.

1640

1641 This method requires the ``fastavro`` and

1642 ``google-cloud-bigquery-storage`` libraries.

1643

1644 Reading from a specific partition or snapshot is not

1645 currently supported by this method.

1646

1647 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):

1648 A dictionary of column names pandas ``dtype``s. The provided

1649 ``dtype`` is used when constructing the series for the column

1650 specified. Otherwise, the default pandas behavior is used.

1651

1652 progress_bar_type (Optional[str]):

1653 If set, use the `tqdm <https://tqdm.github.io/>`_ library to

1654 display a progress bar while the data downloads. Install the

1655 ``tqdm`` package to use this feature.

1656

1657 See

1658 :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`

1659 for details.

1660

1661 .. versionadded:: 1.11.0

1662 create_bqstorage_client (Optional[bool]):

1663 If ``True`` (default), create a BigQuery Storage API client

1664 using the default API settings. The BigQuery Storage API

1665 is a faster way to fetch rows from BigQuery. See the

1666 ``bqstorage_client`` parameter for more information.

1667

1668 This argument does nothing if ``bqstorage_client`` is supplied.

1669

1670 .. versionadded:: 1.24.0

1671

1672 max_results (Optional[int]):

1673 Maximum number of rows to include in the result. No limit by default.

1674

1675 .. versionadded:: 2.21.0

1676

1677 geography_as_object (Optional[bool]):

1678 If ``True``, convert GEOGRAPHY data to :mod:`shapely`

1679 geometry objects. If ``False`` (default), don't cast

1680 geography data to :mod:`shapely` geometry objects.

1681

1682 .. versionadded:: 2.24.0

1683

1684 bool_dtype (Optional[pandas.Series.dtype, None]):

1685 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)

1686 to convert BigQuery Boolean type, instead of relying on the default

1687 ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,

1688 then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean

1689 type can be found at:

1690 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type

1691

1692 .. versionadded:: 3.7.1

1693

1694 int_dtype (Optional[pandas.Series.dtype, None]):

1695 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)

1696 to convert BigQuery Integer types, instead of relying on the default

1697 ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,

1698 then the data type will be ``numpy.dtype("int64")``. A list of BigQuery

1699 Integer types can be found at:

1700 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types

1701

1702 .. versionadded:: 3.7.1

1703

1704 float_dtype (Optional[pandas.Series.dtype, None]):

1705 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)

1706 to convert BigQuery Float type, instead of relying on the default

1707 ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,

1708 then the data type will be ``numpy.dtype("float64")``. BigQuery Float

1709 type can be found at:

1710 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types

1711

1712 .. versionadded:: 3.7.1

1713

1714 string_dtype (Optional[pandas.Series.dtype, None]):

1715 If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to

1716 convert BigQuery String type, instead of relying on the default

1717 ``numpy.dtype("object")``. If you explicitly set the value to ``None``,

1718 then the data type will be ``numpy.dtype("object")``. BigQuery String

1719 type can be found at:

1720 https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type

1721

1722 .. versionadded:: 3.7.1

1723

1724 Returns:

1725 pandas.DataFrame:

1726 A :class:`~pandas.DataFrame` populated with row data

1727 and column headers from the query results. The column

1728 headers are derived from the destination table's

1729 schema.

1730

1731 Raises:

1732 ValueError:

1733 If the :mod:`pandas` library cannot be imported, or

1734 the :mod:`google.cloud.bigquery_storage_v1` module is

1735 required but cannot be imported. Also if

1736 `geography_as_object` is `True`, but the

1737 :mod:`shapely` library cannot be imported.

1738 """

1739 query_result = wait_for_query(self, progress_bar_type, max_results=max_results)

1740 return query_result.to_dataframe(

1741 bqstorage_client=bqstorage_client,

1742 dtypes=dtypes,

1743 progress_bar_type=progress_bar_type,

1744 create_bqstorage_client=create_bqstorage_client,

1745 geography_as_object=geography_as_object,

1746 bool_dtype=bool_dtype,

1747 int_dtype=int_dtype,

1748 float_dtype=float_dtype,

1749 string_dtype=string_dtype,

1750 )

1751

1752 # If changing the signature of this method, make sure to apply the same

1753 # changes to table.RowIterator.to_dataframe(), except for the max_results parameter

1754 # that should only exist here in the QueryJob method.

1755 def to_geodataframe(

1756 self,

1757 bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,

1758 dtypes: Dict[str, Any] = None,

1759 progress_bar_type: str = None,

1760 create_bqstorage_client: bool = True,

1761 max_results: Optional[int] = None,

1762 geography_column: Optional[str] = None,

1763 ) -> "geopandas.GeoDataFrame":

1764 """Return a GeoPandas GeoDataFrame from a QueryJob

1765

1766 Args:

1767 bqstorage_client (Optional[google.cloud.bigquery_storage_v1.BigQueryReadClient]):

1768 A BigQuery Storage API client. If supplied, use the faster

1769 BigQuery Storage API to fetch rows from BigQuery. This

1770 API is a billable API.

1771

1772 This method requires the ``fastavro`` and

1773 ``google-cloud-bigquery-storage`` libraries.

1774

1775 Reading from a specific partition or snapshot is not

1776 currently supported by this method.

1777

1778 dtypes (Optional[Map[str, Union[str, pandas.Series.dtype]]]):

1779 A dictionary of column names pandas ``dtype``s. The provided

1780 ``dtype`` is used when constructing the series for the column

1781 specified. Otherwise, the default pandas behavior is used.

1782

1783 progress_bar_type (Optional[str]):

1784 If set, use the `tqdm <https://tqdm.github.io/>`_ library to

1785 display a progress bar while the data downloads. Install the

1786 ``tqdm`` package to use this feature.

1787

1788 See

1789 :func:`~google.cloud.bigquery.table.RowIterator.to_dataframe`

1790 for details.

1791

1792 .. versionadded:: 1.11.0

1793 create_bqstorage_client (Optional[bool]):

1794 If ``True`` (default), create a BigQuery Storage API client

1795 using the default API settings. The BigQuery Storage API

1796 is a faster way to fetch rows from BigQuery. See the

1797 ``bqstorage_client`` parameter for more information.

1798

1799 This argument does nothing if ``bqstorage_client`` is supplied.

1800

1801 .. versionadded:: 1.24.0

1802

1803 max_results (Optional[int]):

1804 Maximum number of rows to include in the result. No limit by default.

1805

1806 .. versionadded:: 2.21.0

1807

1808 geography_column (Optional[str]):

1809 If there are more than one GEOGRAPHY column,

1810 identifies which one to use to construct a GeoPandas

1811 GeoDataFrame. This option can be ommitted if there's

1812 only one GEOGRAPHY column.

1813

1814 Returns:

1815 geopandas.GeoDataFrame:

1816 A :class:`geopandas.GeoDataFrame` populated with row

1817 data and column headers from the query results. The

1818 column headers are derived from the destination

1819 table's schema.

1820

1821 Raises:

1822 ValueError:

1823 If the :mod:`geopandas` library cannot be imported, or the

1824 :mod:`google.cloud.bigquery_storage_v1` module is

1825 required but cannot be imported.

1826

1827 .. versionadded:: 2.24.0

1828 """

1829 query_result = wait_for_query(self, progress_bar_type, max_results=max_results)

1830 return query_result.to_geodataframe(

1831 bqstorage_client=bqstorage_client,

1832 dtypes=dtypes,

1833 progress_bar_type=progress_bar_type,

1834 create_bqstorage_client=create_bqstorage_client,

1835 geography_column=geography_column,

1836 )

1837

1838 def __iter__(self):

1839 return iter(self.result())

1840

1841

1842class QueryPlanEntryStep(object):

1843 """Map a single step in a query plan entry.

1844

1845 Args:

1846 kind (str): step type.

1847 substeps (List): names of substeps.

1848 """

1849

1850 def __init__(self, kind, substeps):

1851 self.kind = kind

1852 self.substeps = list(substeps)

1853

1854 @classmethod

1855 def from_api_repr(cls, resource: dict) -> "QueryPlanEntryStep":

1856 """Factory: construct instance from the JSON repr.

1857

1858 Args:

1859 resource (Dict): JSON representation of the entry.

1860

1861 Returns:

1862 google.cloud.bigquery.job.QueryPlanEntryStep:

1863 New instance built from the resource.

1864 """

1865 return cls(kind=resource.get("kind"), substeps=resource.get("substeps", ()))

1866

1867 def __eq__(self, other):

1868 if not isinstance(other, self.__class__):

1869 return NotImplemented

1870 return self.kind == other.kind and self.substeps == other.substeps

1871

1872

1873class QueryPlanEntry(object):

1874 """QueryPlanEntry represents a single stage of a query execution plan.

1875

1876 See

1877 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#ExplainQueryStage

1878 for the underlying API representation within query statistics.

1879 """

1880

1881 def __init__(self):

1882 self._properties = {}

1883

1884 @classmethod

1885 def from_api_repr(cls, resource: dict) -> "QueryPlanEntry":

1886 """Factory: construct instance from the JSON repr.

1887

1888 Args:

1889 resource(Dict[str: object]):

1890 ExplainQueryStage representation returned from API.

1891

1892 Returns:

1893 google.cloud.bigquery.job.QueryPlanEntry:

1894 Query plan entry parsed from ``resource``.

1895 """

1896 entry = cls()

1897 entry._properties = resource

1898 return entry

1899

1900 @property

1901 def name(self):

1902 """Optional[str]: Human-readable name of the stage."""

1903 return self._properties.get("name")

1904

1905 @property

1906 def entry_id(self):

1907 """Optional[str]: Unique ID for the stage within the plan."""

1908 return self._properties.get("id")

1909

1910 @property

1911 def start(self):

1912 """Optional[Datetime]: Datetime when the stage started."""

1913 if self._properties.get("startMs") is None:

1914 return None

1915 return _helpers._datetime_from_microseconds(

1916 int(self._properties.get("startMs")) * 1000.0

1917 )

1918

1919 @property

1920 def end(self):

1921 """Optional[Datetime]: Datetime when the stage ended."""

1922 if self._properties.get("endMs") is None:

1923 return None

1924 return _helpers._datetime_from_microseconds(

1925 int(self._properties.get("endMs")) * 1000.0

1926 )

1927

1928 @property

1929 def input_stages(self):

1930 """List(int): Entry IDs for stages that were inputs for this stage."""

1931 if self._properties.get("inputStages") is None:

1932 return []

1933 return [

1934 _helpers._int_or_none(entry)

1935 for entry in self._properties.get("inputStages")

1936 ]

1937

1938 @property

1939 def parallel_inputs(self):

1940 """Optional[int]: Number of parallel input segments within

1941 the stage.

1942 """

1943 return _helpers._int_or_none(self._properties.get("parallelInputs"))

1944

1945 @property

1946 def completed_parallel_inputs(self):

1947 """Optional[int]: Number of parallel input segments completed."""

1948 return _helpers._int_or_none(self._properties.get("completedParallelInputs"))

1949

1950 @property

1951 def wait_ms_avg(self):

1952 """Optional[int]: Milliseconds the average worker spent waiting to

1953 be scheduled.

1954 """

1955 return _helpers._int_or_none(self._properties.get("waitMsAvg"))

1956

1957 @property

1958 def wait_ms_max(self):

1959 """Optional[int]: Milliseconds the slowest worker spent waiting to

1960 be scheduled.

1961 """

1962 return _helpers._int_or_none(self._properties.get("waitMsMax"))

1963

1964 @property

1965 def wait_ratio_avg(self):

1966 """Optional[float]: Ratio of time the average worker spent waiting

1967 to be scheduled, relative to the longest time spent by any worker in

1968 any stage of the overall plan.

1969 """

1970 return self._properties.get("waitRatioAvg")

1971

1972 @property

1973 def wait_ratio_max(self):

1974 """Optional[float]: Ratio of time the slowest worker spent waiting

1975 to be scheduled, relative to the longest time spent by any worker in

1976 any stage of the overall plan.

1977 """

1978 return self._properties.get("waitRatioMax")

1979

1980 @property

1981 def read_ms_avg(self):

1982 """Optional[int]: Milliseconds the average worker spent reading

1983 input.

1984 """

1985 return _helpers._int_or_none(self._properties.get("readMsAvg"))

1986

1987 @property

1988 def read_ms_max(self):

1989 """Optional[int]: Milliseconds the slowest worker spent reading

1990 input.

1991 """

1992 return _helpers._int_or_none(self._properties.get("readMsMax"))

1993

1994 @property

1995 def read_ratio_avg(self):

1996 """Optional[float]: Ratio of time the average worker spent reading

1997 input, relative to the longest time spent by any worker in any stage

1998 of the overall plan.

1999 """

2000 return self._properties.get("readRatioAvg")

2001

2002 @property

2003 def read_ratio_max(self):

2004 """Optional[float]: Ratio of time the slowest worker spent reading

2005 to be scheduled, relative to the longest time spent by any worker in

2006 any stage of the overall plan.

2007 """

2008 return self._properties.get("readRatioMax")

2009

2010 @property

2011 def compute_ms_avg(self):

2012 """Optional[int]: Milliseconds the average worker spent on CPU-bound

2013 processing.

2014 """

2015 return _helpers._int_or_none(self._properties.get("computeMsAvg"))

2016

2017 @property

2018 def compute_ms_max(self):

2019 """Optional[int]: Milliseconds the slowest worker spent on CPU-bound

2020 processing.

2021 """

2022 return _helpers._int_or_none(self._properties.get("computeMsMax"))

2023

2024 @property

2025 def compute_ratio_avg(self):

2026 """Optional[float]: Ratio of time the average worker spent on

2027 CPU-bound processing, relative to the longest time spent by any

2028 worker in any stage of the overall plan.

2029 """

2030 return self._properties.get("computeRatioAvg")

2031

2032 @property

2033 def compute_ratio_max(self):

2034 """Optional[float]: Ratio of time the slowest worker spent on

2035 CPU-bound processing, relative to the longest time spent by any

2036 worker in any stage of the overall plan.

2037 """

2038 return self._properties.get("computeRatioMax")

2039

2040 @property

2041 def write_ms_avg(self):

2042 """Optional[int]: Milliseconds the average worker spent writing

2043 output data.

2044 """

2045 return _helpers._int_or_none(self._properties.get("writeMsAvg"))

2046

2047 @property

2048 def write_ms_max(self):

2049 """Optional[int]: Milliseconds the slowest worker spent writing

2050 output data.

2051 """

2052 return _helpers._int_or_none(self._properties.get("writeMsMax"))

2053

2054 @property

2055 def write_ratio_avg(self):

2056 """Optional[float]: Ratio of time the average worker spent writing

2057 output data, relative to the longest time spent by any worker in any

2058 stage of the overall plan.

2059 """

2060 return self._properties.get("writeRatioAvg")

2061

2062 @property

2063 def write_ratio_max(self):

2064 """Optional[float]: Ratio of time the slowest worker spent writing

2065 output data, relative to the longest time spent by any worker in any

2066 stage of the overall plan.

2067 """

2068 return self._properties.get("writeRatioMax")

2069

2070 @property

2071 def records_read(self):

2072 """Optional[int]: Number of records read by this stage."""

2073 return _helpers._int_or_none(self._properties.get("recordsRead"))

2074

2075 @property

2076 def records_written(self):

2077 """Optional[int]: Number of records written by this stage."""

2078 return _helpers._int_or_none(self._properties.get("recordsWritten"))

2079

2080 @property

2081 def status(self):

2082 """Optional[str]: status of this stage."""

2083 return self._properties.get("status")

2084

2085 @property

2086 def shuffle_output_bytes(self):

2087 """Optional[int]: Number of bytes written by this stage to

2088 intermediate shuffle.

2089 """

2090 return _helpers._int_or_none(self._properties.get("shuffleOutputBytes"))

2091

2092 @property

2093 def shuffle_output_bytes_spilled(self):

2094 """Optional[int]: Number of bytes written by this stage to

2095 intermediate shuffle and spilled to disk.

2096 """

2097 return _helpers._int_or_none(self._properties.get("shuffleOutputBytesSpilled"))

2098

2099 @property

2100 def steps(self):

2101 """List(QueryPlanEntryStep): List of step operations performed by

2102 each worker in the stage.

2103 """

2104 return [

2105 QueryPlanEntryStep.from_api_repr(step)

2106 for step in self._properties.get("steps", [])

2107 ]

2108

2109

2110class TimelineEntry(object):

2111 """TimelineEntry represents progress of a query job at a particular

2112 point in time.

2113

2114 See

2115 https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#querytimelinesample

2116 for the underlying API representation within query statistics.

2117 """

2118

2119 def __init__(self):

2120 self._properties = {}

2121

2122 @classmethod

2123 def from_api_repr(cls, resource):

2124 """Factory: construct instance from the JSON repr.

2125

2126 Args:

2127 resource(Dict[str: object]):

2128 QueryTimelineSample representation returned from API.

2129

2130 Returns:

2131 google.cloud.bigquery.TimelineEntry:

2132 Timeline sample parsed from ``resource``.

2133 """

2134 entry = cls()

2135 entry._properties = resource

2136 return entry

2137

2138 @property

2139 def elapsed_ms(self):

2140 """Optional[int]: Milliseconds elapsed since start of query

2141 execution."""

2142 return _helpers._int_or_none(self._properties.get("elapsedMs"))

2143

2144 @property

2145 def active_units(self):

2146 """Optional[int]: Current number of input units being processed

2147 by workers, reported as largest value since the last sample."""

2148 return _helpers._int_or_none(self._properties.get("activeUnits"))

2149

2150 @property

2151 def pending_units(self):

2152 """Optional[int]: Current number of input units remaining for

2153 query stages active at this sample time."""

2154 return _helpers._int_or_none(self._properties.get("pendingUnits"))

2155

2156 @property

2157 def completed_units(self):

2158 """Optional[int]: Current number of input units completed by

2159 this query."""

2160 return _helpers._int_or_none(self._properties.get("completedUnits"))

2161

2162 @property

2163 def slot_millis(self):

2164 """Optional[int]: Cumulative slot-milliseconds consumed by

2165 this query."""

2166 return _helpers._int_or_none(self._properties.get("totalSlotMs"))