Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/google/cloud/bigquery/external_config.py: 54%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

555 statements  

1# Copyright 2017 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Define classes that describe external data sources. 

16 

17 These are used for both Table.externalDataConfiguration and 

18 Job.configuration.query.tableDefinitions. 

19""" 

20 

21from __future__ import absolute_import, annotations 

22 

23import base64 

24import copy 

25import typing 

26from typing import Any, Dict, FrozenSet, Iterable, Optional, Union 

27 

28from google.cloud.bigquery._helpers import _to_bytes 

29from google.cloud.bigquery._helpers import _bytes_to_json 

30from google.cloud.bigquery._helpers import _int_or_none 

31from google.cloud.bigquery._helpers import _str_or_none 

32from google.cloud.bigquery import _helpers 

33from google.cloud.bigquery.enums import SourceColumnMatch 

34from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions 

35from google.cloud.bigquery import schema 

36from google.cloud.bigquery.schema import SchemaField 

37 

38 

class ExternalSourceFormat(object):
    """The data format of files backing an external data source.

    Note that the values accepted for external data sources differ from the
    set used when loading data (see
    :class:`~google.cloud.bigquery.job.SourceFormat`).
    """

    CSV = "CSV"
    """Specifies CSV format."""

    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    """Specifies Google Sheets format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies Avro format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Specifies datastore backup format"""

    ORC = "ORC"
    """Specifies ORC format."""

    PARQUET = "PARQUET"
    """Specifies Parquet format."""

    BIGTABLE = "BIGTABLE"
    """Specifies Bigtable format."""

70 

71 

class BigtableColumn(object):
    """Options for a Bigtable column."""

    def __init__(self):
        # Raw JSON representation mirroring the BigQuery API resource.
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not ``STRING``.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def field_name(self):
        """str: Identifier to use when the qualifier is not a valid BigQuery
        field identifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name
        """
        return self._properties.get("fieldName")

    @field_name.setter
    def field_name(self, value):
        self._properties["fieldName"] = value

    @property
    def only_read_latest(self):
        """bool: If set, only the latest version of values in this column
        is exposed.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def qualifier_encoded(self):
        """Union[str, bytes]: The qualifier, encoded in binary.

        The module handles the base64 encoding/decoding for you.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded
        """
        encoded = self._properties.get("qualifierEncoded")
        if encoded is None:
            return None
        return base64.standard_b64decode(_to_bytes(encoded))

    @qualifier_encoded.setter
    def qualifier_encoded(self, value):
        self._properties["qualifierEncoded"] = _bytes_to_json(value)

    @property
    def qualifier_string(self):
        """str: A valid UTF-8 string qualifier

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string
        """
        return self._properties.get("qualifierString")

    @qualifier_string.setter
    def qualifier_string(self, value):
        self._properties["qualifierString"] = value

    @property
    def type_(self):
        """str: The type to which values in cells of this column are converted.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumn":
        """Factory: construct a :class:`~.external_config.BigtableColumn`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumn`
                instance in the same representation as is returned from the
                API.

        Returns:
            external_config.BigtableColumn: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance

190 

191 

class BigtableColumnFamily(object):
    """Options for a Bigtable column family."""

    def __init__(self):
        # Raw JSON representation mirroring the BigQuery API resource.
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not ``STRING``.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def family_id(self):
        """str: Identifier of the column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id
        """
        return self._properties.get("familyId")

    @family_id.setter
    def family_id(self, value):
        self._properties["familyId"] = value

    @property
    def only_read_latest(self):
        """bool: If set, only the latest version of values is exposed for
        every column in this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def type_(self):
        """str: The type to which values in cells of this column family are
        converted.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def columns(self):
        """List[BigtableColumn]: Lists of columns
        that should be exposed as individual fields.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns
        """
        raw_columns = self._properties.get("columns", [])
        return [BigtableColumn.from_api_repr(item) for item in raw_columns]

    @columns.setter
    def columns(self, value):
        self._properties["columns"] = [item.to_api_repr() for item in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
        """Factory: construct a :class:`~.external_config.BigtableColumnFamily`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumnFamily`
                instance in the same representation as is returned from the
                API.

        Returns:
            :class:`~.external_config.BigtableColumnFamily`:
                Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance

293 

294 

class BigtableOptions(object):
    """Options that describe how to treat Bigtable tables as BigQuery tables."""

    # Source format this options class applies to, and the key under which
    # its properties live in the ExternalConfig resource.
    _SOURCE_FORMAT = "BIGTABLE"
    _RESOURCE_NAME = "bigtableOptions"

    def __init__(self):
        # Raw JSON representation mirroring the BigQuery API resource.
        self._properties = {}

    @property
    def ignore_unspecified_column_families(self):
        """bool: If :data:`True`, ignore columns not specified in
        :attr:`column_families` list. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families
        """
        return self._properties.get("ignoreUnspecifiedColumnFamilies")

    @ignore_unspecified_column_families.setter
    def ignore_unspecified_column_families(self, value):
        self._properties["ignoreUnspecifiedColumnFamilies"] = value

    @property
    def read_rowkey_as_string(self):
        """bool: If :data:`True`, rowkey column families will be read and
        converted to string. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string
        """
        return self._properties.get("readRowkeyAsString")

    @read_rowkey_as_string.setter
    def read_rowkey_as_string(self, value):
        self._properties["readRowkeyAsString"] = value

    @property
    def column_families(self):
        """List[:class:`~.external_config.BigtableColumnFamily`]: List of
        column families to expose in the table schema along with their types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families
        """
        raw_families = self._properties.get("columnFamilies", [])
        return [BigtableColumnFamily.from_api_repr(item) for item in raw_families]

    @column_families.setter
    def column_families(self, value):
        self._properties["columnFamilies"] = [item.to_api_repr() for item in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableOptions":
        """Factory: construct a :class:`~.external_config.BigtableOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            BigtableOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance

373 

374 

class CSVOptions(object):
    """Options that describe how to treat CSV files as BigQuery tables."""

    # Source format this options class applies to, and the key under which
    # its properties live in the ExternalConfig resource.
    _SOURCE_FORMAT = "CSV"
    _RESOURCE_NAME = "csvOptions"

    def __init__(self):
        # Raw JSON representation mirroring the BigQuery API resource.
        self._properties = {}

    @property
    def allow_jagged_rows(self):
        """bool: If :data:`True`, BigQuery treats missing trailing columns as
        null values. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows
        """
        return self._properties.get("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._properties["allowJaggedRows"] = value

    @property
    def allow_quoted_newlines(self):
        """bool: If :data:`True`, quoted data sections containing newline
        characters in a CSV file are allowed. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines
        """
        return self._properties.get("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._properties["allowQuotedNewlines"] = value

    @property
    def encoding(self):
        """str: The character encoding of the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def preserve_ascii_control_characters(self):
        """bool: Indicates whether embedded ASCII control characters
        (the first 32 characters in the ASCII table, from ``\\x00`` to
        ``\\x1F``) are preserved.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
        """
        return self._properties.get("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        self._properties["preserveAsciiControlCharacters"] = value

    @property
    def field_delimiter(self):
        """str: The separator for fields in a CSV file. Defaults to comma (',').

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter
        """
        return self._properties.get("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._properties["fieldDelimiter"] = value

    @property
    def quote_character(self):
        """str: The value used to quote data sections in a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote
        """
        return self._properties.get("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._properties["quote"] = value

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The API represents this int64 field as a string.
        self._properties["skipLeadingRows"] = str(value)

    @property
    def source_column_match(self) -> Optional[SourceColumnMatch]:
        """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
        strategy used to match loaded columns to the schema. If not set, a sensible
        default is chosen based on how the schema is provided. If autodetect is
        used, then columns are matched by name. Otherwise, columns are matched by
        position. This is done to keep the behavior backward-compatible.

        Acceptable values are:

        SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option.
        POSITION: matches by position. This assumes that the columns are ordered
        the same way as the schema.
        NAME: matches by name. This reads the header row as column names and
        reorders columns to match the field names in the schema.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match
        """
        raw = self._properties.get("sourceColumnMatch")
        return None if raw is None else SourceColumnMatch(raw)

    @source_column_match.setter
    def source_column_match(self, value: Union[SourceColumnMatch, str, None]):
        if value is not None and not isinstance(value, (SourceColumnMatch, str)):
            raise TypeError(
                "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None"
            )
        normalized = value.value if isinstance(value, SourceColumnMatch) else value
        # Falsy values (including empty string) are stored as None.
        self._properties["sourceColumnMatch"] = normalized if normalized else None

    @property
    def null_markers(self) -> Optional[Iterable[str]]:
        """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file.

        .. note::
            null_marker and null_markers can't be set at the same time.
            If null_marker is set, null_markers has to be not set.
            If null_markers is set, null_marker has to be not set.
            If both null_marker and null_markers are set at the same time, a user error would be thrown.
            Any strings listed in null_markers, including empty string would be interpreted as SQL NULL.
            This applies to all column types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
        """
        return self._properties.get("nullMarkers")

    @null_markers.setter
    def null_markers(self, value: Optional[Iterable[str]]):
        self._properties["nullMarkers"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "CSVOptions":
        """Factory: construct a :class:`~.external_config.CSVOptions` instance
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.CSVOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            CSVOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance

557 

558 

class GoogleSheetsOptions(object):
    """Options that describe how to treat Google Sheets as BigQuery tables."""

    # Source format this options class applies to, and the key under which
    # its properties live in the ExternalConfig resource.
    _SOURCE_FORMAT = "GOOGLE_SHEETS"
    _RESOURCE_NAME = "googleSheetsOptions"

    def __init__(self):
        # Raw JSON representation mirroring the BigQuery API resource.
        self._properties = {}

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a sheet that BigQuery will
        skip when reading the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The API represents this int64 field as a string.
        self._properties["skipLeadingRows"] = str(value)

    @property
    def range(self):
        """str: The range of a sheet that BigQuery will query from.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range
        """
        return _str_or_none(self._properties.get("range"))

    @range.setter
    def range(self, value):
        self._properties["range"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
        """Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.GoogleSheetsOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            GoogleSheetsOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance

620 

621 

# All source-format-specific option classes. Each exposes a ``_SOURCE_FORMAT``
# (the ExternalSourceFormat value it handles) and a ``_RESOURCE_NAME`` (its key
# in the ExternalConfig API resource); ``ExternalConfig.options`` iterates this
# tuple to find the class matching the configured source format.
_OPTION_CLASSES = (
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
)

# Union of all source-format-specific option types, used as the return type
# of ``ExternalConfig.options``.
OptionsType = Union[
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
]

637 

638 

class HivePartitioningOptions(object):
    """[Beta] Options that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self) -> None:
        # Raw JSON representation mirroring the BigQuery API resource.
        self._properties: Dict[str, Any] = {}

    @property
    def mode(self):
        """Optional[str]: When set, what mode of hive partitioning to use when reading data.

        Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: When hive partition detection is requested, a common
        prefix shared by all source URIs is required.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    @property
    def require_partition_filter(self):
        """Optional[bool]: If set to true, queries over the partitioned table
        require a partition filter that can be used for partition elimination
        to be specified.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties["requirePartitionFilter"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance

722 

723 

724class ExternalConfig(object): 

725 """Description of an external data source. 

726 

727 Args: 

728 source_format (ExternalSourceFormat): 

729 See :attr:`source_format`. 

730 """ 

731 

732 def __init__(self, source_format) -> None: 

733 self._properties = {"sourceFormat": source_format} 

734 

735 @property 

736 def source_format(self): 

737 """:class:`~.external_config.ExternalSourceFormat`: 

738 Format of external source. 

739 

740 See 

741 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format 

742 """ 

743 return self._properties["sourceFormat"] 

744 

745 @property 

746 def options(self) -> Optional[OptionsType]: 

747 """Source-specific options.""" 

748 for optcls in _OPTION_CLASSES: 

749 # The code below is too much magic for mypy to handle. 

750 if self.source_format == optcls._SOURCE_FORMAT: # type: ignore 

751 options: OptionsType = optcls() # type: ignore 

752 options._properties = self._properties.setdefault( 

753 optcls._RESOURCE_NAME, {} # type: ignore 

754 ) 

755 return options 

756 

757 # No matching source format found. 

758 return None 

759 

760 @property 

761 def autodetect(self): 

762 """bool: If :data:`True`, try to detect schema and format options 

763 automatically. 

764 

765 See 

766 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect 

767 """ 

768 return self._properties.get("autodetect") 

769 

770 @autodetect.setter 

771 def autodetect(self, value): 

772 self._properties["autodetect"] = value 

773 

774 @property 

775 def compression(self): 

776 """str: The compression type of the data source. 

777 

778 See 

779 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression 

780 """ 

781 return self._properties.get("compression") 

782 

783 @compression.setter 

784 def compression(self, value): 

785 self._properties["compression"] = value 

786 

787 @property 

788 def decimal_target_types(self) -> Optional[FrozenSet[str]]: 

789 """Possible SQL data types to which the source decimal values are converted. 

790 

791 See: 

792 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types 

793 

794 .. versionadded:: 2.21.0 

795 """ 

796 prop = self._properties.get("decimalTargetTypes") 

797 if prop is not None: 

798 prop = frozenset(prop) 

799 return prop 

800 

801 @decimal_target_types.setter 

802 def decimal_target_types(self, value: Optional[Iterable[str]]): 

803 if value is not None: 

804 self._properties["decimalTargetTypes"] = list(value) 

805 else: 

806 if "decimalTargetTypes" in self._properties: 

807 del self._properties["decimalTargetTypes"] 

808 

809 @property 

810 def hive_partitioning(self): 

811 """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \ 

812 it configures hive partitioning support. 

813 

814 .. note:: 

815 **Experimental**. This feature is experimental and might change or 

816 have limited support. 

817 

818 See 

819 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options 

820 """ 

821 prop = self._properties.get("hivePartitioningOptions") 

822 if prop is None: 

823 return None 

824 return HivePartitioningOptions.from_api_repr(prop) 

825 

826 @hive_partitioning.setter 

827 def hive_partitioning(self, value): 

828 prop = value.to_api_repr() if value is not None else None 

829 self._properties["hivePartitioningOptions"] = prop 

830 

831 @property 

832 def reference_file_schema_uri(self): 

833 """Optional[str]: 

834 When creating an external table, the user can provide a reference file with the 

835 table schema. This is enabled for the following formats: 

836 

837 AVRO, PARQUET, ORC 

838 """ 

839 return self._properties.get("referenceFileSchemaUri") 

840 

841 @reference_file_schema_uri.setter 

842 def reference_file_schema_uri(self, value): 

843 self._properties["referenceFileSchemaUri"] = value 

844 

845 @property 

846 def ignore_unknown_values(self): 

847 """bool: If :data:`True`, extra values that are not represented in the 

848 table schema are ignored. Defaults to :data:`False`. 

849 

850 See 

851 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values 

852 """ 

853 return self._properties.get("ignoreUnknownValues") 

854 

855 @ignore_unknown_values.setter 

856 def ignore_unknown_values(self, value): 

857 self._properties["ignoreUnknownValues"] = value 

858 

859 @property 

860 def max_bad_records(self): 

861 """int: The maximum number of bad records that BigQuery can ignore when 

862 reading data. 

863 

864 See 

865 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records 

866 """ 

867 return self._properties.get("maxBadRecords") 

868 

869 @max_bad_records.setter 

870 def max_bad_records(self, value): 

871 self._properties["maxBadRecords"] = value 

872 

873 @property 

874 def source_uris(self): 

875 """List[str]: URIs that point to your data in Google Cloud. 

876 

877 See 

878 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris 

879 """ 

880 return self._properties.get("sourceUris", []) 

881 

882 @source_uris.setter 

883 def source_uris(self, value): 

884 self._properties["sourceUris"] = value 

885 

886 @property 

887 def schema(self): 

888 """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema 

889 for the data. 

890 

891 See 

892 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema 

893 """ 

894 prop: Dict[str, Any] = typing.cast( 

895 Dict[str, Any], self._properties.get("schema", {}) 

896 ) 

897 return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])] 

898 

899 @schema.setter 

900 def schema(self, value): 

901 prop = value 

902 if value is not None: 

903 prop = {"fields": [field.to_api_repr() for field in value]} 

904 self._properties["schema"] = prop 

905 

906 @property 

907 def date_format(self) -> Optional[str]: 

908 """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values. 

909 

910 See: 

911 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format 

912 """ 

913 result = self._properties.get("dateFormat") 

914 return typing.cast(str, result) 

915 

916 @date_format.setter 

917 def date_format(self, value: Optional[str]): 

918 self._properties["dateFormat"] = value 

919 

920 @property 

921 def datetime_format(self) -> Optional[str]: 

922 """Optional[str]: Format used to parse DATETIME values. Supports C-style 

923 and SQL-style values. 

924 

925 See: 

926 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format 

927 """ 

928 result = self._properties.get("datetimeFormat") 

929 return typing.cast(str, result) 

930 

931 @datetime_format.setter 

932 def datetime_format(self, value: Optional[str]): 

933 self._properties["datetimeFormat"] = value 

934 

935 @property 

936 def time_zone(self) -> Optional[str]: 

937 """Optional[str]: Time zone used when parsing timestamp values that do not 

938 have specific time zone information (e.g. 2024-04-20 12:34:56). The expected 

939 format is an IANA timezone string (e.g. America/Los_Angeles). 

940 

941 See: 

942 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone 

943 """ 

944 

945 result = self._properties.get("timeZone") 

946 return typing.cast(str, result) 

947 

948 @time_zone.setter 

949 def time_zone(self, value: Optional[str]): 

950 self._properties["timeZone"] = value 

951 

952 @property 

953 def time_format(self) -> Optional[str]: 

954 """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values. 

955 

956 See: 

957 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format 

958 """ 

959 result = self._properties.get("timeFormat") 

960 return typing.cast(str, result) 

961 

962 @time_format.setter 

963 def time_format(self, value: Optional[str]): 

964 self._properties["timeFormat"] = value 

965 

966 @property 

967 def timestamp_format(self) -> Optional[str]: 

968 """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values. 

969 

970 See: 

971 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format 

972 """ 

973 result = self._properties.get("timestampFormat") 

974 return typing.cast(str, result) 

975 

976 @timestamp_format.setter 

977 def timestamp_format(self, value: Optional[str]): 

978 self._properties["timestampFormat"] = value 

979 

980 @property 

981 def connection_id(self): 

982 """Optional[str]: [Experimental] ID of a BigQuery Connection API 

983 resource. 

984 

985 .. WARNING:: 

986 

987 This feature is experimental. Pre-GA features may have limited 

988 support, and changes to pre-GA features may not be compatible with 

989 other pre-GA versions. 

990 """ 

991 return self._properties.get("connectionId") 

992 

993 @connection_id.setter 

994 def connection_id(self, value): 

995 self._properties["connectionId"] = value 

996 

997 @property 

998 def avro_options(self) -> Optional[AvroOptions]: 

999 """Additional properties to set if ``sourceFormat`` is set to AVRO. 

1000 

1001 See: 

1002 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options 

1003 """ 

1004 if self.source_format == ExternalSourceFormat.AVRO: 

1005 self._properties.setdefault(AvroOptions._RESOURCE_NAME, {}) 

1006 resource = self._properties.get(AvroOptions._RESOURCE_NAME) 

1007 if resource is None: 

1008 return None 

1009 options = AvroOptions() 

1010 options._properties = resource 

1011 return options 

1012 

1013 @avro_options.setter 

1014 def avro_options(self, value): 

1015 if self.source_format != ExternalSourceFormat.AVRO: 

1016 msg = f"Cannot set Avro options, source format is {self.source_format}" 

1017 raise TypeError(msg) 

1018 self._properties[AvroOptions._RESOURCE_NAME] = value._properties 

1019 

1020 @property 

1021 def bigtable_options(self) -> Optional[BigtableOptions]: 

1022 """Additional properties to set if ``sourceFormat`` is set to BIGTABLE. 

1023 

1024 See: 

1025 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options 

1026 """ 

1027 if self.source_format == ExternalSourceFormat.BIGTABLE: 

1028 self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {}) 

1029 resource = self._properties.get(BigtableOptions._RESOURCE_NAME) 

1030 if resource is None: 

1031 return None 

1032 options = BigtableOptions() 

1033 options._properties = resource 

1034 return options 

1035 

1036 @bigtable_options.setter 

1037 def bigtable_options(self, value): 

1038 if self.source_format != ExternalSourceFormat.BIGTABLE: 

1039 msg = f"Cannot set Bigtable options, source format is {self.source_format}" 

1040 raise TypeError(msg) 

1041 self._properties[BigtableOptions._RESOURCE_NAME] = value._properties 

1042 

1043 @property 

1044 def csv_options(self) -> Optional[CSVOptions]: 

1045 """Additional properties to set if ``sourceFormat`` is set to CSV. 

1046 

1047 See: 

1048 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options 

1049 """ 

1050 if self.source_format == ExternalSourceFormat.CSV: 

1051 self._properties.setdefault(CSVOptions._RESOURCE_NAME, {}) 

1052 resource = self._properties.get(CSVOptions._RESOURCE_NAME) 

1053 if resource is None: 

1054 return None 

1055 options = CSVOptions() 

1056 options._properties = resource 

1057 return options 

1058 

1059 @csv_options.setter 

1060 def csv_options(self, value): 

1061 if self.source_format != ExternalSourceFormat.CSV: 

1062 msg = f"Cannot set CSV options, source format is {self.source_format}" 

1063 raise TypeError(msg) 

1064 self._properties[CSVOptions._RESOURCE_NAME] = value._properties 

1065 

1066 @property 

1067 def google_sheets_options(self) -> Optional[GoogleSheetsOptions]: 

1068 """Additional properties to set if ``sourceFormat`` is set to 

1069 GOOGLE_SHEETS. 

1070 

1071 See: 

1072 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options 

1073 """ 

1074 if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS: 

1075 self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {}) 

1076 resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME) 

1077 if resource is None: 

1078 return None 

1079 options = GoogleSheetsOptions() 

1080 options._properties = resource 

1081 return options 

1082 

1083 @google_sheets_options.setter 

1084 def google_sheets_options(self, value): 

1085 if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS: 

1086 msg = f"Cannot set Google Sheets options, source format is {self.source_format}" 

1087 raise TypeError(msg) 

1088 self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties 

1089 

1090 @property 

1091 def parquet_options(self) -> Optional[ParquetOptions]: 

1092 """Additional properties to set if ``sourceFormat`` is set to PARQUET. 

1093 

1094 See: 

1095 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options 

1096 """ 

1097 if self.source_format == ExternalSourceFormat.PARQUET: 

1098 self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {}) 

1099 resource = self._properties.get(ParquetOptions._RESOURCE_NAME) 

1100 if resource is None: 

1101 return None 

1102 options = ParquetOptions() 

1103 options._properties = resource 

1104 return options 

1105 

1106 @parquet_options.setter 

1107 def parquet_options(self, value): 

1108 if self.source_format != ExternalSourceFormat.PARQUET: 

1109 msg = f"Cannot set Parquet options, source format is {self.source_format}" 

1110 raise TypeError(msg) 

1111 self._properties[ParquetOptions._RESOURCE_NAME] = value._properties 

1112 

1113 def to_api_repr(self) -> dict: 

1114 """Build an API representation of this object. 

1115 

1116 Returns: 

1117 Dict[str, Any]: 

1118 A dictionary in the format used by the BigQuery API. 

1119 """ 

1120 config = copy.deepcopy(self._properties) 

1121 return config 

1122 

1123 @classmethod 

1124 def from_api_repr(cls, resource: dict) -> "ExternalConfig": 

1125 """Factory: construct an :class:`~.external_config.ExternalConfig` 

1126 instance given its API representation. 

1127 

1128 Args: 

1129 resource (Dict[str, Any]): 

1130 Definition of an :class:`~.external_config.ExternalConfig` 

1131 instance in the same representation as is returned from the 

1132 API. 

1133 

1134 Returns: 

1135 ExternalConfig: Configuration parsed from ``resource``. 

1136 """ 

1137 config = cls(resource["sourceFormat"]) 

1138 config._properties = copy.deepcopy(resource) 

1139 return config 

1140 

1141 

class ExternalCatalogDatasetOptions:
    """Options defining open source compatible datasets living in the BigQuery catalog.

    Contains metadata of open source database, schema or namespace represented
    by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location URI for all
            tables in the dataset. Equivalent to hive metastore's database
            locationUri. Maximum length of 1024 characters. (str)
        parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters
            and properties of the open source schema. Maximum size of 2Mib.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        # All state lives in _properties, mirroring the API resource shape.
        self._properties: Dict[str, Any] = {}
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.

        Equivalent to hive metastore's database locationUri. Maximum length of
        1024 characters.
        """
        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        # Validate the type up front; None is an allowed "unset" value.
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["defaultStorageLocationUri"] = checked

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema. Maximum size of 2Mib.
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        # Validate the type up front; None is an allowed "unset" value.
        checked = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # NOTE: returns the live internal dict (no copy), matching the
        # established contract of this class.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "ExternalCatalogDatasetOptions":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'resource'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance

1213 

1214 

class ExternalCatalogTableOptions:
    """Metadata about open source compatible table. The fields contained in these
    options correspond to hive metastore's table level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the credentials to be
            used to read external storage, such as Azure Blob, Cloud Storage, or
            S3. The connection is needed to read the open source table from
            BigQuery Engine. The connection_id can have the form `..` or
            `projects//locations//connections/`.
        parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters
            and properties of the open source table. Corresponds with hive meta
            store table parameters. Maximum size of 4Mib.
        storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information
            about the physical storage of this table.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Optional[schema.StorageDescriptor] = None,
    ):
        # All state lives in _properties, mirroring the API resource shape.
        self._properties: Dict[str, Any] = {}
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be
        used to read external storage, such as Azure Blob, Cloud Storage, or
        S3. The connection is needed to read the open source table from
        BigQuery Engine. The connection_id can have the form `..` or
        `projects//locations//connections/`.
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        # Validate the type up front; None is an allowed "unset" value.
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["connectionId"] = checked

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table. Corresponds with hive meta
        store table parameters. Maximum size of 4Mib.
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        # Validate the type up front; None is an allowed "unset" value.
        checked = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = checked

    @property
    def storage_descriptor(self) -> Any:
        """Optional. A storage descriptor containing information about the
        physical storage of this table."""
        prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])
        if prop is None:
            return None
        return schema.StorageDescriptor.from_api_repr(prop)

    @storage_descriptor.setter
    def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
        checked = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        # Store the raw API representation; a StorageDescriptor instance is
        # serialized first, a dict (or None) is stored as-is.
        if isinstance(checked, schema.StorageDescriptor):
            checked = checked.to_api_repr()
        self._properties["storageDescriptor"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # NOTE: returns the live internal dict (no copy), matching the
        # established contract of this class.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "ExternalCatalogTableOptions":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance