1# Copyright 2017 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Define classes that describe external data sources.
16
17 These are used for both Table.externalDataConfiguration and
18 Job.configuration.query.tableDefinitions.
19"""
20
21from __future__ import absolute_import, annotations
22
23import base64
24import copy
25import typing
26from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
27
28from google.cloud.bigquery._helpers import _to_bytes
29from google.cloud.bigquery._helpers import _bytes_to_json
30from google.cloud.bigquery._helpers import _int_or_none
31from google.cloud.bigquery._helpers import _str_or_none
32from google.cloud.bigquery import _helpers
33from google.cloud.bigquery.enums import SourceColumnMatch
34from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
35from google.cloud.bigquery import schema
36from google.cloud.bigquery.schema import SchemaField
37
38
class ExternalSourceFormat(object):
    """The format for external data files.

    Values from this class are used as the ``source_format`` argument to
    :class:`ExternalConfig`.

    Note that the set of allowed values for external data sources is different
    than the set used for loading data (see
    :class:`~google.cloud.bigquery.job.SourceFormat`).
    """

    CSV = "CSV"
    """Specifies CSV format."""

    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    """Specifies Google Sheets format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies Avro format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Specifies datastore backup format"""

    ORC = "ORC"
    """Specifies ORC format."""

    PARQUET = "PARQUET"
    """Specifies Parquet format."""

    BIGTABLE = "BIGTABLE"
    """Specifies Bigtable format."""
70
71
class BigtableColumn(object):
    """Options for a Bigtable column."""

    def __init__(self):
        # Stores the raw BigtableColumn REST resource keyed by API field name.
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def field_name(self):
        """str: Identifier to use when the qualifier is not a valid BigQuery
        field identifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name
        """
        return self._properties.get("fieldName")

    @field_name.setter
    def field_name(self, value):
        self._properties["fieldName"] = value

    @property
    def only_read_latest(self):
        """bool: Whether to expose only the latest version of values in this
        column.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def qualifier_encoded(self):
        """Union[str, bytes]: The qualifier encoded in binary.

        The type is ``str`` (Python 2.x) or ``bytes`` (Python 3.x). The module
        will handle base64 encoding for you.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded
        """
        encoded = self._properties.get("qualifierEncoded")
        if encoded is not None:
            return base64.standard_b64decode(_to_bytes(encoded))
        return None

    @qualifier_encoded.setter
    def qualifier_encoded(self, value):
        self._properties["qualifierEncoded"] = _bytes_to_json(value)

    @property
    def qualifier_string(self):
        """str: A valid UTF-8 string qualifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string
        """
        return self._properties.get("qualifierString")

    @qualifier_string.setter
    def qualifier_string(self, value):
        self._properties["qualifierString"] = value

    @property
    def type_(self):
        """str: The type to convert the value in cells of this column.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    def to_api_repr(self) -> dict:
        """Construct the API resource for this column.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumn":
        """Factory: build a :class:`~.external_config.BigtableColumn` from its
        API resource.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumn`
                instance in the same representation as is returned from the
                API.

        Returns:
            external_config.BigtableColumn: Configuration parsed from ``resource``.
        """
        column = cls()
        column._properties = copy.deepcopy(resource)
        return column
190
191
class BigtableColumnFamily(object):
    """Options for a Bigtable column family."""

    def __init__(self):
        # Stores the raw BigtableColumnFamily REST resource.
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def family_id(self):
        """str: Identifier of the column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id
        """
        return self._properties.get("familyId")

    @family_id.setter
    def family_id(self, value):
        self._properties["familyId"] = value

    @property
    def only_read_latest(self):
        """bool: Whether to expose only the latest version of values for all
        columns in this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def type_(self):
        """str: The type to convert the value in cells of this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def columns(self):
        """List[BigtableColumn]: Columns that should be exposed as individual
        fields.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns
        """
        resources = self._properties.get("columns", [])
        return [BigtableColumn.from_api_repr(resource) for resource in resources]

    @columns.setter
    def columns(self, value):
        self._properties["columns"] = [column.to_api_repr() for column in value]

    def to_api_repr(self) -> dict:
        """Construct the API resource for this column family.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
        """Factory: build a :class:`~.external_config.BigtableColumnFamily`
        from its API resource.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumnFamily`
                instance in the same representation as is returned from the
                API.

        Returns:
            :class:`~.external_config.BigtableColumnFamily`:
                Configuration parsed from ``resource``.
        """
        family = cls()
        family._properties = copy.deepcopy(resource)
        return family
293
294
class BigtableOptions(object):
    """Options that describe how to treat Bigtable tables as BigQuery tables."""

    # Source format this options class corresponds to, and the key under which
    # it is stored in the ExternalDataConfiguration resource.
    _SOURCE_FORMAT = "BIGTABLE"
    _RESOURCE_NAME = "bigtableOptions"

    def __init__(self):
        self._properties = {}

    @property
    def ignore_unspecified_column_families(self):
        """bool: If :data:`True`, ignore columns not specified in
        :attr:`column_families` list. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families
        """
        return self._properties.get("ignoreUnspecifiedColumnFamilies")

    @ignore_unspecified_column_families.setter
    def ignore_unspecified_column_families(self, value):
        self._properties["ignoreUnspecifiedColumnFamilies"] = value

    @property
    def read_rowkey_as_string(self):
        """bool: If :data:`True`, rowkey column families will be read and
        converted to string. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string
        """
        return self._properties.get("readRowkeyAsString")

    @read_rowkey_as_string.setter
    def read_rowkey_as_string(self, value):
        self._properties["readRowkeyAsString"] = value

    @property
    def column_families(self):
        """List[:class:`~.external_config.BigtableColumnFamily`]: Column
        families to expose in the table schema along with their types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families
        """
        resources = self._properties.get("columnFamilies", [])
        return [BigtableColumnFamily.from_api_repr(resource) for resource in resources]

    @column_families.setter
    def column_families(self, value):
        self._properties["columnFamilies"] = [family.to_api_repr() for family in value]

    def to_api_repr(self) -> dict:
        """Construct the API resource for these options.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableOptions":
        """Factory: build a :class:`~.external_config.BigtableOptions` from
        its API resource.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            BigtableOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
373
374
class CSVOptions(object):
    """Options that describe how to treat CSV files as BigQuery tables."""

    # Source format this options class corresponds to, and the key under which
    # it is stored in the ExternalDataConfiguration resource.
    _SOURCE_FORMAT = "CSV"
    _RESOURCE_NAME = "csvOptions"

    def __init__(self):
        # Keys and values mirror the csvOptions REST resource exactly.
        self._properties = {}

    @property
    def allow_jagged_rows(self):
        """bool: If :data:`True`, BigQuery treats missing trailing columns as
        null values. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows
        """
        return self._properties.get("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._properties["allowJaggedRows"] = value

    @property
    def allow_quoted_newlines(self):
        """bool: If :data:`True`, quoted data sections that contain newline
        characters in a CSV file are allowed. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines
        """
        return self._properties.get("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._properties["allowQuotedNewlines"] = value

    @property
    def encoding(self):
        """str: The character encoding of the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def preserve_ascii_control_characters(self):
        """bool: Indicates if the embedded ASCII control characters
        (the first 32 characters in the ASCII-table, from '\\x00' to '\\x1F')
        are preserved.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
        """
        # NOTE: the backslashes above are escaped so the docstring documents
        # the escape sequences instead of embedding literal control characters.
        return self._properties.get("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        self._properties["preserveAsciiControlCharacters"] = value

    @property
    def field_delimiter(self):
        """str: The separator for fields in a CSV file. Defaults to comma (',').

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter
        """
        return self._properties.get("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._properties["fieldDelimiter"] = value

    @property
    def quote_character(self):
        """str: The value that is used to quote data sections in a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote
        """
        return self._properties.get("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._properties["quote"] = value

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The REST API models this int64 field as a string. Guard against
        # ``None`` so clearing the option does not store the literal "None".
        self._properties["skipLeadingRows"] = str(value) if value is not None else None

    @property
    def source_column_match(self) -> Optional[SourceColumnMatch]:
        """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
        strategy used to match loaded columns to the schema. If not set, a sensible
        default is chosen based on how the schema is provided. If autodetect is
        used, then columns are matched by name. Otherwise, columns are matched by
        position. This is done to keep the behavior backward-compatible.

        Acceptable values are:

        SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option.
        POSITION: matches by position. This assumes that the columns are ordered
            the same way as the schema.
        NAME: matches by name. This reads the header row as column names and
            reorders columns to match the field names in the schema.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match
        """
        value = self._properties.get("sourceColumnMatch")
        return SourceColumnMatch(value) if value is not None else None

    @source_column_match.setter
    def source_column_match(self, value: Union[SourceColumnMatch, str, None]):
        if value is not None and not isinstance(value, (SourceColumnMatch, str)):
            raise TypeError(
                "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None"
            )
        if isinstance(value, SourceColumnMatch):
            value = value.value
        # Falsy values (e.g. empty string) are normalized to None.
        self._properties["sourceColumnMatch"] = value if value else None

    @property
    def null_markers(self) -> Optional[Iterable[str]]:
        """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file.

        .. note::
            null_marker and null_markers can't be set at the same time.
            If null_marker is set, null_markers has to be not set.
            If null_markers is set, null_marker has to be not set.
            If both null_marker and null_markers are set at the same time, a user error would be thrown.
            Any strings listed in null_markers, including empty string would be interpreted as SQL NULL.
            This applies to all column types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
        """
        return self._properties.get("nullMarkers")

    @null_markers.setter
    def null_markers(self, value: Optional[Iterable[str]]):
        self._properties["nullMarkers"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "CSVOptions":
        """Factory: construct a :class:`~.external_config.CSVOptions` instance
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.CSVOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            CSVOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config
557
558
class GoogleSheetsOptions(object):
    """Options that describe how to treat Google Sheets as BigQuery tables."""

    # Source format this options class corresponds to, and the key under which
    # it is stored in the ExternalDataConfiguration resource.
    _SOURCE_FORMAT = "GOOGLE_SHEETS"
    _RESOURCE_NAME = "googleSheetsOptions"

    def __init__(self):
        # Keys and values mirror the googleSheetsOptions REST resource exactly.
        self._properties = {}

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a sheet that BigQuery will
        skip when reading the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The REST API models this int64 field as a string. Guard against
        # ``None`` so clearing the option does not store the literal "None".
        self._properties["skipLeadingRows"] = str(value) if value is not None else None

    @property
    def range(self):
        """str: The range of a sheet that BigQuery will query from.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range
        """
        return _str_or_none(self._properties.get("range"))

    @range.setter
    def range(self, value):
        self._properties["range"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
        """Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.GoogleSheetsOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            GoogleSheetsOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config
620
621
# All source-format-specific option classes. ExternalConfig.options iterates
# this tuple and matches each class's _SOURCE_FORMAT against the configured
# sourceFormat to pick the right options resource.
_OPTION_CLASSES = (
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
)

# Type alias: any one of the source-format-specific option classes above.
OptionsType = Union[
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
]
637
638
class HivePartitioningOptions(object):
    """Options that configure hive partitioning.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self) -> None:
        # Stores the raw hivePartitioningOptions REST resource.
        self._properties: Dict[str, Any] = {}

    @property
    def mode(self):
        """Optional[str]: When set, what mode of hive partitioning to use when
        reading data.

        Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: When hive partition detection is requested, a common
        prefix for all source URIs is required.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    @property
    def require_partition_filter(self):
        """Optional[bool]: If set to true, queries over the partitioned table
        require a partition filter that can be used for partition elimination
        to be specified.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.require_partition_filter
        """
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties["requirePartitionFilter"] = value

    def to_api_repr(self) -> dict:
        """Construct the API resource for these options.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
        """Factory: build a :class:`~.external_config.HivePartitioningOptions`
        from its API resource.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
718
719
720class ExternalConfig(object):
721 """Description of an external data source.
722
723 Args:
724 source_format (ExternalSourceFormat):
725 See :attr:`source_format`.
726 """
727
728 def __init__(self, source_format) -> None:
729 self._properties = {"sourceFormat": source_format}
730
731 @property
732 def source_format(self):
733 """:class:`~.external_config.ExternalSourceFormat`:
734 Format of external source.
735
736 See
737 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format
738 """
739 return self._properties["sourceFormat"]
740
741 @property
742 def options(self) -> Optional[OptionsType]:
743 """Source-specific options."""
744 for optcls in _OPTION_CLASSES:
745 # The code below is too much magic for mypy to handle.
746 if self.source_format == optcls._SOURCE_FORMAT: # type: ignore
747 options: OptionsType = optcls() # type: ignore
748 options._properties = self._properties.setdefault(
749 optcls._RESOURCE_NAME, {} # type: ignore
750 )
751 return options
752
753 # No matching source format found.
754 return None
755
756 @property
757 def autodetect(self):
758 """bool: If :data:`True`, try to detect schema and format options
759 automatically.
760
761 See
762 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect
763 """
764 return self._properties.get("autodetect")
765
766 @autodetect.setter
767 def autodetect(self, value):
768 self._properties["autodetect"] = value
769
770 @property
771 def compression(self):
772 """str: The compression type of the data source.
773
774 See
775 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression
776 """
777 return self._properties.get("compression")
778
779 @compression.setter
780 def compression(self, value):
781 self._properties["compression"] = value
782
783 @property
784 def decimal_target_types(self) -> Optional[FrozenSet[str]]:
785 """Possible SQL data types to which the source decimal values are converted.
786
787 See:
788 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types
789
790 .. versionadded:: 2.21.0
791 """
792 prop = self._properties.get("decimalTargetTypes")
793 if prop is not None:
794 prop = frozenset(prop)
795 return prop
796
797 @decimal_target_types.setter
798 def decimal_target_types(self, value: Optional[Iterable[str]]):
799 if value is not None:
800 self._properties["decimalTargetTypes"] = list(value)
801 else:
802 if "decimalTargetTypes" in self._properties:
803 del self._properties["decimalTargetTypes"]
804
805 @property
806 def hive_partitioning(self):
807 """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \
808 it configures hive partitioning support.
809
810 See
811 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
812 """
813 prop = self._properties.get("hivePartitioningOptions")
814 if prop is None:
815 return None
816 return HivePartitioningOptions.from_api_repr(prop)
817
818 @hive_partitioning.setter
819 def hive_partitioning(self, value):
820 prop = value.to_api_repr() if value is not None else None
821 self._properties["hivePartitioningOptions"] = prop
822
823 @property
824 def reference_file_schema_uri(self):
825 """Optional[str]:
826 When creating an external table, the user can provide a reference file with the
827 table schema. This is enabled for the following formats:
828
829 AVRO, PARQUET, ORC
830 """
831 return self._properties.get("referenceFileSchemaUri")
832
833 @reference_file_schema_uri.setter
834 def reference_file_schema_uri(self, value):
835 self._properties["referenceFileSchemaUri"] = value
836
837 @property
838 def ignore_unknown_values(self):
839 """bool: If :data:`True`, extra values that are not represented in the
840 table schema are ignored. Defaults to :data:`False`.
841
842 See
843 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values
844 """
845 return self._properties.get("ignoreUnknownValues")
846
847 @ignore_unknown_values.setter
848 def ignore_unknown_values(self, value):
849 self._properties["ignoreUnknownValues"] = value
850
851 @property
852 def max_bad_records(self):
853 """int: The maximum number of bad records that BigQuery can ignore when
854 reading data.
855
856 See
857 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records
858 """
859 return self._properties.get("maxBadRecords")
860
861 @max_bad_records.setter
862 def max_bad_records(self, value):
863 self._properties["maxBadRecords"] = value
864
865 @property
866 def source_uris(self):
867 """List[str]: URIs that point to your data in Google Cloud.
868
869 See
870 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris
871 """
872 return self._properties.get("sourceUris", [])
873
874 @source_uris.setter
875 def source_uris(self, value):
876 self._properties["sourceUris"] = value
877
878 @property
879 def schema(self):
880 """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
881 for the data.
882
883 See
884 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
885 """
886 prop: Dict[str, Any] = typing.cast(
887 Dict[str, Any], self._properties.get("schema", {})
888 )
889 return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
890
891 @schema.setter
892 def schema(self, value):
893 prop = value
894 if value is not None:
895 prop = {"fields": [field.to_api_repr() for field in value]}
896 self._properties["schema"] = prop
897
898 @property
899 def date_format(self) -> Optional[str]:
900 """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values.
901
902 See:
903 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format
904 """
905 result = self._properties.get("dateFormat")
906 return typing.cast(str, result)
907
908 @date_format.setter
909 def date_format(self, value: Optional[str]):
910 self._properties["dateFormat"] = value
911
912 @property
913 def datetime_format(self) -> Optional[str]:
914 """Optional[str]: Format used to parse DATETIME values. Supports C-style
915 and SQL-style values.
916
917 See:
918 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format
919 """
920 result = self._properties.get("datetimeFormat")
921 return typing.cast(str, result)
922
923 @datetime_format.setter
924 def datetime_format(self, value: Optional[str]):
925 self._properties["datetimeFormat"] = value
926
927 @property
928 def time_zone(self) -> Optional[str]:
929 """Optional[str]: Time zone used when parsing timestamp values that do not
930 have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
931 format is an IANA timezone string (e.g. America/Los_Angeles).
932
933 See:
934 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
935 """
936
937 result = self._properties.get("timeZone")
938 return typing.cast(str, result)
939
940 @time_zone.setter
941 def time_zone(self, value: Optional[str]):
942 self._properties["timeZone"] = value
943
944 @property
945 def time_format(self) -> Optional[str]:
946 """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values.
947
948 See:
949 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format
950 """
951 result = self._properties.get("timeFormat")
952 return typing.cast(str, result)
953
954 @time_format.setter
955 def time_format(self, value: Optional[str]):
956 self._properties["timeFormat"] = value
957
958 @property
959 def timestamp_format(self) -> Optional[str]:
960 """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values.
961
962 See:
963 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format
964 """
965 result = self._properties.get("timestampFormat")
966 return typing.cast(str, result)
967
968 @timestamp_format.setter
969 def timestamp_format(self, value: Optional[str]):
970 self._properties["timestampFormat"] = value
971
972 @property
973 def connection_id(self):
974 """Optional[str]: ID of a BigQuery Connection API
975 resource.
976 """
977 return self._properties.get("connectionId")
978
979 @connection_id.setter
980 def connection_id(self, value):
981 self._properties["connectionId"] = value
982
983 @property
984 def avro_options(self) -> Optional[AvroOptions]:
985 """Additional properties to set if ``sourceFormat`` is set to AVRO.
986
987 See:
988 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
989 """
990 if self.source_format == ExternalSourceFormat.AVRO:
991 self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
992 resource = self._properties.get(AvroOptions._RESOURCE_NAME)
993 if resource is None:
994 return None
995 options = AvroOptions()
996 options._properties = resource
997 return options
998
999 @avro_options.setter
1000 def avro_options(self, value):
1001 if self.source_format != ExternalSourceFormat.AVRO:
1002 msg = f"Cannot set Avro options, source format is {self.source_format}"
1003 raise TypeError(msg)
1004 self._properties[AvroOptions._RESOURCE_NAME] = value._properties
1005
1006 @property
1007 def bigtable_options(self) -> Optional[BigtableOptions]:
1008 """Additional properties to set if ``sourceFormat`` is set to BIGTABLE.
1009
1010 See:
1011 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
1012 """
1013 if self.source_format == ExternalSourceFormat.BIGTABLE:
1014 self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
1015 resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
1016 if resource is None:
1017 return None
1018 options = BigtableOptions()
1019 options._properties = resource
1020 return options
1021
1022 @bigtable_options.setter
1023 def bigtable_options(self, value):
1024 if self.source_format != ExternalSourceFormat.BIGTABLE:
1025 msg = f"Cannot set Bigtable options, source format is {self.source_format}"
1026 raise TypeError(msg)
1027 self._properties[BigtableOptions._RESOURCE_NAME] = value._properties
1028
1029 @property
1030 def csv_options(self) -> Optional[CSVOptions]:
1031 """Additional properties to set if ``sourceFormat`` is set to CSV.
1032
1033 See:
1034 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
1035 """
1036 if self.source_format == ExternalSourceFormat.CSV:
1037 self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
1038 resource = self._properties.get(CSVOptions._RESOURCE_NAME)
1039 if resource is None:
1040 return None
1041 options = CSVOptions()
1042 options._properties = resource
1043 return options
1044
1045 @csv_options.setter
1046 def csv_options(self, value):
1047 if self.source_format != ExternalSourceFormat.CSV:
1048 msg = f"Cannot set CSV options, source format is {self.source_format}"
1049 raise TypeError(msg)
1050 self._properties[CSVOptions._RESOURCE_NAME] = value._properties
1051
1052 @property
1053 def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
1054 """Additional properties to set if ``sourceFormat`` is set to
1055 GOOGLE_SHEETS.
1056
1057 See:
1058 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
1059 """
1060 if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
1061 self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
1062 resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
1063 if resource is None:
1064 return None
1065 options = GoogleSheetsOptions()
1066 options._properties = resource
1067 return options
1068
1069 @google_sheets_options.setter
1070 def google_sheets_options(self, value):
1071 if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
1072 msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
1073 raise TypeError(msg)
1074 self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties
1075
1076 @property
1077 def parquet_options(self) -> Optional[ParquetOptions]:
1078 """Additional properties to set if ``sourceFormat`` is set to PARQUET.
1079
1080 See:
1081 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
1082 """
1083 if self.source_format == ExternalSourceFormat.PARQUET:
1084 self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
1085 resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
1086 if resource is None:
1087 return None
1088 options = ParquetOptions()
1089 options._properties = resource
1090 return options
1091
1092 @parquet_options.setter
1093 def parquet_options(self, value):
1094 if self.source_format != ExternalSourceFormat.PARQUET:
1095 msg = f"Cannot set Parquet options, source format is {self.source_format}"
1096 raise TypeError(msg)
1097 self._properties[ParquetOptions._RESOURCE_NAME] = value._properties
1098
1099 def to_api_repr(self) -> dict:
1100 """Build an API representation of this object.
1101
1102 Returns:
1103 Dict[str, Any]:
1104 A dictionary in the format used by the BigQuery API.
1105 """
1106 config = copy.deepcopy(self._properties)
1107 return config
1108
1109 @classmethod
1110 def from_api_repr(cls, resource: dict) -> "ExternalConfig":
1111 """Factory: construct an :class:`~.external_config.ExternalConfig`
1112 instance given its API representation.
1113
1114 Args:
1115 resource (Dict[str, Any]):
1116 Definition of an :class:`~.external_config.ExternalConfig`
1117 instance in the same representation as is returned from the
1118 API.
1119
1120 Returns:
1121 ExternalConfig: Configuration parsed from ``resource``.
1122 """
1123 config = cls(resource["sourceFormat"])
1124 config._properties = copy.deepcopy(resource)
1125 return config
1126
1127
class ExternalCatalogDatasetOptions:
    """Options defining open source compatible datasets living in the BigQuery catalog.

    Contains metadata of an open source database, schema, or namespace
    represented by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location URI for all
            tables in the dataset. Equivalent to hive metastore's database
            locationUri. Maximum length of 1024 characters.
        parameters (Optional[dict[str, Any]]): A map of key value pairs defining the
            parameters and properties of the open source schema. Maximum size
            of 2Mib.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        # All state lives in the API-format dict; the property setters
        # below validate argument types and write through to it.
        self._properties: Dict[str, Any] = {}
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.
        Equivalent to hive metastore's database locationUri. Maximum length of
        1024 characters."""
        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["defaultStorageLocationUri"] = checked

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema. Maximum size of 2Mib."""
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        checked = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # NOTE(review): returns the live internal dict (no copy), so
        # mutating the result mutates this options object.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from ``api_repr``.
        """
        options = cls()
        options._properties = api_repr
        return options
1199
1200
class ExternalCatalogTableOptions:
    """Metadata about open source compatible table. The fields contained in these
    options correspond to hive metastore's table level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the credentials to be
            used to read external storage, such as Azure Blob, Cloud Storage, or
            S3. The connection is needed to read the open source table from
            BigQuery Engine. The connection_id can have the form
            ``<project_id>.<location_id>.<connection_id>`` or
            ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
        parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters
            and properties of the open source table. Corresponds with hive meta
            store table parameters. Maximum size of 4Mib.
        storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information
            about the physical storage of this table.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Optional[schema.StorageDescriptor] = None,
    ):
        # All state lives in the API-format dict; the property setters
        # validate argument types and write through to it.
        self._properties: Dict[str, Any] = {}
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be
        used to read external storage, such as Azure Blob, Cloud Storage, or
        S3. The connection is needed to read the open source table from
        BigQuery Engine. The connection_id can have the form
        ``<project_id>.<location_id>.<connection_id>`` or
        ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["connectionId"] = value

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table. Corresponds with hive meta
        store table parameters. Maximum size of 4Mib.
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = value

    @property
    def storage_descriptor(self) -> Optional[schema.StorageDescriptor]:
        """Optional. A storage descriptor containing information about the
        physical storage of this table."""
        prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])

        # Re-wrap the stored resource dict in the rich type on each access.
        if prop is not None:
            return schema.StorageDescriptor.from_api_repr(prop)
        return None

    @storage_descriptor.setter
    def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
        value = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        # Store the API-format dict; accept either the rich type or a raw
        # resource dict from the caller.
        if isinstance(value, schema.StorageDescriptor):
            self._properties["storageDescriptor"] = value.to_api_repr()
        else:
            self._properties["storageDescriptor"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # NOTE(review): returns the live internal dict (no copy), so
        # mutating the result mutates this options object.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from ``api_repr``.
        """
        config = cls()
        config._properties = api_repr
        return config