1# Copyright 2017 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Define classes that describe external data sources.
16
17 These are used for both Table.externalDataConfiguration and
18 Job.configuration.query.tableDefinitions.
19"""
20
21from __future__ import absolute_import, annotations
22
23import base64
24import copy
25import typing
26from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
27
28from google.cloud.bigquery._helpers import _to_bytes
29from google.cloud.bigquery._helpers import _bytes_to_json
30from google.cloud.bigquery._helpers import _int_or_none
31from google.cloud.bigquery._helpers import _str_or_none
32from google.cloud.bigquery import _helpers
33from google.cloud.bigquery.enums import SourceColumnMatch
34from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
35from google.cloud.bigquery import schema
36from google.cloud.bigquery.schema import SchemaField
37
38
class ExternalSourceFormat(object):
    """Formats accepted for external data files.

    Note that this set of allowed values differs from the set used when
    loading data (see :class:`~google.cloud.bigquery.job.SourceFormat`).
    """

    CSV = "CSV"
    """Comma-separated values format."""

    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    """Google Sheets spreadsheet format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Newline-delimited JSON (JSON Lines) format."""

    AVRO = "AVRO"
    """Apache Avro format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Cloud Datastore backup format."""

    ORC = "ORC"
    """Apache ORC columnar format."""

    PARQUET = "PARQUET"
    """Apache Parquet columnar format."""

    BIGTABLE = "BIGTABLE"
    """Cloud Bigtable format."""
70
71
class BigtableColumn(object):
    """Options for a Bigtable column."""

    def __init__(self):
        # Keys and values mirror the REST resource representation.
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def field_name(self):
        """str: Identifier to use when the qualifier is not a valid BigQuery
        field identifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name
        """
        return self._properties.get("fieldName")

    @field_name.setter
    def field_name(self, value):
        self._properties["fieldName"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of a value in this column
        is exposed.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def qualifier_encoded(self):
        """Union[str, bytes]: The qualifier encoded in binary.

        The module handles the base64 encoding/decoding transparently.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded
        """
        encoded = self._properties.get("qualifierEncoded")
        if encoded is not None:
            return base64.standard_b64decode(_to_bytes(encoded))
        return None

    @qualifier_encoded.setter
    def qualifier_encoded(self, value):
        self._properties["qualifierEncoded"] = _bytes_to_json(value)

    @property
    def qualifier_string(self):
        """str: A valid UTF-8 string qualifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string
        """
        return self._properties.get("qualifierString")

    @qualifier_string.setter
    def qualifier_string(self, value):
        self._properties["qualifierString"] = value

    @property
    def type_(self):
        """str: The type to which values in cells of this column are converted.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumn":
        """Factory: construct a :class:`~.external_config.BigtableColumn`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumn`
                instance in the same representation as is returned from the
                API.

        Returns:
            external_config.BigtableColumn: Configuration parsed from ``resource``.
        """
        column = cls()
        column._properties = copy.deepcopy(resource)
        return column
190
191
class BigtableColumnFamily(object):
    """Options for a Bigtable column family."""

    def __init__(self):
        # Keys and values mirror the REST resource representation.
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def family_id(self):
        """str: Identifier of the column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id
        """
        return self._properties.get("familyId")

    @family_id.setter
    def family_id(self, value):
        self._properties["familyId"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of a value is exposed for
        all columns in this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def type_(self):
        """str: The type to which values in cells of this column family are
        converted.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def columns(self):
        """List[BigtableColumn]: Columns that should be exposed as individual
        fields.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns
        """
        resources = self._properties.get("columns", [])
        return [BigtableColumn.from_api_repr(resource) for resource in resources]

    @columns.setter
    def columns(self, value):
        self._properties["columns"] = [column.to_api_repr() for column in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
        """Factory: construct a :class:`~.external_config.BigtableColumnFamily`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumnFamily`
                instance in the same representation as is returned from the
                API.

        Returns:
            :class:`~.external_config.BigtableColumnFamily`:
                Configuration parsed from ``resource``.
        """
        family = cls()
        family._properties = copy.deepcopy(resource)
        return family
293
294
class BigtableOptions(object):
    """Options that describe how to treat Bigtable tables as BigQuery tables."""

    # Source format these options apply to, and the key under which they are
    # nested inside the external data configuration resource.
    _SOURCE_FORMAT = "BIGTABLE"
    _RESOURCE_NAME = "bigtableOptions"

    def __init__(self):
        # Keys and values mirror the REST resource representation.
        self._properties = {}

    @property
    def ignore_unspecified_column_families(self):
        """bool: If :data:`True`, ignore columns not specified in
        :attr:`column_families` list. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families
        """
        return self._properties.get("ignoreUnspecifiedColumnFamilies")

    @ignore_unspecified_column_families.setter
    def ignore_unspecified_column_families(self, value):
        self._properties["ignoreUnspecifiedColumnFamilies"] = value

    @property
    def read_rowkey_as_string(self):
        """bool: If :data:`True`, rowkey column families are read and
        converted to string. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string
        """
        return self._properties.get("readRowkeyAsString")

    @read_rowkey_as_string.setter
    def read_rowkey_as_string(self, value):
        self._properties["readRowkeyAsString"] = value

    @property
    def column_families(self):
        """List[:class:`~.external_config.BigtableColumnFamily`]: List of
        column families to expose in the table schema along with their types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families
        """
        resources = self._properties.get("columnFamilies", [])
        return [BigtableColumnFamily.from_api_repr(resource) for resource in resources]

    @column_families.setter
    def column_families(self, value):
        self._properties["columnFamilies"] = [family.to_api_repr() for family in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableOptions":
        """Factory: construct a :class:`~.external_config.BigtableOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            BigtableOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
373
374
class CSVOptions(object):
    """Options that describe how to treat CSV files as BigQuery tables."""

    # Source format these options apply to, and the key under which they are
    # nested inside the external data configuration resource.
    _SOURCE_FORMAT = "CSV"
    _RESOURCE_NAME = "csvOptions"

    def __init__(self):
        # Keys and values mirror the REST resource representation.
        self._properties = {}

    @property
    def allow_jagged_rows(self):
        """bool: If :data:`True`, BigQuery treats missing trailing columns as
        null values. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows
        """
        return self._properties.get("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._properties["allowJaggedRows"] = value

    @property
    def allow_quoted_newlines(self):
        """bool: If :data:`True`, quoted data sections containing newline
        characters in a CSV file are allowed. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines
        """
        return self._properties.get("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._properties["allowQuotedNewlines"] = value

    @property
    def encoding(self):
        """str: The character encoding of the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def preserve_ascii_control_characters(self):
        """bool: Whether embedded ASCII control characters (the first 32
        characters in the ASCII table, ``\\x00`` through ``\\x1F``) are
        preserved.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
        """
        return self._properties.get("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        self._properties["preserveAsciiControlCharacters"] = value

    @property
    def field_delimiter(self):
        """str: The separator for fields in a CSV file. Defaults to comma (',').

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter
        """
        return self._properties.get("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._properties["fieldDelimiter"] = value

    @property
    def quote_character(self):
        """str: The value used to quote data sections in a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote
        """
        return self._properties.get("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._properties["quote"] = value

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a CSV file to skip.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The REST API represents this int64 field as a string.
        self._properties["skipLeadingRows"] = str(value)

    @property
    def source_column_match(self) -> Optional[SourceColumnMatch]:
        """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
        strategy used to match loaded columns to the schema. If not set, a sensible
        default is chosen based on how the schema is provided. If autodetect is
        used, then columns are matched by name. Otherwise, columns are matched by
        position. This is done to keep the behavior backward-compatible.

        Acceptable values are:

        SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option.
        POSITION: matches by position. This assumes that the columns are ordered
            the same way as the schema.
        NAME: matches by name. This reads the header row as column names and
            reorders columns to match the field names in the schema.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match
        """
        raw = self._properties.get("sourceColumnMatch")
        if raw is None:
            return None
        return SourceColumnMatch(raw)

    @source_column_match.setter
    def source_column_match(self, value: Union[SourceColumnMatch, str, None]):
        # Accept the enum, its string value, or None; anything else is an error.
        if value is None:
            self._properties["sourceColumnMatch"] = None
            return
        if isinstance(value, SourceColumnMatch):
            # Enum values are non-empty strings, but keep the falsy -> None
            # normalization for parity with plain string input.
            self._properties["sourceColumnMatch"] = value.value or None
            return
        if not isinstance(value, str):
            raise TypeError(
                "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None"
            )
        # An empty string is normalized to None (unset).
        self._properties["sourceColumnMatch"] = value or None

    @property
    def null_markers(self) -> Optional[Iterable[str]]:
        """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file.

        .. note::
            null_marker and null_markers can't be set at the same time.
            If null_marker is set, null_markers has to be not set.
            If null_markers is set, null_marker has to be not set.
            If both null_marker and null_markers are set at the same time, a user error would be thrown.
            Any strings listed in null_markers, including empty string would be interpreted as SQL NULL.
            This applies to all column types.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
        """
        return self._properties.get("nullMarkers")

    @null_markers.setter
    def null_markers(self, value: Optional[Iterable[str]]):
        self._properties["nullMarkers"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "CSVOptions":
        """Factory: construct a :class:`~.external_config.CSVOptions` instance
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.CSVOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            CSVOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
557
558
class GoogleSheetsOptions(object):
    """Options that describe how to treat Google Sheets as BigQuery tables."""

    # Source format these options apply to, and the key under which they are
    # nested inside the external data configuration resource.
    _SOURCE_FORMAT = "GOOGLE_SHEETS"
    _RESOURCE_NAME = "googleSheetsOptions"

    def __init__(self):
        # Keys and values mirror the REST resource representation.
        self._properties = {}

    @property
    def skip_leading_rows(self):
        """Optional[int]: The number of rows at the top of a sheet that
        BigQuery will skip when reading the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # The REST API represents this int64 field as a string. Previously
        # ``str(value)`` ran unconditionally, so assigning None stored the
        # literal string "None", which the getter's _int_or_none then failed
        # to parse. Treat None as "unset" and drop the key instead.
        if value is None:
            self._properties.pop("skipLeadingRows", None)
        else:
            self._properties["skipLeadingRows"] = str(value)

    @property
    def range(self):
        """Optional[str]: The range of a sheet that BigQuery will query from.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range
        """
        return _str_or_none(self._properties.get("range"))

    @range.setter
    def range(self, value):
        self._properties["range"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
        """Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.GoogleSheetsOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            GoogleSheetsOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
620
621
# All source-format-specific option classes. Each class exposes a
# _SOURCE_FORMAT (matching an ExternalSourceFormat value) and a
# _RESOURCE_NAME (the key under which its options are nested in the
# external data configuration resource); ExternalConfig.options uses these
# to dispatch to the right class.
_OPTION_CLASSES = (
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
)

# Union of the option classes above, for type annotations.
OptionsType = Union[
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
]
637
638
class HivePartitioningOptions(object):
    """[Beta] Options that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self) -> None:
        # Keys and values mirror the REST resource representation.
        self._properties: Dict[str, Any] = {}

    @property
    def mode(self):
        """Optional[str]: The hive-partitioning mode to use when reading data.

        Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: When hive partition detection is requested, a common
        prefix for all source URIs is required.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    @property
    def require_partition_filter(self):
        """Optional[bool]: If true, queries over the partitioned table must
        specify a partition filter usable for partition elimination.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties["requirePartitionFilter"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A dictionary in the format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        options = cls()
        options._properties = copy.deepcopy(resource)
        return options
722
723
724class ExternalConfig(object):
725 """Description of an external data source.
726
727 Args:
728 source_format (ExternalSourceFormat):
729 See :attr:`source_format`.
730 """
731
    def __init__(self, source_format) -> None:
        # All state lives in this dict, which mirrors the REST resource
        # representation (Table.externalDataConfiguration).
        self._properties = {"sourceFormat": source_format}
734
    @property
    def source_format(self):
        """:class:`~.external_config.ExternalSourceFormat`:
        Format of external source.

        Read-only: set via the constructor. Note the bracket access below —
        a resource without a "sourceFormat" key raises :class:`KeyError`
        rather than returning :data:`None`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format
        """
        return self._properties["sourceFormat"]
744
    @property
    def options(self) -> Optional[OptionsType]:
        """Source-specific options.

        Returns an options object matching :attr:`source_format`, or
        :data:`None` when the format has no dedicated options class.
        """
        for optcls in _OPTION_CLASSES:
            # The code below is too much magic for mypy to handle.
            if self.source_format == optcls._SOURCE_FORMAT:  # type: ignore
                options: OptionsType = optcls()  # type: ignore
                # setdefault returns the stored dict, so the returned options
                # object aliases self._properties: mutating it updates this
                # config in place. Reading this property also inserts an
                # empty resource entry when one was absent.
                options._properties = self._properties.setdefault(
                    optcls._RESOURCE_NAME, {}  # type: ignore
                )
                return options

        # No matching source format found.
        return None
759
760 @property
761 def autodetect(self):
762 """bool: If :data:`True`, try to detect schema and format options
763 automatically.
764
765 See
766 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect
767 """
768 return self._properties.get("autodetect")
769
770 @autodetect.setter
771 def autodetect(self, value):
772 self._properties["autodetect"] = value
773
774 @property
775 def compression(self):
776 """str: The compression type of the data source.
777
778 See
779 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression
780 """
781 return self._properties.get("compression")
782
783 @compression.setter
784 def compression(self, value):
785 self._properties["compression"] = value
786
787 @property
788 def decimal_target_types(self) -> Optional[FrozenSet[str]]:
789 """Possible SQL data types to which the source decimal values are converted.
790
791 See:
792 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types
793
794 .. versionadded:: 2.21.0
795 """
796 prop = self._properties.get("decimalTargetTypes")
797 if prop is not None:
798 prop = frozenset(prop)
799 return prop
800
801 @decimal_target_types.setter
802 def decimal_target_types(self, value: Optional[Iterable[str]]):
803 if value is not None:
804 self._properties["decimalTargetTypes"] = list(value)
805 else:
806 if "decimalTargetTypes" in self._properties:
807 del self._properties["decimalTargetTypes"]
808
809 @property
810 def hive_partitioning(self):
811 """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
812 it configures hive partitioning support.
813
814 .. note::
815 **Experimental**. This feature is experimental and might change or
816 have limited support.
817
818 See
819 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
820 """
821 prop = self._properties.get("hivePartitioningOptions")
822 if prop is None:
823 return None
824 return HivePartitioningOptions.from_api_repr(prop)
825
826 @hive_partitioning.setter
827 def hive_partitioning(self, value):
828 prop = value.to_api_repr() if value is not None else None
829 self._properties["hivePartitioningOptions"] = prop
830
831 @property
832 def reference_file_schema_uri(self):
833 """Optional[str]:
834 When creating an external table, the user can provide a reference file with the
835 table schema. This is enabled for the following formats:
836
837 AVRO, PARQUET, ORC
838 """
839 return self._properties.get("referenceFileSchemaUri")
840
841 @reference_file_schema_uri.setter
842 def reference_file_schema_uri(self, value):
843 self._properties["referenceFileSchemaUri"] = value
844
845 @property
846 def ignore_unknown_values(self):
847 """bool: If :data:`True`, extra values that are not represented in the
848 table schema are ignored. Defaults to :data:`False`.
849
850 See
851 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values
852 """
853 return self._properties.get("ignoreUnknownValues")
854
855 @ignore_unknown_values.setter
856 def ignore_unknown_values(self, value):
857 self._properties["ignoreUnknownValues"] = value
858
859 @property
860 def max_bad_records(self):
861 """int: The maximum number of bad records that BigQuery can ignore when
862 reading data.
863
864 See
865 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records
866 """
867 return self._properties.get("maxBadRecords")
868
869 @max_bad_records.setter
870 def max_bad_records(self, value):
871 self._properties["maxBadRecords"] = value
872
873 @property
874 def source_uris(self):
875 """List[str]: URIs that point to your data in Google Cloud.
876
877 See
878 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris
879 """
880 return self._properties.get("sourceUris", [])
881
882 @source_uris.setter
883 def source_uris(self, value):
884 self._properties["sourceUris"] = value
885
886 @property
887 def schema(self):
888 """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
889 for the data.
890
891 See
892 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
893 """
894 prop: Dict[str, Any] = typing.cast(
895 Dict[str, Any], self._properties.get("schema", {})
896 )
897 return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
898
899 @schema.setter
900 def schema(self, value):
901 prop = value
902 if value is not None:
903 prop = {"fields": [field.to_api_repr() for field in value]}
904 self._properties["schema"] = prop
905
906 @property
907 def date_format(self) -> Optional[str]:
908 """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values.
909
910 See:
911 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format
912 """
913 result = self._properties.get("dateFormat")
914 return typing.cast(str, result)
915
916 @date_format.setter
917 def date_format(self, value: Optional[str]):
918 self._properties["dateFormat"] = value
919
920 @property
921 def datetime_format(self) -> Optional[str]:
922 """Optional[str]: Format used to parse DATETIME values. Supports C-style
923 and SQL-style values.
924
925 See:
926 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format
927 """
928 result = self._properties.get("datetimeFormat")
929 return typing.cast(str, result)
930
931 @datetime_format.setter
932 def datetime_format(self, value: Optional[str]):
933 self._properties["datetimeFormat"] = value
934
935 @property
936 def time_zone(self) -> Optional[str]:
937 """Optional[str]: Time zone used when parsing timestamp values that do not
938 have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
939 format is an IANA timezone string (e.g. America/Los_Angeles).
940
941 See:
942 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
943 """
944
945 result = self._properties.get("timeZone")
946 return typing.cast(str, result)
947
948 @time_zone.setter
949 def time_zone(self, value: Optional[str]):
950 self._properties["timeZone"] = value
951
952 @property
953 def time_format(self) -> Optional[str]:
954 """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values.
955
956 See:
957 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format
958 """
959 result = self._properties.get("timeFormat")
960 return typing.cast(str, result)
961
962 @time_format.setter
963 def time_format(self, value: Optional[str]):
964 self._properties["timeFormat"] = value
965
966 @property
967 def timestamp_format(self) -> Optional[str]:
968 """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values.
969
970 See:
971 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format
972 """
973 result = self._properties.get("timestampFormat")
974 return typing.cast(str, result)
975
976 @timestamp_format.setter
977 def timestamp_format(self, value: Optional[str]):
978 self._properties["timestampFormat"] = value
979
980 @property
981 def connection_id(self):
982 """Optional[str]: [Experimental] ID of a BigQuery Connection API
983 resource.
984
985 .. WARNING::
986
987 This feature is experimental. Pre-GA features may have limited
988 support, and changes to pre-GA features may not be compatible with
989 other pre-GA versions.
990 """
991 return self._properties.get("connectionId")
992
993 @connection_id.setter
994 def connection_id(self, value):
995 self._properties["connectionId"] = value
996
997 @property
998 def avro_options(self) -> Optional[AvroOptions]:
999 """Additional properties to set if ``sourceFormat`` is set to AVRO.
1000
1001 See:
1002 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
1003 """
1004 if self.source_format == ExternalSourceFormat.AVRO:
1005 self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
1006 resource = self._properties.get(AvroOptions._RESOURCE_NAME)
1007 if resource is None:
1008 return None
1009 options = AvroOptions()
1010 options._properties = resource
1011 return options
1012
1013 @avro_options.setter
1014 def avro_options(self, value):
1015 if self.source_format != ExternalSourceFormat.AVRO:
1016 msg = f"Cannot set Avro options, source format is {self.source_format}"
1017 raise TypeError(msg)
1018 self._properties[AvroOptions._RESOURCE_NAME] = value._properties
1019
1020 @property
1021 def bigtable_options(self) -> Optional[BigtableOptions]:
1022 """Additional properties to set if ``sourceFormat`` is set to BIGTABLE.
1023
1024 See:
1025 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
1026 """
1027 if self.source_format == ExternalSourceFormat.BIGTABLE:
1028 self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
1029 resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
1030 if resource is None:
1031 return None
1032 options = BigtableOptions()
1033 options._properties = resource
1034 return options
1035
1036 @bigtable_options.setter
1037 def bigtable_options(self, value):
1038 if self.source_format != ExternalSourceFormat.BIGTABLE:
1039 msg = f"Cannot set Bigtable options, source format is {self.source_format}"
1040 raise TypeError(msg)
1041 self._properties[BigtableOptions._RESOURCE_NAME] = value._properties
1042
1043 @property
1044 def csv_options(self) -> Optional[CSVOptions]:
1045 """Additional properties to set if ``sourceFormat`` is set to CSV.
1046
1047 See:
1048 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
1049 """
1050 if self.source_format == ExternalSourceFormat.CSV:
1051 self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
1052 resource = self._properties.get(CSVOptions._RESOURCE_NAME)
1053 if resource is None:
1054 return None
1055 options = CSVOptions()
1056 options._properties = resource
1057 return options
1058
1059 @csv_options.setter
1060 def csv_options(self, value):
1061 if self.source_format != ExternalSourceFormat.CSV:
1062 msg = f"Cannot set CSV options, source format is {self.source_format}"
1063 raise TypeError(msg)
1064 self._properties[CSVOptions._RESOURCE_NAME] = value._properties
1065
1066 @property
1067 def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
1068 """Additional properties to set if ``sourceFormat`` is set to
1069 GOOGLE_SHEETS.
1070
1071 See:
1072 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
1073 """
1074 if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
1075 self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
1076 resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
1077 if resource is None:
1078 return None
1079 options = GoogleSheetsOptions()
1080 options._properties = resource
1081 return options
1082
1083 @google_sheets_options.setter
1084 def google_sheets_options(self, value):
1085 if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
1086 msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
1087 raise TypeError(msg)
1088 self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties
1089
1090 @property
1091 def parquet_options(self) -> Optional[ParquetOptions]:
1092 """Additional properties to set if ``sourceFormat`` is set to PARQUET.
1093
1094 See:
1095 https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
1096 """
1097 if self.source_format == ExternalSourceFormat.PARQUET:
1098 self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
1099 resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
1100 if resource is None:
1101 return None
1102 options = ParquetOptions()
1103 options._properties = resource
1104 return options
1105
1106 @parquet_options.setter
1107 def parquet_options(self, value):
1108 if self.source_format != ExternalSourceFormat.PARQUET:
1109 msg = f"Cannot set Parquet options, source format is {self.source_format}"
1110 raise TypeError(msg)
1111 self._properties[ParquetOptions._RESOURCE_NAME] = value._properties
1112
1113 def to_api_repr(self) -> dict:
1114 """Build an API representation of this object.
1115
1116 Returns:
1117 Dict[str, Any]:
1118 A dictionary in the format used by the BigQuery API.
1119 """
1120 config = copy.deepcopy(self._properties)
1121 return config
1122
1123 @classmethod
1124 def from_api_repr(cls, resource: dict) -> "ExternalConfig":
1125 """Factory: construct an :class:`~.external_config.ExternalConfig`
1126 instance given its API representation.
1127
1128 Args:
1129 resource (Dict[str, Any]):
1130 Definition of an :class:`~.external_config.ExternalConfig`
1131 instance in the same representation as is returned from the
1132 API.
1133
1134 Returns:
1135 ExternalConfig: Configuration parsed from ``resource``.
1136 """
1137 config = cls(resource["sourceFormat"])
1138 config._properties = copy.deepcopy(resource)
1139 return config
1140
1141
class ExternalCatalogDatasetOptions:
    """Options defining open source compatible datasets living in the BigQuery catalog.

    Contains metadata of an open source database, schema, or namespace
    represented by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location URI for all
            tables in the dataset. Equivalent to hive metastore's database
            locationUri. Maximum length of 1024 characters.
        parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters
            and properties of the open source schema. Maximum size of 2Mib.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        # All state lives in the REST resource dict; the property setters
        # below validate each argument and write it into this dict.
        self._properties: Dict[str, Any] = {}
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.
        Equivalent to hive metastore's database locationUri. Maximum length of
        1024 characters."""

        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        # Validation returns the value unchanged when it is a str or None.
        self._properties["defaultStorageLocationUri"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema. Maximum size of 2Mib."""

        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        # Validation returns the value unchanged when it is a dict or None.
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # NOTE: returns the live backing dict, not a copy.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from ``api_repr``.
        """
        options = cls()
        options._properties = api_repr
        return options
1214
class ExternalCatalogTableOptions:
    """Metadata about open source compatible table. The fields contained in these
    options correspond to hive metastore's table level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the credentials to be
            used to read external storage, such as Azure Blob, Cloud Storage, or
            S3. The connection is needed to read the open source table from
            BigQuery Engine. The connection_id is presumably of the form
            ``{project}.{location}.{connection_id}`` or
            ``projects/{project}/locations/{location}/connections/{connection_id}``
            (the original template markup was stripped -- TODO confirm against the
            BigQuery REST reference).
        parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters
            and properties of the open source table. Corresponds with hive meta
            store table parameters. Maximum size of 4Mib.
        storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information
            about the physical storage of this table.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Optional[schema.StorageDescriptor] = None,
    ):
        # All state lives in the REST resource dict; the property setters
        # below validate each argument and write it into this dict.
        self._properties: Dict[str, Any] = {}
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be
        used to read external storage, such as Azure Blob, Cloud Storage, or
        S3. The connection is needed to read the open source table from
        BigQuery Engine.
        """

        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        # Validation returns the value unchanged when it is a str or None.
        self._properties["connectionId"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table. Corresponds with hive meta
        store table parameters. Maximum size of 4Mib.
        """

        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        # Validation returns the value unchanged when it is a dict or None.
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    @property
    def storage_descriptor(self) -> Any:
        """Optional. A storage descriptor containing information about the
        physical storage of this table."""

        prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])
        if prop is None:
            return None
        # Re-wrap the stored resource dict in a typed object on every read.
        return schema.StorageDescriptor.from_api_repr(prop)

    @storage_descriptor.setter
    def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
        value = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        # Typed objects are stored in their API (dict) form; plain dicts and
        # None are stored as-is.
        if isinstance(value, schema.StorageDescriptor):
            value = value.to_api_repr()
        self._properties["storageDescriptor"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        # NOTE: returns the live backing dict, not a copy.
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from ``api_repr``.
        """
        options = cls()
        options._properties = api_repr
        return options