1# Copyright 2017 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Define classes that describe external data sources.
16
17 These are used for both Table.externalDataConfiguration and
18 Job.configuration.query.tableDefinitions.
19"""
20
21from __future__ import absolute_import, annotations
22
23import base64
24import copy
25import typing
26from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
27
28from google.cloud.bigquery._helpers import _to_bytes
29from google.cloud.bigquery._helpers import _bytes_to_json
30from google.cloud.bigquery._helpers import _int_or_none
31from google.cloud.bigquery._helpers import _str_or_none
32from google.cloud.bigquery import _helpers
33from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
34from google.cloud.bigquery import schema
35from google.cloud.bigquery.schema import SchemaField
36
37
class ExternalSourceFormat:
    """Names of the file formats usable for external data sources.

    The values allowed for external data sources are not the same as the
    values allowed when loading data; for the latter see
    :class:`~google.cloud.bigquery.job.SourceFormat`.
    """

    CSV = "CSV"
    """Specifies the CSV format."""

    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    """Specifies the Google Sheets format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies the newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies the Avro format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Specifies the datastore backup format."""

    ORC = "ORC"
    """Specifies the ORC format."""

    PARQUET = "PARQUET"
    """Specifies the Parquet format."""

    BIGTABLE = "BIGTABLE"
    """Specifies the Bigtable format."""
69
70
class BigtableColumn:
    """Options for a Bigtable column.

    All values live in ``_properties`` under the camelCase keys of the API
    representation.
    """

    def __init__(self):
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def field_name(self):
        """str: Identifier to use when the qualifier is not a valid BigQuery
        field identifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name
        """
        return self._properties.get("fieldName")

    @field_name.setter
    def field_name(self, value):
        self._properties["fieldName"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of the value in this
        column is exposed.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def qualifier_encoded(self):
        """Union[str, bytes]: The qualifier encoded in binary.

        The type is ``str`` (Python 2.x) or ``bytes`` (Python 3.x). Base64
        encoding and decoding are handled by this property, so callers work
        with the raw value.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded
        """
        stored = self._properties.get("qualifierEncoded")
        # The stored form is base64 text; hand back the decoded bytes.
        return None if stored is None else base64.standard_b64decode(_to_bytes(stored))

    @qualifier_encoded.setter
    def qualifier_encoded(self, value):
        self._properties["qualifierEncoded"] = _bytes_to_json(value)

    @property
    def qualifier_string(self):
        """str: The qualifier as a valid UTF-8 string.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string
        """
        return self._properties.get("qualifierString")

    @qualifier_string.setter
    def qualifier_string(self, value):
        self._properties["qualifierString"] = value

    @property
    def type_(self):
        """str: The type to which the values in cells of this column are
        converted.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumn":
        """Factory: construct a :class:`~.external_config.BigtableColumn`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumn`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dict is not shared.

        Returns:
            external_config.BigtableColumn: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
189
190
class BigtableColumnFamily:
    """Options for a Bigtable column family.

    All values live in ``_properties`` under the camelCase keys of the API
    representation.
    """

    def __init__(self):
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def family_id(self):
        """str: Identifier of the column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id
        """
        return self._properties.get("familyId")

    @family_id.setter
    def family_id(self, value):
        self._properties["familyId"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of each value is exposed
        for all columns in this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def type_(self):
        """str: The type to which the values in cells of this column family
        are converted.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def columns(self):
        """List[BigtableColumn]: Columns that should be exposed as
        individual fields.

        Each access builds new :class:`BigtableColumn` objects from the
        stored resources; an unset value yields an empty list.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns
        """
        stored_columns = self._properties.get("columns", [])
        return [BigtableColumn.from_api_repr(entry) for entry in stored_columns]

    @columns.setter
    def columns(self, value):
        self._properties["columns"] = [column.to_api_repr() for column in value]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
        """Factory: construct a :class:`~.external_config.BigtableColumnFamily`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumnFamily`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dict is not shared.

        Returns:
            :class:`~.external_config.BigtableColumnFamily`:
                Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
292
293
class BigtableOptions:
    """Options that describe how to treat Bigtable tables as BigQuery tables.

    ``_SOURCE_FORMAT`` is the source format these options belong to and
    ``_RESOURCE_NAME`` is the key they are stored under in an external
    configuration resource.
    """

    _SOURCE_FORMAT = "BIGTABLE"
    _RESOURCE_NAME = "bigtableOptions"

    def __init__(self):
        self._properties = {}

    @property
    def ignore_unspecified_column_families(self):
        """bool: If :data:`True`, ignore columns not specified in the
        :attr:`column_families` list. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families
        """
        return self._properties.get("ignoreUnspecifiedColumnFamilies")

    @ignore_unspecified_column_families.setter
    def ignore_unspecified_column_families(self, value):
        self._properties["ignoreUnspecifiedColumnFamilies"] = value

    @property
    def read_rowkey_as_string(self):
        """bool: If :data:`True`, rowkey column families will be read and
        converted to string. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string
        """
        return self._properties.get("readRowkeyAsString")

    @read_rowkey_as_string.setter
    def read_rowkey_as_string(self, value):
        self._properties["readRowkeyAsString"] = value

    @property
    def column_families(self):
        """List[:class:`~.external_config.BigtableColumnFamily`]: Column
        families to expose in the table schema along with their types.

        Each access builds new :class:`BigtableColumnFamily` objects from the
        stored resources; an unset value yields an empty list.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families
        """
        stored_families = self._properties.get("columnFamilies", [])
        return [
            BigtableColumnFamily.from_api_repr(entry) for entry in stored_families
        ]

    @column_families.setter
    def column_families(self, value):
        self._properties["columnFamilies"] = [
            family.to_api_repr() for family in value
        ]

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableOptions":
        """Factory: construct a :class:`~.external_config.BigtableOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dict is not shared.

        Returns:
            BigtableOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
372
373
class CSVOptions(object):
    """Options that describe how to treat CSV files as BigQuery tables.

    ``_SOURCE_FORMAT`` is the source format these options belong to and
    ``_RESOURCE_NAME`` is the key they are stored under in an external
    configuration resource. All values live in ``_properties`` under the
    camelCase keys of the API representation.
    """

    _SOURCE_FORMAT = "CSV"
    _RESOURCE_NAME = "csvOptions"

    def __init__(self):
        self._properties = {}

    @property
    def allow_jagged_rows(self):
        """bool: If :data:`True`, BigQuery treats missing trailing columns as
        null values. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows
        """
        return self._properties.get("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._properties["allowJaggedRows"] = value

    @property
    def allow_quoted_newlines(self):
        """bool: If :data:`True`, quoted data sections that contain newline
        characters in a CSV file are allowed. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines
        """
        return self._properties.get("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._properties["allowQuotedNewlines"] = value

    @property
    def encoding(self):
        """str: The character encoding of the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def preserve_ascii_control_characters(self):
        """bool: Indicates if the embedded ASCII control characters
        (the first 32 characters in the ASCII-table, from '\\x00' to '\\x1F')
        are preserved.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
        """
        # NOTE: the backslashes in the docstring above are escaped on purpose;
        # unescaped, they would embed real NUL / unit-separator characters in
        # the rendered documentation.
        return self._properties.get("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        self._properties["preserveAsciiControlCharacters"] = value

    @property
    def field_delimiter(self):
        """str: The separator for fields in a CSV file. Defaults to comma (',').

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter
        """
        return self._properties.get("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._properties["fieldDelimiter"] = value

    @property
    def quote_character(self):
        """str: The value that is used to quote data sections in a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote
        """
        return self._properties.get("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._properties["quote"] = value

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a CSV file that BigQuery
        will skip when reading the data.

        The value is stored as a string and converted back to ``int`` on
        read; an unset or ``None`` value reads back as ``None``.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # ``None`` must stay ``None``: ``str(None)`` would store the literal
        # text "None", which the getter cannot turn back into an int.
        self._properties["skipLeadingRows"] = None if value is None else str(value)

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A deep copy of the stored properties, in the
                format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "CSVOptions":
        """Factory: construct a :class:`~.external_config.CSVOptions` instance
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.CSVOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dict is not shared.

        Returns:
            CSVOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config
502
503
class GoogleSheetsOptions(object):
    """Options that describe how to treat Google Sheets as BigQuery tables.

    ``_SOURCE_FORMAT`` is the source format these options belong to and
    ``_RESOURCE_NAME`` is the key they are stored under in an external
    configuration resource.
    """

    _SOURCE_FORMAT = "GOOGLE_SHEETS"
    _RESOURCE_NAME = "googleSheetsOptions"

    def __init__(self):
        self._properties = {}

    @property
    def skip_leading_rows(self):
        """int: The number of rows at the top of a sheet that BigQuery will
        skip when reading the data.

        The value is stored as a string and converted back to ``int`` on
        read; an unset or ``None`` value reads back as ``None``.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # ``None`` must stay ``None``: ``str(None)`` would store the literal
        # text "None", which the getter cannot turn back into an int.
        self._properties["skipLeadingRows"] = None if value is None else str(value)

    @property
    def range(self):
        """str: The range of a sheet that BigQuery will query from.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range
        """
        return _str_or_none(self._properties.get("range"))

    @range.setter
    def range(self, value):
        self._properties["range"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A deep copy of the stored properties, in the
                format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
        """Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.GoogleSheetsOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dict is not shared.

        Returns:
            GoogleSheetsOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config
565
566
# Option classes searched by ``ExternalConfig.options``: each class's
# ``_SOURCE_FORMAT`` is compared with the configuration's source format and
# the first match is instantiated.
_OPTION_CLASSES = (
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
)

# Union of the same option classes, used as the type of the object returned
# by ``ExternalConfig.options``.
OptionsType = Union[
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
]
582
583
class HivePartitioningOptions:
    """[Beta] Options that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    All values live in ``_properties`` under the camelCase keys of the API
    representation.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self) -> None:
        self._properties: Dict[str, Any] = {}

    @property
    def mode(self):
        """Optional[str]: When set, the mode of hive partitioning to use when
        reading data.

        Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: Common prefix for all source URIs, required when
        hive partition detection is requested.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    @property
    def require_partition_filter(self):
        """Optional[bool]: If set to true, queries over the partitioned table
        must specify a partition filter that can be used for partition
        elimination.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.require_partition_filter
        """
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties["requirePartitionFilter"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]: A deep copy of the stored properties, in the
                format used by the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dict is not shared.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
667
668
class ExternalConfig(object):
    """Description of an external data source.

    All values live in ``_properties`` under the camelCase keys of the API
    representation. Format-specific options are stored under the
    ``_RESOURCE_NAME`` key of the matching options class.

    Args:
        source_format (ExternalSourceFormat):
            See :attr:`source_format`.
    """

    def __init__(self, source_format) -> None:
        self._properties = {"sourceFormat": source_format}

    @property
    def source_format(self):
        """:class:`~.external_config.ExternalSourceFormat`:
        Format of external source.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format
        """
        return self._properties["sourceFormat"]

    @property
    def options(self) -> Optional[OptionsType]:
        """Source-specific options.

        Returns the options object whose class matches :attr:`source_format`,
        or :data:`None` when no options class exists for that format. The
        returned object shares its property dict with this configuration
        (creating an empty one if needed), so changes made through it are
        reflected here.
        """
        for optcls in _OPTION_CLASSES:
            # The code below is too much magic for mypy to handle.
            if self.source_format == optcls._SOURCE_FORMAT:  # type: ignore
                options: OptionsType = optcls()  # type: ignore
                options._properties = self._properties.setdefault(
                    optcls._RESOURCE_NAME, {}  # type: ignore
                )
                return options

        # No matching source format found.
        return None

    @property
    def autodetect(self):
        """bool: If :data:`True`, try to detect schema and format options
        automatically.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect
        """
        return self._properties.get("autodetect")

    @autodetect.setter
    def autodetect(self, value):
        self._properties["autodetect"] = value

    @property
    def compression(self):
        """str: The compression type of the data source.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression
        """
        return self._properties.get("compression")

    @compression.setter
    def compression(self, value):
        self._properties["compression"] = value

    @property
    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
        """Possible SQL data types to which the source decimal values are converted.

        Read back as a ``frozenset`` (or :data:`None` when unset). Assigning
        :data:`None` removes the value from the configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types

        .. versionadded:: 2.21.0
        """
        prop = self._properties.get("decimalTargetTypes")
        if prop is not None:
            prop = frozenset(prop)
        return prop

    @decimal_target_types.setter
    def decimal_target_types(self, value: Optional[Iterable[str]]):
        if value is None:
            # Unsetting drops the key entirely rather than storing ``None``.
            self._properties.pop("decimalTargetTypes", None)
        else:
            self._properties["decimalTargetTypes"] = list(value)

    @property
    def hive_partitioning(self):
        """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta]
        When set, it configures hive partitioning support.

        The returned object is built from a copy of the stored resource, so
        it must be assigned back to this property for changes to take effect.

        .. note::
            **Experimental**. This feature is experimental and might change or
            have limited support.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
        """
        prop = self._properties.get("hivePartitioningOptions")
        if prop is None:
            return None
        return HivePartitioningOptions.from_api_repr(prop)

    @hive_partitioning.setter
    def hive_partitioning(self, value):
        prop = value.to_api_repr() if value is not None else None
        self._properties["hivePartitioningOptions"] = prop

    @property
    def reference_file_schema_uri(self):
        """Optional[str]:
        When creating an external table, the user can provide a reference file with the
        table schema. This is enabled for the following formats:

        AVRO, PARQUET, ORC
        """
        return self._properties.get("referenceFileSchemaUri")

    @reference_file_schema_uri.setter
    def reference_file_schema_uri(self, value):
        self._properties["referenceFileSchemaUri"] = value

    @property
    def ignore_unknown_values(self):
        """bool: If :data:`True`, extra values that are not represented in the
        table schema are ignored. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values
        """
        return self._properties.get("ignoreUnknownValues")

    @ignore_unknown_values.setter
    def ignore_unknown_values(self, value):
        self._properties["ignoreUnknownValues"] = value

    @property
    def max_bad_records(self):
        """int: The maximum number of bad records that BigQuery can ignore when
        reading data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records
        """
        return self._properties.get("maxBadRecords")

    @max_bad_records.setter
    def max_bad_records(self, value):
        self._properties["maxBadRecords"] = value

    @property
    def source_uris(self):
        """List[str]: URIs that point to your data in Google Cloud.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris
        """
        return self._properties.get("sourceUris", [])

    @source_uris.setter
    def source_uris(self, value):
        self._properties["sourceUris"] = value

    @property
    def schema(self):
        """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
        for the data.

        An unset schema, or one that was set to :data:`None`, reads back as
        an empty list.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
        """
        # ``or {}`` (rather than a ``.get`` default) also covers a stored
        # ``None``, which the setter writes for ``schema = None``.
        prop: Dict[str, Any] = typing.cast(
            Dict[str, Any], self._properties.get("schema") or {}
        )
        fields = prop.get("fields") or []
        return [SchemaField.from_api_repr(field) for field in fields]

    @schema.setter
    def schema(self, value):
        prop = value
        if value is not None:
            prop = {"fields": [field.to_api_repr() for field in value]}
        self._properties["schema"] = prop

    @property
    def connection_id(self):
        """Optional[str]: [Experimental] ID of a BigQuery Connection API
        resource.

        .. WARNING::

           This feature is experimental. Pre-GA features may have limited
           support, and changes to pre-GA features may not be compatible with
           other pre-GA versions.
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value):
        self._properties["connectionId"] = value

    @property
    def avro_options(self) -> Optional[AvroOptions]:
        """Additional properties to set if ``sourceFormat`` is set to AVRO.

        Returns :data:`None` when no Avro options are stored and the source
        format is not AVRO. For an AVRO configuration an empty options dict
        is created on first access. The returned object shares its property
        dict with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
        """
        if self.source_format == ExternalSourceFormat.AVRO:
            self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
        resource = self._properties.get(AvroOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = AvroOptions()
        options._properties = resource
        return options

    @avro_options.setter
    def avro_options(self, value):
        if self.source_format != ExternalSourceFormat.AVRO:
            msg = f"Cannot set Avro options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[AvroOptions._RESOURCE_NAME] = value._properties

    @property
    def bigtable_options(self) -> Optional[BigtableOptions]:
        """Additional properties to set if ``sourceFormat`` is set to BIGTABLE.

        Returns :data:`None` when no Bigtable options are stored and the
        source format is not BIGTABLE. For a BIGTABLE configuration an empty
        options dict is created on first access. The returned object shares
        its property dict with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
        """
        if self.source_format == ExternalSourceFormat.BIGTABLE:
            self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
        resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = BigtableOptions()
        options._properties = resource
        return options

    @bigtable_options.setter
    def bigtable_options(self, value):
        if self.source_format != ExternalSourceFormat.BIGTABLE:
            msg = f"Cannot set Bigtable options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[BigtableOptions._RESOURCE_NAME] = value._properties

    @property
    def csv_options(self) -> Optional[CSVOptions]:
        """Additional properties to set if ``sourceFormat`` is set to CSV.

        Returns :data:`None` when no CSV options are stored and the source
        format is not CSV. For a CSV configuration an empty options dict is
        created on first access. The returned object shares its property
        dict with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
        """
        if self.source_format == ExternalSourceFormat.CSV:
            self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
        resource = self._properties.get(CSVOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = CSVOptions()
        options._properties = resource
        return options

    @csv_options.setter
    def csv_options(self, value):
        if self.source_format != ExternalSourceFormat.CSV:
            msg = f"Cannot set CSV options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[CSVOptions._RESOURCE_NAME] = value._properties

    @property
    def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
        """Additional properties to set if ``sourceFormat`` is set to
        GOOGLE_SHEETS.

        Returns :data:`None` when no Google Sheets options are stored and the
        source format is not GOOGLE_SHEETS. For a GOOGLE_SHEETS configuration
        an empty options dict is created on first access. The returned object
        shares its property dict with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
        """
        if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
            self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
        resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = GoogleSheetsOptions()
        options._properties = resource
        return options

    @google_sheets_options.setter
    def google_sheets_options(self, value):
        if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
            msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties

    @property
    def parquet_options(self) -> Optional[ParquetOptions]:
        """Additional properties to set if ``sourceFormat`` is set to PARQUET.

        Returns :data:`None` when no Parquet options are stored and the
        source format is not PARQUET. For a PARQUET configuration an empty
        options dict is created on first access. The returned object shares
        its property dict with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
        """
        if self.source_format == ExternalSourceFormat.PARQUET:
            self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
        resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = ParquetOptions()
        options._properties = resource
        return options

    @parquet_options.setter
    def parquet_options(self, value):
        if self.source_format != ExternalSourceFormat.PARQUET:
            msg = f"Cannot set Parquet options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[ParquetOptions._RESOURCE_NAME] = value._properties

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "ExternalConfig":
        """Factory: construct an :class:`~.external_config.ExternalConfig`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of an :class:`~.external_config.ExternalConfig`
                instance in the same representation as is returned from the
                API. It must contain a ``"sourceFormat"`` key and is
                deep-copied, so the caller's dict is not shared.

        Returns:
            ExternalConfig: Configuration parsed from ``resource``.
        """
        config = cls(resource["sourceFormat"])
        config._properties = copy.deepcopy(resource)
        return config
1011
1012
class ExternalCatalogDatasetOptions:
    """Options defining open source compatible datasets living in the BigQuery catalog.

    Contains metadata of the open source database, schema or namespace
    represented by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location
            URI for all tables in the dataset. Equivalent to hive
            metastore's database locationUri. Maximum length of 1024
            characters.
        parameters (Optional[dict[str, Any]]): A map of key value pairs
            defining the parameters and properties of the open source
            schema. Maximum size of 2Mib.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Go through the property setters so both arguments are
        # type-checked; each key is written even when the value is None.
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.
        Equivalent to hive metastore's database locationUri. Maximum length of
        1024 characters."""

        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        self._properties["defaultStorageLocationUri"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema. Maximum size of 2Mib."""

        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        The internal properties dictionary itself is returned, not a copy.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        The new instance keeps a reference to ``api_repr`` rather than
        copying it.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance
1084
1085
class ExternalCatalogTableOptions:
    """Metadata about open source compatible table. The fields contained in these
    options correspond to hive metastore's table level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the
            credentials to be used to read external storage, such as Azure
            Blob, Cloud Storage, or S3. The connection is needed to read the
            open source table from BigQuery Engine. The connection_id can
            have the form ``<project_id>.<location_id>.<connection_id>`` or
            ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
        parameters (Union[Dict[str, Any], None]): A map of key value pairs
            defining the parameters and properties of the open source table.
            Corresponds with hive meta store table parameters. Maximum size
            of 4Mib.
        storage_descriptor (Optional[StorageDescriptor]): A storage
            descriptor containing information about the physical storage of
            this table.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Optional[schema.StorageDescriptor] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Go through the property setters so every argument is
        # type-checked; each key is written even when the value is None.
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be
        used to read external storage, such as Azure Blob, Cloud Storage, or
        S3. The connection is needed to read the open source table from
        BigQuery Engine. The connection_id can have the form
        ``<project_id>.<location_id>.<connection_id>`` or
        ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
        """

        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        self._properties["connectionId"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table. Corresponds with hive meta
        store table parameters. Maximum size of 4Mib.
        """

        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    @property
    def storage_descriptor(self) -> Any:
        """Optional. A storage descriptor containing information about the
        physical storage of this table.

        Built from the stored resource via
        ``StorageDescriptor.from_api_repr`` on every access; ``None`` when
        no descriptor is stored.
        """

        resource = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])
        if resource is None:
            return None
        return schema.StorageDescriptor.from_api_repr(resource)

    @storage_descriptor.setter
    def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
        checked = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        # Descriptor objects are stored in their API form; dicts and None
        # are stored as given.
        resource = (
            checked.to_api_repr()
            if isinstance(checked, schema.StorageDescriptor)
            else checked
        )
        self._properties["storageDescriptor"] = resource

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        The internal properties dictionary itself is returned, not a copy.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """

        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        The new instance keeps a reference to ``api_repr`` rather than
        copying it.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance