1# Copyright 2017 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Define classes that describe external data sources.
16
17 These are used for both Table.externalDataConfiguration and
18 Job.configuration.query.tableDefinitions.
19"""
20
21from __future__ import absolute_import, annotations
22
23import base64
24import copy
25import typing
26from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
27
28from google.cloud.bigquery._helpers import _to_bytes
29from google.cloud.bigquery._helpers import _bytes_to_json
30from google.cloud.bigquery._helpers import _int_or_none
31from google.cloud.bigquery._helpers import _str_or_none
32from google.cloud.bigquery import _helpers
33from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
34from google.cloud.bigquery import schema
35from google.cloud.bigquery.schema import SchemaField
36
37
class ExternalSourceFormat:
    """Names of the file formats accepted for external data sources.

    The values allowed for external data sources are not the same set as
    the ones used when loading data (see
    :class:`~google.cloud.bigquery.job.SourceFormat`).
    """

    CSV = "CSV"
    """Specifies CSV format."""

    GOOGLE_SHEETS = "GOOGLE_SHEETS"
    """Specifies Google Sheets format."""

    NEWLINE_DELIMITED_JSON = "NEWLINE_DELIMITED_JSON"
    """Specifies newline delimited JSON format."""

    AVRO = "AVRO"
    """Specifies Avro format."""

    DATASTORE_BACKUP = "DATASTORE_BACKUP"
    """Specifies datastore backup format."""

    ORC = "ORC"
    """Specifies ORC format."""

    PARQUET = "PARQUET"
    """Specifies Parquet format."""

    BIGTABLE = "BIGTABLE"
    """Specifies Bigtable format."""
69
70
class BigtableColumn:
    """Settings for a single Bigtable column.

    Every property reads from, or writes to, one key of the underlying
    API-shaped dictionary held in ``_properties``.
    """

    def __init__(self):
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def field_name(self):
        """str: Identifier to use when the qualifier is not a valid BigQuery
        field identifier.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.field_name
        """
        return self._properties.get("fieldName")

    @field_name.setter
    def field_name(self, value):
        self._properties["fieldName"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of the value in this
        column is exposed.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def qualifier_encoded(self):
        """Optional[bytes]: The column qualifier in binary form.

        The stored value is base64 text; reading this property decodes it
        and returns the raw bytes, or :data:`None` when no qualifier has
        been stored. Assigning passes the value through ``_bytes_to_json``
        before it is stored.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_encoded
        """
        stored = self._properties.get("qualifierEncoded")
        if stored is not None:
            return base64.standard_b64decode(_to_bytes(stored))
        return None

    @qualifier_encoded.setter
    def qualifier_encoded(self, value):
        self._properties["qualifierEncoded"] = _bytes_to_json(value)

    @property
    def qualifier_string(self):
        """str: The column qualifier as a valid UTF-8 string.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.qualifier_string
        """
        return self._properties.get("qualifierString")

    @qualifier_string.setter
    def qualifier_string(self, value):
        self._properties["qualifierString"] = value

    @property
    def type_(self):
        """str: The type that values in cells of this column are converted to.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumn.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        snapshot = copy.deepcopy(self._properties)
        return snapshot

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumn":
        """Factory: construct a :class:`~.external_config.BigtableColumn`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumn`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dictionary is not
                shared with the new instance.

        Returns:
            external_config.BigtableColumn: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
189
190
class BigtableColumnFamily:
    """Settings for one Bigtable column family.

    Every property reads from, or writes to, one key of the underlying
    API-shaped dictionary held in ``_properties``.
    """

    def __init__(self):
        self._properties = {}

    @property
    def encoding(self):
        """str: Encoding of the values when the type is not `STRING`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def family_id(self):
        """str: Identifier of the column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.family_id
        """
        return self._properties.get("familyId")

    @family_id.setter
    def family_id(self, value):
        self._properties["familyId"] = value

    @property
    def only_read_latest(self):
        """bool: When set, only the latest version of each value is exposed
        for all columns in this column family.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.only_read_latest
        """
        return self._properties.get("onlyReadLatest")

    @only_read_latest.setter
    def only_read_latest(self, value):
        self._properties["onlyReadLatest"] = value

    @property
    def type_(self):
        """str: The type that values in cells of this column family are
        converted to.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.type
        """
        return self._properties.get("type")

    @type_.setter
    def type_(self, value):
        self._properties["type"] = value

    @property
    def columns(self):
        """List[BigtableColumn]: Columns that should be exposed as
        individual fields.

        Each access builds new :class:`BigtableColumn` objects from the
        stored resources; an empty list is returned when none are stored.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableColumnFamily.FIELDS.columns
        """
        parsed = []
        for raw_column in self._properties.get("columns", []):
            parsed.append(BigtableColumn.from_api_repr(raw_column))
        return parsed

    @columns.setter
    def columns(self, value):
        serialized = []
        for column in value:
            serialized.append(column.to_api_repr())
        self._properties["columns"] = serialized

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        snapshot = copy.deepcopy(self._properties)
        return snapshot

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableColumnFamily":
        """Factory: construct a :class:`~.external_config.BigtableColumnFamily`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableColumnFamily`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dictionary is not
                shared with the new instance.

        Returns:
            :class:`~.external_config.BigtableColumnFamily`:
                Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
292
293
class BigtableOptions:
    """Settings that control how Bigtable tables are treated as BigQuery
    tables.

    Every property reads from, or writes to, one key of the underlying
    API-shaped dictionary held in ``_properties``.
    """

    _SOURCE_FORMAT = "BIGTABLE"
    _RESOURCE_NAME = "bigtableOptions"

    def __init__(self):
        self._properties = {}

    @property
    def ignore_unspecified_column_families(self):
        """bool: If :data:`True`, ignore columns not specified in
        :attr:`column_families` list. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.ignore_unspecified_column_families
        """
        return self._properties.get("ignoreUnspecifiedColumnFamilies")

    @ignore_unspecified_column_families.setter
    def ignore_unspecified_column_families(self, value):
        self._properties["ignoreUnspecifiedColumnFamilies"] = value

    @property
    def read_rowkey_as_string(self):
        """bool: If :data:`True`, rowkey column families will be read and
        converted to string. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.read_rowkey_as_string
        """
        return self._properties.get("readRowkeyAsString")

    @read_rowkey_as_string.setter
    def read_rowkey_as_string(self, value):
        self._properties["readRowkeyAsString"] = value

    @property
    def column_families(self):
        """List[:class:`~.external_config.BigtableColumnFamily`]: Column
        families to expose in the table schema along with their types.

        Each access builds new :class:`BigtableColumnFamily` objects from the
        stored resources; an empty list is returned when none are stored.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#BigtableOptions.FIELDS.column_families
        """
        parsed = []
        for raw_family in self._properties.get("columnFamilies", []):
            parsed.append(BigtableColumnFamily.from_api_repr(raw_family))
        return parsed

    @column_families.setter
    def column_families(self, value):
        serialized = []
        for family in value:
            serialized.append(family.to_api_repr())
        self._properties["columnFamilies"] = serialized

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        snapshot = copy.deepcopy(self._properties)
        return snapshot

    @classmethod
    def from_api_repr(cls, resource: dict) -> "BigtableOptions":
        """Factory: construct a :class:`~.external_config.BigtableOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.BigtableOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dictionary is not
                shared with the new instance.

        Returns:
            BigtableOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
372
373
class CSVOptions(object):
    """Options that describe how to treat CSV files as BigQuery tables.

    Every property reads from, or writes to, one key of the underlying
    API-shaped dictionary held in ``_properties``.
    """

    _SOURCE_FORMAT = "CSV"
    _RESOURCE_NAME = "csvOptions"

    def __init__(self):
        self._properties = {}

    @property
    def allow_jagged_rows(self):
        """bool: If :data:`True`, BigQuery treats missing trailing columns as
        null values. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_jagged_rows
        """
        return self._properties.get("allowJaggedRows")

    @allow_jagged_rows.setter
    def allow_jagged_rows(self, value):
        self._properties["allowJaggedRows"] = value

    @property
    def allow_quoted_newlines(self):
        """bool: If :data:`True`, quoted data sections that contain newline
        characters in a CSV file are allowed. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.allow_quoted_newlines
        """
        return self._properties.get("allowQuotedNewlines")

    @allow_quoted_newlines.setter
    def allow_quoted_newlines(self, value):
        self._properties["allowQuotedNewlines"] = value

    @property
    def encoding(self):
        """str: The character encoding of the data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.encoding
        """
        return self._properties.get("encoding")

    @encoding.setter
    def encoding(self, value):
        self._properties["encoding"] = value

    @property
    def preserve_ascii_control_characters(self):
        """bool: Indicates if the embedded ASCII control characters
        (the first 32 characters in the ASCII table, code points 0x00 through
        0x1F) are preserved.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
        """
        return self._properties.get("preserveAsciiControlCharacters")

    @preserve_ascii_control_characters.setter
    def preserve_ascii_control_characters(self, value):
        self._properties["preserveAsciiControlCharacters"] = value

    @property
    def field_delimiter(self):
        """str: The separator for fields in a CSV file. Defaults to comma (',').

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.field_delimiter
        """
        return self._properties.get("fieldDelimiter")

    @field_delimiter.setter
    def field_delimiter(self, value):
        self._properties["fieldDelimiter"] = value

    @property
    def quote_character(self):
        """str: The value that is used to quote data sections in a CSV file.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.quote
        """
        return self._properties.get("quote")

    @quote_character.setter
    def quote_character(self, value):
        self._properties["quote"] = value

    @property
    def skip_leading_rows(self):
        """Optional[int]: The number of rows at the top of a CSV file to skip.

        The value is stored as a string in the API resource and converted
        back with ``_int_or_none`` when read. Assigning :data:`None` stores
        :data:`None`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # ``str(None)`` would store the literal text "None", which is not a
        # number; keep ``None`` as ``None`` so the field can be cleared.
        self._properties["skipLeadingRows"] = None if value is None else str(value)

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "CSVOptions":
        """Factory: construct a :class:`~.external_config.CSVOptions` instance
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.CSVOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dictionary is not
                shared with the new instance.

        Returns:
            CSVOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config
502
503
class GoogleSheetsOptions(object):
    """Options that describe how to treat Google Sheets as BigQuery tables.

    Every property reads from, or writes to, one key of the underlying
    API-shaped dictionary held in ``_properties``.
    """

    _SOURCE_FORMAT = "GOOGLE_SHEETS"
    _RESOURCE_NAME = "googleSheetsOptions"

    def __init__(self):
        self._properties = {}

    @property
    def skip_leading_rows(self):
        """Optional[int]: The number of rows at the top of a sheet that
        BigQuery will skip when reading the data.

        The value is stored as a string in the API resource and converted
        back with ``_int_or_none`` when read. Assigning :data:`None` stores
        :data:`None`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.skip_leading_rows
        """
        return _int_or_none(self._properties.get("skipLeadingRows"))

    @skip_leading_rows.setter
    def skip_leading_rows(self, value):
        # ``str(None)`` would store the literal text "None", which is not a
        # number; keep ``None`` as ``None`` so the field can be cleared.
        self._properties["skipLeadingRows"] = None if value is None else str(value)

    @property
    def range(self):
        """str: The range of a sheet that BigQuery will query from.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#GoogleSheetsOptions.FIELDS.range
        """
        return _str_or_none(self._properties.get("range"))

    @range.setter
    def range(self, value):
        self._properties["range"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        return copy.deepcopy(self._properties)

    @classmethod
    def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
        """Factory: construct a :class:`~.external_config.GoogleSheetsOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.GoogleSheetsOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dictionary is not
                shared with the new instance.

        Returns:
            GoogleSheetsOptions: Configuration parsed from ``resource``.
        """
        config = cls()
        config._properties = copy.deepcopy(resource)
        return config
565
566
# Option classes that ``ExternalConfig.options`` scans, in this order, looking
# for the one whose ``_SOURCE_FORMAT`` equals the configuration's source format.
_OPTION_CLASSES = (
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
)

# Static-typing alias spanning the same classes as ``_OPTION_CLASSES``; used as
# the return annotation of ``ExternalConfig.options``.
OptionsType = Union[
    AvroOptions,
    BigtableOptions,
    CSVOptions,
    GoogleSheetsOptions,
    ParquetOptions,
]
582
583
class HivePartitioningOptions:
    """[Beta] Settings that configure hive partitioning.

    .. note::
        **Experimental**. This feature is experimental and might change or
        have limited support.

    See
    https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
    """

    def __init__(self) -> None:
        # API-shaped storage; each property maps onto one key of this dict.
        self._properties: Dict[str, Any] = {}

    @property
    def mode(self):
        """Optional[str]: When set, the mode of hive partitioning to use when
        reading data.

        Two modes are supported: "AUTO" and "STRINGS".

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.mode
        """
        return self._properties.get("mode")

    @mode.setter
    def mode(self, value):
        self._properties["mode"] = value

    @property
    def source_uri_prefix(self):
        """Optional[str]: Common prefix for all source URIs; required when
        hive partition detection is requested.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.source_uri_prefix
        """
        return self._properties.get("sourceUriPrefix")

    @source_uri_prefix.setter
    def source_uri_prefix(self, value):
        self._properties["sourceUriPrefix"] = value

    @property
    def require_partition_filter(self):
        """Optional[bool]: If set to true, queries over the partitioned table
        must specify a partition filter that can be used for partition
        elimination.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions.FIELDS.require_partition_filter
        """
        return self._properties.get("requirePartitionFilter")

    @require_partition_filter.setter
    def require_partition_filter(self, value):
        self._properties["requirePartitionFilter"] = value

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        snapshot = copy.deepcopy(self._properties)
        return snapshot

    @classmethod
    def from_api_repr(cls, resource: dict) -> "HivePartitioningOptions":
        """Factory: construct a :class:`~.external_config.HivePartitioningOptions`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of a :class:`~.external_config.HivePartitioningOptions`
                instance in the same representation as is returned from the
                API. It is deep-copied, so the caller's dictionary is not
                shared with the new instance.

        Returns:
            HivePartitioningOptions: Configuration parsed from ``resource``.
        """
        instance = cls()
        instance._properties = copy.deepcopy(resource)
        return instance
667
668
class ExternalConfig(object):
    """Description of an external data source.

    All settings are kept in ``_properties``, a dictionary that uses the key
    names of the BigQuery API resource; the properties below are typed views
    over individual keys of that dictionary.

    Args:
        source_format (ExternalSourceFormat):
            See :attr:`source_format`.
    """

    def __init__(self, source_format) -> None:
        self._properties = {"sourceFormat": source_format}

    @property
    def source_format(self):
        """:class:`~.external_config.ExternalSourceFormat`:
        Format of external source.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_format
        """
        return self._properties["sourceFormat"]

    @property
    def options(self) -> Optional[OptionsType]:
        """Source-specific options.

        Returns an options object matching :attr:`source_format`, backed by
        (not copied from) the corresponding sub-dictionary of this
        configuration, which is created empty on first access. Returns
        :data:`None` when no option class matches the source format.
        """
        for optcls in _OPTION_CLASSES:
            # The code below is too much magic for mypy to handle.
            if self.source_format == optcls._SOURCE_FORMAT:  # type: ignore
                options: OptionsType = optcls()  # type: ignore
                # Share the stored dict so edits made through the returned
                # object are reflected in this configuration.
                options._properties = self._properties.setdefault(
                    optcls._RESOURCE_NAME, {}  # type: ignore
                )
                return options

        # No matching source format found.
        return None

    @property
    def autodetect(self):
        """bool: If :data:`True`, try to detect schema and format options
        automatically.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.autodetect
        """
        return self._properties.get("autodetect")

    @autodetect.setter
    def autodetect(self, value):
        self._properties["autodetect"] = value

    @property
    def compression(self):
        """str: The compression type of the data source.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.compression
        """
        return self._properties.get("compression")

    @compression.setter
    def compression(self, value):
        self._properties["compression"] = value

    @property
    def decimal_target_types(self) -> Optional[FrozenSet[str]]:
        """Possible SQL data types to which the source decimal values are converted.

        Returned as a ``frozenset`` (or :data:`None` when unset). Assigning
        :data:`None` removes the setting from the configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.decimal_target_types

        .. versionadded:: 2.21.0
        """
        prop = self._properties.get("decimalTargetTypes")
        if prop is not None:
            prop = frozenset(prop)
        return prop

    @decimal_target_types.setter
    def decimal_target_types(self, value: Optional[Iterable[str]]):
        if value is not None:
            self._properties["decimalTargetTypes"] = list(value)
        else:
            # Drop the key entirely rather than storing ``None``.
            self._properties.pop("decimalTargetTypes", None)

    @property
    def hive_partitioning(self):
        """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta]
        When set, it configures hive partitioning support.

        The returned object is built from a copy of the stored resource, so
        it must be assigned back to this property for changes to take effect.

        .. note::
            **Experimental**. This feature is experimental and might change or
            have limited support.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
        """
        prop = self._properties.get("hivePartitioningOptions")
        if prop is None:
            return None
        return HivePartitioningOptions.from_api_repr(prop)

    @hive_partitioning.setter
    def hive_partitioning(self, value):
        prop = value.to_api_repr() if value is not None else None
        self._properties["hivePartitioningOptions"] = prop

    @property
    def reference_file_schema_uri(self):
        """Optional[str]:
        When creating an external table, the user can provide a reference file with the
        table schema. This is enabled for the following formats:

        AVRO, PARQUET, ORC
        """
        return self._properties.get("referenceFileSchemaUri")

    @reference_file_schema_uri.setter
    def reference_file_schema_uri(self, value):
        self._properties["referenceFileSchemaUri"] = value

    @property
    def ignore_unknown_values(self):
        """bool: If :data:`True`, extra values that are not represented in the
        table schema are ignored. Defaults to :data:`False`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.ignore_unknown_values
        """
        return self._properties.get("ignoreUnknownValues")

    @ignore_unknown_values.setter
    def ignore_unknown_values(self, value):
        self._properties["ignoreUnknownValues"] = value

    @property
    def max_bad_records(self):
        """int: The maximum number of bad records that BigQuery can ignore when
        reading data.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.max_bad_records
        """
        return self._properties.get("maxBadRecords")

    @max_bad_records.setter
    def max_bad_records(self, value):
        self._properties["maxBadRecords"] = value

    @property
    def source_uris(self):
        """List[str]: URIs that point to your data in Google Cloud.

        An empty list is returned when no URIs are stored.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.source_uris
        """
        return self._properties.get("sourceUris", [])

    @source_uris.setter
    def source_uris(self, value):
        self._properties["sourceUris"] = value

    @property
    def schema(self):
        """List[:class:`~google.cloud.bigquery.schema.SchemaField`]: The schema
        for the data.

        An empty list is returned when no schema is stored, including after
        the schema has been set to :data:`None`.

        See
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
        """
        # The setter stores ``None`` when given ``None``, so a plain
        # ``.get("schema", {})`` could hand back ``None``; treat any falsy
        # stored value as "no schema".
        prop: Dict[str, Any] = typing.cast(
            Dict[str, Any], self._properties.get("schema") or {}
        )
        fields = prop.get("fields") or []
        return [SchemaField.from_api_repr(field) for field in fields]

    @schema.setter
    def schema(self, value):
        prop = value
        if value is not None:
            prop = {"fields": [field.to_api_repr() for field in value]}
        self._properties["schema"] = prop

    @property
    def date_format(self) -> Optional[str]:
        """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format
        """
        result = self._properties.get("dateFormat")
        return typing.cast(str, result)

    @date_format.setter
    def date_format(self, value: Optional[str]):
        self._properties["dateFormat"] = value

    @property
    def time_zone(self) -> Optional[str]:
        """Optional[str]: Time zone used when parsing timestamp values that do not
        have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
        format is an IANA timezone string (e.g. America/Los_Angeles).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
        """

        result = self._properties.get("timeZone")
        return typing.cast(str, result)

    @time_zone.setter
    def time_zone(self, value: Optional[str]):
        self._properties["timeZone"] = value

    @property
    def connection_id(self):
        """Optional[str]: [Experimental] ID of a BigQuery Connection API
        resource.

        .. WARNING::

           This feature is experimental. Pre-GA features may have limited
           support, and changes to pre-GA features may not be compatible with
           other pre-GA versions.
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value):
        self._properties["connectionId"] = value

    @property
    def avro_options(self) -> Optional[AvroOptions]:
        """Additional properties to set if ``sourceFormat`` is set to AVRO.

        When the source format is AVRO an empty options resource is created
        on first access; otherwise :data:`None` is returned unless a resource
        is already stored. The returned object shares its storage with this
        configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.avro_options
        """
        if self.source_format == ExternalSourceFormat.AVRO:
            self._properties.setdefault(AvroOptions._RESOURCE_NAME, {})
        resource = self._properties.get(AvroOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = AvroOptions()
        options._properties = resource
        return options

    @avro_options.setter
    def avro_options(self, value):
        if self.source_format != ExternalSourceFormat.AVRO:
            msg = f"Cannot set Avro options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[AvroOptions._RESOURCE_NAME] = value._properties

    @property
    def bigtable_options(self) -> Optional[BigtableOptions]:
        """Additional properties to set if ``sourceFormat`` is set to BIGTABLE.

        When the source format is BIGTABLE an empty options resource is
        created on first access; otherwise :data:`None` is returned unless a
        resource is already stored. The returned object shares its storage
        with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.bigtable_options
        """
        if self.source_format == ExternalSourceFormat.BIGTABLE:
            self._properties.setdefault(BigtableOptions._RESOURCE_NAME, {})
        resource = self._properties.get(BigtableOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = BigtableOptions()
        options._properties = resource
        return options

    @bigtable_options.setter
    def bigtable_options(self, value):
        if self.source_format != ExternalSourceFormat.BIGTABLE:
            msg = f"Cannot set Bigtable options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[BigtableOptions._RESOURCE_NAME] = value._properties

    @property
    def csv_options(self) -> Optional[CSVOptions]:
        """Additional properties to set if ``sourceFormat`` is set to CSV.

        When the source format is CSV an empty options resource is created
        on first access; otherwise :data:`None` is returned unless a resource
        is already stored. The returned object shares its storage with this
        configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.csv_options
        """
        if self.source_format == ExternalSourceFormat.CSV:
            self._properties.setdefault(CSVOptions._RESOURCE_NAME, {})
        resource = self._properties.get(CSVOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = CSVOptions()
        options._properties = resource
        return options

    @csv_options.setter
    def csv_options(self, value):
        if self.source_format != ExternalSourceFormat.CSV:
            msg = f"Cannot set CSV options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[CSVOptions._RESOURCE_NAME] = value._properties

    @property
    def google_sheets_options(self) -> Optional[GoogleSheetsOptions]:
        """Additional properties to set if ``sourceFormat`` is set to
        GOOGLE_SHEETS.

        When the source format is GOOGLE_SHEETS an empty options resource is
        created on first access; otherwise :data:`None` is returned unless a
        resource is already stored. The returned object shares its storage
        with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.google_sheets_options
        """
        if self.source_format == ExternalSourceFormat.GOOGLE_SHEETS:
            self._properties.setdefault(GoogleSheetsOptions._RESOURCE_NAME, {})
        resource = self._properties.get(GoogleSheetsOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = GoogleSheetsOptions()
        options._properties = resource
        return options

    @google_sheets_options.setter
    def google_sheets_options(self, value):
        if self.source_format != ExternalSourceFormat.GOOGLE_SHEETS:
            msg = f"Cannot set Google Sheets options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[GoogleSheetsOptions._RESOURCE_NAME] = value._properties

    @property
    def parquet_options(self) -> Optional[ParquetOptions]:
        """Additional properties to set if ``sourceFormat`` is set to PARQUET.

        When the source format is PARQUET an empty options resource is
        created on first access; otherwise :data:`None` is returned unless a
        resource is already stored. The returned object shares its storage
        with this configuration.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.parquet_options
        """
        if self.source_format == ExternalSourceFormat.PARQUET:
            self._properties.setdefault(ParquetOptions._RESOURCE_NAME, {})
        resource = self._properties.get(ParquetOptions._RESOURCE_NAME)
        if resource is None:
            return None
        options = ParquetOptions()
        options._properties = resource
        return options

    @parquet_options.setter
    def parquet_options(self, value):
        if self.source_format != ExternalSourceFormat.PARQUET:
            msg = f"Cannot set Parquet options, source format is {self.source_format}"
            raise TypeError(msg)
        self._properties[ParquetOptions._RESOURCE_NAME] = value._properties

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A deep copy of the stored properties, in the format used by
                the BigQuery API.
        """
        config = copy.deepcopy(self._properties)
        return config

    @classmethod
    def from_api_repr(cls, resource: dict) -> "ExternalConfig":
        """Factory: construct an :class:`~.external_config.ExternalConfig`
        instance given its API representation.

        Args:
            resource (Dict[str, Any]):
                Definition of an :class:`~.external_config.ExternalConfig`
                instance in the same representation as is returned from the
                API. It must contain a ``"sourceFormat"`` key and is
                deep-copied, so the caller's dictionary is not shared with
                the new instance.

        Returns:
            ExternalConfig: Configuration parsed from ``resource``.

        Raises:
            KeyError: If ``resource`` has no ``"sourceFormat"`` entry.
        """
        config = cls(resource["sourceFormat"])
        config._properties = copy.deepcopy(resource)
        return config
1042
1043
class ExternalCatalogDatasetOptions:
    """Options describing an open source compatible dataset that lives in the
    BigQuery catalog.

    Holds metadata of the open source database, schema or namespace
    represented by the current dataset.

    Args:
        default_storage_location_uri (Optional[str]): The storage location URI
            for all tables in the dataset. Equivalent to hive metastore's
            database locationUri. Maximum length of 1024 characters.
        parameters (Optional[dict[str, Any]]): A map of key value pairs
            defining the parameters and properties of the open source schema.
            Maximum size of 2MiB.
    """

    def __init__(
        self,
        default_storage_location_uri: Optional[str] = None,
        parameters: Optional[Dict[str, Any]] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Go through the property setters so both values are validated.
        self.default_storage_location_uri = default_storage_location_uri
        self.parameters = parameters

    @property
    def default_storage_location_uri(self) -> Optional[str]:
        """Optional. The storage location URI for all tables in the dataset.

        Equivalent to hive metastore's database locationUri. Maximum length
        of 1024 characters. Assigned values are checked with
        ``_helpers._isinstance_or_raise`` (``str`` or ``None``).
        """
        return self._properties.get("defaultStorageLocationUri")

    @default_storage_location_uri.setter
    def default_storage_location_uri(self, value: Optional[str]):
        self._properties[
            "defaultStorageLocationUri"
        ] = _helpers._isinstance_or_raise(value, str, none_allowed=True)

    @property
    def parameters(self) -> Optional[Dict[str, Any]]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source schema.

        Maximum size of 2MiB. Assigned values are checked with
        ``_helpers._isinstance_or_raise`` (``dict`` or ``None``).
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[Dict[str, Any]]):
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the internal properties mapping itself, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. The
                mapping is adopted directly (not copied), so the new instance
                and the caller share it.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance
1115
1116
class ExternalCatalogTableOptions:
    """Metadata about an open source compatible table. The fields contained
    in these options correspond to hive metastore's table level properties.

    Args:
        connection_id (Optional[str]): The connection specifying the
            credentials to be used to read external storage, such as Azure
            Blob, Cloud Storage, or S3. The connection is needed to read the
            open source table from BigQuery Engine. The connection_id can
            have the form ``<project_id>.<location_id>.<connection_id>`` or
            ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
        parameters (Union[Dict[str, Any], None]): A map of key value pairs
            defining the parameters and properties of the open source table.
            Corresponds with hive meta store table parameters. Maximum size
            of 4MiB.
        storage_descriptor (Optional[StorageDescriptor]): A storage descriptor
            containing information about the physical storage of this table.
    """

    def __init__(
        self,
        connection_id: Optional[str] = None,
        parameters: Union[Dict[str, Any], None] = None,
        storage_descriptor: Optional[schema.StorageDescriptor] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Go through the property setters so every value is validated.
        self.connection_id = connection_id
        self.parameters = parameters
        self.storage_descriptor = storage_descriptor

    @property
    def connection_id(self) -> Optional[str]:
        """Optional. The connection specifying the credentials to be used to
        read external storage, such as Azure Blob, Cloud Storage, or S3.

        The connection is needed to read the open source table from BigQuery
        Engine. The connection_id can have the form
        ``<project_id>.<location_id>.<connection_id>`` or
        ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
        Assigned values are checked with ``_helpers._isinstance_or_raise``
        (``str`` or ``None``).
        """
        return self._properties.get("connectionId")

    @connection_id.setter
    def connection_id(self, value: Optional[str]):
        self._properties["connectionId"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Union[Dict[str, Any], None]:
        """Optional. A map of key value pairs defining the parameters and
        properties of the open source table.

        Corresponds with hive meta store table parameters. Maximum size of
        4MiB. Assigned values are checked with
        ``_helpers._isinstance_or_raise`` (``dict`` or ``None``).
        """
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Union[Dict[str, Any], None]):
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    @property
    def storage_descriptor(self) -> Any:
        """Optional. A storage descriptor containing information about the
        physical storage of this table.

        The getter builds a ``schema.StorageDescriptor`` from the stored
        resource on every access, or returns ``None`` when nothing is stored.
        The setter accepts a ``schema.StorageDescriptor``, a ``dict`` or
        ``None``; a descriptor object is stored via its ``to_api_repr()``.
        """
        raw = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])
        if raw is None:
            return None
        return schema.StorageDescriptor.from_api_repr(raw)

    @storage_descriptor.setter
    def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
        checked = _helpers._isinstance_or_raise(
            value, (schema.StorageDescriptor, dict), none_allowed=True
        )
        # Descriptor objects are stored in API form; dicts and None are
        # stored unchanged.
        self._properties["storageDescriptor"] = (
            checked.to_api_repr()
            if isinstance(checked, schema.StorageDescriptor)
            else checked
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the internal properties mapping itself, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. The
                mapping is adopted directly (not copied), so the new instance
                and the caller share it.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance