1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Schemas for BigQuery tables / queries."""
16
17from __future__ import annotations
18import enum
19import typing
20from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
21
22from google.cloud.bigquery import _helpers
23from google.cloud.bigquery import standard_sql
24from google.cloud.bigquery import enums
25from google.cloud.bigquery.enums import StandardSqlTypeNames
26
27
# Field type names that denote a nested record; both spellings are listed.
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
#
# Keys are upper-case type name strings; values are ``StandardSqlTypeNames``
# members. Several keys are alternate spellings that map to the same member
# (e.g. "INTEGER" and "INT64", "RECORD" and "STRUCT").
LEGACY_TO_STANDARD_TYPES = {
    "STRING": StandardSqlTypeNames.STRING,
    "BYTES": StandardSqlTypeNames.BYTES,
    "INTEGER": StandardSqlTypeNames.INT64,
    "INT64": StandardSqlTypeNames.INT64,
    "FLOAT": StandardSqlTypeNames.FLOAT64,
    "FLOAT64": StandardSqlTypeNames.FLOAT64,
    "NUMERIC": StandardSqlTypeNames.NUMERIC,
    "BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC,
    "BOOLEAN": StandardSqlTypeNames.BOOL,
    "BOOL": StandardSqlTypeNames.BOOL,
    "GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY,
    "RECORD": StandardSqlTypeNames.STRUCT,
    "STRUCT": StandardSqlTypeNames.STRUCT,
    "TIMESTAMP": StandardSqlTypeNames.TIMESTAMP,
    "DATE": StandardSqlTypeNames.DATE,
    "TIME": StandardSqlTypeNames.TIME,
    "DATETIME": StandardSqlTypeNames.DATETIME,
    "FOREIGN": StandardSqlTypeNames.FOREIGN,
    # no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""Mapping from legacy SQL (and GoogleSQL alias) type name strings to the corresponding ``StandardSqlTypeNames`` members."""
55
56
class _DefaultSentinel(enum.Enum):
    """Object used as 'sentinel' indicating default value should be used.

    Uses enum so that pytype/mypy knows that this is the only possible value.
    https://stackoverflow.com/a/60605919/101923

    Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8.
    https://docs.python.org/3/library/typing.html#typing.Literal
    """

    # The sole member; code in this module compares against it by identity
    # (``is`` / ``is not``).
    DEFAULT_VALUE = object()


# Module-level alias used as the default for several optional ``SchemaField``
# constructor arguments, so that "argument not provided" can be told apart
# from an explicitly passed value such as ``None``.
_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE
71
72
class FieldElementType(object):
    """Element type of a field (for example the subtype of a RANGE field).

    Args:
        element_type (str): Name of the element type. It is stored
            upper-cased.
    """

    def __init__(self, element_type: str):
        # Single-key resource dict mirroring the API representation.
        self._properties = {"type": element_type.upper()}

    @property
    def element_type(self):
        """Optional[str]: The stored (upper-cased) element type name."""
        return self._properties.get("type")

    @classmethod
    def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]:
        """Factory: build a FieldElementType from its API representation.

        Args:
            api_repr (Optional[Dict[str, str]]): Field element type as
                returned from the API. Must contain a ``"type"`` key when
                it is non-empty.

        Returns:
            Optional[google.cloud.bigquery.FieldElementType]:
                The parsed object, or ``None`` when ``api_repr`` is ``None``
                or empty.
        """
        if api_repr:
            type_name = api_repr["type"]
            return cls(type_name.upper())
        return None

    def to_api_repr(self) -> dict:
        """Return the API resource representation of this element type.

        The internal properties dict itself is returned, not a copy.

        Returns:
            Dict[str, str]: Field element type represented as an API resource.
        """
        return self._properties
111
112
class SchemaField(object):
    """Describe a single field within a table schema.

    Args:
        name: The name of the field.

        field_type:
            The type of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type

        mode:
            Defaults to ``'NULLABLE'``. The mode of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode

        description: Description for the field.

        fields: Subfields (requires ``field_type`` of 'RECORD').

        policy_tags: The policy tag list for the field.

        precision:
            Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.

        scale:
            Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.

        max_length: Maximum length of fields with STRING or BYTES type.

        default_value_expression: str, Optional
            Used to specify the default value of a field using a SQL expression. It can only be set for
            top level fields (columns).

            You can use a struct or array expression to specify default value for the entire struct or
            array. The valid SQL expressions are:

            - Literals for all data types, including STRUCT and ARRAY.

            - The following functions:

                `CURRENT_TIMESTAMP`
                `CURRENT_TIME`
                `CURRENT_DATE`
                `CURRENT_DATETIME`
                `GENERATE_UUID`
                `RAND`
                `SESSION_USER`
                `ST_GEOPOINT`

            - Struct or array composed with the above allowed functions, for example:

                "[CURRENT_DATE(), DATE '2020-01-01']"

        range_element_type: FieldElementType, str, Optional
            The subtype of the RANGE, if the type of this field is RANGE. If
            the type is RANGE, this field is required. Possible values for the
            field element type of a RANGE include `DATE`, `DATETIME` and
            `TIMESTAMP`. A string is normalized the same way as a
            ``FieldElementType`` (upper-cased).

        rounding_mode: Union[enums.RoundingMode, str, None]
            Specifies the rounding mode to be used when storing values of
            NUMERIC and BIGNUMERIC type.

            Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
            ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
            when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
            values.

            For Scale: 0
            1.1, 1.2, 1.3, 1.4 => 1
            1.5, 1.6, 1.7, 1.8, 1.9 => 2

            ROUND_HALF_EVEN rounds half values to the nearest even value
            when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
            values.

            For Scale: 0
            1.1, 1.2, 1.3, 1.4 => 1
            1.5 => 2
            1.6, 1.7, 1.8, 1.9 => 2
            2.5 => 2

        foreign_type_definition: Optional[str]
            Definition of the foreign data type.

            Only valid for top-level schema fields (not nested fields).
            If the type is FOREIGN, this field is required.

        timestamp_precision: Optional[enums.TimestampPrecision]
            Precision (maximum number of total digits in base 10) for seconds
            of TIMESTAMP type.

            Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for
            microsecond precision. Use `enums.TimestampPrecision.PICOSECOND`
            (`12`) for picosecond precision.

    Raises:
        ValueError:
            If ``timestamp_precision`` is neither ``None`` nor an
            ``enums.TimestampPrecision`` member.
    """

    def __init__(
        self,
        name: str,
        field_type: str,
        mode: str = "NULLABLE",
        default_value_expression: Optional[str] = None,
        description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
        fields: Iterable["SchemaField"] = (),
        policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
        precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        range_element_type: Union[FieldElementType, str, None] = None,
        rounding_mode: Union[enums.RoundingMode, str, None] = None,
        foreign_type_definition: Optional[str] = None,
        timestamp_precision: Optional[enums.TimestampPrecision] = None,
    ):
        # All state lives in a single dict that mirrors the API resource.
        self._properties: Dict[str, Any] = {
            "name": name,
            "type": field_type,
        }
        if mode is not None:
            self._properties["mode"] = mode.upper()
        # Sentinel-defaulted arguments are only written when the caller
        # actually supplied them, so an explicit value (including ``None``)
        # is distinguishable from "not provided".
        if description is not _DEFAULT_VALUE:
            self._properties["description"] = description
        if default_value_expression is not None:
            self._properties["defaultValueExpression"] = default_value_expression
        if precision is not _DEFAULT_VALUE:
            self._properties["precision"] = precision
        if scale is not _DEFAULT_VALUE:
            self._properties["scale"] = scale
        if max_length is not _DEFAULT_VALUE:
            self._properties["maxLength"] = max_length
        if policy_tags is not _DEFAULT_VALUE:
            # Anything other than a PolicyTagList (e.g. an explicit None) is
            # stored as None.
            self._properties["policyTags"] = (
                policy_tags.to_api_repr()
                if isinstance(policy_tags, PolicyTagList)
                else None
            )
        if isinstance(timestamp_precision, enums.TimestampPrecision):
            self._properties["timestampPrecision"] = timestamp_precision.value
        elif timestamp_precision is not None:
            raise ValueError(
                "timestamp_precision must be class enums.TimestampPrecision "
                f"or None, got {type(timestamp_precision)} instead."
            )
        if isinstance(range_element_type, str):
            # Route plain strings through FieldElementType so that both
            # accepted input forms serialize identically (upper-cased type).
            range_element_type = FieldElementType(range_element_type)
        if isinstance(range_element_type, FieldElementType):
            self._properties["rangeElementType"] = range_element_type.to_api_repr()
        if rounding_mode is not None:
            self._properties["roundingMode"] = rounding_mode
        if foreign_type_definition is not None:
            self._properties["foreignTypeDefinition"] = foreign_type_definition

        if fields:  # Don't set the property if it's not set.
            self._properties["fields"] = [field.to_api_repr() for field in fields]

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "SchemaField":
        """Return a ``SchemaField`` object deserialized from a dictionary.

        The passed dictionary is used directly as the internal properties
        (no copy is made) and may be modified in place: a present
        ``"timestampPrecision"`` is converted with ``int()`` and a missing
        ``"mode"`` is filled in as ``"NULLABLE"``.

        Args:
            api_repr (dict): The serialized representation of the SchemaField,
                such as what is output by :meth:`to_api_repr`.

        Returns:
            google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
        """
        placeholder = cls("this_will_be_replaced", "PLACEHOLDER")

        # The API would return a string despite we send an integer. To ensure
        # success of resending received schema, we convert string to integer
        # to ensure consistency.
        try:
            api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"])
        except (TypeError, KeyError):
            # Key absent (KeyError) or value such as None (TypeError): leave
            # the resource untouched.
            pass

        # Note: we don't make a copy of api_repr because this can cause
        # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
        # fields. See https://github.com/googleapis/python-bigquery/issues/6
        placeholder._properties = api_repr

        # Add the field `mode` with default value if it does not exist. Fixes
        # an incompatibility issue with pandas-gbq:
        # https://github.com/googleapis/python-bigquery-pandas/issues/854
        if "mode" not in placeholder._properties:
            placeholder._properties["mode"] = "NULLABLE"

        return placeholder

    @property
    def name(self):
        """str: The name of the field (empty string if unset)."""
        return self._properties.get("name", "")

    @property
    def field_type(self) -> str:
        """str: The type of the field, upper-cased.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
        """
        type_ = self._properties.get("type")
        return cast(str, type_).upper()

    @property
    def mode(self):
        """str: The mode of the field, upper-cased (``"NULLABLE"`` if unset).

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
        """
        return cast(str, self._properties.get("mode", "NULLABLE")).upper()

    @property
    def is_nullable(self):
        """bool: whether 'mode' is 'nullable'."""
        return self.mode == "NULLABLE"

    @property
    def default_value_expression(self):
        """Optional[str] default value of a field, using an SQL expression"""
        return self._properties.get("defaultValueExpression")

    @property
    def description(self):
        """Optional[str]: description for the field."""
        return self._properties.get("description")

    @property
    def precision(self):
        """Optional[int]: Precision (number of digits) for the NUMERIC field."""
        return _helpers._int_or_none(self._properties.get("precision"))

    @property
    def scale(self):
        """Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
        return _helpers._int_or_none(self._properties.get("scale"))

    @property
    def max_length(self):
        """Optional[int]: Maximum length for the STRING or BYTES field."""
        return _helpers._int_or_none(self._properties.get("maxLength"))

    @property
    def range_element_type(self):
        """Optional[FieldElementType]: The subtype of the RANGE, if the
        type of this field is RANGE.

        Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`,
        `"DATETIME"` or `"TIMESTAMP"`. Returns ``None`` when the property is
        missing or empty.
        """
        resource = self._properties.get("rangeElementType")
        if resource:
            return FieldElementType.from_api_repr(resource)
        return None

    @property
    def rounding_mode(self):
        """Enum that specifies the rounding mode to be used when storing values of
        NUMERIC and BIGNUMERIC type.
        """
        return self._properties.get("roundingMode")

    @property
    def foreign_type_definition(self):
        """Definition of the foreign data type.

        Only valid for top-level schema fields (not nested fields).
        If the type is FOREIGN, this field is required.
        """
        return self._properties.get("foreignTypeDefinition")

    @property
    def fields(self):
        """Tuple[SchemaField, ...]: Subfields contained in this field.

        Must be empty or unset if ``field_type`` is not 'RECORD'.
        """
        return tuple(_to_schema_fields(self._properties.get("fields", [])))

    @property
    def policy_tags(self):
        """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
        definition for this field.
        """
        resource = self._properties.get("policyTags")
        return PolicyTagList.from_api_repr(resource) if resource is not None else None

    @property
    def timestamp_precision(self) -> enums.TimestampPrecision:
        """Precision (maximum number of total digits in base 10) for seconds of
        TIMESTAMP type.

        The stored ``"timestampPrecision"`` value (``None`` when unset) is
        looked up in ``enums.TimestampPrecision``.

        Returns:
            enums.TimestampPrecision: value of TimestampPrecision.
        """
        return enums.TimestampPrecision(self._properties.get("timestampPrecision"))

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this schema field.

        Returns:
            Dict: A dictionary representing the SchemaField in a serialized form.
        """
        # Note: we don't make a copy of _properties because this can cause
        # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
        # fields. See https://github.com/googleapis/python-bigquery/issues/6
        return self._properties

    def _key(self):
        """A tuple key that uniquely describes this field.

        Used to compute this instance's hashcode and evaluate equality.
        The key is made of: name, type (decorated with max length or
        precision/scale where applicable), mode, default value expression,
        description, subfields, sorted policy tag names and the raw
        timestamp precision.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
        """
        field_type = self.field_type
        if field_type == "STRING" or field_type == "BYTES":
            if self.max_length is not None:
                field_type = f"{field_type}({self.max_length})"
        elif field_type.endswith("NUMERIC"):
            if self.precision is not None:
                if self.scale is not None:
                    field_type = f"{field_type}({self.precision}, {self.scale})"
                else:
                    field_type = f"{field_type}({self.precision})"

        # Read the property once; each access builds a new PolicyTagList.
        tag_list = self.policy_tags
        policy_tags = None if tag_list is None else tuple(sorted(tag_list.names))

        timestamp_precision = self._properties.get("timestampPrecision")

        return (
            self.name,
            field_type,
            # Mode is always str, if not given it defaults to a str value
            self.mode.upper(),  # pytype: disable=attribute-error
            self.default_value_expression,
            self.description,
            self.fields,
            policy_tags,
            timestamp_precision,
        )

    def to_standard_sql(self) -> standard_sql.StandardSqlField:
        """Return the field as the standard SQL field representation object."""
        sql_type = standard_sql.StandardSqlDataType()

        # A REPEATED field is an ARRAY whose element type comes from the
        # field type; otherwise the field type maps directly.
        if self.mode == "REPEATED":
            sql_type.type_kind = StandardSqlTypeNames.ARRAY
        else:
            sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
                self.field_type,
                StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
            )

        if sql_type.type_kind == StandardSqlTypeNames.ARRAY:  # noqa: E721
            array_element_type = LEGACY_TO_STANDARD_TYPES.get(
                self.field_type,
                StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
            )
            sql_type.array_element_type = standard_sql.StandardSqlDataType(
                type_kind=array_element_type
            )

            # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
            if array_element_type == StandardSqlTypeNames.STRUCT:  # noqa: E721
                sql_type.array_element_type.struct_type = (
                    standard_sql.StandardSqlStructType(
                        fields=(field.to_standard_sql() for field in self.fields)
                    )
                )
        elif sql_type.type_kind == StandardSqlTypeNames.STRUCT:  # noqa: E721
            sql_type.struct_type = standard_sql.StandardSqlStructType(
                fields=(field.to_standard_sql() for field in self.fields)
            )

        return standard_sql.StandardSqlField(name=self.name, type=sql_type)

    def __eq__(self, other):
        if not isinstance(other, SchemaField):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        # The last two key entries are the policy tag names and the timestamp
        # precision; the names are wrapped back into a PolicyTagList so the
        # repr shows the object rather than a bare tuple.
        *initial_tags, policy_tags, timestamp_precision_tag = self._key()
        policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags)
        adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag)
        return f"{self.__class__.__name__}{adjusted_key}"
510
511
512def _parse_schema_resource(info):
513 """Parse a resource fragment into a schema field.
514
515 Args:
516 info: (Mapping[str, Dict]): should contain a "fields" key to be parsed
517
518 Returns:
519 Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]:
520 A list of parsed fields, or ``None`` if no "fields" key found.
521 """
522 if isinstance(info, list):
523 return [SchemaField.from_api_repr(f) for f in info]
524 return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
525
526
527def _build_schema_resource(fields):
528 """Generate a resource fragment for a schema.
529
530 Args:
531 fields (Sequence[google.cloud.bigquery.schema.SchemaField): schema to be dumped.
532
533 Returns:
534 Sequence[Dict]: Mappings describing the schema of the supplied fields.
535 """
536 if isinstance(fields, Sequence):
537 # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
538 return [field.to_api_repr() for field in fields]
539
540 else:
541 raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
542
543
544def _to_schema_fields(schema):
545 """Coerces schema to a list of SchemaField instances while
546 preserving the original structure as much as possible.
547
548 Args:
549 schema (Sequence[Union[ \
550 :class:`~google.cloud.bigquery.schema.SchemaField`, \
551 Mapping[str, Any] \
552 ]
553 ]
554 )::
555 Table schema to convert. Can be a list of SchemaField
556 objects or mappings.
557
558 Returns:
559 A list of SchemaField objects.
560
561 Raises:
562 TypeError: If schema is not a Sequence.
563 """
564
565 if isinstance(schema, Sequence):
566 # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
567 return [
568 (
569 field
570 if isinstance(field, SchemaField)
571 else SchemaField.from_api_repr(field)
572 )
573 for field in schema
574 ]
575
576 else:
577 raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
578
579
class PolicyTagList(object):
    """Define Policy Tags for a column.

    Args:
        names (Iterable[str]):
            Policy tags to associate with the column; stored as a tuple.
            Policy tag identifiers are of the form
            `projects/*/locations/*/taxonomies/*/policyTags/*`.
    """

    def __init__(self, names: Iterable[str] = ()):
        self._properties = {"names": tuple(names)}

    @property
    def names(self):
        """Tuple[str]: Policy tags associated with this definition."""
        return self._properties.get("names", ())

    def _key(self):
        """A tuple key that uniquely describes this PolicyTagList.

        Used to compute this instance's hashcode and evaluate equality.
        The names are sorted, so ordering does not affect equality.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`.
        """
        ordered_names = sorted(self._properties.get("names", ()))
        return tuple(ordered_names)

    def __eq__(self, other):
        if isinstance(other, PolicyTagList):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        class_name = self.__class__.__name__
        return f"{class_name}(names={self._key()})"

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "PolicyTagList":
        """Return a :class:`PolicyTagList` object deserialized from a dict.

        A new instance is built from the ``"names"`` entry of ``api_repr``
        (an empty tuple if the key is absent); the names are copied into a
        tuple rather than sharing the passed dictionary.

        Args:
            api_repr (Optional[Mapping[str, Any]]):
                The serialized representation of the PolicyTagList, such as
                what is output by :meth:`to_api_repr`.

        Returns:
            Optional[google.cloud.bigquery.schema.PolicyTagList]:
                The ``PolicyTagList`` object, or ``None`` if ``api_repr``
                is ``None``.
        """
        if api_repr is not None:
            return cls(names=api_repr.get("names", ()))
        return None

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this object.

        A fresh dictionary is built on every call, with the names copied
        into a new list.

        Returns:
            dict:
                A dictionary representing the PolicyTagList object in
                serialized form.
        """
        return {"names": list(self.names)}
662
663
class ForeignTypeInfo:
    """Metadata about the foreign data type definition such as the system in which the
    type is defined.

    Args:
        type_system (str): Required. Specifies the system which defines the
            foreign data type.

            TypeSystem enum currently includes:
            * "TYPE_SYSTEM_UNSPECIFIED"
            * "HIVE"
    """

    def __init__(self, type_system: Optional[str] = None):
        # Resource dict mirroring the API representation; filled via the setter.
        self._properties: Dict[str, Any] = {}
        self.type_system = type_system

    @property
    def type_system(self) -> Optional[str]:
        """Required. Specifies the system which defines the foreign data
        type."""
        return self._properties.get("typeSystem")

    @type_system.setter
    def type_system(self, value: Optional[str]):
        # The value is passed through ``_helpers._isinstance_or_raise``
        # (str, ``None`` allowed) and whatever it returns is stored.
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["typeSystem"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        The internal properties dict itself is returned, not a copy.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        ``api_repr`` is adopted as the instance's properties dict without
        copying.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance
719
720
class SerDeInfo:
    """Serializer and deserializer information.

    Args:
        serialization_library (str): Required. Specifies a fully-qualified class
            name of the serialization library that is responsible for the
            translation of data between table representation and the underlying
            low-level input and output format structures. The maximum length is
            256 characters.
        name (Optional[str]): Name of the SerDe. The maximum length is 256
            characters.
        parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization
            parameters for the serialization library. Maximum size 10 Kib.
    """

    def __init__(
        self,
        serialization_library: str,
        name: Optional[str] = None,
        parameters: Optional[dict[str, str]] = None,
    ):
        # Resource dict mirroring the API representation; every field is
        # written through its property setter.
        self._properties: Dict[str, Any] = {}
        self.serialization_library = serialization_library
        self.name = name
        self.parameters = parameters

    @property
    def serialization_library(self) -> str:
        """Required. Specifies a fully-qualified class name of the serialization
        library that is responsible for the translation of data between table
        representation and the underlying low-level input and output format
        structures. The maximum length is 256 characters."""
        library = self._properties.get("serializationLibrary")
        return typing.cast(str, library)

    @serialization_library.setter
    def serialization_library(self, value: str):
        # Passed through ``_helpers._isinstance_or_raise`` (str, None not allowed).
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=False)
        self._properties["serializationLibrary"] = checked

    @property
    def name(self) -> Optional[str]:
        """Optional. Name of the SerDe. The maximum length is 256 characters."""
        return self._properties.get("name")

    @name.setter
    def name(self, value: Optional[str] = None):
        # Passed through ``_helpers._isinstance_or_raise`` (str, None allowed).
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["name"] = checked

    @property
    def parameters(self) -> Optional[dict[str, str]]:
        """Optional. Key-value pairs that define the initialization parameters
        for the serialization library. Maximum size 10 Kib."""
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[dict[str, str]] = None):
        # Passed through ``_helpers._isinstance_or_raise`` (dict, None allowed).
        checked = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
        self._properties["parameters"] = checked

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        The internal properties dict itself is returned, not a copy.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        ``api_repr`` is adopted as the instance's properties dict without
        copying; the placeholder library name used for construction is
        discarded with the initial dict.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls("PLACEHOLDER")
        instance._properties = api_repr
        return instance
808
809
class StorageDescriptor:
    """Contains information about how a table's data is stored and accessed by open
    source query engines.

    Args:
        input_format (Optional[str]): Specifies the fully qualified class name of
            the InputFormat (e.g.
            "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
            length is 128 characters.
        location_uri (Optional[str]): The physical location of the table (e.g.
            'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
            'gs://spark-dataproc-data/pangea-data/'). The maximum length is
            2056 bytes.
        output_format (Optional[str]): Specifies the fully qualified class name
            of the OutputFormat (e.g.
            "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum
            length is 128 characters.
        serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information.
    """

    def __init__(
        self,
        input_format: Optional[str] = None,
        location_uri: Optional[str] = None,
        output_format: Optional[str] = None,
        serde_info: Union[SerDeInfo, dict, None] = None,
    ):
        # Resource dict mirroring the API representation; every field is
        # written through its property setter.
        self._properties: Dict[str, Any] = {}
        self.input_format = input_format
        self.location_uri = location_uri
        self.output_format = output_format
        # typing.cast() is needed because mypy cannot reconcile a setter that
        # accepts Union[SerDeInfo, dict, None] with a getter that only ever
        # returns Optional[SerDeInfo].
        self.serde_info = typing.cast(Optional[SerDeInfo], serde_info)

    @property
    def input_format(self) -> Optional[str]:
        """Optional. Specifies the fully qualified class name of the InputFormat
        (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
        length is 128 characters."""
        return self._properties.get("inputFormat")

    @input_format.setter
    def input_format(self, value: Optional[str]):
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["inputFormat"] = checked

    @property
    def location_uri(self) -> Optional[str]:
        """Optional. The physical location of the table (e.g.
        'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
        'gs://spark-dataproc-data/pangea-data/'). The maximum length is
        2056 bytes."""
        return self._properties.get("locationUri")

    @location_uri.setter
    def location_uri(self, value: Optional[str]):
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["locationUri"] = checked

    @property
    def output_format(self) -> Optional[str]:
        """Optional. Specifies the fully qualified class name of the
        OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat").
        The maximum length is 128 characters."""
        return self._properties.get("outputFormat")

    @output_format.setter
    def output_format(self, value: Optional[str]):
        checked = _helpers._isinstance_or_raise(value, str, none_allowed=True)
        self._properties["outputFormat"] = checked

    @property
    def serde_info(self) -> Optional[SerDeInfo]:
        """Optional. Serializer and deserializer information.

        A ``SerDeInfo`` is built from the stored resource on each access.
        """
        resource = _helpers._get_sub_prop(self._properties, ["serDeInfo"])
        if resource is None:
            return None
        return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(resource))

    @serde_info.setter
    def serde_info(self, value: Union[SerDeInfo, dict, None]):
        checked = _helpers._isinstance_or_raise(
            value, (SerDeInfo, dict), none_allowed=True
        )
        # A SerDeInfo is stored as its API resource; a dict or None is stored
        # as given.
        self._properties["serDeInfo"] = (
            checked.to_api_repr() if isinstance(checked, SerDeInfo) else checked
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        The internal properties dict itself is returned, not a copy.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, resource: dict) -> StorageDescriptor:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        ``resource`` is adopted as the instance's properties dict without
        copying.

        Args:
            resource (Dict[str, Any]):
                API representation of the object to be instantiated.

        Returns:
            An instance of the class initialized with data from 'resource'.
        """
        instance = cls()
        instance._properties = resource
        return instance