1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
15"""Schemas for BigQuery tables / queries."""
16
17from __future__ import annotations
18import enum
19import typing
20from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
21
22from google.cloud.bigquery import _helpers
23from google.cloud.bigquery import standard_sql
24from google.cloud.bigquery import enums
25from google.cloud.bigquery.enums import StandardSqlTypeNames
26
27
# Field type names that denote a nested record; both are mapped to
# ``StandardSqlTypeNames.STRUCT`` in ``LEGACY_TO_STANDARD_TYPES``.
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
    "STRING": StandardSqlTypeNames.STRING,
    "BYTES": StandardSqlTypeNames.BYTES,
    "INTEGER": StandardSqlTypeNames.INT64,
    "INT64": StandardSqlTypeNames.INT64,
    "FLOAT": StandardSqlTypeNames.FLOAT64,
    "FLOAT64": StandardSqlTypeNames.FLOAT64,
    "NUMERIC": StandardSqlTypeNames.NUMERIC,
    "BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC,
    "BOOLEAN": StandardSqlTypeNames.BOOL,
    "BOOL": StandardSqlTypeNames.BOOL,
    "GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY,
    "RECORD": StandardSqlTypeNames.STRUCT,
    "STRUCT": StandardSqlTypeNames.STRUCT,
    "TIMESTAMP": StandardSqlTypeNames.TIMESTAMP,
    "DATE": StandardSqlTypeNames.DATE,
    "TIME": StandardSqlTypeNames.TIME,
    "DATETIME": StandardSqlTypeNames.DATETIME,
    "FOREIGN": StandardSqlTypeNames.FOREIGN,
    # no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""Maps type-name strings (legacy SQL names and their GoogleSQL spellings) to the corresponding ``StandardSqlTypeNames`` members."""
55
56
class _DefaultSentinel(enum.Enum):
    """Single-member enum whose member means "use the default value".

    An enum is used (instead of a bare ``object()``) so that pytype/mypy
    know the sentinel is the only possible value of this type.
    https://stackoverflow.com/a/60605919/101923

    ``Literal[_DEFAULT_VALUE]`` would be an alternative, but ``Literal`` was
    only added in Python 3.8.
    https://docs.python.org/3/library/typing.html#typing.Literal
    """

    DEFAULT_VALUE = object()


# Module-level shorthand for the one sentinel member.
_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE
71
72
class FieldElementType(object):
    """Represents the type of a field element.

    Args:
        element_type (str): The type of a field element. It is stored
            upper-cased.
    """

    def __init__(self, element_type: str):
        self._properties = {"type": element_type.upper()}

    @property
    def element_type(self):
        """Optional[str]: The stored (upper-cased) element type name."""
        return self._properties.get("type")

    @classmethod
    def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]:
        """Factory: construct a FieldElementType given its API representation.

        Args:
            api_repr (Dict[str, str]): field element type as returned from
                the API. Must contain a ``"type"`` key unless it is empty.

        Returns:
            Optional[google.cloud.bigquery.FieldElementType]:
                Python object, as parsed from ``api_repr``, or ``None`` when
                ``api_repr`` is ``None`` or empty.
        """
        if api_repr:
            type_name = api_repr["type"].upper()
            return cls(type_name)
        return None

    def to_api_repr(self) -> dict:
        """Construct the API resource representation of this field element type.

        Returns:
            Dict[str, str]: Field element type represented as an API resource.
                This is the instance's own properties dict, not a copy.
        """
        return self._properties
111
112
class SchemaField(object):
    """Describe a single field within a table schema.

    Args:
        name: The name of the field.

        field_type:
            The type of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type

        mode:
            Defaults to ``'NULLABLE'``. The mode of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode

        description: Description for the field.

        fields: Subfields (requires ``field_type`` of 'RECORD').

        policy_tags: The policy tag list for the field.

        precision:
            Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.

        scale:
            Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.

        max_length: Maximum length of fields with STRING or BYTES type.

        default_value_expression: str, Optional
            Used to specify the default value of a field using a SQL expression. It can only be set for
            top level fields (columns).

            You can use a struct or array expression to specify default value for the entire struct or
            array. The valid SQL expressions are:

            - Literals for all data types, including STRUCT and ARRAY.

            - The following functions:

                `CURRENT_TIMESTAMP`
                `CURRENT_TIME`
                `CURRENT_DATE`
                `CURRENT_DATETIME`
                `GENERATE_UUID`
                `RAND`
                `SESSION_USER`
                `ST_GEOPOINT`

            - Struct or array composed with the above allowed functions, for example:

                "[CURRENT_DATE(), DATE '2020-01-01']"

        range_element_type: FieldElementType, str, Optional
            The subtype of the RANGE, if the type of this field is RANGE. If
            the type is RANGE, this field is required. Possible values for the
            field element type of a RANGE include `DATE`, `DATETIME` and
            `TIMESTAMP`.

        rounding_mode: Union[enums.RoundingMode, str, None]
            Specifies the rounding mode to be used when storing values of
            NUMERIC and BIGNUMERIC type.

            Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
            ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
            when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
            values.

            For Scale: 0
            1.1, 1.2, 1.3, 1.4 => 1
            1.5, 1.6, 1.7, 1.8, 1.9 => 2

            ROUND_HALF_EVEN rounds half values to the nearest even value
            when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
            values.

            For Scale: 0
            1.1, 1.2, 1.3, 1.4 => 1
            1.5 => 2
            1.6, 1.7, 1.8, 1.9 => 2
            2.5 => 2

        foreign_type_definition: Optional[str]
            Definition of the foreign data type.

            Only valid for top-level schema fields (not nested fields).
            If the type is FOREIGN, this field is required.
    """

    def __init__(
        self,
        name: str,
        field_type: str,
        mode: str = "NULLABLE",
        default_value_expression: Optional[str] = None,
        description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
        fields: Iterable["SchemaField"] = (),
        policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
        precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        range_element_type: Union[FieldElementType, str, None] = None,
        rounding_mode: Union[enums.RoundingMode, str, None] = None,
        foreign_type_definition: Optional[str] = None,
    ):
        self._properties: Dict[str, Any] = {"name": name, "type": field_type}
        # Work through a local alias; keys are inserted in a fixed order and
        # only for arguments that were actually supplied.
        props = self._properties

        if mode is not None:
            props["mode"] = mode.upper()
        if description is not _DEFAULT_VALUE:
            props["description"] = description
        if default_value_expression is not None:
            props["defaultValueExpression"] = default_value_expression
        if precision is not _DEFAULT_VALUE:
            props["precision"] = precision
        if scale is not _DEFAULT_VALUE:
            props["scale"] = scale
        if max_length is not _DEFAULT_VALUE:
            props["maxLength"] = max_length

        if policy_tags is not _DEFAULT_VALUE:
            # Anything that is not a PolicyTagList (including an explicit
            # None) is recorded as None.
            if isinstance(policy_tags, PolicyTagList):
                props["policyTags"] = policy_tags.to_api_repr()
            else:
                props["policyTags"] = None

        if isinstance(range_element_type, str):
            props["rangeElementType"] = {"type": range_element_type}
        if isinstance(range_element_type, FieldElementType):
            props["rangeElementType"] = range_element_type.to_api_repr()

        if rounding_mode is not None:
            props["roundingMode"] = rounding_mode
        if foreign_type_definition is not None:
            props["foreignTypeDefinition"] = foreign_type_definition

        # Leave "fields" out entirely when no subfields were given.
        if fields:
            props["fields"] = [subfield.to_api_repr() for subfield in fields]

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "SchemaField":
        """Return a ``SchemaField`` object deserialized from a dictionary.

        The returned object uses ``api_repr`` itself as its properties dict
        (no copy is made), and a ``"mode"`` key is added to that dict when it
        is missing.

        Args:
            api_repr (Mapping[str, str]): The serialized representation
                of the SchemaField, such as what is output by
                :meth:`to_api_repr`.

        Returns:
            google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
        """
        schema_field = cls("this_will_be_replaced", "PLACEHOLDER")

        # Note: we don't make a copy of api_repr because this can cause
        # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
        # fields. See https://github.com/googleapis/python-bigquery/issues/6
        schema_field._properties = api_repr

        # Add the field `mode` with default value if it does not exist. Fixes
        # an incompatibility issue with pandas-gbq:
        # https://github.com/googleapis/python-bigquery-pandas/issues/854
        if "mode" not in api_repr:
            api_repr["mode"] = "NULLABLE"

        return schema_field

    @property
    def name(self):
        """str: The name of the field (empty string when not set)."""
        return self._properties.get("name", "")

    @property
    def field_type(self) -> str:
        """str: The type of the field, upper-cased.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
        """
        return cast(str, self._properties.get("type")).upper()

    @property
    def mode(self):
        """str: The mode of the field, upper-cased; ``'NULLABLE'`` when unset.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
        """
        raw_mode = self._properties.get("mode", "NULLABLE")
        return cast(str, raw_mode).upper()

    @property
    def is_nullable(self):
        """bool: whether 'mode' is 'nullable'."""
        return self.mode == "NULLABLE"

    @property
    def default_value_expression(self):
        """Optional[str] default value of a field, using an SQL expression"""
        return self._properties.get("defaultValueExpression")

    @property
    def description(self):
        """Optional[str]: description for the field."""
        return self._properties.get("description")

    @property
    def precision(self):
        """Optional[int]: Precision (number of digits) for the NUMERIC field."""
        return _helpers._int_or_none(self._properties.get("precision"))

    @property
    def scale(self):
        """Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
        return _helpers._int_or_none(self._properties.get("scale"))

    @property
    def max_length(self):
        """Optional[int]: Maximum length for the STRING or BYTES field."""
        return _helpers._int_or_none(self._properties.get("maxLength"))

    @property
    def range_element_type(self):
        """Optional[FieldElementType]: The subtype of the RANGE, if the
        type of this field is RANGE.

        Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`,
        `"DATETIME"` or `"TIMESTAMP"`.
        """
        element_repr = self._properties.get("rangeElementType")
        if element_repr:
            return FieldElementType.from_api_repr(element_repr)
        return None

    @property
    def rounding_mode(self):
        """Enum that specifies the rounding mode to be used when storing values of
        NUMERIC and BIGNUMERIC type.
        """
        return self._properties.get("roundingMode")

    @property
    def foreign_type_definition(self):
        """Definition of the foreign data type.

        Only valid for top-level schema fields (not nested fields).
        If the type is FOREIGN, this field is required.
        """
        return self._properties.get("foreignTypeDefinition")

    @property
    def fields(self):
        """tuple: Subfields contained in this field (empty when there are none).

        Must be empty or unset if ``field_type`` is not 'RECORD'.
        """
        raw_subfields = self._properties.get("fields", [])
        return tuple(_to_schema_fields(raw_subfields))

    @property
    def policy_tags(self):
        """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
        definition for this field.
        """
        resource = self._properties.get("policyTags")
        if resource is None:
            return None
        return PolicyTagList.from_api_repr(resource)

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this schema field.

        Returns:
            Dict: A dictionary representing the SchemaField in a serialized form.
                This is the instance's own properties dict, not a copy.
        """
        # Note: we don't make a copy of _properties because this can cause
        # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
        # fields. See https://github.com/googleapis/python-bigquery/issues/6
        return self._properties

    def _key(self):
        """A tuple key that uniquely describes this field.

        Used to compute this instance's hashcode and evaluate equality.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
        """
        type_repr = self.field_type
        if type_repr in ("STRING", "BYTES"):
            # Fold the length limit into the type, e.g. "STRING(10)".
            length = self.max_length
            if length is not None:
                type_repr = f"{type_repr}({length})"
        elif type_repr.endswith("NUMERIC"):
            # Fold precision (and scale, when present) into the type,
            # e.g. "NUMERIC(10, 2)". Scale is only consulted when a
            # precision is set.
            digits = self.precision
            if digits is not None:
                decimals = self.scale
                if decimals is not None:
                    type_repr = f"{type_repr}({digits}, {decimals})"
                else:
                    type_repr = f"{type_repr}({digits})"

        tag_list = self.policy_tags
        tag_names = None if tag_list is None else tuple(sorted(tag_list.names))

        return (
            self.name,
            type_repr,
            # Mode is always str, if not given it defaults to a str value
            self.mode.upper(),  # pytype: disable=attribute-error
            self.default_value_expression,
            self.description,
            self.fields,
            tag_names,
        )

    def to_standard_sql(self) -> standard_sql.StandardSqlField:
        """Return the field as the standard SQL field representation object."""
        unspecified = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED
        sql_type = standard_sql.StandardSqlDataType()

        # A REPEATED field is an ARRAY whose element type comes from
        # ``field_type``; otherwise ``field_type`` is the type itself.
        if self.mode == "REPEATED":
            sql_type.type_kind = StandardSqlTypeNames.ARRAY
        else:
            sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
                self.field_type, unspecified
            )

        if sql_type.type_kind == StandardSqlTypeNames.ARRAY:  # noqa: E721
            element_kind = LEGACY_TO_STANDARD_TYPES.get(self.field_type, unspecified)
            sql_type.array_element_type = standard_sql.StandardSqlDataType(
                type_kind=element_kind
            )

            # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
            if element_kind == StandardSqlTypeNames.STRUCT:  # noqa: E721
                sql_type.array_element_type.struct_type = (
                    standard_sql.StandardSqlStructType(
                        fields=(sub.to_standard_sql() for sub in self.fields)
                    )
                )
        elif sql_type.type_kind == StandardSqlTypeNames.STRUCT:  # noqa: E721
            sql_type.struct_type = standard_sql.StandardSqlStructType(
                fields=(sub.to_standard_sql() for sub in self.fields)
            )

        return standard_sql.StandardSqlField(name=self.name, type=sql_type)

    def __eq__(self, other):
        if isinstance(other, SchemaField):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        # The last key element holds the sorted policy tag names; show it as
        # a PolicyTagList (or None) instead of a bare tuple.
        key = self._key()
        tag_names = key[-1]
        tag_list = PolicyTagList(tag_names) if tag_names is not None else None
        shown = key[:-1] + (tag_list,)
        return f"{self.__class__.__name__}{shown}"
475
476
def _parse_schema_resource(info):
    """Parse a resource fragment into a list of schema fields.

    Args:
        info (Union[List[Dict], Mapping[str, Any]]): either a list of field
            resources, or a mapping whose "fields" key holds such a list.

    Returns:
        List[google.cloud.bigquery.schema.SchemaField]:
            The parsed fields; an empty list when ``info`` is a mapping
            without a "fields" key.
    """
    # A bare list is taken as the field resources themselves; otherwise they
    # are looked up under "fields".
    raw_fields = info if isinstance(info, list) else info.get("fields", ())
    return [SchemaField.from_api_repr(raw) for raw in raw_fields]
490
491
def _build_schema_resource(fields):
    """Generate a resource fragment for a schema.

    Args:
        fields (Sequence[google.cloud.bigquery.schema.SchemaField]): schema to be dumped.

    Returns:
        List[Dict]: Mappings describing the schema of the supplied fields,
            one per field, in order.

    Raises:
        TypeError: If ``fields`` is not a Sequence.
    """
    if not isinstance(fields, Sequence):
        raise TypeError("Schema must be a Sequence (e.g. a list) or None.")

    # Input is a Sequence (e.g. a list): serialize each field in turn.
    return [schema_field.to_api_repr() for schema_field in fields]
507
508
def _to_schema_fields(schema):
    """Coerce a schema to a list of SchemaField instances.

    Items that already are ``SchemaField`` objects are kept as they are;
    every other item is converted with ``SchemaField.from_api_repr``.

    Args:
        schema (Sequence[Union[ \
            :class:`~google.cloud.bigquery.schema.SchemaField`, \
            Mapping[str, Any] \
        ]]):
            Table schema to convert. Can be a list of SchemaField
            objects or mappings.

    Returns:
        A list of SchemaField objects.

    Raises:
        TypeError: If schema is not a Sequence.
    """
    if not isinstance(schema, Sequence):
        raise TypeError("Schema must be a Sequence (e.g. a list) or None.")

    # Input is a Sequence (e.g. a list): convert item by item, preserving order.
    return [
        item if isinstance(item, SchemaField) else SchemaField.from_api_repr(item)
        for item in schema
    ]
541
542
class PolicyTagList(object):
    """Define Policy Tags for a column.

    Args:
        names (Optional[Iterable[str]]): policy tags to associate with
            the column. Policy tag identifiers are of the form
            `projects/*/locations/*/taxonomies/*/policyTags/*`.
            ``None`` is treated the same as an empty collection.
    """

    def __init__(self, names: Optional[Iterable[str]] = ()):
        self._properties: Dict[str, Any] = {}
        # ``names`` is documented as optional, so accept None instead of
        # failing inside ``tuple(None)``.
        self._properties["names"] = () if names is None else tuple(names)

    @property
    def names(self):
        """Tuple[str]: Policy tags associated with this definition."""
        return self._properties.get("names", ())

    def _key(self):
        """A tuple key that uniquely describes this PolicyTagList.

        Used to compute this instance's hashcode and evaluate equality.
        The names are sorted, so their order does not affect equality.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`.
        """
        return tuple(sorted(self._properties.get("names", ())))

    def __eq__(self, other):
        if not isinstance(other, PolicyTagList):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return f"{self.__class__.__name__}(names={self._key()})"

    @classmethod
    def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["PolicyTagList"]:
        """Return a :class:`PolicyTagList` object deserialized from a dict.

        The names found in ``api_repr`` are copied into a new instance; the
        returned object does not keep a reference to ``api_repr``.

        Args:
            api_repr (Optional[Mapping[str, Any]]):
                The serialized representation of the PolicyTagList, such as
                what is output by :meth:`to_api_repr`. A missing or ``None``
                "names" entry yields an empty tag list.

        Returns:
            Optional[google.cloud.bigquery.schema.PolicyTagList]:
                The ``PolicyTagList`` object, or ``None`` when ``api_repr``
                is ``None``.
        """
        if api_repr is None:
            return None
        return cls(names=api_repr.get("names", ()))

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this object.

        A new dict (holding a new list of the names) is built on every call,
        so changing the result does not affect this instance.

        Returns:
            dict:
                A dictionary representing the PolicyTagList object in
                serialized form.
        """
        return {"names": list(self.names)}
625
626
class ForeignTypeInfo:
    """Metadata about the foreign data type definition such as the system in which the
    type is defined.

    Args:
        type_system (str): Required. Specifies the system which defines the
            foreign data type.

            TypeSystem enum currently includes:
            * "TYPE_SYSTEM_UNSPECIFIED"
            * "HIVE"
    """

    def __init__(self, type_system: Optional[str] = None):
        self._properties: Dict[str, Any] = {}
        # Assign through the property so the setter's check is applied.
        self.type_system = type_system

    @property
    def type_system(self) -> Optional[str]:
        """Required. Specifies the system which defines the foreign data
        type."""
        return self._properties.get("typeSystem")

    @type_system.setter
    def type_system(self, value: Optional[str]):
        # The value is passed through ``_helpers._isinstance_or_raise`` with
        # ``str`` as the expected type and None allowed; its result is stored.
        self._properties["typeSystem"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the instance's own properties dict, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo":
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. It is
                used directly as the new instance's properties (no copy).

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        instance = cls()
        instance._properties = api_repr
        return instance
682
683
class SerDeInfo:
    """Serializer and deserializer information.

    Args:
        serialization_library (str): Required. Specifies a fully-qualified class
            name of the serialization library that is responsible for the
            translation of data between table representation and the underlying
            low-level input and output format structures. The maximum length is
            256 characters.
        name (Optional[str]): Name of the SerDe. The maximum length is 256
            characters.
        parameters: (Optional[dict[str, str]]): Key-value pairs that define the initialization
            parameters for the serialization library. Maximum size 10 Kib.
    """

    def __init__(
        self,
        serialization_library: str,
        name: Optional[str] = None,
        parameters: Optional[dict[str, str]] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Each value goes through its property setter, in this order.
        self.serialization_library = serialization_library
        self.name = name
        self.parameters = parameters

    @property
    def serialization_library(self) -> str:
        """Required. Specifies a fully-qualified class name of the serialization
        library that is responsible for the translation of data between table
        representation and the underlying low-level input and output format
        structures. The maximum length is 256 characters."""
        library = self._properties.get("serializationLibrary")
        return typing.cast(str, library)

    @serialization_library.setter
    def serialization_library(self, value: str):
        # Passed through ``_helpers._isinstance_or_raise`` (expects ``str``,
        # None not allowed); the result is stored.
        self._properties["serializationLibrary"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=False
        )

    @property
    def name(self) -> Optional[str]:
        """Optional. Name of the SerDe. The maximum length is 256 characters."""
        return self._properties.get("name")

    @name.setter
    def name(self, value: Optional[str] = None):
        self._properties["name"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def parameters(self) -> Optional[dict[str, str]]:
        """Optional. Key-value pairs that define the initialization parameters
        for the serialization library. Maximum size 10 Kib."""
        return self._properties.get("parameters")

    @parameters.setter
    def parameters(self, value: Optional[dict[str, str]] = None):
        self._properties["parameters"] = _helpers._isinstance_or_raise(
            value, dict, none_allowed=True
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the instance's own properties dict, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            api_repr (Dict[str, Any]):
                API representation of the object to be instantiated. It is
                used directly as the new instance's properties (no copy).

        Returns:
            An instance of the class initialized with data from 'api_repr'.
        """
        # The constructor requires a library name; the placeholder is
        # discarded when the properties are replaced below.
        instance = cls("PLACEHOLDER")
        instance._properties = api_repr
        return instance
771
772
class StorageDescriptor:
    """Contains information about how a table's data is stored and accessed by open
    source query engines.

    Args:
        input_format (Optional[str]): Specifies the fully qualified class name of
            the InputFormat (e.g.
            "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
            length is 128 characters.
        location_uri (Optional[str]): The physical location of the table (e.g.
            'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
            'gs://spark-dataproc-data/pangea-data/'). The maximum length is
            2056 bytes.
        output_format (Optional[str]): Specifies the fully qualified class name
            of the OutputFormat (e.g.
            "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum
            length is 128 characters.
        serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information.
    """

    def __init__(
        self,
        input_format: Optional[str] = None,
        location_uri: Optional[str] = None,
        output_format: Optional[str] = None,
        serde_info: Union[SerDeInfo, dict, None] = None,
    ):
        self._properties: Dict[str, Any] = {}
        # Each value goes through its property setter, in this order.
        self.input_format = input_format
        self.location_uri = location_uri
        self.output_format = output_format
        # typing.cast() is needed because mypy cannot follow that the setter
        # accepts Union[SerDeInfo, dict, None] while the getter only ever
        # returns Optional[SerDeInfo].
        self.serde_info = typing.cast(Optional[SerDeInfo], serde_info)

    @property
    def input_format(self) -> Optional[str]:
        """Optional. Specifies the fully qualified class name of the InputFormat
        (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
        length is 128 characters."""
        return self._properties.get("inputFormat")

    @input_format.setter
    def input_format(self, value: Optional[str]):
        self._properties["inputFormat"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def location_uri(self) -> Optional[str]:
        """Optional. The physical location of the table (e.g.
        'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
        'gs://spark-dataproc-data/pangea-data/'). The maximum length is
        2056 bytes."""
        return self._properties.get("locationUri")

    @location_uri.setter
    def location_uri(self, value: Optional[str]):
        self._properties["locationUri"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def output_format(self) -> Optional[str]:
        """Optional. Specifies the fully qualified class name of the
        OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat").
        The maximum length is 128 characters."""
        return self._properties.get("outputFormat")

    @output_format.setter
    def output_format(self, value: Optional[str]):
        self._properties["outputFormat"] = _helpers._isinstance_or_raise(
            value, str, none_allowed=True
        )

    @property
    def serde_info(self) -> Optional[SerDeInfo]:
        """Optional. Serializer and deserializer information."""
        serde_resource = _helpers._get_sub_prop(self._properties, ["serDeInfo"])
        if serde_resource is None:
            return None
        return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(serde_resource))

    @serde_info.setter
    def serde_info(self, value: Union[SerDeInfo, dict, None]):
        checked = _helpers._isinstance_or_raise(
            value, (SerDeInfo, dict), none_allowed=True
        )
        # A SerDeInfo is stored as its API dict; a dict or None is stored as is.
        self._properties["serDeInfo"] = (
            checked.to_api_repr() if isinstance(checked, SerDeInfo) else checked
        )

    def to_api_repr(self) -> dict:
        """Build an API representation of this object.

        Returns:
            Dict[str, Any]:
                A dictionary in the format used by the BigQuery API. This is
                the instance's own properties dict, not a copy.
        """
        return self._properties

    @classmethod
    def from_api_repr(cls, resource: dict) -> StorageDescriptor:
        """Factory: constructs an instance of the class (cls)
        given its API representation.

        Args:
            resource (Dict[str, Any]):
                API representation of the object to be instantiated. It is
                used directly as the new instance's properties (no copy).

        Returns:
            An instance of the class initialized with data from 'resource'.
        """
        instance = cls()
        instance._properties = resource
        return instance