Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/schema.py: 38%
162 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:07 +0000
1# Copyright 2015 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Schemas for BigQuery tables / queries."""
17import collections
18import enum
19from typing import Any, Dict, Iterable, Union
21from google.cloud.bigquery import standard_sql
22from google.cloud.bigquery.enums import StandardSqlTypeNames
# Type names that denote a nested record; both spellings are accepted.
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
    # Character and binary data.
    "STRING": StandardSqlTypeNames.STRING,
    "BYTES": StandardSqlTypeNames.BYTES,
    # Numeric types; the legacy and standard spellings share one target.
    "INTEGER": StandardSqlTypeNames.INT64,
    "INT64": StandardSqlTypeNames.INT64,
    "FLOAT": StandardSqlTypeNames.FLOAT64,
    "FLOAT64": StandardSqlTypeNames.FLOAT64,
    "NUMERIC": StandardSqlTypeNames.NUMERIC,
    "BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC,
    # Booleans.
    "BOOLEAN": StandardSqlTypeNames.BOOL,
    "BOOL": StandardSqlTypeNames.BOOL,
    # Geography and nested records.
    "GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY,
    "RECORD": StandardSqlTypeNames.STRUCT,
    "STRUCT": StandardSqlTypeNames.STRUCT,
    # Date and time types.
    "TIMESTAMP": StandardSqlTypeNames.TIMESTAMP,
    "DATE": StandardSqlTypeNames.DATE,
    "TIME": StandardSqlTypeNames.TIME,
    "DATETIME": StandardSqlTypeNames.DATETIME,
    # no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""Maps legacy SQL type names (strings) to ``StandardSqlTypeNames`` members."""
53class _DefaultSentinel(enum.Enum):
54 """Object used as 'sentinel' indicating default value should be used.
56 Uses enum so that pytype/mypy knows that this is the only possible value.
57 https://stackoverflow.com/a/60605919/101923
59 Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8.
60 https://docs.python.org/3/library/typing.html#typing.Literal
61 """
63 DEFAULT_VALUE = object()
66_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE
class SchemaField(object):
    """Describe a single field within a table schema.

    Args:
        name: The name of the field.

        field_type:
            The type of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type

        mode:
            Defaults to ``'NULLABLE'``. The mode of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode

        description: Description for the field.

        fields: Subfields (requires ``field_type`` of 'RECORD').

        policy_tags: The policy tag list for the field.

        precision:
            Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.

        scale:
            Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.

        max_length: Maximum length of fields with STRING or BYTES type.

        default_value_expression: str, Optional
            Used to specify the default value of a field using a SQL expression. It can only be set for
            top level fields (columns).

            You can use a struct or array expression to specify default value for the entire struct or
            array. The valid SQL expressions are:

            - Literals for all data types, including STRUCT and ARRAY.

            - The following functions:

                `CURRENT_TIMESTAMP`
                `CURRENT_TIME`
                `CURRENT_DATE`
                `CURRENT_DATETIME`
                `GENERATE_UUID`
                `RAND`
                `SESSION_USER`
                `ST_GEOPOINT`

            - Struct or array composed with the above allowed functions, for example:

                "[CURRENT_DATE(), DATE '2020-01-01']"
    """

    def __init__(
        self,
        name: str,
        field_type: str,
        mode: Union[str, None] = "NULLABLE",
        default_value_expression: Union[str, None] = None,
        description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
        fields: Iterable["SchemaField"] = (),
        policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
        precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
    ):
        # Only properties that were actually supplied are stored, so the
        # serialized form does not carry keys the caller never set.
        self._properties: Dict[str, Any] = {
            "name": name,
            "type": field_type,
        }
        if mode is not None:
            self._properties["mode"] = mode.upper()
        if description is not _DEFAULT_VALUE:
            self._properties["description"] = description
        if default_value_expression is not None:
            self._properties["defaultValueExpression"] = default_value_expression
        if precision is not _DEFAULT_VALUE:
            self._properties["precision"] = precision
        if scale is not _DEFAULT_VALUE:
            self._properties["scale"] = scale
        if max_length is not _DEFAULT_VALUE:
            self._properties["maxLength"] = max_length
        if policy_tags is not _DEFAULT_VALUE:
            # An explicit ``None`` is stored as ``None``; anything else is
            # stored in its serialized form.
            self._properties["policyTags"] = (
                policy_tags.to_api_repr() if policy_tags is not None else None
            )
        self._fields = tuple(fields)

    @staticmethod
    def __get_int(api_repr, name):
        """Return ``api_repr[name]`` converted to ``int``.

        Returns the ``_DEFAULT_VALUE`` sentinel unchanged when the key is absent.
        """
        v = api_repr.get(name, _DEFAULT_VALUE)
        if v is not _DEFAULT_VALUE:
            v = int(v)
        return v

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "SchemaField":
        """Return a ``SchemaField`` object deserialized from a dictionary.

        Args:
            api_repr (Mapping[str, str]): The serialized representation
                of the SchemaField, such as what is output by
                :meth:`to_api_repr`.

        Returns:
            google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
        """
        field_type = api_repr["type"].upper()

        # Handle optional properties with default values
        mode = api_repr.get("mode", "NULLABLE")
        description = api_repr.get("description", _DEFAULT_VALUE)
        fields = api_repr.get("fields", ())
        policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)

        default_value_expression = api_repr.get("defaultValueExpression", None)

        # Only a real payload is deserialized; ``None`` and "not present" are
        # passed through to the constructor untouched.
        if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
            policy_tags = PolicyTagList.from_api_repr(policy_tags)

        return cls(
            field_type=field_type,
            fields=[cls.from_api_repr(f) for f in fields],
            mode=mode.upper(),
            default_value_expression=default_value_expression,
            description=description,
            name=api_repr["name"],
            policy_tags=policy_tags,
            precision=cls.__get_int(api_repr, "precision"),
            scale=cls.__get_int(api_repr, "scale"),
            max_length=cls.__get_int(api_repr, "maxLength"),
        )

    @property
    def name(self):
        """str: The name of the field."""
        return self._properties["name"]

    @property
    def field_type(self):
        """str: The type of the field.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
        """
        return self._properties["type"]

    @property
    def mode(self):
        """Optional[str]: The mode of the field.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
        """
        return self._properties.get("mode")

    @property
    def is_nullable(self):
        """bool: whether 'mode' is 'nullable'."""
        return self.mode == "NULLABLE"

    @property
    def default_value_expression(self):
        """Optional[str] default value of a field, using an SQL expression"""
        return self._properties.get("defaultValueExpression")

    @property
    def description(self):
        """Optional[str]: description for the field."""
        return self._properties.get("description")

    @property
    def precision(self):
        """Optional[int]: Precision (number of digits) for the NUMERIC field."""
        return self._properties.get("precision")

    @property
    def scale(self):
        """Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
        return self._properties.get("scale")

    @property
    def max_length(self):
        """Optional[int]: Maximum length for the STRING or BYTES field."""
        return self._properties.get("maxLength")

    @property
    def fields(self):
        """Optional[tuple]: Subfields contained in this field.

        Must be empty or unset if ``field_type`` is not 'RECORD'.
        """
        return self._fields

    @property
    def policy_tags(self):
        """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
        definition for this field.
        """
        resource = self._properties.get("policyTags")
        return PolicyTagList.from_api_repr(resource) if resource is not None else None

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this schema field.

        Returns:
            Dict: A dictionary representing the SchemaField in a serialized form.
        """
        answer = self._properties.copy()

        # If this is a RECORD type, then sub-fields are also included,
        # add this to the serialized representation. The type may be ``None``
        # (see ``_key``), in which case there is nothing to upper-case.
        field_type = self.field_type
        if field_type is not None and field_type.upper() in _STRUCT_TYPES:
            answer["fields"] = [f.to_api_repr() for f in self.fields]

        # Done; return the serialized dictionary.
        return answer

    def _key(self):
        """A tuple key that uniquely describes this field.

        Used to compute this instance's hashcode and evaluate equality.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
        """
        field_type = self.field_type.upper() if self.field_type is not None else None

        # Type can temporarily be set to None if the code needs a SchemaField instance,
        # but has not determined the exact type of the field yet.
        if field_type is not None:
            if field_type == "STRING" or field_type == "BYTES":
                if self.max_length is not None:
                    field_type = f"{field_type}({self.max_length})"
            elif field_type.endswith("NUMERIC"):
                if self.precision is not None:
                    if self.scale is not None:
                        field_type = f"{field_type}({self.precision}, {self.scale})"
                    else:
                        field_type = f"{field_type}({self.precision})"

        policy_tags = (
            None if self.policy_tags is None else tuple(sorted(self.policy_tags.names))
        )

        # The constructor accepts ``mode=None`` and then stores no mode at all,
        # so the key must not assume a string here; otherwise equality, hashing
        # and repr would all raise AttributeError for such fields.
        mode = self.mode
        normalized_mode = mode.upper() if mode is not None else None

        return (
            self.name,
            field_type,
            normalized_mode,
            self.default_value_expression,
            self.description,
            self._fields,
            policy_tags,
        )

    def to_standard_sql(self) -> standard_sql.StandardSqlField:
        """Return the field as the standard SQL field representation object."""
        # Normalize the type name the same way ``to_api_repr`` and ``_key`` do,
        # so that a field created with e.g. "string" still resolves.
        raw_type = self.field_type
        lookup_key = raw_type.upper() if raw_type is not None else None
        base_kind = LEGACY_TO_STANDARD_TYPES.get(
            lookup_key,
            StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
        )

        sql_type = standard_sql.StandardSqlDataType()

        if self.mode == "REPEATED":
            # A repeated field is an ARRAY whose element type is the field's
            # own (non-repeated) type.
            sql_type.type_kind = StandardSqlTypeNames.ARRAY
            sql_type.array_element_type = standard_sql.StandardSqlDataType(
                type_kind=base_kind
            )

            # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
            if base_kind == StandardSqlTypeNames.STRUCT:  # noqa: E721
                sql_type.array_element_type.struct_type = (
                    standard_sql.StandardSqlStructType(
                        fields=(field.to_standard_sql() for field in self.fields)
                    )
                )
        else:
            sql_type.type_kind = base_kind
            if base_kind == StandardSqlTypeNames.STRUCT:  # noqa: E721
                sql_type.struct_type = standard_sql.StandardSqlStructType(
                    fields=(field.to_standard_sql() for field in self.fields)
                )

        return standard_sql.StandardSqlField(name=self.name, type=sql_type)

    def __eq__(self, other):
        if not isinstance(other, SchemaField):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        key = self._key()
        # The key stores policy tags as a plain tuple of names; show them as a
        # PolicyTagList instance in the repr instead.
        policy_tags = key[-1]
        policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags)
        adjusted_key = key[:-1] + (policy_tags_inst,)
        return f"{self.__class__.__name__}{adjusted_key}"
380def _parse_schema_resource(info):
381 """Parse a resource fragment into a schema field.
383 Args:
384 info: (Mapping[str, Dict]): should contain a "fields" key to be parsed
386 Returns:
387 Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]:
388 A list of parsed fields, or ``None`` if no "fields" key found.
389 """
390 return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
393def _build_schema_resource(fields):
394 """Generate a resource fragment for a schema.
396 Args:
397 fields (Sequence[google.cloud.bigquery.schema.SchemaField): schema to be dumped.
399 Returns:
400 Sequence[Dict]: Mappings describing the schema of the supplied fields.
401 """
402 return [field.to_api_repr() for field in fields]
405def _to_schema_fields(schema):
406 """Coerce `schema` to a list of schema field instances.
408 Args:
409 schema(Sequence[Union[ \
410 :class:`~google.cloud.bigquery.schema.SchemaField`, \
411 Mapping[str, Any] \
412 ]]):
413 Table schema to convert. If some items are passed as mappings,
414 their content must be compatible with
415 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
417 Returns:
418 Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
420 Raises:
421 Exception: If ``schema`` is not a sequence, or if any item in the
422 sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
423 instance or a compatible mapping representation of the field.
424 """
425 for field in schema:
426 if not isinstance(field, (SchemaField, collections.abc.Mapping)):
427 raise ValueError(
428 "Schema items must either be fields or compatible "
429 "mapping representations."
430 )
432 return [
433 field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
434 for field in schema
435 ]
class PolicyTagList(object):
    """Policy tags attached to a column.

    Args:
        names (
            Optional[Tuple[str]]): policy tags to associate with the column.
            Policy tag identifiers are of the form
            `projects/*/locations/*/taxonomies/*/policyTags/*`.
    """

    def __init__(self, names: Iterable[str] = ()):
        # The names are frozen into a tuple in the order they were given.
        self._properties = {"names": tuple(names)}

    @property
    def names(self):
        """Tuple[str]: Policy tags associated with this definition."""
        return self._properties.get("names", ())

    def _key(self):
        """Return the sorted tuple of tag names.

        Used to compute this instance's hashcode and evaluate equality, so
        two lists holding the same names in a different order compare equal.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`.
        """
        ordered_names = sorted(self._properties.get("names", ()))
        return tuple(ordered_names)

    def __eq__(self, other):
        if isinstance(other, PolicyTagList):
            return self._key() == other._key()
        return NotImplemented

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        class_name = self.__class__.__name__
        return f"{class_name}(names={self._key()})"

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "PolicyTagList":
        """Return a :class:`PolicyTagList` object deserialized from a dict.

        The new instance holds its own tuple copy of the "names" entry; it
        does not keep a reference to ``api_repr``.

        Args:
            api_repr (Mapping[str, str]):
                The serialized representation of the PolicyTagList, such as
                what is output by :meth:`to_api_repr`.

        Returns:
            Optional[google.cloud.bigquery.schema.PolicyTagList]:
                The ``PolicyTagList`` object, or None when ``api_repr`` is None.
        """
        if api_repr is None:
            return None
        return cls(names=api_repr.get("names", ()))

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this object.

        A new dictionary, holding a new list of the names, is built on each call.

        Returns:
            dict:
                A dictionary representing the PolicyTagList object in
                serialized form.
        """
        return {"names": list(self.names)}