Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/google/cloud/bigquery/schema.py: 38%

162 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:07 +0000

1# Copyright 2015 Google LLC 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Schemas for BigQuery tables / queries.""" 

16 

17import collections 

18import enum 

19from typing import Any, Dict, Iterable, Union 

20 

21from google.cloud.bigquery import standard_sql 

22from google.cloud.bigquery.enums import StandardSqlTypeNames 

23 

24 

# Type names that denote a struct; fields of these types carry sub-fields.
_STRUCT_TYPES = ("RECORD", "STRUCT")

# SQL types reference:
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
    "STRING": StandardSqlTypeNames.STRING,
    "BYTES": StandardSqlTypeNames.BYTES,
    "INTEGER": StandardSqlTypeNames.INT64,
    "INT64": StandardSqlTypeNames.INT64,
    "FLOAT": StandardSqlTypeNames.FLOAT64,
    "FLOAT64": StandardSqlTypeNames.FLOAT64,
    "NUMERIC": StandardSqlTypeNames.NUMERIC,
    "BIGNUMERIC": StandardSqlTypeNames.BIGNUMERIC,
    "BOOLEAN": StandardSqlTypeNames.BOOL,
    "BOOL": StandardSqlTypeNames.BOOL,
    "GEOGRAPHY": StandardSqlTypeNames.GEOGRAPHY,
    "RECORD": StandardSqlTypeNames.STRUCT,
    "STRUCT": StandardSqlTypeNames.STRUCT,
    "TIMESTAMP": StandardSqlTypeNames.TIMESTAMP,
    "DATE": StandardSqlTypeNames.DATE,
    "TIME": StandardSqlTypeNames.TIME,
    "DATETIME": StandardSqlTypeNames.DATETIME,
    # no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""Legacy SQL type names (and their standard SQL aliases) mapped to the
corresponding ``StandardSqlTypeNames`` members."""

51 

52 

53class _DefaultSentinel(enum.Enum): 

54 """Object used as 'sentinel' indicating default value should be used. 

55 

56 Uses enum so that pytype/mypy knows that this is the only possible value. 

57 https://stackoverflow.com/a/60605919/101923 

58 

59 Literal[_DEFAULT_VALUE] is an alternative, but only added in Python 3.8. 

60 https://docs.python.org/3/library/typing.html#typing.Literal 

61 """ 

62 

63 DEFAULT_VALUE = object() 

64 

65 

66_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE 

67 

68 

class SchemaField(object):
    """Describe a single field within a table schema.

    Args:
        name: The name of the field.

        field_type:
            The type of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type

        mode:
            Defaults to ``'NULLABLE'``. The mode of the field. See
            https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
            The value is upper-cased before it is stored; passing ``None``
            leaves the property unset.

        description: Description for the field.

        fields: Subfields (requires ``field_type`` of 'RECORD').

        policy_tags: The policy tag list for the field.

        precision:
            Precision (number of digits) of fields with NUMERIC or BIGNUMERIC type.

        scale:
            Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.

        max_length: Maximum length of fields with STRING or BYTES type.

        default_value_expression: str, Optional
            Used to specify the default value of a field using a SQL expression. It can only be set for
            top level fields (columns).

            You can use a struct or array expression to specify default value for the entire struct or
            array. The valid SQL expressions are:

            - Literals for all data types, including STRUCT and ARRAY.

            - The following functions:

                `CURRENT_TIMESTAMP`
                `CURRENT_TIME`
                `CURRENT_DATE`
                `CURRENT_DATETIME`
                `GENERATE_UUID`
                `RAND`
                `SESSION_USER`
                `ST_GEOPOINT`

            - Struct or array composed with the above allowed functions, for example:

                "[CURRENT_DATE(), DATE '2020-01-01']"
    """

    def __init__(
        self,
        name: str,
        field_type: str,
        mode: str = "NULLABLE",
        default_value_expression: Union[str, None] = None,
        description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
        fields: Iterable["SchemaField"] = (),
        policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
        precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
        max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
    ):
        self._properties: Dict[str, Any] = {
            "name": name,
            "type": field_type,
        }
        # Optional properties are stored only when explicitly supplied, so
        # that ``to_api_repr`` does not emit keys the caller never set.
        if mode is not None:
            self._properties["mode"] = mode.upper()
        if description is not _DEFAULT_VALUE:
            self._properties["description"] = description
        if default_value_expression is not None:
            self._properties["defaultValueExpression"] = default_value_expression
        if precision is not _DEFAULT_VALUE:
            self._properties["precision"] = precision
        if scale is not _DEFAULT_VALUE:
            self._properties["scale"] = scale
        if max_length is not _DEFAULT_VALUE:
            self._properties["maxLength"] = max_length
        if policy_tags is not _DEFAULT_VALUE:
            # An explicit ``None`` is stored as-is; only the sentinel means "unset".
            self._properties["policyTags"] = (
                policy_tags.to_api_repr() if policy_tags is not None else None
            )
        self._fields = tuple(fields)

    @staticmethod
    def __get_int(api_repr, name):
        """Return ``api_repr[name]`` converted to ``int``.

        The ``_DEFAULT_VALUE`` sentinel is returned unchanged when the key is
        absent, so the constructor treats the property as unset.
        """
        v = api_repr.get(name, _DEFAULT_VALUE)
        if v is not _DEFAULT_VALUE:
            v = int(v)
        return v

    @classmethod
    def from_api_repr(cls, api_repr: dict) -> "SchemaField":
        """Return a ``SchemaField`` object deserialized from a dictionary.

        Args:
            api_repr (Mapping[str, str]): The serialized representation
                of the SchemaField, such as what is output by
                :meth:`to_api_repr`.

        Returns:
            google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
        """
        field_type = api_repr["type"].upper()

        # Handle optional properties with default values
        mode = api_repr.get("mode", "NULLABLE")
        description = api_repr.get("description", _DEFAULT_VALUE)
        fields = api_repr.get("fields", ())
        policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)

        default_value_expression = api_repr.get("defaultValueExpression", None)

        # Both "absent" (sentinel) and an explicit None pass through untouched.
        if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
            policy_tags = PolicyTagList.from_api_repr(policy_tags)

        return cls(
            field_type=field_type,
            fields=[cls.from_api_repr(f) for f in fields],
            mode=mode.upper(),
            default_value_expression=default_value_expression,
            description=description,
            name=api_repr["name"],
            policy_tags=policy_tags,
            precision=cls.__get_int(api_repr, "precision"),
            scale=cls.__get_int(api_repr, "scale"),
            max_length=cls.__get_int(api_repr, "maxLength"),
        )

    @property
    def name(self):
        """str: The name of the field."""
        return self._properties["name"]

    @property
    def field_type(self):
        """str: The type of the field.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
        """
        return self._properties["type"]

    @property
    def mode(self):
        """Optional[str]: The mode of the field.

        See:
        https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
        """
        return self._properties.get("mode")

    @property
    def is_nullable(self):
        """bool: whether 'mode' is 'nullable'."""
        return self.mode == "NULLABLE"

    @property
    def default_value_expression(self):
        """Optional[str] default value of a field, using an SQL expression"""
        return self._properties.get("defaultValueExpression")

    @property
    def description(self):
        """Optional[str]: description for the field."""
        return self._properties.get("description")

    @property
    def precision(self):
        """Optional[int]: Precision (number of digits) for the NUMERIC field."""
        return self._properties.get("precision")

    @property
    def scale(self):
        """Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
        return self._properties.get("scale")

    @property
    def max_length(self):
        """Optional[int]: Maximum length for the STRING or BYTES field."""
        return self._properties.get("maxLength")

    @property
    def fields(self):
        """Optional[tuple]: Subfields contained in this field.

        Must be empty if ``field_type`` is not 'RECORD'.
        """
        return self._fields

    @property
    def policy_tags(self):
        """Optional[google.cloud.bigquery.schema.PolicyTagList]: Policy tag list
        definition for this field.
        """
        resource = self._properties.get("policyTags")
        return PolicyTagList.from_api_repr(resource) if resource is not None else None

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this schema field.

        Returns:
            Dict: A dictionary representing the SchemaField in a serialized form.
        """
        answer = self._properties.copy()

        # If this is a RECORD type, then sub-fields are also included,
        # add this to the serialized representation.
        if self.field_type.upper() in _STRUCT_TYPES:
            answer["fields"] = [f.to_api_repr() for f in self.fields]

        # Done; return the serialized dictionary.
        return answer

    def _key(self):
        """A tuple key that uniquely describes this field.

        Used to compute this instance's hashcode and evaluate equality.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
        """
        # Type can temporarily be set to None if the code needs a SchemaField instance,
        # but has not determined the exact type of the field yet.
        field_type = self.field_type
        if field_type is not None:
            field_type = field_type.upper()
            # Fold the type parameters into the type name so that, for example,
            # STRING(10) and STRING(20) compare as different fields.
            if field_type == "STRING" or field_type == "BYTES":
                if self.max_length is not None:
                    field_type = f"{field_type}({self.max_length})"
            elif field_type.endswith("NUMERIC"):
                if self.precision is not None:
                    if self.scale is not None:
                        field_type = f"{field_type}({self.precision}, {self.scale})"
                    else:
                        field_type = f"{field_type}({self.precision})"

        # Read the property once; every access builds a new PolicyTagList.
        tag_list = self.policy_tags
        policy_tags = None if tag_list is None else tuple(sorted(tag_list.names))

        # The constructor accepts ``mode=None`` and then stores no mode at all,
        # so guard the upper-casing instead of raising AttributeError from
        # ``==``, ``hash()`` and ``repr()``.
        mode = self.mode

        return (
            self.name,
            field_type,
            mode.upper() if mode is not None else None,
            self.default_value_expression,
            self.description,
            self._fields,
            policy_tags,
        )

    def to_standard_sql(self) -> standard_sql.StandardSqlField:
        """Return the field as the standard SQL field representation object."""
        sql_type = standard_sql.StandardSqlDataType()

        # Normalize the case before the lookup, as ``_key`` and ``to_api_repr``
        # do; the mapping keys are upper-case, so a lowercase type name would
        # otherwise resolve to TYPE_KIND_UNSPECIFIED.
        field_type = self.field_type.upper() if self.field_type is not None else None

        if self.mode == "REPEATED":
            sql_type.type_kind = StandardSqlTypeNames.ARRAY
        else:
            sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
                field_type,
                StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
            )

        if sql_type.type_kind == StandardSqlTypeNames.ARRAY:  # noqa: E721
            array_element_type = LEGACY_TO_STANDARD_TYPES.get(
                field_type,
                StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
            )
            sql_type.array_element_type = standard_sql.StandardSqlDataType(
                type_kind=array_element_type
            )

            # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
            if array_element_type == StandardSqlTypeNames.STRUCT:  # noqa: E721
                sql_type.array_element_type.struct_type = (
                    standard_sql.StandardSqlStructType(
                        fields=(field.to_standard_sql() for field in self.fields)
                    )
                )
        elif sql_type.type_kind == StandardSqlTypeNames.STRUCT:  # noqa: E721
            sql_type.struct_type = standard_sql.StandardSqlStructType(
                fields=(field.to_standard_sql() for field in self.fields)
            )

        return standard_sql.StandardSqlField(name=self.name, type=sql_type)

    def __eq__(self, other):
        if not isinstance(other, SchemaField):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        key = self._key()
        # The key stores policy tags as a plain tuple of names; show them as a
        # PolicyTagList instance in the repr instead.
        policy_tags = key[-1]
        policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags)
        adjusted_key = key[:-1] + (policy_tags_inst,)
        return f"{self.__class__.__name__}{adjusted_key}"

378 

379 

380def _parse_schema_resource(info): 

381 """Parse a resource fragment into a schema field. 

382 

383 Args: 

384 info: (Mapping[str, Dict]): should contain a "fields" key to be parsed 

385 

386 Returns: 

387 Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: 

388 A list of parsed fields, or ``None`` if no "fields" key found. 

389 """ 

390 return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] 

391 

392 

393def _build_schema_resource(fields): 

394 """Generate a resource fragment for a schema. 

395 

396 Args: 

397 fields (Sequence[google.cloud.bigquery.schema.SchemaField): schema to be dumped. 

398 

399 Returns: 

400 Sequence[Dict]: Mappings describing the schema of the supplied fields. 

401 """ 

402 return [field.to_api_repr() for field in fields] 

403 

404 

405def _to_schema_fields(schema): 

406 """Coerce `schema` to a list of schema field instances. 

407 

408 Args: 

409 schema(Sequence[Union[ \ 

410 :class:`~google.cloud.bigquery.schema.SchemaField`, \ 

411 Mapping[str, Any] \ 

412 ]]): 

413 Table schema to convert. If some items are passed as mappings, 

414 their content must be compatible with 

415 :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. 

416 

417 Returns: 

418 Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] 

419 

420 Raises: 

421 Exception: If ``schema`` is not a sequence, or if any item in the 

422 sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` 

423 instance or a compatible mapping representation of the field. 

424 """ 

425 for field in schema: 

426 if not isinstance(field, (SchemaField, collections.abc.Mapping)): 

427 raise ValueError( 

428 "Schema items must either be fields or compatible " 

429 "mapping representations." 

430 ) 

431 

432 return [ 

433 field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) 

434 for field in schema 

435 ] 

436 

437 

class PolicyTagList(object):
    """Define Policy Tags for a column.

    Args:
        names (Optional[Iterable[str]]):
            Policy tags to associate with the column. Policy tag identifiers
            are of the form `projects/*/locations/*/taxonomies/*/policyTags/*`.
            The names are copied into a tuple.
    """

    def __init__(self, names: Iterable[str] = ()):
        self._properties = {}
        self._properties["names"] = tuple(names)

    @property
    def names(self):
        """Tuple[str]: Policy tags associated with this definition."""
        return self._properties.get("names", ())

    def _key(self):
        """A tuple key that uniquely describes this PolicyTagList.

        Used to compute this instance's hashcode and evaluate equality. The
        names are sorted, so their order does not affect equality.

        Returns:
            Tuple: The contents of this :class:`~google.cloud.bigquery.schema.PolicyTagList`.
        """
        return tuple(sorted(self._properties.get("names", ())))

    def __eq__(self, other):
        if not isinstance(other, PolicyTagList):
            return NotImplemented
        return self._key() == other._key()

    def __ne__(self, other):
        return not self == other

    def __hash__(self):
        return hash(self._key())

    def __repr__(self):
        return f"{self.__class__.__name__}(names={self._key()})"

    @classmethod
    def from_api_repr(
        cls, api_repr: Union[dict, None]
    ) -> Union["PolicyTagList", None]:
        """Return a :class:`PolicyTagList` object deserialized from a dict.

        The names found in ``api_repr`` are copied into the new instance;
        later changes to ``api_repr`` do not affect it.

        Args:
            api_repr (Optional[Mapping[str, Any]]):
                The serialized representation of the PolicyTagList, such as
                what is output by :meth:`to_api_repr`.

        Returns:
            Optional[google.cloud.bigquery.schema.PolicyTagList]:
                The ``PolicyTagList`` object, or ``None`` if ``api_repr`` is
                ``None``.
        """
        if api_repr is None:
            return None
        names = api_repr.get("names", ())
        return cls(names=names)

    def to_api_repr(self) -> dict:
        """Return a dictionary representing this object.

        A new dictionary holding a new list of the names is built on every
        call, so changing the result does not affect this instance.

        Returns:
            dict:
                A dictionary representing the PolicyTagList object in
                serialized form.
        """
        answer = {"names": list(self.names)}
        return answer