Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/keras/src/utils/feature_space.py: 22%

295 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2022 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""FeatureSpace structured data preprocessing & encoding utility.""" 

16 

17import tensorflow.compat.v2 as tf 

18 

19from keras.src import backend 

20from keras.src.engine import base_layer 

21from keras.src.saving import saving_lib 

22from keras.src.saving import serialization_lib 

23from keras.src.utils.generic_utils import LazyLoader 

24 

25# isort: off 

26from tensorflow.python.util.tf_export import keras_export 

27 

28layers = LazyLoader("layers", globals(), "keras.src.layers") 

29 

30 

class Cross:
    """Specification of a feature cross.

    Stores the names of the features to cross, the dimensionality of the
    crossing space, and how the crossed value is emitted (`"int"` or
    `"one_hot"`).
    """

    def __init__(self, feature_names, crossing_dim, output_mode="one_hot"):
        """Validate `output_mode` and record the cross specification.

        Raises:
            ValueError: if `output_mode` is not `"int"` or `"one_hot"`.
        """
        supported_modes = {"int", "one_hot"}
        if output_mode not in supported_modes:
            message = (
                "Invalid value for argument `output_mode`. "
                "Expected one of {'int', 'one_hot'}. "
                f"Received: output_mode={output_mode}"
            )
            raise ValueError(message)
        # Stored as a tuple whatever iterable type was passed in.
        self.feature_names = tuple(feature_names)
        self.crossing_dim = crossing_dim
        self.output_mode = output_mode

    @property
    def name(self):
        """Name of the cross: the feature names joined with `"_X_"`."""
        separator = "_X_"
        return separator.join(self.feature_names)

    def get_config(self):
        """Return the constructor arguments as a dict."""
        return dict(
            feature_names=self.feature_names,
            crossing_dim=self.crossing_dim,
            output_mode=self.output_mode,
        )

    @classmethod
    def from_config(cls, config):
        """Rebuild a `Cross` from the dict produced by `get_config()`."""
        return cls(**config)

57 

58 

class Feature:
    """Specification of a single feature.

    Stores the feature's dtype, the preprocessor applied to it, and the
    output mode (`"int"`, `"one_hot"` or `"float"`).
    """

    def __init__(self, dtype, preprocessor, output_mode):
        """Validate `output_mode` and record the feature specification.

        A `preprocessor` given as a dict is passed through
        `serialization_lib.deserialize_keras_object` before being stored.

        Raises:
            ValueError: if `output_mode` is not one of the supported values.
        """
        supported_modes = {"int", "one_hot", "float"}
        if output_mode not in supported_modes:
            message = (
                "Invalid value for argument `output_mode`. "
                "Expected one of {'int', 'one_hot', 'float'}. "
                f"Received: output_mode={output_mode}"
            )
            raise ValueError(message)
        self.dtype = dtype
        # A dict here is a serialized preprocessor config; anything else is
        # stored unchanged.
        needs_deserialization = isinstance(preprocessor, dict)
        self.preprocessor = (
            serialization_lib.deserialize_keras_object(preprocessor)
            if needs_deserialization
            else preprocessor
        )
        self.output_mode = output_mode

    def get_config(self):
        """Return the constructor arguments, with the preprocessor serialized."""
        serialized_preprocessor = serialization_lib.serialize_keras_object(
            self.preprocessor
        )
        return dict(
            dtype=self.dtype,
            preprocessor=serialized_preprocessor,
            output_mode=self.output_mode,
        )

    @classmethod
    def from_config(cls, config):
        """Rebuild a `Feature` from the dict produced by `get_config()`."""
        return cls(**config)

87 

88 

@keras_export("keras.utils.FeatureSpace", v1=[])
class FeatureSpace(base_layer.Layer):
    """One-stop utility for preprocessing and encoding structured data.

    Arguments:
        features: Dict mapping the names of your features to their
            type specification, e.g. `{"my_feature": "integer_categorical"}`
            or `{"my_feature": FeatureSpace.integer_categorical()}`.
            For a complete list of all supported types, see
            "Available feature types" paragraph below.
        output_mode: One of `"concat"` or `"dict"`. In concat mode, all
            features get concatenated together into a single vector.
            In dict mode, the FeatureSpace returns a dict of individually
            encoded features (with the same keys as the input dict keys).
        crosses: List of features to be crossed together, e.g.
            `crosses=[("feature_1", "feature_2")]`. The features will be
            "crossed" by hashing their combined value into
            a fixed-length vector.
        crossing_dim: Default vector size for hashing crossed features.
            Defaults to `32`.
        hashing_dim: Default vector size for hashing features of type
            `"integer_hashed"` and `"string_hashed"`. Defaults to `32`.
        num_discretization_bins: Default number of bins to be used for
            discretizing features of type `"float_discretized"`.
            Defaults to `32`.

    **Available feature types:**

    Note that all features can be referred to by their string name,
    e.g. `"integer_categorical"`. When using the string name, the default
    argument values are used.

    ```python
    # Plain float values.
    FeatureSpace.float(name=None)

    # Float values to be preprocessed via featurewise standardization
    # (i.e. via a `keras.layers.Normalization` layer).
    FeatureSpace.float_normalized(name=None)

    # Float values to be preprocessed via linear rescaling
    # (i.e. via a `keras.layers.Rescaling` layer).
    FeatureSpace.float_rescaled(scale=1., offset=0., name=None)

    # Float values to be discretized. By default, the discrete
    # representation will then be one-hot encoded.
    FeatureSpace.float_discretized(
        num_bins, bin_boundaries=None, output_mode="one_hot", name=None)

    # Integer values to be indexed. By default, the discrete
    # representation will then be one-hot encoded.
    FeatureSpace.integer_categorical(
        max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None)

    # String values to be indexed. By default, the discrete
    # representation will then be one-hot encoded.
    FeatureSpace.string_categorical(
        max_tokens=None, num_oov_indices=1, output_mode="one_hot", name=None)

    # Integer values to be hashed into a fixed number of bins.
    # By default, the discrete representation will then be one-hot encoded.
    FeatureSpace.integer_hashed(num_bins, output_mode="one_hot", name=None)

    # String values to be hashed into a fixed number of bins.
    # By default, the discrete representation will then be one-hot encoded.
    FeatureSpace.string_hashed(num_bins, output_mode="one_hot", name=None)
    ```

    Examples:

    **Basic usage with a dict of input data:**

    ```python
    raw_data = {
        "float_values": [0.0, 0.1, 0.2, 0.3],
        "string_values": ["zero", "one", "two", "three"],
        "int_values": [0, 1, 2, 3],
    }
    dataset = tf.data.Dataset.from_tensor_slices(raw_data)

    feature_space = FeatureSpace(
        features={
            "float_values": "float_normalized",
            "string_values": "string_categorical",
            "int_values": "integer_categorical",
        },
        crosses=[("string_values", "int_values")],
        output_mode="concat",
    )
    # Before you start using the FeatureSpace,
    # you must `adapt()` it on some data.
    feature_space.adapt(dataset)

    # You can call the FeatureSpace on a dict of data (batched or unbatched).
    output_vector = feature_space(raw_data)
    ```

    **Basic usage with `tf.data`:**

    ```python
    # Unlabeled data
    preprocessed_ds = unlabeled_dataset.map(feature_space)

    # Labeled data
    preprocessed_ds = labeled_dataset.map(lambda x, y: (feature_space(x), y))
    ```

    **Basic usage with the Keras Functional API:**

    ```python
    # Retrieve a dict of Keras Input objects
    inputs = feature_space.get_inputs()
    # Retrieve the corresponding encoded Keras tensors
    encoded_features = feature_space.get_encoded_features()
    # Build a Functional model
    outputs = keras.layers.Dense(1, activation="sigmoid")(encoded_features)
    model = keras.Model(inputs, outputs)
    ```

    **Customizing each feature or feature cross:**

    ```python
    feature_space = FeatureSpace(
        features={
            "float_values": FeatureSpace.float_normalized(),
            "string_values": FeatureSpace.string_categorical(max_tokens=10),
            "int_values": FeatureSpace.integer_categorical(max_tokens=10),
        },
        crosses=[
            FeatureSpace.cross(("string_values", "int_values"), crossing_dim=32)
        ],
        output_mode="concat",
    )
    ```

    **Returning a dict of integer-encoded features:**

    ```python
    feature_space = FeatureSpace(
        features={
            "string_values": FeatureSpace.string_categorical(output_mode="int"),
            "int_values": FeatureSpace.integer_categorical(output_mode="int"),
        },
        crosses=[
            FeatureSpace.cross(
                feature_names=("string_values", "int_values"),
                crossing_dim=32,
                output_mode="int",
            )
        ],
        output_mode="dict",
    )
    ```

    **Specifying your own Keras preprocessing layer:**

    ```python
    # Let's say that one of the features is a short text paragraph that
    # we want to encode as a vector (one vector per paragraph) via TF-IDF.
    data = {
        "text": ["1st string", "2nd string", "3rd string"],
    }

    # There's a Keras layer for this: TextVectorization.
    custom_layer = layers.TextVectorization(output_mode="tf_idf")

    # We can use FeatureSpace.feature to create a custom feature
    # that will use our preprocessing layer.
    feature_space = FeatureSpace(
        features={
            "text": FeatureSpace.feature(
                preprocessor=custom_layer, dtype="string", output_mode="float"
            ),
        },
        output_mode="concat",
    )
    feature_space.adapt(tf.data.Dataset.from_tensor_slices(data))
    output_vector = feature_space(data)
    ```

    **Retrieving the underlying Keras preprocessing layers:**

    ```python
    # The preprocessing layer of each feature is available in `.preprocessors`.
    preprocessing_layer = feature_space.preprocessors["feature1"]

    # The crossing layer of each feature cross is available in `.crossers`.
    # It's an instance of keras.layers.HashedCrossing.
    crossing_layer = feature_space.crossers["feature1_X_feature2"]
    ```

    **Saving and reloading a FeatureSpace:**

    ```python
    feature_space.save("myfeaturespace.keras")
    reloaded_feature_space = keras.models.load_model("myfeaturespace.keras")
    ```
    """

    @classmethod
    def cross(cls, feature_names, crossing_dim, output_mode="one_hot"):
        """Create a `Cross` specification for the given feature names."""
        return Cross(feature_names, crossing_dim, output_mode=output_mode)

    @classmethod
    def feature(cls, dtype, preprocessor, output_mode):
        """Create a `Feature` specification with a custom preprocessor."""
        return Feature(dtype, preprocessor, output_mode)

    @classmethod
    def float(cls, name=None):
        """Float feature passed through an `Identity` preprocessor."""
        # Imported locally, as in the original implementation.
        from keras.src.layers.core import identity

        name = name or backend.unique_object_name("float")
        preprocessor = identity.Identity(
            dtype="float32", name=f"{name}_preprocessor"
        )
        return Feature(
            dtype="float32", preprocessor=preprocessor, output_mode="float"
        )

    @classmethod
    def float_rescaled(cls, scale=1.0, offset=0.0, name=None):
        """Float feature preprocessed by a `layers.Rescaling` layer."""
        name = name or backend.unique_object_name("float_rescaled")
        preprocessor = layers.Rescaling(
            scale=scale, offset=offset, name=f"{name}_preprocessor"
        )
        return Feature(
            dtype="float32", preprocessor=preprocessor, output_mode="float"
        )

    @classmethod
    def float_normalized(cls, name=None):
        """Float feature preprocessed by a `layers.Normalization` layer."""
        name = name or backend.unique_object_name("float_normalized")
        preprocessor = layers.Normalization(
            axis=-1, name=f"{name}_preprocessor"
        )
        return Feature(
            dtype="float32", preprocessor=preprocessor, output_mode="float"
        )

    @classmethod
    def float_discretized(
        cls, num_bins, bin_boundaries=None, output_mode="one_hot", name=None
    ):
        """Float feature preprocessed by a `layers.Discretization` layer."""
        name = name or backend.unique_object_name("float_discretized")
        preprocessor = layers.Discretization(
            num_bins=num_bins,
            bin_boundaries=bin_boundaries,
            name=f"{name}_preprocessor",
        )
        return Feature(
            dtype="float32", preprocessor=preprocessor, output_mode=output_mode
        )

    @classmethod
    def integer_categorical(
        cls,
        max_tokens=None,
        num_oov_indices=1,
        output_mode="one_hot",
        name=None,
    ):
        """Integer feature preprocessed by a `layers.IntegerLookup` layer."""
        name = name or backend.unique_object_name("integer_categorical")
        preprocessor = layers.IntegerLookup(
            name=f"{name}_preprocessor",
            max_tokens=max_tokens,
            num_oov_indices=num_oov_indices,
        )
        return Feature(
            dtype="int64", preprocessor=preprocessor, output_mode=output_mode
        )

    @classmethod
    def string_categorical(
        cls,
        max_tokens=None,
        num_oov_indices=1,
        output_mode="one_hot",
        name=None,
    ):
        """String feature preprocessed by a `layers.StringLookup` layer."""
        name = name or backend.unique_object_name("string_categorical")
        preprocessor = layers.StringLookup(
            name=f"{name}_preprocessor",
            max_tokens=max_tokens,
            num_oov_indices=num_oov_indices,
        )
        return Feature(
            dtype="string", preprocessor=preprocessor, output_mode=output_mode
        )

    @classmethod
    def string_hashed(cls, num_bins, output_mode="one_hot", name=None):
        """String feature preprocessed by a `layers.Hashing` layer."""
        name = name or backend.unique_object_name("string_hashed")
        preprocessor = layers.Hashing(
            name=f"{name}_preprocessor", num_bins=num_bins
        )
        return Feature(
            dtype="string", preprocessor=preprocessor, output_mode=output_mode
        )

    @classmethod
    def integer_hashed(cls, num_bins, output_mode="one_hot", name=None):
        """Integer feature preprocessed by a `layers.Hashing` layer."""
        name = name or backend.unique_object_name("integer_hashed")
        preprocessor = layers.Hashing(
            name=f"{name}_preprocessor", num_bins=num_bins
        )
        return Feature(
            dtype="int64", preprocessor=preprocessor, output_mode=output_mode
        )

    def __init__(
        self,
        features,
        output_mode="concat",
        crosses=None,
        crossing_dim=32,
        hashing_dim=32,
        num_discretization_bins=32,
    ):
        """Standardize the feature/cross specs and create the input layers.

        Raises:
            ValueError: if `features` is empty, if a tuple-style cross is
                given without `crossing_dim`, if a tuple-style cross names
                a feature missing from `features`, or if `output_mode` is
                not `"dict"` or `"concat"`.
        """
        if not features:
            raise ValueError("The `features` argument cannot be None or empty.")
        self.crossing_dim = crossing_dim
        self.hashing_dim = hashing_dim
        self.num_discretization_bins = num_discretization_bins
        self.features = {
            name: self._standardize_feature(name, value)
            for name, value in features.items()
        }
        self.crosses = []
        if crosses:
            feature_set = set(features.keys())
            for cross in crosses:
                # A dict is a serialized `Cross`; rebuild it first.
                if isinstance(cross, dict):
                    cross = serialization_lib.deserialize_keras_object(cross)
                if isinstance(cross, Cross):
                    self.crosses.append(cross)
                else:
                    # Otherwise `cross` is an iterable of feature names that
                    # relies on the default `crossing_dim`.
                    if not crossing_dim:
                        raise ValueError(
                            "When specifying `crosses`, the argument "
                            "`crossing_dim` "
                            "(dimensionality of the crossing space) "
                            "should be specified as well."
                        )
                    for key in cross:
                        if key not in feature_set:
                            raise ValueError(
                                "All features referenced "
                                "in the `crosses` argument "
                                "should be present in the `features` dict. "
                                f"Received unknown features: {cross}"
                            )
                    self.crosses.append(Cross(cross, crossing_dim=crossing_dim))
        self.crosses_by_name = {cross.name: cross for cross in self.crosses}

        if output_mode not in {"dict", "concat"}:
            raise ValueError(
                "Invalid value for argument `output_mode`. "
                "Expected one of {'dict', 'concat'}. "
                f"Received: output_mode={output_mode}"
            )
        self.output_mode = output_mode

        self.inputs = {
            name: self._feature_to_input(name, value)
            for name, value in self.features.items()
        }
        self.preprocessors = {
            name: value.preprocessor for name, value in self.features.items()
        }
        self.encoded_features = None
        self.crossers = {
            cross.name: self._cross_to_crosser(cross) for cross in self.crosses
        }
        # The attributes below are filled in by `_merge_features()` the
        # first time it runs.
        self.one_hot_encoders = {}
        self.built = False
        self._is_adapted = False
        self.concat = None
        self._preprocessed_features_names = None
        self._crossed_features_names = None

    def _feature_to_input(self, name, feature):
        """Create the `layers.Input` (shape `(1,)`) for one feature."""
        return layers.Input(shape=(1,), dtype=feature.dtype, name=name)

    def _standardize_feature(self, name, feature):
        """Convert a feature spec (object, dict or string) to a `Feature`.

        Raises:
            ValueError: if `feature` is not a recognized feature type.
        """
        if isinstance(feature, Feature):
            return feature

        if isinstance(feature, dict):
            return serialization_lib.deserialize_keras_object(feature)

        if feature == "float":
            return self.float(name=name)
        elif feature == "float_normalized":
            return self.float_normalized(name=name)
        elif feature == "float_rescaled":
            return self.float_rescaled(name=name)
        elif feature == "float_discretized":
            return self.float_discretized(
                name=name, num_bins=self.num_discretization_bins
            )
        elif feature == "integer_categorical":
            return self.integer_categorical(name=name)
        elif feature == "string_categorical":
            return self.string_categorical(name=name)
        elif feature == "integer_hashed":
            return self.integer_hashed(self.hashing_dim, name=name)
        elif feature == "string_hashed":
            return self.string_hashed(self.hashing_dim, name=name)
        else:
            raise ValueError(f"Invalid feature type: {feature}")

    def _cross_to_crosser(self, cross):
        """Create the `layers.HashedCrossing` layer for a `Cross` spec."""
        return layers.HashedCrossing(cross.crossing_dim, name=cross.name)

    def _list_adaptable_preprocessors(self):
        """Return the names of features whose preprocessor has `adapt`."""
        adaptable_preprocessors = []
        for name in self.features.keys():
            preprocessor = self.preprocessors[name]
            # Special case: a Normalization layer with preset mean/variance.
            # Not adaptable.
            if isinstance(preprocessor, layers.Normalization):
                if preprocessor.input_mean is not None:
                    continue
            if hasattr(preprocessor, "adapt"):
                adaptable_preprocessors.append(name)
        return adaptable_preprocessors

    def adapt(self, dataset):
        """Adapt every adaptable preprocessor, then finish building.

        Args:
            dataset: A `tf.data.Dataset` whose elements can be indexed by
                feature name.

        Raises:
            ValueError: if `dataset` is not a `tf.data.Dataset`, or if it
                yields no elements for a feature that must be adapted.
        """
        if not isinstance(dataset, tf.data.Dataset):
            raise ValueError(
                "`adapt()` can only be called on a tf.data.Dataset. "
                f"Received instead: {dataset} (of type {type(dataset)})"
            )

        for name in self._list_adaptable_preprocessors():
            # Call adapt() on each individual adaptable layer.

            # TODO: consider rewriting this to instead iterate on the
            # dataset once, split each batch into individual features,
            # and call the layer's `_adapt_function` on each batch
            # to simulate the behavior of adapt() in a more performant fashion.

            feature_dataset = dataset.map(lambda x: x[name])
            preprocessor = self.preprocessors[name]
            # TODO: consider adding an adapt progress bar.
            # Sample 1 element to check the rank
            sample = None
            for sample in feature_dataset.take(1):
                pass
            if sample is None:
                # Without this guard an empty dataset would surface as an
                # unbound-variable error on the rank check below.
                raise ValueError(
                    "`adapt()` was called on an empty dataset: "
                    f"no element could be read for feature '{name}'."
                )
            rank = sample.shape.rank
            if rank == 0:
                # The dataset yields unbatched scalars; batch it.
                feature_dataset = feature_dataset.batch(32)
            if rank in {0, 1}:
                # If the rank is 1, add a dimension
                # so we can reduce on axis=-1.
                # Note: if rank was previously 0, it is now 1.
                feature_dataset = feature_dataset.map(
                    lambda x: tf.expand_dims(x, -1)
                )
            preprocessor.adapt(feature_dataset)
        self._is_adapted = True
        self.get_encoded_features()  # Finish building the layer
        self.built = True

    def get_inputs(self):
        """Return the dict of input layers, building first if needed."""
        self._check_if_built()
        return self.inputs

    def get_encoded_features(self):
        """Return the encoded features, computing and caching them once.

        Raises:
            ValueError: if the FeatureSpace still needs to be adapted.
        """
        self._check_if_adapted()

        if self.encoded_features is None:
            preprocessed_features = self._preprocess_features(self.inputs)
            crossed_features = self._cross_features(preprocessed_features)
            merged_features = self._merge_features(
                preprocessed_features, crossed_features
            )
            self.encoded_features = merged_features
        return self.encoded_features

    def _preprocess_features(self, features):
        """Apply each feature's preprocessor to its entry in `features`."""
        return {
            name: self.preprocessors[name](features[name])
            for name in features.keys()
        }

    def _cross_features(self, features):
        """Apply each crosser to its features; return a dict by cross name."""
        all_outputs = {}
        for cross in self.crosses:
            inputs = [features[name] for name in cross.feature_names]
            outputs = self.crossers[cross.name](inputs)
            all_outputs[cross.name] = outputs
        return all_outputs

    def _merge_features(self, preprocessed_features, crossed_features):
        """Encode and merge preprocessed and crossed features.

        Returns a dict of features when `output_mode == "dict"`, otherwise
        the concatenation of all features. On the first (unbuilt) pass the
        one-hot encoders and the concatenation layer are created.

        Raises:
            ValueError: if a `"one_hot"` feature is not integer-typed or has
                an unsupported preprocessor, or if concat mode meets an
                integer or string feature.
        """
        if not self._preprocessed_features_names:
            # Fix the feature ordering (sorted by name) on the first call.
            self._preprocessed_features_names = sorted(
                preprocessed_features.keys()
            )
            self._crossed_features_names = sorted(crossed_features.keys())

        all_names = (
            self._preprocessed_features_names + self._crossed_features_names
        )
        all_features = [
            preprocessed_features[name]
            for name in self._preprocessed_features_names
        ] + [crossed_features[name] for name in self._crossed_features_names]

        if self.output_mode == "dict":
            output_dict = {}
        else:
            features_to_concat = []

        if self.built:
            # Fast mode.
            for name, feature in zip(all_names, all_features):
                encoder = self.one_hot_encoders.get(name, None)
                if encoder:
                    feature = encoder(feature)
                if self.output_mode == "dict":
                    output_dict[name] = feature
                else:
                    features_to_concat.append(feature)
            if self.output_mode == "dict":
                return output_dict
            else:
                return self.concat(features_to_concat)

        # If the object isn't built,
        # we create the encoder and concat layers below
        all_specs = [
            self.features[name] for name in self._preprocessed_features_names
        ] + [
            self.crosses_by_name[name] for name in self._crossed_features_names
        ]
        for name, feature, spec in zip(all_names, all_features, all_specs):
            dtype = feature.dtype.name

            if spec.output_mode == "one_hot":
                preprocessor = self.preprocessors.get(
                    name
                ) or self.crossers.get(name)
                cardinality = None
                if not dtype.startswith("int"):
                    raise ValueError(
                        f"Feature '{name}' has `output_mode='one_hot'`. "
                        "Thus its preprocessor should return an int64 dtype. "
                        f"Instead it returns a {dtype} dtype."
                    )

                if isinstance(
                    preprocessor, (layers.IntegerLookup, layers.StringLookup)
                ):
                    cardinality = preprocessor.vocabulary_size()
                elif isinstance(preprocessor, layers.CategoryEncoding):
                    cardinality = preprocessor.num_tokens
                elif isinstance(preprocessor, layers.Discretization):
                    cardinality = preprocessor.num_bins
                elif isinstance(
                    preprocessor, (layers.HashedCrossing, layers.Hashing)
                ):
                    cardinality = preprocessor.num_bins
                else:
                    raise ValueError(
                        f"Feature '{name}' has `output_mode='one_hot'`. "
                        "However it isn't a standard feature and the "
                        "dimensionality of its output space is not known, "
                        "thus it cannot be one-hot encoded. "
                        "Try using `output_mode='int'`."
                    )
                if cardinality is not None:
                    encoder = layers.CategoryEncoding(
                        num_tokens=cardinality, output_mode="multi_hot"
                    )
                    self.one_hot_encoders[name] = encoder
                    feature = encoder(feature)

            if self.output_mode == "concat":
                # Re-read the dtype: the encoder above may have changed it.
                dtype = feature.dtype.name
                if dtype.startswith("int") or dtype == "string":
                    raise ValueError(
                        f"Cannot concatenate features because feature '{name}' "
                        f"has not been encoded (it has dtype {dtype}). "
                        "Consider using `output_mode='dict'`."
                    )
                features_to_concat.append(feature)
            else:
                output_dict[name] = feature

        if self.output_mode == "concat":
            self.concat = layers.Concatenate(axis=-1)
            return self.concat(features_to_concat)
        else:
            return output_dict

    def _check_if_adapted(self):
        """Raise unless adapted; mark adapted if nothing is adaptable.

        Raises:
            ValueError: if there are adaptable preprocessors and `adapt()`
                has not been called.
        """
        if not self._is_adapted:
            if not self._list_adaptable_preprocessors():
                self._is_adapted = True
            else:
                raise ValueError(
                    "You need to call `.adapt(dataset)` on the FeatureSpace "
                    "before you can start using it."
                )

    def _check_if_built(self):
        """Finish building the FeatureSpace if that has not happened yet."""
        if not self.built:
            self._check_if_adapted()
            # Finishes building
            self.get_encoded_features()
            self.built = True

    def __call__(self, data):
        """Preprocess, cross and merge a dict of raw feature values.

        Rank-0 values are reshaped to `[1, 1]` and the added leading
        dimension is squeezed out of the result; rank-1 values get a
        trailing dimension.

        Raises:
            ValueError: if `data` is not a dict.
        """
        self._check_if_built()
        if not isinstance(data, dict):
            raise ValueError(
                "A FeatureSpace can only be called with a dict. "
                f"Received: data={data} (of type {type(data)})"
            )

        data = {key: tf.convert_to_tensor(value) for key, value in data.items()}
        rebatched = False
        for name, x in data.items():
            if x.shape.rank == 0:
                data[name] = tf.reshape(x, [1, 1])
                rebatched = True
            elif x.shape.rank == 1:
                data[name] = tf.expand_dims(x, -1)

        preprocessed_data = self._preprocess_features(data)
        crossed_data = self._cross_features(preprocessed_data)
        merged_data = self._merge_features(preprocessed_data, crossed_data)
        if rebatched:
            # Undo the leading dimension added for scalar inputs.
            if self.output_mode == "concat":
                assert merged_data.shape[0] == 1
                return tf.squeeze(merged_data, axis=0)
            else:
                for name, x in merged_data.items():
                    if x.shape.rank == 2 and x.shape[0] == 1:
                        merged_data[name] = tf.squeeze(x, axis=0)
        return merged_data

    def get_config(self):
        """Return the constructor arguments, with specs serialized."""
        return {
            "features": serialization_lib.serialize_keras_object(self.features),
            "output_mode": self.output_mode,
            "crosses": serialization_lib.serialize_keras_object(self.crosses),
            "crossing_dim": self.crossing_dim,
            "hashing_dim": self.hashing_dim,
            "num_discretization_bins": self.num_discretization_bins,
        }

    @classmethod
    def from_config(cls, config):
        """Rebuild a `FeatureSpace` from the dict made by `get_config()`."""
        return cls(**config)

    def get_build_config(self):
        """Return each feature preprocessor's build config, by name."""
        return {
            name: feature.preprocessor.get_build_config()
            for name, feature in self.features.items()
        }

    def build_from_config(self, config):
        """Build each preprocessor from its config and mark as adapted."""
        for name in config.keys():
            self.features[name].preprocessor.build_from_config(config[name])
        self._is_adapted = True

    def save(self, filepath):
        """Save the `FeatureSpace` instance to a `.keras` file.

        You can reload it via `keras.models.load_model()`:

        ```python
        feature_space.save("myfeaturespace.keras")
        reloaded_feature_space = keras.models.load_model("myfeaturespace.keras")
        ```
        """
        saving_lib.save_model(self, filepath)

    def save_own_variables(self, store):
        """No-op; nothing is written to `store`."""
        return

    def load_own_variables(self, store):
        """No-op; nothing is read from `store`."""
        return

773