Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/tpu/tpu_embedding_v2_utils.py: 32%

295 statements  

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""Companion classes for mid level API for TPU Embeddings in TF2.""" 

16 

17import abc 

18import math 

19import typing 

20from typing import Any, Dict, Callable, Iterable, List, Optional, Text, Tuple, TypeVar, Union 

21 

22from absl import logging 

23 

24from tensorflow.core.protobuf.tpu import optimization_parameters_pb2 

25from tensorflow.core.protobuf.tpu import tpu_embedding_configuration_pb2 

26from tensorflow.python.distribute import device_util 

27from tensorflow.python.distribute import sharded_variable 

28from tensorflow.python.distribute import tpu_strategy 

29from tensorflow.python.framework import device_spec 

30from tensorflow.python.framework import ops 

31from tensorflow.python.framework.tensor_shape import TensorShape 

32from tensorflow.python.ops import init_ops_v2 

33from tensorflow.python.ops import variables as tf_variables 

34from tensorflow.python.tpu.ops import tpu_ops 

35from tensorflow.python.types import core 

36from tensorflow.python.util.tf_export import tf_export 

37 

38 

39TableVariable = TypeVar("TableVariable", sharded_variable.ShardedVariable, 

40 tf_variables.Variable) 

41SlotVarCreationFnType = Callable[ 

42 [TableVariable, List[Text], List[init_ops_v2.Initializer]], 

43 Dict[Text, TableVariable]] 

44ClipValueType = Union[Tuple[float, float], float] 

45 

46 
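# Illustrative sketch, not part of the original module: a minimal callable with
# the SlotVarCreationFnType signature above, matching the contract described
# for the `slot_variable_creation_fn` argument of the optimizers below. The
# function name and the exact tf_variables.Variable arguments are assumptions
# made for illustration only.
def _example_slot_variable_creation_fn(
    table: TableVariable,
    slot_names: List[Text],
    slot_initializers: List[init_ops_v2.Initializer],
) -> Dict[Text, TableVariable]:
  slots = {}
  for slot_name, initializer in zip(slot_names, slot_initializers):
    # Bind `initializer` through a default argument so each closure keeps its
    # own initializer rather than the last one in the loop.
    slots[slot_name] = tf_variables.Variable(
        name=slot_name,
        initial_value=lambda init=initializer: init(table.shape),
        trainable=False)
  return slots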

47class _Optimizer(metaclass=abc.ABCMeta): 

48 """Base class for all optimizers, with common parameters.""" 

49 

50 def __init__( 

51 self, 

52 learning_rate: Union[float, Callable[[], float]], 

53 use_gradient_accumulation: bool, 

54 clip_weight_min: Optional[float], 

55 clip_weight_max: Optional[float], 

56 weight_decay_factor: Optional[float], 

57 multiply_weight_decay_factor_by_learning_rate: bool, 

58 clipvalue: Optional[ClipValueType] = None, 

59 slot_variable_creation_fn: Optional[SlotVarCreationFnType] = None, 

60 low_dimensional_packing_status: bool = False, 

61 ): 

62 self.learning_rate = learning_rate 

63 self.use_gradient_accumulation = use_gradient_accumulation 

64 self.clip_weight_min = clip_weight_min 

65 self.clip_weight_max = clip_weight_max 

66 if not use_gradient_accumulation and clipvalue is not None: 

67 raise ValueError( 

68 f"When `use_gradient_accumulation` is False, gradient clipping " 

69 f"cannot be used and `clipvalue` should be left as None. " 

70 f"Received value {clipvalue} for argument `clipvalue`.") 

71 if clipvalue is None: 

72 clipvalue = (None, None) 

73 elif not isinstance(clipvalue, tuple): 

74 clipvalue = (-1. * clipvalue, clipvalue) 

75 self.clip_gradient_min, self.clip_gradient_max = clipvalue 

76 

77 self.weight_decay_factor = weight_decay_factor 

78 self.multiply_weight_decay_factor_by_learning_rate = ( 

79 multiply_weight_decay_factor_by_learning_rate) 

80 

81 if (slot_variable_creation_fn is not None and 

82 not callable(slot_variable_creation_fn)): 

83 raise ValueError( 

84 f"Argument `slot_variable_creation_fn` must be either None or a " 

85 f"callable. Received: {slot_variable_creation_fn}") 

86 self.slot_variable_creation_fn = slot_variable_creation_fn 

87 self.low_dimensional_packing_status = low_dimensional_packing_status 

88 

89 @abc.abstractmethod 

90 def _slot_names(self) -> List[Text]: 

91 """Returns the name of all the slot variables. 

92 

93 This does not include the 'parameters' variable and these names must match 

94 the names of the slot variables as used in the corresponding

95 `tpu_ops.load_tpu_embedding_*` ops. 

96 """ 

97 raise NotImplementedError 

98 

99 @abc.abstractmethod 

100 def _slot_initializers(self) -> List[init_ops_v2.Initializer]: 

101 """Returns initializers for slot variables. 

102 

103 This returns a list parallel to self._slot_names().

104 """ 

105 raise NotImplementedError 

106 

107 def _set_optimization_parameters( 

108 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

109 """Sets the optimizer fields in the OptimizationParameters.""" 

110 if self.use_gradient_accumulation: 

111 parameters.gradient_accumulation_status = ( 

112 optimization_parameters_pb2.GradientAccumulationStatus.ENABLED) 

113 else: 

114 parameters.gradient_accumulation_status = ( 

115 optimization_parameters_pb2.GradientAccumulationStatus.DISABLED) 

116 

117 if self.clip_weight_min is not None: 

118 parameters.clipping_limits.lower.value = self.clip_weight_min 

119 

120 if self.clip_weight_max is not None: 

121 parameters.clipping_limits.upper.value = self.clip_weight_max 

122 

123 if self.clip_gradient_min is not None: 

124 parameters.gradient_clipping_limits.lower.value = self.clip_gradient_min 

125 

126 if self.clip_gradient_max is not None: 

127 parameters.gradient_clipping_limits.upper.value = self.clip_gradient_max 

128 

129 if self.weight_decay_factor: 

130 parameters.weight_decay_factor = self.weight_decay_factor 

131 if self.multiply_weight_decay_factor_by_learning_rate: 

132 parameters.multiply_weight_decay_factor_by_learning_rate = True 

133 

134 parameters.low_dimensional_packing_status = ( 

135 self.low_dimensional_packing_status 

136 ) 

137 

138 @abc.abstractmethod 

139 def _load(self) -> Callable[..., ops.Operation]: 

140 """Returns the load function for the optimizer.""" 

141 raise NotImplementedError 

142 

143 @abc.abstractmethod 

144 def _retrieve(self) -> Callable[..., core.Tensor]: 

145 """Returns the retrieve function for the optimizer.""" 

146 raise NotImplementedError 

147 

148 def _create_slots( 

149 self, table: "TableConfig", 

150 variable_creator: Callable[[Text, init_ops_v2.Initializer], 

151 tf_variables.Variable] 

152 ) -> Dict[Text, tf_variables.Variable]: 

153 """Creates slot variables for table. 

154 

155 Args: 

156 table: The table variable to create slots for. 

157 variable_creator: A function which creates variables. Takes parameters 

158 'name', 'initializer'. 

159 

160 Returns: 

161 A dict of variables, keyed by self._slot_names(). 

162 """ 

163 if self.slot_variable_creation_fn is not None: 

164 return self.slot_variable_creation_fn(table, self._slot_names(), 

165 self._slot_initializers()) 

166 else: 

167 slots = {} 

168 for slot, initializer in zip(self._slot_names(), 

169 self._slot_initializers()): 

170 slots[slot] = variable_creator(slot, initializer) 

171 return slots 

172 
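# Equality for optimizer configs: two configs compare equal only when they are
# instances of the same class and all of their attribute (hyperparameter)
# name/value pairs match; __hash__ is derived from the same attribute pairs.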

173 def __eq__(self, other: Any) -> Union[Any, bool]: 

174 if isinstance(other, self.__class__): 

175 return all([ 

176 attr1 == attr2 

177 for attr1, attr2 in zip(self.__dict__.items(), other.__dict__.items()) 

178 ]) 

179 else: 

180 return False 

181 

182 def __hash__(self) -> int: 

183 return hash(tuple(self.__dict__.items())) 

184 

185 
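# Illustrative sketch, not part of the original module: the `learning_rate`
# accepted by the optimizers below may be either a float or a zero-argument
# callable (used for dynamic learning rates). The helper name and the decay
# schedule here are assumptions for illustration only.
def _example_decayed_learning_rate(
    initial_rate: float, decay: float, step_fn: Callable[[], int]
) -> Callable[[], float]:
  """Returns a zero-argument callable usable as a dynamic `learning_rate`."""
  return lambda: initial_rate * (decay ** step_fn())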

186@tf_export("tpu.experimental.embedding.SGD") 

187class SGD(_Optimizer): 

188 """Optimization parameters for stochastic gradient descent for TPU embeddings. 

189 

190 Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer` 

191 argument to set the global optimizer and its parameters: 

192 

193 ``` 

194 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

195 ... 

196 optimizer=tf.tpu.experimental.embedding.SGD(0.1)) 

197 ``` 

198 

199 This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the 

200 optimizer parameter to set a table specific optimizer. This will override the 

201 optimizer and parameters for the global embedding optimizer defined above:

202 

203 ``` 

204 table_one = tf.tpu.experimental.embedding.TableConfig( 

205 vocabulary_size=..., 

206 dim=..., 

207 optimizer=tf.tpu.experimental.embedding.SGD(0.2)) 

208 table_two = tf.tpu.experimental.embedding.TableConfig( 

209 vocabulary_size=..., 

210 dim=...) 

211 

212 feature_config = ( 

213 tf.tpu.experimental.embedding.FeatureConfig( 

214 table=table_one), 

215 tf.tpu.experimental.embedding.FeatureConfig( 

216 table=table_two)) 

217 

218 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

219 feature_config=feature_config, 

220 batch_size=... 

221 optimizer=tf.tpu.experimental.embedding.SGD(0.1)) 

222 ``` 

223 

224 In the above example, the first feature will be looked up in a table that has 

225 a learning rate of 0.2 while the second feature will be looked up in a table 

226 that has a learning rate of 0.1. 

227 

228 See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a 

229 complete description of these parameters and their impacts on the optimizer 

230 algorithm. 

231 """ 

232 

233 def __init__( 

234 self, 

235 learning_rate: Union[float, Callable[[], float]] = 0.01, 

236 use_gradient_accumulation: bool = True, 

237 clip_weight_min: Optional[float] = None, 

238 clip_weight_max: Optional[float] = None, 

239 weight_decay_factor: Optional[float] = None, 

240 multiply_weight_decay_factor_by_learning_rate: bool = None, 

241 clipvalue: Optional[ClipValueType] = None, 

242 low_dimensional_packing_status: bool = False, 

243 ): 

244 """Optimization parameters for stochastic gradient descent. 

245 

246 Args: 

247 learning_rate: The learning rate. It should be a floating point value or a 

248 callable taking no arguments for a dynamic learning rate. 

249 use_gradient_accumulation: setting this to `False` makes embedding 

250 gradient calculation less accurate but faster.

251 clip_weight_min: the minimum value to clip by; None means -infinity. 

252 clip_weight_max: the maximum value to clip by; None means +infinity. 

253 weight_decay_factor: amount of weight decay to apply; None means that the 

254 weights are not decayed. Weights are decayed by multiplying the weight 

255 by this factor each step. 

256 multiply_weight_decay_factor_by_learning_rate: if true, 

257 `weight_decay_factor` is multiplied by the current learning rate. 

258 clipvalue: Controls clipping of the gradient. Set to either a single 

259 positive scalar value to get clipping or a tuple of scalar values (min,

260 max) to set a separate maximum or minimum. If one of the two entries is

261 None, then there will be no clipping in that direction. Note that if this is

262 set, you may see a decrease in performance as gradient accumulation

263 will be enabled (it is normally off for SGD as it has no effect on

264 accuracy). See 

265 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for more 

266 information on gradient accumulation and its impact on tpu embeddings. 

267 low_dimensional_packing_status: Status of the low-dimensional embedding 

268 packing optimization controls whether to optimize the packing of 

269 1-dimensional, 2-dimensional, and 4-dimensional embedding tables in 

270 memory. 

271 """ 

272 super().__init__( 

273 learning_rate, 

274 use_gradient_accumulation, 

275 clip_weight_min, 

276 clip_weight_max, 

277 weight_decay_factor, 

278 multiply_weight_decay_factor_by_learning_rate, 

279 clipvalue, 

280 None, 

281 low_dimensional_packing_status, 

282 ) 

283 

284 def _slot_names(self) -> List[Text]: 

285 return [] 

286 

287 def _slot_initializers(self) -> List[init_ops_v2.Initializer]: 

288 return [] 

289 

290 def _set_optimization_parameters( 

291 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

292 super()._set_optimization_parameters(parameters) 

293 parameters.stochastic_gradient_descent.SetInParent() 

294 

295 def _load(self) -> Callable[..., ops.Operation]: 

296 return tpu_ops.load_tpu_embedding_stochastic_gradient_descent_parameters 

297 

298 def _retrieve(self) -> Callable[..., core.Tensor]: 

299 return tpu_ops.retrieve_tpu_embedding_stochastic_gradient_descent_parameters 

300 

301 

302@tf_export("tpu.experimental.embedding.Adagrad") 

303class Adagrad(_Optimizer): 

304 """Optimization parameters for Adagrad with TPU embeddings. 

305 

306 Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer` 

307 argument to set the global optimizer and its parameters: 

308 

309 ```python 

310 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

311 ... 

312 optimizer=tf.tpu.experimental.embedding.Adagrad(0.1)) 

313 ``` 

314 

315 This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the 

316 optimizer parameter to set a table specific optimizer. This will override the 

317 optimizer and parameters for the global embedding optimizer defined above:

318 

319 ```python 

320 table_one = tf.tpu.experimental.embedding.TableConfig( 

321 vocabulary_size=..., 

322 dim=..., 

323 optimizer=tf.tpu.experimental.embedding.Adagrad(0.2)) 

324 table_two = tf.tpu.experimental.embedding.TableConfig( 

325 vocabulary_size=..., 

326 dim=...) 

327 

328 feature_config = ( 

329 tf.tpu.experimental.embedding.FeatureConfig( 

330 table=table_one), 

331 tf.tpu.experimental.embedding.FeatureConfig( 

332 table=table_two)) 

333 

334 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

335 feature_config=feature_config, 

336 batch_size=... 

337 optimizer=tf.tpu.experimental.embedding.Adagrad(0.1)) 

338 ``` 

339 

340 In the above example, the first feature will be looked up in a table that has 

341 a learning rate of 0.2 while the second feature will be looked up in a table 

342 that has a learning rate of 0.1. 

343 

344 See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a 

345 complete description of these parameters and their impacts on the optimizer 

346 algorithm. 

347 """ 

348 

349 def __init__( 

350 self, 

351 learning_rate: Union[float, Callable[[], float]] = 0.001, 

352 initial_accumulator_value: float = 0.1, 

353 use_gradient_accumulation: bool = True, 

354 clip_weight_min: Optional[float] = None, 

355 clip_weight_max: Optional[float] = None, 

356 weight_decay_factor: Optional[float] = None, 

357 multiply_weight_decay_factor_by_learning_rate: bool = None, 

358 slot_variable_creation_fn: Optional[SlotVarCreationFnType] = None, 

359 clipvalue: Optional[ClipValueType] = None, 

360 low_dimensional_packing_status: bool = False, 

361 ): 

362 """Optimization parameters for Adagrad. 

363 

364 Args: 

365 learning_rate: The learning rate. It should be a floating point value or a 

366 callable taking no arguments for a dynamic learning rate. 

367 initial_accumulator_value: initial accumulator for Adagrad. 

368 use_gradient_accumulation: setting this to `False` makes embedding 

369 gradient calculation less accurate but faster.

370 clip_weight_min: the minimum value to clip by; None means -infinity. 

371 clip_weight_max: the maximum value to clip by; None means +infinity. 

372 weight_decay_factor: amount of weight decay to apply; None means that the 

373 weights are not decayed. 

374 multiply_weight_decay_factor_by_learning_rate: if true, 

375 `weight_decay_factor` is multiplied by the current learning rate. 

376 slot_variable_creation_fn: If you wish to directly control the creation of

377 the slot variables, set this to a callable taking three parameters: a 

378 table variable, a list of slot names to create for it, and a list of 

379 initializers. This function should return a dict with the slot names as 

380 keys and the created variables as values with types matching the table 

381 variable. When set to None (the default), uses the built-in variable 

382 creation. 

383 clipvalue: Controls clipping of the gradient. Set to either a single 

384 positive scalar value to get clipping or a tuple of scalar values (min, 

385 max) to set a separate maximum or minimum. If one of the two entries is 

386 None, then there will be no clipping in that direction.

387 low_dimensional_packing_status: Status of the low-dimensional embedding 

388 packing optimization controls whether to optimize the packing of 

389 1-dimensional, 2-dimensional, and 4-dimensional embedding tables in 

390 memory. 

391 """ 

392 super().__init__( 

393 learning_rate, 

394 use_gradient_accumulation, 

395 clip_weight_min, 

396 clip_weight_max, 

397 weight_decay_factor, 

398 multiply_weight_decay_factor_by_learning_rate, 

399 clipvalue, 

400 slot_variable_creation_fn, 

401 low_dimensional_packing_status, 

402 ) 

403 if initial_accumulator_value <= 0: 

404 raise ValueError( 

405 f"Argument `initial_accumulator_value` must be a positive float. " 

406 f"Received: {initial_accumulator_value}") 

407 self.initial_accumulator_value = initial_accumulator_value 

408 

409 def _slot_names(self) -> List[Text]: 

410 return ["accumulators"] 

411 

412 def _slot_initializers(self) -> List[init_ops_v2.Initializer]: 

413 return [init_ops_v2.Constant(self.initial_accumulator_value)] 

414 

415 def _set_optimization_parameters( 

416 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

417 super()._set_optimization_parameters(parameters) 

418 parameters.adagrad.SetInParent() 

419 

420 def _load(self) -> Callable[..., ops.Operation]: 

421 return tpu_ops.load_tpu_embedding_adagrad_parameters 

422 

423 def _retrieve(self) -> Callable[..., core.Tensor]: 

424 return tpu_ops.retrieve_tpu_embedding_adagrad_parameters 

425 

426 

427@tf_export("tpu.experimental.embedding.AdagradMomentum") 

428class AdagradMomentum(_Optimizer): 

429 """Optimization parameters for Adagrad + Momentum with TPU embeddings. 

430 

431 Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer` 

432 argument to set the global optimizer and its parameters: 

433 

434 ```python 

435 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

436 ... 

437 optimizer=tf.tpu.experimental.embedding.AdagradMomentum(0.1)) 

438 ``` 

439 

440 This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the 

441 optimizer parameter to set a table specific optimizer. This will override the 

442 optimizer and parameters for the global embedding optimizer defined above:

443 

444 ```python 

445 table_one = tf.tpu.experimental.embedding.TableConfig( 

446 vocabulary_size=..., 

447 dim=..., 

448 optimizer=tf.tpu.experimental.embedding.AdagradMomentum(0.2)) 

449 table_two = tf.tpu.experimental.embedding.TableConfig( 

450 vocabulary_size=..., 

451 dim=...) 

452 

453 feature_config = ( 

454 tf.tpu.experimental.embedding.FeatureConfig( 

455 table=table_one), 

456 tf.tpu.experimental.embedding.FeatureConfig( 

457 table=table_two)) 

458 

459 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

460 feature_config=feature_config, 

461 batch_size=... 

462 optimizer=tf.tpu.experimental.embedding.AdagradMomentum(0.1)) 

463 ``` 

464 

465 In the above example, the first feature will be looked up in a table that has 

466 a learning rate of 0.2 while the second feature will be looked up in a table 

467 that has a learning rate of 0.1. 

468 

469 See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a 

470 complete description of these parameters and their impacts on the optimizer 

471 algorithm. 

472 """ 

473 

474 def __init__( 

475 self, 

476 learning_rate: Union[float, Callable[[], float]] = 0.001, 

477 momentum: float = 0.0, 

478 use_nesterov: bool = False, 

479 exponent: float = 2, 

480 beta2: float = 1, 

481 epsilon: float = 1e-10, 

482 use_gradient_accumulation: bool = True, 

483 clip_weight_min: Optional[float] = None, 

484 clip_weight_max: Optional[float] = None, 

485 weight_decay_factor: Optional[float] = None, 

486 multiply_weight_decay_factor_by_learning_rate: bool = None, 

487 slot_variable_creation_fn: Optional[SlotVarCreationFnType] = None, 

488 clipvalue: Optional[ClipValueType] = None, 

489 low_dimensional_packing_status: bool = False, 

490 ): 

491 """Optimization parameters for Adagrad + Momentum. 

492 

493 Args: 

494 learning_rate: The learning rate. It should be a floating point value or a 

495 callable taking no arguments for a dynamic learning rate. 

496 momentum: Moving average parameter for the momentum accumulator. 

497 use_nesterov: Whether to use the Nesterov variant of momentum. See 

498 Sutskever et al., 2013. 

499 exponent: Exponent for the Adagrad accumulator. 

500 beta2: Moving average parameter for the Adagrad accumulator. 

501 epsilon: A small constant added to the Adagrad accumulator for numerical stability.

502 use_gradient_accumulation: setting this to `False` makes embedding 

503 gradient calculation less accurate but faster.

504 clip_weight_min: the minimum value to clip by; None means -infinity. 

505 clip_weight_max: the maximum value to clip by; None means +infinity. 

506 weight_decay_factor: amount of weight decay to apply; None means that the 

507 weights are not decayed. 

508 multiply_weight_decay_factor_by_learning_rate: if true, 

509 `weight_decay_factor` is multiplied by the current learning rate. 

510 slot_variable_creation_fn: If you wish to directly control the creation of

511 the slot variables, set this to a callable taking three parameters: a 

512 table variable, a list of slot names to create for it, and a list of 

513 initializers. This function should return a dict with the slot names as 

514 keys and the created variables as values with types matching the table 

515 variable. When set to None (the default), uses the built-in variable 

516 creation. 

517 clipvalue: Controls clipping of the gradient. Set to either a single 

518 positive scalar value to get clipping or a tuple of scalar values (min, 

519 max) to set a separate maximum or minimum. If one of the two entries is 

520 None, then there will be no clipping in that direction.

521 low_dimensional_packing_status: Status of the low-dimensional embedding 

522 packing optimization controls whether to optimize the packing of 

523 1-dimensional, 2-dimensional, and 4-dimensional embedding tables in 

524 memory. 

525 """ 

526 super().__init__( 

527 learning_rate, 

528 use_gradient_accumulation, 

529 clip_weight_min, 

530 clip_weight_max, 

531 weight_decay_factor, 

532 multiply_weight_decay_factor_by_learning_rate, 

533 clipvalue, 

534 slot_variable_creation_fn, 

535 low_dimensional_packing_status, 

536 ) 

537 if epsilon <= 0: 

538 raise ValueError("Adagrad momentum: epsilon must be positive") 

539 if exponent <= 0: 

540 raise ValueError("Adagrad momentum: Precondition exponent must >0") 

541 self.momentum = momentum 

542 self.use_nesterov = use_nesterov 

543 self.exponent = exponent 

544 self.beta2 = beta2 

545 self.epsilon = epsilon 

546 

547 def _slot_names(self) -> List[Text]: 

548 return ["accumulators", "momenta"] 

549 

550 def _slot_initializers(self) -> List[init_ops_v2.Initializer]: 

551 return [init_ops_v2.Constant(), init_ops_v2.Constant()] 

552 

553 def _set_optimization_parameters( 

554 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

555 super()._set_optimization_parameters(parameters) 

556 parameters.adagrad_momentum.SetInParent() 

557 parameters.adagrad_momentum.momentum = self.momentum 

558 parameters.adagrad_momentum.use_nesterov = self.use_nesterov 

559 parameters.adagrad_momentum.exponent = self.exponent 

560 parameters.adagrad_momentum.beta2 = self.beta2 

561 parameters.adagrad_momentum.epsilon = self.epsilon 

562 

563 def _load(self) -> Callable[..., ops.Operation]: 

564 return tpu_ops.load_tpu_embedding_adagrad_momentum_parameters 

565 

566 def _retrieve(self) -> Callable[..., core.Tensor]: 

567 return tpu_ops.retrieve_tpu_embedding_adagrad_momentum_parameters 

568 

569 

570@tf_export("tpu.experimental.embedding.FTRL") 

571class FTRL(_Optimizer): 

572 """Optimization parameters for FTRL with TPU embeddings. 

573 

574 See Algorithm 1 of this 

575 [paper](https://research.google.com/pubs/archive/41159.pdf). 

576 

577 Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer` 

578 argument to set the global optimizer and its parameters: 

579 

580 ```python 

581 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

582 ... 

583 optimizer=tf.tpu.experimental.embedding.FTRL(0.1)) 

584 ``` 

585 

586 This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the 

587 optimizer parameter to set a table specific optimizer. This will override the 

588 optimizer and parameters for the global embedding optimizer defined above:

589 

590 ```python 

591 table_one = tf.tpu.experimental.embedding.TableConfig( 

592 vocabulary_size=..., 

593 dim=..., 

594 optimizer=tf.tpu.experimental.embedding.FTRL(0.2)) 

595 table_two = tf.tpu.experimental.embedding.TableConfig( 

596 vocabulary_size=..., 

597 dim=...) 

598 

599 feature_config = ( 

600 tf.tpu.experimental.embedding.FeatureConfig( 

601 table=table_one), 

602 tf.tpu.experimental.embedding.FeatureConfig( 

603 table=table_two)) 

604 

605 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

606 feature_config=feature_config, 

607 batch_size=... 

608 optimizer=tf.tpu.experimental.embedding.FTRL(0.1)) 

609 ``` 

610 

611 In the above example, the first feature will be looked up in a table that has 

612 a learning rate of 0.2 while the second feature will be looked up in a table 

613 that has a learning rate of 0.1. 

614 

615 See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a 

616 complete description of these parameters and their impacts on the optimizer 

617 algorithm. 

618 """ 

619 

620 def __init__( 

621 self, 

622 learning_rate: Union[float, Callable[[], float]] = 0.001, 

623 learning_rate_power: float = -0.5, 

624 l1_regularization_strength: float = 0.0, 

625 l2_regularization_strength: float = 0.0, 

626 beta: float = 0.0, 

627 initial_accumulator_value: float = 0.1, 

628 use_gradient_accumulation: bool = True, 

629 clip_weight_min: Optional[float] = None, 

630 clip_weight_max: Optional[float] = None, 

631 weight_decay_factor: Optional[float] = None, 

632 multiply_weight_decay_factor_by_learning_rate: bool = None, 

633 slot_variable_creation_fn: Optional[SlotVarCreationFnType] = None, 

634 clipvalue: Optional[ClipValueType] = None, 

635 multiply_linear_by_learning_rate: bool = False, 

636 allow_zero_accumulator: bool = False, 

637 low_dimensional_packing_status: bool = False, 

638 ): 

639 """Optimization parameters for Adagrad. 

640 

641 Args: 

642 learning_rate: The learning rate. It should be a floating point value or a 

643 callable taking no arguments for a dynamic learning rate. 

644 learning_rate_power: A float value, must be less than or equal to zero.

645 Controls how the learning rate decreases during training. Use zero for a 

646 fixed learning rate. 

647 l1_regularization_strength: A float value, must be greater than or equal 

648 to zero. 

649 l2_regularization_strength: A float value, must be greater than or equal 

650 to zero. 

651 beta: A float value, representing the beta value from the paper. 

652 initial_accumulator_value: The starting value for accumulators. Only zero 

653 or positive values are allowed. 

654 use_gradient_accumulation: setting this to `False` makes embedding 

655 gradient calculation less accurate but faster.

656 clip_weight_min: the minimum value to clip by; None means -infinity. 

657 clip_weight_max: the maximum value to clip by; None means +infinity. 

658 weight_decay_factor: amount of weight decay to apply; None means that the 

659 weights are not decayed. 

660 multiply_weight_decay_factor_by_learning_rate: if true, 

661 `weight_decay_factor` is multiplied by the current learning rate. 

662 slot_variable_creation_fn: If you wish to directly control the creation of

663 the slot variables, set this to a callable taking three parameters: a 

664 table variable, a list of slot names to create for it, and a list of 

665 initializers. This function should return a dict with the slot names as 

666 keys and the created variables as values with types matching the table 

667 variable. When set to None (the default), uses the built-in variable 

668 creation. 

669 clipvalue: Controls clipping of the gradient. Set to either a single 

670 positive scalar value to get clipping or a tuple of scalar values (min, 

671 max) to set a separate maximum or minimum. If one of the two entries is 

672 None, then there will be no clipping in that direction.

673 multiply_linear_by_learning_rate: If set to True, a modified formula is 

674 used for FTRL that treats the "linear" accumulator as being 

675 pre-multiplied by the learning rate (i.e., the accumulator named 

676 "linear" actually stores "linear * learning_rate"). Other than 

677 checkpoint compatibility, this is mathematically equivalent for a static 

678 learning rate; for a dynamic learning rate, it is nearly the same as 

679 long as the learning rate does not change quickly. The benefit of this 

680 is that the modified formula handles zero and near-zero learning rates 

681 without producing NaNs, improving flexibility for learning rate ramp-up. 

682 allow_zero_accumulator: If set to True, changes some internal formulas to 

683 allow zero and near-zero accumulator values at the cost of some 

684 performance; this only needs to be set if you are using an initial 

685 accumulator value of zero, which is uncommon. 

686 low_dimensional_packing_status: Status of the low-dimensional embedding 

687 packing optimization controls whether to optimize the packing of 

688 1-dimensional, 2-dimensional, and 4-dimensional embedding tables in 

689 memory. 

690 """ 

691 super().__init__( 

692 learning_rate, 

693 use_gradient_accumulation, 

694 clip_weight_min, 

695 clip_weight_max, 

696 weight_decay_factor, 

697 multiply_weight_decay_factor_by_learning_rate, 

698 clipvalue, 

699 slot_variable_creation_fn, 

700 low_dimensional_packing_status, 

701 ) 

702 if initial_accumulator_value <= 0: 

703 raise ValueError( 

704 f"Argument `initial_accumulator_value` must be a positive float. " 

705 f"Received: {initial_accumulator_value}") 

706 self.initial_accumulator_value = initial_accumulator_value 

707 self.learning_rate_power = learning_rate_power 

708 self.l1_regularization_strength = l1_regularization_strength 

709 self.l2_regularization_strength = l2_regularization_strength 

710 self.beta = beta 

711 self.multiply_linear_by_learning_rate = multiply_linear_by_learning_rate 

712 self.allow_zero_accumulator = allow_zero_accumulator 

713 

714 def _slot_names(self) -> List[Text]: 

715 return ["accumulators", "linears"] 

716 

717 def _slot_initializers(self) -> List[init_ops_v2.Initializer]: 

718 return [ 

719 init_ops_v2.Constant(self.initial_accumulator_value), 

720 init_ops_v2.Constant() 

721 ] 

722 

723 def _set_optimization_parameters( 

724 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

725 super()._set_optimization_parameters(parameters) 

726 ftrl = parameters.ftrl 

727 ftrl.l1 = self.l1_regularization_strength 

728 ftrl.l2 = self.l2_regularization_strength 

729 ftrl.lr_power = self.learning_rate_power 

730 ftrl.beta = self.beta 

731 ftrl.multiply_linear_by_lr = self.multiply_linear_by_learning_rate 

732 ftrl.allow_zero_accumulator = self.allow_zero_accumulator 

733 

734 def _load(self) -> Callable[..., ops.Operation]: 

735 return tpu_ops.load_tpu_embedding_ftrl_parameters 

736 

737 def _retrieve(self) -> Callable[..., core.Tensor]: 

738 return tpu_ops.retrieve_tpu_embedding_ftrl_parameters 

739 

740 

741@tf_export("tpu.experimental.embedding.Adam") 

742class Adam(_Optimizer): 

743 """Optimization parameters for Adam with TPU embeddings. 

744 

745 Pass this to `tf.tpu.experimental.embedding.TPUEmbedding` via the `optimizer` 

746 argument to set the global optimizer and its parameters: 

747 

748 NOTE: By default this optimizer is lazy, i.e. it will not apply the gradient 

749 update of zero to rows that were not looked up. You can change this behavior 

750 by setting `lazy_adam` to `False`. 

751 

752 ```python 

753 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

754 ... 

755 optimizer=tf.tpu.experimental.embedding.Adam(0.1)) 

756 ``` 

757 

758 This can also be used in a `tf.tpu.experimental.embedding.TableConfig` as the 

759 optimizer parameter to set a table specific optimizer. This will override the 

760 optimizer and parameters for the global embedding optimizer defined above:

761 

762 ```python 

763 table_one = tf.tpu.experimental.embedding.TableConfig( 

764 vocabulary_size=..., 

765 dim=..., 

766 optimizer=tf.tpu.experimental.embedding.Adam(0.2)) 

767 table_two = tf.tpu.experimental.embedding.TableConfig( 

768 vocabulary_size=..., 

769 dim=...) 

770 

771 feature_config = ( 

772 tf.tpu.experimental.embedding.FeatureConfig( 

773 table=table_one), 

774 tf.tpu.experimental.embedding.FeatureConfig( 

775 table=table_two)) 

776 

777 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

778 feature_config=feature_config, 

779 batch_size=... 

780 optimizer=tf.tpu.experimental.embedding.Adam(0.1)) 

781 ``` 

782 

783 In the above example, the first feature will be looked up in a table that has 

784 a learning rate of 0.2 while the second feature will be looked up in a table 

785 that has a learning rate of 0.1. 

786 

787 See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a 

788 complete description of these parameters and their impacts on the optimizer 

789 algorithm. 

790 """ 

791 

792 def __init__( 

793 self, 

794 learning_rate: Union[float, Callable[[], float]] = 0.001, 

795 beta_1: float = 0.9, 

796 beta_2: float = 0.999, 

797 epsilon: float = 1e-07, 

798 lazy_adam: bool = True, 

799 sum_inside_sqrt: bool = True, 

800 use_gradient_accumulation: bool = True, 

801 clip_weight_min: Optional[float] = None, 

802 clip_weight_max: Optional[float] = None, 

803 weight_decay_factor: Optional[float] = None, 

804 multiply_weight_decay_factor_by_learning_rate: bool = None, 

805 slot_variable_creation_fn: Optional[SlotVarCreationFnType] = None, 

806 clipvalue: Optional[ClipValueType] = None, 

807 low_dimensional_packing_status: bool = False, 

808 ): 

809 """Optimization parameters for Adam. 

810 

811 See 'tensorflow/core/protobuf/tpu/optimization_parameters.proto' for a 

812 complete description of these parameters and their impacts on the optimizer 

813 algorithm. 

814 

815 Args: 

816 learning_rate: The learning rate. It should be a floating point value or a 

817 callable taking no arguments for a dynamic learning rate. 

818 beta_1: A float value. The exponential decay rate for the 1st moment 

819 estimates. 

820 beta_2: A float value. The exponential decay rate for the 2nd moment 

821 estimates. 

822 epsilon: A small constant for numerical stability. 

823 lazy_adam: Use lazy Adam instead of Adam. Lazy Adam trains faster. 

824 sum_inside_sqrt: When this is true, the Adam update formula is changed 

825 from `m / (sqrt(v) + epsilon)` to `m / sqrt(v + epsilon**2)`. This 

826 option improves the performance of TPU training and is not expected to 

827 harm model quality. 

828 use_gradient_accumulation: Setting this to `False` makes embedding 

829 gradient calculation less accurate but faster.

830 clip_weight_min: the minimum value to clip by; None means -infinity. 

831 clip_weight_max: the maximum value to clip by; None means +infinity. 

832 weight_decay_factor: amount of weight decay to apply; None means that the 

833 weights are not decayed. 

834 multiply_weight_decay_factor_by_learning_rate: if true, 

835 `weight_decay_factor` is multiplied by the current learning rate. 

836 slot_variable_creation_fn: If you wish to directly control the creation of

837 the slot variables, set this to a callable taking three parameters: a 

838 table variable, a list of slot names to create for it, and a list of 

839 initializers. This function should return a dict with the slot names as 

840 keys and the created variables as values with types matching the table 

841 variable. When set to None (the default), uses the built-in variable 

842 creation. 

843 clipvalue: Controls clipping of the gradient. Set to either a single 

844 positive scalar value to get clipping or a tuple of scalar values (min,

845 max) to set a separate maximum or minimum. If one of the two entries is

846 None, then there will be no clipping in that direction.

847 low_dimensional_packing_status: Status of the low-dimensional embedding 

848 packing optimization controls whether to optimize the packing of 

849 1-dimensional, 2-dimensional, and 4-dimensional embedding tables in 

850 memory. 

851 """ 

852 super(Adam, self).__init__( 

853 learning_rate, 

854 use_gradient_accumulation, 

855 clip_weight_min, 

856 clip_weight_max, 

857 weight_decay_factor, 

858 multiply_weight_decay_factor_by_learning_rate, 

859 clipvalue, 

860 slot_variable_creation_fn, 

861 low_dimensional_packing_status, 

862 ) 

863 if beta_1 < 0. or beta_1 >= 1.: 

864 raise ValueError( 

865 f"Argument `beta_1` must be >= 0 and < 1. Received: {beta_1}.") 

866 if beta_2 < 0. or beta_2 >= 1.: 

867 raise ValueError( 

868 f"Argument `beta_2` must be >= 0 and < 1. Received: {beta_1}.") 

869 if epsilon <= 0.: 

870 raise ValueError("epsilon must be positive; got {}.".format(epsilon)) 

871 if not use_gradient_accumulation and not lazy_adam: 

872 raise ValueError( 

873 "When disabling lazy Adam (`lazy_adam=False`), " 

874 "gradient accumulation must be used. " 

875 "Set `use_gradient_accumulation` to False.") 

876 

877 self.beta_1 = beta_1 

878 self.beta_2 = beta_2 

879 self.epsilon = epsilon 

880 self.lazy_adam = lazy_adam 

881 self.sum_inside_sqrt = sum_inside_sqrt 

882 

883 def _slot_names(self) -> List[Text]: 

884 return ["momenta", "velocities"] 

885 

886 def _slot_initializers(self) -> List[init_ops_v2.Initializer]: 

887 return [init_ops_v2.Constant(), init_ops_v2.Constant()] 

888 

889 def _set_optimization_parameters( 

890 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

891 super(Adam, self)._set_optimization_parameters(parameters) 

892 parameters.adam.beta1 = self.beta_1 

893 parameters.adam.beta2 = self.beta_2 

894 parameters.adam.epsilon = self.epsilon 

895 parameters.adam.use_non_lazy_adam = not self.lazy_adam 

896 parameters.adam.use_sum_inside_sqrt = self.sum_inside_sqrt 

897 

898 def _load(self) -> Callable[..., ops.Operation]: 

899 return tpu_ops.load_tpu_embedding_adam_parameters 

900 

901 def _retrieve(self) -> Callable[..., core.Tensor]: 

902 return tpu_ops.retrieve_tpu_embedding_adam_parameters 

903 

904 
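# Illustrative sketch, not part of the original module: the two denominator
# forms referred to by the `sum_inside_sqrt` argument of Adam above, written
# out for a scalar second-moment estimate `v` so they are easy to compare. The
# helper name is an assumption for illustration only.
def _example_adam_denominator(v: float, epsilon: float,
                              sum_inside_sqrt: bool) -> float:
  if sum_inside_sqrt:
    # `sum_inside_sqrt=True`: update uses m / sqrt(v + epsilon**2).
    return math.sqrt(v + epsilon**2)
  # `sum_inside_sqrt=False`: update uses m / (sqrt(v) + epsilon).
  return math.sqrt(v) + epsilon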

905@tf_export("tpu.experimental.embedding.QuantizationConfig") 

906class QuantizationConfig: 

907 """Settings for simulated quantization of the tpu embedding table. 

908 

909 When simulated quantization is enabled, the results of the embedding lookup 

910 are clipped and quantized according to the settings here before the combiner 

911 is applied. 

912 

913 For example, to quantize `input` the following is done: 

914 ```python 

915 if input < lower:

916   input = lower

917 if input > upper:

918   input = upper

919 quantum = (upper - lower) / (num_buckets - 1)

920 input = math.floor((input - lower) / quantum + 0.5) * quantum + lower

921 ``` 

922 

923 See tensorflow/core/protobuf/tpu/optimization_parameters.proto for more 

924 details. 

925 

926 NOTE: This does not change the storage type of the embedding table, which will

927 continue to be float32, as will the saved variable in the checkpoint. You will

928 have to quantize the variable yourself (typically with the same algorithm and

929 settings as above).

930 """ 

931 

932 def __init__(self, num_buckets: int, lower: float, upper: float): 

933 """Simulated quantizaiton configuration. 

934 

935 Args: 

936 num_buckets: The number of quantization buckets, must be at least 2.

937 lower: The lower bound for the quantization range. 

938 upper: The upper bound for the quantization range. 

939 

940 Returns: 

941 `QuantizationConfig`. 

942 

943 Raises: 

944 ValueError: if `num_buckets` is less than 2. 

945 """ 

946 if num_buckets < 2: 

947 raise ValueError(f"num_buckets is {num_buckets}, must be at least 2 for " 

948 f"simulated quantization.") 

949 

950 self.num_buckets = num_buckets 

951 self.lower = lower 

952 self.upper = upper 

953 

954 def _set_optimization_parameters( 

955 self, parameters: optimization_parameters_pb2.OptimizationParameters): 

956 parameters.simulated_quantization.enabled = True 

957 parameters.simulated_quantization.num_buckets = self.num_buckets 

958 parameters.simulated_quantization.clipping_limits.lower.value = self.lower 

959 parameters.simulated_quantization.clipping_limits.upper.value = self.upper 

960 

961 def __repr__(self): 

962 return ("QuantizationConfig(num_buckets={num_buckets!r}, lower={lower!r}, " 

963 "upper={upper!r})".format( 

964 num_buckets=self.num_buckets, 

965 lower=self.lower, 

966 upper=self.upper)) 

967 

968 
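# Illustrative sketch, not part of the original module: a pure-Python version
# of the simulated quantization formula from the QuantizationConfig docstring
# above; the helper name is an assumption for illustration only.
def _example_simulated_quantize(value: float, num_buckets: int, lower: float,
                                upper: float) -> float:
  value = min(max(value, lower), upper)
  quantum = (upper - lower) / (num_buckets - 1)
  return math.floor((value - lower) / quantum + 0.5) * quantum + lower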

969@tf_export("tpu.experimental.embedding.TableConfig") 

970class TableConfig: 

971 """Configuration data for one embedding table. 

972 

973 This class holds the configuration data for a single embedding table. It is 

974 used as the `table` parameter of a 

975 `tf.tpu.experimental.embedding.FeatureConfig`. Multiple 

976 `tf.tpu.experimental.embedding.FeatureConfig` objects can use the same 

977 `tf.tpu.experimental.embedding.TableConfig` object. In this case a shared 

978 table will be created for those feature lookups. 

979 

980 ```python 

981 table_config_one = tf.tpu.experimental.embedding.TableConfig( 

982 vocabulary_size=..., 

983 dim=...) 

984 table_config_two = tf.tpu.experimental.embedding.TableConfig( 

985 vocabulary_size=..., 

986 dim=...) 

987 feature_config = { 

988 'feature_one': tf.tpu.experimental.embedding.FeatureConfig( 

989 table=table_config_one), 

990 'feature_two': tf.tpu.experimental.embedding.FeatureConfig( 

991 table=table_config_one), 

992 'feature_three': tf.tpu.experimental.embedding.FeatureConfig( 

993 table=table_config_two)} 

994 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

995 feature_config=feature_config, 

996 batch_size=... 

997 optimizer=tf.tpu.experimental.embedding.Adam(0.1)) 

998 ``` 

999 

1000 The above configuration has 2 tables, and three features. The first two 

1001 features will be looked up in the first table and the third feature will be 

1002 looked up in the second table. 

1003 

1004 """ 

1005 

1006 def __init__(self, 

1007 vocabulary_size: int, 

1008 dim: int, 

1009 initializer: Optional[Callable[[Any], None]] = None, 

1010 optimizer: Optional[_Optimizer] = None, 

1011 combiner: Text = "mean", 

1012 name: Optional[Text] = None, 

1013 quantization_config: QuantizationConfig = None): 

1014 """Embedding table configuration. 

1015 

1016 Args: 

1017 vocabulary_size: Size of the table's vocabulary (number of rows). 

1018 dim: The embedding dimension (width) of the table. 

1019 initializer: A callable initializer taking one parameter, the shape of the 

1020 variable that will be initialized. Will be called once per task, to 

1021 initialize that task's shard of the embedding table. If not specified, 

1022 defaults to `truncated_normal_initializer` with mean `0.0` and standard 

1023 deviation `1/sqrt(dim)`. 

1024 optimizer: An optional instance of an optimizer parameters class, instance 

1025 of one of `tf.tpu.experimental.embedding.SGD`, 

1026 `tf.tpu.experimental.embedding.Adagrad` or 

1027 `tf.tpu.experimental.embedding.Adam`. If set will override the global 

1028 optimizer passed to `tf.tpu.experimental.embedding.TPUEmbedding`. 

1029 combiner: A string specifying how to reduce if there are multiple entries 

1030 in a single row. Currently 'mean', 'sqrtn', 'sum' are supported, with 

1031 'mean' the default. 'sqrtn' often achieves good accuracy, in particular 

1032 with bag-of-words columns. For more information, see 

1033 `tf.nn.embedding_lookup_sparse`. 

1034 name: An optional string used to name the table. Useful for debugging. 

1035 quantization_config: The simulated quantization config. An instance of 

1036 `tf.tpu.experimental.embedding.QuantizationConfig`. See the class for 

1037 more documentation. 

1038 

1039 Returns: 

1040 `TableConfig`. 

1041 

1042 Raises: 

1043 ValueError: if `vocabulary_size` is not a positive integer. 

1044 ValueError: if `dim` is not a positive integer. 

1045 ValueError: if `initializer` is specified and is not callable. 

1046 ValueError: if `combiner` is not supported. 

1047 """ 

1048 if not isinstance(vocabulary_size, int) or vocabulary_size < 1: 

1049 raise ValueError( 

1050 f"Argument `vocabulary_size` must be an int and must be >= 1. " 

1051 f"Received: {vocabulary_size}") 

1052 

1053 if not isinstance(dim, int) or dim < 1: 

1054 raise ValueError( 

1055 f"Argument `dim` (embedding dimension) " 

1056 f"must be an int and must be >= 1. Received: {dim}") 

1057 

1058 if (initializer is not None) and (not callable(initializer)): 

1059 raise ValueError( 

1060 f"Argument `initializer` must be a callable (or None). " 

1061 f"Received: {initializer}") 

1062 if initializer is None: 

1063 initializer = init_ops_v2.TruncatedNormal(mean=0.0, 

1064 stddev=1/math.sqrt(dim)) 

1065 accepted_combiners = ("mean", "sum", "sqrtn") 

1066 if combiner not in accepted_combiners: 

1067 raise ValueError( 

1068 f"Argument `combiner` must be one of {accepted_combiners}. " 

1069 f"Received: {combiner}") 

1070 

1071 self.vocabulary_size = vocabulary_size 

1072 self.dim = dim 

1073 self.initializer = initializer 

1074 self.optimizer = optimizer 

1075 self.combiner = combiner 

1076 self.name = name 

1077 self.quantization_config = quantization_config 

1078 

1079 def __repr__(self): 

1080 # If using the default initializer, just print "None" for clarity. 

1081 initializer = self.initializer 

1082 

1083 if isinstance(initializer, init_ops_v2.TruncatedNormal): 

1084 # PY2 type checking can't infer type of initializer even after if. 

1085 initializer = typing.cast(init_ops_v2.TruncatedNormal, initializer) 

1086 if (initializer.mean == 0.0 

1087 and math.isclose(initializer.stddev, 1/math.sqrt(self.dim))): 

1088 initializer = None 

1089 

1090 return ("TableConfig(vocabulary_size={vocabulary_size!r}, dim={dim!r}, " 

1091 "initializer={initializer!r}, optimizer={optimizer!r}, " 

1092 "combiner={combiner!r}, name={name!r}, " 

1093 "quantization_config={quantization!r})".format( 

1094 vocabulary_size=self.vocabulary_size, 

1095 dim=self.dim, 

1096 initializer=initializer, 

1097 optimizer=self.optimizer, 

1098 combiner=self.combiner, 

1099 name=self.name, 

1100 quantization=self.quantization_config, 

1101 )) 

1102 

1103 def _set_table_descriptor( 

1104 self, 

1105 table_descriptor: tpu_embedding_configuration_pb2 

1106 .TPUEmbeddingConfiguration.TableDescriptor, 

1107 num_hosts: int, 

1108 learning_rate_index: Dict[Callable[[], Any], int]): 

1109 """Set the table descriptor from the table data.""" 

1110 table_descriptor.name = self.name 

1111 

1112 # For small tables, we pad to the number of hosts so that at least one 

1113 # id will be assigned to each host. 

1114 table_descriptor.vocabulary_size = max(self.vocabulary_size, num_hosts) 

1115 table_descriptor.dimension = self.dim 

1116 

1117 parameters = table_descriptor.optimization_parameters 

1118 

1119 # We handle the learning rate separately here and don't allow the 

1120 # optimization class to handle this, as it doesn't know about dynamic 

1121 # rates. 

1122 if callable(self.optimizer.learning_rate): 

1123 parameters.learning_rate.dynamic.tag = ( 

1124 learning_rate_index[self.optimizer.learning_rate]) 

1125 else: 

1126 parameters.learning_rate.constant = self.optimizer.learning_rate 

1127 

1128 if self.optimizer.low_dimensional_packing_status: 

1129 parameters.low_dimensional_packing_status = ( 

1130 optimization_parameters_pb2.LowDimensionalPackingStatus.Status.ENABLED 

1131 ) 

1132 # Use optimizer to handle the rest of the parameters. 

1133 self.optimizer._set_optimization_parameters(parameters) # pylint: disable=protected-access 

1134 if self.quantization_config: 

1135 self.quantization_config._set_optimization_parameters(parameters) # pylint: disable=protected-access 

1136 

1137 
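# Illustrative sketch, not part of the original module: a custom `initializer`
# for TableConfig is a callable taking the shape of the shard to initialize,
# for example one of the `init_ops_v2` initializers wrapped in a function. The
# helper name and the uniform range are assumptions for illustration only.
def _example_uniform_table_initializer(shape):
  return init_ops_v2.RandomUniform(minval=-0.05, maxval=0.05)(shape)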

1138@tf_export("tpu.experimental.embedding.FeatureConfig") 

1139class FeatureConfig: 

1140 """Configuration data for one embedding feature. 

1141 

1142 This class holds the configuration data for a single embedding feature. The 

1143 main use is to assign features to `tf.tpu.experimental.embedding.TableConfig`s 

1144 via the table parameter: 

1145 

1146 ```python 

1147 table_config_one = tf.tpu.experimental.embedding.TableConfig( 

1148 vocabulary_size=..., 

1149 dim=...) 

1150 table_config_two = tf.tpu.experimental.embedding.TableConfig( 

1151 vocabulary_size=..., 

1152 dim=...) 

1153 feature_config = { 

1154 'feature_one': tf.tpu.experimental.embedding.FeatureConfig( 

1155 table=table_config_one), 

1156 'feature_two': tf.tpu.experimental.embedding.FeatureConfig( 

1157 table=table_config_one), 

1158 'feature_three': tf.tpu.experimental.embedding.FeatureConfig( 

1159 table=table_config_two)} 

1160 embedding = tf.tpu.experimental.embedding.TPUEmbedding( 

1161 feature_config=feature_config, 

1162 batch_size=... 

1163 optimizer=tf.tpu.experimental.embedding.Adam(0.1)) 

1164 ``` 

1165 

1166 The above configuration has 2 tables, and three features. The first two 

1167 features will be looked up in the first table and the third feature will be 

1168 looked up in the second table. 

1169 

1170 You can also specify the output shape for each feature. The output shape 

1171 should be the expected activation shape excluding the table dimension. For 

1172 dense and sparse tensors, the output shape should be the same as the input

1173 shape excluding the last dimension. For ragged tensors, the output shape can

1174 differ from the input shape.

1175 

1176 NOTE: The `max_sequence_length` will only be used when the input tensor has

1177 rank 2 and the `output_shape` is not set in the feature config. 

1178 

1179 When feeding features into `embedding.enqueue` they can be `tf.Tensor`s, 

1180 `tf.SparseTensor`s or `tf.RaggedTensor`s. When the argument 

1181 `max_sequence_length` is 0, the default, you should expect an output of

1182 `embedding.dequeue` for this feature of shape `(batch_size, dim)`. If 

1183 `max_sequence_length` is greater than 0, the feature is embedded as a sequence 

1184 and padded up to the given length. The shape of the output for this feature 

1185 will be `(batch_size, max_sequence_length, dim)`. 

1186 """ 

1187 

1188 def __init__(self, 

1189 table: TableConfig, 

1190 max_sequence_length: int = 0, 

1191 validate_weights_and_indices: bool = True, 

1192 output_shape: Optional[Union[List[int], TensorShape]] = None, 

1193 name: Optional[Text] = None): 

1194 """Feature configuration. 

1195 

1196 Args: 

1197 table: An instance of `tf.tpu.experimental.embedding.TableConfig`, 

1198 describing the table in which this feature should be looked up. 

1199 max_sequence_length: If positive, the feature is a sequence feature with 

1200 the corresponding maximum sequence length. If the sequence is longer 

1201 than this, it will be truncated. If 0, the feature is not a sequence 

1202 feature. 

1203 validate_weights_and_indices: If true, uses safe_embedding_lookup during 

1204 serving which ensures there are no empty rows and all weights and ids 

1205 are positive at the expense of extra compute cost. 

1206 output_shape: Optional argument to configure the output shape of the feature

1207 activation. If provided, the feature fed to `embedding.enqueue` has to

1208 match this shape (for ragged tensors, the input and output shapes can

1209 differ). If not provided, the shape can either be provided to

1210 `embedding.build` or auto-detected at runtime.

1211 name: An optional name for the feature, useful for debugging. 

1212 

1213 Returns: 

1214 `FeatureConfig`. 

1215 

1216 Raises: 

1217 ValueError: if `table` is not an instance of 

1218 `tf.tpu.experimental.embedding.TableConfig`. 

1219 ValueError: if `max_sequence_length` not an integer or is negative. 

1220 """ 

1221 if not isinstance(table, TableConfig): 

1222 raise ValueError(f"Argument `table` has invalid type {type(table)}. " 

1223 "Expected `tf.tpu.experimental.embedding.TableConfig`.") 

1224 

1225 if not isinstance(max_sequence_length, int) or max_sequence_length < 0: 

1226 raise ValueError( 

1227 f"Argument `max_sequence_length` must be an int and must be >= 0. " 

1228 f"Received: {max_sequence_length}") 

1229 

1230 self.table = table 

1231 self.max_sequence_length = max_sequence_length 

1232 self.name = name 

1233 self.output_shape = TensorShape(output_shape) 

1234 

1235 if not isinstance( 

1236 validate_weights_and_indices, bool): 

1237 raise ValueError( 

1238 f"Argument `validate_weights_and_indices` must be a boolean. " 

1239 f"Received: {validate_weights_and_indices}") 

1240 

1241 self.validate_weights_and_indices = validate_weights_and_indices 

1242 

1243 def __repr__(self): 

1244 return ("FeatureConfig(table={table!r}, " 

1245 "max_sequence_length={max_sequence_length!r}, " 

1246 "validate_weights_and_indices={validate_weights_and_indices!r}, " 

1247 "output_shape={output_shape!r}, name={name!r})".format( 

1248 table=self.table, 

1249 max_sequence_length=self.max_sequence_length, 

1250 validate_weights_and_indices=self.validate_weights_and_indices, 

1251 output_shape=self.output_shape, 

1252 name=self.name)) 

1253 

1254 

1255def log_tpu_embedding_configuration( 

1256 config: tpu_embedding_configuration_pb2.TPUEmbeddingConfiguration) -> None: 

1257 """Logs a TPUEmbeddingConfiguration proto across multiple statements. 

1258 

1259 Args: 

1260 config: TPUEmbeddingConfiguration proto to log. Necessary because 

1261 logging.info has a maximum length to each log statement, which 

1262 particularly large configs can exceed. 

1263 """ 

1264 logging.info("Beginning log of TPUEmbeddingConfiguration.") 

1265 for line in str(config).splitlines(): 

1266 logging.info(line) 

1267 logging.info("Done with log of TPUEmbeddingConfiguration.") 

1268 

1269 

1270def _sort_device_spec_strings(device_strings: Iterable[str]) -> List[str]: 
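# Sort device specs by (replica, task, device_index) so the resulting host
# order is deterministic; get_list_of_hosts below relies on this ordering.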

1271 sorted_specs = sorted( 

1272 (device_spec.DeviceSpecV2.from_string(spec) for spec in device_strings), 

1273 key=lambda s: (s.replica, s.task, s.device_index), 

1274 ) 

1275 return [spec.to_string() for spec in sorted_specs] 

1276 

1277 

1278def get_list_of_hosts(strategy: tpu_strategy.TPUStrategy) -> List[Text]: 

1279 """Returns a sorted list of CPU devices for the remote jobs. 

1280 

1281 Args: 

1282 strategy: A TPUStrategy object. 

1283 

1284 Returns: 

1285 A sorted list of device host strings. 

1286 """ 

1287 

1288 list_of_hosts = [] 

1289 # Elsewhere we assume that the list of hosts is sorted.

1290 for tpu_device in _sort_device_spec_strings(strategy.extended.worker_devices): 

1291 host = device_util.get_host_for_device(tpu_device) 

1292 if host not in list_of_hosts: 

1293 list_of_hosts.append(host) 

1294 assert len(list_of_hosts) == strategy.extended.num_hosts 

1295 return list_of_hosts