Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/feature_column/sequence_feature_column.py: 51%
105 statements (coverage.py v7.4.0, created at 2024-01-03 07:57 +0000)

# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This API defines FeatureColumn for sequential input.

NOTE: This API is a work in progress and will likely be changing frequently.
"""

import collections

from tensorflow.python.feature_column import feature_column_v2 as fc
from tensorflow.python.feature_column import utils as fc_utils
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import parsing_ops
from tensorflow.python.ops import sparse_ops
from tensorflow.python.util import deprecation
from tensorflow.python.util.tf_export import tf_export
from tensorflow.tools.docs import doc_controls

_FEATURE_COLUMN_DEPRECATION_WARNING = """\
    Warning: tf.feature_column is not recommended for new code. Instead,
    feature preprocessing can be done directly using either [Keras preprocessing
    layers](https://www.tensorflow.org/guide/migrate/migrating_feature_columns)
    or through the one-stop utility [`tf.keras.utils.FeatureSpace`](https://www.tensorflow.org/api_docs/python/tf/keras/utils/FeatureSpace)
    built on top of them. See the [migration guide](https://tensorflow.org/guide/migrate)
    for details.
    """

_FEATURE_COLUMN_DEPRECATION_RUNTIME_WARNING = (
    'Use Keras preprocessing layers instead, either directly or via the '
    '`tf.keras.utils.FeatureSpace` utility. Each of `tf.feature_column.*` has '
    'a functional equivalent in `tf.keras.layers` for feature preprocessing '
    'when training a Keras model.')

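
# A minimal sketch of the kind of Keras-preprocessing replacement the
# deprecation message above points to. This is an assumed, illustrative
# equivalence (it is not part of the original module): a vocabulary-based
# sequence categorical column followed by `embedding_column` roughly maps to
# `tf.keras.layers.StringLookup` followed by `tf.keras.layers.Embedding`.
def _example_keras_preprocessing_equivalent(vocabulary, embedding_dim=8):
  """Builds an illustrative StringLookup + Embedding pair."""
  # Lazy import so this sketch adds no module-level dependency.
  import tensorflow as tf  # pylint: disable=g-import-not-at-top

  lookup = tf.keras.layers.StringLookup(vocabulary=vocabulary)
  embedding = tf.keras.layers.Embedding(
      input_dim=lookup.vocabulary_size(), output_dim=embedding_dim)
  return lookup, embedding
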

# pylint: disable=protected-access
def concatenate_context_input(context_input, sequence_input):
  """Replicates `context_input` across all timesteps of `sequence_input`.

  Expands dimension 1 of `context_input`, then tiles it `padded_length` times.
  This value is appended to `sequence_input` on dimension 2 and the result is
  returned.

  Args:
    context_input: A `Tensor` of dtype `float32` and shape `[batch_size, d1]`.
    sequence_input: A `Tensor` of dtype `float32` and shape `[batch_size,
      padded_length, d0]`.

  Returns:
    A `Tensor` of dtype `float32` and shape `[batch_size, padded_length,
    d0 + d1]`.

  Raises:
    ValueError: If `sequence_input` does not have rank 3 or `context_input`
      does not have rank 2.
  """
  seq_rank_check = check_ops.assert_rank(
      sequence_input,
      3,
      message='sequence_input must have rank 3',
      data=[array_ops.shape(sequence_input)])
  seq_type_check = check_ops.assert_type(
      sequence_input,
      dtypes.float32,
      message='sequence_input must have dtype float32; got {}.'.format(
          sequence_input.dtype))
  ctx_rank_check = check_ops.assert_rank(
      context_input,
      2,
      message='context_input must have rank 2',
      data=[array_ops.shape(context_input)])
  ctx_type_check = check_ops.assert_type(
      context_input,
      dtypes.float32,
      message='context_input must have dtype float32; got {}.'.format(
          context_input.dtype))
  with ops.control_dependencies(
      [seq_rank_check, seq_type_check, ctx_rank_check, ctx_type_check]):
    padded_length = array_ops.shape(sequence_input)[1]
    tiled_context_input = array_ops.tile(
        array_ops.expand_dims(context_input, 1),
        array_ops.concat([[1], [padded_length], [1]], 0))
    return array_ops.concat([sequence_input, tiled_context_input], 2)

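
# A minimal shape sketch for `concatenate_context_input` (illustrative only;
# this helper is not part of the original module).
def _example_concatenate_context_input():
  """Shows how a [2, 3] context is tiled onto a [2, 5, 4] sequence input."""
  context = array_ops.ones([2, 3], dtype=dtypes.float32)      # [batch_size, d1]
  sequence = array_ops.ones([2, 5, 4], dtype=dtypes.float32)  # [batch_size, padded_length, d0]
  combined = concatenate_context_input(context, sequence)
  # `combined` has shape [2, 5, 7]: the context features are replicated at
  # every timestep and appended along the last dimension (d0 + d1).
  return combined
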

@doc_controls.header(_FEATURE_COLUMN_DEPRECATION_WARNING)
@tf_export('feature_column.sequence_categorical_column_with_identity')
@deprecation.deprecated(None, _FEATURE_COLUMN_DEPRECATION_RUNTIME_WARNING)
def sequence_categorical_column_with_identity(key,
                                              num_buckets,
                                              default_value=None):
  """Returns a feature column that represents sequences of integers.

  Pass this to `embedding_column` or `indicator_column` to convert sequence
  categorical data into dense representation for input to sequence NN, such as
  RNN.

  Example:

  ```python
  watches = sequence_categorical_column_with_identity(
      'watches', num_buckets=1000)
  watches_embedding = embedding_column(watches, dimension=10)
  columns = [watches_embedding]

  features = tf.io.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input feature.
    num_buckets: Range of inputs. Namely, inputs are expected to be in the
      range `[0, num_buckets)`.
    default_value: If `None`, this column's graph operations will fail for
      out-of-range inputs. Otherwise, this value must be in the range
      `[0, num_buckets)`, and will replace out-of-range inputs.

  Returns:
    A `SequenceCategoricalColumn`.

  Raises:
    ValueError: if `num_buckets` is less than one.
    ValueError: if `default_value` is not in range `[0, num_buckets)`.
  """
  return fc.SequenceCategoricalColumn(
      fc.categorical_column_with_identity(
          key=key, num_buckets=num_buckets, default_value=default_value))


@doc_controls.header(_FEATURE_COLUMN_DEPRECATION_WARNING)
@tf_export('feature_column.sequence_categorical_column_with_hash_bucket')
@deprecation.deprecated(None, _FEATURE_COLUMN_DEPRECATION_RUNTIME_WARNING)
def sequence_categorical_column_with_hash_bucket(key,
                                                 hash_bucket_size,
                                                 dtype=dtypes.string):
  """A sequence of categorical terms where ids are set by hashing.

  Pass this to `embedding_column` or `indicator_column` to convert sequence
  categorical data into dense representation for input to sequence NN, such as
  RNN.

  Example:

  ```python
  tokens = sequence_categorical_column_with_hash_bucket(
      'tokens', hash_bucket_size=1000)
  tokens_embedding = embedding_column(tokens, dimension=10)
  columns = [tokens_embedding]

  features = tf.io.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input feature.
    hash_bucket_size: An int > 1. The number of buckets.
    dtype: The type of features. Only string and integer types are supported.

  Returns:
    A `SequenceCategoricalColumn`.

  Raises:
    ValueError: `hash_bucket_size` is not greater than 1.
    ValueError: `dtype` is neither string nor integer.
  """
  return fc.SequenceCategoricalColumn(
      fc.categorical_column_with_hash_bucket(
          key=key, hash_bucket_size=hash_bucket_size, dtype=dtype))


@doc_controls.header(_FEATURE_COLUMN_DEPRECATION_WARNING)
@tf_export('feature_column.sequence_categorical_column_with_vocabulary_file')
@deprecation.deprecated(None, _FEATURE_COLUMN_DEPRECATION_RUNTIME_WARNING)
def sequence_categorical_column_with_vocabulary_file(key,
                                                     vocabulary_file,
                                                     vocabulary_size=None,
                                                     num_oov_buckets=0,
                                                     default_value=None,
                                                     dtype=dtypes.string):
  """A sequence of categorical terms where ids use a vocabulary file.

  Pass this to `embedding_column` or `indicator_column` to convert sequence
  categorical data into dense representation for input to sequence NN, such as
  RNN.

  Example:

  ```python
  states = sequence_categorical_column_with_vocabulary_file(
      key='states', vocabulary_file='/us/states.txt', vocabulary_size=50,
      num_oov_buckets=5)
  states_embedding = embedding_column(states, dimension=10)
  columns = [states_embedding]

  features = tf.io.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input feature.
    vocabulary_file: The vocabulary file name.
    vocabulary_size: Number of elements in the vocabulary. This must be no
      greater than the number of entries in `vocabulary_file`; if it is
      smaller, later values are ignored. If `None`, it is set to the length of
      `vocabulary_file`.
    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
      `[vocabulary_size, vocabulary_size+num_oov_buckets)` based on a hash of
      the input value. A positive `num_oov_buckets` cannot be specified with
      `default_value`.
    default_value: The integer ID value to return for out-of-vocabulary feature
      values, defaults to `-1`. This cannot be specified with a positive
      `num_oov_buckets`.
    dtype: The type of features. Only string and integer types are supported.

  Returns:
    A `SequenceCategoricalColumn`.

  Raises:
    ValueError: `vocabulary_file` is missing or cannot be opened.
    ValueError: `vocabulary_size` is missing or < 1.
    ValueError: `num_oov_buckets` is a negative integer.
    ValueError: `num_oov_buckets` and `default_value` are both specified.
    ValueError: `dtype` is neither string nor integer.
  """
  return fc.SequenceCategoricalColumn(
      fc.categorical_column_with_vocabulary_file(
          key=key,
          vocabulary_file=vocabulary_file,
          vocabulary_size=vocabulary_size,
          num_oov_buckets=num_oov_buckets,
          default_value=default_value,
          dtype=dtype))


@doc_controls.header(_FEATURE_COLUMN_DEPRECATION_WARNING)
@tf_export('feature_column.sequence_categorical_column_with_vocabulary_list')
@deprecation.deprecated(None, _FEATURE_COLUMN_DEPRECATION_RUNTIME_WARNING)
def sequence_categorical_column_with_vocabulary_list(key,
                                                     vocabulary_list,
                                                     dtype=None,
                                                     default_value=-1,
                                                     num_oov_buckets=0):
  """A sequence of categorical terms where ids use an in-memory list.

  Pass this to `embedding_column` or `indicator_column` to convert sequence
  categorical data into dense representation for input to sequence NN, such as
  RNN.

  Example:

  ```python
  colors = sequence_categorical_column_with_vocabulary_list(
      key='colors', vocabulary_list=('R', 'G', 'B', 'Y'),
      num_oov_buckets=2)
  colors_embedding = embedding_column(colors, dimension=3)
  columns = [colors_embedding]

  features = tf.io.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input feature.
    vocabulary_list: An ordered iterable defining the vocabulary. Each feature
      is mapped to the index of its value (if present) in `vocabulary_list`.
      Must be castable to `dtype`.
    dtype: The type of features. Only string and integer types are supported.
      If `None`, it will be inferred from `vocabulary_list`.
    default_value: The integer ID value to return for out-of-vocabulary feature
      values, defaults to `-1`. This cannot be specified with a positive
      `num_oov_buckets`.
    num_oov_buckets: Non-negative integer, the number of out-of-vocabulary
      buckets. All out-of-vocabulary inputs will be assigned IDs in the range
      `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a
      hash of the input value. A positive `num_oov_buckets` cannot be specified
      with `default_value`.

  Returns:
    A `SequenceCategoricalColumn`.

  Raises:
    ValueError: if `vocabulary_list` is empty, or contains duplicate keys.
    ValueError: `num_oov_buckets` is a negative integer.
    ValueError: `num_oov_buckets` and `default_value` are both specified.
    ValueError: if `dtype` is not integer or string.
  """
  return fc.SequenceCategoricalColumn(
      fc.categorical_column_with_vocabulary_list(
          key=key,
          vocabulary_list=vocabulary_list,
          dtype=dtype,
          default_value=default_value,
          num_oov_buckets=num_oov_buckets))


@doc_controls.header(_FEATURE_COLUMN_DEPRECATION_WARNING)
@tf_export('feature_column.sequence_numeric_column')
@deprecation.deprecated(None, _FEATURE_COLUMN_DEPRECATION_RUNTIME_WARNING)
def sequence_numeric_column(key,
                            shape=(1,),
                            default_value=0.,
                            dtype=dtypes.float32,
                            normalizer_fn=None):
  """Returns a feature column that represents sequences of numeric data.

  Example:

  ```python
  temperature = sequence_numeric_column('temperature')
  columns = [temperature]

  features = tf.io.parse_example(..., features=make_parse_example_spec(columns))
  sequence_feature_layer = SequenceFeatures(columns)
  sequence_input, sequence_length = sequence_feature_layer(features)
  sequence_length_mask = tf.sequence_mask(sequence_length)

  rnn_cell = tf.keras.layers.SimpleRNNCell(hidden_size)
  rnn_layer = tf.keras.layers.RNN(rnn_cell)
  outputs, state = rnn_layer(sequence_input, mask=sequence_length_mask)
  ```

  Args:
    key: A unique string identifying the input features.
    shape: The shape of the input data per sequence id. E.g. if `shape=(2,)`,
      each example must contain `2 * sequence_length` values.
    default_value: A single value compatible with `dtype` that is used for
      padding the sparse data into a dense `Tensor`.
    dtype: The type of values.
    normalizer_fn: If not `None`, a function that can be used to normalize the
      value of the tensor after `default_value` is applied for parsing.
      Normalizer function takes the input `Tensor` as its argument, and returns
      the output `Tensor` (e.g. `lambda x: (x - 3.0) / 4.2`). Please note that
      even though the most common use case of this function is normalization,
      it can be used for any kind of TensorFlow transformation.

  Returns:
    A `SequenceNumericColumn`.

  Raises:
    TypeError: if any dimension in `shape` is not an int.
    ValueError: if any dimension in `shape` is not a positive integer.
    ValueError: if `dtype` is not convertible to `tf.float32`.
  """
  shape = fc._check_shape(shape=shape, key=key)
  if not (dtype.is_integer or dtype.is_floating):
    raise ValueError('dtype must be convertible to float. '
                     'dtype: {}, key: {}'.format(dtype, key))
  if normalizer_fn is not None and not callable(normalizer_fn):
    raise TypeError(
        'normalizer_fn must be a callable. Given: {}'.format(normalizer_fn))

  return SequenceNumericColumn(
      key,
      shape=shape,
      default_value=default_value,
      dtype=dtype,
      normalizer_fn=normalizer_fn)

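
# A minimal usage sketch for `sequence_numeric_column` (illustrative only;
# this helper is not part of the original module). With `shape=(2,)`, each
# timestep contributes two values, so a sequence of T timesteps is parsed from
# `2 * T` raw values and later densified to `[batch_size, T, 2]`.
def _example_sequence_numeric_column():
  """Builds a sequence numeric column with a simple normalizer."""
  return sequence_numeric_column(
      'temperature',
      shape=(2,),
      default_value=0.,
      # Assumed normalization constants, purely for illustration.
      normalizer_fn=lambda x: (x - 3.0) / 4.2)
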

def _assert_all_equal_and_return(tensors, name=None):
  """Asserts that all tensors are equal and returns the first one."""
  with ops.name_scope(name, 'assert_all_equal', values=tensors):
    if len(tensors) == 1:
      return tensors[0]
    assert_equal_ops = []
    for t in tensors[1:]:
      assert_equal_ops.append(check_ops.assert_equal(tensors[0], t))
    with ops.control_dependencies(assert_equal_ops):
      return array_ops.identity(tensors[0])


class SequenceNumericColumn(
    fc.SequenceDenseColumn,
    collections.namedtuple(
        'SequenceNumericColumn',
        ('key', 'shape', 'default_value', 'dtype', 'normalizer_fn'))):
  """Represents sequences of numeric data."""

  @property
  def _is_v2_column(self):
    return True

  @property
  def name(self):
    """See `FeatureColumn` base class."""
    return self.key

  @property
  def parse_example_spec(self):
    """See `FeatureColumn` base class."""
    return {self.key: parsing_ops.VarLenFeature(self.dtype)}

  def transform_feature(self, transformation_cache, state_manager):
    """See `FeatureColumn` base class.

    In this case, we apply the `normalizer_fn` to the input tensor.

    Args:
      transformation_cache: A `FeatureTransformationCache` object to access
        features.
      state_manager: A `StateManager` to create / access resources such as
        lookup tables.

    Returns:
      Normalized input tensor.
    """
    input_tensor = transformation_cache.get(self.key, state_manager)
    if self.normalizer_fn is not None:
      input_tensor = self.normalizer_fn(input_tensor)
    return input_tensor

  @property
  def variable_shape(self):
    """Returns a `TensorShape` representing the shape of sequence input."""
    return tensor_shape.TensorShape(self.shape)

  def get_sequence_dense_tensor(self, transformation_cache, state_manager):
    """Returns a `TensorSequenceLengthPair`.

    Args:
      transformation_cache: A `FeatureTransformationCache` object to access
        features.
      state_manager: A `StateManager` to create / access resources such as
        lookup tables.
    """
    sp_tensor = transformation_cache.get(self, state_manager)
    dense_tensor = sparse_ops.sparse_tensor_to_dense(
        sp_tensor, default_value=self.default_value)
    # Reshape into [batch_size, T, variable_shape].
    dense_shape = array_ops.concat(
        [array_ops.shape(dense_tensor)[:1], [-1], self.variable_shape], axis=0)
    dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape)

    # Get the number of timesteps per example.
    # For the 2D case, the raw values are grouped according to num_elements;
    # for the 3D case, the grouping happens in the third dimension, and
    # sequence length is not affected.
    if sp_tensor.shape.ndims == 2:
      num_elements = self.variable_shape.num_elements()
    else:
      num_elements = 1
    seq_length = fc_utils.sequence_length_from_sparse_tensor(
        sp_tensor, num_elements=num_elements)

    return fc.SequenceDenseColumn.TensorSequenceLengthPair(
        dense_tensor=dense_tensor, sequence_length=seq_length)

  @property
  def parents(self):
    """See `FeatureColumn` base class."""
    return [self.key]

  def get_config(self):
    """See `FeatureColumn` base class."""
    config = dict(zip(self._fields, self))
    config['dtype'] = self.dtype.name
    return config

  @classmethod
  def from_config(cls, config, custom_objects=None, columns_by_name=None):
    """See `FeatureColumn` base class."""
    fc._check_config_keys(config, cls._fields)
    kwargs = fc._standardize_and_copy_config(config)
    kwargs['dtype'] = dtypes.as_dtype(config['dtype'])
    return cls(**kwargs)


# pylint: enable=protected-access
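
# A minimal sketch of what `SequenceNumericColumn.get_sequence_dense_tensor`
# produces (illustrative only; this helper is not part of the original
# module). A ragged batch of scalar temperatures arrives as a SparseTensor;
# the column densifies it with `default_value` padding and reports the true
# length of each sequence.
def _example_sequence_numeric_dense_tensor():
  """Shows the padded dense tensor and per-example sequence lengths."""
  from tensorflow.python.framework import sparse_tensor  # pylint: disable=g-import-not-at-top

  column = sequence_numeric_column('temperature')
  # Example 0 has 3 timesteps, example 1 has 1 timestep.
  features = {
      'temperature': sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [0, 2], [1, 0]],
          values=[10., 11., 12., 20.],
          dense_shape=[2, 3])
  }
  cache = fc.FeatureTransformationCache(features)
  dense_tensor, seq_length = column.get_sequence_dense_tensor(
      cache, state_manager=None)
  # `dense_tensor` has shape [2, 3, 1] with 0. padding; `seq_length` is [3, 1].
  return dense_tensor, seq_length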