Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/data/experimental/ops/from_list.py: 34%
38 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Python API for creating a dataset from a list."""
17import itertools
19from tensorflow.python.data.ops import dataset_ops
20from tensorflow.python.data.util import nest
21from tensorflow.python.data.util import structure
22from tensorflow.python.ops import gen_experimental_dataset_ops
23from tensorflow.python.util.tf_export import tf_export
class _ListDataset(dataset_ops.DatasetSource):
  """A `Dataset` of elements from a list.

  Each entry of the input list becomes one element of the dataset. All entries
  must share the same nested structure; the dataset's `element_spec` is the
  most specific type spec that is a supertype of every entry's spec.
  """

  def __init__(self, elements, name=None):
    """Builds the dataset from a non-empty Python list.

    Args:
      elements: A non-empty `list` of elements whose components have the same
        nested structure.
      name: (Optional.) A name for the tf.data operation.

    Raises:
      ValueError: If `elements` is not a list, or is an empty list.
      TypeError: If some component of the elements has no common supertype
        across all elements.
    """
    # Check the type before the truthiness: `not elements` is misleading for
    # falsy non-lists (e.g. `None`, `()`) and raises an unrelated error for
    # objects with ambiguous truth values (e.g. multi-element NumPy arrays).
    if not isinstance(elements, list):
      raise ValueError("Invalid `elements`. `elements` must be a list.")
    if not elements:
      raise ValueError("Invalid `elements`. `elements` should not be empty.")

    elements = [structure.normalize_element(element) for element in elements]
    type_specs = [
        structure.type_spec_from_value(element) for element in elements
    ]
    first_spec = type_specs[0]

    # Check that elements have same nested structure.
    for other_spec in type_specs[1:]:
      nest.assert_same_structure(first_spec, other_spec)

    # Infer elements' supershape: for each flattened component, fold the specs
    # of all elements into their most specific common supertype.
    flattened_type_specs = [nest.flatten(type_spec) for type_spec in type_specs]
    flattened_structure = []
    for i, supertype in enumerate(flattened_type_specs[0]):
      for other_flat_specs in flattened_type_specs[1:]:
        supertype = supertype.most_specific_common_supertype(
            [other_flat_specs[i]])
        # NOTE: per the TraceType contract, `None` signals that no common
        # supertype exists. Fail here with a clear message instead of hitting
        # an `AttributeError` on `None` later.
        if supertype is None:
          raise TypeError(
              "Invalid `elements`. Component {} of the elements has no common "
              "supertype across all elements.".format(i))
      flattened_structure.append(supertype)

    if isinstance(first_spec, dataset_ops.DatasetSpec):
      # Elements that are datasets are passed as their variant tensors.
      self._tensors = [x._variant_tensor for x in elements]  # pylint: disable=protected-access
    else:
      # Otherwise pass every component tensor of every element, in order.
      self._tensors = list(
          itertools.chain.from_iterable(
              nest.flatten(element) for element in elements))
    self._structure = nest.pack_sequence_as(first_spec, flattened_structure)
    self._name = name
    # `_flat_types`, `_flat_shapes` and `_metadata` are not defined in this
    # class; presumably they are inherited and derived from `element_spec` and
    # `_name`, which is why both are assigned before this call -- TODO confirm.
    variant_tensor = gen_experimental_dataset_ops.list_dataset(
        self._tensors,
        output_types=self._flat_types,
        output_shapes=self._flat_shapes,
        metadata=self._metadata.SerializeToString())
    super().__init__(variant_tensor)

  @property
  def element_spec(self):
    """The type specification of an element of this dataset."""
    return self._structure
@tf_export("data.experimental.from_list")
def from_list(elements, name=None):
  """Builds a `Dataset` that yields the entries of `elements` one at a time.

  For a list of scalars the result matches `Dataset.from_tensor_slices`. The
  two differ once the entries have structure, because `from_list` treats every
  list entry as a complete element:

  >>> dataset = tf.data.experimental.from_list([(1, 'a'), (2, 'b'), (3, 'c')])
  >>> list(dataset.as_numpy_iterator())
  [(1, b'a'), (2, b'b'), (3, b'c')]

  `from_tensor_slices` needs the data reorganized into one sequence per
  component to produce the same output:

  >>> dataset = tf.data.Dataset.from_tensor_slices(([1, 2, 3], ['a', 'b', 'c']))
  >>> list(dataset.as_numpy_iterator())
  [(1, b'a'), (2, b'b'), (3, b'c')]

  `from_list` also accepts non-rectangular input, which `from_tensor_slices`
  only supports through ragged tensors:

  >>> dataset = tf.data.experimental.from_list([[1], [2, 3]])
  >>> list(dataset.as_numpy_iterator())
  [array([1], dtype=int32), array([2, 3], dtype=int32)]

  Performance can go either way. `from_list` avoids slicing the data on every
  epoch, but it stores the data as many small tensors instead of the few large
  tensors used by `from_tensor_slices`. As general guidance, prefer
  `from_list` from a performance perspective when the number of elements is
  small (less than 1000).

  Args:
    elements: A list of elements whose components have the same nested
      structure.
    name: (Optional.) A name for the tf.data operation.

  Returns:
    Dataset: A `Dataset` of the `elements`.

  Raises:
    ValueError: If `elements` is empty or is not a list.
  """
  dataset = _ListDataset(elements, name=name)
  return dataset