Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/data/experimental/ops/from_list.py: 34% (38 statements)

coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python API for creating a dataset from a list."""

import itertools

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.data.util import nest
from tensorflow.python.data.util import structure
from tensorflow.python.ops import gen_experimental_dataset_ops
from tensorflow.python.util.tf_export import tf_export


class _ListDataset(dataset_ops.DatasetSource):
  """A `Dataset` of elements from a list."""

  def __init__(self, elements, name=None):
    if not elements:
      raise ValueError("Invalid `elements`. `elements` should not be empty.")
    if not isinstance(elements, list):
      raise ValueError("Invalid `elements`. `elements` must be a list.")

    elements = [structure.normalize_element(element) for element in elements]
    type_specs = [
        structure.type_spec_from_value(element) for element in elements
    ]

    # Check that elements have same nested structure.
    num_elements = len(elements)
    for i in range(1, num_elements):
      nest.assert_same_structure(type_specs[0], type_specs[i])
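    # For example, mixing a tuple element such as (1, 'a') with a dict
    # element such as {'x': 1} has different nesting, so it fails this
    # structure check.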

    # Infer elements' supershape.
    flattened_type_specs = [nest.flatten(type_spec) for type_spec in type_specs]
    num_tensors_per_element = len(flattened_type_specs[0])
    flattened_structure = [None] * num_tensors_per_element
    for i in range(num_tensors_per_element):
      flattened_structure[i] = flattened_type_specs[0][i]
      for j in range(1, num_elements):
        flattened_structure[i] = flattened_structure[
            i].most_specific_common_supertype([flattened_type_specs[j][i]])
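    # At this point, each `flattened_structure[i]` is the most specific type
    # spec compatible with every element; for example, TensorSpec(shape=(1,))
    # and TensorSpec(shape=(2,)) relax to TensorSpec(shape=(None,)).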

    if not isinstance(type_specs[0], dataset_ops.DatasetSpec):
      self._tensors = list(
          itertools.chain.from_iterable(
              [nest.flatten(element) for element in elements]))
    else:
      self._tensors = [x._variant_tensor for x in elements]
    self._structure = nest.pack_sequence_as(type_specs[0], flattened_structure)
    self._name = name
    variant_tensor = gen_experimental_dataset_ops.list_dataset(
        self._tensors,
        output_types=self._flat_types,
        output_shapes=self._flat_shapes,
        metadata=self._metadata.SerializeToString())
    super(_ListDataset, self).__init__(variant_tensor)

  @property
  def element_spec(self):
    return self._structure


@tf_export("data.experimental.from_list")
def from_list(elements, name=None):
  """Creates a `Dataset` comprising the given list of elements.

  The returned dataset will produce the items in the list one by one. The
  functionality is identical to `Dataset.from_tensor_slices` when elements are
  scalars, but different when elements have structure. Consider the following
  example.

  >>> dataset = tf.data.experimental.from_list([(1, 'a'), (2, 'b'), (3, 'c')])
  >>> list(dataset.as_numpy_iterator())
  [(1, b'a'), (2, b'b'), (3, b'c')]

  To get the same output with `from_tensor_slices`, the data needs to be
  reorganized:

  >>> dataset = tf.data.Dataset.from_tensor_slices(([1, 2, 3], ['a', 'b', 'c']))
  >>> list(dataset.as_numpy_iterator())
  [(1, b'a'), (2, b'b'), (3, b'c')]

  Unlike `from_tensor_slices`, `from_list` supports non-rectangular input:

  >>> dataset = tf.data.experimental.from_list([[1], [2, 3]])
  >>> list(dataset.as_numpy_iterator())
  [array([1], dtype=int32), array([2, 3], dtype=int32)]

  Achieving the same with `from_tensor_slices` requires the use of ragged
  tensors.
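
  For comparison, a rough sketch of that ragged-tensor route (the exact
  element types and printed output may differ):

  >>> rt = tf.ragged.constant([[1], [2, 3]])
  >>> dataset = tf.data.Dataset.from_tensor_slices(rt)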

  `from_list` can be more performant than `from_tensor_slices` in some cases,
  since it avoids the need for data slicing each epoch. However, it can also be
  less performant, because data is stored as many small tensors rather than a
  few large tensors as in `from_tensor_slices`. The general guidance is to
  prefer `from_list` from a performance perspective when the number of elements
  is small (less than 1000).

  Args:
    elements: A list of elements whose components have the same nested
      structure.
    name: (Optional.) A name for the tf.data operation.

  Returns:
    Dataset: A `Dataset` of the `elements`.
  """
  return _ListDataset(elements, name)
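
# Usage sketch (assumes eager TensorFlow 2.x; the `tf_export` decorator above
# exposes this function as `tf.data.experimental.from_list`):
#
#   import tensorflow as tf
#
#   ds = tf.data.experimental.from_list([{"a": 1, "b": "x"}, {"a": 2, "b": "y"}])
#   ds.element_spec                        # dict of scalar TensorSpecs:
#                                          # "a" -> int32, "b" -> string
#   [elem["a"].numpy() for elem in ds]     # [1, 2]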