Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/distribute/numpy_dataset.py: 33%
43 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""Code for creating a dataset out of a NumPy array."""
17import numpy as np
19from tensorflow.python.data.ops import dataset_ops
20from tensorflow.python.eager import context
21from tensorflow.python.framework import dtypes
22from tensorflow.python.framework import ops
23from tensorflow.python.ops import array_ops
24from tensorflow.python.ops import variable_scope
25from tensorflow.python.ops import variable_v1
26from tensorflow.python.util import nest
def init_var_from_numpy(input_var, numpy_input, session):
  """Copy `numpy_input` into `input_var`, via `session` when in graph mode.

  In eager mode the assignment happens directly. In graph mode the variable
  is first run through its initializer and then filled slice-by-slice through
  a feed_dict, so that no single feed exceeds roughly 128 MB.

  Args:
    input_var: Variable to initialize; must match `numpy_input`'s shape/dtype.
    numpy_input: NumPy array holding the initial value.
    session: A `tf.compat.v1.Session`; required (asserted) in graph mode,
      ignored when executing eagerly.
  """
  with ops.init_scope():
    # Eager path: a single direct assignment suffices.
    if context.executing_eagerly():
      input_var.assign(numpy_input)
      return

    assert session is not None
    session.run(input_var.initializer)

    # Graph path: build one reusable sliced-assign op fed via placeholders.
    begin_ph = array_ops.placeholder(dtypes.int64, ())
    stop_ph = array_ops.placeholder(dtypes.int64, ())
    value_ph = array_ops.placeholder(input_var.dtype)
    assign_slice_op = input_var[begin_ph:stop_ph].assign(value_ph)

    # Bytes occupied by one batch element (all dims past the first).
    element_bytes = np.prod(numpy_input.shape[1:]) * input_var.dtype.size

    # Elements per slice: ceil(64 MB / element_bytes). ceil() guarantees at
    # least one element per slice; even with padding each feed stays < 128 MB
    # unless a single element is itself > 64 MB (then we copy one at a time).
    elements_per_slice = int(np.ceil((64 << 20) / element_bytes))

    # Feed consecutive slices; the final slice may be shorter.
    total = numpy_input.shape[0]
    for begin in range(0, total, elements_per_slice):
      stop = min(begin + elements_per_slice, total)
      session.run(
          assign_slice_op,
          feed_dict={
              begin_ph: begin,
              stop_ph: stop,
              value_ph: numpy_input[begin:stop],
          })
def one_host_numpy_dataset(numpy_input, colocate_with, session):
  """Build a `Dataset` from `numpy_input`, backed by variables on one host.

  Each leaf array in `numpy_input` is materialized as a non-trainable
  variable colocated with `colocate_with`, initialized from the array, and
  the resulting (possibly nested) structure is turned into a dataset of
  slices along the first dimension.

  Args:
    numpy_input: A NumPy array or nested structure of NumPy arrays.
    colocate_with: Value forwarded as the `colocate_with` kwarg to the
      variable creator (e.g. a `SingleDevice`).
    session: Session used by `init_var_from_numpy` in graph mode.

  Returns:
    A `tf.data.Dataset` slicing the colocated variables.
  """

  def _colocated_creator(next_creator, **kwargs):
    # Force every variable created in the scope below onto `colocate_with`.
    kwargs["colocate_with"] = colocate_with
    return next_creator(**kwargs)

  flat_arrays = nest.flatten(numpy_input)
  with variable_scope.variable_creator_scope(_colocated_creator):
    flat_vars = tuple(
        variable_v1.VariableV1(
            array_ops.zeros(arr.shape, arr.dtype), trainable=False)
        for arr in flat_arrays)
  for var, arr in zip(flat_vars, flat_arrays):
    init_var_from_numpy(var, arr, session)
  nested_vars = nest.pack_sequence_as(numpy_input, flat_vars)
  return dataset_ops.Dataset.from_tensor_slices(nested_vars)
class SingleDevice(object):
  """Used with `colocate_with` to create a non-mirrored variable.

  A lightweight marker object: wrapping a device string in `SingleDevice`
  and passing it as `colocate_with` signals the variable-creation machinery
  to place the variable on that one device instead of mirroring it.
  """

  def __init__(self, device):
    # Device string, e.g. "/device:CPU:0".
    self.device = device

  def __repr__(self):
    # Aid debugging of placement decisions in logs and error messages.
    return "%s(%r)" % (type(self).__name__, self.device)