Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/bincount_ops.py: 15%
158 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-03 07:57 +0000
1# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14# ==============================================================================
15"""bincount ops."""
17from tensorflow.python.framework import constant_op
18from tensorflow.python.framework import dtypes
19from tensorflow.python.framework import ops
20from tensorflow.python.framework import sparse_tensor
21from tensorflow.python.ops import array_ops
22from tensorflow.python.ops import check_ops
23from tensorflow.python.ops import gen_count_ops
24from tensorflow.python.ops import gen_math_ops
25from tensorflow.python.ops import math_ops
26from tensorflow.python.ops.ragged import ragged_tensor
27from tensorflow.python.util import deprecation
28from tensorflow.python.util.tf_export import tf_export
@tf_export("math.bincount", v1=[])
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
  """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  Vector length = Maximum element in vector `values` is 5. Adding 1, which is 6
                  will be the vector length.

  Each bin value in the output indicates number of occurrences of the particular
  index. Here, index 1 in output has a value 2. This indicates value 1 occurs
  two times in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Bin will be incremented by the corresponding weight instead of 1.
  Here, index 1 in output has a value 6. This is the summation of weights
  corresponding to the value in `values`.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>


  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1. Mutually exclusive with `binary_output`.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are specified, or if
      `axis` is not one of `None`, `0` or `-1`.

  """
  name = "bincount" if name is None else name
  with ops.name_scope(name):
    # TODO(b/255381064) Remove the following block which uses older kernels for
    # backwards compatibility for certain cases once all tests pass with the
    # newer (dense_bincount, ragged_bincount and sparse_bincount) kernels.
    #
    # Composite inputs must skip this legacy path: it starts by calling
    # `ops.convert_to_tensor(arr)`, which cannot handle a RaggedTensor or a
    # SparseTensor. They are handled by the newer kernels below instead.
    is_composite = isinstance(
        arr, (ragged_tensor.RaggedTensor, sparse_tensor.SparseTensor))
    if not is_composite and not binary_output and axis is None:
      arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
      array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
      # Multiplying by the 0/1 non-empty flag yields size 0 for empty input.
      output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
          math_ops.reduce_max(arr) + 1)
      if minlength is not None:
        minlength = ops.convert_to_tensor(
            minlength, name="minlength", dtype=dtypes.int32)
        output_size = gen_math_ops.maximum(minlength, output_size)
      if maxlength is not None:
        maxlength = ops.convert_to_tensor(
            maxlength, name="maxlength", dtype=dtypes.int32)
        output_size = gen_math_ops.minimum(maxlength, output_size)
      if weights is not None:
        weights = ops.convert_to_tensor(weights, name="weights")
        return gen_math_ops.unsorted_segment_sum(weights, arr, output_size)
      # An empty weights tensor of the requested dtype selects plain counting.
      weights = constant_op.constant([], dtype)
      arr = array_ops.reshape(arr, [-1])
      return gen_math_ops.bincount(arr, output_size, weights)

    if not isinstance(arr, sparse_tensor.SparseTensor):
      arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr, name="arr")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("Arguments `binary_output` and `weights` are mutually "
                       "exclusive. Please specify only one.")

    if not arr.dtype.is_integer:
      arr = math_ops.cast(arr, dtypes.int32)
    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError(f"Unsupported value for argument axis={axis}. Only 0 and"
                       " -1 are currently supported.")

    array_is_nonempty = array_ops.size(arr) > 0
    if isinstance(arr, sparse_tensor.SparseTensor):
      output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
          math_ops.reduce_max(arr.values) + 1)
    else:
      output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
          math_ops.reduce_max(arr) + 1)
    if minlength is not None:
      minlength = ops.convert_to_tensor(
          minlength, name="minlength", dtype=arr.dtype)
      output_size = gen_math_ops.maximum(minlength, output_size)
    if maxlength is not None:
      maxlength = ops.convert_to_tensor(
          maxlength, name="maxlength", dtype=arr.dtype)
      output_size = gen_math_ops.minimum(maxlength, output_size)

    if axis == 0:
      # Flatten every kind of input to a 1-D dense tensor of values (and a
      # matching 1-D weights tensor), so the dense kernel below is used.
      if isinstance(arr, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(arr, weights, dtype)
        arr = arr.values
      elif isinstance(arr, ragged_tensor.RaggedTensor):
        # Flatten RaggedTensors with multiple ragged dimensions which use a
        # nested RaggedTensor for the values tensor.
        while isinstance(arr, ragged_tensor.RaggedTensor):
          if weights is not None:
            weights = validate_ragged_weights(arr, weights, dtype)
          arr = arr.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        arr = array_ops.reshape(arr, [-1])

    if isinstance(arr, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(arr, weights, dtype)
      return gen_math_ops.sparse_bincount(
          indices=arr.indices,
          values=arr.values,
          dense_shape=arr.dense_shape,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    elif isinstance(arr, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(arr, weights, dtype)
      return gen_math_ops.ragged_bincount(
          splits=arr.row_splits,
          values=arr.values,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(arr, weights, dtype)
      return gen_math_ops.dense_bincount(
          input=arr,
          size=output_size,
          weights=weights,
          binary_output=binary_output)
@tf_export(v1=["math.bincount", "bincount"])
@deprecation.deprecated_endpoints("bincount")
def bincount_v1(arr,
                weights=None,
                minlength=None,
                maxlength=None,
                dtype=dtypes.int32):
  """Counts the number of occurrences of each value in an integer array.

  This is the v1 endpoint; it forwards all of its arguments to `bincount`.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is
  `i`.

  Args:
    arr: An int32 tensor of non-negative values.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.
  """
  return bincount(
      arr,
      weights=weights,
      minlength=minlength,
      maxlength=maxlength,
      dtype=dtype)
@tf_export("sparse.bincount")
def sparse_bincount(values,
                    weights=None,
                    axis=0,
                    minlength=None,
                    maxlength=None,
                    binary_output=False,
                    name=None):
  """Count the number of times an integer value appears in a tensor.

  This op takes an N-dimensional `Tensor`, `RaggedTensor`, or `SparseTensor`,
  and returns an N-dimensional int64 SparseTensor where element
  `[i0...i[axis], j]` contains the number of times the value `j` appears in
  slice `[i0...i[axis], :]` of the input tensor.  Currently, only `axis=0` and
  `axis=-1` are supported.

  Args:
    values: A Tensor, RaggedTensor, or SparseTensor whose values should be
      counted. These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as `values`. For each value in
      `values`, the bin will be incremented by the corresponding weight instead
      of 1. Mutually exclusive with `binary_output`.
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `values` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.
    name: A name for this op.

  Returns:
    A SparseTensor with `output.shape = values.shape[:axis] + [N]`, where `N` is
      * `maxlength` (if set);
      * `minlength` (if set, and `minlength > reduce_max(values)`);
      * `0` (if `values` is empty);
      * `reduce_max(values) + 1` otherwise.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are specified, or if
      `axis` is not one of `None`, `0` or `-1`.

  Examples:

  **Bin-counting every item in individual batches**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Bin-counting with defined output shape**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i. However, all values of j
  equal to or above 'maxlength' are ignored, and the last dimension of the
  output's dense_shape is at least 'minlength'. Note that, while the input is
  identical to the example above, the value '10001' in batch item 2 is dropped,
  and the dense shape is [2, 500] instead of [2,10002] or [2, 102].

  >>> minlength = maxlength = 500
  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(
  ...    data, axis=-1, minlength=minlength, maxlength=maxlength)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[  0  10]
   [  0  20]
   [  0  30]
   [  1  11]
   [  1 101]], shape=(5, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1], shape=(5,), dtype=int64),
   dense_shape=tf.Tensor([  2 500], shape=(2,), dtype=int64))

  **Binary bin-counting**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where (i,j) is 1 if the value j
  appears in batch i at least once and is 0 otherwise. Note that, even though
  some values (like 20 in batch 1 and 11 in batch 2) appear more than once,
  the 'values' tensor is all 1s.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, binary_output=True, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 1 1 1 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Weighted bin-counting**

  This example takes two inputs - a values tensor and a weights tensor. These
  tensors must be identically shaped, and have the same row splits or indices
  in the case of RaggedTensors or SparseTensors. When performing a weighted
  count, the op will output a SparseTensor where the value of (i, j) is the
  sum of the values in the weight tensor's batch i in the locations where
  the values tensor has the value j. In this case, the output dtype is the
  same as the dtype of the weights tensor.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> weights = [[2, 0.25, 15, 0.5], [2, 17, 3, 0.9]]
  >>> output = tf.sparse.bincount(data, weights=weights, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([2. 0.75 15. 5. 17. 0.9], shape=(6,), dtype=float32),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  """
  with ops.name_scope(name, "count", [values, weights]):
    if not isinstance(values, sparse_tensor.SparseTensor):
      values = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          values, name="values")
    if weights is not None:
      if not isinstance(weights, sparse_tensor.SparseTensor):
        weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
            weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("Arguments `binary_output` and `weights` are mutually "
                       "exclusive. Please specify only one.")

    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError(f"Unsupported value for argument axis={axis}. Only 0 and"
                       " -1 are currently supported.")

    # The kernels take -1 to mean "no limit given".
    minlength_value = minlength if minlength is not None else -1
    maxlength_value = maxlength if maxlength is not None else -1

    if axis == 0:
      # Flatten every kind of input to a 1-D dense tensor of values (and a
      # matching 1-D weights tensor), so the dense kernel below is used.
      if isinstance(values, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(values, weights)
        values = values.values
      elif isinstance(values, ragged_tensor.RaggedTensor):
        # Flatten RaggedTensors with multiple ragged dimensions, which use a
        # nested RaggedTensor for the values tensor. Unwrapping a single level
        # would leave a RaggedTensor behind and count per row instead of over
        # the flattened input.
        while isinstance(values, ragged_tensor.RaggedTensor):
          if weights is not None:
            weights = validate_ragged_weights(values, weights)
          values = values.values
      else:
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        values = array_ops.reshape(values, [-1])

    if isinstance(values, sparse_tensor.SparseTensor):
      weights = validate_sparse_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.sparse_count_sparse_output(
          values.indices,
          values.values,
          values.dense_shape,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    elif isinstance(values, ragged_tensor.RaggedTensor):
      weights = validate_ragged_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.ragged_count_sparse_output(
          values.row_splits,
          values.values,
          weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)
    else:
      weights = validate_dense_weights(values, weights)
      c_ind, c_val, c_shape = gen_count_ops.dense_count_sparse_output(
          values,
          weights=weights,
          minlength=minlength_value,
          maxlength=maxlength_value,
          binary_output=binary_output)

    return sparse_tensor.SparseTensor(c_ind, c_val, c_shape)
def validate_dense_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The dense tensor being counted. Its dtype is used for the empty
      weights tensor when `weights` is None and `dtype` is not given.
    weights: The weights to validate, or None.
    dtype: Optional dtype of the empty weights tensor created when `weights`
      is None.

  Returns:
    `weights` unchanged if it is a `tf.Tensor`, or an empty constant tensor if
    `weights` is None.

  Raises:
    ValueError: If `weights` is neither None nor a `tf.Tensor`.
  """
  if weights is None:
    # Compare against None explicitly: a truthiness test would silently ignore
    # valid but falsy dtype objects (e.g. `bool(np.dtype("int32"))` is False).
    empty_dtype = values.dtype if dtype is None else dtype
    return array_ops.constant([], dtype=empty_dtype)

  if not isinstance(weights, ops.Tensor):
    raise ValueError(
        "Argument `weights` must be a tf.Tensor if `values` is a tf.Tensor. "
        f"Received weights={weights} of type: {type(weights).__name__}")

  return weights
def validate_sparse_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The SparseTensor being counted. The dtype of `values.values` is
      used for the empty weights tensor when `weights` is None and `dtype` is
      not given.
    weights: The weights to validate, or None.
    dtype: Optional dtype of the empty weights tensor created when `weights`
      is None.

  Returns:
    An empty constant tensor if `weights` is None; otherwise `weights.values`,
    gated on assertions that `weights` and `values` have equal dense shapes
    and indices.

  Raises:
    ValueError: If `weights` is neither None nor a SparseTensor.
  """
  if weights is None:
    # Compare against None explicitly: a truthiness test would silently ignore
    # valid but falsy dtype objects (e.g. `bool(np.dtype("int32"))` is False).
    empty_dtype = values.values.dtype if dtype is None else dtype
    return array_ops.constant([], dtype=empty_dtype)

  if not isinstance(weights, sparse_tensor.SparseTensor):
    raise ValueError(
        "Argument `weights` must be a SparseTensor if `values` is a "
        f"SparseTensor. Received weights={weights} of type: "
        f"{type(weights).__name__}")

  # An assertion is skipped when both sides are the very same tensor object,
  # since they are then trivially equal.
  checks = []
  if weights.dense_shape is not values.dense_shape:
    checks.append(
        check_ops.assert_equal(
            weights.dense_shape,
            values.dense_shape,
            message="'weights' and 'values' must have the same dense shape."))
  if weights.indices is not values.indices:
    checks.append(
        check_ops.assert_equal(
            weights.indices,
            values.indices,
            message="'weights' and 'values' must have the same indices."))

  if not checks:
    return weights.values

  # Route the result through an identity op so that the assertions run
  # before the weights are consumed.
  with ops.control_dependencies(checks):
    return array_ops.identity(weights.values)
def validate_ragged_weights(values, weights, dtype=None):
  """Validates the passed weight tensor or creates an empty one.

  Args:
    values: The RaggedTensor being counted. The dtype of `values.values` is
      used for the empty weights tensor when `weights` is None and `dtype` is
      not given.
    weights: The weights to validate, or None.
    dtype: Optional dtype of the empty weights tensor created when `weights`
      is None.

  Returns:
    An empty constant tensor if `weights` is None; otherwise `weights.values`,
    gated on an assertion that `weights` and `values` have equal row splits.

  Raises:
    ValueError: If `weights` is neither None nor a RaggedTensor.
  """
  if weights is None:
    # Compare against None explicitly: a truthiness test would silently ignore
    # valid but falsy dtype objects (e.g. `bool(np.dtype("int32"))` is False).
    empty_dtype = values.values.dtype if dtype is None else dtype
    return array_ops.constant([], dtype=empty_dtype)

  if not isinstance(weights, ragged_tensor.RaggedTensor):
    raise ValueError(
        "`weights` must be a RaggedTensor if `values` is a RaggedTensor. "
        f"Received argument weights={weights} of type: "
        f"{type(weights).__name__}.")

  # The assertion is skipped when both sides share the very same row_splits
  # tensor object, since they are then trivially equal.
  if weights.row_splits is values.row_splits:
    return weights.values

  splits_check = check_ops.assert_equal(
      weights.row_splits,
      values.row_splits,
      message="'weights' and 'values' must have the same row splits.")
  # Route the result through an identity op so that the assertion runs before
  # the weights are consumed.
  with ops.control_dependencies([splits_check]):
    return array_ops.identity(weights.values)