Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/bincount_ops.py: 15%

158 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-03 07:57 +0000

1# Copyright 2020 The TensorFlow Authors. All Rights Reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14# ============================================================================== 

15"""bincount ops.""" 

16 

17from tensorflow.python.framework import constant_op 

18from tensorflow.python.framework import dtypes 

19from tensorflow.python.framework import ops 

20from tensorflow.python.framework import sparse_tensor 

21from tensorflow.python.ops import array_ops 

22from tensorflow.python.ops import check_ops 

23from tensorflow.python.ops import gen_count_ops 

24from tensorflow.python.ops import gen_math_ops 

25from tensorflow.python.ops import math_ops 

26from tensorflow.python.ops.ragged import ragged_tensor 

27from tensorflow.python.util import deprecation 

28from tensorflow.python.util.tf_export import tf_export 

29 

30 

@tf_export("math.bincount", v1=[])
def bincount(arr,
             weights=None,
             minlength=None,
             maxlength=None,
             dtype=dtypes.int32,
             name=None,
             axis=None,
             binary_output=False):
  """Tallies how often each non-negative integer occurs in `arr`.

  With neither `minlength` nor `maxlength` supplied, the result has length
  `tf.reduce_max(arr) + 1` for a non-empty `arr` and length 0 for an empty
  one. When `weights` is supplied, output index `i` holds the sum of the
  entries of `weights` found at the positions where `arr` equals `i`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  ```
  The largest element of `values` is 5, so the output has 5 + 1 = 6 bins.

  Every output bin reports how many times its index occurs in the input.
  Above, bin 1 holds 2 because the value 1 appears twice in `values`.

  ```python
  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  ```
  Here each bin grows by the matching weight rather than by 1. Bin 1 holds 6,
  the sum of the weights (1 and 5) paired with the two occurrences of 1.

  **Bin-counting on a certain axis**

  A 2-dimensional input is counted sample by sample, producing one row of bins
  per input row.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>

  **Bin-counting with binary_output**

  Each bin records presence (1) or absence (0) rather than a count.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

  Args:
    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are given, or if `axis`
    is anything other than `None`, `0` or `-1`.
  """
  scope = name if name is not None else "bincount"
  with ops.name_scope(scope):
    # TODO(b/255381064) Remove the following block which uses older kernels for
    # backwards compatibility for certain cases once all tests pass with the
    # newer (dense_bincount, ragged_bincount and sparse_bincount) kernels.
    takes_legacy_path = not (
        isinstance(arr, ragged_tensor.RaggedTensor)
        or binary_output
        or axis is not None)
    if takes_legacy_path:
      int_arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
      has_elements = math_ops.reduce_prod(array_ops.shape(int_arr)) > 0
      # Multiplying by the 0/1 flag collapses the bin count to 0 when the
      # input holds no elements.
      nonempty_flag = math_ops.cast(has_elements, dtypes.int32)
      num_bins = nonempty_flag * (math_ops.reduce_max(int_arr) + 1)
      if minlength is not None:
        lower = ops.convert_to_tensor(
            minlength, name="minlength", dtype=dtypes.int32)
        num_bins = gen_math_ops.maximum(lower, num_bins)
      if maxlength is not None:
        upper = ops.convert_to_tensor(
            maxlength, name="maxlength", dtype=dtypes.int32)
        num_bins = gen_math_ops.minimum(upper, num_bins)
      if weights is None:
        # An empty weights tensor of the requested dtype is what the kernel
        # receives when the caller supplied no weights.
        no_weights = constant_op.constant([], dtype)
        flat_arr = array_ops.reshape(int_arr, [-1])
        return gen_math_ops.bincount(flat_arr, num_bins, no_weights)
      weight_tensor = ops.convert_to_tensor(weights, name="weights")
      return gen_math_ops.unsorted_segment_sum(weight_tensor, int_arr, num_bins)

    if not isinstance(arr, sparse_tensor.SparseTensor):
      arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr, name="arr")
    if weights is not None and not isinstance(weights,
                                              sparse_tensor.SparseTensor):
      weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("Arguments `binary_output` and `weights` are mutually "
                       "exclusive. Please specify only one.")

    if not arr.dtype.is_integer:
      arr = math_ops.cast(arr, dtypes.int32)
    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError(f"Unsupported value for argument axis={axis}. Only 0 and"
                       " -1 are currently supported.")

    has_elements = array_ops.size(arr) > 0
    nonempty_flag = math_ops.cast(has_elements, arr.dtype)
    # A SparseTensor keeps its countable entries in `.values`.
    if isinstance(arr, sparse_tensor.SparseTensor):
      num_bins = nonempty_flag * (math_ops.reduce_max(arr.values) + 1)
    else:
      num_bins = nonempty_flag * (math_ops.reduce_max(arr) + 1)
    if minlength is not None:
      minlength = ops.convert_to_tensor(
          minlength, name="minlength", dtype=arr.dtype)
      num_bins = gen_math_ops.maximum(minlength, num_bins)
    if maxlength is not None:
      maxlength = ops.convert_to_tensor(
          maxlength, name="maxlength", dtype=arr.dtype)
      num_bins = gen_math_ops.minimum(maxlength, num_bins)

    if axis == 0:
      # Counting over everything: reduce the input (and weights) to their flat
      # values before calling a kernel.
      if isinstance(arr, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(arr, weights, dtype)
        arr = arr.values
      elif not isinstance(arr, ragged_tensor.RaggedTensor):
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        arr = array_ops.reshape(arr, [-1])
      else:
        # Flatten RaggedTensors with multiple ragged dimensions which use a
        # nested RaggedTensor for the values tensor.
        while isinstance(arr, ragged_tensor.RaggedTensor):
          if weights is not None:
            weights = validate_ragged_weights(arr, weights, dtype)
          arr = arr.values

    if isinstance(arr, sparse_tensor.SparseTensor):
      sparse_weights = validate_sparse_weights(arr, weights, dtype)
      return gen_math_ops.sparse_bincount(
          indices=arr.indices,
          values=arr.values,
          dense_shape=arr.dense_shape,
          size=num_bins,
          weights=sparse_weights,
          binary_output=binary_output)

    if isinstance(arr, ragged_tensor.RaggedTensor):
      ragged_weights = validate_ragged_weights(arr, weights, dtype)
      return gen_math_ops.ragged_bincount(
          splits=arr.row_splits,
          values=arr.values,
          size=num_bins,
          weights=ragged_weights,
          binary_output=binary_output)

    dense_weights = validate_dense_weights(arr, weights, dtype)
    return gen_math_ops.dense_bincount(
        input=arr,
        size=num_bins,
        weights=dense_weights,
        binary_output=binary_output)

223 

224 

@tf_export(v1=["math.bincount", "bincount"])
@deprecation.deprecated_endpoints("bincount")
def bincount_v1(arr,
                weights=None,
                minlength=None,
                maxlength=None,
                dtype=dtypes.int32):
  """Tallies how often each value occurs in an integer array (v1 endpoint).

  This simply forwards its arguments to `bincount`, leaving `name`, `axis` and
  `binary_output` at their defaults.

  With neither `minlength` nor `maxlength` supplied, the result has length
  `tf.reduce_max(arr) + 1` for a non-empty `arr` and length 0 for an empty
  one. When `weights` is supplied, output index `i` holds the sum of the
  entries of `weights` found at the positions where `arr` equals `i`.

  Args:
    arr: An int32 tensor of non-negative values.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
      1.
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.

  Returns:
    A vector with the same dtype as `weights` or the given `dtype`. The bin
    values.
  """
  return bincount(
      arr,
      weights=weights,
      minlength=minlength,
      maxlength=maxlength,
      dtype=dtype)

256 

257 

@tf_export("sparse.bincount")
def sparse_bincount(values,
                    weights=None,
                    axis=0,
                    minlength=None,
                    maxlength=None,
                    binary_output=False,
                    name=None):
  """Counts occurrences of each integer value, returning a SparseTensor.

  This op takes an N-dimensional `Tensor`, `RaggedTensor`, or `SparseTensor`,
  and returns an N-dimensional int64 SparseTensor where element
  `[i0...i[axis], j]` contains the number of times the value `j` appears in
  slice `[i0...i[axis], :]` of the input tensor. Currently, only `axis=0` and
  `axis=-1` are supported.

  Args:
    values: A Tensor, RaggedTensor, or SparseTensor whose values should be
      counted. These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as `values`. For each value in
      `values`, the bin will be incremented by the corresponding weight instead
      of 1.
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `values` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.
    name: A name for this op.

  Returns:
    A SparseTensor with `output.shape = values.shape[:axis] + [N]`, where `N` is
      * `maxlength` (if set);
      * `minlength` (if set, and `minlength > reduce_max(values)`);
      * `0` (if `values` is empty);
      * `reduce_max(values) + 1` otherwise.

  Raises:
    `InvalidArgumentError` if negative values are provided as an input.
    `ValueError` if both `weights` and `binary_output` are given, or if `axis`
    is anything other than `None`, `0` or `-1`.

  Examples:

  **Bin-counting every item in individual batches**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Bin-counting with defined output shape**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where the value of (i,j) is the
  number of times value j appears in batch i. However, all values of j
  above 'maxlength' are ignored. The dense_shape of the output sparse tensor
  is set to 'minlength'. Note that, while the input is identical to the
  example above, the value '10001' in batch item 2 is dropped, and the
  dense shape is [2, 500] instead of [2,10002] or [2, 102].

  >>> minlength = maxlength = 500
  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(
  ...    data, axis=-1, minlength=minlength, maxlength=maxlength)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[  0  10]
   [  0  20]
   [  0  30]
   [  1  11]
   [  1 101]], shape=(5, 2), dtype=int64),
   values=tf.Tensor([1 2 1 2 1], shape=(5,), dtype=int64),
   dense_shape=tf.Tensor([  2 500], shape=(2,), dtype=int64))

  **Binary bin-counting**

  This example takes an input (which could be a Tensor, RaggedTensor, or
  SparseTensor) and returns a SparseTensor where (i,j) is 1 if the value j
  appears in batch i at least once and is 0 otherwise. Note that, even though
  some values (like 20 in batch 1 and 11 in batch 2) appear more than once,
  the 'values' tensor is all 1s.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> output = tf.sparse.bincount(data, binary_output=True, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([1 1 1 1 1 1], shape=(6,), dtype=int64),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  **Weighted bin-counting**

  This example takes two inputs - a values tensor and a weights tensor. These
  tensors must be identically shaped, and have the same row splits or indices
  in the case of RaggedTensors or SparseTensors. When performing a weighted
  count, the op will output a SparseTensor where the value of (i, j) is the
  sum of the values in the weight tensor's batch i in the locations where
  the values tensor has the value j. In this case, the output dtype is the
  same as the dtype of the weights tensor.

  >>> data = np.array([[10, 20, 30, 20], [11, 101, 11, 10001]], dtype=np.int64)
  >>> weights = [[2, 0.25, 15, 0.5], [2, 17, 3, 0.9]]
  >>> output = tf.sparse.bincount(data, weights=weights, axis=-1)
  >>> print(output)
  SparseTensor(indices=tf.Tensor(
  [[    0    10]
   [    0    20]
   [    0    30]
   [    1    11]
   [    1   101]
   [    1 10001]], shape=(6, 2), dtype=int64),
   values=tf.Tensor([2. 0.75 15. 5. 17. 0.9], shape=(6,), dtype=float32),
   dense_shape=tf.Tensor([    2 10002], shape=(2,), dtype=int64))

  """
  with ops.name_scope(name, "count", [values, weights]):
    if not isinstance(values, sparse_tensor.SparseTensor):
      values = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          values, name="values")
    if weights is not None and not isinstance(weights,
                                              sparse_tensor.SparseTensor):
      weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
          weights, name="weights")

    if weights is not None and binary_output:
      raise ValueError("Arguments `binary_output` and `weights` are mutually "
                       "exclusive. Please specify only one.")

    if axis is None:
      axis = 0

    if axis not in [0, -1]:
      raise ValueError(f"Unsupported value for argument axis={axis}. Only 0 and"
                       " -1 are currently supported.")

    # The kernels receive -1 when the caller left a bound unspecified.
    lower_bound = -1 if minlength is None else minlength
    upper_bound = -1 if maxlength is None else maxlength

    if axis == 0:
      # Counting over everything: strip one level of structure from the input
      # (and weights) before picking a kernel.
      if isinstance(values, sparse_tensor.SparseTensor):
        if weights is not None:
          weights = validate_sparse_weights(values, weights)
        values = values.values
      elif not isinstance(values, ragged_tensor.RaggedTensor):
        if weights is not None:
          weights = array_ops.reshape(weights, [-1])
        values = array_ops.reshape(values, [-1])
      else:
        if weights is not None:
          weights = validate_ragged_weights(values, weights)
        values = values.values

    if isinstance(values, sparse_tensor.SparseTensor):
      kernel_weights = validate_sparse_weights(values, weights)
      out_indices, out_values, out_shape = (
          gen_count_ops.sparse_count_sparse_output(
              values.indices,
              values.values,
              values.dense_shape,
              kernel_weights,
              minlength=lower_bound,
              maxlength=upper_bound,
              binary_output=binary_output))
    elif not isinstance(values, ragged_tensor.RaggedTensor):
      kernel_weights = validate_dense_weights(values, weights)
      out_indices, out_values, out_shape = (
          gen_count_ops.dense_count_sparse_output(
              values,
              weights=kernel_weights,
              minlength=lower_bound,
              maxlength=upper_bound,
              binary_output=binary_output))
    else:
      kernel_weights = validate_ragged_weights(values, weights)
      out_indices, out_values, out_shape = (
          gen_count_ops.ragged_count_sparse_output(
              values.row_splits,
              values.values,
              kernel_weights,
              minlength=lower_bound,
              maxlength=upper_bound,
              binary_output=binary_output))

    return sparse_tensor.SparseTensor(out_indices, out_values, out_shape)

459 

460 

def validate_dense_weights(values, weights, dtype=None):
  """Returns `weights` if it is a dense tensor, or an empty stand-in if None.

  Args:
    values: The tensor being counted; its dtype is used for the empty stand-in
      when `dtype` is not given.
    weights: The caller-supplied weights, or None.
    dtype: Optional dtype for the empty stand-in tensor.

  Returns:
    `weights` unchanged when it is an `ops.Tensor`; otherwise, when `weights`
    is None, an empty constant of `dtype` (or of `values.dtype`).

  Raises:
    ValueError: If `weights` is given but is not an `ops.Tensor`.
  """
  if weights is not None:
    if isinstance(weights, ops.Tensor):
      return weights
    raise ValueError(
        "Argument `weights` must be a tf.Tensor if `values` is a tf.Tensor. "
        f"Received weights={weights} of type: {type(weights).__name__}")

  # No weights supplied: hand back an empty tensor of the appropriate dtype.
  placeholder_dtype = dtype if dtype else values.dtype
  return array_ops.constant([], dtype=placeholder_dtype)

474 

475 

def validate_sparse_weights(values, weights, dtype=None):
  """Returns the flat values of sparse `weights`, or an empty stand-in if None.

  Args:
    values: The SparseTensor being counted.
    weights: The caller-supplied weights, or None.
    dtype: Optional dtype for the empty stand-in tensor; defaults to the dtype
      of `values.values`.

  Returns:
    When `weights` is None, an empty constant. Otherwise `weights.values`,
    routed through an identity op that depends on equality assertions for
    whichever of `dense_shape` / `indices` is not the very same object on
    both inputs.

  Raises:
    ValueError: If `weights` is given but is not a SparseTensor.
  """
  if weights is None:
    placeholder_dtype = dtype if dtype else values.values.dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, sparse_tensor.SparseTensor):
    raise ValueError(
        "Argument `weights` must be a SparseTensor if `values` is a "
        f"SparseTensor. Received weights={weights} of type: "
        f"{type(weights).__name__}")

  comparisons = (
      (weights.dense_shape, values.dense_shape,
       "'weights' and 'values' must have the same dense shape."),
      (weights.indices, values.indices,
       "'weights' and 'values' must have the same indices."),
  )
  assertions = []
  for from_weights, from_values, failure_text in comparisons:
    # An assertion is only added when the two sides are different objects.
    if from_weights is not from_values:
      assertions.append(
          check_ops.assert_equal(
              from_weights, from_values, message=failure_text))

  if not assertions:
    return weights.values

  with ops.control_dependencies(assertions):
    return array_ops.identity(weights.values)

510 

511 

def validate_ragged_weights(values, weights, dtype=None):
  """Returns the values of ragged `weights`, or an empty stand-in if None.

  Args:
    values: The RaggedTensor being counted.
    weights: The caller-supplied weights, or None.
    dtype: Optional dtype for the empty stand-in tensor; defaults to the dtype
      of `values.values`.

  Returns:
    When `weights` is None, an empty constant. Otherwise `weights.values`,
    routed through an identity op that depends on a row-splits equality
    assertion unless both inputs share the very same `row_splits` object.

  Raises:
    ValueError: If `weights` is given but is not a RaggedTensor.
  """
  if weights is None:
    placeholder_dtype = dtype if dtype else values.values.dtype
    return array_ops.constant([], dtype=placeholder_dtype)

  if not isinstance(weights, ragged_tensor.RaggedTensor):
    raise ValueError(
        "`weights` must be a RaggedTensor if `values` is a RaggedTensor. "
        f"Received argument weights={weights} of type: "
        f"{type(weights).__name__}.")

  # The same row_splits object on both sides means there is nothing to assert.
  if weights.row_splits is values.row_splits:
    return weights.values

  splits_match = check_ops.assert_equal(
      weights.row_splits,
      values.row_splits,
      message="'weights' and 'values' must have the same row splits.")
  with ops.control_dependencies([splits_match]):
    return array_ops.identity(weights.values)

538 return weights