Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/candidate_sampling

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

14# ==============================================================================

16"""Wrappers for candidate sampling operations."""

18from tensorflow.python.framework import random_seed

19from tensorflow.python.ops import array_ops # pylint: disable=unused-import

20from tensorflow.python.ops import gen_candidate_sampling_ops

21from tensorflow.python.ops import math_ops # pylint: disable=unused-import

22from tensorflow.python.util import deprecation

23from tensorflow.python.util import dispatch

24from tensorflow.python.util.tf_export import tf_export

@tf_export(
    'random.uniform_candidate_sampler',
    v1=['random.uniform_candidate_sampler', 'nn.uniform_candidate_sampler'])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints('nn.uniform_candidate_sampler')
def uniform_candidate_sampler(
    true_classes,
    num_true,
    num_sampled,
    unique,
    range_max,
    seed=None,
    name=None):
  """Draws candidate classes from a uniform base distribution.

  A tensor of classes (`sampled_candidates`) is sampled at random from the
  integer range `[0, range_max)`. The base distribution of this operation is
  the uniform distribution over that range.

  With `unique=True` the elements of `sampled_candidates` are drawn without
  replacement; with `unique=False` they are drawn with replacement.

  For a quick course on Candidate Sampling, see the [Candidate Sampling
  Algorithms
  Reference](http://www.tensorflow.org/extras/candidate_sampling.pdf).

  Besides the samples, this operation returns `true_expected_count` and
  `sampled_expected_count`: the number of times each target class
  (`true_classes`) and each sampled class (`sampled_candidates`) is expected
  to occur in an average tensor of sampled classes. These values correspond
  to `Q(y|x)` as defined in the reference above. When `unique=True` they are
  post-rejection probabilities and are computed approximately.

  This function (like the other `*_candidate_sampler` functions) supplies
  only the ingredients for the Candidate Sampling algorithms listed in the
  big table of that reference. Implementing the algorithms themselves is
  still up to you.

  For example, per that table, "negative samples" means different things in
  different algorithms. In NCE it means `S_i` (just the sampled classes),
  which may overlap with the true classes; in Sampled Logistic it means
  `S_i - T_i`, which excludes the true classes. The returned
  `sampled_candidates` corresponds to `S_i`, not to the "negative samples"
  of any particular algorithm. Picking an algorithm and computing the
  "negative samples" it defines (e.g. `S_i - T_i`) is your responsibility.

  Similarly, the `true_classes` argument is for calculating the
  `true_expected_count` output (a by-product of this function's main
  calculation), which some algorithms in that table need. It is not for
  excluding true classes from `sampled_candidates`; that step is also
  algorithm-specific and must be carried out by you.

  Args:
    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
      num_true]`. The target classes.
    num_true: An `int`. The number of target classes per training example.
    num_sampled: An `int`. The number of classes to randomly sample. The
      returned `sampled_candidates` has shape `[num_sampled]`. When
      `unique=True`, `num_sampled` must not exceed `range_max`.
    unique: A `bool`. Whether all sampled classes in a batch are unique.
    range_max: An `int`. The number of possible classes.
    seed: An optional `int`, the operation-specific seed. Defaults to `None`.
      The value is handed to `random_seed.get_seed` to obtain the two seeds
      given to the underlying op.
    name: A name for the operation (optional).

  Returns:
    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
      The sampled classes, possibly containing duplicates (`unique=False`)
      or all unique (`unique=True`). As noted above, `sampled_candidates`
      may overlap with true classes.
    true_expected_count: A tensor of type `float` with the same shape as
      `true_classes`. The expected count, under the sampling distribution,
      of each of `true_classes`.
    sampled_expected_count: A tensor of type `float` with the same shape as
      `sampled_candidates`. The expected count, under the sampling
      distribution, of each of `sampled_candidates`.
  """
  op_seed, op_seed2 = random_seed.get_seed(seed)
  sampler_op = gen_candidate_sampling_ops.uniform_candidate_sampler
  return sampler_op(
      true_classes,
      num_true,
      num_sampled,
      unique,
      range_max,
      seed=op_seed,
      seed2=op_seed2,
      name=name)

114

115

@tf_export(
    'random.log_uniform_candidate_sampler',
    v1=[
        'random.log_uniform_candidate_sampler',
        'nn.log_uniform_candidate_sampler'
    ])
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints('nn.log_uniform_candidate_sampler')
def log_uniform_candidate_sampler(
    true_classes,
    num_true,
    num_sampled,
    unique,
    range_max,
    seed=None,
    name=None):
  """Draws candidate classes from a log-uniform (Zipfian) base distribution.

  A tensor of classes (`sampled_candidates`) is sampled at random from the
  integer range `[0, range_max)`.

  With `unique=True` the elements of `sampled_candidates` are drawn without
  replacement; with `unique=False` they are drawn with replacement.

  The base distribution of this operation is an approximately log-uniform
  (Zipfian) distribution:

  `P(class) = (log(class + 2) - log(class + 1)) / log(range_max + 1)`

  Use this sampler when the target classes approximately follow such a
  distribution, e.g. when the classes are words of a lexicon sorted in
  decreasing order of frequency. Do not use this op if your classes are not
  ordered by decreasing frequency.

  For a quick course on Candidate Sampling, see the [Candidate Sampling
  Algorithms
  Reference](http://www.tensorflow.org/extras/candidate_sampling.pdf).

  Besides the samples, this operation returns `true_expected_count` and
  `sampled_expected_count`: the number of times each target class
  (`true_classes`) and each sampled class (`sampled_candidates`) is expected
  to occur in an average tensor of sampled classes. These values correspond
  to `Q(y|x)` as defined in the reference above. When `unique=True` they are
  post-rejection probabilities and are computed approximately.

  This function (like the other `*_candidate_sampler` functions) supplies
  only the ingredients for the Candidate Sampling algorithms listed in the
  big table of that reference. Implementing the algorithms themselves is
  still up to you.

  For example, per that table, "negative samples" means different things in
  different algorithms. In NCE it means `S_i` (just the sampled classes),
  which may overlap with the true classes; in Sampled Logistic it means
  `S_i - T_i`, which excludes the true classes. The returned
  `sampled_candidates` corresponds to `S_i`, not to the "negative samples"
  of any particular algorithm. Picking an algorithm and computing the
  "negative samples" it defines (e.g. `S_i - T_i`) is your responsibility.

  Similarly, the `true_classes` argument is for calculating the
  `true_expected_count` output (a by-product of this function's main
  calculation), which some algorithms in that table need. It is not for
  excluding true classes from `sampled_candidates`; that step is also
  algorithm-specific and must be carried out by you.

  Args:
    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
      num_true]`. The target classes.
    num_true: An `int`. The number of target classes per training example.
    num_sampled: An `int`. The number of classes to randomly sample.
    unique: A `bool`. Whether all sampled classes in a batch are unique.
    range_max: An `int`. The number of possible classes.
    seed: An optional `int`, the operation-specific seed. Defaults to `None`.
      The value is handed to `random_seed.get_seed` to obtain the two seeds
      given to the underlying op.
    name: A name for the operation (optional).

  Returns:
    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
      The sampled classes. As noted above, `sampled_candidates` may overlap
      with true classes.
    true_expected_count: A tensor of type `float` with the same shape as
      `true_classes`. The expected count, under the sampling distribution,
      of each of `true_classes`.
    sampled_expected_count: A tensor of type `float` with the same shape as
      `sampled_candidates`. The expected count, under the sampling
      distribution, of each of `sampled_candidates`.
  """
  op_seed, op_seed2 = random_seed.get_seed(seed)
  sampler_op = gen_candidate_sampling_ops.log_uniform_candidate_sampler
  return sampler_op(
      true_classes,
      num_true,
      num_sampled,
      unique,
      range_max,
      seed=op_seed,
      seed2=op_seed2,
      name=name)

210

211

@tf_export(
    'random.learned_unigram_candidate_sampler',
    'nn.learned_unigram_candidate_sampler')
@dispatch.add_dispatch_support
@deprecation.deprecated_endpoints(['nn.learned_unigram_candidate_sampler'])
def learned_unigram_candidate_sampler(
    true_classes,
    num_true,
    num_sampled,
    unique,
    range_max,
    seed=None,
    name=None):
  """Draws candidate classes from a distribution learned during training.

  A tensor of classes (`sampled_candidates`) is sampled at random from the
  integer range `[0, range_max)`.

  With `unique=True` the elements of `sampled_candidates` are drawn without
  replacement; with `unique=False` they are drawn with replacement.

  The base distribution of this operation is built on the fly during
  training: it is a unigram distribution over the target classes seen so far.
  Every integer in `[0, range_max)` starts with a weight of 1, and its weight
  is incremented by 1 each time it is seen as a target class. The base
  distribution is not saved to checkpoints, so it is reset when the model is
  reloaded.

  For a quick course on Candidate Sampling, see the [Candidate Sampling
  Algorithms
  Reference](http://www.tensorflow.org/extras/candidate_sampling.pdf).

  Besides the samples, this operation returns `true_expected_count` and
  `sampled_expected_count`: the number of times each target class
  (`true_classes`) and each sampled class (`sampled_candidates`) is expected
  to occur in an average tensor of sampled classes. These values correspond
  to `Q(y|x)` as defined in the reference above. When `unique=True` they are
  post-rejection probabilities and are computed approximately.

  This function (like the other `*_candidate_sampler` functions) supplies
  only the ingredients for the Candidate Sampling algorithms listed in the
  big table of that reference. Implementing the algorithms themselves is
  still up to you.

  For example, per that table, "negative samples" means different things in
  different algorithms. In NCE it means `S_i` (just the sampled classes),
  which may overlap with the true classes; in Sampled Logistic it means
  `S_i - T_i`, which excludes the true classes. The returned
  `sampled_candidates` corresponds to `S_i`, not to the "negative samples"
  of any particular algorithm. Picking an algorithm and computing the
  "negative samples" it defines (e.g. `S_i - T_i`) is your responsibility.

  Similarly, the `true_classes` argument is for calculating the
  `true_expected_count` output (a by-product of this function's main
  calculation), which some algorithms in that table need. It is not for
  excluding true classes from `sampled_candidates`; that step is also
  algorithm-specific and must be carried out by you.

  Args:
    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
      num_true]`. The target classes.
    num_true: An `int`. The number of target classes per training example.
    num_sampled: An `int`. The number of classes to randomly sample.
    unique: A `bool`. Whether all sampled classes in a batch are unique.
    range_max: An `int`. The number of possible classes. Must not exceed
      2147483647.
    seed: An optional `int`, the operation-specific seed. Defaults to `None`.
      The value is handed to `random_seed.get_seed` to obtain the two seeds
      given to the underlying op.
    name: A name for the operation (optional).

  Returns:
    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
      The sampled classes. As noted above, `sampled_candidates` may overlap
      with true classes.
    true_expected_count: A tensor of type `float` with the same shape as
      `true_classes`. The expected count, under the sampling distribution,
      of each of `true_classes`.
    sampled_expected_count: A tensor of type `float` with the same shape as
      `sampled_candidates`. The expected count, under the sampling
      distribution, of each of `sampled_candidates`.

  Raises:
    ValueError: If `range_max` is greater than 2147483647.
  """
  op_seed, op_seed2 = random_seed.get_seed(seed)

  # Reject any `range_max` above the maximum int32 value before calling the op.
  int32_max = 2147483647
  if range_max > int32_max:
    raise ValueError(f'Value of range_max:{range_max} is too large to handle')

  sampler_op = gen_candidate_sampling_ops.learned_unigram_candidate_sampler
  return sampler_op(
      true_classes,
      num_true,
      num_sampled,
      unique,
      range_max,
      seed=op_seed,
      seed2=op_seed2,
      name=name)

304

305

@tf_export('random.fixed_unigram_candidate_sampler',
           'nn.fixed_unigram_candidate_sampler')
@dispatch.add_dispatch_support
def fixed_unigram_candidate_sampler(true_classes,
                                    num_true,
                                    num_sampled,
                                    unique,
                                    range_max,
                                    vocab_file='',
                                    distortion=1.0,
                                    num_reserved_ids=0,
                                    num_shards=1,
                                    shard=0,
                                    unigrams=(),
                                    seed=None,
                                    name=None):
  """Draws candidate classes from a provided (fixed) base distribution.

  A tensor of classes (`sampled_candidates`) is sampled at random from the
  integer range `[0, range_max)`.

  With `unique=True` the elements of `sampled_candidates` are drawn without
  replacement; with `unique=False` they are drawn with replacement.

  The base distribution is either read from a file or passed in as an
  in-memory array. Optionally, the distribution can be skewed by applying a
  distortion power to the weights.

  For a quick course on Candidate Sampling, see the [Candidate Sampling
  Algorithms
  Reference](http://www.tensorflow.org/extras/candidate_sampling.pdf).

  Besides the samples, this operation returns `true_expected_count` and
  `sampled_expected_count`: the number of times each target class
  (`true_classes`) and each sampled class (`sampled_candidates`) is expected
  to occur in an average tensor of sampled classes. These values correspond
  to `Q(y|x)` as defined in the reference above. When `unique=True` they are
  post-rejection probabilities and are computed approximately.

  This function (like the other `*_candidate_sampler` functions) supplies
  only the ingredients for the Candidate Sampling algorithms listed in the
  big table of that reference. Implementing the algorithms themselves is
  still up to you.

  For example, per that table, "negative samples" means different things in
  different algorithms. In NCE it means `S_i` (just the sampled classes),
  which may overlap with the true classes; in Sampled Logistic it means
  `S_i - T_i`, which excludes the true classes. The returned
  `sampled_candidates` corresponds to `S_i`, not to the "negative samples"
  of any particular algorithm. Picking an algorithm and computing the
  "negative samples" it defines (e.g. `S_i - T_i`) is your responsibility.

  Similarly, the `true_classes` argument is for calculating the
  `true_expected_count` output (a by-product of this function's main
  calculation), which some algorithms in that table need. It is not for
  excluding true classes from `sampled_candidates`; that step is also
  algorithm-specific and must be carried out by you.

  Args:
    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
      num_true]`. The target classes.
    num_true: An `int`. The number of target classes per training example.
    num_sampled: An `int`. The number of classes to randomly sample.
    unique: A `bool`. Whether all sampled classes in a batch are unique.
    range_max: An `int`. The number of possible classes.
    vocab_file: A file in a CSV-like format in which each valid line
      corresponds to a valid word ID. IDs are assigned sequentially, starting
      from `num_reserved_ids`. The last entry of each line is expected to be
      the count or relative probability for that ID. Exactly one of
      `vocab_file` and `unigrams` needs to be passed to this operation.
    distortion: Skews the unigram probability distribution. Each weight is
      raised to the power of `distortion` before it is added to the internal
      unigram distribution. Hence `distortion = 1.0` gives regular unigram
      sampling (as defined by the vocab file) and `distortion = 0.0` gives a
      uniform distribution.
    num_reserved_ids: Optionally, users can reserve the IDs in the range
      `[0, num_reserved_ids)`. One use case is a special unknown-word token
      with ID 0. Reserved IDs have a sampling probability of 0.
    num_shards: A sampler can sample from a subset of the original range to
      speed up the whole computation through parallelism. Together with
      `shard`, this parameter gives the number of partitions used in the
      overall computation.
    shard: A sampler can sample from a subset of the original range to speed
      up the whole computation through parallelism. Together with
      `num_shards`, this parameter gives the partition number of this
      operation when partitioning is used.
    unigrams: A list of unigram counts or probabilities, one per ID in
      sequential order. Exactly one of `vocab_file` and `unigrams` should be
      passed to this operation.
    seed: An optional `int`, the operation-specific seed. Defaults to `None`.
      The value is handed to `random_seed.get_seed` to obtain the two seeds
      given to the underlying op.
    name: A name for the operation (optional).

  Returns:
    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
      The sampled classes. As noted above, `sampled_candidates` may overlap
      with true classes.
    true_expected_count: A tensor of type `float` with the same shape as
      `true_classes`. The expected count, under the sampling distribution,
      of each of `true_classes`.
    sampled_expected_count: A tensor of type `float` with the same shape as
      `sampled_candidates`. The expected count, under the sampling
      distribution, of each of `sampled_candidates`.
  """
  op_seed, op_seed2 = random_seed.get_seed(seed)
  sampler_op = gen_candidate_sampling_ops.fixed_unigram_candidate_sampler
  return sampler_op(
      true_classes,
      num_true,
      num_sampled,
      unique,
      range_max,
      vocab_file=vocab_file,
      distortion=distortion,
      num_reserved_ids=num_reserved_ids,
      num_shards=num_shards,
      shard=shard,
      unigrams=unigrams,
      seed=op_seed,
      seed2=op_seed2,
      name=name)

428

429

@tf_export('random.all_candidate_sampler', 'nn.all_candidate_sampler')
def all_candidate_sampler(
    true_classes,
    num_true,
    num_sampled,
    unique,
    seed=None,
    name=None):
  """Generates the set of all classes.

  The set of all possible classes is generated and returned
  deterministically. This exists for testing purposes; there is no need to
  use it otherwise, since you might as well use full softmax or full logistic
  regression.

  Args:
    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
      num_true]`. The target classes.
    num_true: An `int`. The number of target classes per training example.
    num_sampled: An `int`. The number of possible classes.
    unique: A `bool`. Ignored.
    seed: An optional `int`, the operation-specific seed. Defaults to `None`.
      The value is handed to `random_seed.get_seed` to obtain the two seeds
      given to the underlying op.
    name: A name for the operation (optional).

  Returns:
    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
      This operation deterministically returns the entire range
      `[0, num_sampled)`.
    true_expected_count: A tensor of type `float` with the same shape as
      `true_classes`. The expected count, under the sampling distribution,
      of each of `true_classes`. All returned values are 1.0.
    sampled_expected_count: A tensor of type `float` with the same shape as
      `sampled_candidates`. The expected count, under the sampling
      distribution, of each of `sampled_candidates`. All returned values are
      1.0.
  """
  op_seed, op_seed2 = random_seed.get_seed(seed)
  sampler_op = gen_candidate_sampling_ops.all_candidate_sampler
  return sampler_op(
      true_classes,
      num_true,
      num_sampled,
      unique,
      seed=op_seed,
      seed2=op_seed2,
      name=name)

464

465

@tf_export('nn.compute_accidental_hits')
@dispatch.add_dispatch_support
def compute_accidental_hits(
    true_classes,
    sampled_candidates,
    num_true,
    seed=None,
    name=None):
  """Computes the position ids in `sampled_candidates` matching `true_classes`.

  In Candidate Sampling, this operation makes it possible to virtually remove
  sampled classes that happen to match target classes, as is done in Sampled
  Softmax and Sampled Logistic.

  See our [Candidate Sampling Algorithms
  Reference](http://www.tensorflow.org/extras/candidate_sampling.pdf).

  The `sampled_candidates` are presupposed to be unique.

  An 'accidental hit' occurs when one of the target classes matches one of
  the sampled classes. Accidental hits are reported as triples
  `(index, id, weight)`, where `index` is the row number in `true_classes`,
  `id` is the position in `sampled_candidates`, and weight is `-FLOAT_MAX`.

  The result of this op should be passed through a `sparse_to_dense`
  operation and then added to the logits of the sampled classes. Doing so
  removes the contradictory effect of accidentally sampling the true target
  classes as noise classes for the same example.

  Args:
    true_classes: A `Tensor` of type `int64` and shape `[batch_size,
      num_true]`. The target classes.
    sampled_candidates: A tensor of type `int64` and shape `[num_sampled]`.
      The sampled_candidates output of CandidateSampler.
    num_true: An `int`. The number of target classes per training example.
    seed: An optional `int`, the operation-specific seed. Defaults to `None`.
      The value is handed to `random_seed.get_seed` to obtain the two seeds
      given to the underlying op.
    name: A name for the operation (optional).

  Returns:
    indices: A `Tensor` of type `int32` and shape `[num_accidental_hits]`.
      Values indicate rows in `true_classes`.
    ids: A `Tensor` of type `int64` and shape `[num_accidental_hits]`.
      Values indicate positions in `sampled_candidates`.
    weights: A `Tensor` of type `float` and shape `[num_accidental_hits]`.
      Each value is `-FLOAT_MAX`.
  """
  op_seed, op_seed2 = random_seed.get_seed(seed)
  hits_op = gen_candidate_sampling_ops.compute_accidental_hits
  return hits_op(
      true_classes,
      sampled_candidates,
      num_true,
      seed=op_seed,
      seed2=op_seed2,
      name=name)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tensorflow/python/ops/candidate_sampling_ops.py: 67%

42 statements