Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_arraysetops_impl.py: 21%

280 statements

coverage.py v7.4.4, created at 2024-04-09 06:12 +0000

1""" 

2Set operations for arrays based on sorting. 

3 

4Notes 

5----- 

6 

7For floating point arrays, inaccurate results may appear due to the usual round-off

8and floating point comparison issues. 

9 

10Speed could be gained in some operations by an implementation of

11`numpy.sort` that could directly provide the permutation vectors, thus

12avoiding calls to `numpy.argsort`.

13 

14Original author: Robert Cimrman 

15 

16""" 

17import functools 

18import warnings 

19from typing import NamedTuple 

20 

21import numpy as np 

22from numpy._core import overrides 

23from numpy._core._multiarray_umath import _array_converter 

24 

25 

26array_function_dispatch = functools.partial( 

27 overrides.array_function_dispatch, module='numpy') 

28 

29 

30__all__ = [ 

31 "ediff1d", "in1d", "intersect1d", "isin", "setdiff1d", "setxor1d", 

32 "union1d", "unique", "unique_all", "unique_counts", "unique_inverse", 

33 "unique_values" 

34] 

35 

36 

37def _ediff1d_dispatcher(ary, to_end=None, to_begin=None): 

38 return (ary, to_end, to_begin) 

39 

40 

41@array_function_dispatch(_ediff1d_dispatcher) 

42def ediff1d(ary, to_end=None, to_begin=None): 

43 """ 

44 The differences between consecutive elements of an array. 

45 

46 Parameters 

47 ---------- 

48 ary : array_like 

49 If necessary, will be flattened before the differences are taken. 

50 to_end : array_like, optional 

51 Number(s) to append at the end of the returned differences. 

52 to_begin : array_like, optional 

53 Number(s) to prepend at the beginning of the returned differences. 

54 

55 Returns 

56 ------- 

57 ediff1d : ndarray 

58 The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``. 

59 

60 See Also 

61 -------- 

62 diff, gradient 

63 

64 Notes 

65 ----- 

66 When applied to masked arrays, this function drops the mask information 

67 if the `to_begin` and/or `to_end` parameters are used. 

68 

69 Examples 

70 -------- 

71 >>> x = np.array([1, 2, 4, 7, 0]) 

72 >>> np.ediff1d(x) 

73 array([ 1, 2, 3, -7]) 

74 

75 >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) 

76 array([-99, 1, 2, ..., -7, 88, 99]) 

77 

78 The returned array is always 1D. 

79 

80 >>> y = [[1, 2, 4], [1, 6, 24]] 

81 >>> np.ediff1d(y) 

82 array([ 1, 2, -3, 5, 18]) 

83 

84 """ 

85 conv = _array_converter(ary) 

86 # Convert to (any) array and ravel: 

87 ary = conv[0].ravel() 

88 

89 # enforce that the dtype of `ary` is used for the output 

90 dtype_req = ary.dtype 

91 

92 # fast track default case 

93 if to_begin is None and to_end is None: 

94 return ary[1:] - ary[:-1] 

95 

96 if to_begin is None: 

97 l_begin = 0 

98 else: 

99 to_begin = np.asanyarray(to_begin) 

100 if not np.can_cast(to_begin, dtype_req, casting="same_kind"): 

101 raise TypeError("dtype of `to_begin` must be compatible " 

102 "with input `ary` under the `same_kind` rule.") 

103 

104 to_begin = to_begin.ravel() 

105 l_begin = len(to_begin) 

106 

107 if to_end is None: 

108 l_end = 0 

109 else: 

110 to_end = np.asanyarray(to_end) 

111 if not np.can_cast(to_end, dtype_req, casting="same_kind"): 

112 raise TypeError("dtype of `to_end` must be compatible " 

113 "with input `ary` under the `same_kind` rule.") 

114 

115 to_end = to_end.ravel() 

116 l_end = len(to_end) 

117 

118 # do the calculation in place and copy to_begin and to_end 

119 l_diff = max(len(ary) - 1, 0) 

120 result = np.empty_like(ary, shape=l_diff + l_begin + l_end) 

121 

122 if l_begin > 0: 

123 result[:l_begin] = to_begin 

124 if l_end > 0: 

125 result[l_begin + l_diff:] = to_end 

126 np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff]) 

127 

128 return conv.wrap(result) 

129 

130 
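
# Illustrative sketch (not part of the module): for plain ndarrays, ediff1d is
# assumed to match the hand-built concatenation of `to_begin`, the consecutive
# differences of the flattened input, and `to_end`.
import numpy as np

_x = np.array([1, 2, 4, 7, 0])
_manual = np.concatenate(([-99], _x[1:] - _x[:-1], [88, 99]))
assert np.array_equal(_manual, np.ediff1d(_x, to_begin=-99, to_end=[88, 99]))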

131def _unpack_tuple(x): 

132 """ Unpacks one-element tuples for use as return values """ 

133 if len(x) == 1: 

134 return x[0] 

135 else: 

136 return x 

137 

138 

139def _unique_dispatcher(ar, return_index=None, return_inverse=None, 

140 return_counts=None, axis=None, *, equal_nan=None): 

141 return (ar,) 

142 

143 

144@array_function_dispatch(_unique_dispatcher) 

145def unique(ar, return_index=False, return_inverse=False, 

146 return_counts=False, axis=None, *, equal_nan=True): 

147 """ 

148 Find the unique elements of an array. 

149 

150 Returns the sorted unique elements of an array. There are three optional 

151 outputs in addition to the unique elements: 

152 

153 * the indices of the input array that give the unique values 

154 * the indices of the unique array that reconstruct the input array 

155 * the number of times each unique value comes up in the input array 

156 

157 Parameters 

158 ---------- 

159 ar : array_like 

160 Input array. Unless `axis` is specified, this will be flattened if it 

161 is not already 1-D. 

162 return_index : bool, optional 

163 If True, also return the indices of `ar` (along the specified axis, 

164 if provided, or in the flattened array) that result in the unique array. 

165 return_inverse : bool, optional 

166 If True, also return the indices of the unique array (for the specified 

167 axis, if provided) that can be used to reconstruct `ar`. 

168 return_counts : bool, optional 

169 If True, also return the number of times each unique item appears 

170 in `ar`. 

171 axis : int or None, optional 

172 The axis to operate on. If None, `ar` will be flattened. If an integer, 

173 the subarrays indexed by the given axis will be flattened and treated 

174 as the elements of a 1-D array with the dimension of the given axis, 

175 see the notes for more details. Object arrays or structured arrays 

176 that contain objects are not supported if the `axis` kwarg is used. The 

177 default is None. 

178 

179 .. versionadded:: 1.13.0 

180 

181 equal_nan : bool, optional 

182 If True, collapses multiple NaN values in the return array into one. 

183 

184 .. versionadded:: 1.24 

185 

186 Returns 

187 ------- 

188 unique : ndarray 

189 The sorted unique values. 

190 unique_indices : ndarray, optional 

191 The indices of the first occurrences of the unique values in the 

192 original array. Only provided if `return_index` is True. 

193 unique_inverse : ndarray, optional 

194 The indices to reconstruct the original array from the 

195 unique array. Only provided if `return_inverse` is True. 

196 unique_counts : ndarray, optional 

197 The number of times each of the unique values comes up in the 

198 original array. Only provided if `return_counts` is True. 

199 

200 .. versionadded:: 1.9.0 

201 

202 See Also 

203 -------- 

204 repeat : Repeat elements of an array. 

205 

206 Notes 

207 ----- 

208 When an axis is specified, the subarrays indexed by the axis are sorted.

209 This is done by making the specified axis the first dimension of the array 

210 (move the axis to the first dimension to keep the order of the other axes) 

211 and then flattening the subarrays in C order. The flattened subarrays are 

212 then viewed as a structured type with each element given a label, with the 

213 effect that we end up with a 1-D array of structured types that can be 

214 treated in the same way as any other 1-D array. The result is that the 

215 flattened subarrays are sorted in lexicographic order starting with the 

216 first element. 

217 

218 .. versionchanged:: 1.21

219 If NaN values are in the input array, a single NaN is placed

220 at the end of the sorted unique values.

221 

222 Also for complex arrays all NaN values are considered equivalent 

223 (no matter whether the NaN is in the real or imaginary part). 

224 The smallest of these NaNs in lexicographical order is chosen as the

225 representative in the returned array - see np.sort for how lexicographical

226 order is defined for complex arrays.

227 

228 .. versionchanged:: 2.0

229 For multi-dimensional inputs, ``unique_inverse`` is reshaped 

230 such that the input can be reconstructed using 

231 ``np.take(unique, unique_inverse)`` when ``axis = None``, and 

232 ``np.take_along_axis(unique, unique_inverse, axis=axis)`` otherwise. 

233 

234 Examples 

235 -------- 

236 >>> np.unique([1, 1, 2, 2, 3, 3]) 

237 array([1, 2, 3]) 

238 >>> a = np.array([[1, 1], [2, 3]]) 

239 >>> np.unique(a) 

240 array([1, 2, 3]) 

241 

242 Return the unique rows of a 2D array 

243 

244 >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]]) 

245 >>> np.unique(a, axis=0) 

246 array([[1, 0, 0], [2, 3, 4]]) 

247 

248 Return the indices of the original array that give the unique values: 

249 

250 >>> a = np.array(['a', 'b', 'b', 'c', 'a']) 

251 >>> u, indices = np.unique(a, return_index=True) 

252 >>> u 

253 array(['a', 'b', 'c'], dtype='<U1') 

254 >>> indices 

255 array([0, 1, 3]) 

256 >>> a[indices] 

257 array(['a', 'b', 'c'], dtype='<U1') 

258 

259 Reconstruct the input array from the unique values and inverse: 

260 

261 >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) 

262 >>> u, indices = np.unique(a, return_inverse=True) 

263 >>> u 

264 array([1, 2, 3, 4, 6]) 

265 >>> indices 

266 array([0, 1, 4, 3, 1, 2, 1]) 

267 >>> u[indices] 

268 array([1, 2, 6, 4, 2, 3, 2]) 

269 

270 Reconstruct the input values from the unique values and counts: 

271 

272 >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) 

273 >>> values, counts = np.unique(a, return_counts=True) 

274 >>> values 

275 array([1, 2, 3, 4, 6]) 

276 >>> counts 

277 array([1, 3, 1, 1, 1]) 

278 >>> np.repeat(values, counts) 

279 array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved 

280 

281 """ 

282 ar = np.asanyarray(ar) 

283 if axis is None: 

284 ret = _unique1d(ar, return_index, return_inverse, return_counts, 

285 equal_nan=equal_nan, inverse_shape=ar.shape) 

286 return _unpack_tuple(ret) 

287 

288 # axis was specified and not None 

289 try: 

290 ar = np.moveaxis(ar, axis, 0) 

291 except np.exceptions.AxisError: 

292 # this removes the "axis1" or "axis2" prefix from the error message 

293 raise np.exceptions.AxisError(axis, ar.ndim) from None 

294 inverse_shape = [1] * ar.ndim 

295 inverse_shape[axis] = ar.shape[0] 

296 

297 # Must reshape to a contiguous 2D array for this to work... 

298 orig_shape, orig_dtype = ar.shape, ar.dtype 

299 ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp)) 

300 ar = np.ascontiguousarray(ar) 

301 dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])] 

302 

303 # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured 

304 # data type with `m` fields where each field has the data type of `ar`. 

305 # In the following, we create the array `consolidated`, which has 

306 # shape `(n,)` with data type `dtype`. 

307 try: 

308 if ar.shape[1] > 0: 

309 consolidated = ar.view(dtype) 

310 else: 

311 # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is 

312 # a data type with itemsize 0, and the call `ar.view(dtype)` will 

313 # fail. Instead, we'll use `np.empty` to explicitly create the 

314 # array with shape `(len(ar),)`. Because `dtype` in this case has 

315 # itemsize 0, the total size of the result is still 0 bytes. 

316 consolidated = np.empty(len(ar), dtype=dtype) 

317 except TypeError as e: 

318 # There's no good way to do this for object arrays, etc... 

319 msg = 'The axis argument to unique is not supported for dtype {dt}' 

320 raise TypeError(msg.format(dt=ar.dtype)) from e 

321 

322 def reshape_uniq(uniq): 

323 n = len(uniq) 

324 uniq = uniq.view(orig_dtype) 

325 uniq = uniq.reshape(n, *orig_shape[1:]) 

326 uniq = np.moveaxis(uniq, 0, axis) 

327 return uniq 

328 

329 output = _unique1d(consolidated, return_index, 

330 return_inverse, return_counts, 

331 equal_nan=equal_nan, inverse_shape=inverse_shape) 

332 output = (reshape_uniq(output[0]),) + output[1:] 

333 return _unpack_tuple(output) 

334 

335 
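
# Illustrative sketch (not part of the module): reconstructing the input from
# the output of `unique`, both flattened and along an axis, matching the
# ``.. versionchanged:: 2.0`` note above.  Assumes NumPy >= 2.0, where the
# inverse indices are reshaped as described there.
import numpy as np

_a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])

_vals, _inv = np.unique(_a, return_inverse=True)
assert np.array_equal(np.take(_vals, _inv), _a)          # axis=None round trip

_rows, _rinv = np.unique(_a, axis=0, return_inverse=True)
assert np.array_equal(np.take_along_axis(_rows, _rinv, axis=0), _a)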

336def _unique1d(ar, return_index=False, return_inverse=False, 

337 return_counts=False, *, equal_nan=True, inverse_shape=None): 

338 """ 

339 Find the unique elements of an array, ignoring shape. 

340 """ 

341 ar = np.asanyarray(ar).flatten() 

342 

343 optional_indices = return_index or return_inverse 

344 

345 if optional_indices: 

346 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort') 

347 aux = ar[perm] 

348 else: 

349 ar.sort() 

350 aux = ar 

351 mask = np.empty(aux.shape, dtype=np.bool) 

352 mask[:1] = True 

353 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and 

354 np.isnan(aux[-1])): 

355 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent 

356 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') 

357 else: 

358 aux_firstnan = np.searchsorted(aux, aux[-1], side='left') 

359 if aux_firstnan > 0: 

360 mask[1:aux_firstnan] = ( 

361 aux[1:aux_firstnan] != aux[:aux_firstnan - 1]) 

362 mask[aux_firstnan] = True 

363 mask[aux_firstnan + 1:] = False 

364 else: 

365 mask[1:] = aux[1:] != aux[:-1] 

366 

367 ret = (aux[mask],) 

368 if return_index: 

369 ret += (perm[mask],) 

370 if return_inverse: 

371 imask = np.cumsum(mask) - 1 

372 inv_idx = np.empty(mask.shape, dtype=np.intp) 

373 inv_idx[perm] = imask 

374 ret += (inv_idx.reshape(inverse_shape),) 

375 if return_counts: 

376 idx = np.concatenate(np.nonzero(mask) + ([mask.size],)) 

377 ret += (np.diff(idx),) 

378 return ret 

379 

380 
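
# Illustrative sketch (not part of the module) of the core technique used by
# `_unique1d` above: sort, then keep the positions where a sorted value
# differs from its predecessor.  Index bookkeeping and NaN collapsing omitted.
import numpy as np

_ar = np.array([2, 1, 2, 3, 1])
_aux = np.sort(_ar)
_mask = np.empty(_aux.shape, dtype=bool)
_mask[:1] = True
_mask[1:] = _aux[1:] != _aux[:-1]     # True at the first of each run of equals
print(_aux[_mask])                    # [1 2 3]
print(np.diff(np.concatenate(np.nonzero(_mask) + ([_mask.size],))))   # [2 2 1]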

381# Array API set functions 

382 

383class UniqueAllResult(NamedTuple): 

384 values: np.ndarray 

385 indices: np.ndarray 

386 inverse_indices: np.ndarray 

387 counts: np.ndarray 

388 

389 

390class UniqueCountsResult(NamedTuple): 

391 values: np.ndarray 

392 counts: np.ndarray 

393 

394 

395class UniqueInverseResult(NamedTuple): 

396 values: np.ndarray 

397 inverse_indices: np.ndarray 

398 

399 

400def _unique_all_dispatcher(x, /): 

401 return (x,) 

402 

403 

404@array_function_dispatch(_unique_all_dispatcher) 

405def unique_all(x): 

406 """ 

407 Find the unique elements of an array, with their indices, inverse indices and counts.

408 

409 This function is an Array API compatible alternative to: 

410 

411 >>> x = np.array([1, 1, 2]) 

412 >>> np.unique(x, return_index=True, return_inverse=True, 

413 ... return_counts=True, equal_nan=False) 

414 (array([1, 2]), array([0, 2]), array([0, 0, 1]), array([2, 1])) 

415 

416 Parameters 

417 ---------- 

418 x : array_like 

419 Input array. It will be flattened if it is not already 1-D. 

420 

421 Returns 

422 ------- 

423 out : namedtuple 

424 The result containing: 

425 

426 * values - The unique elements of an input array. 

427 * indices - The first occurring indices for each unique element. 

428 * inverse_indices - The indices from the set of unique elements 

429 that reconstruct `x`. 

430 * counts - The corresponding counts for each unique element. 

431 

432 See Also 

433 -------- 

434 unique : Find the unique elements of an array. 

435 

436 """ 

437 result = unique( 

438 x, 

439 return_index=True, 

440 return_inverse=True, 

441 return_counts=True, 

442 equal_nan=False 

443 ) 

444 return UniqueAllResult(*result) 

445 

446 
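
# Illustrative sketch (not part of the module): the Array API-style wrappers
# return named tuples, so fields are available by name as well as by position.
# Assumes NumPy >= 2.0, where np.unique_all is exposed.
import numpy as np

_res = np.unique_all(np.array([1, 1, 2]))
print(_res.values)             # [1 2]
print(_res.indices)            # [0 2]
print(_res.inverse_indices)    # [0 0 1]
print(_res.counts)             # [2 1]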

447def _unique_counts_dispatcher(x, /): 

448 return (x,) 

449 

450 

451@array_function_dispatch(_unique_counts_dispatcher) 

452def unique_counts(x): 

453 """ 

454 Find the unique elements and counts of an input array `x`. 

455 

456 This function is an Array API compatible alternative to: 

457 

458 >>> x = np.array([1, 1, 2]) 

459 >>> np.unique(x, return_counts=True, equal_nan=False) 

460 (array([1, 2]), array([2, 1])) 

461 

462 Parameters 

463 ---------- 

464 x : array_like 

465 Input array. It will be flattened if it is not already 1-D. 

466 

467 Returns 

468 ------- 

469 out : namedtuple 

470 The result containing: 

471 

472 * values - The unique elements of an input array. 

473 * counts - The corresponding counts for each unique element. 

474 

475 See Also 

476 -------- 

477 unique : Find the unique elements of an array. 

478 

479 """ 

480 result = unique( 

481 x, 

482 return_index=False, 

483 return_inverse=False, 

484 return_counts=True, 

485 equal_nan=False 

486 ) 

487 return UniqueCountsResult(*result) 

488 

489 

490def _unique_inverse_dispatcher(x, /): 

491 return (x,) 

492 

493 

494@array_function_dispatch(_unique_inverse_dispatcher) 

495def unique_inverse(x): 

496 """ 

497 Find the unique elements of `x` and the indices to reconstruct `x`.

498

499 This function is an Array API compatible alternative to:

500 

501 >>> x = np.array([1, 1, 2]) 

502 >>> np.unique(x, return_inverse=True, equal_nan=False) 

503 (array([1, 2]), array([0, 0, 1])) 

504 

505 Parameters 

506 ---------- 

507 x : array_like 

508 Input array. It will be flattened if it is not already 1-D. 

509 

510 Returns 

511 ------- 

512 out : namedtuple 

513 The result containing: 

514 

515 * values - The unique elements of an input array. 

516 * inverse_indices - The indices from the set of unique elements 

517 that reconstruct `x`. 

518 

519 See Also 

520 -------- 

521 unique : Find the unique elements of an array. 

522 

523 """ 

524 result = unique( 

525 x, 

526 return_index=False, 

527 return_inverse=True, 

528 return_counts=False, 

529 equal_nan=False 

530 ) 

531 return UniqueInverseResult(*result) 

532 

533 

534def _unique_values_dispatcher(x, /): 

535 return (x,) 

536 

537 

538@array_function_dispatch(_unique_values_dispatcher) 

539def unique_values(x): 

540 """ 

541 Returns the unique elements of an input array `x`. 

542 

543 This function is an Array API compatible alternative to:

544 

545 >>> x = np.array([1, 1, 2]) 

546 >>> np.unique(x, equal_nan=False) 

547 array([1, 2]) 

548 

549 Parameters 

550 ---------- 

551 x : array_like 

552 Input array. It will be flattened if it is not already 1-D. 

553 

554 Returns 

555 ------- 

556 out : ndarray 

557 The unique elements of an input array. 

558 

559 See Also 

560 -------- 

561 unique : Find the unique elements of an array. 

562 

563 """ 

564 return unique( 

565 x, 

566 return_index=False, 

567 return_inverse=False, 

568 return_counts=False, 

569 equal_nan=False 

570 ) 

571 

572 

573def _intersect1d_dispatcher( 

574 ar1, ar2, assume_unique=None, return_indices=None): 

575 return (ar1, ar2) 

576 

577 

578@array_function_dispatch(_intersect1d_dispatcher) 

579def intersect1d(ar1, ar2, assume_unique=False, return_indices=False): 

580 """ 

581 Find the intersection of two arrays. 

582 

583 Return the sorted, unique values that are in both of the input arrays. 

584 

585 Parameters 

586 ---------- 

587 ar1, ar2 : array_like 

588 Input arrays. Will be flattened if not already 1D. 

589 assume_unique : bool 

590 If True, the input arrays are both assumed to be unique, which 

591 can speed up the calculation. If True but ``ar1`` or ``ar2`` are not 

592 unique, incorrect results and out-of-bounds indices could result. 

593 Default is False. 

594 return_indices : bool 

595 If True, the indices which correspond to the intersection of the two 

596 arrays are returned. The first instance of a value is used if there are 

597 multiple. Default is False. 

598 

599 .. versionadded:: 1.15.0 

600 

601 Returns 

602 ------- 

603 intersect1d : ndarray 

604 Sorted 1D array of common and unique elements. 

605 comm1 : ndarray 

606 The indices of the first occurrences of the common values in `ar1`. 

607 Only provided if `return_indices` is True. 

608 comm2 : ndarray 

609 The indices of the first occurrences of the common values in `ar2`. 

610 Only provided if `return_indices` is True. 

611 

612 Examples 

613 -------- 

614 >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1]) 

615 array([1, 3]) 

616 

617 To intersect more than two arrays, use functools.reduce: 

618 

619 >>> from functools import reduce 

620 >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) 

621 array([3]) 

622 

623 To return the indices of the values common to the input arrays 

624 along with the intersected values: 

625 

626 >>> x = np.array([1, 1, 2, 3, 4]) 

627 >>> y = np.array([2, 1, 4, 6]) 

628 >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True) 

629 >>> x_ind, y_ind 

630 (array([0, 2, 4]), array([1, 0, 2])) 

631 >>> xy, x[x_ind], y[y_ind] 

632 (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4])) 

633 

634 """ 

635 ar1 = np.asanyarray(ar1) 

636 ar2 = np.asanyarray(ar2) 

637 

638 if not assume_unique: 

639 if return_indices: 

640 ar1, ind1 = unique(ar1, return_index=True) 

641 ar2, ind2 = unique(ar2, return_index=True) 

642 else: 

643 ar1 = unique(ar1) 

644 ar2 = unique(ar2) 

645 else: 

646 ar1 = ar1.ravel() 

647 ar2 = ar2.ravel() 

648 

649 aux = np.concatenate((ar1, ar2)) 

650 if return_indices: 

651 aux_sort_indices = np.argsort(aux, kind='mergesort') 

652 aux = aux[aux_sort_indices] 

653 else: 

654 aux.sort() 

655 

656 mask = aux[1:] == aux[:-1] 

657 int1d = aux[:-1][mask] 

658 

659 if return_indices: 

660 ar1_indices = aux_sort_indices[:-1][mask] 

661 ar2_indices = aux_sort_indices[1:][mask] - ar1.size 

662 if not assume_unique: 

663 ar1_indices = ind1[ar1_indices] 

664 ar2_indices = ind2[ar2_indices] 

665 

666 return int1d, ar1_indices, ar2_indices 

667 else: 

668 return int1d 

669 

670 
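
# Illustrative sketch (not part of the module) of the idea behind intersect1d:
# once both inputs are deduplicated, a value common to both appears exactly
# twice in the sorted concatenation, i.e. as a pair of equal neighbours.
import numpy as np

_ar1 = np.unique([1, 3, 4, 3])                   # [1 3 4]
_ar2 = np.unique([3, 1, 2, 1])                   # [1 2 3]
_aux = np.sort(np.concatenate((_ar1, _ar2)))     # [1 1 2 3 3 4]
print(_aux[:-1][_aux[1:] == _aux[:-1]])          # [1 3]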

671def _setxor1d_dispatcher(ar1, ar2, assume_unique=None): 

672 return (ar1, ar2) 

673 

674 

675@array_function_dispatch(_setxor1d_dispatcher) 

676def setxor1d(ar1, ar2, assume_unique=False): 

677 """ 

678 Find the set exclusive-or of two arrays. 

679 

680 Return the sorted, unique values that are in only one (not both) of the 

681 input arrays. 

682 

683 Parameters 

684 ---------- 

685 ar1, ar2 : array_like 

686 Input arrays. 

687 assume_unique : bool 

688 If True, the input arrays are both assumed to be unique, which 

689 can speed up the calculation. Default is False. 

690 

691 Returns 

692 ------- 

693 setxor1d : ndarray 

694 Sorted 1D array of unique values that are in only one of the input 

695 arrays. 

696 

697 Examples 

698 -------- 

699 >>> a = np.array([1, 2, 3, 2, 4]) 

700 >>> b = np.array([2, 3, 5, 7, 5]) 

701 >>> np.setxor1d(a, b)

702 array([1, 4, 5, 7]) 

703 

704 """ 

705 if not assume_unique: 

706 ar1 = unique(ar1) 

707 ar2 = unique(ar2) 

708 

709 aux = np.concatenate((ar1, ar2)) 

710 if aux.size == 0: 

711 return aux 

712 

713 aux.sort() 

714 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True])) 

715 return aux[flag[1:] & flag[:-1]] 

716 

717 
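
# Illustrative sketch (not part of the module) of the boundary-flag trick in
# setxor1d above: after sorting the concatenation of the deduplicated inputs,
# keep the values that differ from both neighbours, i.e. those occurring once.
import numpy as np

_a = np.unique([1, 2, 3, 2, 4])                  # [1 2 3 4]
_b = np.unique([2, 3, 5, 7, 5])                  # [2 3 5 7]
_aux = np.sort(np.concatenate((_a, _b)))         # [1 2 2 3 3 4 5 7]
_flag = np.concatenate(([True], _aux[1:] != _aux[:-1], [True]))
print(_aux[_flag[1:] & _flag[:-1]])              # [1 4 5 7]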

718def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *, 

719 kind=None): 

720 return (ar1, ar2) 

721 

722 

723@array_function_dispatch(_in1d_dispatcher) 

724def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None): 

725 """ 

726 Test whether each element of a 1-D array is also present in a second array. 

727 

728 .. deprecated:: 2.0 

729 Use :func:`isin` instead of `in1d` for new code. 

730 

731 Returns a boolean array the same length as `ar1` that is True 

732 where an element of `ar1` is in `ar2` and False otherwise. 

733 

734 Parameters 

735 ---------- 

736 ar1 : (M,) array_like 

737 Input array. 

738 ar2 : array_like 

739 The values against which to test each value of `ar1`. 

740 assume_unique : bool, optional 

741 If True, the input arrays are both assumed to be unique, which 

742 can speed up the calculation. Default is False. 

743 invert : bool, optional 

744 If True, the values in the returned array are inverted (that is, 

745 False where an element of `ar1` is in `ar2` and True otherwise). 

746 Default is False. ``np.in1d(a, b, invert=True)`` is equivalent 

747 to (but faster than) ``np.invert(in1d(a, b))``.

748 kind : {None, 'sort', 'table'}, optional 

749 The algorithm to use. This will not affect the final result, 

750 but will affect the speed and memory use. The default, None, 

751 will select automatically based on memory considerations. 

752 

753 * If 'sort', will use a mergesort-based approach. This will have 

754 a memory usage of roughly 6 times the sum of the sizes of 

755 `ar1` and `ar2`, not accounting for size of dtypes. 

756 * If 'table', will use a lookup table approach similar 

757 to a counting sort. This is only available for boolean and 

758 integer arrays. This will have a memory usage of the 

759 size of `ar1` plus the max-min value of `ar2`. `assume_unique` 

760 has no effect when the 'table' option is used. 

761 * If None, will automatically choose 'table' if 

762 the required memory allocation is less than or equal to 

763 6 times the sum of the sizes of `ar1` and `ar2`, 

764 otherwise will use 'sort'. This is done to not use 

765 a large amount of memory by default, even though 

766 'table' may be faster in most cases. If 'table' is chosen, 

767 `assume_unique` will have no effect. 

768 

769 .. versionadded:: 1.8.0 

770 

771 Returns 

772 ------- 

773 in1d : (M,) ndarray, bool 

774 The values `ar1[in1d]` are in `ar2`. 

775 

776 See Also 

777 -------- 

778 isin : Version of this function that preserves the 

779 shape of ar1. 

780 

781 Notes 

782 ----- 

783 `in1d` can be considered as an element-wise function version of the 

784 python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly 

785 equivalent to ``np.array([item in b for item in a])``. 

786 However, this idea fails if `ar2` is a set or similar (non-sequence)

787 container: because ``ar2`` is converted to an array, in those cases

788 ``asarray(ar2)`` is an object array rather than the expected array of 

789 contained values. 

790 

791 Using ``kind='table'`` tends to be faster than `kind='sort'` if the 

792 following relationship is true: 

793 ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``, 

794 but may use greater memory. The default value for `kind` will 

795 be automatically selected based only on memory usage, so one may 

796 manually set ``kind='table'`` if memory constraints can be relaxed. 

797 

798 .. versionadded:: 1.4.0 

799 

800 Examples 

801 -------- 

802 >>> test = np.array([0, 1, 2, 5, 0]) 

803 >>> states = [0, 2] 

804 >>> mask = np.in1d(test, states) 

805 >>> mask 

806 array([ True, False, True, False, True]) 

807 >>> test[mask] 

808 array([0, 2, 0]) 

809 >>> mask = np.in1d(test, states, invert=True) 

810 >>> mask 

811 array([False, True, False, True, False]) 

812 >>> test[mask] 

813 array([1, 5]) 

814 """ 

815 

816 # Deprecated in NumPy 2.0, 2023-08-18 

817 warnings.warn( 

818 "`in1d` is deprecated. Use `np.isin` instead.", 

819 DeprecationWarning, 

820 stacklevel=2 

821 ) 

822 

823 return _in1d(ar1, ar2, assume_unique, invert, kind=kind) 

824 

825 

826def _in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None): 

827 # Ravel both arrays, behavior for the first array could be different 

828 ar1 = np.asarray(ar1).ravel() 

829 ar2 = np.asarray(ar2).ravel() 

830 

831 # Ensure that iteration through object arrays yields size-1 arrays 

832 if ar2.dtype == object: 

833 ar2 = ar2.reshape(-1, 1) 

834 

835 if kind not in {None, 'sort', 'table'}: 

836 raise ValueError( 

837 f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.") 

838 

839 # Can use the table method if all arrays are integers or boolean: 

840 is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2)) 

841 use_table_method = is_int_arrays and kind in {None, 'table'} 

842 

843 if use_table_method: 

844 if ar2.size == 0: 

845 if invert: 

846 return np.ones_like(ar1, dtype=bool) 

847 else: 

848 return np.zeros_like(ar1, dtype=bool) 

849 

850 # Convert booleans to uint8 so we can use the fast integer algorithm 

851 if ar1.dtype == bool: 

852 ar1 = ar1.astype(np.uint8) 

853 if ar2.dtype == bool: 

854 ar2 = ar2.astype(np.uint8) 

855 

856 ar2_min = np.min(ar2) 

857 ar2_max = np.max(ar2) 

858 

859 ar2_range = int(ar2_max) - int(ar2_min) 

860 

861 # Constraints on whether we can actually use the table method: 

862 # 1. Assert memory usage is not too large 

863 below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size) 

864 # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype 

865 range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max 

866 # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype 

867 if ar1.size > 0: 

868 ar1_min = np.min(ar1) 

869 ar1_max = np.max(ar1) 

870 

871 # After masking, the range of ar1 is guaranteed to be 

872 # within the range of ar2: 

873 ar1_upper = min(int(ar1_max), int(ar2_max)) 

874 ar1_lower = max(int(ar1_min), int(ar2_min)) 

875 

876 range_safe_from_overflow &= all(( 

877 ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max, 

878 ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min 

879 )) 

880 

881 # Optimal performance is for approximately 

882 # log10(size) > (log10(range) - 2.27) / 0.927. 

883 # However, here we set the requirement that by default 

884 # the intermediate array can only be 6x 

885 # the combined memory allocation of the original 

886 # arrays. See discussion on  

887 # https://github.com/numpy/numpy/pull/12065. 

888 

889 if ( 

890 range_safe_from_overflow and 

891 (below_memory_constraint or kind == 'table') 

892 ): 

893 

894 if invert: 

895 outgoing_array = np.ones_like(ar1, dtype=bool) 

896 else: 

897 outgoing_array = np.zeros_like(ar1, dtype=bool) 

898 

899 # Make elements 1 where the integer exists in ar2 

900 if invert: 

901 isin_helper_ar = np.ones(ar2_range + 1, dtype=bool) 

902 isin_helper_ar[ar2 - ar2_min] = 0 

903 else: 

904 isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool) 

905 isin_helper_ar[ar2 - ar2_min] = 1 

906 

907 # Mask out elements we know won't work 

908 basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min) 

909 outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] - 

910 ar2_min] 

911 

912 return outgoing_array 

913 elif kind == 'table': # not range_safe_from_overflow 

914 raise RuntimeError( 

915 "You have specified kind='table', " 

916 "but the range of values in `ar2` or `ar1` exceed the " 

917 "maximum integer of the datatype. " 

918 "Please set `kind` to None or 'sort'." 

919 ) 

920 elif kind == 'table': 

921 raise ValueError( 

922 "The 'table' method is only " 

923 "supported for boolean or integer arrays. " 

924 "Please select 'sort' or None for kind." 

925 ) 

926 

927 

928 # Check if one of the arrays may contain arbitrary objects 

929 contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject 

930 

931 # This code is run when 

932 # a) the first condition is true, making the code significantly faster 

933 # b) the second condition is true (i.e. `ar1` or `ar2` may contain 

934 # arbitrary objects), since then sorting is not guaranteed to work 

935 if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object: 

936 if invert: 

937 mask = np.ones(len(ar1), dtype=bool) 

938 for a in ar2: 

939 mask &= (ar1 != a) 

940 else: 

941 mask = np.zeros(len(ar1), dtype=bool) 

942 for a in ar2: 

943 mask |= (ar1 == a) 

944 return mask 

945 

946 # Otherwise use sorting 

947 if not assume_unique: 

948 ar1, rev_idx = np.unique(ar1, return_inverse=True) 

949 ar2 = np.unique(ar2) 

950 

951 ar = np.concatenate((ar1, ar2)) 

952 # We need this to be a stable sort, so always use 'mergesort' 

953 # here. The values from the first array should always come before 

954 # the values from the second array. 

955 order = ar.argsort(kind='mergesort') 

956 sar = ar[order] 

957 if invert: 

958 bool_ar = (sar[1:] != sar[:-1]) 

959 else: 

960 bool_ar = (sar[1:] == sar[:-1]) 

961 flag = np.concatenate((bool_ar, [invert])) 

962 ret = np.empty(ar.shape, dtype=bool) 

963 ret[order] = flag 

964 

965 if assume_unique: 

966 return ret[:len(ar1)] 

967 else: 

968 return ret[rev_idx] 

969 

970 
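
# Illustrative, simplified sketch (not part of the module) of the
# ``kind='table'`` path in `_in1d` above, for small integer inputs only; the
# overflow and memory checks are omitted.  Build a boolean lookup table
# spanning the value range of `ar2` and index it with the in-range elements
# of `ar1`.
import numpy as np

_ar1 = np.array([0, 1, 2, 5, 0])
_ar2 = np.array([0, 2])

_lo, _hi = int(_ar2.min()), int(_ar2.max())
_table = np.zeros(_hi - _lo + 1, dtype=bool)
_table[_ar2 - _lo] = True                    # mark the values present in ar2

_out = np.zeros(_ar1.shape, dtype=bool)
_in_range = (_ar1 >= _lo) & (_ar1 <= _hi)    # only these can hit the table
_out[_in_range] = _table[_ar1[_in_range] - _lo]
print(_out)                                  # [ True False  True False  True]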

971def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None, 

972 *, kind=None): 

973 return (element, test_elements) 

974 

975 

976@array_function_dispatch(_isin_dispatcher) 

977def isin(element, test_elements, assume_unique=False, invert=False, *, 

978 kind=None): 

979 """ 

980 Calculates ``element in test_elements``, broadcasting over `element` only. 

981 Returns a boolean array of the same shape as `element` that is True 

982 where an element of `element` is in `test_elements` and False otherwise. 

983 

984 Parameters 

985 ---------- 

986 element : array_like 

987 Input array. 

988 test_elements : array_like 

989 The values against which to test each value of `element`. 

990 This argument is flattened if it is an array or array_like. 

991 See notes for behavior with non-array-like parameters. 

992 assume_unique : bool, optional 

993 If True, the input arrays are both assumed to be unique, which 

994 can speed up the calculation. Default is False. 

995 invert : bool, optional 

996 If True, the values in the returned array are inverted, as if 

997 calculating `element not in test_elements`. Default is False. 

998 ``np.isin(a, b, invert=True)`` is equivalent to (but faster 

999 than) ``np.invert(np.isin(a, b))``. 

1000 kind : {None, 'sort', 'table'}, optional 

1001 The algorithm to use. This will not affect the final result, 

1002 but will affect the speed and memory use. The default, None, 

1003 will select automatically based on memory considerations. 

1004 

1005 * If 'sort', will use a mergesort-based approach. This will have 

1006 a memory usage of roughly 6 times the sum of the sizes of 

1007 `element` and `test_elements`, not accounting for size of dtypes. 

1008 * If 'table', will use a lookup table approach similar 

1009 to a counting sort. This is only available for boolean and 

1010 integer arrays. This will have a memory usage of the 

1011 size of `element` plus the max-min value of `test_elements`. 

1012 `assume_unique` has no effect when the 'table' option is used. 

1013 * If None, will automatically choose 'table' if 

1014 the required memory allocation is less than or equal to 

1015 6 times the sum of the sizes of `element` and `test_elements`, 

1016 otherwise will use 'sort'. This is done to not use 

1017 a large amount of memory by default, even though 

1018 'table' may be faster in most cases. If 'table' is chosen, 

1019 `assume_unique` will have no effect. 

1020 

1021 

1022 Returns 

1023 ------- 

1024 isin : ndarray, bool 

1025 Has the same shape as `element`. The values `element[isin]` 

1026 are in `test_elements`. 

1027 

1028 Notes 

1029 ----- 

1030 

1031 `isin` is an element-wise function version of the python keyword `in`. 

1032 ``isin(a, b)`` is roughly equivalent to 

1033 ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences. 

1034 

1035 `element` and `test_elements` are converted to arrays if they are not 

1036 already. If `test_elements` is a set (or other non-sequence collection) 

1037 it will be converted to an object array with one element, rather than an 

1038 array of the values contained in `test_elements`. This is a consequence 

1039 of the `array` constructor's way of handling non-sequence collections. 

1040 Converting the set to a list usually gives the desired behavior. 

1041 

1042 Using ``kind='table'`` tends to be faster than `kind='sort'` if the 

1043 following relationship is true: 

1044 ``log10(len(test_elements)) > 

1045 (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``, 

1046 but may use greater memory. The default value for `kind` will 

1047 be automatically selected based only on memory usage, so one may 

1048 manually set ``kind='table'`` if memory constraints can be relaxed. 

1049 

1050 .. versionadded:: 1.13.0 

1051 

1052 Examples 

1053 -------- 

1054 >>> element = 2*np.arange(4).reshape((2, 2)) 

1055 >>> element 

1056 array([[0, 2], 

1057 [4, 6]]) 

1058 >>> test_elements = [1, 2, 4, 8] 

1059 >>> mask = np.isin(element, test_elements) 

1060 >>> mask 

1061 array([[False, True], 

1062 [ True, False]]) 

1063 >>> element[mask] 

1064 array([2, 4]) 

1065 

1066 The indices of the matched values can be obtained with `nonzero`: 

1067 

1068 >>> np.nonzero(mask) 

1069 (array([0, 1]), array([1, 0])) 

1070 

1071 The test can also be inverted: 

1072 

1073 >>> mask = np.isin(element, test_elements, invert=True) 

1074 >>> mask 

1075 array([[ True, False], 

1076 [False, True]]) 

1077 >>> element[mask] 

1078 array([0, 6]) 

1079 

1080 Because of how `array` handles sets, the following does not 

1081 work as expected: 

1082 

1083 >>> test_set = {1, 2, 4, 8} 

1084 >>> np.isin(element, test_set) 

1085 array([[False, False], 

1086 [False, False]]) 

1087 

1088 Casting the set to a list gives the expected result: 

1089 

1090 >>> np.isin(element, list(test_set)) 

1091 array([[False, True], 

1092 [ True, False]]) 

1093 """ 

1094 element = np.asarray(element) 

1095 return _in1d(element, test_elements, assume_unique=assume_unique, 

1096 invert=invert, kind=kind).reshape(element.shape) 

1097 

1098 

1099def _union1d_dispatcher(ar1, ar2): 

1100 return (ar1, ar2) 

1101 

1102 

1103@array_function_dispatch(_union1d_dispatcher) 

1104def union1d(ar1, ar2): 

1105 """ 

1106 Find the union of two arrays. 

1107 

1108 Return the unique, sorted array of values that are in either of the two 

1109 input arrays. 

1110 

1111 Parameters 

1112 ---------- 

1113 ar1, ar2 : array_like 

1114 Input arrays. They are flattened if they are not already 1D. 

1115 

1116 Returns 

1117 ------- 

1118 union1d : ndarray 

1119 Unique, sorted union of the input arrays. 

1120 

1121 Examples 

1122 -------- 

1123 >>> np.union1d([-1, 0, 1], [-2, 0, 2]) 

1124 array([-2, -1, 0, 1, 2]) 

1125 

1126 To find the union of more than two arrays, use functools.reduce: 

1127 

1128 >>> from functools import reduce 

1129 >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) 

1130 array([1, 2, 3, 4, 6]) 

1131 """ 

1132 return unique(np.concatenate((ar1, ar2), axis=None)) 

1133 

1134 

1135def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None): 

1136 return (ar1, ar2) 

1137 

1138 

1139@array_function_dispatch(_setdiff1d_dispatcher) 

1140def setdiff1d(ar1, ar2, assume_unique=False): 

1141 """ 

1142 Find the set difference of two arrays. 

1143 

1144 Return the unique values in `ar1` that are not in `ar2`. 

1145 

1146 Parameters 

1147 ---------- 

1148 ar1 : array_like 

1149 Input array. 

1150 ar2 : array_like 

1151 Input comparison array. 

1152 assume_unique : bool 

1153 If True, the input arrays are both assumed to be unique, which 

1154 can speed up the calculation. Default is False. 

1155 

1156 Returns 

1157 ------- 

1158 setdiff1d : ndarray 

1159 1D array of values in `ar1` that are not in `ar2`. The result 

1160 is sorted when `assume_unique=False`, but otherwise only sorted 

1161 if the input is sorted. 

1162 

1163 Examples 

1164 -------- 

1165 >>> a = np.array([1, 2, 3, 2, 4, 1]) 

1166 >>> b = np.array([3, 4, 5, 6]) 

1167 >>> np.setdiff1d(a, b) 

1168 array([1, 2]) 

1169 

1170 """ 

1171 if assume_unique: 

1172 ar1 = np.asarray(ar1).ravel() 

1173 else: 

1174 ar1 = unique(ar1) 

1175 ar2 = unique(ar2) 

1176 return ar1[_in1d(ar1, ar2, assume_unique=True, invert=True)]
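
# Illustrative sketch (not part of the module): setdiff1d is essentially a
# membership test with the result inverted, applied to the deduplicated
# first array.
import numpy as np

_a = np.array([1, 2, 3, 2, 4, 1])
_b = np.array([3, 4, 5, 6])
_ua = np.unique(_a)                                   # [1 2 3 4]
assert np.array_equal(_ua[~np.isin(_ua, _b)], np.setdiff1d(_a, _b))   # [1 2]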