Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/numpy/lib/_arraysetops_impl.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

289 statements  

1""" 

2Set operations for arrays based on sorting. 

3 

4Notes 

5----- 

6 

7For floating point arrays, inaccurate results may appear due to usual round-off 

8and floating point comparison issues. 

9 

10Speed could be gained in some operations by an implementation of 

11`numpy.sort`, that can provide directly the permutation vectors, thus avoiding 

12calls to `numpy.argsort`. 

13 

14Original author: Robert Cimrman 

15 

16""" 

17import functools 

18from typing import NamedTuple 

19 

20import numpy as np 

21from numpy._core import overrides 

22from numpy._core._multiarray_umath import _array_converter, _unique_hash 

23from numpy.lib.array_utils import normalize_axis_index 

24 

25array_function_dispatch = functools.partial( 

26 overrides.array_function_dispatch, module='numpy') 

27 

28 

29__all__ = [ 

30 "ediff1d", "intersect1d", "isin", "setdiff1d", "setxor1d", 

31 "union1d", "unique", "unique_all", "unique_counts", "unique_inverse", 

32 "unique_values" 

33] 

34 

35 

36def _ediff1d_dispatcher(ary, to_end=None, to_begin=None): 

37 return (ary, to_end, to_begin) 

38 

39 

40@array_function_dispatch(_ediff1d_dispatcher) 

41def ediff1d(ary, to_end=None, to_begin=None): 

42 """ 

43 The differences between consecutive elements of an array. 

44 

45 Parameters 

46 ---------- 

47 ary : array_like 

48 If necessary, will be flattened before the differences are taken. 

49 to_end : array_like, optional 

50 Number(s) to append at the end of the returned differences. 

51 to_begin : array_like, optional 

52 Number(s) to prepend at the beginning of the returned differences. 

53 

54 Returns 

55 ------- 

56 ediff1d : ndarray 

57 The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``. 

58 

59 See Also 

60 -------- 

61 diff, gradient 

62 

63 Notes 

64 ----- 

65 When applied to masked arrays, this function drops the mask information 

66 if the `to_begin` and/or `to_end` parameters are used. 

67 

68 Examples 

69 -------- 

70 >>> import numpy as np 

71 >>> x = np.array([1, 2, 4, 7, 0]) 

72 >>> np.ediff1d(x) 

73 array([ 1, 2, 3, -7]) 

74 

75 >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99])) 

76 array([-99, 1, 2, ..., -7, 88, 99]) 

77 

78 The returned array is always 1D. 

79 

80 >>> y = [[1, 2, 4], [1, 6, 24]] 

81 >>> np.ediff1d(y) 

82 array([ 1, 2, -3, 5, 18]) 

83 

84 """ 

85 conv = _array_converter(ary) 

86 # Convert to (any) array and ravel: 

87 ary = conv[0].ravel() 

88 

89 # enforce that the dtype of `ary` is used for the output 

90 dtype_req = ary.dtype 

91 

92 # fast track default case 

93 if to_begin is None and to_end is None: 

94 return ary[1:] - ary[:-1] 

95 

96 if to_begin is None: 

97 l_begin = 0 

98 else: 

99 to_begin = np.asanyarray(to_begin) 

100 if not np.can_cast(to_begin, dtype_req, casting="same_kind"): 

101 raise TypeError("dtype of `to_begin` must be compatible " 

102 "with input `ary` under the `same_kind` rule.") 

103 

104 to_begin = to_begin.ravel() 

105 l_begin = len(to_begin) 

106 

107 if to_end is None: 

108 l_end = 0 

109 else: 

110 to_end = np.asanyarray(to_end) 

111 if not np.can_cast(to_end, dtype_req, casting="same_kind"): 

112 raise TypeError("dtype of `to_end` must be compatible " 

113 "with input `ary` under the `same_kind` rule.") 

114 

115 to_end = to_end.ravel() 

116 l_end = len(to_end) 

117 

118 # do the calculation in place and copy to_begin and to_end 

119 l_diff = max(len(ary) - 1, 0) 

120 result = np.empty_like(ary, shape=l_diff + l_begin + l_end) 

121 

122 if l_begin > 0: 

123 result[:l_begin] = to_begin 

124 if l_end > 0: 

125 result[l_begin + l_diff:] = to_end 

126 np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff]) 

127 

128 return conv.wrap(result) 

129 

130 

131def _unpack_tuple(x): 

132 """ Unpacks one-element tuples for use as return values """ 

133 if len(x) == 1: 

134 return x[0] 

135 else: 

136 return x 

137 

138 

139def _unique_dispatcher(ar, return_index=None, return_inverse=None, 

140 return_counts=None, axis=None, *, equal_nan=None, 

141 sorted=True): 

142 return (ar,) 

143 

144 

145@array_function_dispatch(_unique_dispatcher) 

146def unique(ar, return_index=False, return_inverse=False, 

147 return_counts=False, axis=None, *, equal_nan=True, 

148 sorted=True): 

149 """ 

150 Find the unique elements of an array. 

151 

152 Returns the sorted unique elements of an array. There are three optional 

153 outputs in addition to the unique elements: 

154 

155 * the indices of the input array that give the unique values 

156 * the indices of the unique array that reconstruct the input array 

157 * the number of times each unique value comes up in the input array 

158 

159 Parameters 

160 ---------- 

161 ar : array_like 

162 Input array. Unless `axis` is specified, this will be flattened if it 

163 is not already 1-D. 

164 return_index : bool, optional 

165 If True, also return the indices of `ar` (along the specified axis, 

166 if provided, or in the flattened array) that result in the unique array. 

167 return_inverse : bool, optional 

168 If True, also return the indices of the unique array (for the specified 

169 axis, if provided) that can be used to reconstruct `ar`. 

170 return_counts : bool, optional 

171 If True, also return the number of times each unique item appears 

172 in `ar`. 

173 axis : int or None, optional 

174 The axis to operate on. If None, `ar` will be flattened. If an integer, 

175 the subarrays indexed by the given axis will be flattened and treated 

176 as the elements of a 1-D array with the dimension of the given axis, 

177 see the notes for more details. Object arrays or structured arrays 

178 that contain objects are not supported if the `axis` kwarg is used. The 

179 default is None. 

180 

181 equal_nan : bool, optional 

182 If True, collapses multiple NaN values in the return array into one. 

183 

184 .. versionadded:: 1.24 

185 

186 sorted : bool, optional 

187 If True, the unique elements are sorted. Elements may be sorted in 

188 practice even if ``sorted=False``, but this could change without 

189 notice. 

190 

191 .. versionadded:: 2.3 

192 

193 Returns 

194 ------- 

195 unique : ndarray 

196 The sorted unique values. 

197 unique_indices : ndarray, optional 

198 The indices of the first occurrences of the unique values in the 

199 original array. Only provided if `return_index` is True. 

200 unique_inverse : ndarray, optional 

201 The indices to reconstruct the original array from the 

202 unique array. Only provided if `return_inverse` is True. 

203 unique_counts : ndarray, optional 

204 The number of times each of the unique values comes up in the 

205 original array. Only provided if `return_counts` is True. 

206 

207 See Also 

208 -------- 

209 repeat : Repeat elements of an array. 

210 sort : Return a sorted copy of an array. 

211 

212 Notes 

213 ----- 

214 When an axis is specified the subarrays indexed by the axis are sorted. 

215 This is done by making the specified axis the first dimension of the array 

216 (move the axis to the first dimension to keep the order of the other axes) 

217 and then flattening the subarrays in C order. The flattened subarrays are 

218 then viewed as a structured type with each element given a label, with the 

219 effect that we end up with a 1-D array of structured types that can be 

220 treated in the same way as any other 1-D array. The result is that the 

221 flattened subarrays are sorted in lexicographic order starting with the 

222 first element. 

223 

224 .. versionchanged:: 1.21 

225 Like np.sort, NaN will sort to the end of the values. 

226 For complex arrays all NaN values are considered equivalent 

227 (no matter whether the NaN is in the real or imaginary part). 

228 As the representant for the returned array the smallest one in the 

229 lexicographical order is chosen - see np.sort for how the lexicographical 

230 order is defined for complex arrays. 

231 

232 .. versionchanged:: 2.0 

233 For multi-dimensional inputs, ``unique_inverse`` is reshaped 

234 such that the input can be reconstructed using 

235 ``np.take(unique, unique_inverse, axis=axis)``. The result is 

236 now not 1-dimensional when ``axis=None``. 

237 

238 Note that in NumPy 2.0.0 a higher dimensional array was returned also 

239 when ``axis`` was not ``None``. This was reverted, but 

240 ``inverse.reshape(-1)`` can be used to ensure compatibility with both 

241 versions. 

242 

243 Examples 

244 -------- 

245 >>> import numpy as np 

246 >>> np.unique([1, 1, 2, 2, 3, 3]) 

247 array([1, 2, 3]) 

248 >>> a = np.array([[1, 1], [2, 3]]) 

249 >>> np.unique(a) 

250 array([1, 2, 3]) 

251 

252 Return the unique rows of a 2D array 

253 

254 >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]]) 

255 >>> np.unique(a, axis=0) 

256 array([[1, 0, 0], [2, 3, 4]]) 

257 

258 Return the indices of the original array that give the unique values: 

259 

260 >>> a = np.array(['a', 'b', 'b', 'c', 'a']) 

261 >>> u, indices = np.unique(a, return_index=True) 

262 >>> u 

263 array(['a', 'b', 'c'], dtype='<U1') 

264 >>> indices 

265 array([0, 1, 3]) 

266 >>> a[indices] 

267 array(['a', 'b', 'c'], dtype='<U1') 

268 

269 Reconstruct the input array from the unique values and inverse: 

270 

271 >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) 

272 >>> u, indices = np.unique(a, return_inverse=True) 

273 >>> u 

274 array([1, 2, 3, 4, 6]) 

275 >>> indices 

276 array([0, 1, 4, 3, 1, 2, 1]) 

277 >>> u[indices] 

278 array([1, 2, 6, 4, 2, 3, 2]) 

279 

280 Reconstruct the input values from the unique values and counts: 

281 

282 >>> a = np.array([1, 2, 6, 4, 2, 3, 2]) 

283 >>> values, counts = np.unique(a, return_counts=True) 

284 >>> values 

285 array([1, 2, 3, 4, 6]) 

286 >>> counts 

287 array([1, 3, 1, 1, 1]) 

288 >>> np.repeat(values, counts) 

289 array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved 

290 

291 """ 

292 ar = np.asanyarray(ar) 

293 if axis is None or ar.ndim == 1: 

294 if axis is not None: 

295 normalize_axis_index(axis, ar.ndim) 

296 ret = _unique1d(ar, return_index, return_inverse, return_counts, 

297 equal_nan=equal_nan, inverse_shape=ar.shape, axis=None, 

298 sorted=sorted) 

299 return _unpack_tuple(ret) 

300 

301 # axis was specified and not None 

302 try: 

303 ar = np.moveaxis(ar, axis, 0) 

304 except np.exceptions.AxisError: 

305 # this removes the "axis1" or "axis2" prefix from the error message 

306 raise np.exceptions.AxisError(axis, ar.ndim) from None 

307 inverse_shape = [1] * ar.ndim 

308 inverse_shape[axis] = ar.shape[0] 

309 

310 # Must reshape to a contiguous 2D array for this to work... 

311 orig_shape, orig_dtype = ar.shape, ar.dtype 

312 ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp)) 

313 ar = np.ascontiguousarray(ar) 

314 dtype = [(f'f{i}', ar.dtype) for i in range(ar.shape[1])] 

315 

316 # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured 

317 # data type with `m` fields where each field has the data type of `ar`. 

318 # In the following, we create the array `consolidated`, which has 

319 # shape `(n,)` with data type `dtype`. 

320 try: 

321 if ar.shape[1] > 0: 

322 consolidated = ar.view(dtype) 

323 else: 

324 # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is 

325 # a data type with itemsize 0, and the call `ar.view(dtype)` will 

326 # fail. Instead, we'll use `np.empty` to explicitly create the 

327 # array with shape `(len(ar),)`. Because `dtype` in this case has 

328 # itemsize 0, the total size of the result is still 0 bytes. 

329 consolidated = np.empty(len(ar), dtype=dtype) 

330 except TypeError as e: 

331 # There's no good way to do this for object arrays, etc... 

332 msg = 'The axis argument to unique is not supported for dtype {dt}' 

333 raise TypeError(msg.format(dt=ar.dtype)) from e 

334 

335 def reshape_uniq(uniq): 

336 n = len(uniq) 

337 uniq = uniq.view(orig_dtype) 

338 uniq = uniq.reshape(n, *orig_shape[1:]) 

339 uniq = np.moveaxis(uniq, 0, axis) 

340 return uniq 

341 

342 output = _unique1d(consolidated, return_index, 

343 return_inverse, return_counts, 

344 equal_nan=equal_nan, inverse_shape=inverse_shape, 

345 axis=axis, sorted=sorted) 

346 output = (reshape_uniq(output[0]),) + output[1:] 

347 return _unpack_tuple(output) 

348 

349 

350def _unique1d(ar, return_index=False, return_inverse=False, 

351 return_counts=False, *, equal_nan=True, inverse_shape=None, 

352 axis=None, sorted=True): 

353 """ 

354 Find the unique elements of an array, ignoring shape. 

355 

356 Uses a hash table to find the unique elements if possible. 

357 """ 

358 ar = np.asanyarray(ar).flatten() 

359 if len(ar.shape) != 1: 

360 # np.matrix, and maybe some other array subclasses, insist on keeping 

361 # two dimensions for all operations. Coerce to an ndarray in such cases. 

362 ar = np.asarray(ar).flatten() 

363 

364 optional_indices = return_index or return_inverse 

365 

366 # masked arrays are not supported yet. 

367 if not optional_indices and not return_counts and not np.ma.is_masked(ar): 

368 # First we convert the array to a numpy array, later we wrap it back 

369 # in case it was a subclass of numpy.ndarray. 

370 conv = _array_converter(ar) 

371 ar_, = conv 

372 

373 if (hash_unique := _unique_hash(ar_, equal_nan=equal_nan)) \ 

374 is not NotImplemented: 

375 if sorted: 

376 hash_unique.sort() 

377 # We wrap the result back in case it was a subclass of numpy.ndarray. 

378 return (conv.wrap(hash_unique),) 

379 

380 # If we don't use the hash map, we use the slower sorting method. 

381 if optional_indices: 

382 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort') 

383 aux = ar[perm] 

384 else: 

385 ar.sort() 

386 aux = ar 

387 mask = np.empty(aux.shape, dtype=np.bool) 

388 mask[:1] = True 

389 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and 

390 np.isnan(aux[-1])): 

391 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent 

392 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left') 

393 else: 

394 aux_firstnan = np.searchsorted(aux, aux[-1], side='left') 

395 if aux_firstnan > 0: 

396 mask[1:aux_firstnan] = ( 

397 aux[1:aux_firstnan] != aux[:aux_firstnan - 1]) 

398 mask[aux_firstnan] = True 

399 mask[aux_firstnan + 1:] = False 

400 else: 

401 mask[1:] = aux[1:] != aux[:-1] 

402 

403 ret = (aux[mask],) 

404 if return_index: 

405 ret += (perm[mask],) 

406 if return_inverse: 

407 imask = np.cumsum(mask) - 1 

408 inv_idx = np.empty(mask.shape, dtype=np.intp) 

409 inv_idx[perm] = imask 

410 ret += (inv_idx.reshape(inverse_shape) if axis is None else inv_idx,) 

411 if return_counts: 

412 idx = np.concatenate(np.nonzero(mask) + ([mask.size],)) 

413 ret += (np.diff(idx),) 

414 return ret 

415 

416 

417# Array API set functions 

418 

419class UniqueAllResult(NamedTuple): 

420 values: np.ndarray 

421 indices: np.ndarray 

422 inverse_indices: np.ndarray 

423 counts: np.ndarray 

424 

425 

426class UniqueCountsResult(NamedTuple): 

427 values: np.ndarray 

428 counts: np.ndarray 

429 

430 

431class UniqueInverseResult(NamedTuple): 

432 values: np.ndarray 

433 inverse_indices: np.ndarray 

434 

435 

436def _unique_all_dispatcher(x, /): 

437 return (x,) 

438 

439 

440@array_function_dispatch(_unique_all_dispatcher) 

441def unique_all(x): 

442 """ 

443 Find the unique elements of an array, and counts, inverse, and indices. 

444 

445 This function is an Array API compatible alternative to:: 

446 

447 np.unique(x, return_index=True, return_inverse=True, 

448 return_counts=True, equal_nan=False, sorted=False) 

449 

450 but returns a namedtuple for easier access to each output. 

451 

452 .. note:: 

453 This function currently always returns a sorted result, however, 

454 this could change in any NumPy minor release. 

455 

456 Parameters 

457 ---------- 

458 x : array_like 

459 Input array. It will be flattened if it is not already 1-D. 

460 

461 Returns 

462 ------- 

463 out : namedtuple 

464 The result containing: 

465 

466 * values - The unique elements of an input array. 

467 * indices - The first occurring indices for each unique element. 

468 * inverse_indices - The indices from the set of unique elements 

469 that reconstruct `x`. 

470 * counts - The corresponding counts for each unique element. 

471 

472 See Also 

473 -------- 

474 unique : Find the unique elements of an array. 

475 

476 Examples 

477 -------- 

478 >>> import numpy as np 

479 >>> x = [1, 1, 2] 

480 >>> uniq = np.unique_all(x) 

481 >>> uniq.values 

482 array([1, 2]) 

483 >>> uniq.indices 

484 array([0, 2]) 

485 >>> uniq.inverse_indices 

486 array([0, 0, 1]) 

487 >>> uniq.counts 

488 array([2, 1]) 

489 """ 

490 result = unique( 

491 x, 

492 return_index=True, 

493 return_inverse=True, 

494 return_counts=True, 

495 equal_nan=False, 

496 ) 

497 return UniqueAllResult(*result) 

498 

499 

500def _unique_counts_dispatcher(x, /): 

501 return (x,) 

502 

503 

504@array_function_dispatch(_unique_counts_dispatcher) 

505def unique_counts(x): 

506 """ 

507 Find the unique elements and counts of an input array `x`. 

508 

509 This function is an Array API compatible alternative to:: 

510 

511 np.unique(x, return_counts=True, equal_nan=False, sorted=False) 

512 

513 but returns a namedtuple for easier access to each output. 

514 

515 .. note:: 

516 This function currently always returns a sorted result, however, 

517 this could change in any NumPy minor release. 

518 

519 Parameters 

520 ---------- 

521 x : array_like 

522 Input array. It will be flattened if it is not already 1-D. 

523 

524 Returns 

525 ------- 

526 out : namedtuple 

527 The result containing: 

528 

529 * values - The unique elements of an input array. 

530 * counts - The corresponding counts for each unique element. 

531 

532 See Also 

533 -------- 

534 unique : Find the unique elements of an array. 

535 

536 Examples 

537 -------- 

538 >>> import numpy as np 

539 >>> x = [1, 1, 2] 

540 >>> uniq = np.unique_counts(x) 

541 >>> uniq.values 

542 array([1, 2]) 

543 >>> uniq.counts 

544 array([2, 1]) 

545 """ 

546 result = unique( 

547 x, 

548 return_index=False, 

549 return_inverse=False, 

550 return_counts=True, 

551 equal_nan=False, 

552 ) 

553 return UniqueCountsResult(*result) 

554 

555 

556def _unique_inverse_dispatcher(x, /): 

557 return (x,) 

558 

559 

560@array_function_dispatch(_unique_inverse_dispatcher) 

561def unique_inverse(x): 

562 """ 

563 Find the unique elements of `x` and indices to reconstruct `x`. 

564 

565 This function is an Array API compatible alternative to:: 

566 

567 np.unique(x, return_inverse=True, equal_nan=False, sorted=False) 

568 

569 but returns a namedtuple for easier access to each output. 

570 

571 .. note:: 

572 This function currently always returns a sorted result, however, 

573 this could change in any NumPy minor release. 

574 

575 Parameters 

576 ---------- 

577 x : array_like 

578 Input array. It will be flattened if it is not already 1-D. 

579 

580 Returns 

581 ------- 

582 out : namedtuple 

583 The result containing: 

584 

585 * values - The unique elements of an input array. 

586 * inverse_indices - The indices from the set of unique elements 

587 that reconstruct `x`. 

588 

589 See Also 

590 -------- 

591 unique : Find the unique elements of an array. 

592 

593 Examples 

594 -------- 

595 >>> import numpy as np 

596 >>> x = [1, 1, 2] 

597 >>> uniq = np.unique_inverse(x) 

598 >>> uniq.values 

599 array([1, 2]) 

600 >>> uniq.inverse_indices 

601 array([0, 0, 1]) 

602 """ 

603 result = unique( 

604 x, 

605 return_index=False, 

606 return_inverse=True, 

607 return_counts=False, 

608 equal_nan=False, 

609 ) 

610 return UniqueInverseResult(*result) 

611 

612 

613def _unique_values_dispatcher(x, /): 

614 return (x,) 

615 

616 

617@array_function_dispatch(_unique_values_dispatcher) 

618def unique_values(x): 

619 """ 

620 Returns the unique elements of an input array `x`. 

621 

622 This function is an Array API compatible alternative to:: 

623 

624 np.unique(x, equal_nan=False, sorted=False) 

625 

626 .. versionchanged:: 2.3 

627 The algorithm was changed to a faster one that does not rely on 

628 sorting, and hence the results are no longer implicitly sorted. 

629 

630 Parameters 

631 ---------- 

632 x : array_like 

633 Input array. It will be flattened if it is not already 1-D. 

634 

635 Returns 

636 ------- 

637 out : ndarray 

638 The unique elements of an input array. 

639 

640 See Also 

641 -------- 

642 unique : Find the unique elements of an array. 

643 

644 Examples 

645 -------- 

646 >>> import numpy as np 

647 >>> np.unique_values([1, 1, 2]) 

648 array([1, 2]) # may vary 

649 

650 """ 

651 return unique( 

652 x, 

653 return_index=False, 

654 return_inverse=False, 

655 return_counts=False, 

656 equal_nan=False, 

657 sorted=False, 

658 ) 

659 

660 

661def _intersect1d_dispatcher( 

662 ar1, ar2, assume_unique=None, return_indices=None): 

663 return (ar1, ar2) 

664 

665 

666@array_function_dispatch(_intersect1d_dispatcher) 

667def intersect1d(ar1, ar2, assume_unique=False, return_indices=False): 

668 """ 

669 Find the intersection of two arrays. 

670 

671 Return the sorted, unique values that are in both of the input arrays. 

672 

673 Parameters 

674 ---------- 

675 ar1, ar2 : array_like 

676 Input arrays. Will be flattened if not already 1D. 

677 assume_unique : bool 

678 If True, the input arrays are both assumed to be unique, which 

679 can speed up the calculation. If True but ``ar1`` or ``ar2`` are not 

680 unique, incorrect results and out-of-bounds indices could result. 

681 Default is False. 

682 return_indices : bool 

683 If True, the indices which correspond to the intersection of the two 

684 arrays are returned. The first instance of a value is used if there are 

685 multiple. Default is False. 

686 

687 Returns 

688 ------- 

689 intersect1d : ndarray 

690 Sorted 1D array of common and unique elements. 

691 comm1 : ndarray 

692 The indices of the first occurrences of the common values in `ar1`. 

693 Only provided if `return_indices` is True. 

694 comm2 : ndarray 

695 The indices of the first occurrences of the common values in `ar2`. 

696 Only provided if `return_indices` is True. 

697 

698 Examples 

699 -------- 

700 >>> import numpy as np 

701 >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1]) 

702 array([1, 3]) 

703 

704 To intersect more than two arrays, use functools.reduce: 

705 

706 >>> from functools import reduce 

707 >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) 

708 array([3]) 

709 

710 To return the indices of the values common to the input arrays 

711 along with the intersected values: 

712 

713 >>> x = np.array([1, 1, 2, 3, 4]) 

714 >>> y = np.array([2, 1, 4, 6]) 

715 >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True) 

716 >>> x_ind, y_ind 

717 (array([0, 2, 4]), array([1, 0, 2])) 

718 >>> xy, x[x_ind], y[y_ind] 

719 (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4])) 

720 

721 """ 

722 ar1 = np.asanyarray(ar1) 

723 ar2 = np.asanyarray(ar2) 

724 

725 if not assume_unique: 

726 if return_indices: 

727 ar1, ind1 = unique(ar1, return_index=True) 

728 ar2, ind2 = unique(ar2, return_index=True) 

729 else: 

730 ar1 = unique(ar1) 

731 ar2 = unique(ar2) 

732 else: 

733 ar1 = ar1.ravel() 

734 ar2 = ar2.ravel() 

735 

736 aux = np.concatenate((ar1, ar2)) 

737 if return_indices: 

738 aux_sort_indices = np.argsort(aux, kind='mergesort') 

739 aux = aux[aux_sort_indices] 

740 else: 

741 aux.sort() 

742 

743 mask = aux[1:] == aux[:-1] 

744 int1d = aux[:-1][mask] 

745 

746 if return_indices: 

747 ar1_indices = aux_sort_indices[:-1][mask] 

748 ar2_indices = aux_sort_indices[1:][mask] - ar1.size 

749 if not assume_unique: 

750 ar1_indices = ind1[ar1_indices] 

751 ar2_indices = ind2[ar2_indices] 

752 

753 return int1d, ar1_indices, ar2_indices 

754 else: 

755 return int1d 

756 

757 

758def _setxor1d_dispatcher(ar1, ar2, assume_unique=None): 

759 return (ar1, ar2) 

760 

761 

762@array_function_dispatch(_setxor1d_dispatcher) 

763def setxor1d(ar1, ar2, assume_unique=False): 

764 """ 

765 Find the set exclusive-or of two arrays. 

766 

767 Return the sorted, unique values that are in only one (not both) of the 

768 input arrays. 

769 

770 Parameters 

771 ---------- 

772 ar1, ar2 : array_like 

773 Input arrays. 

774 assume_unique : bool 

775 If True, the input arrays are both assumed to be unique, which 

776 can speed up the calculation. Default is False. 

777 

778 Returns 

779 ------- 

780 setxor1d : ndarray 

781 Sorted 1D array of unique values that are in only one of the input 

782 arrays. 

783 

784 Examples 

785 -------- 

786 >>> import numpy as np 

787 >>> a = np.array([1, 2, 3, 2, 4]) 

788 >>> b = np.array([2, 3, 5, 7, 5]) 

789 >>> np.setxor1d(a,b) 

790 array([1, 4, 5, 7]) 

791 

792 """ 

793 if not assume_unique: 

794 ar1 = unique(ar1) 

795 ar2 = unique(ar2) 

796 

797 aux = np.concatenate((ar1, ar2), axis=None) 

798 if aux.size == 0: 

799 return aux 

800 

801 aux.sort() 

802 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True])) 

803 return aux[flag[1:] & flag[:-1]] 

804 

805 

806def _isin(ar1, ar2, assume_unique=False, invert=False, *, kind=None): 

807 # Ravel both arrays, behavior for the first array could be different 

808 ar1 = np.asarray(ar1).ravel() 

809 ar2 = np.asarray(ar2).ravel() 

810 

811 # Ensure that iteration through object arrays yields size-1 arrays 

812 if ar2.dtype == object: 

813 ar2 = ar2.reshape(-1, 1) 

814 

815 if kind not in {None, 'sort', 'table'}: 

816 raise ValueError( 

817 f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.") 

818 

819 # Can use the table method if all arrays are integers or boolean: 

820 is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2)) 

821 use_table_method = is_int_arrays and kind in {None, 'table'} 

822 

823 if use_table_method: 

824 if ar2.size == 0: 

825 if invert: 

826 return np.ones_like(ar1, dtype=bool) 

827 else: 

828 return np.zeros_like(ar1, dtype=bool) 

829 

830 # Convert booleans to uint8 so we can use the fast integer algorithm 

831 if ar1.dtype == bool: 

832 ar1 = ar1.astype(np.uint8) 

833 if ar2.dtype == bool: 

834 ar2 = ar2.astype(np.uint8) 

835 

836 ar2_min = int(np.min(ar2)) 

837 ar2_max = int(np.max(ar2)) 

838 

839 ar2_range = ar2_max - ar2_min 

840 

841 # Constraints on whether we can actually use the table method: 

842 # 1. Assert memory usage is not too large 

843 below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size) 

844 # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype 

845 range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max 

846 

847 # Optimal performance is for approximately 

848 # log10(size) > (log10(range) - 2.27) / 0.927. 

849 # However, here we set the requirement that by default 

850 # the intermediate array can only be 6x 

851 # the combined memory allocation of the original 

852 # arrays. See discussion on 

853 # https://github.com/numpy/numpy/pull/12065. 

854 

855 if ( 

856 range_safe_from_overflow and 

857 (below_memory_constraint or kind == 'table') 

858 ): 

859 

860 if invert: 

861 outgoing_array = np.ones_like(ar1, dtype=bool) 

862 else: 

863 outgoing_array = np.zeros_like(ar1, dtype=bool) 

864 

865 # Make elements 1 where the integer exists in ar2 

866 if invert: 

867 isin_helper_ar = np.ones(ar2_range + 1, dtype=bool) 

868 isin_helper_ar[ar2 - ar2_min] = 0 

869 else: 

870 isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool) 

871 isin_helper_ar[ar2 - ar2_min] = 1 

872 

873 # Mask out elements we know won't work 

874 basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min) 

875 in_range_ar1 = ar1[basic_mask] 

876 if in_range_ar1.size == 0: 

877 # Nothing more to do, since all values are out of range. 

878 return outgoing_array 

879 

880 # Unfortunately, ar2_min can be out of range for `intp` even 

881 # if the calculation result must fit in range (and be positive). 

882 # In that case, use ar2.dtype which must work for all unmasked 

883 # values. 

884 try: 

885 ar2_min = np.array(ar2_min, dtype=np.intp) 

886 dtype = np.intp 

887 except OverflowError: 

888 dtype = ar2.dtype 

889 

890 out = np.empty_like(in_range_ar1, dtype=np.intp) 

891 outgoing_array[basic_mask] = isin_helper_ar[ 

892 np.subtract(in_range_ar1, ar2_min, dtype=dtype, 

893 out=out, casting="unsafe")] 

894 

895 return outgoing_array 

896 elif kind == 'table': # not range_safe_from_overflow 

897 raise RuntimeError( 

898 "You have specified kind='table', " 

899 "but the range of values in `ar2` or `ar1` exceed the " 

900 "maximum integer of the datatype. " 

901 "Please set `kind` to None or 'sort'." 

902 ) 

903 elif kind == 'table': 

904 raise ValueError( 

905 "The 'table' method is only " 

906 "supported for boolean or integer arrays. " 

907 "Please select 'sort' or None for kind." 

908 ) 

909 

910 # Check if one of the arrays may contain arbitrary objects 

911 contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject 

912 

913 # This code is run when 

914 # a) the first condition is true, making the code significantly faster 

915 # b) the second condition is true (i.e. `ar1` or `ar2` may contain 

916 # arbitrary objects), since then sorting is not guaranteed to work 

917 if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object: 

918 if invert: 

919 mask = np.ones(len(ar1), dtype=bool) 

920 for a in ar2: 

921 mask &= (ar1 != a) 

922 else: 

923 mask = np.zeros(len(ar1), dtype=bool) 

924 for a in ar2: 

925 mask |= (ar1 == a) 

926 return mask 

927 

928 # Otherwise use sorting 

929 if not assume_unique: 

930 ar1, rev_idx = np.unique(ar1, return_inverse=True) 

931 ar2 = np.unique(ar2) 

932 

933 ar = np.concatenate((ar1, ar2)) 

934 # We need this to be a stable sort, so always use 'mergesort' 

935 # here. The values from the first array should always come before 

936 # the values from the second array. 

937 order = ar.argsort(kind='mergesort') 

938 sar = ar[order] 

939 if invert: 

940 bool_ar = (sar[1:] != sar[:-1]) 

941 else: 

942 bool_ar = (sar[1:] == sar[:-1]) 

943 flag = np.concatenate((bool_ar, [invert])) 

944 ret = np.empty(ar.shape, dtype=bool) 

945 ret[order] = flag 

946 

947 if assume_unique: 

948 return ret[:len(ar1)] 

949 else: 

950 return ret[rev_idx] 

951 

952 

953def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None, 

954 *, kind=None): 

955 return (element, test_elements) 

956 

957 

958@array_function_dispatch(_isin_dispatcher) 

959def isin(element, test_elements, assume_unique=False, invert=False, *, 

960 kind=None): 

961 """ 

962 Calculates ``element in test_elements``, broadcasting over `element` only. 

963 Returns a boolean array of the same shape as `element` that is True 

964 where an element of `element` is in `test_elements` and False otherwise. 

965 

966 Parameters 

967 ---------- 

968 element : array_like 

969 Input array. 

970 test_elements : array_like 

971 The values against which to test each value of `element`. 

972 This argument is flattened if it is an array or array_like. 

973 See notes for behavior with non-array-like parameters. 

974 assume_unique : bool, optional 

975 If True, the input arrays are both assumed to be unique, which 

976 can speed up the calculation. Default is False. 

977 invert : bool, optional 

978 If True, the values in the returned array are inverted, as if 

979 calculating `element not in test_elements`. Default is False. 

980 ``np.isin(a, b, invert=True)`` is equivalent to (but faster 

981 than) ``np.invert(np.isin(a, b))``. 

982 kind : {None, 'sort', 'table'}, optional 

983 The algorithm to use. This will not affect the final result, 

984 but will affect the speed and memory use. The default, None, 

985 will select automatically based on memory considerations. 

986 

987 * If 'sort', will use a mergesort-based approach. This will have 

988 a memory usage of roughly 6 times the sum of the sizes of 

989 `element` and `test_elements`, not accounting for size of dtypes. 

990 * If 'table', will use a lookup table approach similar 

991 to a counting sort. This is only available for boolean and 

992 integer arrays. This will have a memory usage of the 

993 size of `element` plus the max-min value of `test_elements`. 

994 `assume_unique` has no effect when the 'table' option is used. 

995 * If None, will automatically choose 'table' if 

996 the required memory allocation is less than or equal to 

997 6 times the sum of the sizes of `element` and `test_elements`, 

998 otherwise will use 'sort'. This is done to not use 

999 a large amount of memory by default, even though 

1000 'table' may be faster in most cases. If 'table' is chosen, 

1001 `assume_unique` will have no effect. 

1002 

1003 

1004 Returns 

1005 ------- 

1006 isin : ndarray, bool 

1007 Has the same shape as `element`. The values `element[isin]` 

1008 are in `test_elements`. 

1009 

1010 Notes 

1011 ----- 

1012 `isin` is an element-wise function version of the python keyword `in`. 

1013 ``isin(a, b)`` is roughly equivalent to 

1014 ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences. 

1015 

1016 `element` and `test_elements` are converted to arrays if they are not 

1017 already. If `test_elements` is a set (or other non-sequence collection) 

1018 it will be converted to an object array with one element, rather than an 

1019 array of the values contained in `test_elements`. This is a consequence 

1020 of the `array` constructor's way of handling non-sequence collections. 

1021 Converting the set to a list usually gives the desired behavior. 

1022 

1023 Using ``kind='table'`` tends to be faster than `kind='sort'` if the 

1024 following relationship is true: 

1025 ``log10(len(test_elements)) > 

1026 (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``, 

1027 but may use greater memory. The default value for `kind` will 

1028 be automatically selected based only on memory usage, so one may 

1029 manually set ``kind='table'`` if memory constraints can be relaxed. 

1030 

1031 Examples 

1032 -------- 

1033 >>> import numpy as np 

1034 >>> element = 2*np.arange(4).reshape((2, 2)) 

1035 >>> element 

1036 array([[0, 2], 

1037 [4, 6]]) 

1038 >>> test_elements = [1, 2, 4, 8] 

1039 >>> mask = np.isin(element, test_elements) 

1040 >>> mask 

1041 array([[False, True], 

1042 [ True, False]]) 

1043 >>> element[mask] 

1044 array([2, 4]) 

1045 

1046 The indices of the matched values can be obtained with `nonzero`: 

1047 

1048 >>> np.nonzero(mask) 

1049 (array([0, 1]), array([1, 0])) 

1050 

1051 The test can also be inverted: 

1052 

1053 >>> mask = np.isin(element, test_elements, invert=True) 

1054 >>> mask 

1055 array([[ True, False], 

1056 [False, True]]) 

1057 >>> element[mask] 

1058 array([0, 6]) 

1059 

1060 Because of how `array` handles sets, the following does not 

1061 work as expected: 

1062 

1063 >>> test_set = {1, 2, 4, 8} 

1064 >>> np.isin(element, test_set) 

1065 array([[False, False], 

1066 [False, False]]) 

1067 

1068 Casting the set to a list gives the expected result: 

1069 

1070 >>> np.isin(element, list(test_set)) 

1071 array([[False, True], 

1072 [ True, False]]) 

1073 """ 

1074 element = np.asarray(element) 

1075 return _isin(element, test_elements, assume_unique=assume_unique, 

1076 invert=invert, kind=kind).reshape(element.shape) 

1077 

1078 

1079def _union1d_dispatcher(ar1, ar2): 

1080 return (ar1, ar2) 

1081 

1082 

1083@array_function_dispatch(_union1d_dispatcher) 

1084def union1d(ar1, ar2): 

1085 """ 

1086 Find the union of two arrays. 

1087 

1088 Return the unique, sorted array of values that are in either of the two 

1089 input arrays. 

1090 

1091 Parameters 

1092 ---------- 

1093 ar1, ar2 : array_like 

1094 Input arrays. They are flattened if they are not already 1D. 

1095 

1096 Returns 

1097 ------- 

1098 union1d : ndarray 

1099 Unique, sorted union of the input arrays. 

1100 

1101 Examples 

1102 -------- 

1103 >>> import numpy as np 

1104 >>> np.union1d([-1, 0, 1], [-2, 0, 2]) 

1105 array([-2, -1, 0, 1, 2]) 

1106 

1107 To find the union of more than two arrays, use functools.reduce: 

1108 

1109 >>> from functools import reduce 

1110 >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2])) 

1111 array([1, 2, 3, 4, 6]) 

1112 """ 

1113 return unique(np.concatenate((ar1, ar2), axis=None)) 

1114 

1115 

1116def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None): 

1117 return (ar1, ar2) 

1118 

1119 

1120@array_function_dispatch(_setdiff1d_dispatcher) 

1121def setdiff1d(ar1, ar2, assume_unique=False): 

1122 """ 

1123 Find the set difference of two arrays. 

1124 

1125 Return the unique values in `ar1` that are not in `ar2`. 

1126 

1127 Parameters 

1128 ---------- 

1129 ar1 : array_like 

1130 Input array. 

1131 ar2 : array_like 

1132 Input comparison array. 

1133 assume_unique : bool 

1134 If True, the input arrays are both assumed to be unique, which 

1135 can speed up the calculation. Default is False. 

1136 

1137 Returns 

1138 ------- 

1139 setdiff1d : ndarray 

1140 1D array of values in `ar1` that are not in `ar2`. The result 

1141 is sorted when `assume_unique=False`, but otherwise only sorted 

1142 if the input is sorted. 

1143 

1144 Examples 

1145 -------- 

1146 >>> import numpy as np 

1147 >>> a = np.array([1, 2, 3, 2, 4, 1]) 

1148 >>> b = np.array([3, 4, 5, 6]) 

1149 >>> np.setdiff1d(a, b) 

1150 array([1, 2]) 

1151 

1152 """ 

1153 if assume_unique: 

1154 ar1 = np.asarray(ar1).ravel() 

1155 else: 

1156 ar1 = unique(ar1) 

1157 ar2 = unique(ar2) 

1158 return ar1[_isin(ar1, ar2, assume_unique=True, invert=True)]