Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/numpy/lib/_arraysetops_impl.py: 20%

283 statements  

"""
Set operations for arrays based on sorting.

Notes
-----

For floating point arrays, inaccurate results may appear due to usual round-off
and floating point comparison issues.

Speed could be gained in some operations by an implementation of
`numpy.sort` that directly provides the permutation vectors, thus avoiding
calls to `numpy.argsort`.

Original author: Robert Cimrman

"""
import functools
import warnings
from typing import NamedTuple

import numpy as np
from numpy._core import overrides
from numpy._core._multiarray_umath import _array_converter


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


__all__ = [
    "ediff1d", "in1d", "intersect1d", "isin", "setdiff1d", "setxor1d",
    "union1d", "unique", "unique_all", "unique_counts", "unique_inverse",
    "unique_values"
]


def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
    return (ary, to_end, to_begin)


@array_function_dispatch(_ediff1d_dispatcher)
def ediff1d(ary, to_end=None, to_begin=None):
    """
    The differences between consecutive elements of an array.

    Parameters
    ----------
    ary : array_like
        If necessary, will be flattened before the differences are taken.
    to_end : array_like, optional
        Number(s) to append at the end of the returned differences.
    to_begin : array_like, optional
        Number(s) to prepend at the beginning of the returned differences.

    Returns
    -------
    ediff1d : ndarray
        The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

    See Also
    --------
    diff, gradient

    Notes
    -----
    When applied to masked arrays, this function drops the mask information
    if the `to_begin` and/or `to_end` parameters are used.

    Examples
    --------
    >>> import numpy as np
    >>> x = np.array([1, 2, 4, 7, 0])
    >>> np.ediff1d(x)
    array([ 1,  2,  3, -7])

    >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
    array([-99,   1,   2, ..., -7,  88,  99])

    The returned array is always 1D.

    >>> y = [[1, 2, 4], [1, 6, 24]]
    >>> np.ediff1d(y)
    array([ 1,  2, -3,  5, 18])

    """
    conv = _array_converter(ary)
    # Convert to (any) array and ravel:
    ary = conv[0].ravel()

    # enforce that the dtype of `ary` is used for the output
    dtype_req = ary.dtype

    # fast track default case
    if to_begin is None and to_end is None:
        return ary[1:] - ary[:-1]

    if to_begin is None:
        l_begin = 0
    else:
        to_begin = np.asanyarray(to_begin)
        if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_begin` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_begin = to_begin.ravel()
        l_begin = len(to_begin)

    if to_end is None:
        l_end = 0
    else:
        to_end = np.asanyarray(to_end)
        if not np.can_cast(to_end, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_end` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_end = to_end.ravel()
        l_end = len(to_end)

    # do the calculation in place and copy to_begin and to_end
    l_diff = max(len(ary) - 1, 0)
    result = np.empty_like(ary, shape=l_diff + l_begin + l_end)
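    # The output buffer is laid out as [to_begin | differences | to_end], e.g.
    # to_begin = [-99], ary = [1, 2, 4] and no to_end gives a length-3 result
    # [-99, 1, 2].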

    if l_begin > 0:
        result[:l_begin] = to_begin
    if l_end > 0:
        result[l_begin + l_diff:] = to_end
    np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])

    return conv.wrap(result)


def _unpack_tuple(x):
    """ Unpacks one-element tuples for use as return values """
    if len(x) == 1:
        return x[0]
    else:
        return x


def _unique_dispatcher(ar, return_index=None, return_inverse=None,
                       return_counts=None, axis=None, *, equal_nan=None):
    return (ar,)


@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None, *, equal_nan=True):
    """
    Find the unique elements of an array.

    Returns the sorted unique elements of an array. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input array that give the unique values
    * the indices of the unique array that reconstruct the input array
    * the number of times each unique value comes up in the input array

    Parameters
    ----------
    ar : array_like
        Input array. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened array) that result in the unique array.
    return_inverse : bool, optional
        If True, also return the indices of the unique array (for the specified
        axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an integer,
        the subarrays indexed by the given axis will be flattened and treated
        as the elements of a 1-D array with the dimension of the given axis,
        see the notes for more details. Object arrays or structured arrays
        that contain objects are not supported if the `axis` kwarg is used. The
        default is None.

    equal_nan : bool, optional
        If True, collapses multiple NaN values in the return array into one.

        .. versionadded:: 1.24

    Returns
    -------
    unique : ndarray
        The sorted unique values.
    unique_indices : ndarray, optional
        The indices of the first occurrences of the unique values in the
        original array. Only provided if `return_index` is True.
    unique_inverse : ndarray, optional
        The indices to reconstruct the original array from the
        unique array. Only provided if `return_inverse` is True.
    unique_counts : ndarray, optional
        The number of times each of the unique values comes up in the
        original array. Only provided if `return_counts` is True.

    See Also
    --------
    repeat : Repeat elements of an array.
    sort : Return a sorted copy of an array.

    Notes
    -----
    When an axis is specified the subarrays indexed by the axis are sorted.
    This is done by making the specified axis the first dimension of the array
    (move the axis to the first dimension to keep the order of the other axes)
    and then flattening the subarrays in C order. The flattened subarrays are
    then viewed as a structured type with each element given a label, with the
    effect that we end up with a 1-D array of structured types that can be
    treated in the same way as any other 1-D array. The result is that the
    flattened subarrays are sorted in lexicographic order starting with the
    first element.

    .. versionchanged:: 1.21
        Like np.sort, NaN will sort to the end of the values.
        For complex arrays all NaN values are considered equivalent
        (no matter whether the NaN is in the real or imaginary part).
        As the representative for the returned array the smallest one in the
        lexicographical order is chosen - see np.sort for how the lexicographical
        order is defined for complex arrays.

    .. versionchanged:: 2.0
        For multi-dimensional inputs, ``unique_inverse`` is reshaped
        such that the input can be reconstructed using
        ``np.take(unique, unique_inverse, axis=axis)``. The result is
        now not 1-dimensional when ``axis=None``.

        Note that in NumPy 2.0.0 a higher dimensional array was returned also
        when ``axis`` was not ``None``. This was reverted, but
        ``inverse.reshape(-1)`` can be used to ensure compatibility with both
        versions.

    Examples
    --------
    >>> import numpy as np
    >>> np.unique([1, 1, 2, 2, 3, 3])
    array([1, 2, 3])
    >>> a = np.array([[1, 1], [2, 3]])
    >>> np.unique(a)
    array([1, 2, 3])

    Return the unique rows of a 2D array

    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> np.unique(a, axis=0)
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original array that give the unique values:

    >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = np.unique(a, return_index=True)
    >>> u
    array(['a', 'b', 'c'], dtype='<U1')
    >>> indices
    array([0, 1, 3])
    >>> a[indices]
    array(['a', 'b', 'c'], dtype='<U1')

    Reconstruct the input array from the unique values and inverse:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = np.unique(a, return_inverse=True)
    >>> u
    array([1, 2, 3, 4, 6])
    >>> indices
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices]
    array([1, 2, 6, 4, 2, 3, 2])

    Reconstruct the input values from the unique values and counts:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> values, counts = np.unique(a, return_counts=True)
    >>> values
    array([1, 2, 3, 4, 6])
    >>> counts
    array([1, 3, 1, 1, 1])
    >>> np.repeat(values, counts)
    array([1, 2, 2, 2, 3, 4, 6])    # original order not preserved

    """
    ar = np.asanyarray(ar)
    if axis is None:
        ret = _unique1d(ar, return_index, return_inverse, return_counts,
                        equal_nan=equal_nan, inverse_shape=ar.shape, axis=None)
        return _unpack_tuple(ret)

    # axis was specified and not None
    try:
        ar = np.moveaxis(ar, axis, 0)
    except np.exceptions.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.exceptions.AxisError(axis, ar.ndim) from None
    inverse_shape = [1] * ar.ndim
    inverse_shape[axis] = ar.shape[0]

    # Must reshape to a contiguous 2D array for this to work...
    orig_shape, orig_dtype = ar.shape, ar.dtype
    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
    ar = np.ascontiguousarray(ar)
    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

    # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
    # data type with `m` fields where each field has the data type of `ar`.
    # In the following, we create the array `consolidated`, which has
    # shape `(n,)` with data type `dtype`.
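    # For example, two rows [1, 0, 0] and [2, 3, 4] of an int64 array become the
    # structured scalars (1, 0, 0) and (2, 3, 4); such scalars compare field by
    # field, so sorting `consolidated` orders whole rows lexicographically.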

    try:
        if ar.shape[1] > 0:
            consolidated = ar.view(dtype)
        else:
            # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
            # a data type with itemsize 0, and the call `ar.view(dtype)` will
            # fail. Instead, we'll use `np.empty` to explicitly create the
            # array with shape `(len(ar),)`. Because `dtype` in this case has
            # itemsize 0, the total size of the result is still 0 bytes.
            consolidated = np.empty(len(ar), dtype=dtype)
    except TypeError as e:
        # There's no good way to do this for object arrays, etc...
        msg = 'The axis argument to unique is not supported for dtype {dt}'
        raise TypeError(msg.format(dt=ar.dtype)) from e

    def reshape_uniq(uniq):
        n = len(uniq)
        uniq = uniq.view(orig_dtype)
        uniq = uniq.reshape(n, *orig_shape[1:])
        uniq = np.moveaxis(uniq, 0, axis)
        return uniq

    output = _unique1d(consolidated, return_index,
                       return_inverse, return_counts,
                       equal_nan=equal_nan, inverse_shape=inverse_shape,
                       axis=axis)
    output = (reshape_uniq(output[0]),) + output[1:]
    return _unpack_tuple(output)


def _unique1d(ar, return_index=False, return_inverse=False,
              return_counts=False, *, equal_nan=True, inverse_shape=None,
              axis=None):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    mask = np.empty(aux.shape, dtype=np.bool)
    mask[:1] = True
    if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
            np.isnan(aux[-1])):
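        # The sorted array ends in one or more NaNs; keep only the first of
        # them, e.g. aux = [1., 2., nan, nan] -> aux_firstnan = 2 and
        # mask = [True, True, True, False], so a single NaN survives.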

        if aux.dtype.kind == "c":  # for complex all NaNs are considered equivalent
            aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
        else:
            aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
        if aux_firstnan > 0:
            mask[1:aux_firstnan] = (
                aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
        mask[aux_firstnan] = True
        mask[aux_firstnan + 1:] = False
    else:
        mask[1:] = aux[1:] != aux[:-1]
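        # e.g. aux = [1, 1, 2, 3, 3] gives mask = [True, False, True, True, False],
        # so aux[mask] below is [1, 2, 3].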

    ret = (aux[mask],)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx.reshape(inverse_shape) if axis is None else inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
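        # e.g. mask = [True, False, True, True, False]: np.nonzero(mask)[0] is
        # [0, 2, 3]; appending mask.size gives [0, 2, 3, 5], and np.diff of that
        # is [2, 1, 2], the run length (count) of each unique value.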

    return ret


# Array API set functions

class UniqueAllResult(NamedTuple):
    values: np.ndarray
    indices: np.ndarray
    inverse_indices: np.ndarray
    counts: np.ndarray


class UniqueCountsResult(NamedTuple):
    values: np.ndarray
    counts: np.ndarray


class UniqueInverseResult(NamedTuple):
    values: np.ndarray
    inverse_indices: np.ndarray


def _unique_all_dispatcher(x, /):
    return (x,)


@array_function_dispatch(_unique_all_dispatcher)
def unique_all(x):
    """
    Find the unique elements of an array, and counts, inverse, and indices.

    This function is an Array API compatible alternative to::

        np.unique(x, return_index=True, return_inverse=True,
                  return_counts=True, equal_nan=False)

    but returns a namedtuple for easier access to each output.

    Parameters
    ----------
    x : array_like
        Input array. It will be flattened if it is not already 1-D.

    Returns
    -------
    out : namedtuple
        The result containing:

        * values - The unique elements of an input array.
        * indices - The first occurring indices for each unique element.
        * inverse_indices - The indices from the set of unique elements
          that reconstruct `x`.
        * counts - The corresponding counts for each unique element.

    See Also
    --------
    unique : Find the unique elements of an array.

    Examples
    --------
    >>> import numpy as np
    >>> x = [1, 1, 2]
    >>> uniq = np.unique_all(x)
    >>> uniq.values
    array([1, 2])
    >>> uniq.indices
    array([0, 2])
    >>> uniq.inverse_indices
    array([0, 0, 1])
    >>> uniq.counts
    array([2, 1])
    """
    result = unique(
        x,
        return_index=True,
        return_inverse=True,
        return_counts=True,
        equal_nan=False
    )
    return UniqueAllResult(*result)


def _unique_counts_dispatcher(x, /):
    return (x,)


@array_function_dispatch(_unique_counts_dispatcher)
def unique_counts(x):
    """
    Find the unique elements and counts of an input array `x`.

    This function is an Array API compatible alternative to::

        np.unique(x, return_counts=True, equal_nan=False)

    but returns a namedtuple for easier access to each output.

    Parameters
    ----------
    x : array_like
        Input array. It will be flattened if it is not already 1-D.

    Returns
    -------
    out : namedtuple
        The result containing:

        * values - The unique elements of an input array.
        * counts - The corresponding counts for each unique element.

    See Also
    --------
    unique : Find the unique elements of an array.

    Examples
    --------
    >>> import numpy as np
    >>> x = [1, 1, 2]
    >>> uniq = np.unique_counts(x)
    >>> uniq.values
    array([1, 2])
    >>> uniq.counts
    array([2, 1])
    """
    result = unique(
        x,
        return_index=False,
        return_inverse=False,
        return_counts=True,
        equal_nan=False
    )
    return UniqueCountsResult(*result)


def _unique_inverse_dispatcher(x, /):
    return (x,)


@array_function_dispatch(_unique_inverse_dispatcher)
def unique_inverse(x):
    """
    Find the unique elements of `x` and indices to reconstruct `x`.

    This function is an Array API compatible alternative to::

        np.unique(x, return_inverse=True, equal_nan=False)

    but returns a namedtuple for easier access to each output.

    Parameters
    ----------
    x : array_like
        Input array. It will be flattened if it is not already 1-D.

    Returns
    -------
    out : namedtuple
        The result containing:

        * values - The unique elements of an input array.
        * inverse_indices - The indices from the set of unique elements
          that reconstruct `x`.

    See Also
    --------
    unique : Find the unique elements of an array.

    Examples
    --------
    >>> import numpy as np
    >>> x = [1, 1, 2]
    >>> uniq = np.unique_inverse(x)
    >>> uniq.values
    array([1, 2])
    >>> uniq.inverse_indices
    array([0, 0, 1])
    """
    result = unique(
        x,
        return_index=False,
        return_inverse=True,
        return_counts=False,
        equal_nan=False
    )
    return UniqueInverseResult(*result)


def _unique_values_dispatcher(x, /):
    return (x,)


@array_function_dispatch(_unique_values_dispatcher)
def unique_values(x):
    """
    Returns the unique elements of an input array `x`.

    This function is an Array API compatible alternative to::

        np.unique(x, equal_nan=False)

    Parameters
    ----------
    x : array_like
        Input array. It will be flattened if it is not already 1-D.

    Returns
    -------
    out : ndarray
        The unique elements of an input array.

    See Also
    --------
    unique : Find the unique elements of an array.

    Examples
    --------
    >>> import numpy as np
    >>> np.unique_values([1, 1, 2])
    array([1, 2])

    """
    return unique(
        x,
        return_index=False,
        return_inverse=False,
        return_counts=False,
        equal_nan=False
    )


def _intersect1d_dispatcher(
        ar1, ar2, assume_unique=None, return_indices=None):
    return (ar1, ar2)


@array_function_dispatch(_intersect1d_dispatcher)
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
        unique, incorrect results and out-of-bounds indices could result.
        Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the two
        arrays are returned. The first instance of a value is used if there are
        multiple. Default is False.

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in `ar1`.
        Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in `ar2`.
        Only provided if `return_indices` is True.

    Examples
    --------
    >>> import numpy as np
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if not assume_unique:
        if return_indices:
            ar1, ind1 = unique(ar1, return_index=True)
            ar2, ind2 = unique(ar2, return_index=True)
        else:
            ar1 = unique(ar1)
            ar2 = unique(ar2)
    else:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()

    aux = np.concatenate((ar1, ar2))
    if return_indices:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        aux = aux[aux_sort_indices]
    else:
        aux.sort()

    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]
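    # With both inputs unique, a common value shows up exactly twice in the
    # sorted `aux`, e.g. ar1 = [1, 3, 4], ar2 = [1, 2, 3]: aux = [1, 1, 2, 3, 3, 4],
    # mask = [True, False, False, True, False] and int1d = [1, 3].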

    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]

        return int1d, ar1_indices, ar2_indices
    else:
        return int1d


def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
    return (ar1, ar2)


@array_function_dispatch(_setxor1d_dispatcher)
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a, b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    aux = np.concatenate((ar1, ar2), axis=None)
    if aux.size == 0:
        return aux

    aux.sort()
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
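    # `flag` marks the start of each run of equal values (with sentinels at
    # both ends); keeping positions where a run starts and the next position
    # also starts a run selects values that occur exactly once,
    # e.g. aux = [1, 2, 2, 3] -> flag = [True, True, False, True, True] and the
    # result below is [1, 3].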

    return aux[flag[1:] & flag[:-1]]


def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
                     kind=None):
    return (ar1, ar2)


@array_function_dispatch(_in1d_dispatcher)
def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
    """
    Test whether each element of a 1-D array is also present in a second array.

    .. deprecated:: 2.0
        Use :func:`isin` instead of `in1d` for new code.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin : Version of this function that preserves the
        shape of ar1.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container: As ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    Using ``kind='table'`` tends to be faster than `kind='sort'` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    Examples
    --------
    >>> import numpy as np
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """

    # Deprecated in NumPy 2.0, 2023-08-18
    warnings.warn(
        "`in1d` is deprecated. Use `np.isin` instead.",
        DeprecationWarning,
        stacklevel=2
    )

    return _in1d(ar1, ar2, assume_unique, invert, kind=kind)


def _in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Ensure that iteration through object arrays yields size-1 arrays
    if ar2.dtype == object:
        ar2 = ar2.reshape(-1, 1)

    if kind not in {None, 'sort', 'table'}:
        raise ValueError(
            f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

    # Can use the table method if all arrays are integers or boolean:
    is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
    use_table_method = is_int_arrays and kind in {None, 'table'}

    if use_table_method:
        if ar2.size == 0:
            if invert:
                return np.ones_like(ar1, dtype=bool)
            else:
                return np.zeros_like(ar1, dtype=bool)

        # Convert booleans to uint8 so we can use the fast integer algorithm
        if ar1.dtype == bool:
            ar1 = ar1.astype(np.uint8)
        if ar2.dtype == bool:
            ar2 = ar2.astype(np.uint8)

        ar2_min = int(np.min(ar2))
        ar2_max = int(np.max(ar2))

        ar2_range = ar2_max - ar2_min

        # Constraints on whether we can actually use the table method:
        #  1. Assert memory usage is not too large
        below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
        #  2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
        range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max

        # Optimal performance is for approximately
        # log10(size) > (log10(range) - 2.27) / 0.927.
        # However, here we set the requirement that by default
        # the intermediate array can only be 6x
        # the combined memory allocation of the original
        # arrays. See discussion on
        # https://github.com/numpy/numpy/pull/12065.

        if (
            range_safe_from_overflow and
            (below_memory_constraint or kind == 'table')
        ):

            if invert:
                outgoing_array = np.ones_like(ar1, dtype=bool)
            else:
                outgoing_array = np.zeros_like(ar1, dtype=bool)

            # Make elements 1 where the integer exists in ar2
            if invert:
                isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 0
            else:
                isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 1
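            # e.g. ar2 = [3, 7, 5] -> ar2_min = 3, ar2_range = 4 and (with
            # invert=False) isin_helper_ar = [True, False, True, False, True];
            # membership of a value v is then a single lookup at v - ar2_min.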

            # Mask out elements we know won't work
            basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
            in_range_ar1 = ar1[basic_mask]
            if in_range_ar1.size == 0:
                # Nothing more to do, since all values are out of range.
                return outgoing_array

            # Unfortunately, ar2_min can be out of range for `intp` even
            # if the calculation result must fit in range (and be positive).
            # In that case, use ar2.dtype which must work for all unmasked
            # values.
            try:
                ar2_min = np.array(ar2_min, dtype=np.intp)
                dtype = np.intp
            except OverflowError:
                dtype = ar2.dtype

            out = np.empty_like(in_range_ar1, dtype=np.intp)
            outgoing_array[basic_mask] = isin_helper_ar[
                np.subtract(in_range_ar1, ar2_min, dtype=dtype,
                            out=out, casting="unsafe")]

            return outgoing_array
        elif kind == 'table':  # not range_safe_from_overflow
            raise RuntimeError(
                "You have specified kind='table', "
                "but the range of values in `ar2` or `ar1` exceed the "
                "maximum integer of the datatype. "
                "Please set `kind` to None or 'sort'."
            )
    elif kind == 'table':
        raise ValueError(
            "The 'table' method is only "
            "supported for boolean or integer arrays. "
            "Please select 'sort' or None for kind."
        )


    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
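    # The `len(ar2) < 10 * len(ar1) ** 0.145` test below is an empirical
    # crossover point: for a small `ar2` the O(len(ar1) * len(ar2)) masking
    # loop tends to beat the sort-based path further down.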

    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
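    # An element of `ar1` is "in" `ar2` exactly when its immediate successor in
    # the stable sort is equal to it (`ar1` values sort before equal `ar2` values),
    # e.g. with invert=False, ar1 = [2, 9], ar2 = [2, 3]: sar = [2, 2, 3, 9] and
    # bool_ar = [True, False, False], so 2 maps to True and 9 to False.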

    flag = np.concatenate((bool_ar, [invert]))
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]


def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
                     *, kind=None):
    return (element, test_elements)


@array_function_dispatch(_isin_dispatcher)
def isin(element, test_elements, assume_unique=False, invert=False, *,
         kind=None):
    """
    Calculates ``element in test_elements``, broadcasting over `element` only.
    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.

    Parameters
    ----------
    element : array_like
        Input array.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is an array or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``np.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``np.invert(np.isin(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `element` and `test_elements`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `element` plus the max-min value of `test_elements`.
          `assume_unique` has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `element` and `test_elements`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.


    Returns
    -------
    isin : ndarray, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    Notes
    -----
    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

    `element` and `test_elements` are converted to arrays if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object array with one element, rather than an
    array of the values contained in `test_elements`. This is a consequence
    of the `array` constructor's way of handling non-sequence collections.
    Converting the set to a list usually gives the desired behavior.

    Using ``kind='table'`` tends to be faster than `kind='sort'` if the
    following relationship is true:
    ``log10(len(test_elements)) >
    (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    Examples
    --------
    >>> import numpy as np
    >>> element = 2*np.arange(4).reshape((2, 2))
    >>> element
    array([[0, 2],
           [4, 6]])
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = np.isin(element, test_elements)
    >>> mask
    array([[False,  True],
           [ True, False]])
    >>> element[mask]
    array([2, 4])

    The indices of the matched values can be obtained with `nonzero`:

    >>> np.nonzero(mask)
    (array([0, 1]), array([1, 0]))

    The test can also be inverted:

    >>> mask = np.isin(element, test_elements, invert=True)
    >>> mask
    array([[ True, False],
           [False,  True]])
    >>> element[mask]
    array([0, 6])

    Because of how `array` handles sets, the following does not
    work as expected:

    >>> test_set = {1, 2, 4, 8}
    >>> np.isin(element, test_set)
    array([[False, False],
           [False, False]])

    Casting the set to a list gives the expected result:

    >>> np.isin(element, list(test_set))
    array([[False,  True],
           [ True, False]])
    """
    element = np.asarray(element)
    return _in1d(element, test_elements, assume_unique=assume_unique,
                 invert=invert, kind=kind).reshape(element.shape)


def _union1d_dispatcher(ar1, ar2):
    return (ar1, ar2)


@array_function_dispatch(_union1d_dispatcher)
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    Examples
    --------
    >>> import numpy as np
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2), axis=None))


def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
    return (ar1, ar2)


@array_function_dispatch(_setdiff1d_dispatcher)
def setdiff1d(ar1, ar2, assume_unique=False):
    """
    Find the set difference of two arrays.

    Return the unique values in `ar1` that are not in `ar2`.

    Parameters
    ----------
    ar1 : array_like
        Input array.
    ar2 : array_like
        Input comparison array.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setdiff1d : ndarray
        1D array of values in `ar1` that are not in `ar2`. The result
        is sorted when `assume_unique=False`, but otherwise only sorted
        if the input is sorted.

    Examples
    --------
    >>> import numpy as np
    >>> a = np.array([1, 2, 3, 2, 4, 1])
    >>> b = np.array([3, 4, 5, 6])
    >>> np.setdiff1d(a, b)
    array([1, 2])

    """
    if assume_unique:
        ar1 = np.asarray(ar1).ravel()
    else:
        ar1 = unique(ar1)
        ar2 = unique(ar2)
    return ar1[_in1d(ar1, ar2, assume_unique=True, invert=True)]