Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/numpy/lib/arraysetops.py: 13%

238 statements  

1""" 

2Set operations for arrays based on sorting. 

3 

4Notes 

5----- 

6 

7For floating point arrays, inaccurate results may appear due to usual round-off 

8and floating point comparison issues. 

9 

10Speed could be gained in some operations by an implementation of 

11`numpy.sort`, that can provide directly the permutation vectors, thus avoiding 

12calls to `numpy.argsort`. 

13 

14Original author: Robert Cimrman 

15 

16""" 

import functools

import numpy as np
from numpy.core import overrides


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


__all__ = [
    'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique',
    'in1d', 'isin'
    ]

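# Editor's illustration (not part of numpy.lib.arraysetops): a minimal sketch
# of the round-off caveat mentioned in the module notes above. The helper name
# `_example_float_roundoff_sketch` is hypothetical; it only shows that floating
# point values which print alike can still be distinct set members.
def _example_float_roundoff_sketch():
    a = np.array([0.1 + 0.2, 0.3])
    # 0.1 + 0.2 is 0.30000000000000004, so both values survive as "unique"
    assert np.unique(a).size == 2
    # ...and the two arrays share no exactly-equal element
    assert np.intersect1d([0.1 + 0.2], [0.3]).size == 0
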

def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
    return (ary, to_end, to_begin)


@array_function_dispatch(_ediff1d_dispatcher)
def ediff1d(ary, to_end=None, to_begin=None):
    """
    The differences between consecutive elements of an array.

    Parameters
    ----------
    ary : array_like
        If necessary, will be flattened before the differences are taken.
    to_end : array_like, optional
        Number(s) to append at the end of the returned differences.
    to_begin : array_like, optional
        Number(s) to prepend at the beginning of the returned differences.

    Returns
    -------
    ediff1d : ndarray
        The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

    See Also
    --------
    diff, gradient

    Notes
    -----
    When applied to masked arrays, this function drops the mask information
    if the `to_begin` and/or `to_end` parameters are used.

    Examples
    --------
    >>> x = np.array([1, 2, 4, 7, 0])
    >>> np.ediff1d(x)
    array([ 1,  2,  3, -7])

    >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
    array([-99,   1,   2, ...,  -7,  88,  99])

    The returned array is always 1D.

    >>> y = [[1, 2, 4], [1, 6, 24]]
    >>> np.ediff1d(y)
    array([ 1,  2, -3,  5, 18])

    """
    # force a 1d array
    ary = np.asanyarray(ary).ravel()

    # enforce that the dtype of `ary` is used for the output
    dtype_req = ary.dtype

    # fast track default case
    if to_begin is None and to_end is None:
        return ary[1:] - ary[:-1]

    if to_begin is None:
        l_begin = 0
    else:
        to_begin = np.asanyarray(to_begin)
        if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_begin` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_begin = to_begin.ravel()
        l_begin = len(to_begin)

    if to_end is None:
        l_end = 0
    else:
        to_end = np.asanyarray(to_end)
        if not np.can_cast(to_end, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_end` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_end = to_end.ravel()
        l_end = len(to_end)

    # do the calculation in place and copy to_begin and to_end
    l_diff = max(len(ary) - 1, 0)
    result = np.empty(l_diff + l_begin + l_end, dtype=ary.dtype)
    result = ary.__array_wrap__(result)
    if l_begin > 0:
        result[:l_begin] = to_begin
    if l_end > 0:
        result[l_begin + l_diff:] = to_end
    np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])
    return result

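# Editor's illustration (not part of the original module): a minimal sketch of
# what `ediff1d` computes, using the hypothetical helper name
# `_example_ediff1d_sketch`. The core result is just the adjacent differences
# of the flattened input, with `to_begin`/`to_end` copied around them.
def _example_ediff1d_sketch():
    x = np.array([1, 2, 4, 7, 0])
    assert np.array_equal(np.ediff1d(x), x[1:] - x[:-1])
    padded = np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
    assert np.array_equal(
        padded, np.concatenate(([-99], x[1:] - x[:-1], [88, 99])))
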

def _unpack_tuple(x):
    """ Unpacks one-element tuples for use as return values """
    if len(x) == 1:
        return x[0]
    else:
        return x


def _unique_dispatcher(ar, return_index=None, return_inverse=None,
                       return_counts=None, axis=None, *, equal_nan=None):
    return (ar,)


@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None, *, equal_nan=True):
    """
    Find the unique elements of an array.

    Returns the sorted unique elements of an array. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input array that give the unique values
    * the indices of the unique array that reconstruct the input array
    * the number of times each unique value comes up in the input array

    Parameters
    ----------
    ar : array_like
        Input array. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened array) that result in the unique
        array.
    return_inverse : bool, optional
        If True, also return the indices of the unique array (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an
        integer, the subarrays indexed by the given axis will be flattened and
        treated as the elements of a 1-D array with the dimension of the given
        axis, see the notes for more details. Object arrays or structured
        arrays that contain objects are not supported if the `axis` kwarg is
        used. The default is None.

        .. versionadded:: 1.13.0

    equal_nan : bool, optional
        If True, collapses multiple NaN values in the return array into one.

        .. versionadded:: 1.24

    Returns
    -------
    unique : ndarray
        The sorted unique values.
    unique_indices : ndarray, optional
        The indices of the first occurrences of the unique values in the
        original array. Only provided if `return_index` is True.
    unique_inverse : ndarray, optional
        The indices to reconstruct the original array from the
        unique array. Only provided if `return_inverse` is True.
    unique_counts : ndarray, optional
        The number of times each of the unique values comes up in the
        original array. Only provided if `return_counts` is True.

        .. versionadded:: 1.9.0

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.
    repeat : Repeat elements of an array.

    Notes
    -----
    When an axis is specified the subarrays indexed by the axis are sorted.
    This is done by making the specified axis the first dimension of the array
    (move the axis to the first dimension to keep the order of the other axes)
    and then flattening the subarrays in C order. The flattened subarrays are
    then viewed as a structured type with each element given a label, with the
    effect that we end up with a 1-D array of structured types that can be
    treated in the same way as any other 1-D array. The result is that the
    flattened subarrays are sorted in lexicographic order starting with the
    first element.

    .. versionchanged:: NumPy 1.21
        If nan values are in the input array, a single nan is put
        to the end of the sorted unique values.

        Also for complex arrays all NaN values are considered equivalent
        (no matter whether the NaN is in the real or imaginary part).
        As the representative for the returned array, the smallest one in
        lexicographical order is chosen - see np.sort for how the
        lexicographical order is defined for complex arrays.

    Examples
    --------
    >>> np.unique([1, 1, 2, 2, 3, 3])
    array([1, 2, 3])
    >>> a = np.array([[1, 1], [2, 3]])
    >>> np.unique(a)
    array([1, 2, 3])

    Return the unique rows of a 2D array

    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> np.unique(a, axis=0)
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original array that give the unique values:

    >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = np.unique(a, return_index=True)
    >>> u
    array(['a', 'b', 'c'], dtype='<U1')
    >>> indices
    array([0, 1, 3])
    >>> a[indices]
    array(['a', 'b', 'c'], dtype='<U1')

    Reconstruct the input array from the unique values and inverse:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = np.unique(a, return_inverse=True)
    >>> u
    array([1, 2, 3, 4, 6])
    >>> indices
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices]
    array([1, 2, 6, 4, 2, 3, 2])

    Reconstruct the input values from the unique values and counts:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> values, counts = np.unique(a, return_counts=True)
    >>> values
    array([1, 2, 3, 4, 6])
    >>> counts
    array([1, 3, 1, 1, 1])
    >>> np.repeat(values, counts)
    array([1, 2, 2, 2, 3, 4, 6])    # original order not preserved

    """
    ar = np.asanyarray(ar)
    if axis is None:
        ret = _unique1d(ar, return_index, return_inverse, return_counts,
                        equal_nan=equal_nan)
        return _unpack_tuple(ret)

    # axis was specified and not None
    try:
        ar = np.moveaxis(ar, axis, 0)
    except np.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.AxisError(axis, ar.ndim) from None

    # Must reshape to a contiguous 2D array for this to work...
    orig_shape, orig_dtype = ar.shape, ar.dtype
    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
    ar = np.ascontiguousarray(ar)
    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

    # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
    # data type with `m` fields where each field has the data type of `ar`.
    # In the following, we create the array `consolidated`, which has
    # shape `(n,)` with data type `dtype`.
    try:
        if ar.shape[1] > 0:
            consolidated = ar.view(dtype)
        else:
            # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
            # a data type with itemsize 0, and the call `ar.view(dtype)` will
            # fail. Instead, we'll use `np.empty` to explicitly create the
            # array with shape `(len(ar),)`. Because `dtype` in this case has
            # itemsize 0, the total size of the result is still 0 bytes.
            consolidated = np.empty(len(ar), dtype=dtype)
    except TypeError as e:
        # There's no good way to do this for object arrays, etc...
        msg = 'The axis argument to unique is not supported for dtype {dt}'
        raise TypeError(msg.format(dt=ar.dtype)) from e

    def reshape_uniq(uniq):
        n = len(uniq)
        uniq = uniq.view(orig_dtype)
        uniq = uniq.reshape(n, *orig_shape[1:])
        uniq = np.moveaxis(uniq, 0, axis)
        return uniq

    output = _unique1d(consolidated, return_index,
                       return_inverse, return_counts, equal_nan=equal_nan)
    output = (reshape_uniq(output[0]),) + output[1:]
    return _unpack_tuple(output)

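# Editor's illustration (not part of the original module): a sketch of the
# row-consolidation trick described in the Notes of `unique` above. Each row
# is viewed as one element of a structured dtype so the 1-D machinery can sort
# and compare whole rows at once. The helper name is hypothetical.
def _example_unique_axis_sketch():
    a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    # one structured field per column, all sharing the array's dtype
    row_dtype = [('f{}'.format(i), a.dtype) for i in range(a.shape[1])]
    consolidated = np.ascontiguousarray(a).view(row_dtype)
    # two distinct rows remain, matching the public axis-based API
    assert np.unique(consolidated).size == 2
    assert np.array_equal(np.unique(a, axis=0), [[1, 0, 0], [2, 3, 4]])
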

def _unique1d(ar, return_index=False, return_inverse=False,
              return_counts=False, *, equal_nan=True):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    mask = np.empty(aux.shape, dtype=np.bool_)
    mask[:1] = True
    if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
            np.isnan(aux[-1])):
        if aux.dtype.kind == "c":  # for complex all NaNs are considered equivalent
            aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
        else:
            aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
        if aux_firstnan > 0:
            mask[1:aux_firstnan] = (
                aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
        mask[aux_firstnan] = True
        mask[aux_firstnan + 1:] = False
    else:
        mask[1:] = aux[1:] != aux[:-1]

    ret = (aux[mask],)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret

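# Editor's illustration (not part of the original module): the sort-then-mask
# idea at the heart of `_unique1d`, written out for a concrete input. After
# sorting, duplicates are adjacent, so keeping the first element plus every
# element that differs from its predecessor yields the unique values.
def _example_sorted_mask_sketch():
    ar = np.array([2, 1, 2, 3, 1])
    aux = np.sort(ar)
    mask = np.empty(aux.shape, dtype=bool)
    mask[:1] = True
    mask[1:] = aux[1:] != aux[:-1]
    assert np.array_equal(aux[mask], np.unique(ar))
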

def _intersect1d_dispatcher(
        ar1, ar2, assume_unique=None, return_indices=None):
    return (ar1, ar2)


@array_function_dispatch(_intersect1d_dispatcher)
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
        unique, incorrect results and out-of-bounds indices could result.
        Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the two
        arrays are returned. The first instance of a value is used if there
        are multiple. Default is False.

        .. versionadded:: 1.15.0

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in `ar1`.
        Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in `ar2`.
        Only provided if `return_indices` is True.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if not assume_unique:
        if return_indices:
            ar1, ind1 = unique(ar1, return_index=True)
            ar2, ind2 = unique(ar2, return_index=True)
        else:
            ar1 = unique(ar1)
            ar2 = unique(ar2)
    else:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()

    aux = np.concatenate((ar1, ar2))
    if return_indices:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        aux = aux[aux_sort_indices]
    else:
        aux.sort()

    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]

    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]

        return int1d, ar1_indices, ar2_indices
    else:
        return int1d

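# Editor's illustration (not part of the original module): the concatenate-and-
# sort mechanism used by `intersect1d` above. Once both inputs are
# deduplicated, a value is common to both exactly when it appears twice in the
# sorted concatenation, i.e. where adjacent sorted elements are equal.
def _example_intersect_sketch():
    ar1 = np.unique(np.array([1, 3, 4, 3]))
    ar2 = np.unique(np.array([3, 1, 2, 1]))
    aux = np.sort(np.concatenate((ar1, ar2)))
    common = aux[:-1][aux[1:] == aux[:-1]]
    assert np.array_equal(common, np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1]))
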

def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
    return (ar1, ar2)


@array_function_dispatch(_setxor1d_dispatcher)
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a, b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    aux = np.concatenate((ar1, ar2))
    if aux.size == 0:
        return aux

    aux.sort()
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
    return aux[flag[1:] & flag[:-1]]

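# Editor's illustration (not part of the original module): the flag trick used
# by `setxor1d` above. In the sorted concatenation of the deduplicated inputs,
# a value belongs to exactly one input when it differs from both neighbours,
# which is what `flag[1:] & flag[:-1]` selects.
def _example_setxor_sketch():
    a = np.array([1, 2, 3, 2, 4])
    b = np.array([2, 3, 5, 7, 5])
    aux = np.sort(np.concatenate((np.unique(a), np.unique(b))))
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
    assert np.array_equal(aux[flag[1:] & flag[:-1]], np.setxor1d(a, b))
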

def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
                     kind=None):
    return (ar1, ar2)


@array_function_dispatch(_in1d_dispatcher)
def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
    """
    Test whether each element of a 1-D array is also present in a second array.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin                  : Version of this function that preserves the
                            shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container: as ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Ensure that iteration through object arrays yields size-1 arrays
    if ar2.dtype == object:
        ar2 = ar2.reshape(-1, 1)

    if kind not in {None, 'sort', 'table'}:
        raise ValueError(
            f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

    # Can use the table method if all arrays are integers or boolean:
    is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
    use_table_method = is_int_arrays and kind in {None, 'table'}

    if use_table_method:
        if ar2.size == 0:
            if invert:
                return np.ones_like(ar1, dtype=bool)
            else:
                return np.zeros_like(ar1, dtype=bool)

        # Convert booleans to uint8 so we can use the fast integer algorithm
        if ar1.dtype == bool:
            ar1 = ar1.astype(np.uint8)
        if ar2.dtype == bool:
            ar2 = ar2.astype(np.uint8)

        ar2_min = np.min(ar2)
        ar2_max = np.max(ar2)

        ar2_range = int(ar2_max) - int(ar2_min)

        # Constraints on whether we can actually use the table method:
        # 1. Assert memory usage is not too large
        below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
        # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
        range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
        # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
        if ar1.size > 0:
            ar1_min = np.min(ar1)
            ar1_max = np.max(ar1)

            # After masking, the range of ar1 is guaranteed to be
            # within the range of ar2:
            ar1_upper = min(int(ar1_max), int(ar2_max))
            ar1_lower = max(int(ar1_min), int(ar2_min))

            range_safe_from_overflow &= all((
                ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
                ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
            ))

        # Optimal performance is for approximately
        # log10(size) > (log10(range) - 2.27) / 0.927.
        # However, here we set the requirement that by default
        # the intermediate array can only be 6x
        # the combined memory allocation of the original
        # arrays. See discussion on
        # https://github.com/numpy/numpy/pull/12065.

        if (
            range_safe_from_overflow and
            (below_memory_constraint or kind == 'table')
        ):

            if invert:
                outgoing_array = np.ones_like(ar1, dtype=bool)
            else:
                outgoing_array = np.zeros_like(ar1, dtype=bool)

            # Make elements 1 where the integer exists in ar2
            if invert:
                isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 0
            else:
                isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 1

            # Mask out elements we know won't work
            basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
            outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -
                                                        ar2_min]

            return outgoing_array
        elif kind == 'table':  # not range_safe_from_overflow
            raise RuntimeError(
                "You have specified kind='table', "
                "but the range of values in `ar2` or `ar1` exceed the "
                "maximum integer of the datatype. "
                "Please set `kind` to None or 'sort'."
            )
    elif kind == 'table':
        raise ValueError(
            "The 'table' method is only "
            "supported for boolean or integer arrays. "
            "Please select 'sort' or None for kind."
        )

    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
    flag = np.concatenate((bool_ar, [invert]))
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]

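# Editor's illustration (not part of the original module): a standalone sketch
# of the ``kind='table'`` strategy implemented in `in1d` above. A boolean
# lookup table spanning the value range of `ar2` is built once, then indexed
# with ``ar1 - ar2_min``; memory scales with ``max(ar2) - min(ar2)`` rather
# than with the array sizes. The helper name is hypothetical.
def _example_table_kind_sketch():
    ar1 = np.array([0, 1, 2, 5, 0])
    ar2 = np.array([0, 2])
    ar2_min, ar2_max = int(ar2.min()), int(ar2.max())
    table = np.zeros(ar2_max - ar2_min + 1, dtype=bool)
    table[ar2 - ar2_min] = True          # mark every value present in ar2
    in_range = (ar1 >= ar2_min) & (ar1 <= ar2_max)
    result = np.zeros(ar1.shape, dtype=bool)
    result[in_range] = table[ar1[in_range] - ar2_min]
    assert np.array_equal(result, np.in1d(ar1, ar2))
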

def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
                     *, kind=None):
    return (element, test_elements)


@array_function_dispatch(_isin_dispatcher)
def isin(element, test_elements, assume_unique=False, invert=False, *,
         kind=None):
    """
    Calculates ``element in test_elements``, broadcasting over `element` only.
    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.

    Parameters
    ----------
    element : array_like
        Input array.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is an array or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``np.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``np.invert(np.isin(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `element` and `test_elements`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `element` plus the max-min value of `test_elements`.
          `assume_unique` has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `element` and `test_elements`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

    Returns
    -------
    isin : ndarray, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    See Also
    --------
    in1d                  : Flattened version of this function.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

    `element` and `test_elements` are converted to arrays if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object array with one element, rather than an
    array of the values contained in `test_elements`. This is a consequence
    of the `array` constructor's way of handling non-sequence collections.
    Converting the set to a list usually gives the desired behavior.

    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
    following relationship is true:
    ``log10(len(test_elements)) >
    (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    .. versionadded:: 1.13.0

    Examples
    --------
    >>> element = 2*np.arange(4).reshape((2, 2))
    >>> element
    array([[0, 2],
           [4, 6]])
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = np.isin(element, test_elements)
    >>> mask
    array([[False,  True],
           [ True, False]])
    >>> element[mask]
    array([2, 4])

    The indices of the matched values can be obtained with `nonzero`:

    >>> np.nonzero(mask)
    (array([0, 1]), array([1, 0]))

    The test can also be inverted:

    >>> mask = np.isin(element, test_elements, invert=True)
    >>> mask
    array([[ True, False],
           [False,  True]])
    >>> element[mask]
    array([0, 6])

    Because of how `array` handles sets, the following does not
    work as expected:

    >>> test_set = {1, 2, 4, 8}
    >>> np.isin(element, test_set)
    array([[False, False],
           [False, False]])

    Casting the set to a list gives the expected result:

    >>> np.isin(element, list(test_set))
    array([[False,  True],
           [ True, False]])
    """
    element = np.asarray(element)
    return in1d(element, test_elements, assume_unique=assume_unique,
                invert=invert, kind=kind).reshape(element.shape)


def _union1d_dispatcher(ar1, ar2):
    return (ar1, ar2)


@array_function_dispatch(_union1d_dispatcher)
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    return unique(np.concatenate((ar1, ar2), axis=None))


def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
    return (ar1, ar2)


@array_function_dispatch(_setdiff1d_dispatcher)
def setdiff1d(ar1, ar2, assume_unique=False):
    """
    Find the set difference of two arrays.

    Return the unique values in `ar1` that are not in `ar2`.

    Parameters
    ----------
    ar1 : array_like
        Input array.
    ar2 : array_like
        Input comparison array.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setdiff1d : ndarray
        1D array of values in `ar1` that are not in `ar2`. The result
        is sorted when `assume_unique=False`, but otherwise only sorted
        if the input is sorted.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4, 1])
    >>> b = np.array([3, 4, 5, 6])
    >>> np.setdiff1d(a, b)
    array([1, 2])

    """
    if assume_unique:
        ar1 = np.asarray(ar1).ravel()
    else:
        ar1 = unique(ar1)
        ar2 = unique(ar2)
    return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]