1"""
2Set operations for arrays based on sorting.
3
4Notes
5-----
6
7For floating point arrays, inaccurate results may appear due to usual round-off
8and floating point comparison issues.
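
For example, two values that differ only by round-off are treated as distinct
elements:

>>> len(np.union1d([0.1 + 0.2], [0.3]))
2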

Speed could be gained in some operations by an implementation of
`numpy.sort` that could provide the permutation vectors directly, thus
avoiding calls to `numpy.argsort`.

Original author: Robert Cimrman

"""
import functools

import numpy as np
from numpy.core import overrides


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


__all__ = [
    'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique',
    'in1d', 'isin'
    ]


def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
    return (ary, to_end, to_begin)


@array_function_dispatch(_ediff1d_dispatcher)
def ediff1d(ary, to_end=None, to_begin=None):
    """
    The differences between consecutive elements of an array.

    Parameters
    ----------
    ary : array_like
        If necessary, will be flattened before the differences are taken.
    to_end : array_like, optional
        Number(s) to append at the end of the returned differences.
    to_begin : array_like, optional
        Number(s) to prepend at the beginning of the returned differences.

    Returns
    -------
    ediff1d : ndarray
        The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

    See Also
    --------
    diff, gradient

    Notes
    -----
    When applied to masked arrays, this function drops the mask information
    if the `to_begin` and/or `to_end` parameters are used.

    Examples
    --------
    >>> x = np.array([1, 2, 4, 7, 0])
    >>> np.ediff1d(x)
    array([ 1,  2,  3, -7])

    >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
    array([-99, 1, 2, ..., -7, 88, 99])

    The returned array is always 1D.

    >>> y = [[1, 2, 4], [1, 6, 24]]
    >>> np.ediff1d(y)
    array([ 1,  2, -3,  5, 18])

    """
    # force a 1d array
    ary = np.asanyarray(ary).ravel()

    # enforce that the dtype of `ary` is used for the output
    dtype_req = ary.dtype

    # fast track default case
    if to_begin is None and to_end is None:
        return ary[1:] - ary[:-1]

    if to_begin is None:
        l_begin = 0
    else:
        to_begin = np.asanyarray(to_begin)
        if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_begin` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_begin = to_begin.ravel()
        l_begin = len(to_begin)

    if to_end is None:
        l_end = 0
    else:
        to_end = np.asanyarray(to_end)
        if not np.can_cast(to_end, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_end` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_end = to_end.ravel()
        l_end = len(to_end)

    # do the calculation in place and copy to_begin and to_end
    l_diff = max(len(ary) - 1, 0)
    result = np.empty(l_diff + l_begin + l_end, dtype=ary.dtype)
    result = ary.__array_wrap__(result)
    if l_begin > 0:
        result[:l_begin] = to_begin
    if l_end > 0:
        result[l_begin + l_diff:] = to_end
    np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])
    return result


def _unpack_tuple(x):
    """ Unpacks one-element tuples for use as return values """
    if len(x) == 1:
        return x[0]
    else:
        return x


def _unique_dispatcher(ar, return_index=None, return_inverse=None,
                       return_counts=None, axis=None, *, equal_nan=None):
    return (ar,)


@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None, *, equal_nan=True):
    """
    Find the unique elements of an array.

    Returns the sorted unique elements of an array. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input array that give the unique values
    * the indices of the unique array that reconstruct the input array
    * the number of times each unique value comes up in the input array

    Parameters
    ----------
    ar : array_like
        Input array. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened array) that result in the unique
        array.
    return_inverse : bool, optional
        If True, also return the indices of the unique array (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an integer,
        the subarrays indexed by the given axis will be flattened and treated
        as the elements of a 1-D array with the dimension of the given axis,
        see the notes for more details. Object arrays or structured arrays
        that contain objects are not supported if the `axis` kwarg is used.
        The default is None.

        .. versionadded:: 1.13.0

    equal_nan : bool, optional
        If True, collapses multiple NaN values in the return array into one.

        .. versionadded:: 1.24

    Returns
    -------
    unique : ndarray
        The sorted unique values.
    unique_indices : ndarray, optional
        The indices of the first occurrences of the unique values in the
        original array. Only provided if `return_index` is True.
    unique_inverse : ndarray, optional
        The indices to reconstruct the original array from the
        unique array. Only provided if `return_inverse` is True.
    unique_counts : ndarray, optional
        The number of times each of the unique values comes up in the
        original array. Only provided if `return_counts` is True.

        .. versionadded:: 1.9.0

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.
    repeat : Repeat elements of an array.

    Notes
    -----
    When an axis is specified the subarrays indexed by the axis are sorted.
    This is done by making the specified axis the first dimension of the array
    (move the axis to the first dimension to keep the order of the other axes)
    and then flattening the subarrays in C order. The flattened subarrays are
    then viewed as a structured type with each element given a label, with the
    effect that we end up with a 1-D array of structured types that can be
    treated in the same way as any other 1-D array. The result is that the
    flattened subarrays are sorted in lexicographic order starting with the
    first element.

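    For example, with ``axis=1`` the columns are compared lexicographically
    and returned in sorted order:

    >>> a = np.array([[1, 0, 1], [2, 3, 2]])
    >>> np.unique(a, axis=1)
    array([[0, 1],
           [3, 2]])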

    .. versionchanged:: 1.21
        If nan values are in the input array, a single nan is put
        to the end of the sorted unique values.

    Also for complex arrays all NaN values are considered equivalent
    (no matter whether the NaN is in the real or imaginary part).
    The representative chosen for the returned array is the smallest one in
    lexicographical order - see ``np.sort`` for how lexicographical order
    is defined for complex arrays.

    Examples
    --------
    >>> np.unique([1, 1, 2, 2, 3, 3])
    array([1, 2, 3])
    >>> a = np.array([[1, 1], [2, 3]])
    >>> np.unique(a)
    array([1, 2, 3])

    Return the unique rows of a 2D array

    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> np.unique(a, axis=0)
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original array that give the unique values:

    >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = np.unique(a, return_index=True)
    >>> u
    array(['a', 'b', 'c'], dtype='<U1')
    >>> indices
    array([0, 1, 3])
    >>> a[indices]
    array(['a', 'b', 'c'], dtype='<U1')

    Reconstruct the input array from the unique values and inverse:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = np.unique(a, return_inverse=True)
    >>> u
    array([1, 2, 3, 4, 6])
    >>> indices
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices]
    array([1, 2, 6, 4, 2, 3, 2])

    Reconstruct the input values from the unique values and counts:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> values, counts = np.unique(a, return_counts=True)
    >>> values
    array([1, 2, 3, 4, 6])
    >>> counts
    array([1, 3, 1, 1, 1])
    >>> np.repeat(values, counts)
    array([1, 2, 2, 2, 3, 4, 6])    # original order not preserved

    """
    ar = np.asanyarray(ar)
    if axis is None:
        ret = _unique1d(ar, return_index, return_inverse, return_counts,
                        equal_nan=equal_nan)
        return _unpack_tuple(ret)

    # axis was specified and not None
    try:
        ar = np.moveaxis(ar, axis, 0)
    except np.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.AxisError(axis, ar.ndim) from None

    # Must reshape to a contiguous 2D array for this to work...
    orig_shape, orig_dtype = ar.shape, ar.dtype
    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
    ar = np.ascontiguousarray(ar)
    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

    # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
    # data type with `m` fields where each field has the data type of `ar`.
    # In the following, we create the array `consolidated`, which has
    # shape `(n,)` with data type `dtype`.
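    # Illustrative example (not executed here): for a (3, 2) int64 input,
    # `dtype` has two fields 'f0' and 'f1', each int64, so every row of `ar`
    # becomes a single structured scalar and row-wise uniqueness reduces to
    # 1-D uniqueness on `consolidated`.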
    try:
        if ar.shape[1] > 0:
            consolidated = ar.view(dtype)
        else:
            # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
            # a data type with itemsize 0, and the call `ar.view(dtype)` will
            # fail. Instead, we'll use `np.empty` to explicitly create the
            # array with shape `(len(ar),)`. Because `dtype` in this case has
            # itemsize 0, the total size of the result is still 0 bytes.
            consolidated = np.empty(len(ar), dtype=dtype)
    except TypeError as e:
        # There's no good way to do this for object arrays, etc...
        msg = 'The axis argument to unique is not supported for dtype {dt}'
        raise TypeError(msg.format(dt=ar.dtype)) from e

    def reshape_uniq(uniq):
        n = len(uniq)
        uniq = uniq.view(orig_dtype)
        uniq = uniq.reshape(n, *orig_shape[1:])
        uniq = np.moveaxis(uniq, 0, axis)
        return uniq

    output = _unique1d(consolidated, return_index,
                       return_inverse, return_counts, equal_nan=equal_nan)
    output = (reshape_uniq(output[0]),) + output[1:]
    return _unpack_tuple(output)


def _unique1d(ar, return_index=False, return_inverse=False,
              return_counts=False, *, equal_nan=True):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    mask = np.empty(aux.shape, dtype=np.bool_)
    mask[:1] = True
    if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
            np.isnan(aux[-1])):
        if aux.dtype.kind == "c":  # for complex all NaNs are considered equivalent
            aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
        else:
            aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
        if aux_firstnan > 0:
            mask[1:aux_firstnan] = (
                aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
        mask[aux_firstnan] = True
        mask[aux_firstnan + 1:] = False
    else:
        mask[1:] = aux[1:] != aux[:-1]

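    # Illustrative example: for aux = [1, 1, 2, 3, 3] the mask is
    # [True, False, True, True, False], so aux[mask] gives the unique values,
    # perm[mask] gives first-occurrence indices, and cumsum(mask) - 1 labels
    # every element with the index of its unique value (used for the inverse).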
    ret = (aux[mask],)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret


def _intersect1d_dispatcher(
        ar1, ar2, assume_unique=None, return_indices=None):
    return (ar1, ar2)


@array_function_dispatch(_intersect1d_dispatcher)
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
        unique, incorrect results and out-of-bounds indices could result.
        Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the two
        arrays are returned. The first instance of a value is used if there
        are multiple. Default is False.

        .. versionadded:: 1.15.0

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in `ar1`.
        Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in `ar2`.
        Only provided if `return_indices` is True.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if not assume_unique:
        if return_indices:
            ar1, ind1 = unique(ar1, return_index=True)
            ar2, ind2 = unique(ar2, return_index=True)
        else:
            ar1 = unique(ar1)
            ar2 = unique(ar2)
    else:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()

    aux = np.concatenate((ar1, ar2))
    if return_indices:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        aux = aux[aux_sort_indices]
    else:
        aux.sort()

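    # Since both inputs are unique at this point, a value common to both
    # appears exactly twice in the sorted `aux`; equal adjacent entries
    # therefore mark the intersection.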
    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]

    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]

        return int1d, ar1_indices, ar2_indices
    else:
        return int1d


def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
    return (ar1, ar2)


@array_function_dispatch(_setxor1d_dispatcher)
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a, b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    aux = np.concatenate((ar1, ar2))
    if aux.size == 0:
        return aux

    aux.sort()
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
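    # After `unique`, every value occurs at most twice in `aux`. `flag` marks
    # the start of each run of equal values (plus a final sentinel), so
    # ``flag[1:] & flag[:-1]`` is True exactly for values that occur once,
    # i.e. values present in only one of the inputs.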
    return aux[flag[1:] & flag[:-1]]


def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
                     kind=None):
    return (ar1, ar2)


@array_function_dispatch(_in1d_dispatcher)
def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
    """
    Test whether each element of a 1-D array is also present in a second array.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin                  : Version of this function that preserves the
                            shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container: as ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.
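
    The choice of ``kind`` does not change the result; for example, the table
    approach can be requested explicitly for small integer ranges:

    >>> np.in1d([1, 2, 3], [2, 4], kind='table')
    array([False,  True, False])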

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Ensure that iteration through object arrays yields size-1 arrays
    if ar2.dtype == object:
        ar2 = ar2.reshape(-1, 1)

    if kind not in {None, 'sort', 'table'}:
        raise ValueError(
            f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

    # Can use the table method if all arrays are integers or boolean:
    is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
    use_table_method = is_int_arrays and kind in {None, 'table'}

    if use_table_method:
        if ar2.size == 0:
            if invert:
                return np.ones_like(ar1, dtype=bool)
            else:
                return np.zeros_like(ar1, dtype=bool)

        # Convert booleans to uint8 so we can use the fast integer algorithm
        if ar1.dtype == bool:
            ar1 = ar1.astype(np.uint8)
        if ar2.dtype == bool:
            ar2 = ar2.astype(np.uint8)

        ar2_min = np.min(ar2)
        ar2_max = np.max(ar2)

        ar2_range = int(ar2_max) - int(ar2_min)

        # Constraints on whether we can actually use the table method:
        # 1. Assert memory usage is not too large
        below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
        # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
        range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
        # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
        if ar1.size > 0:
            ar1_min = np.min(ar1)
            ar1_max = np.max(ar1)

            # After masking, the range of ar1 is guaranteed to be
            # within the range of ar2:
            ar1_upper = min(int(ar1_max), int(ar2_max))
            ar1_lower = max(int(ar1_min), int(ar2_min))

            range_safe_from_overflow &= all((
                ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
                ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
            ))

        # Optimal performance is for approximately
        # log10(size) > (log10(range) - 2.27) / 0.927.
        # However, here we set the requirement that by default
        # the intermediate array can only be 6x
        # the combined memory allocation of the original
        # arrays. See discussion on
        # https://github.com/numpy/numpy/pull/12065.

        if (
            range_safe_from_overflow and
            (below_memory_constraint or kind == 'table')
        ):

            if invert:
                outgoing_array = np.ones_like(ar1, dtype=bool)
            else:
                outgoing_array = np.zeros_like(ar1, dtype=bool)

            # Make elements 1 where the integer exists in ar2
            if invert:
                isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 0
            else:
                isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 1
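
            # Illustrative example (assumed values): ar2 = [3, 5, 9] gives
            # ar2_min = 3 and a helper array of length 7 that is True at
            # offsets 0, 2 and 6, so membership of an in-range value x from
            # ar1 is read off as isin_helper_ar[x - 3].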

            # Mask out elements we know won't work
            basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
            outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -
                                                        ar2_min]

            return outgoing_array
        elif kind == 'table':  # not range_safe_from_overflow
            raise RuntimeError(
                "You have specified kind='table', "
                "but the range of values in `ar2` or `ar1` exceeds the "
                "maximum integer of the datatype. "
                "Please set `kind` to None or 'sort'."
            )
    elif kind == 'table':
        raise ValueError(
            "The 'table' method is only "
            "supported for boolean or integer arrays. "
            "Please select 'sort' or None for kind."
        )

    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
    flag = np.concatenate((bool_ar, [invert]))
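    # Because the sort is stable and the values of `ar1` come first, an entry
    # of `ar1` is followed in `sar` by an equal value exactly when that value
    # also occurs in `ar2`; `flag` records this (inverted when `invert` is
    # True) and is scattered back through `order` to the original positions.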
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]


def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
                     *, kind=None):
    return (element, test_elements)


@array_function_dispatch(_isin_dispatcher)
def isin(element, test_elements, assume_unique=False, invert=False, *,
         kind=None):
    """
    Calculates ``element in test_elements``, broadcasting over `element` only.
    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.

    Parameters
    ----------
    element : array_like
        Input array.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is an array or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``np.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``np.invert(np.isin(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

    Returns
    -------
    isin : ndarray, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    See Also
    --------
    in1d                  : Flattened version of this function.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----

    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

    `element` and `test_elements` are converted to arrays if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object array with one element, rather than an
    array of the values contained in `test_elements`. This is a consequence
    of the `array` constructor's way of handling non-sequence collections.
    Converting the set to a list usually gives the desired behavior.

    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    .. versionadded:: 1.13.0

    Examples
    --------
    >>> element = 2*np.arange(4).reshape((2, 2))
    >>> element
    array([[0, 2],
           [4, 6]])
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = np.isin(element, test_elements)
    >>> mask
    array([[False,  True],
           [ True, False]])
    >>> element[mask]
    array([2, 4])

    The indices of the matched values can be obtained with `nonzero`:

    >>> np.nonzero(mask)
    (array([0, 1]), array([1, 0]))

    The test can also be inverted:

    >>> mask = np.isin(element, test_elements, invert=True)
    >>> mask
    array([[ True, False],
           [False,  True]])
    >>> element[mask]
    array([0, 6])

    Because of how `array` handles sets, the following does not
    work as expected:

    >>> test_set = {1, 2, 4, 8}
    >>> np.isin(element, test_set)
    array([[False, False],
           [False, False]])

    Casting the set to a list gives the expected result:

    >>> np.isin(element, list(test_set))
    array([[False,  True],
           [ True, False]])
    """
    element = np.asarray(element)
    return in1d(element, test_elements, assume_unique=assume_unique,
                invert=invert, kind=kind).reshape(element.shape)


def _union1d_dispatcher(ar1, ar2):
    return (ar1, ar2)


@array_function_dispatch(_union1d_dispatcher)
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
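
    The inputs are flattened, so they need not be 1-D or have matching shapes:

    >>> np.union1d([[1, 2], [3, 4]], [5])
    array([1, 2, 3, 4, 5])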
931 """
932 return unique(np.concatenate((ar1, ar2), axis=None))
933
934
935def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
936 return (ar1, ar2)
937
938
939@array_function_dispatch(_setdiff1d_dispatcher)
940def setdiff1d(ar1, ar2, assume_unique=False):
941 """
942 Find the set difference of two arrays.
943
944 Return the unique values in `ar1` that are not in `ar2`.
945
946 Parameters
947 ----------
948 ar1 : array_like
949 Input array.
950 ar2 : array_like
951 Input comparison array.
952 assume_unique : bool
953 If True, the input arrays are both assumed to be unique, which
954 can speed up the calculation. Default is False.
955
956 Returns
957 -------
958 setdiff1d : ndarray
959 1D array of values in `ar1` that are not in `ar2`. The result
960 is sorted when `assume_unique=False`, but otherwise only sorted
961 if the input is sorted.
962
963 See Also
964 --------
965 numpy.lib.arraysetops : Module with a number of other functions for
966 performing set operations on arrays.
967
968 Examples
969 --------
970 >>> a = np.array([1, 2, 3, 2, 4, 1])
971 >>> b = np.array([3, 4, 5, 6])
972 >>> np.setdiff1d(a, b)
973 array([1, 2])
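
    With ``assume_unique=True`` the elements of `ar1` keep their original
    order:

    >>> np.setdiff1d([3, 1, 2], [2], assume_unique=True)
    array([3, 1])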

    """
    if assume_unique:
        ar1 = np.asarray(ar1).ravel()
    else:
        ar1 = unique(ar1)
        ar2 = unique(ar2)
    return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]