Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_arraysetops_impl.py: 21%
280 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-04-09 06:12 +0000
1"""
2Set operations for arrays based on sorting.
4Notes
5-----
7For floating point arrays, inaccurate results may appear due to usual round-off
8and floating point comparison issues.
10Speed could be gained in some operations by an implementation of
11`numpy.sort`, that can provide directly the permutation vectors, thus avoiding
12calls to `numpy.argsort`.
14Original author: Robert Cimrman
16"""
17import functools
18import warnings
19from typing import NamedTuple
21import numpy as np
22from numpy._core import overrides
23from numpy._core._multiarray_umath import _array_converter
26array_function_dispatch = functools.partial(
27 overrides.array_function_dispatch, module='numpy')
30__all__ = [
31 "ediff1d", "in1d", "intersect1d", "isin", "setdiff1d", "setxor1d",
32 "union1d", "unique", "unique_all", "unique_counts", "unique_inverse",
33 "unique_values"
34]
37def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
38 return (ary, to_end, to_begin)
41@array_function_dispatch(_ediff1d_dispatcher)
42def ediff1d(ary, to_end=None, to_begin=None):
43 """
44 The differences between consecutive elements of an array.
46 Parameters
47 ----------
48 ary : array_like
49 If necessary, will be flattened before the differences are taken.
50 to_end : array_like, optional
51 Number(s) to append at the end of the returned differences.
52 to_begin : array_like, optional
53 Number(s) to prepend at the beginning of the returned differences.
55 Returns
56 -------
57 ediff1d : ndarray
58 The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.
60 See Also
61 --------
62 diff, gradient
64 Notes
65 -----
66 When applied to masked arrays, this function drops the mask information
67 if the `to_begin` and/or `to_end` parameters are used.
69 Examples
70 --------
71 >>> x = np.array([1, 2, 4, 7, 0])
72 >>> np.ediff1d(x)
73 array([ 1, 2, 3, -7])
75 >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
76 array([-99, 1, 2, ..., -7, 88, 99])
78 The returned array is always 1D.
80 >>> y = [[1, 2, 4], [1, 6, 24]]
81 >>> np.ediff1d(y)
82 array([ 1, 2, -3, 5, 18])
84 """
85 conv = _array_converter(ary)
86 # Convert to (any) array and ravel:
87 ary = conv[0].ravel()
89 # enforce that the dtype of `ary` is used for the output
90 dtype_req = ary.dtype
92 # fast track default case
93 if to_begin is None and to_end is None:
94 return ary[1:] - ary[:-1]
96 if to_begin is None:
97 l_begin = 0
98 else:
99 to_begin = np.asanyarray(to_begin)
100 if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
101 raise TypeError("dtype of `to_begin` must be compatible "
102 "with input `ary` under the `same_kind` rule.")
104 to_begin = to_begin.ravel()
105 l_begin = len(to_begin)
107 if to_end is None:
108 l_end = 0
109 else:
110 to_end = np.asanyarray(to_end)
111 if not np.can_cast(to_end, dtype_req, casting="same_kind"):
112 raise TypeError("dtype of `to_end` must be compatible "
113 "with input `ary` under the `same_kind` rule.")
115 to_end = to_end.ravel()
116 l_end = len(to_end)
118 # do the calculation in place and copy to_begin and to_end
119 l_diff = max(len(ary) - 1, 0)
120 result = np.empty_like(ary, shape=l_diff + l_begin + l_end)
122 if l_begin > 0:
123 result[:l_begin] = to_begin
124 if l_end > 0:
125 result[l_begin + l_diff:] = to_end
126 np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])
128 return conv.wrap(result)
131def _unpack_tuple(x):
132 """ Unpacks one-element tuples for use as return values """
133 if len(x) == 1:
134 return x[0]
135 else:
136 return x
139def _unique_dispatcher(ar, return_index=None, return_inverse=None,
140 return_counts=None, axis=None, *, equal_nan=None):
141 return (ar,)
144@array_function_dispatch(_unique_dispatcher)
145def unique(ar, return_index=False, return_inverse=False,
146 return_counts=False, axis=None, *, equal_nan=True):
147 """
148 Find the unique elements of an array.
150 Returns the sorted unique elements of an array. There are three optional
151 outputs in addition to the unique elements:
153 * the indices of the input array that give the unique values
154 * the indices of the unique array that reconstruct the input array
155 * the number of times each unique value comes up in the input array
157 Parameters
158 ----------
159 ar : array_like
160 Input array. Unless `axis` is specified, this will be flattened if it
161 is not already 1-D.
162 return_index : bool, optional
163 If True, also return the indices of `ar` (along the specified axis,
164 if provided, or in the flattened array) that result in the unique array.
165 return_inverse : bool, optional
166 If True, also return the indices of the unique array (for the specified
167 axis, if provided) that can be used to reconstruct `ar`.
168 return_counts : bool, optional
169 If True, also return the number of times each unique item appears
170 in `ar`.
171 axis : int or None, optional
172 The axis to operate on. If None, `ar` will be flattened. If an integer,
173 the subarrays indexed by the given axis will be flattened and treated
174 as the elements of a 1-D array with the dimension of the given axis,
175 see the notes for more details. Object arrays or structured arrays
176 that contain objects are not supported if the `axis` kwarg is used. The
177 default is None.
179 .. versionadded:: 1.13.0
181 equal_nan : bool, optional
182 If True, collapses multiple NaN values in the return array into one.
184 .. versionadded:: 1.24
186 Returns
187 -------
188 unique : ndarray
189 The sorted unique values.
190 unique_indices : ndarray, optional
191 The indices of the first occurrences of the unique values in the
192 original array. Only provided if `return_index` is True.
193 unique_inverse : ndarray, optional
194 The indices to reconstruct the original array from the
195 unique array. Only provided if `return_inverse` is True.
196 unique_counts : ndarray, optional
197 The number of times each of the unique values comes up in the
198 original array. Only provided if `return_counts` is True.
200 .. versionadded:: 1.9.0
202 See Also
203 --------
204 repeat : Repeat elements of an array.
206 Notes
207 -----
208 When an axis is specified the subarrays indexed by the axis are sorted.
209 This is done by making the specified axis the first dimension of the array
210 (move the axis to the first dimension to keep the order of the other axes)
211 and then flattening the subarrays in C order. The flattened subarrays are
212 then viewed as a structured type with each element given a label, with the
213 effect that we end up with a 1-D array of structured types that can be
214 treated in the same way as any other 1-D array. The result is that the
215 flattened subarrays are sorted in lexicographic order starting with the
216 first element.
218 .. versionchanged: 1.21
219 If nan values are in the input array, a single nan is put
220 to the end of the sorted unique values.
222 Also for complex arrays all NaN values are considered equivalent
223 (no matter whether the NaN is in the real or imaginary part).
224 As the representant for the returned array the smallest one in the
225 lexicographical order is chosen - see np.sort for how the lexicographical
226 order is defined for complex arrays.
228 .. versionchanged: 2.0
229 For multi-dimensional inputs, ``unique_inverse`` is reshaped
230 such that the input can be reconstructed using
231 ``np.take(unique, unique_inverse)`` when ``axis = None``, and
232 ``np.take_along_axis(unique, unique_inverse, axis=axis)`` otherwise.
234 Examples
235 --------
236 >>> np.unique([1, 1, 2, 2, 3, 3])
237 array([1, 2, 3])
238 >>> a = np.array([[1, 1], [2, 3]])
239 >>> np.unique(a)
240 array([1, 2, 3])
242 Return the unique rows of a 2D array
244 >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
245 >>> np.unique(a, axis=0)
246 array([[1, 0, 0], [2, 3, 4]])
248 Return the indices of the original array that give the unique values:
250 >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
251 >>> u, indices = np.unique(a, return_index=True)
252 >>> u
253 array(['a', 'b', 'c'], dtype='<U1')
254 >>> indices
255 array([0, 1, 3])
256 >>> a[indices]
257 array(['a', 'b', 'c'], dtype='<U1')
259 Reconstruct the input array from the unique values and inverse:
261 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
262 >>> u, indices = np.unique(a, return_inverse=True)
263 >>> u
264 array([1, 2, 3, 4, 6])
265 >>> indices
266 array([0, 1, 4, 3, 1, 2, 1])
267 >>> u[indices]
268 array([1, 2, 6, 4, 2, 3, 2])
270 Reconstruct the input values from the unique values and counts:
272 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
273 >>> values, counts = np.unique(a, return_counts=True)
274 >>> values
275 array([1, 2, 3, 4, 6])
276 >>> counts
277 array([1, 3, 1, 1, 1])
278 >>> np.repeat(values, counts)
279 array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved
281 """
282 ar = np.asanyarray(ar)
283 if axis is None:
284 ret = _unique1d(ar, return_index, return_inverse, return_counts,
285 equal_nan=equal_nan, inverse_shape=ar.shape)
286 return _unpack_tuple(ret)
288 # axis was specified and not None
289 try:
290 ar = np.moveaxis(ar, axis, 0)
291 except np.exceptions.AxisError:
292 # this removes the "axis1" or "axis2" prefix from the error message
293 raise np.exceptions.AxisError(axis, ar.ndim) from None
294 inverse_shape = [1] * ar.ndim
295 inverse_shape[axis] = ar.shape[0]
297 # Must reshape to a contiguous 2D array for this to work...
298 orig_shape, orig_dtype = ar.shape, ar.dtype
299 ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
300 ar = np.ascontiguousarray(ar)
301 dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]
303 # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
304 # data type with `m` fields where each field has the data type of `ar`.
305 # In the following, we create the array `consolidated`, which has
306 # shape `(n,)` with data type `dtype`.
307 try:
308 if ar.shape[1] > 0:
309 consolidated = ar.view(dtype)
310 else:
311 # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
312 # a data type with itemsize 0, and the call `ar.view(dtype)` will
313 # fail. Instead, we'll use `np.empty` to explicitly create the
314 # array with shape `(len(ar),)`. Because `dtype` in this case has
315 # itemsize 0, the total size of the result is still 0 bytes.
316 consolidated = np.empty(len(ar), dtype=dtype)
317 except TypeError as e:
318 # There's no good way to do this for object arrays, etc...
319 msg = 'The axis argument to unique is not supported for dtype {dt}'
320 raise TypeError(msg.format(dt=ar.dtype)) from e
322 def reshape_uniq(uniq):
323 n = len(uniq)
324 uniq = uniq.view(orig_dtype)
325 uniq = uniq.reshape(n, *orig_shape[1:])
326 uniq = np.moveaxis(uniq, 0, axis)
327 return uniq
329 output = _unique1d(consolidated, return_index,
330 return_inverse, return_counts,
331 equal_nan=equal_nan, inverse_shape=inverse_shape)
332 output = (reshape_uniq(output[0]),) + output[1:]
333 return _unpack_tuple(output)
336def _unique1d(ar, return_index=False, return_inverse=False,
337 return_counts=False, *, equal_nan=True, inverse_shape=None):
338 """
339 Find the unique elements of an array, ignoring shape.
340 """
341 ar = np.asanyarray(ar).flatten()
343 optional_indices = return_index or return_inverse
345 if optional_indices:
346 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
347 aux = ar[perm]
348 else:
349 ar.sort()
350 aux = ar
351 mask = np.empty(aux.shape, dtype=np.bool)
352 mask[:1] = True
353 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
354 np.isnan(aux[-1])):
355 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent
356 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
357 else:
358 aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
359 if aux_firstnan > 0:
360 mask[1:aux_firstnan] = (
361 aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
362 mask[aux_firstnan] = True
363 mask[aux_firstnan + 1:] = False
364 else:
365 mask[1:] = aux[1:] != aux[:-1]
367 ret = (aux[mask],)
368 if return_index:
369 ret += (perm[mask],)
370 if return_inverse:
371 imask = np.cumsum(mask) - 1
372 inv_idx = np.empty(mask.shape, dtype=np.intp)
373 inv_idx[perm] = imask
374 ret += (inv_idx.reshape(inverse_shape),)
375 if return_counts:
376 idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
377 ret += (np.diff(idx),)
378 return ret
381# Array API set functions
383class UniqueAllResult(NamedTuple):
384 values: np.ndarray
385 indices: np.ndarray
386 inverse_indices: np.ndarray
387 counts: np.ndarray
390class UniqueCountsResult(NamedTuple):
391 values: np.ndarray
392 counts: np.ndarray
395class UniqueInverseResult(NamedTuple):
396 values: np.ndarray
397 inverse_indices: np.ndarray
400def _unique_all_dispatcher(x, /):
401 return (x,)
404@array_function_dispatch(_unique_all_dispatcher)
405def unique_all(x):
406 """
407 Find the unique elements of an array, and counts, inverse and indices.
409 This function is an Array API compatible alternative to:
411 >>> x = np.array([1, 1, 2])
412 >>> np.unique(x, return_index=True, return_inverse=True,
413 ... return_counts=True, equal_nan=False)
414 (array([1, 2]), array([0, 2]), array([0, 0, 1]), array([2, 1]))
416 Parameters
417 ----------
418 x : array_like
419 Input array. It will be flattened if it is not already 1-D.
421 Returns
422 -------
423 out : namedtuple
424 The result containing:
426 * values - The unique elements of an input array.
427 * indices - The first occurring indices for each unique element.
428 * inverse_indices - The indices from the set of unique elements
429 that reconstruct `x`.
430 * counts - The corresponding counts for each unique element.
432 See Also
433 --------
434 unique : Find the unique elements of an array.
436 """
437 result = unique(
438 x,
439 return_index=True,
440 return_inverse=True,
441 return_counts=True,
442 equal_nan=False
443 )
444 return UniqueAllResult(*result)
447def _unique_counts_dispatcher(x, /):
448 return (x,)
451@array_function_dispatch(_unique_counts_dispatcher)
452def unique_counts(x):
453 """
454 Find the unique elements and counts of an input array `x`.
456 This function is an Array API compatible alternative to:
458 >>> x = np.array([1, 1, 2])
459 >>> np.unique(x, return_counts=True, equal_nan=False)
460 (array([1, 2]), array([2, 1]))
462 Parameters
463 ----------
464 x : array_like
465 Input array. It will be flattened if it is not already 1-D.
467 Returns
468 -------
469 out : namedtuple
470 The result containing:
472 * values - The unique elements of an input array.
473 * counts - The corresponding counts for each unique element.
475 See Also
476 --------
477 unique : Find the unique elements of an array.
479 """
480 result = unique(
481 x,
482 return_index=False,
483 return_inverse=False,
484 return_counts=True,
485 equal_nan=False
486 )
487 return UniqueCountsResult(*result)
490def _unique_inverse_dispatcher(x, /):
491 return (x,)
494@array_function_dispatch(_unique_inverse_dispatcher)
495def unique_inverse(x):
496 """
497 Find the unique elements of `x` and indices to reconstruct `x`.
499 This function is Array API compatible alternative to:
501 >>> x = np.array([1, 1, 2])
502 >>> np.unique(x, return_inverse=True, equal_nan=False)
503 (array([1, 2]), array([0, 0, 1]))
505 Parameters
506 ----------
507 x : array_like
508 Input array. It will be flattened if it is not already 1-D.
510 Returns
511 -------
512 out : namedtuple
513 The result containing:
515 * values - The unique elements of an input array.
516 * inverse_indices - The indices from the set of unique elements
517 that reconstruct `x`.
519 See Also
520 --------
521 unique : Find the unique elements of an array.
523 """
524 result = unique(
525 x,
526 return_index=False,
527 return_inverse=True,
528 return_counts=False,
529 equal_nan=False
530 )
531 return UniqueInverseResult(*result)
534def _unique_values_dispatcher(x, /):
535 return (x,)
538@array_function_dispatch(_unique_values_dispatcher)
539def unique_values(x):
540 """
541 Returns the unique elements of an input array `x`.
543 This function is Array API compatible alternative to:
545 >>> x = np.array([1, 1, 2])
546 >>> np.unique(x, equal_nan=False)
547 array([1, 2])
549 Parameters
550 ----------
551 x : array_like
552 Input array. It will be flattened if it is not already 1-D.
554 Returns
555 -------
556 out : ndarray
557 The unique elements of an input array.
559 See Also
560 --------
561 unique : Find the unique elements of an array.
563 """
564 return unique(
565 x,
566 return_index=False,
567 return_inverse=False,
568 return_counts=False,
569 equal_nan=False
570 )
573def _intersect1d_dispatcher(
574 ar1, ar2, assume_unique=None, return_indices=None):
575 return (ar1, ar2)
578@array_function_dispatch(_intersect1d_dispatcher)
579def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
580 """
581 Find the intersection of two arrays.
583 Return the sorted, unique values that are in both of the input arrays.
585 Parameters
586 ----------
587 ar1, ar2 : array_like
588 Input arrays. Will be flattened if not already 1D.
589 assume_unique : bool
590 If True, the input arrays are both assumed to be unique, which
591 can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
592 unique, incorrect results and out-of-bounds indices could result.
593 Default is False.
594 return_indices : bool
595 If True, the indices which correspond to the intersection of the two
596 arrays are returned. The first instance of a value is used if there are
597 multiple. Default is False.
599 .. versionadded:: 1.15.0
601 Returns
602 -------
603 intersect1d : ndarray
604 Sorted 1D array of common and unique elements.
605 comm1 : ndarray
606 The indices of the first occurrences of the common values in `ar1`.
607 Only provided if `return_indices` is True.
608 comm2 : ndarray
609 The indices of the first occurrences of the common values in `ar2`.
610 Only provided if `return_indices` is True.
612 Examples
613 --------
614 >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
615 array([1, 3])
617 To intersect more than two arrays, use functools.reduce:
619 >>> from functools import reduce
620 >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
621 array([3])
623 To return the indices of the values common to the input arrays
624 along with the intersected values:
626 >>> x = np.array([1, 1, 2, 3, 4])
627 >>> y = np.array([2, 1, 4, 6])
628 >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
629 >>> x_ind, y_ind
630 (array([0, 2, 4]), array([1, 0, 2]))
631 >>> xy, x[x_ind], y[y_ind]
632 (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))
634 """
635 ar1 = np.asanyarray(ar1)
636 ar2 = np.asanyarray(ar2)
638 if not assume_unique:
639 if return_indices:
640 ar1, ind1 = unique(ar1, return_index=True)
641 ar2, ind2 = unique(ar2, return_index=True)
642 else:
643 ar1 = unique(ar1)
644 ar2 = unique(ar2)
645 else:
646 ar1 = ar1.ravel()
647 ar2 = ar2.ravel()
649 aux = np.concatenate((ar1, ar2))
650 if return_indices:
651 aux_sort_indices = np.argsort(aux, kind='mergesort')
652 aux = aux[aux_sort_indices]
653 else:
654 aux.sort()
656 mask = aux[1:] == aux[:-1]
657 int1d = aux[:-1][mask]
659 if return_indices:
660 ar1_indices = aux_sort_indices[:-1][mask]
661 ar2_indices = aux_sort_indices[1:][mask] - ar1.size
662 if not assume_unique:
663 ar1_indices = ind1[ar1_indices]
664 ar2_indices = ind2[ar2_indices]
666 return int1d, ar1_indices, ar2_indices
667 else:
668 return int1d
671def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
672 return (ar1, ar2)
675@array_function_dispatch(_setxor1d_dispatcher)
676def setxor1d(ar1, ar2, assume_unique=False):
677 """
678 Find the set exclusive-or of two arrays.
680 Return the sorted, unique values that are in only one (not both) of the
681 input arrays.
683 Parameters
684 ----------
685 ar1, ar2 : array_like
686 Input arrays.
687 assume_unique : bool
688 If True, the input arrays are both assumed to be unique, which
689 can speed up the calculation. Default is False.
691 Returns
692 -------
693 setxor1d : ndarray
694 Sorted 1D array of unique values that are in only one of the input
695 arrays.
697 Examples
698 --------
699 >>> a = np.array([1, 2, 3, 2, 4])
700 >>> b = np.array([2, 3, 5, 7, 5])
701 >>> np.setxor1d(a,b)
702 array([1, 4, 5, 7])
704 """
705 if not assume_unique:
706 ar1 = unique(ar1)
707 ar2 = unique(ar2)
709 aux = np.concatenate((ar1, ar2))
710 if aux.size == 0:
711 return aux
713 aux.sort()
714 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
715 return aux[flag[1:] & flag[:-1]]
718def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
719 kind=None):
720 return (ar1, ar2)
723@array_function_dispatch(_in1d_dispatcher)
724def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
725 """
726 Test whether each element of a 1-D array is also present in a second array.
728 .. deprecated:: 2.0
729 Use :func:`isin` instead of `in1d` for new code.
731 Returns a boolean array the same length as `ar1` that is True
732 where an element of `ar1` is in `ar2` and False otherwise.
734 Parameters
735 ----------
736 ar1 : (M,) array_like
737 Input array.
738 ar2 : array_like
739 The values against which to test each value of `ar1`.
740 assume_unique : bool, optional
741 If True, the input arrays are both assumed to be unique, which
742 can speed up the calculation. Default is False.
743 invert : bool, optional
744 If True, the values in the returned array are inverted (that is,
745 False where an element of `ar1` is in `ar2` and True otherwise).
746 Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
747 to (but is faster than) ``np.invert(in1d(a, b))``.
748 kind : {None, 'sort', 'table'}, optional
749 The algorithm to use. This will not affect the final result,
750 but will affect the speed and memory use. The default, None,
751 will select automatically based on memory considerations.
753 * If 'sort', will use a mergesort-based approach. This will have
754 a memory usage of roughly 6 times the sum of the sizes of
755 `ar1` and `ar2`, not accounting for size of dtypes.
756 * If 'table', will use a lookup table approach similar
757 to a counting sort. This is only available for boolean and
758 integer arrays. This will have a memory usage of the
759 size of `ar1` plus the max-min value of `ar2`. `assume_unique`
760 has no effect when the 'table' option is used.
761 * If None, will automatically choose 'table' if
762 the required memory allocation is less than or equal to
763 6 times the sum of the sizes of `ar1` and `ar2`,
764 otherwise will use 'sort'. This is done to not use
765 a large amount of memory by default, even though
766 'table' may be faster in most cases. If 'table' is chosen,
767 `assume_unique` will have no effect.
769 .. versionadded:: 1.8.0
771 Returns
772 -------
773 in1d : (M,) ndarray, bool
774 The values `ar1[in1d]` are in `ar2`.
776 See Also
777 --------
778 isin : Version of this function that preserves the
779 shape of ar1.
781 Notes
782 -----
783 `in1d` can be considered as an element-wise function version of the
784 python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
785 equivalent to ``np.array([item in b for item in a])``.
786 However, this idea fails if `ar2` is a set, or similar (non-sequence)
787 container: As ``ar2`` is converted to an array, in those cases
788 ``asarray(ar2)`` is an object array rather than the expected array of
789 contained values.
791 Using ``kind='table'`` tends to be faster than `kind='sort'` if the
792 following relationship is true:
793 ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
794 but may use greater memory. The default value for `kind` will
795 be automatically selected based only on memory usage, so one may
796 manually set ``kind='table'`` if memory constraints can be relaxed.
798 .. versionadded:: 1.4.0
800 Examples
801 --------
802 >>> test = np.array([0, 1, 2, 5, 0])
803 >>> states = [0, 2]
804 >>> mask = np.in1d(test, states)
805 >>> mask
806 array([ True, False, True, False, True])
807 >>> test[mask]
808 array([0, 2, 0])
809 >>> mask = np.in1d(test, states, invert=True)
810 >>> mask
811 array([False, True, False, True, False])
812 >>> test[mask]
813 array([1, 5])
814 """
816 # Deprecated in NumPy 2.0, 2023-08-18
817 warnings.warn(
818 "`in1d` is deprecated. Use `np.isin` instead.",
819 DeprecationWarning,
820 stacklevel=2
821 )
823 return _in1d(ar1, ar2, assume_unique, invert, kind=kind)
826def _in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
827 # Ravel both arrays, behavior for the first array could be different
828 ar1 = np.asarray(ar1).ravel()
829 ar2 = np.asarray(ar2).ravel()
831 # Ensure that iteration through object arrays yields size-1 arrays
832 if ar2.dtype == object:
833 ar2 = ar2.reshape(-1, 1)
835 if kind not in {None, 'sort', 'table'}:
836 raise ValueError(
837 f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")
839 # Can use the table method if all arrays are integers or boolean:
840 is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
841 use_table_method = is_int_arrays and kind in {None, 'table'}
843 if use_table_method:
844 if ar2.size == 0:
845 if invert:
846 return np.ones_like(ar1, dtype=bool)
847 else:
848 return np.zeros_like(ar1, dtype=bool)
850 # Convert booleans to uint8 so we can use the fast integer algorithm
851 if ar1.dtype == bool:
852 ar1 = ar1.astype(np.uint8)
853 if ar2.dtype == bool:
854 ar2 = ar2.astype(np.uint8)
856 ar2_min = np.min(ar2)
857 ar2_max = np.max(ar2)
859 ar2_range = int(ar2_max) - int(ar2_min)
861 # Constraints on whether we can actually use the table method:
862 # 1. Assert memory usage is not too large
863 below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
864 # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
865 range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
866 # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
867 if ar1.size > 0:
868 ar1_min = np.min(ar1)
869 ar1_max = np.max(ar1)
871 # After masking, the range of ar1 is guaranteed to be
872 # within the range of ar2:
873 ar1_upper = min(int(ar1_max), int(ar2_max))
874 ar1_lower = max(int(ar1_min), int(ar2_min))
876 range_safe_from_overflow &= all((
877 ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
878 ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
879 ))
881 # Optimal performance is for approximately
882 # log10(size) > (log10(range) - 2.27) / 0.927.
883 # However, here we set the requirement that by default
884 # the intermediate array can only be 6x
885 # the combined memory allocation of the original
886 # arrays. See discussion on
887 # https://github.com/numpy/numpy/pull/12065.
889 if (
890 range_safe_from_overflow and
891 (below_memory_constraint or kind == 'table')
892 ):
894 if invert:
895 outgoing_array = np.ones_like(ar1, dtype=bool)
896 else:
897 outgoing_array = np.zeros_like(ar1, dtype=bool)
899 # Make elements 1 where the integer exists in ar2
900 if invert:
901 isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
902 isin_helper_ar[ar2 - ar2_min] = 0
903 else:
904 isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
905 isin_helper_ar[ar2 - ar2_min] = 1
907 # Mask out elements we know won't work
908 basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
909 outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -
910 ar2_min]
912 return outgoing_array
913 elif kind == 'table': # not range_safe_from_overflow
914 raise RuntimeError(
915 "You have specified kind='table', "
916 "but the range of values in `ar2` or `ar1` exceed the "
917 "maximum integer of the datatype. "
918 "Please set `kind` to None or 'sort'."
919 )
920 elif kind == 'table':
921 raise ValueError(
922 "The 'table' method is only "
923 "supported for boolean or integer arrays. "
924 "Please select 'sort' or None for kind."
925 )
928 # Check if one of the arrays may contain arbitrary objects
929 contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
931 # This code is run when
932 # a) the first condition is true, making the code significantly faster
933 # b) the second condition is true (i.e. `ar1` or `ar2` may contain
934 # arbitrary objects), since then sorting is not guaranteed to work
935 if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
936 if invert:
937 mask = np.ones(len(ar1), dtype=bool)
938 for a in ar2:
939 mask &= (ar1 != a)
940 else:
941 mask = np.zeros(len(ar1), dtype=bool)
942 for a in ar2:
943 mask |= (ar1 == a)
944 return mask
946 # Otherwise use sorting
947 if not assume_unique:
948 ar1, rev_idx = np.unique(ar1, return_inverse=True)
949 ar2 = np.unique(ar2)
951 ar = np.concatenate((ar1, ar2))
952 # We need this to be a stable sort, so always use 'mergesort'
953 # here. The values from the first array should always come before
954 # the values from the second array.
955 order = ar.argsort(kind='mergesort')
956 sar = ar[order]
957 if invert:
958 bool_ar = (sar[1:] != sar[:-1])
959 else:
960 bool_ar = (sar[1:] == sar[:-1])
961 flag = np.concatenate((bool_ar, [invert]))
962 ret = np.empty(ar.shape, dtype=bool)
963 ret[order] = flag
965 if assume_unique:
966 return ret[:len(ar1)]
967 else:
968 return ret[rev_idx]
971def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
972 *, kind=None):
973 return (element, test_elements)
976@array_function_dispatch(_isin_dispatcher)
977def isin(element, test_elements, assume_unique=False, invert=False, *,
978 kind=None):
979 """
980 Calculates ``element in test_elements``, broadcasting over `element` only.
981 Returns a boolean array of the same shape as `element` that is True
982 where an element of `element` is in `test_elements` and False otherwise.
984 Parameters
985 ----------
986 element : array_like
987 Input array.
988 test_elements : array_like
989 The values against which to test each value of `element`.
990 This argument is flattened if it is an array or array_like.
991 See notes for behavior with non-array-like parameters.
992 assume_unique : bool, optional
993 If True, the input arrays are both assumed to be unique, which
994 can speed up the calculation. Default is False.
995 invert : bool, optional
996 If True, the values in the returned array are inverted, as if
997 calculating `element not in test_elements`. Default is False.
998 ``np.isin(a, b, invert=True)`` is equivalent to (but faster
999 than) ``np.invert(np.isin(a, b))``.
1000 kind : {None, 'sort', 'table'}, optional
1001 The algorithm to use. This will not affect the final result,
1002 but will affect the speed and memory use. The default, None,
1003 will select automatically based on memory considerations.
1005 * If 'sort', will use a mergesort-based approach. This will have
1006 a memory usage of roughly 6 times the sum of the sizes of
1007 `element` and `test_elements`, not accounting for size of dtypes.
1008 * If 'table', will use a lookup table approach similar
1009 to a counting sort. This is only available for boolean and
1010 integer arrays. This will have a memory usage of the
1011 size of `element` plus the max-min value of `test_elements`.
1012 `assume_unique` has no effect when the 'table' option is used.
1013 * If None, will automatically choose 'table' if
1014 the required memory allocation is less than or equal to
1015 6 times the sum of the sizes of `element` and `test_elements`,
1016 otherwise will use 'sort'. This is done to not use
1017 a large amount of memory by default, even though
1018 'table' may be faster in most cases. If 'table' is chosen,
1019 `assume_unique` will have no effect.
1022 Returns
1023 -------
1024 isin : ndarray, bool
1025 Has the same shape as `element`. The values `element[isin]`
1026 are in `test_elements`.
1028 Notes
1029 -----
1031 `isin` is an element-wise function version of the python keyword `in`.
1032 ``isin(a, b)`` is roughly equivalent to
1033 ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.
1035 `element` and `test_elements` are converted to arrays if they are not
1036 already. If `test_elements` is a set (or other non-sequence collection)
1037 it will be converted to an object array with one element, rather than an
1038 array of the values contained in `test_elements`. This is a consequence
1039 of the `array` constructor's way of handling non-sequence collections.
1040 Converting the set to a list usually gives the desired behavior.
1042 Using ``kind='table'`` tends to be faster than `kind='sort'` if the
1043 following relationship is true:
1044 ``log10(len(test_elements)) >
1045 (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,
1046 but may use greater memory. The default value for `kind` will
1047 be automatically selected based only on memory usage, so one may
1048 manually set ``kind='table'`` if memory constraints can be relaxed.
1050 .. versionadded:: 1.13.0
1052 Examples
1053 --------
1054 >>> element = 2*np.arange(4).reshape((2, 2))
1055 >>> element
1056 array([[0, 2],
1057 [4, 6]])
1058 >>> test_elements = [1, 2, 4, 8]
1059 >>> mask = np.isin(element, test_elements)
1060 >>> mask
1061 array([[False, True],
1062 [ True, False]])
1063 >>> element[mask]
1064 array([2, 4])
1066 The indices of the matched values can be obtained with `nonzero`:
1068 >>> np.nonzero(mask)
1069 (array([0, 1]), array([1, 0]))
1071 The test can also be inverted:
1073 >>> mask = np.isin(element, test_elements, invert=True)
1074 >>> mask
1075 array([[ True, False],
1076 [False, True]])
1077 >>> element[mask]
1078 array([0, 6])
1080 Because of how `array` handles sets, the following does not
1081 work as expected:
1083 >>> test_set = {1, 2, 4, 8}
1084 >>> np.isin(element, test_set)
1085 array([[False, False],
1086 [False, False]])
1088 Casting the set to a list gives the expected result:
1090 >>> np.isin(element, list(test_set))
1091 array([[False, True],
1092 [ True, False]])
1093 """
1094 element = np.asarray(element)
1095 return _in1d(element, test_elements, assume_unique=assume_unique,
1096 invert=invert, kind=kind).reshape(element.shape)
1099def _union1d_dispatcher(ar1, ar2):
1100 return (ar1, ar2)
1103@array_function_dispatch(_union1d_dispatcher)
1104def union1d(ar1, ar2):
1105 """
1106 Find the union of two arrays.
1108 Return the unique, sorted array of values that are in either of the two
1109 input arrays.
1111 Parameters
1112 ----------
1113 ar1, ar2 : array_like
1114 Input arrays. They are flattened if they are not already 1D.
1116 Returns
1117 -------
1118 union1d : ndarray
1119 Unique, sorted union of the input arrays.
1121 Examples
1122 --------
1123 >>> np.union1d([-1, 0, 1], [-2, 0, 2])
1124 array([-2, -1, 0, 1, 2])
1126 To find the union of more than two arrays, use functools.reduce:
1128 >>> from functools import reduce
1129 >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
1130 array([1, 2, 3, 4, 6])
1131 """
1132 return unique(np.concatenate((ar1, ar2), axis=None))
1135def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
1136 return (ar1, ar2)
1139@array_function_dispatch(_setdiff1d_dispatcher)
1140def setdiff1d(ar1, ar2, assume_unique=False):
1141 """
1142 Find the set difference of two arrays.
1144 Return the unique values in `ar1` that are not in `ar2`.
1146 Parameters
1147 ----------
1148 ar1 : array_like
1149 Input array.
1150 ar2 : array_like
1151 Input comparison array.
1152 assume_unique : bool
1153 If True, the input arrays are both assumed to be unique, which
1154 can speed up the calculation. Default is False.
1156 Returns
1157 -------
1158 setdiff1d : ndarray
1159 1D array of values in `ar1` that are not in `ar2`. The result
1160 is sorted when `assume_unique=False`, but otherwise only sorted
1161 if the input is sorted.
1163 Examples
1164 --------
1165 >>> a = np.array([1, 2, 3, 2, 4, 1])
1166 >>> b = np.array([3, 4, 5, 6])
1167 >>> np.setdiff1d(a, b)
1168 array([1, 2])
1170 """
1171 if assume_unique:
1172 ar1 = np.asarray(ar1).ravel()
1173 else:
1174 ar1 = unique(ar1)
1175 ar2 = unique(ar2)
1176 return ar1[_in1d(ar1, ar2, assume_unique=True, invert=True)]