Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/numpy/lib/arraysetops.py: 13%
238 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 06:32 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-05 06:32 +0000
1"""
2Set operations for arrays based on sorting.
4Notes
5-----
7For floating point arrays, inaccurate results may appear due to usual round-off
8and floating point comparison issues.
10Speed could be gained in some operations by an implementation of
11`numpy.sort`, that can provide directly the permutation vectors, thus avoiding
12calls to `numpy.argsort`.
14Original author: Robert Cimrman
16"""
17import functools
19import numpy as np
20from numpy.core import overrides
23array_function_dispatch = functools.partial(
24 overrides.array_function_dispatch, module='numpy')
27__all__ = [
28 'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique',
29 'in1d', 'isin'
30 ]
33def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
34 return (ary, to_end, to_begin)
@array_function_dispatch(_ediff1d_dispatcher)
def ediff1d(ary, to_end=None, to_begin=None):
    """
    The differences between consecutive elements of an array.

    Parameters
    ----------
    ary : array_like
        If necessary, will be flattened before the differences are taken.
    to_end : array_like, optional
        Number(s) to append at the end of the returned differences.
    to_begin : array_like, optional
        Number(s) to prepend at the beginning of the returned differences.

    Returns
    -------
    ediff1d : ndarray
        The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

    Raises
    ------
    TypeError
        If the dtype of `to_begin` or `to_end` cannot be safely cast to
        the dtype of `ary` under the ``same_kind`` rule.

    See Also
    --------
    diff, gradient

    Notes
    -----
    When applied to masked arrays, this function drops the mask information
    if the `to_begin` and/or `to_end` parameters are used.

    Examples
    --------
    >>> x = np.array([1, 2, 4, 7, 0])
    >>> np.ediff1d(x)
    array([ 1,  2,  3, -7])

    >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
    array([-99,   1,   2, ..., -7,  88,  99])

    The returned array is always 1D.

    >>> y = [[1, 2, 4], [1, 6, 24]]
    >>> np.ediff1d(y)
    array([ 1,  2, -3,  5, 18])

    """
    # force a 1d array
    ary = np.asanyarray(ary).ravel()

    # enforce that the dtype of `ary` is used for the output
    dtype_req = ary.dtype

    # fast track default case
    if to_begin is None and to_end is None:
        return ary[1:] - ary[:-1]

    if to_begin is None:
        l_begin = 0
    else:
        to_begin = np.asanyarray(to_begin)
        # Reject prefixes that would be silently coerced to a narrower kind.
        if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_begin` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_begin = to_begin.ravel()
        l_begin = len(to_begin)

    if to_end is None:
        l_end = 0
    else:
        to_end = np.asanyarray(to_end)
        # Same cast check for the appended suffix.
        if not np.can_cast(to_end, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_end` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_end = to_end.ravel()
        l_end = len(to_end)

    # do the calculation in place and copy to_begin and to_end
    # An empty or single-element `ary` produces zero differences.
    l_diff = max(len(ary) - 1, 0)
    result = np.empty(l_diff + l_begin + l_end, dtype=ary.dtype)
    # __array_wrap__ gives the output the array subclass of `ary`.
    result = ary.__array_wrap__(result)
    if l_begin > 0:
        result[:l_begin] = to_begin
    if l_end > 0:
        result[l_begin + l_diff:] = to_end
    # Write the differences directly into the middle slice of `result`.
    np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])
    return result
125def _unpack_tuple(x):
126 """ Unpacks one-element tuples for use as return values """
127 if len(x) == 1:
128 return x[0]
129 else:
130 return x
133def _unique_dispatcher(ar, return_index=None, return_inverse=None,
134 return_counts=None, axis=None, *, equal_nan=None):
135 return (ar,)
@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None, *, equal_nan=True):
    """
    Find the unique elements of an array.

    Returns the sorted unique elements of an array. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input array that give the unique values
    * the indices of the unique array that reconstruct the input array
    * the number of times each unique value comes up in the input array

    Parameters
    ----------
    ar : array_like
        Input array. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened array) that result in the unique
        array.
    return_inverse : bool, optional
        If True, also return the indices of the unique array (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an
        integer, the subarrays indexed by the given axis will be flattened
        and treated as the elements of a 1-D array with the dimension of the
        given axis, see the notes for more details. Object arrays or
        structured arrays that contain objects are not supported if the
        `axis` kwarg is used. The default is None.

        .. versionadded:: 1.13.0

    equal_nan : bool, optional
        If True, collapses multiple NaN values in the return array into one.

        .. versionadded:: 1.24

    Returns
    -------
    unique : ndarray
        The sorted unique values.
    unique_indices : ndarray, optional
        The indices of the first occurrences of the unique values in the
        original array. Only provided if `return_index` is True.
    unique_inverse : ndarray, optional
        The indices to reconstruct the original array from the
        unique array. Only provided if `return_inverse` is True.
    unique_counts : ndarray, optional
        The number of times each of the unique values comes up in the
        original array. Only provided if `return_counts` is True.

        .. versionadded:: 1.9.0

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.
    repeat : Repeat elements of an array.

    Notes
    -----
    When an axis is specified the subarrays indexed by the axis are sorted.
    This is done by making the specified axis the first dimension of the array
    (move the axis to the first dimension to keep the order of the other axes)
    and then flattening the subarrays in C order. The flattened subarrays are
    then viewed as a structured type with each element given a label, with the
    effect that we end up with a 1-D array of structured types that can be
    treated in the same way as any other 1-D array. The result is that the
    flattened subarrays are sorted in lexicographic order starting with the
    first element.

    .. versionchanged: NumPy 1.21
        If nan values are in the input array, a single nan is put
        to the end of the sorted unique values.

        Also for complex arrays all NaN values are considered equivalent
        (no matter whether the NaN is in the real or imaginary part).
        As the representant for the returned array the smallest one in the
        lexicographical order is chosen - see np.sort for how the
        lexicographical order is defined for complex arrays.

    Examples
    --------
    >>> np.unique([1, 1, 2, 2, 3, 3])
    array([1, 2, 3])
    >>> a = np.array([[1, 1], [2, 3]])
    >>> np.unique(a)
    array([1, 2, 3])

    Return the unique rows of a 2D array

    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> np.unique(a, axis=0)
    array([[1, 0, 0],
           [2, 3, 4]])

    Return the indices of the original array that give the unique values:

    >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = np.unique(a, return_index=True)
    >>> u
    array(['a', 'b', 'c'], dtype='<U1')
    >>> indices
    array([0, 1, 3])
    >>> a[indices]
    array(['a', 'b', 'c'], dtype='<U1')

    Reconstruct the input array from the unique values and inverse:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = np.unique(a, return_inverse=True)
    >>> u
    array([1, 2, 3, 4, 6])
    >>> indices
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices]
    array([1, 2, 6, 4, 2, 3, 2])

    Reconstruct the input values from the unique values and counts:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> values, counts = np.unique(a, return_counts=True)
    >>> values
    array([1, 2, 3, 4, 6])
    >>> counts
    array([1, 3, 1, 1, 1])
    >>> np.repeat(values, counts)
    array([1, 2, 2, 2, 3, 4, 6])    # original order not preserved

    """
    ar = np.asanyarray(ar)
    if axis is None:
        # Flattened case: delegate directly to the 1-D implementation.
        ret = _unique1d(ar, return_index, return_inverse, return_counts,
                        equal_nan=equal_nan)
        return _unpack_tuple(ret)

    # axis was specified and not None
    try:
        # Bring the requested axis to the front; the order of the
        # remaining axes is preserved.
        ar = np.moveaxis(ar, axis, 0)
    except np.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.AxisError(axis, ar.ndim) from None

    # Must reshape to a contiguous 2D array for this to work...
    orig_shape, orig_dtype = ar.shape, ar.dtype
    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
    ar = np.ascontiguousarray(ar)
    # One structured field per flattened subarray element, so that each
    # row compares lexicographically as a single value.
    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

    # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
    # data type with `m` fields where each field has the data type of `ar`.
    # In the following, we create the array `consolidated`, which has
    # shape `(n,)` with data type `dtype`.
    try:
        if ar.shape[1] > 0:
            consolidated = ar.view(dtype)
        else:
            # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
            # a data type with itemsize 0, and the call `ar.view(dtype)` will
            # fail. Instead, we'll use `np.empty` to explicitly create the
            # array with shape `(len(ar),)`. Because `dtype` in this case has
            # itemsize 0, the total size of the result is still 0 bytes.
            consolidated = np.empty(len(ar), dtype=dtype)
    except TypeError as e:
        # There's no good way to do this for object arrays, etc...
        msg = 'The axis argument to unique is not supported for dtype {dt}'
        raise TypeError(msg.format(dt=ar.dtype)) from e

    def reshape_uniq(uniq):
        # Undo the consolidation: view the structured rows with the
        # original dtype, restore the trailing shape, and move the unique
        # axis back to its requested position.
        n = len(uniq)
        uniq = uniq.view(orig_dtype)
        uniq = uniq.reshape(n, *orig_shape[1:])
        uniq = np.moveaxis(uniq, 0, axis)
        return uniq

    output = _unique1d(consolidated, return_index,
                       return_inverse, return_counts, equal_nan=equal_nan)
    output = (reshape_uniq(output[0]),) + output[1:]
    return _unpack_tuple(output)
323def _unique1d(ar, return_index=False, return_inverse=False,
324 return_counts=False, *, equal_nan=True):
325 """
326 Find the unique elements of an array, ignoring shape.
327 """
328 ar = np.asanyarray(ar).flatten()
330 optional_indices = return_index or return_inverse
332 if optional_indices:
333 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
334 aux = ar[perm]
335 else:
336 ar.sort()
337 aux = ar
338 mask = np.empty(aux.shape, dtype=np.bool_)
339 mask[:1] = True
340 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
341 np.isnan(aux[-1])):
342 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent
343 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
344 else:
345 aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
346 if aux_firstnan > 0:
347 mask[1:aux_firstnan] = (
348 aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
349 mask[aux_firstnan] = True
350 mask[aux_firstnan + 1:] = False
351 else:
352 mask[1:] = aux[1:] != aux[:-1]
354 ret = (aux[mask],)
355 if return_index:
356 ret += (perm[mask],)
357 if return_inverse:
358 imask = np.cumsum(mask) - 1
359 inv_idx = np.empty(mask.shape, dtype=np.intp)
360 inv_idx[perm] = imask
361 ret += (inv_idx,)
362 if return_counts:
363 idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
364 ret += (np.diff(idx),)
365 return ret
368def _intersect1d_dispatcher(
369 ar1, ar2, assume_unique=None, return_indices=None):
370 return (ar1, ar2)
@array_function_dispatch(_intersect1d_dispatcher)
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. If True but ``ar1`` or ``ar2`` are
        not unique, incorrect results and out-of-bounds indices could
        result. Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the
        two arrays are returned. The first instance of a value is used if
        there are multiple. Default is False.

        .. versionadded:: 1.15.0

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in
        `ar1`. Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in
        `ar2`. Only provided if `return_indices` is True.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if assume_unique:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()
    elif return_indices:
        # Deduplicate while remembering where each unique value came from,
        # so indices into the original inputs can be recovered below.
        ar1, ind1 = unique(ar1, return_index=True)
        ar2, ind2 = unique(ar2, return_index=True)
    else:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    combined = np.concatenate((ar1, ar2))
    if return_indices:
        # A stable sort keeps each `ar1` element ahead of an equal `ar2`
        # element, so the pair positions below identify their sources.
        sort_order = np.argsort(combined, kind='mergesort')
        combined = combined[sort_order]
    else:
        combined.sort()

    # With both halves individually unique, a value common to both inputs
    # appears exactly twice, i.e. as a pair of equal neighbours.
    is_pair = combined[1:] == combined[:-1]
    common = combined[:-1][is_pair]

    if not return_indices:
        return common

    indices1 = sort_order[:-1][is_pair]
    indices2 = sort_order[1:][is_pair] - ar1.size
    if not assume_unique:
        # Map positions in the deduplicated arrays back to the originals.
        indices1 = ind1[indices1]
        indices2 = ind2[indices2]

    return common, indices1, indices2
472def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
473 return (ar1, ar2)
@array_function_dispatch(_setxor1d_dispatcher)
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a,b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    merged = np.concatenate((ar1, ar2))
    if merged.size == 0:
        return merged

    merged.sort()
    # With both halves individually unique, a value in both inputs shows up
    # twice in a row after sorting.  Keep only values that differ from both
    # sorted neighbours, i.e. those appearing exactly once.
    boundary = np.concatenate(([True], merged[1:] != merged[:-1], [True]))
    return merged[boundary[1:] & boundary[:-1]]
519def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
520 kind=None):
521 return (ar1, ar2)
@array_function_dispatch(_in1d_dispatcher)
def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
    """
    Test whether each element of a 1-D array is also present in a second array.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin : Version of this function that preserves the
           shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container: As ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    Using ``kind='table'`` tends to be faster than `kind='sort'` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Ensure that iteration through object arrays yields size-1 arrays
    if ar2.dtype == object:
        ar2 = ar2.reshape(-1, 1)

    if kind not in {None, 'sort', 'table'}:
        raise ValueError(
            f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

    # Can use the table method if all arrays are integers or boolean:
    is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
    use_table_method = is_int_arrays and kind in {None, 'table'}

    if use_table_method:
        if ar2.size == 0:
            # Nothing can be a member of an empty ar2.
            if invert:
                return np.ones_like(ar1, dtype=bool)
            else:
                return np.zeros_like(ar1, dtype=bool)

        # Convert booleans to uint8 so we can use the fast integer algorithm
        if ar1.dtype == bool:
            ar1 = ar1.astype(np.uint8)
        if ar2.dtype == bool:
            ar2 = ar2.astype(np.uint8)

        ar2_min = np.min(ar2)
        ar2_max = np.max(ar2)

        # Computed in Python ints to avoid wrap-around in the subtraction.
        ar2_range = int(ar2_max) - int(ar2_min)

        # Constraints on whether we can actually use the table method:
        # 1. Assert memory usage is not too large
        below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
        # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
        range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
        # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
        if ar1.size > 0:
            ar1_min = np.min(ar1)
            ar1_max = np.max(ar1)

            # After masking, the range of ar1 is guaranteed to be
            # within the range of ar2:
            ar1_upper = min(int(ar1_max), int(ar2_max))
            ar1_lower = max(int(ar1_min), int(ar2_min))

            range_safe_from_overflow &= all((
                ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
                ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
            ))

        # Optimal performance is for approximately
        # log10(size) > (log10(range) - 2.27) / 0.927.
        # However, here we set the requirement that by default
        # the intermediate array can only be 6x
        # the combined memory allocation of the original
        # arrays. See discussion on
        # https://github.com/numpy/numpy/pull/12065.

        if (
            range_safe_from_overflow and
            (below_memory_constraint or kind == 'table')
        ):

            if invert:
                outgoing_array = np.ones_like(ar1, dtype=bool)
            else:
                outgoing_array = np.zeros_like(ar1, dtype=bool)

            # Make elements 1 where the integer exists in ar2
            if invert:
                isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 0
            else:
                isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 1

            # Mask out elements we know won't work
            basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
            outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -
                                                        ar2_min]

            return outgoing_array
        elif kind == 'table':  # not range_safe_from_overflow
            raise RuntimeError(
                "You have specified kind='table', "
                "but the range of values in `ar2` or `ar1` exceed the "
                "maximum integer of the datatype. "
                "Please set `kind` to None or 'sort'."
            )
    elif kind == 'table':
        raise ValueError(
            "The 'table' method is only "
            "supported for boolean or integer arrays. "
            "Please select 'sort' or None for kind."
        )

    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        # Brute-force comparison of each ar2 value against all of ar1.
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        # rev_idx maps each original ar1 position to its unique value,
        # used at the end to expand the per-unique-value result.
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    # An ar1 value that is present in ar2 is immediately followed by an
    # equal value in the stable-sorted concatenation.
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
    # The last sorted element has no successor; `invert` is the correct
    # flag for it in both modes.
    flag = np.concatenate((bool_ar, [invert]))
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        # The first len(ar1) entries correspond to ar1's elements.
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]
761def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
762 *, kind=None):
763 return (element, test_elements)
@array_function_dispatch(_isin_dispatcher)
def isin(element, test_elements, assume_unique=False, invert=False, *,
         kind=None):
    """
    Calculates ``element in test_elements``, broadcasting over `element`
    only.  Returns a boolean array of the same shape as `element` that is
    True where an element of `element` is in `test_elements` and False
    otherwise.

    Parameters
    ----------
    element : array_like
        Input array.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is an array or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``np.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``np.invert(np.isin(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

    Returns
    -------
    isin : ndarray, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    See Also
    --------
    in1d : Flattened version of this function.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])`` if `a` and `b` are 1-D
    sequences.

    `element` and `test_elements` are converted to arrays if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object array with one element, rather than
    an array of the values contained in `test_elements`. This is a
    consequence of the `array` constructor's way of handling non-sequence
    collections. Converting the set to a list usually gives the desired
    behavior.

    Using ``kind='table'`` tends to be faster than `kind='sort'` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    .. versionadded:: 1.13.0

    Examples
    --------
    >>> element = 2*np.arange(4).reshape((2, 2))
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = np.isin(element, test_elements)
    >>> mask
    array([[False,  True],
           [ True, False]])
    >>> element[mask]
    array([2, 4])

    The test can also be inverted:

    >>> mask = np.isin(element, test_elements, invert=True)
    >>> element[mask]
    array([0, 6])

    Because of how `array` handles sets, cast a set argument to a list
    first: ``np.isin(element, list(test_set))``.
    """
    # Delegate to the flat implementation, then restore the input's shape.
    element = np.asarray(element)
    flat_result = in1d(element, test_elements, assume_unique=assume_unique,
                       invert=invert, kind=kind)
    return flat_result.reshape(element.shape)
894def _union1d_dispatcher(ar1, ar2):
895 return (ar1, ar2)
@array_function_dispatch(_union1d_dispatcher)
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
    """
    # ``axis=None`` flattens both inputs before concatenating them.
    combined = np.concatenate((ar1, ar2), axis=None)
    return unique(combined)
935def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
936 return (ar1, ar2)
@array_function_dispatch(_setdiff1d_dispatcher)
def setdiff1d(ar1, ar2, assume_unique=False):
    """
    Find the set difference of two arrays.

    Return the unique values in `ar1` that are not in `ar2`.

    Parameters
    ----------
    ar1 : array_like
        Input array.
    ar2 : array_like
        Input comparison array.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setdiff1d : ndarray
        1D array of values in `ar1` that are not in `ar2`. The result
        is sorted when `assume_unique=False`, but otherwise only sorted
        if the input is sorted.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4, 1])
    >>> b = np.array([3, 4, 5, 6])
    >>> np.setdiff1d(a, b)
    array([1, 2])

    """
    if assume_unique:
        ar1 = np.asarray(ar1).ravel()
    else:
        # Deduplicating via `unique` also sorts, which is why the result
        # is sorted in this branch.
        ar1 = unique(ar1)
        ar2 = unique(ar2)
    # After the branch above both arrays are unique, so the membership
    # test can safely skip its own deduplication.
    keep = in1d(ar1, ar2, assume_unique=True, invert=True)
    return ar1[keep]