1"""
2Set operations for arrays based on sorting.
3
4Notes
5-----
6
7For floating point arrays, inaccurate results may appear due to usual round-off
8and floating point comparison issues.
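
For example, two values that differ only by round-off are treated as distinct
elements:

>>> len(np.union1d([0.1 + 0.2], [0.3]))
2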

Speed could be gained in some operations by an implementation of
`numpy.sort` that could provide the permutation vectors directly, thus
avoiding calls to `numpy.argsort`.

Original author: Robert Cimrman

"""
import functools

import numpy as np
from numpy.core import overrides


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


__all__ = [
    'ediff1d', 'intersect1d', 'setxor1d', 'union1d', 'setdiff1d', 'unique',
    'in1d', 'isin'
    ]


def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
    return (ary, to_end, to_begin)


@array_function_dispatch(_ediff1d_dispatcher)
def ediff1d(ary, to_end=None, to_begin=None):
    """
    The differences between consecutive elements of an array.

    Parameters
    ----------
    ary : array_like
        If necessary, will be flattened before the differences are taken.
    to_end : array_like, optional
        Number(s) to append at the end of the returned differences.
    to_begin : array_like, optional
        Number(s) to prepend at the beginning of the returned differences.

    Returns
    -------
    ediff1d : ndarray
        The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

    See Also
    --------
    diff, gradient

    Notes
    -----
    When applied to masked arrays, this function drops the mask information
    if the `to_begin` and/or `to_end` parameters are used.

    Examples
    --------
    >>> x = np.array([1, 2, 4, 7, 0])
    >>> np.ediff1d(x)
    array([ 1,  2,  3, -7])

    >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
    array([-99, 1, 2, ..., -7, 88, 99])

    The returned array is always 1D.

    >>> y = [[1, 2, 4], [1, 6, 24]]
    >>> np.ediff1d(y)
    array([ 1,  2, -3,  5, 18])

    """
    # force a 1d array
    ary = np.asanyarray(ary).ravel()

    # enforce that the dtype of `ary` is used for the output
    dtype_req = ary.dtype

    # fast track default case
    if to_begin is None and to_end is None:
        return ary[1:] - ary[:-1]

    if to_begin is None:
        l_begin = 0
    else:
        to_begin = np.asanyarray(to_begin)
        if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_begin` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_begin = to_begin.ravel()
        l_begin = len(to_begin)

    if to_end is None:
        l_end = 0
    else:
        to_end = np.asanyarray(to_end)
        if not np.can_cast(to_end, dtype_req, casting="same_kind"):
            raise TypeError("dtype of `to_end` must be compatible "
                            "with input `ary` under the `same_kind` rule.")

        to_end = to_end.ravel()
        l_end = len(to_end)

    # do the calculation in place and copy to_begin and to_end
    l_diff = max(len(ary) - 1, 0)
    result = np.empty(l_diff + l_begin + l_end, dtype=ary.dtype)
    result = ary.__array_wrap__(result)
    if l_begin > 0:
        result[:l_begin] = to_begin
    if l_end > 0:
        result[l_begin + l_diff:] = to_end
    np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])
    return result


def _unpack_tuple(x):
    """ Unpacks one-element tuples for use as return values """
    if len(x) == 1:
        return x[0]
    else:
        return x


def _unique_dispatcher(ar, return_index=None, return_inverse=None,
                       return_counts=None, axis=None, *, equal_nan=None):
    return (ar,)


@array_function_dispatch(_unique_dispatcher)
def unique(ar, return_index=False, return_inverse=False,
           return_counts=False, axis=None, *, equal_nan=True):
    """
    Find the unique elements of an array.

    Returns the sorted unique elements of an array. There are three optional
    outputs in addition to the unique elements:

    * the indices of the input array that give the unique values
    * the indices of the unique array that reconstruct the input array
    * the number of times each unique value comes up in the input array

    Parameters
    ----------
    ar : array_like
        Input array. Unless `axis` is specified, this will be flattened if it
        is not already 1-D.
    return_index : bool, optional
        If True, also return the indices of `ar` (along the specified axis,
        if provided, or in the flattened array) that result in the unique
        array.
    return_inverse : bool, optional
        If True, also return the indices of the unique array (for the
        specified axis, if provided) that can be used to reconstruct `ar`.
    return_counts : bool, optional
        If True, also return the number of times each unique item appears
        in `ar`.
    axis : int or None, optional
        The axis to operate on. If None, `ar` will be flattened. If an integer,
        the subarrays indexed by the given axis will be flattened and treated
        as the elements of a 1-D array with the dimension of the given axis,
        see the notes for more details. Object arrays or structured arrays
        that contain objects are not supported if the `axis` kwarg is used.
        The default is None.

        .. versionadded:: 1.13.0

    equal_nan : bool, optional
        If True, collapses multiple NaN values in the return array into one.

        .. versionadded:: 1.24

    Returns
    -------
    unique : ndarray
        The sorted unique values.
    unique_indices : ndarray, optional
        The indices of the first occurrences of the unique values in the
        original array. Only provided if `return_index` is True.
    unique_inverse : ndarray, optional
        The indices to reconstruct the original array from the
        unique array. Only provided if `return_inverse` is True.
    unique_counts : ndarray, optional
        The number of times each of the unique values comes up in the
        original array. Only provided if `return_counts` is True.

        .. versionadded:: 1.9.0

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.
    repeat : Repeat elements of an array.

    Notes
    -----
    When an axis is specified the subarrays indexed by the axis are sorted.
    This is done by making the specified axis the first dimension of the array
    (move the axis to the first dimension to keep the order of the other axes)
    and then flattening the subarrays in C order. The flattened subarrays are
    then viewed as a structured type with each element given a label, with the
    effect that we end up with a 1-D array of structured types that can be
    treated in the same way as any other 1-D array. The result is that the
    flattened subarrays are sorted in lexicographic order starting with the
    first element.

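    For example, with ``axis=1`` the columns are compared lexicographically
    and returned in sorted order:

    >>> a = np.array([[1, 0, 1], [2, 3, 2]])
    >>> np.unique(a, axis=1)
    array([[0, 1],
           [3, 2]])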

    .. versionchanged:: 1.21
        If nan values are in the input array, a single nan is put
        to the end of the sorted unique values.

    Also for complex arrays all NaN values are considered equivalent
    (no matter whether the NaN is in the real or imaginary part).
    The representative chosen for the returned array is the smallest one in
    lexicographical order - see ``np.sort`` for how lexicographical order
    is defined for complex arrays.

    Examples
    --------
    >>> np.unique([1, 1, 2, 2, 3, 3])
    array([1, 2, 3])
    >>> a = np.array([[1, 1], [2, 3]])
    >>> np.unique(a)
    array([1, 2, 3])

    Return the unique rows of a 2D array

    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> np.unique(a, axis=0)
    array([[1, 0, 0], [2, 3, 4]])

    Return the indices of the original array that give the unique values:

    >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
    >>> u, indices = np.unique(a, return_index=True)
    >>> u
    array(['a', 'b', 'c'], dtype='<U1')
    >>> indices
    array([0, 1, 3])
    >>> a[indices]
    array(['a', 'b', 'c'], dtype='<U1')

    Reconstruct the input array from the unique values and inverse:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> u, indices = np.unique(a, return_inverse=True)
    >>> u
    array([1, 2, 3, 4, 6])
    >>> indices
    array([0, 1, 4, 3, 1, 2, 1])
    >>> u[indices]
    array([1, 2, 6, 4, 2, 3, 2])

    Reconstruct the input values from the unique values and counts:

    >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
    >>> values, counts = np.unique(a, return_counts=True)
    >>> values
    array([1, 2, 3, 4, 6])
    >>> counts
    array([1, 3, 1, 1, 1])
    >>> np.repeat(values, counts)
    array([1, 2, 2, 2, 3, 4, 6])    # original order not preserved

    """
    ar = np.asanyarray(ar)
    if axis is None:
        ret = _unique1d(ar, return_index, return_inverse, return_counts,
                        equal_nan=equal_nan)
        return _unpack_tuple(ret)

    # axis was specified and not None
    try:
        ar = np.moveaxis(ar, axis, 0)
    except np.AxisError:
        # this removes the "axis1" or "axis2" prefix from the error message
        raise np.AxisError(axis, ar.ndim) from None

    # Must reshape to a contiguous 2D array for this to work...
    orig_shape, orig_dtype = ar.shape, ar.dtype
    ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
    ar = np.ascontiguousarray(ar)
    dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

    # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
    # data type with `m` fields where each field has the data type of `ar`.
    # In the following, we create the array `consolidated`, which has
    # shape `(n,)` with data type `dtype`.
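    # Illustrative example (not executed here): for a (3, 2) int64 input,
    # `dtype` has two fields 'f0' and 'f1', each int64, so every row of `ar`
    # becomes a single structured scalar and row-wise uniqueness reduces to
    # 1-D uniqueness on `consolidated`.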
    try:
        if ar.shape[1] > 0:
            consolidated = ar.view(dtype)
        else:
            # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
            # a data type with itemsize 0, and the call `ar.view(dtype)` will
            # fail. Instead, we'll use `np.empty` to explicitly create the
            # array with shape `(len(ar),)`. Because `dtype` in this case has
            # itemsize 0, the total size of the result is still 0 bytes.
            consolidated = np.empty(len(ar), dtype=dtype)
    except TypeError as e:
        # There's no good way to do this for object arrays, etc...
        msg = 'The axis argument to unique is not supported for dtype {dt}'
        raise TypeError(msg.format(dt=ar.dtype)) from e

    def reshape_uniq(uniq):
        n = len(uniq)
        uniq = uniq.view(orig_dtype)
        uniq = uniq.reshape(n, *orig_shape[1:])
        uniq = np.moveaxis(uniq, 0, axis)
        return uniq

    output = _unique1d(consolidated, return_index,
                       return_inverse, return_counts, equal_nan=equal_nan)
    output = (reshape_uniq(output[0]),) + output[1:]
    return _unpack_tuple(output)


def _unique1d(ar, return_index=False, return_inverse=False,
              return_counts=False, *, equal_nan=True):
    """
    Find the unique elements of an array, ignoring shape.
    """
    ar = np.asanyarray(ar).flatten()

    optional_indices = return_index or return_inverse

    if optional_indices:
        perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
        aux = ar[perm]
    else:
        ar.sort()
        aux = ar
    mask = np.empty(aux.shape, dtype=np.bool_)
    mask[:1] = True
    if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
            np.isnan(aux[-1])):
        if aux.dtype.kind == "c":  # for complex all NaNs are considered equivalent
            aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
        else:
            aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
        if aux_firstnan > 0:
            mask[1:aux_firstnan] = (
                aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
        mask[aux_firstnan] = True
        mask[aux_firstnan + 1:] = False
    else:
        mask[1:] = aux[1:] != aux[:-1]

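    # Illustrative example: for aux = [1, 1, 2, 3, 3] the mask is
    # [True, False, True, True, False], so aux[mask] gives the unique values,
    # perm[mask] gives first-occurrence indices, and cumsum(mask) - 1 labels
    # every element with the index of its unique value (used for the inverse).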
    ret = (aux[mask],)
    if return_index:
        ret += (perm[mask],)
    if return_inverse:
        imask = np.cumsum(mask) - 1
        inv_idx = np.empty(mask.shape, dtype=np.intp)
        inv_idx[perm] = imask
        ret += (inv_idx,)
    if return_counts:
        idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
        ret += (np.diff(idx),)
    return ret


def _intersect1d_dispatcher(
        ar1, ar2, assume_unique=None, return_indices=None):
    return (ar1, ar2)


@array_function_dispatch(_intersect1d_dispatcher)
def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
    """
    Find the intersection of two arrays.

    Return the sorted, unique values that are in both of the input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. Will be flattened if not already 1D.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
        unique, incorrect results and out-of-bounds indices could result.
        Default is False.
    return_indices : bool
        If True, the indices which correspond to the intersection of the two
        arrays are returned. The first instance of a value is used if there
        are multiple. Default is False.

        .. versionadded:: 1.15.0

    Returns
    -------
    intersect1d : ndarray
        Sorted 1D array of common and unique elements.
    comm1 : ndarray
        The indices of the first occurrences of the common values in `ar1`.
        Only provided if `return_indices` is True.
    comm2 : ndarray
        The indices of the first occurrences of the common values in `ar2`.
        Only provided if `return_indices` is True.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
    array([1, 3])

    To intersect more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([3])

    To return the indices of the values common to the input arrays
    along with the intersected values:

    >>> x = np.array([1, 1, 2, 3, 4])
    >>> y = np.array([2, 1, 4, 6])
    >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
    >>> x_ind, y_ind
    (array([0, 2, 4]), array([1, 0, 2]))
    >>> xy, x[x_ind], y[y_ind]
    (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

    """
    ar1 = np.asanyarray(ar1)
    ar2 = np.asanyarray(ar2)

    if not assume_unique:
        if return_indices:
            ar1, ind1 = unique(ar1, return_index=True)
            ar2, ind2 = unique(ar2, return_index=True)
        else:
            ar1 = unique(ar1)
            ar2 = unique(ar2)
    else:
        ar1 = ar1.ravel()
        ar2 = ar2.ravel()

    aux = np.concatenate((ar1, ar2))
    if return_indices:
        aux_sort_indices = np.argsort(aux, kind='mergesort')
        aux = aux[aux_sort_indices]
    else:
        aux.sort()

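    # Since both inputs are unique at this point, a value common to both
    # appears exactly twice in the sorted `aux`; equal adjacent entries
    # therefore mark the intersection.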
    mask = aux[1:] == aux[:-1]
    int1d = aux[:-1][mask]

    if return_indices:
        ar1_indices = aux_sort_indices[:-1][mask]
        ar2_indices = aux_sort_indices[1:][mask] - ar1.size
        if not assume_unique:
            ar1_indices = ind1[ar1_indices]
            ar2_indices = ind2[ar2_indices]

        return int1d, ar1_indices, ar2_indices
    else:
        return int1d


def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
    return (ar1, ar2)


@array_function_dispatch(_setxor1d_dispatcher)
def setxor1d(ar1, ar2, assume_unique=False):
    """
    Find the set exclusive-or of two arrays.

    Return the sorted, unique values that are in only one (not both) of the
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays.
    assume_unique : bool
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.

    Returns
    -------
    setxor1d : ndarray
        Sorted 1D array of unique values that are in only one of the input
        arrays.

    Examples
    --------
    >>> a = np.array([1, 2, 3, 2, 4])
    >>> b = np.array([2, 3, 5, 7, 5])
    >>> np.setxor1d(a, b)
    array([1, 4, 5, 7])

    """
    if not assume_unique:
        ar1 = unique(ar1)
        ar2 = unique(ar2)

    aux = np.concatenate((ar1, ar2))
    if aux.size == 0:
        return aux

    aux.sort()
    flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
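    # After `unique`, every value occurs at most twice in `aux`. `flag` marks
    # the start of each run of equal values (plus a final sentinel), so
    # ``flag[1:] & flag[:-1]`` is True exactly for values that occur once,
    # i.e. values present in only one of the inputs.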
    return aux[flag[1:] & flag[:-1]]


def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
                     kind=None):
    return (ar1, ar2)


@array_function_dispatch(_in1d_dispatcher)
def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
    """
    Test whether each element of a 1-D array is also present in a second array.

    Returns a boolean array the same length as `ar1` that is True
    where an element of `ar1` is in `ar2` and False otherwise.

    We recommend using :func:`isin` instead of `in1d` for new code.

    Parameters
    ----------
    ar1 : (M,) array_like
        Input array.
    ar2 : array_like
        The values against which to test each value of `ar1`.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted (that is,
        False where an element of `ar1` is in `ar2` and True otherwise).
        Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
        to (but is faster than) ``np.invert(in1d(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

        .. versionadded:: 1.8.0

    Returns
    -------
    in1d : (M,) ndarray, bool
        The values `ar1[in1d]` are in `ar2`.

    See Also
    --------
    isin                  : Version of this function that preserves the
                            shape of ar1.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----
    `in1d` can be considered as an element-wise function version of the
    python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly
    equivalent to ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set, or similar (non-sequence)
    container: as ``ar2`` is converted to an array, in those cases
    ``asarray(ar2)`` is an object array rather than the expected array of
    contained values.

    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.
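
    The choice of ``kind`` does not change the result; for example, the table
    approach can be requested explicitly for small integer ranges:

    >>> np.in1d([1, 2, 3], [2, 4], kind='table')
    array([False,  True, False])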

    .. versionadded:: 1.4.0

    Examples
    --------
    >>> test = np.array([0, 1, 2, 5, 0])
    >>> states = [0, 2]
    >>> mask = np.in1d(test, states)
    >>> mask
    array([ True, False,  True, False,  True])
    >>> test[mask]
    array([0, 2, 0])
    >>> mask = np.in1d(test, states, invert=True)
    >>> mask
    array([False,  True, False,  True, False])
    >>> test[mask]
    array([1, 5])
    """
    # Ravel both arrays, behavior for the first array could be different
    ar1 = np.asarray(ar1).ravel()
    ar2 = np.asarray(ar2).ravel()

    # Ensure that iteration through object arrays yields size-1 arrays
    if ar2.dtype == object:
        ar2 = ar2.reshape(-1, 1)

    if kind not in {None, 'sort', 'table'}:
        raise ValueError(
            f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

    # Can use the table method if all arrays are integers or boolean:
    is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
    use_table_method = is_int_arrays and kind in {None, 'table'}

    if use_table_method:
        if ar2.size == 0:
            if invert:
                return np.ones_like(ar1, dtype=bool)
            else:
                return np.zeros_like(ar1, dtype=bool)

        # Convert booleans to uint8 so we can use the fast integer algorithm
        if ar1.dtype == bool:
            ar1 = ar1.astype(np.uint8)
        if ar2.dtype == bool:
            ar2 = ar2.astype(np.uint8)

        ar2_min = np.min(ar2)
        ar2_max = np.max(ar2)

        ar2_range = int(ar2_max) - int(ar2_min)

        # Constraints on whether we can actually use the table method:
        # 1. Assert memory usage is not too large
        below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
        # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
        range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
        # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype
        if ar1.size > 0:
            ar1_min = np.min(ar1)
            ar1_max = np.max(ar1)

            # After masking, the range of ar1 is guaranteed to be
            # within the range of ar2:
            ar1_upper = min(int(ar1_max), int(ar2_max))
            ar1_lower = max(int(ar1_min), int(ar2_min))

            range_safe_from_overflow &= all((
                ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,
                ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min
            ))

        # Optimal performance is for approximately
        # log10(size) > (log10(range) - 2.27) / 0.927.
        # However, here we set the requirement that by default
        # the intermediate array can only be 6x
        # the combined memory allocation of the original
        # arrays. See discussion on
        # https://github.com/numpy/numpy/pull/12065.

        if (
            range_safe_from_overflow and
            (below_memory_constraint or kind == 'table')
        ):

            if invert:
                outgoing_array = np.ones_like(ar1, dtype=bool)
            else:
                outgoing_array = np.zeros_like(ar1, dtype=bool)

            # Make elements 1 where the integer exists in ar2
            if invert:
                isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 0
            else:
                isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
                isin_helper_ar[ar2 - ar2_min] = 1
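
            # Illustrative example (assumed values): ar2 = [3, 5, 9] gives
            # ar2_min = 3 and a helper array of length 7 that is True at
            # offsets 0, 2 and 6, so membership of an in-range value x from
            # ar1 is read off as isin_helper_ar[x - 3].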

            # Mask out elements we know won't work
            basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
            outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -
                                                        ar2_min]

            return outgoing_array
        elif kind == 'table':  # not range_safe_from_overflow
            raise RuntimeError(
                "You have specified kind='table', "
                "but the range of values in `ar2` or `ar1` exceeds the "
                "maximum integer of the datatype. "
                "Please set `kind` to None or 'sort'."
            )
    elif kind == 'table':
        raise ValueError(
            "The 'table' method is only "
            "supported for boolean or integer arrays. "
            "Please select 'sort' or None for kind."
        )

    # Check if one of the arrays may contain arbitrary objects
    contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

    # This code is run when
    # a) the first condition is true, making the code significantly faster
    # b) the second condition is true (i.e. `ar1` or `ar2` may contain
    #    arbitrary objects), since then sorting is not guaranteed to work
    if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
        if invert:
            mask = np.ones(len(ar1), dtype=bool)
            for a in ar2:
                mask &= (ar1 != a)
        else:
            mask = np.zeros(len(ar1), dtype=bool)
            for a in ar2:
                mask |= (ar1 == a)
        return mask

    # Otherwise use sorting
    if not assume_unique:
        ar1, rev_idx = np.unique(ar1, return_inverse=True)
        ar2 = np.unique(ar2)

    ar = np.concatenate((ar1, ar2))
    # We need this to be a stable sort, so always use 'mergesort'
    # here. The values from the first array should always come before
    # the values from the second array.
    order = ar.argsort(kind='mergesort')
    sar = ar[order]
    if invert:
        bool_ar = (sar[1:] != sar[:-1])
    else:
        bool_ar = (sar[1:] == sar[:-1])
    flag = np.concatenate((bool_ar, [invert]))
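    # Because the sort is stable and the values of `ar1` come first, an entry
    # of `ar1` is followed in `sar` by an equal value exactly when that value
    # also occurs in `ar2`; `flag` records this (inverted when `invert` is
    # True) and is scattered back through `order` to the original positions.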
    ret = np.empty(ar.shape, dtype=bool)
    ret[order] = flag

    if assume_unique:
        return ret[:len(ar1)]
    else:
        return ret[rev_idx]


def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
                     *, kind=None):
    return (element, test_elements)


@array_function_dispatch(_isin_dispatcher)
def isin(element, test_elements, assume_unique=False, invert=False, *,
         kind=None):
    """
    Calculates ``element in test_elements``, broadcasting over `element` only.
    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.

    Parameters
    ----------
    element : array_like
        Input array.
    test_elements : array_like
        The values against which to test each value of `element`.
        This argument is flattened if it is an array or array_like.
        See notes for behavior with non-array-like parameters.
    assume_unique : bool, optional
        If True, the input arrays are both assumed to be unique, which
        can speed up the calculation. Default is False.
    invert : bool, optional
        If True, the values in the returned array are inverted, as if
        calculating `element not in test_elements`. Default is False.
        ``np.isin(a, b, invert=True)`` is equivalent to (but faster
        than) ``np.invert(np.isin(a, b))``.
    kind : {None, 'sort', 'table'}, optional
        The algorithm to use. This will not affect the final result,
        but will affect the speed and memory use. The default, None,
        will select automatically based on memory considerations.

        * If 'sort', will use a mergesort-based approach. This will have
          a memory usage of roughly 6 times the sum of the sizes of
          `ar1` and `ar2`, not accounting for size of dtypes.
        * If 'table', will use a lookup table approach similar
          to a counting sort. This is only available for boolean and
          integer arrays. This will have a memory usage of the
          size of `ar1` plus the max-min value of `ar2`. `assume_unique`
          has no effect when the 'table' option is used.
        * If None, will automatically choose 'table' if
          the required memory allocation is less than or equal to
          6 times the sum of the sizes of `ar1` and `ar2`,
          otherwise will use 'sort'. This is done to not use
          a large amount of memory by default, even though
          'table' may be faster in most cases. If 'table' is chosen,
          `assume_unique` will have no effect.

    Returns
    -------
    isin : ndarray, bool
        Has the same shape as `element`. The values `element[isin]`
        are in `test_elements`.

    See Also
    --------
    in1d                  : Flattened version of this function.
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Notes
    -----

    `isin` is an element-wise function version of the python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

    `element` and `test_elements` are converted to arrays if they are not
    already. If `test_elements` is a set (or other non-sequence collection)
    it will be converted to an object array with one element, rather than an
    array of the values contained in `test_elements`. This is a consequence
    of the `array` constructor's way of handling non-sequence collections.
    Converting the set to a list usually gives the desired behavior.

    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
    following relationship is true:
    ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
    but may use greater memory. The default value for `kind` will
    be automatically selected based only on memory usage, so one may
    manually set ``kind='table'`` if memory constraints can be relaxed.

    .. versionadded:: 1.13.0

    Examples
    --------
    >>> element = 2*np.arange(4).reshape((2, 2))
    >>> element
    array([[0, 2],
           [4, 6]])
    >>> test_elements = [1, 2, 4, 8]
    >>> mask = np.isin(element, test_elements)
    >>> mask
    array([[False,  True],
           [ True, False]])
    >>> element[mask]
    array([2, 4])

    The indices of the matched values can be obtained with `nonzero`:

    >>> np.nonzero(mask)
    (array([0, 1]), array([1, 0]))

    The test can also be inverted:

    >>> mask = np.isin(element, test_elements, invert=True)
    >>> mask
    array([[ True, False],
           [False,  True]])
    >>> element[mask]
    array([0, 6])

    Because of how `array` handles sets, the following does not
    work as expected:

    >>> test_set = {1, 2, 4, 8}
    >>> np.isin(element, test_set)
    array([[False, False],
           [False, False]])

    Casting the set to a list gives the expected result:

    >>> np.isin(element, list(test_set))
    array([[False,  True],
           [ True, False]])
    """
    element = np.asarray(element)
    return in1d(element, test_elements, assume_unique=assume_unique,
                invert=invert, kind=kind).reshape(element.shape)


def _union1d_dispatcher(ar1, ar2):
    return (ar1, ar2)


@array_function_dispatch(_union1d_dispatcher)
def union1d(ar1, ar2):
    """
    Find the union of two arrays.

    Return the unique, sorted array of values that are in either of the two
    input arrays.

    Parameters
    ----------
    ar1, ar2 : array_like
        Input arrays. They are flattened if they are not already 1D.

    Returns
    -------
    union1d : ndarray
        Unique, sorted union of the input arrays.

    See Also
    --------
    numpy.lib.arraysetops : Module with a number of other functions for
                            performing set operations on arrays.

    Examples
    --------
    >>> np.union1d([-1, 0, 1], [-2, 0, 2])
    array([-2, -1,  0,  1,  2])

    To find the union of more than two arrays, use functools.reduce:

    >>> from functools import reduce
    >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
    array([1, 2, 3, 4, 6])
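
    The inputs are flattened, so they need not be 1-D or have matching shapes:

    >>> np.union1d([[1, 2], [3, 4]], [5])
    array([1, 2, 3, 4, 5])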
931 """
932 return unique(np.concatenate((ar1, ar2), axis=None))
933
934
935def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
936 return (ar1, ar2)
937
938
939@array_function_dispatch(_setdiff1d_dispatcher)
940def setdiff1d(ar1, ar2, assume_unique=False):
941 """
942 Find the set difference of two arrays.
943
944 Return the unique values in `ar1` that are not in `ar2`.
945
946 Parameters
947 ----------
948 ar1 : array_like
949 Input array.
950 ar2 : array_like
951 Input comparison array.
952 assume_unique : bool
953 If True, the input arrays are both assumed to be unique, which
954 can speed up the calculation. Default is False.
955
956 Returns
957 -------
958 setdiff1d : ndarray
959 1D array of values in `ar1` that are not in `ar2`. The result
960 is sorted when `assume_unique=False`, but otherwise only sorted
961 if the input is sorted.
962
963 See Also
964 --------
965 numpy.lib.arraysetops : Module with a number of other functions for
966 performing set operations on arrays.
967
968 Examples
969 --------
970 >>> a = np.array([1, 2, 3, 2, 4, 1])
971 >>> b = np.array([3, 4, 5, 6])
972 >>> np.setdiff1d(a, b)
973 array([1, 2])
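
    With ``assume_unique=True`` the elements of `ar1` keep their original
    order:

    >>> np.setdiff1d([3, 1, 2], [2], assume_unique=True)
    array([3, 1])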

    """
    if assume_unique:
        ar1 = np.asarray(ar1).ravel()
    else:
        ar1 = unique(ar1)
        ar2 = unique(ar2)
    return ar1[in1d(ar1, ar2, assume_unique=True, invert=True)]