Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_arraysetops

1"""

2Set operations for arrays based on sorting.

4Notes

5-----

7For floating point arrays, inaccurate results may appear due to usual round-off

8and floating point comparison issues.

10Speed could be gained in some operations by an implementation of

11`numpy.sort`, that can provide directly the permutation vectors, thus avoiding

12calls to `numpy.argsort`.

14Original author: Robert Cimrman

16"""

17import functools

18import warnings

19from typing import NamedTuple

21import numpy as np

22from numpy._core import overrides

23from numpy._core._multiarray_umath import _array_converter

26array_function_dispatch = functools.partial(

27 overrides.array_function_dispatch, module='numpy')

30__all__ = [

31 "ediff1d", "in1d", "intersect1d", "isin", "setdiff1d", "setxor1d",

32 "union1d", "unique", "unique_all", "unique_counts", "unique_inverse",

33 "unique_values"

34]

37def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):

38 return (ary, to_end, to_begin)

41@array_function_dispatch(_ediff1d_dispatcher)

42def ediff1d(ary, to_end=None, to_begin=None):

43 """

44 The differences between consecutive elements of an array.

46 Parameters

47 ----------

48 ary : array_like

49 If necessary, will be flattened before the differences are taken.

50 to_end : array_like, optional

51 Number(s) to append at the end of the returned differences.

52 to_begin : array_like, optional

53 Number(s) to prepend at the beginning of the returned differences.

55 Returns

56 -------

57 ediff1d : ndarray

58 The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

60 See Also

61 --------

62 diff, gradient

64 Notes

65 -----

66 When applied to masked arrays, this function drops the mask information

67 if the `to_begin` and/or `to_end` parameters are used.

69 Examples

70 --------

71 >>> x = np.array([1, 2, 4, 7, 0])

72 >>> np.ediff1d(x)

73 array([ 1, 2, 3, -7])

75 >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))

76 array([-99, 1, 2, ..., -7, 88, 99])

78 The returned array is always 1D.

80 >>> y = [[1, 2, 4], [1, 6, 24]]

81 >>> np.ediff1d(y)

82 array([ 1, 2, -3, 5, 18])

84 """

85 conv = _array_converter(ary)

86 # Convert to (any) array and ravel:

87 ary = conv[0].ravel()

89 # enforce that the dtype of `ary` is used for the output

90 dtype_req = ary.dtype

92 # fast track default case

93 if to_begin is None and to_end is None:

94 return ary[1:] - ary[:-1]

96 if to_begin is None:

97 l_begin = 0

98 else:

99 to_begin = np.asanyarray(to_begin)

100 if not np.can_cast(to_begin, dtype_req, casting="same_kind"):

101 raise TypeError("dtype of `to_begin` must be compatible "

102 "with input `ary` under the `same_kind` rule.")

103

104 to_begin = to_begin.ravel()

105 l_begin = len(to_begin)

106

107 if to_end is None:

108 l_end = 0

109 else:

110 to_end = np.asanyarray(to_end)

111 if not np.can_cast(to_end, dtype_req, casting="same_kind"):

112 raise TypeError("dtype of `to_end` must be compatible "

113 "with input `ary` under the `same_kind` rule.")

114

115 to_end = to_end.ravel()

116 l_end = len(to_end)

117

118 # do the calculation in place and copy to_begin and to_end

119 l_diff = max(len(ary) - 1, 0)

120 result = np.empty_like(ary, shape=l_diff + l_begin + l_end)

121

122 if l_begin > 0:

123 result[:l_begin] = to_begin

124 if l_end > 0:

125 result[l_begin + l_diff:] = to_end

126 np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])

127

128 return conv.wrap(result)

129

130

131def _unpack_tuple(x):

132 """ Unpacks one-element tuples for use as return values """

133 if len(x) == 1:

134 return x[0]

135 else:

136 return x

137

138

139def _unique_dispatcher(ar, return_index=None, return_inverse=None,

140 return_counts=None, axis=None, *, equal_nan=None):

141 return (ar,)

142

143

144@array_function_dispatch(_unique_dispatcher)

145def unique(ar, return_index=False, return_inverse=False,

146 return_counts=False, axis=None, *, equal_nan=True):

147 """

148 Find the unique elements of an array.

149

150 Returns the sorted unique elements of an array. There are three optional

151 outputs in addition to the unique elements:

152

153 * the indices of the input array that give the unique values

154 * the indices of the unique array that reconstruct the input array

155 * the number of times each unique value comes up in the input array

156

157 Parameters

158 ----------

159 ar : array_like

160 Input array. Unless `axis` is specified, this will be flattened if it

161 is not already 1-D.

162 return_index : bool, optional

163 If True, also return the indices of `ar` (along the specified axis,

164 if provided, or in the flattened array) that result in the unique array.

165 return_inverse : bool, optional

166 If True, also return the indices of the unique array (for the specified

167 axis, if provided) that can be used to reconstruct `ar`.

168 return_counts : bool, optional

169 If True, also return the number of times each unique item appears

170 in `ar`.

171 axis : int or None, optional

172 The axis to operate on. If None, `ar` will be flattened. If an integer,

173 the subarrays indexed by the given axis will be flattened and treated

174 as the elements of a 1-D array with the dimension of the given axis,

175 see the notes for more details. Object arrays or structured arrays

176 that contain objects are not supported if the `axis` kwarg is used. The

177 default is None.

178

179 .. versionadded:: 1.13.0

180

181 equal_nan : bool, optional

182 If True, collapses multiple NaN values in the return array into one.

183

184 .. versionadded:: 1.24

185

186 Returns

187 -------

188 unique : ndarray

189 The sorted unique values.

190 unique_indices : ndarray, optional

191 The indices of the first occurrences of the unique values in the

192 original array. Only provided if `return_index` is True.

193 unique_inverse : ndarray, optional

194 The indices to reconstruct the original array from the

195 unique array. Only provided if `return_inverse` is True.

196 unique_counts : ndarray, optional

197 The number of times each of the unique values comes up in the

198 original array. Only provided if `return_counts` is True.

199

200 .. versionadded:: 1.9.0

201

202 See Also

203 --------

204 repeat : Repeat elements of an array.

205

206 Notes

207 -----

208 When an axis is specified the subarrays indexed by the axis are sorted.

209 This is done by making the specified axis the first dimension of the array

210 (move the axis to the first dimension to keep the order of the other axes)

211 and then flattening the subarrays in C order. The flattened subarrays are

212 then viewed as a structured type with each element given a label, with the

213 effect that we end up with a 1-D array of structured types that can be

214 treated in the same way as any other 1-D array. The result is that the

215 flattened subarrays are sorted in lexicographic order starting with the

216 first element.

217

218 .. versionchanged: 1.21

219 If nan values are in the input array, a single nan is put

220 to the end of the sorted unique values.

221

222 Also for complex arrays all NaN values are considered equivalent

223 (no matter whether the NaN is in the real or imaginary part).

224 As the representant for the returned array the smallest one in the

225 lexicographical order is chosen - see np.sort for how the lexicographical

226 order is defined for complex arrays.

227

228 .. versionchanged: 2.0

229 For multi-dimensional inputs, ``unique_inverse`` is reshaped

230 such that the input can be reconstructed using

231 ``np.take(unique, unique_inverse)`` when ``axis = None``, and

232 ``np.take_along_axis(unique, unique_inverse, axis=axis)`` otherwise.

233

234 Examples

235 --------

236 >>> np.unique([1, 1, 2, 2, 3, 3])

237 array([1, 2, 3])

238 >>> a = np.array([[1, 1], [2, 3]])

239 >>> np.unique(a)

240 array([1, 2, 3])

241

242 Return the unique rows of a 2D array

243

244 >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])

245 >>> np.unique(a, axis=0)

246 array([[1, 0, 0], [2, 3, 4]])

247

248 Return the indices of the original array that give the unique values:

249

250 >>> a = np.array(['a', 'b', 'b', 'c', 'a'])

251 >>> u, indices = np.unique(a, return_index=True)

252 >>> u

253 array(['a', 'b', 'c'], dtype='<U1')

254 >>> indices

255 array([0, 1, 3])

256 >>> a[indices]

257 array(['a', 'b', 'c'], dtype='<U1')

258

259 Reconstruct the input array from the unique values and inverse:

260

261 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])

262 >>> u, indices = np.unique(a, return_inverse=True)

263 >>> u

264 array([1, 2, 3, 4, 6])

265 >>> indices

266 array([0, 1, 4, 3, 1, 2, 1])

267 >>> u[indices]

268 array([1, 2, 6, 4, 2, 3, 2])

269

270 Reconstruct the input values from the unique values and counts:

271

272 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])

273 >>> values, counts = np.unique(a, return_counts=True)

274 >>> values

275 array([1, 2, 3, 4, 6])

276 >>> counts

277 array([1, 3, 1, 1, 1])

278 >>> np.repeat(values, counts)

279 array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved

280

281 """

282 ar = np.asanyarray(ar)

283 if axis is None:

284 ret = _unique1d(ar, return_index, return_inverse, return_counts,

285 equal_nan=equal_nan, inverse_shape=ar.shape)

286 return _unpack_tuple(ret)

287

288 # axis was specified and not None

289 try:

290 ar = np.moveaxis(ar, axis, 0)

291 except np.exceptions.AxisError:

292 # this removes the "axis1" or "axis2" prefix from the error message

293 raise np.exceptions.AxisError(axis, ar.ndim) from None

294 inverse_shape = [1] * ar.ndim

295 inverse_shape[axis] = ar.shape[0]

296

297 # Must reshape to a contiguous 2D array for this to work...

298 orig_shape, orig_dtype = ar.shape, ar.dtype

299 ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))

300 ar = np.ascontiguousarray(ar)

301 dtype = [('f{i}'.format(i=i), ar.dtype) for i in range(ar.shape[1])]

302

303 # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured

304 # data type with `m` fields where each field has the data type of `ar`.

305 # In the following, we create the array `consolidated`, which has

306 # shape `(n,)` with data type `dtype`.

307 try:

308 if ar.shape[1] > 0:

309 consolidated = ar.view(dtype)

310 else:

311 # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is

312 # a data type with itemsize 0, and the call `ar.view(dtype)` will

313 # fail. Instead, we'll use `np.empty` to explicitly create the

314 # array with shape `(len(ar),)`. Because `dtype` in this case has

315 # itemsize 0, the total size of the result is still 0 bytes.

316 consolidated = np.empty(len(ar), dtype=dtype)

317 except TypeError as e:

318 # There's no good way to do this for object arrays, etc...

319 msg = 'The axis argument to unique is not supported for dtype {dt}'

320 raise TypeError(msg.format(dt=ar.dtype)) from e

321

322 def reshape_uniq(uniq):

323 n = len(uniq)

324 uniq = uniq.view(orig_dtype)

325 uniq = uniq.reshape(n, *orig_shape[1:])

326 uniq = np.moveaxis(uniq, 0, axis)

327 return uniq

328

329 output = _unique1d(consolidated, return_index,

330 return_inverse, return_counts,

331 equal_nan=equal_nan, inverse_shape=inverse_shape)

332 output = (reshape_uniq(output[0]),) + output[1:]

333 return _unpack_tuple(output)

334

335

336def _unique1d(ar, return_index=False, return_inverse=False,

337 return_counts=False, *, equal_nan=True, inverse_shape=None):

338 """

339 Find the unique elements of an array, ignoring shape.

340 """

341 ar = np.asanyarray(ar).flatten()

342

343 optional_indices = return_index or return_inverse

344

345 if optional_indices:

346 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')

347 aux = ar[perm]

348 else:

349 ar.sort()

350 aux = ar

351 mask = np.empty(aux.shape, dtype=np.bool)

352 mask[:1] = True

353 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and

354 np.isnan(aux[-1])):

355 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent

356 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')

357 else:

358 aux_firstnan = np.searchsorted(aux, aux[-1], side='left')

359 if aux_firstnan > 0:

360 mask[1:aux_firstnan] = (

361 aux[1:aux_firstnan] != aux[:aux_firstnan - 1])

362 mask[aux_firstnan] = True

363 mask[aux_firstnan + 1:] = False

364 else:

365 mask[1:] = aux[1:] != aux[:-1]

366

367 ret = (aux[mask],)

368 if return_index:

369 ret += (perm[mask],)

370 if return_inverse:

371 imask = np.cumsum(mask) - 1

372 inv_idx = np.empty(mask.shape, dtype=np.intp)

373 inv_idx[perm] = imask

374 ret += (inv_idx.reshape(inverse_shape),)

375 if return_counts:

376 idx = np.concatenate(np.nonzero(mask) + ([mask.size],))

377 ret += (np.diff(idx),)

378 return ret

379

380

381# Array API set functions

382

383class UniqueAllResult(NamedTuple):

384 values: np.ndarray

385 indices: np.ndarray

386 inverse_indices: np.ndarray

387 counts: np.ndarray

388

389

390class UniqueCountsResult(NamedTuple):

391 values: np.ndarray

392 counts: np.ndarray

393

394

395class UniqueInverseResult(NamedTuple):

396 values: np.ndarray

397 inverse_indices: np.ndarray

398

399

400def _unique_all_dispatcher(x, /):

401 return (x,)

402

403

404@array_function_dispatch(_unique_all_dispatcher)

405def unique_all(x):

406 """

407 Find the unique elements of an array, and counts, inverse and indices.

408

409 This function is an Array API compatible alternative to:

410

411 >>> x = np.array([1, 1, 2])

412 >>> np.unique(x, return_index=True, return_inverse=True,

413 ... return_counts=True, equal_nan=False)

414 (array([1, 2]), array([0, 2]), array([0, 0, 1]), array([2, 1]))

415

416 Parameters

417 ----------

418 x : array_like

419 Input array. It will be flattened if it is not already 1-D.

420

421 Returns

422 -------

423 out : namedtuple

424 The result containing:

425

426 * values - The unique elements of an input array.

427 * indices - The first occurring indices for each unique element.

428 * inverse_indices - The indices from the set of unique elements

429 that reconstruct `x`.

430 * counts - The corresponding counts for each unique element.

431

432 See Also

433 --------

434 unique : Find the unique elements of an array.

435

436 """

437 result = unique(

438 x,

439 return_index=True,

440 return_inverse=True,

441 return_counts=True,

442 equal_nan=False

443 )

444 return UniqueAllResult(*result)

445

446

447def _unique_counts_dispatcher(x, /):

448 return (x,)

449

450

451@array_function_dispatch(_unique_counts_dispatcher)

452def unique_counts(x):

453 """

454 Find the unique elements and counts of an input array `x`.

455

456 This function is an Array API compatible alternative to:

457

458 >>> x = np.array([1, 1, 2])

459 >>> np.unique(x, return_counts=True, equal_nan=False)

460 (array([1, 2]), array([2, 1]))

461

462 Parameters

463 ----------

464 x : array_like

465 Input array. It will be flattened if it is not already 1-D.

466

467 Returns

468 -------

469 out : namedtuple

470 The result containing:

471

472 * values - The unique elements of an input array.

473 * counts - The corresponding counts for each unique element.

474

475 See Also

476 --------

477 unique : Find the unique elements of an array.

478

479 """

480 result = unique(

481 x,

482 return_index=False,

483 return_inverse=False,

484 return_counts=True,

485 equal_nan=False

486 )

487 return UniqueCountsResult(*result)

488

489

490def _unique_inverse_dispatcher(x, /):

491 return (x,)

492

493

494@array_function_dispatch(_unique_inverse_dispatcher)

495def unique_inverse(x):

496 """

497 Find the unique elements of `x` and indices to reconstruct `x`.

498

499 This function is Array API compatible alternative to:

500

501 >>> x = np.array([1, 1, 2])

502 >>> np.unique(x, return_inverse=True, equal_nan=False)

503 (array([1, 2]), array([0, 0, 1]))

504

505 Parameters

506 ----------

507 x : array_like

508 Input array. It will be flattened if it is not already 1-D.

509

510 Returns

511 -------

512 out : namedtuple

513 The result containing:

514

515 * values - The unique elements of an input array.

516 * inverse_indices - The indices from the set of unique elements

517 that reconstruct `x`.

518

519 See Also

520 --------

521 unique : Find the unique elements of an array.

522

523 """

524 result = unique(

525 x,

526 return_index=False,

527 return_inverse=True,

528 return_counts=False,

529 equal_nan=False

530 )

531 return UniqueInverseResult(*result)

532

533

534def _unique_values_dispatcher(x, /):

535 return (x,)

536

537

538@array_function_dispatch(_unique_values_dispatcher)

539def unique_values(x):

540 """

541 Returns the unique elements of an input array `x`.

542

543 This function is Array API compatible alternative to:

544

545 >>> x = np.array([1, 1, 2])

546 >>> np.unique(x, equal_nan=False)

547 array([1, 2])

548

549 Parameters

550 ----------

551 x : array_like

552 Input array. It will be flattened if it is not already 1-D.

553

554 Returns

555 -------

556 out : ndarray

557 The unique elements of an input array.

558

559 See Also

560 --------

561 unique : Find the unique elements of an array.

562

563 """

564 return unique(

565 x,

566 return_index=False,

567 return_inverse=False,

568 return_counts=False,

569 equal_nan=False

570 )

571

572

573def _intersect1d_dispatcher(

574 ar1, ar2, assume_unique=None, return_indices=None):

575 return (ar1, ar2)

576

577

578@array_function_dispatch(_intersect1d_dispatcher)

579def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):

580 """

581 Find the intersection of two arrays.

582

583 Return the sorted, unique values that are in both of the input arrays.

584

585 Parameters

586 ----------

587 ar1, ar2 : array_like

588 Input arrays. Will be flattened if not already 1D.

589 assume_unique : bool

590 If True, the input arrays are both assumed to be unique, which

591 can speed up the calculation. If True but ``ar1`` or ``ar2`` are not

592 unique, incorrect results and out-of-bounds indices could result.

593 Default is False.

594 return_indices : bool

595 If True, the indices which correspond to the intersection of the two

596 arrays are returned. The first instance of a value is used if there are

597 multiple. Default is False.

598

599 .. versionadded:: 1.15.0

600

601 Returns

602 -------

603 intersect1d : ndarray

604 Sorted 1D array of common and unique elements.

605 comm1 : ndarray

606 The indices of the first occurrences of the common values in `ar1`.

607 Only provided if `return_indices` is True.

608 comm2 : ndarray

609 The indices of the first occurrences of the common values in `ar2`.

610 Only provided if `return_indices` is True.

611

612 Examples

613 --------

614 >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])

615 array([1, 3])

616

617 To intersect more than two arrays, use functools.reduce:

618

619 >>> from functools import reduce

620 >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))

621 array([3])

622

623 To return the indices of the values common to the input arrays

624 along with the intersected values:

625

626 >>> x = np.array([1, 1, 2, 3, 4])

627 >>> y = np.array([2, 1, 4, 6])

628 >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)

629 >>> x_ind, y_ind

630 (array([0, 2, 4]), array([1, 0, 2]))

631 >>> xy, x[x_ind], y[y_ind]

632 (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

633

634 """

635 ar1 = np.asanyarray(ar1)

636 ar2 = np.asanyarray(ar2)

637

638 if not assume_unique:

639 if return_indices:

640 ar1, ind1 = unique(ar1, return_index=True)

641 ar2, ind2 = unique(ar2, return_index=True)

642 else:

643 ar1 = unique(ar1)

644 ar2 = unique(ar2)

645 else:

646 ar1 = ar1.ravel()

647 ar2 = ar2.ravel()

648

649 aux = np.concatenate((ar1, ar2))

650 if return_indices:

651 aux_sort_indices = np.argsort(aux, kind='mergesort')

652 aux = aux[aux_sort_indices]

653 else:

654 aux.sort()

655

656 mask = aux[1:] == aux[:-1]

657 int1d = aux[:-1][mask]

658

659 if return_indices:

660 ar1_indices = aux_sort_indices[:-1][mask]

661 ar2_indices = aux_sort_indices[1:][mask] - ar1.size

662 if not assume_unique:

663 ar1_indices = ind1[ar1_indices]

664 ar2_indices = ind2[ar2_indices]

665

666 return int1d, ar1_indices, ar2_indices

667 else:

668 return int1d

669

670

671def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):

672 return (ar1, ar2)

673

674

675@array_function_dispatch(_setxor1d_dispatcher)

676def setxor1d(ar1, ar2, assume_unique=False):

677 """

678 Find the set exclusive-or of two arrays.

679

680 Return the sorted, unique values that are in only one (not both) of the

681 input arrays.

682

683 Parameters

684 ----------

685 ar1, ar2 : array_like

686 Input arrays.

687 assume_unique : bool

688 If True, the input arrays are both assumed to be unique, which

689 can speed up the calculation. Default is False.

690

691 Returns

692 -------

693 setxor1d : ndarray

694 Sorted 1D array of unique values that are in only one of the input

695 arrays.

696

697 Examples

698 --------

699 >>> a = np.array([1, 2, 3, 2, 4])

700 >>> b = np.array([2, 3, 5, 7, 5])

701 >>> np.setxor1d(a,b)

702 array([1, 4, 5, 7])

703

704 """

705 if not assume_unique:

706 ar1 = unique(ar1)

707 ar2 = unique(ar2)

708

709 aux = np.concatenate((ar1, ar2))

710 if aux.size == 0:

711 return aux

712

713 aux.sort()

714 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))

715 return aux[flag[1:] & flag[:-1]]

716

717

718def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,

719 kind=None):

720 return (ar1, ar2)

721

722

723@array_function_dispatch(_in1d_dispatcher)

724def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):

725 """

726 Test whether each element of a 1-D array is also present in a second array.

727

728 .. deprecated:: 2.0

729 Use :func:`isin` instead of `in1d` for new code.

730

731 Returns a boolean array the same length as `ar1` that is True

732 where an element of `ar1` is in `ar2` and False otherwise.

733

734 Parameters

735 ----------

736 ar1 : (M,) array_like

737 Input array.

738 ar2 : array_like

739 The values against which to test each value of `ar1`.

740 assume_unique : bool, optional

741 If True, the input arrays are both assumed to be unique, which

742 can speed up the calculation. Default is False.

743 invert : bool, optional

744 If True, the values in the returned array are inverted (that is,

745 False where an element of `ar1` is in `ar2` and True otherwise).

746 Default is False. ``np.in1d(a, b, invert=True)`` is equivalent

747 to (but is faster than) ``np.invert(in1d(a, b))``.

748 kind : {None, 'sort', 'table'}, optional

749 The algorithm to use. This will not affect the final result,

750 but will affect the speed and memory use. The default, None,

751 will select automatically based on memory considerations.

752

753 * If 'sort', will use a mergesort-based approach. This will have

754 a memory usage of roughly 6 times the sum of the sizes of

755 `ar1` and `ar2`, not accounting for size of dtypes.

756 * If 'table', will use a lookup table approach similar

757 to a counting sort. This is only available for boolean and

758 integer arrays. This will have a memory usage of the

759 size of `ar1` plus the max-min value of `ar2`. `assume_unique`

760 has no effect when the 'table' option is used.

761 * If None, will automatically choose 'table' if

762 the required memory allocation is less than or equal to

763 6 times the sum of the sizes of `ar1` and `ar2`,

764 otherwise will use 'sort'. This is done to not use

765 a large amount of memory by default, even though

766 'table' may be faster in most cases. If 'table' is chosen,

767 `assume_unique` will have no effect.

768

769 .. versionadded:: 1.8.0

770

771 Returns

772 -------

773 in1d : (M,) ndarray, bool

774 The values `ar1[in1d]` are in `ar2`.

775

776 See Also

777 --------

778 isin : Version of this function that preserves the

779 shape of ar1.

780

781 Notes

782 -----

783 `in1d` can be considered as an element-wise function version of the

784 python keyword `in`, for 1-D sequences. ``in1d(a, b)`` is roughly

785 equivalent to ``np.array([item in b for item in a])``.

786 However, this idea fails if `ar2` is a set, or similar (non-sequence)

787 container: As ``ar2`` is converted to an array, in those cases

788 ``asarray(ar2)`` is an object array rather than the expected array of

789 contained values.

790

791 Using ``kind='table'`` tends to be faster than `kind='sort'` if the

792 following relationship is true:

793 ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,

794 but may use greater memory. The default value for `kind` will

795 be automatically selected based only on memory usage, so one may

796 manually set ``kind='table'`` if memory constraints can be relaxed.

797

798 .. versionadded:: 1.4.0

799

800 Examples

801 --------

802 >>> test = np.array([0, 1, 2, 5, 0])

803 >>> states = [0, 2]

804 >>> mask = np.in1d(test, states)

805 >>> mask

806 array([ True, False, True, False, True])

807 >>> test[mask]

808 array([0, 2, 0])

809 >>> mask = np.in1d(test, states, invert=True)

810 >>> mask

811 array([False, True, False, True, False])

812 >>> test[mask]

813 array([1, 5])

814 """

815

816 # Deprecated in NumPy 2.0, 2023-08-18

817 warnings.warn(

818 "`in1d` is deprecated. Use `np.isin` instead.",

819 DeprecationWarning,

820 stacklevel=2

821 )

822

823 return _in1d(ar1, ar2, assume_unique, invert, kind=kind)

824

825

826def _in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):

827 # Ravel both arrays, behavior for the first array could be different

828 ar1 = np.asarray(ar1).ravel()

829 ar2 = np.asarray(ar2).ravel()

830

831 # Ensure that iteration through object arrays yields size-1 arrays

832 if ar2.dtype == object:

833 ar2 = ar2.reshape(-1, 1)

834

835 if kind not in {None, 'sort', 'table'}:

836 raise ValueError(

837 f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

838

839 # Can use the table method if all arrays are integers or boolean:

840 is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))

841 use_table_method = is_int_arrays and kind in {None, 'table'}

842

843 if use_table_method:

844 if ar2.size == 0:

845 if invert:

846 return np.ones_like(ar1, dtype=bool)

847 else:

848 return np.zeros_like(ar1, dtype=bool)

849

850 # Convert booleans to uint8 so we can use the fast integer algorithm

851 if ar1.dtype == bool:

852 ar1 = ar1.astype(np.uint8)

853 if ar2.dtype == bool:

854 ar2 = ar2.astype(np.uint8)

855

856 ar2_min = np.min(ar2)

857 ar2_max = np.max(ar2)

858

859 ar2_range = int(ar2_max) - int(ar2_min)

860

861 # Constraints on whether we can actually use the table method:

862 # 1. Assert memory usage is not too large

863 below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)

864 # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype

865 range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max

866 # 3. Check overflows for (ar1 - ar2_min); dtype=ar1.dtype

867 if ar1.size > 0:

868 ar1_min = np.min(ar1)

869 ar1_max = np.max(ar1)

870

871 # After masking, the range of ar1 is guaranteed to be

872 # within the range of ar2:

873 ar1_upper = min(int(ar1_max), int(ar2_max))

874 ar1_lower = max(int(ar1_min), int(ar2_min))

875

876 range_safe_from_overflow &= all((

877 ar1_upper - int(ar2_min) <= np.iinfo(ar1.dtype).max,

878 ar1_lower - int(ar2_min) >= np.iinfo(ar1.dtype).min

879 ))

880

881 # Optimal performance is for approximately

882 # log10(size) > (log10(range) - 2.27) / 0.927.

883 # However, here we set the requirement that by default

884 # the intermediate array can only be 6x

885 # the combined memory allocation of the original

886 # arrays. See discussion on

887 # https://github.com/numpy/numpy/pull/12065.

888

889 if (

890 range_safe_from_overflow and

891 (below_memory_constraint or kind == 'table')

892 ):

893

894 if invert:

895 outgoing_array = np.ones_like(ar1, dtype=bool)

896 else:

897 outgoing_array = np.zeros_like(ar1, dtype=bool)

898

899 # Make elements 1 where the integer exists in ar2

900 if invert:

901 isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)

902 isin_helper_ar[ar2 - ar2_min] = 0

903 else:

904 isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)

905 isin_helper_ar[ar2 - ar2_min] = 1

906

907 # Mask out elements we know won't work

908 basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)

909 outgoing_array[basic_mask] = isin_helper_ar[ar1[basic_mask] -

910 ar2_min]

911

912 return outgoing_array

913 elif kind == 'table': # not range_safe_from_overflow

914 raise RuntimeError(

915 "You have specified kind='table', "

916 "but the range of values in `ar2` or `ar1` exceed the "

917 "maximum integer of the datatype. "

918 "Please set `kind` to None or 'sort'."

919 )

920 elif kind == 'table':

921 raise ValueError(

922 "The 'table' method is only "

923 "supported for boolean or integer arrays. "

924 "Please select 'sort' or None for kind."

925 )

926

927

928 # Check if one of the arrays may contain arbitrary objects

929 contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

930

931 # This code is run when

932 # a) the first condition is true, making the code significantly faster

933 # b) the second condition is true (i.e. `ar1` or `ar2` may contain

934 # arbitrary objects), since then sorting is not guaranteed to work

935 if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:

936 if invert:

937 mask = np.ones(len(ar1), dtype=bool)

938 for a in ar2:

939 mask &= (ar1 != a)

940 else:

941 mask = np.zeros(len(ar1), dtype=bool)

942 for a in ar2:

943 mask |= (ar1 == a)

944 return mask

945

946 # Otherwise use sorting

947 if not assume_unique:

948 ar1, rev_idx = np.unique(ar1, return_inverse=True)

949 ar2 = np.unique(ar2)

950

951 ar = np.concatenate((ar1, ar2))

952 # We need this to be a stable sort, so always use 'mergesort'

953 # here. The values from the first array should always come before

954 # the values from the second array.

955 order = ar.argsort(kind='mergesort')

956 sar = ar[order]

957 if invert:

958 bool_ar = (sar[1:] != sar[:-1])

959 else:

960 bool_ar = (sar[1:] == sar[:-1])

961 flag = np.concatenate((bool_ar, [invert]))

962 ret = np.empty(ar.shape, dtype=bool)

963 ret[order] = flag

964

965 if assume_unique:

966 return ret[:len(ar1)]

967 else:

968 return ret[rev_idx]

969

970

971def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,

972 *, kind=None):

973 return (element, test_elements)

974

975

976@array_function_dispatch(_isin_dispatcher)

977def isin(element, test_elements, assume_unique=False, invert=False, *,

978 kind=None):

979 """

980 Calculates ``element in test_elements``, broadcasting over `element` only.

981 Returns a boolean array of the same shape as `element` that is True

982 where an element of `element` is in `test_elements` and False otherwise.

983

984 Parameters

985 ----------

986 element : array_like

987 Input array.

988 test_elements : array_like

989 The values against which to test each value of `element`.

990 This argument is flattened if it is an array or array_like.

991 See notes for behavior with non-array-like parameters.

992 assume_unique : bool, optional

993 If True, the input arrays are both assumed to be unique, which

994 can speed up the calculation. Default is False.

995 invert : bool, optional

996 If True, the values in the returned array are inverted, as if

997 calculating `element not in test_elements`. Default is False.

998 ``np.isin(a, b, invert=True)`` is equivalent to (but faster

999 than) ``np.invert(np.isin(a, b))``.

1000 kind : {None, 'sort', 'table'}, optional

1001 The algorithm to use. This will not affect the final result,

1002 but will affect the speed and memory use. The default, None,

1003 will select automatically based on memory considerations.

1004

1005 * If 'sort', will use a mergesort-based approach. This will have

1006 a memory usage of roughly 6 times the sum of the sizes of

1007 `element` and `test_elements`, not accounting for size of dtypes.

1008 * If 'table', will use a lookup table approach similar

1009 to a counting sort. This is only available for boolean and

1010 integer arrays. This will have a memory usage of the

1011 size of `element` plus the max-min value of `test_elements`.

1012 `assume_unique` has no effect when the 'table' option is used.

1013 * If None, will automatically choose 'table' if

1014 the required memory allocation is less than or equal to

1015 6 times the sum of the sizes of `element` and `test_elements`,

1016 otherwise will use 'sort'. This is done to not use

1017 a large amount of memory by default, even though

1018 'table' may be faster in most cases. If 'table' is chosen,

1019 `assume_unique` will have no effect.

1020

1021

1022 Returns

1023 -------

1024 isin : ndarray, bool

1025 Has the same shape as `element`. The values `element[isin]`

1026 are in `test_elements`.

1027

1028 Notes

1029 -----

1030

1031 `isin` is an element-wise function version of the python keyword `in`.

1032 ``isin(a, b)`` is roughly equivalent to

1033 ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

1034

1035 `element` and `test_elements` are converted to arrays if they are not

1036 already. If `test_elements` is a set (or other non-sequence collection)

1037 it will be converted to an object array with one element, rather than an

1038 array of the values contained in `test_elements`. This is a consequence

1039 of the `array` constructor's way of handling non-sequence collections.

1040 Converting the set to a list usually gives the desired behavior.

1041

1042 Using ``kind='table'`` tends to be faster than `kind='sort'` if the

1043 following relationship is true:

1044 ``log10(len(test_elements)) >

1045 (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,

1046 but may use greater memory. The default value for `kind` will

1047 be automatically selected based only on memory usage, so one may

1048 manually set ``kind='table'`` if memory constraints can be relaxed.

1049

1050 .. versionadded:: 1.13.0

1051

1052 Examples

1053 --------

1054 >>> element = 2*np.arange(4).reshape((2, 2))

1055 >>> element

1056 array([[0, 2],

1057 [4, 6]])

1058 >>> test_elements = [1, 2, 4, 8]

1059 >>> mask = np.isin(element, test_elements)

1060 >>> mask

1061 array([[False, True],

1062 [ True, False]])

1063 >>> element[mask]

1064 array([2, 4])

1065

1066 The indices of the matched values can be obtained with `nonzero`:

1067

1068 >>> np.nonzero(mask)

1069 (array([0, 1]), array([1, 0]))

1070

1071 The test can also be inverted:

1072

1073 >>> mask = np.isin(element, test_elements, invert=True)

1074 >>> mask

1075 array([[ True, False],

1076 [False, True]])

1077 >>> element[mask]

1078 array([0, 6])

1079

1080 Because of how `array` handles sets, the following does not

1081 work as expected:

1082

1083 >>> test_set = {1, 2, 4, 8}

1084 >>> np.isin(element, test_set)

1085 array([[False, False],

1086 [False, False]])

1087

1088 Casting the set to a list gives the expected result:

1089

1090 >>> np.isin(element, list(test_set))

1091 array([[False, True],

1092 [ True, False]])

1093 """

1094 element = np.asarray(element)

1095 return _in1d(element, test_elements, assume_unique=assume_unique,

1096 invert=invert, kind=kind).reshape(element.shape)

1097

1098

1099def _union1d_dispatcher(ar1, ar2):

1100 return (ar1, ar2)

1101

1102

1103@array_function_dispatch(_union1d_dispatcher)

1104def union1d(ar1, ar2):

1105 """

1106 Find the union of two arrays.

1107

1108 Return the unique, sorted array of values that are in either of the two

1109 input arrays.

1110

1111 Parameters

1112 ----------

1113 ar1, ar2 : array_like

1114 Input arrays. They are flattened if they are not already 1D.

1115

1116 Returns

1117 -------

1118 union1d : ndarray

1119 Unique, sorted union of the input arrays.

1120

1121 Examples

1122 --------

1123 >>> np.union1d([-1, 0, 1], [-2, 0, 2])

1124 array([-2, -1, 0, 1, 2])

1125

1126 To find the union of more than two arrays, use functools.reduce:

1127

1128 >>> from functools import reduce

1129 >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))

1130 array([1, 2, 3, 4, 6])

1131 """

1132 return unique(np.concatenate((ar1, ar2), axis=None))

1133

1134

1135def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):

1136 return (ar1, ar2)

1137

1138

1139@array_function_dispatch(_setdiff1d_dispatcher)

1140def setdiff1d(ar1, ar2, assume_unique=False):

1141 """

1142 Find the set difference of two arrays.

1143

1144 Return the unique values in `ar1` that are not in `ar2`.

1145

1146 Parameters

1147 ----------

1148 ar1 : array_like

1149 Input array.

1150 ar2 : array_like

1151 Input comparison array.

1152 assume_unique : bool

1153 If True, the input arrays are both assumed to be unique, which

1154 can speed up the calculation. Default is False.

1155

1156 Returns

1157 -------

1158 setdiff1d : ndarray

1159 1D array of values in `ar1` that are not in `ar2`. The result

1160 is sorted when `assume_unique=False`, but otherwise only sorted

1161 if the input is sorted.

1162

1163 Examples

1164 --------

1165 >>> a = np.array([1, 2, 3, 2, 4, 1])

1166 >>> b = np.array([3, 4, 5, 6])

1167 >>> np.setdiff1d(a, b)

1168 array([1, 2])

1169

1170 """

1171 if assume_unique:

1172 ar1 = np.asarray(ar1).ravel()

1173 else:

1174 ar1 = unique(ar1)

1175 ar2 = unique(ar2)

1176 return ar1[_in1d(ar1, ar2, assume_unique=True, invert=True)]

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/numpy/lib/_arraysetops_impl.py: 21%

280 statements