1"""
2Set operations for arrays based on sorting.
3
4Notes
5-----
6
For floating point arrays, results may be inaccurate because of the usual
round-off and floating point comparison issues.
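
For example, values that are mathematically equal may compare unequal after
round-off, so set operations can treat them as distinct:

>>> import numpy as np
>>> np.union1d([0.1 + 0.2], [0.3]).size
2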
9
Speed could be gained in some operations by an implementation of
`numpy.sort` that could directly provide the permutation vectors, thus
avoiding calls to `numpy.argsort`.
13
14Original author: Robert Cimrman
15
16"""
17import functools
18import warnings
19from typing import NamedTuple
20
21import numpy as np
22from numpy._core import overrides
23from numpy._core._multiarray_umath import _array_converter
24
25
26array_function_dispatch = functools.partial(
27 overrides.array_function_dispatch, module='numpy')
28
29
30__all__ = [
31 "ediff1d", "in1d", "intersect1d", "isin", "setdiff1d", "setxor1d",
32 "union1d", "unique", "unique_all", "unique_counts", "unique_inverse",
33 "unique_values"
34]
35
36
37def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):
38 return (ary, to_end, to_begin)
39
40
41@array_function_dispatch(_ediff1d_dispatcher)
42def ediff1d(ary, to_end=None, to_begin=None):
43 """
44 The differences between consecutive elements of an array.
45
46 Parameters
47 ----------
48 ary : array_like
49 If necessary, will be flattened before the differences are taken.
50 to_end : array_like, optional
51 Number(s) to append at the end of the returned differences.
52 to_begin : array_like, optional
53 Number(s) to prepend at the beginning of the returned differences.
54
55 Returns
56 -------
57 ediff1d : ndarray
58 The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.
59
60 See Also
61 --------
62 diff, gradient
63
64 Notes
65 -----
66 When applied to masked arrays, this function drops the mask information
67 if the `to_begin` and/or `to_end` parameters are used.
68
69 Examples
70 --------
71 >>> import numpy as np
72 >>> x = np.array([1, 2, 4, 7, 0])
73 >>> np.ediff1d(x)
74 array([ 1, 2, 3, -7])
75
76 >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))
77 array([-99, 1, 2, ..., -7, 88, 99])
78
79 The returned array is always 1D.
80
81 >>> y = [[1, 2, 4], [1, 6, 24]]
82 >>> np.ediff1d(y)
83 array([ 1, 2, -3, 5, 18])
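
    Loosely, with the default arguments the result matches ``np.diff`` applied
    to the flattened input:

    >>> x = np.array([1, 2, 4, 7, 0])
    >>> np.array_equal(np.ediff1d(x), np.diff(x.ravel()))
    True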
84
85 """
86 conv = _array_converter(ary)
87 # Convert to (any) array and ravel:
88 ary = conv[0].ravel()
89
90 # enforce that the dtype of `ary` is used for the output
91 dtype_req = ary.dtype
92
93 # fast track default case
94 if to_begin is None and to_end is None:
95 return ary[1:] - ary[:-1]
96
97 if to_begin is None:
98 l_begin = 0
99 else:
100 to_begin = np.asanyarray(to_begin)
101 if not np.can_cast(to_begin, dtype_req, casting="same_kind"):
102 raise TypeError("dtype of `to_begin` must be compatible "
103 "with input `ary` under the `same_kind` rule.")
104
105 to_begin = to_begin.ravel()
106 l_begin = len(to_begin)
107
108 if to_end is None:
109 l_end = 0
110 else:
111 to_end = np.asanyarray(to_end)
112 if not np.can_cast(to_end, dtype_req, casting="same_kind"):
113 raise TypeError("dtype of `to_end` must be compatible "
114 "with input `ary` under the `same_kind` rule.")
115
116 to_end = to_end.ravel()
117 l_end = len(to_end)
118
119 # do the calculation in place and copy to_begin and to_end
120 l_diff = max(len(ary) - 1, 0)
121 result = np.empty_like(ary, shape=l_diff + l_begin + l_end)
122
123 if l_begin > 0:
124 result[:l_begin] = to_begin
125 if l_end > 0:
126 result[l_begin + l_diff:] = to_end
127 np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])
128
129 return conv.wrap(result)
130
131
132def _unpack_tuple(x):
133 """ Unpacks one-element tuples for use as return values """
134 if len(x) == 1:
135 return x[0]
136 else:
137 return x
138
139
140def _unique_dispatcher(ar, return_index=None, return_inverse=None,
141 return_counts=None, axis=None, *, equal_nan=None):
142 return (ar,)
143
144
145@array_function_dispatch(_unique_dispatcher)
146def unique(ar, return_index=False, return_inverse=False,
147 return_counts=False, axis=None, *, equal_nan=True):
148 """
149 Find the unique elements of an array.
150
151 Returns the sorted unique elements of an array. There are three optional
152 outputs in addition to the unique elements:
153
154 * the indices of the input array that give the unique values
155 * the indices of the unique array that reconstruct the input array
156 * the number of times each unique value comes up in the input array
157
158 Parameters
159 ----------
160 ar : array_like
161 Input array. Unless `axis` is specified, this will be flattened if it
162 is not already 1-D.
163 return_index : bool, optional
164 If True, also return the indices of `ar` (along the specified axis,
165 if provided, or in the flattened array) that result in the unique array.
166 return_inverse : bool, optional
167 If True, also return the indices of the unique array (for the specified
168 axis, if provided) that can be used to reconstruct `ar`.
169 return_counts : bool, optional
170 If True, also return the number of times each unique item appears
171 in `ar`.
172 axis : int or None, optional
173 The axis to operate on. If None, `ar` will be flattened. If an integer,
174 the subarrays indexed by the given axis will be flattened and treated
175 as the elements of a 1-D array with the dimension of the given axis,
176 see the notes for more details. Object arrays or structured arrays
177 that contain objects are not supported if the `axis` kwarg is used. The
178 default is None.
179
180 equal_nan : bool, optional
181 If True, collapses multiple NaN values in the return array into one.
182
183 .. versionadded:: 1.24
184
185 Returns
186 -------
187 unique : ndarray
188 The sorted unique values.
189 unique_indices : ndarray, optional
190 The indices of the first occurrences of the unique values in the
191 original array. Only provided if `return_index` is True.
192 unique_inverse : ndarray, optional
193 The indices to reconstruct the original array from the
194 unique array. Only provided if `return_inverse` is True.
195 unique_counts : ndarray, optional
196 The number of times each of the unique values comes up in the
197 original array. Only provided if `return_counts` is True.
198
199 See Also
200 --------
201 repeat : Repeat elements of an array.
202 sort : Return a sorted copy of an array.
203
204 Notes
205 -----
206 When an axis is specified the subarrays indexed by the axis are sorted.
207 This is done by making the specified axis the first dimension of the array
208 (move the axis to the first dimension to keep the order of the other axes)
209 and then flattening the subarrays in C order. The flattened subarrays are
210 then viewed as a structured type with each element given a label, with the
211 effect that we end up with a 1-D array of structured types that can be
212 treated in the same way as any other 1-D array. The result is that the
213 flattened subarrays are sorted in lexicographic order starting with the
214 first element.
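
    A simplified sketch of this consolidation step (for illustration only, not
    the exact internal code)::

        import numpy as np

        a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
        # label each column as a field so that every row becomes one
        # structured scalar
        fields = [(f'f{i}', a.dtype) for i in range(a.shape[1])]
        rows = np.ascontiguousarray(a).view(fields).ravel()
        # `rows` is 1-D, so ordinary sorting and comparison of neighbours
        # yield the unique rows of `a`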
215
216 .. versionchanged:: 1.21
217 Like np.sort, NaN will sort to the end of the values.
218 For complex arrays all NaN values are considered equivalent
219 (no matter whether the NaN is in the real or imaginary part).
        The representative chosen for the returned array is the smallest one
        in lexicographical order - see np.sort for how the lexicographical
        order is defined for complex arrays.
223
224 .. versionchanged:: 2.0
225 For multi-dimensional inputs, ``unique_inverse`` is reshaped
226 such that the input can be reconstructed using
227 ``np.take(unique, unique_inverse, axis=axis)``. The result is
228 now not 1-dimensional when ``axis=None``.
229
        Note that in NumPy 2.0.0 a higher-dimensional array was also returned
        when ``axis`` was not ``None``. This was reverted, but
        ``inverse.reshape(-1)`` can be used to ensure compatibility with both
        versions.
234
235 Examples
236 --------
237 >>> import numpy as np
238 >>> np.unique([1, 1, 2, 2, 3, 3])
239 array([1, 2, 3])
240 >>> a = np.array([[1, 1], [2, 3]])
241 >>> np.unique(a)
242 array([1, 2, 3])
243
    Return the unique rows of a 2D array:
245
246 >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
247 >>> np.unique(a, axis=0)
    array([[1, 0, 0],
           [2, 3, 4]])
249
250 Return the indices of the original array that give the unique values:
251
252 >>> a = np.array(['a', 'b', 'b', 'c', 'a'])
253 >>> u, indices = np.unique(a, return_index=True)
254 >>> u
255 array(['a', 'b', 'c'], dtype='<U1')
256 >>> indices
257 array([0, 1, 3])
258 >>> a[indices]
259 array(['a', 'b', 'c'], dtype='<U1')
260
261 Reconstruct the input array from the unique values and inverse:
262
263 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
264 >>> u, indices = np.unique(a, return_inverse=True)
265 >>> u
266 array([1, 2, 3, 4, 6])
267 >>> indices
268 array([0, 1, 4, 3, 1, 2, 1])
269 >>> u[indices]
270 array([1, 2, 6, 4, 2, 3, 2])
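
    When an ``axis`` is given, the unique slices together with the inverse
    indices reconstruct the input via ``np.take``:

    >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])
    >>> u, indices = np.unique(a, axis=0, return_inverse=True)
    >>> np.array_equal(np.take(u, indices, axis=0), a)
    True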
271
272 Reconstruct the input values from the unique values and counts:
273
274 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])
275 >>> values, counts = np.unique(a, return_counts=True)
276 >>> values
277 array([1, 2, 3, 4, 6])
278 >>> counts
279 array([1, 3, 1, 1, 1])
280 >>> np.repeat(values, counts)
281 array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved
282
283 """
284 ar = np.asanyarray(ar)
285 if axis is None:
286 ret = _unique1d(ar, return_index, return_inverse, return_counts,
287 equal_nan=equal_nan, inverse_shape=ar.shape, axis=None)
288 return _unpack_tuple(ret)
289
290 # axis was specified and not None
291 try:
292 ar = np.moveaxis(ar, axis, 0)
293 except np.exceptions.AxisError:
294 # this removes the "axis1" or "axis2" prefix from the error message
295 raise np.exceptions.AxisError(axis, ar.ndim) from None
296 inverse_shape = [1] * ar.ndim
297 inverse_shape[axis] = ar.shape[0]
298
299 # Must reshape to a contiguous 2D array for this to work...
300 orig_shape, orig_dtype = ar.shape, ar.dtype
301 ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))
302 ar = np.ascontiguousarray(ar)
    dtype = [(f'f{i}', ar.dtype) for i in range(ar.shape[1])]
304
305 # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured
306 # data type with `m` fields where each field has the data type of `ar`.
307 # In the following, we create the array `consolidated`, which has
308 # shape `(n,)` with data type `dtype`.
309 try:
310 if ar.shape[1] > 0:
311 consolidated = ar.view(dtype)
312 else:
313 # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is
314 # a data type with itemsize 0, and the call `ar.view(dtype)` will
315 # fail. Instead, we'll use `np.empty` to explicitly create the
316 # array with shape `(len(ar),)`. Because `dtype` in this case has
317 # itemsize 0, the total size of the result is still 0 bytes.
318 consolidated = np.empty(len(ar), dtype=dtype)
319 except TypeError as e:
320 # There's no good way to do this for object arrays, etc...
321 msg = 'The axis argument to unique is not supported for dtype {dt}'
322 raise TypeError(msg.format(dt=ar.dtype)) from e
323
324 def reshape_uniq(uniq):
325 n = len(uniq)
326 uniq = uniq.view(orig_dtype)
327 uniq = uniq.reshape(n, *orig_shape[1:])
328 uniq = np.moveaxis(uniq, 0, axis)
329 return uniq
330
331 output = _unique1d(consolidated, return_index,
332 return_inverse, return_counts,
333 equal_nan=equal_nan, inverse_shape=inverse_shape,
334 axis=axis)
335 output = (reshape_uniq(output[0]),) + output[1:]
336 return _unpack_tuple(output)
337
338
339def _unique1d(ar, return_index=False, return_inverse=False,
340 return_counts=False, *, equal_nan=True, inverse_shape=None,
341 axis=None):
342 """
343 Find the unique elements of an array, ignoring shape.
344 """
345 ar = np.asanyarray(ar).flatten()
346
347 optional_indices = return_index or return_inverse
348
349 if optional_indices:
350 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')
351 aux = ar[perm]
352 else:
353 ar.sort()
354 aux = ar
355 mask = np.empty(aux.shape, dtype=np.bool)
356 mask[:1] = True
357 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and
358 np.isnan(aux[-1])):
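        # NaNs sort to the end, so there is a trailing block of NaNs; find
        # where it starts and keep only its first element as the
        # representative in the output.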
359 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent
360 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')
361 else:
362 aux_firstnan = np.searchsorted(aux, aux[-1], side='left')
363 if aux_firstnan > 0:
364 mask[1:aux_firstnan] = (
365 aux[1:aux_firstnan] != aux[:aux_firstnan - 1])
366 mask[aux_firstnan] = True
367 mask[aux_firstnan + 1:] = False
368 else:
369 mask[1:] = aux[1:] != aux[:-1]
370
371 ret = (aux[mask],)
372 if return_index:
373 ret += (perm[mask],)
374 if return_inverse:
375 imask = np.cumsum(mask) - 1
376 inv_idx = np.empty(mask.shape, dtype=np.intp)
377 inv_idx[perm] = imask
378 ret += (inv_idx.reshape(inverse_shape) if axis is None else inv_idx,)
379 if return_counts:
380 idx = np.concatenate(np.nonzero(mask) + ([mask.size],))
381 ret += (np.diff(idx),)
382 return ret
383
384
385# Array API set functions
386
387class UniqueAllResult(NamedTuple):
388 values: np.ndarray
389 indices: np.ndarray
390 inverse_indices: np.ndarray
391 counts: np.ndarray
392
393
394class UniqueCountsResult(NamedTuple):
395 values: np.ndarray
396 counts: np.ndarray
397
398
399class UniqueInverseResult(NamedTuple):
400 values: np.ndarray
401 inverse_indices: np.ndarray
402
403
404def _unique_all_dispatcher(x, /):
405 return (x,)
406
407
408@array_function_dispatch(_unique_all_dispatcher)
409def unique_all(x):
410 """
    Find the unique elements of an array, along with indices, inverse, and counts.
412
413 This function is an Array API compatible alternative to::
414
415 np.unique(x, return_index=True, return_inverse=True,
416 return_counts=True, equal_nan=False)
417
418 but returns a namedtuple for easier access to each output.
419
420 Parameters
421 ----------
422 x : array_like
423 Input array. It will be flattened if it is not already 1-D.
424
425 Returns
426 -------
427 out : namedtuple
428 The result containing:
429
430 * values - The unique elements of an input array.
431 * indices - The first occurring indices for each unique element.
432 * inverse_indices - The indices from the set of unique elements
433 that reconstruct `x`.
434 * counts - The corresponding counts for each unique element.
435
436 See Also
437 --------
438 unique : Find the unique elements of an array.
439
440 Examples
441 --------
442 >>> import numpy as np
443 >>> x = [1, 1, 2]
444 >>> uniq = np.unique_all(x)
445 >>> uniq.values
446 array([1, 2])
447 >>> uniq.indices
448 array([0, 2])
449 >>> uniq.inverse_indices
450 array([0, 0, 1])
451 >>> uniq.counts
452 array([2, 1])
453 """
454 result = unique(
455 x,
456 return_index=True,
457 return_inverse=True,
458 return_counts=True,
459 equal_nan=False
460 )
461 return UniqueAllResult(*result)
462
463
464def _unique_counts_dispatcher(x, /):
465 return (x,)
466
467
468@array_function_dispatch(_unique_counts_dispatcher)
469def unique_counts(x):
470 """
471 Find the unique elements and counts of an input array `x`.
472
473 This function is an Array API compatible alternative to::
474
475 np.unique(x, return_counts=True, equal_nan=False)
476
477 but returns a namedtuple for easier access to each output.
478
479 Parameters
480 ----------
481 x : array_like
482 Input array. It will be flattened if it is not already 1-D.
483
484 Returns
485 -------
486 out : namedtuple
487 The result containing:
488
489 * values - The unique elements of an input array.
490 * counts - The corresponding counts for each unique element.
491
492 See Also
493 --------
494 unique : Find the unique elements of an array.
495
496 Examples
497 --------
498 >>> import numpy as np
499 >>> x = [1, 1, 2]
500 >>> uniq = np.unique_counts(x)
501 >>> uniq.values
502 array([1, 2])
503 >>> uniq.counts
504 array([2, 1])
505 """
506 result = unique(
507 x,
508 return_index=False,
509 return_inverse=False,
510 return_counts=True,
511 equal_nan=False
512 )
513 return UniqueCountsResult(*result)
514
515
516def _unique_inverse_dispatcher(x, /):
517 return (x,)
518
519
520@array_function_dispatch(_unique_inverse_dispatcher)
521def unique_inverse(x):
522 """
523 Find the unique elements of `x` and indices to reconstruct `x`.
524
525 This function is an Array API compatible alternative to::
526
527 np.unique(x, return_inverse=True, equal_nan=False)
528
529 but returns a namedtuple for easier access to each output.
530
531 Parameters
532 ----------
533 x : array_like
534 Input array. It will be flattened if it is not already 1-D.
535
536 Returns
537 -------
538 out : namedtuple
539 The result containing:
540
541 * values - The unique elements of an input array.
542 * inverse_indices - The indices from the set of unique elements
543 that reconstruct `x`.
544
545 See Also
546 --------
547 unique : Find the unique elements of an array.
548
549 Examples
550 --------
551 >>> import numpy as np
552 >>> x = [1, 1, 2]
553 >>> uniq = np.unique_inverse(x)
554 >>> uniq.values
555 array([1, 2])
556 >>> uniq.inverse_indices
557 array([0, 0, 1])
558 """
559 result = unique(
560 x,
561 return_index=False,
562 return_inverse=True,
563 return_counts=False,
564 equal_nan=False
565 )
566 return UniqueInverseResult(*result)
567
568
569def _unique_values_dispatcher(x, /):
570 return (x,)
571
572
573@array_function_dispatch(_unique_values_dispatcher)
574def unique_values(x):
575 """
576 Returns the unique elements of an input array `x`.
577
578 This function is an Array API compatible alternative to::
579
580 np.unique(x, equal_nan=False)
581
582 Parameters
583 ----------
584 x : array_like
585 Input array. It will be flattened if it is not already 1-D.
586
587 Returns
588 -------
589 out : ndarray
590 The unique elements of an input array.
591
592 See Also
593 --------
594 unique : Find the unique elements of an array.
595
596 Examples
597 --------
598 >>> import numpy as np
599 >>> np.unique_values([1, 1, 2])
600 array([1, 2])
601
602 """
603 return unique(
604 x,
605 return_index=False,
606 return_inverse=False,
607 return_counts=False,
608 equal_nan=False
609 )
610
611
612def _intersect1d_dispatcher(
613 ar1, ar2, assume_unique=None, return_indices=None):
614 return (ar1, ar2)
615
616
617@array_function_dispatch(_intersect1d_dispatcher)
618def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):
619 """
620 Find the intersection of two arrays.
621
622 Return the sorted, unique values that are in both of the input arrays.
623
624 Parameters
625 ----------
626 ar1, ar2 : array_like
627 Input arrays. Will be flattened if not already 1D.
628 assume_unique : bool
629 If True, the input arrays are both assumed to be unique, which
630 can speed up the calculation. If True but ``ar1`` or ``ar2`` are not
631 unique, incorrect results and out-of-bounds indices could result.
632 Default is False.
633 return_indices : bool
634 If True, the indices which correspond to the intersection of the two
635 arrays are returned. The first instance of a value is used if there are
636 multiple. Default is False.
637
638 Returns
639 -------
640 intersect1d : ndarray
641 Sorted 1D array of common and unique elements.
642 comm1 : ndarray
643 The indices of the first occurrences of the common values in `ar1`.
644 Only provided if `return_indices` is True.
645 comm2 : ndarray
646 The indices of the first occurrences of the common values in `ar2`.
647 Only provided if `return_indices` is True.
648
649 Examples
650 --------
651 >>> import numpy as np
652 >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])
653 array([1, 3])
654
655 To intersect more than two arrays, use functools.reduce:
656
657 >>> from functools import reduce
658 >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
659 array([3])
660
661 To return the indices of the values common to the input arrays
662 along with the intersected values:
663
664 >>> x = np.array([1, 1, 2, 3, 4])
665 >>> y = np.array([2, 1, 4, 6])
666 >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)
667 >>> x_ind, y_ind
668 (array([0, 2, 4]), array([1, 0, 2]))
669 >>> xy, x[x_ind], y[y_ind]
670 (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))
671
672 """
673 ar1 = np.asanyarray(ar1)
674 ar2 = np.asanyarray(ar2)
675
676 if not assume_unique:
677 if return_indices:
678 ar1, ind1 = unique(ar1, return_index=True)
679 ar2, ind2 = unique(ar2, return_index=True)
680 else:
681 ar1 = unique(ar1)
682 ar2 = unique(ar2)
683 else:
684 ar1 = ar1.ravel()
685 ar2 = ar2.ravel()
686
687 aux = np.concatenate((ar1, ar2))
688 if return_indices:
689 aux_sort_indices = np.argsort(aux, kind='mergesort')
690 aux = aux[aux_sort_indices]
691 else:
692 aux.sort()
693
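    # After sorting, a value present in both inputs appears as a pair of equal
    # neighbours; since each input has no duplicates at this point, one
    # element of each pair comes from `ar1` and the other from `ar2`.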
694 mask = aux[1:] == aux[:-1]
695 int1d = aux[:-1][mask]
696
697 if return_indices:
698 ar1_indices = aux_sort_indices[:-1][mask]
699 ar2_indices = aux_sort_indices[1:][mask] - ar1.size
700 if not assume_unique:
701 ar1_indices = ind1[ar1_indices]
702 ar2_indices = ind2[ar2_indices]
703
704 return int1d, ar1_indices, ar2_indices
705 else:
706 return int1d
707
708
709def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):
710 return (ar1, ar2)
711
712
713@array_function_dispatch(_setxor1d_dispatcher)
714def setxor1d(ar1, ar2, assume_unique=False):
715 """
716 Find the set exclusive-or of two arrays.
717
718 Return the sorted, unique values that are in only one (not both) of the
719 input arrays.
720
721 Parameters
722 ----------
723 ar1, ar2 : array_like
724 Input arrays.
725 assume_unique : bool
726 If True, the input arrays are both assumed to be unique, which
727 can speed up the calculation. Default is False.
728
729 Returns
730 -------
731 setxor1d : ndarray
732 Sorted 1D array of unique values that are in only one of the input
733 arrays.
734
735 Examples
736 --------
737 >>> import numpy as np
738 >>> a = np.array([1, 2, 3, 2, 4])
739 >>> b = np.array([2, 3, 5, 7, 5])
740 >>> np.setxor1d(a,b)
741 array([1, 4, 5, 7])
742
743 """
744 if not assume_unique:
745 ar1 = unique(ar1)
746 ar2 = unique(ar2)
747
748 aux = np.concatenate((ar1, ar2), axis=None)
749 if aux.size == 0:
750 return aux
751
752 aux.sort()
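    # `flag` marks where a new run of equal values starts (with sentinels at
    # both ends); a value belongs to exactly one input when its run has
    # length one, i.e. when flag[i] & flag[i + 1] is True.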
753 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))
754 return aux[flag[1:] & flag[:-1]]
755
756
757def _in1d_dispatcher(ar1, ar2, assume_unique=None, invert=None, *,
758 kind=None):
759 return (ar1, ar2)
760
761
762@array_function_dispatch(_in1d_dispatcher)
763def in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
764 """
765 Test whether each element of a 1-D array is also present in a second array.
766
767 .. deprecated:: 2.0
768 Use :func:`isin` instead of `in1d` for new code.
769
770 Returns a boolean array the same length as `ar1` that is True
771 where an element of `ar1` is in `ar2` and False otherwise.
772
773 Parameters
774 ----------
775 ar1 : (M,) array_like
776 Input array.
777 ar2 : array_like
778 The values against which to test each value of `ar1`.
779 assume_unique : bool, optional
780 If True, the input arrays are both assumed to be unique, which
781 can speed up the calculation. Default is False.
782 invert : bool, optional
783 If True, the values in the returned array are inverted (that is,
784 False where an element of `ar1` is in `ar2` and True otherwise).
785 Default is False. ``np.in1d(a, b, invert=True)`` is equivalent
786 to (but is faster than) ``np.invert(in1d(a, b))``.
787 kind : {None, 'sort', 'table'}, optional
788 The algorithm to use. This will not affect the final result,
789 but will affect the speed and memory use. The default, None,
790 will select automatically based on memory considerations.
791
792 * If 'sort', will use a mergesort-based approach. This will have
793 a memory usage of roughly 6 times the sum of the sizes of
794 `ar1` and `ar2`, not accounting for size of dtypes.
795 * If 'table', will use a lookup table approach similar
796 to a counting sort. This is only available for boolean and
797 integer arrays. This will have a memory usage of the
798 size of `ar1` plus the max-min value of `ar2`. `assume_unique`
799 has no effect when the 'table' option is used.
800 * If None, will automatically choose 'table' if
801 the required memory allocation is less than or equal to
802 6 times the sum of the sizes of `ar1` and `ar2`,
803 otherwise will use 'sort'. This is done to not use
804 a large amount of memory by default, even though
805 'table' may be faster in most cases. If 'table' is chosen,
806 `assume_unique` will have no effect.
807
808 Returns
809 -------
810 in1d : (M,) ndarray, bool
811 The values `ar1[in1d]` are in `ar2`.
812
813 See Also
814 --------
815 isin : Version of this function that preserves the
816 shape of ar1.
817
818 Notes
819 -----
    `in1d` can be considered an element-wise version of the Python keyword
    `in`, for 1-D sequences. ``in1d(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])``.
    However, this idea fails if `ar2` is a set or similar (non-sequence)
    container: because ``ar2`` is converted to an array, ``asarray(ar2)``
    is an object array rather than the expected array of contained values.
827
    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
829 following relationship is true:
830 ``log10(len(ar2)) > (log10(max(ar2)-min(ar2)) - 2.27) / 0.927``,
831 but may use greater memory. The default value for `kind` will
832 be automatically selected based only on memory usage, so one may
833 manually set ``kind='table'`` if memory constraints can be relaxed.
834
835 Examples
836 --------
837 >>> import numpy as np
838 >>> test = np.array([0, 1, 2, 5, 0])
839 >>> states = [0, 2]
840 >>> mask = np.in1d(test, states)
841 >>> mask
842 array([ True, False, True, False, True])
843 >>> test[mask]
844 array([0, 2, 0])
845 >>> mask = np.in1d(test, states, invert=True)
846 >>> mask
847 array([False, True, False, True, False])
848 >>> test[mask]
849 array([1, 5])
850 """
851
852 # Deprecated in NumPy 2.0, 2023-08-18
853 warnings.warn(
854 "`in1d` is deprecated. Use `np.isin` instead.",
855 DeprecationWarning,
856 stacklevel=2
857 )
858
859 return _in1d(ar1, ar2, assume_unique, invert, kind=kind)
860
861
862def _in1d(ar1, ar2, assume_unique=False, invert=False, *, kind=None):
863 # Ravel both arrays, behavior for the first array could be different
864 ar1 = np.asarray(ar1).ravel()
865 ar2 = np.asarray(ar2).ravel()
866
867 # Ensure that iteration through object arrays yields size-1 arrays
868 if ar2.dtype == object:
869 ar2 = ar2.reshape(-1, 1)
870
871 if kind not in {None, 'sort', 'table'}:
872 raise ValueError(
873 f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")
874
875 # Can use the table method if all arrays are integers or boolean:
876 is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))
877 use_table_method = is_int_arrays and kind in {None, 'table'}
878
879 if use_table_method:
880 if ar2.size == 0:
881 if invert:
882 return np.ones_like(ar1, dtype=bool)
883 else:
884 return np.zeros_like(ar1, dtype=bool)
885
886 # Convert booleans to uint8 so we can use the fast integer algorithm
887 if ar1.dtype == bool:
888 ar1 = ar1.astype(np.uint8)
889 if ar2.dtype == bool:
890 ar2 = ar2.astype(np.uint8)
891
892 ar2_min = int(np.min(ar2))
893 ar2_max = int(np.max(ar2))
894
895 ar2_range = ar2_max - ar2_min
896
897 # Constraints on whether we can actually use the table method:
898 # 1. Assert memory usage is not too large
899 below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)
900 # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype
901 range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max
902
903 # Optimal performance is for approximately
904 # log10(size) > (log10(range) - 2.27) / 0.927.
905 # However, here we set the requirement that by default
906 # the intermediate array can only be 6x
907 # the combined memory allocation of the original
908 # arrays. See discussion on
909 # https://github.com/numpy/numpy/pull/12065.
910
911 if (
912 range_safe_from_overflow and
913 (below_memory_constraint or kind == 'table')
914 ):
915
916 if invert:
917 outgoing_array = np.ones_like(ar1, dtype=bool)
918 else:
919 outgoing_array = np.zeros_like(ar1, dtype=bool)
920
921 # Make elements 1 where the integer exists in ar2
922 if invert:
923 isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)
924 isin_helper_ar[ar2 - ar2_min] = 0
925 else:
926 isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)
927 isin_helper_ar[ar2 - ar2_min] = 1
928
929 # Mask out elements we know won't work
930 basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)
931 in_range_ar1 = ar1[basic_mask]
932 if in_range_ar1.size == 0:
933 # Nothing more to do, since all values are out of range.
934 return outgoing_array
935
936 # Unfortunately, ar2_min can be out of range for `intp` even
937 # if the calculation result must fit in range (and be positive).
938 # In that case, use ar2.dtype which must work for all unmasked
939 # values.
940 try:
941 ar2_min = np.array(ar2_min, dtype=np.intp)
942 dtype = np.intp
943 except OverflowError:
944 dtype = ar2.dtype
945
946 out = np.empty_like(in_range_ar1, dtype=np.intp)
947 outgoing_array[basic_mask] = isin_helper_ar[
948 np.subtract(in_range_ar1, ar2_min, dtype=dtype,
949 out=out, casting="unsafe")]
950
951 return outgoing_array
952 elif kind == 'table': # not range_safe_from_overflow
953 raise RuntimeError(
954 "You have specified kind='table', "
                "but the range of values in `ar2` or `ar1` exceeds the "
956 "maximum integer of the datatype. "
957 "Please set `kind` to None or 'sort'."
958 )
959 elif kind == 'table':
960 raise ValueError(
961 "The 'table' method is only "
962 "supported for boolean or integer arrays. "
963 "Please select 'sort' or None for kind."
964 )
965
967 # Check if one of the arrays may contain arbitrary objects
968 contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject
969
970 # This code is run when
971 # a) the first condition is true, making the code significantly faster
972 # b) the second condition is true (i.e. `ar1` or `ar2` may contain
973 # arbitrary objects), since then sorting is not guaranteed to work
974 if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:
975 if invert:
976 mask = np.ones(len(ar1), dtype=bool)
977 for a in ar2:
978 mask &= (ar1 != a)
979 else:
980 mask = np.zeros(len(ar1), dtype=bool)
981 for a in ar2:
982 mask |= (ar1 == a)
983 return mask
984
985 # Otherwise use sorting
986 if not assume_unique:
987 ar1, rev_idx = np.unique(ar1, return_inverse=True)
988 ar2 = np.unique(ar2)
989
990 ar = np.concatenate((ar1, ar2))
991 # We need this to be a stable sort, so always use 'mergesort'
992 # here. The values from the first array should always come before
993 # the values from the second array.
994 order = ar.argsort(kind='mergesort')
995 sar = ar[order]
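    # Thanks to the stable sort, equal values from `ar1` precede equal values
    # from `ar2`, so an element of `ar1` is contained in `ar2` exactly when
    # the next element of `sar` equals it.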
996 if invert:
997 bool_ar = (sar[1:] != sar[:-1])
998 else:
999 bool_ar = (sar[1:] == sar[:-1])
1000 flag = np.concatenate((bool_ar, [invert]))
1001 ret = np.empty(ar.shape, dtype=bool)
1002 ret[order] = flag
1003
1004 if assume_unique:
1005 return ret[:len(ar1)]
1006 else:
1007 return ret[rev_idx]
1008
1009
1010def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,
1011 *, kind=None):
1012 return (element, test_elements)
1013
1014
1015@array_function_dispatch(_isin_dispatcher)
1016def isin(element, test_elements, assume_unique=False, invert=False, *,
1017 kind=None):
1018 """
    Calculates ``element in test_elements``, broadcasting over `element` only.

    Returns a boolean array of the same shape as `element` that is True
    where an element of `element` is in `test_elements` and False otherwise.
1022
1023 Parameters
1024 ----------
1025 element : array_like
1026 Input array.
1027 test_elements : array_like
1028 The values against which to test each value of `element`.
1029 This argument is flattened if it is an array or array_like.
1030 See notes for behavior with non-array-like parameters.
1031 assume_unique : bool, optional
1032 If True, the input arrays are both assumed to be unique, which
1033 can speed up the calculation. Default is False.
1034 invert : bool, optional
1035 If True, the values in the returned array are inverted, as if
1036 calculating `element not in test_elements`. Default is False.
1037 ``np.isin(a, b, invert=True)`` is equivalent to (but faster
1038 than) ``np.invert(np.isin(a, b))``.
1039 kind : {None, 'sort', 'table'}, optional
1040 The algorithm to use. This will not affect the final result,
1041 but will affect the speed and memory use. The default, None,
1042 will select automatically based on memory considerations.
1043
1044 * If 'sort', will use a mergesort-based approach. This will have
1045 a memory usage of roughly 6 times the sum of the sizes of
1046 `element` and `test_elements`, not accounting for size of dtypes.
1047 * If 'table', will use a lookup table approach similar
1048 to a counting sort. This is only available for boolean and
1049 integer arrays. This will have a memory usage of the
1050 size of `element` plus the max-min value of `test_elements`.
1051 `assume_unique` has no effect when the 'table' option is used.
1052 * If None, will automatically choose 'table' if
1053 the required memory allocation is less than or equal to
1054 6 times the sum of the sizes of `element` and `test_elements`,
1055 otherwise will use 'sort'. This is done to not use
1056 a large amount of memory by default, even though
1057 'table' may be faster in most cases. If 'table' is chosen,
1058 `assume_unique` will have no effect.
1059
1061 Returns
1062 -------
1063 isin : ndarray, bool
1064 Has the same shape as `element`. The values `element[isin]`
1065 are in `test_elements`.
1066
1067 Notes
1068 -----
    `isin` is an element-wise version of the Python keyword `in`.
    ``isin(a, b)`` is roughly equivalent to
    ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.
1072
1073 `element` and `test_elements` are converted to arrays if they are not
1074 already. If `test_elements` is a set (or other non-sequence collection)
1075 it will be converted to an object array with one element, rather than an
1076 array of the values contained in `test_elements`. This is a consequence
1077 of the `array` constructor's way of handling non-sequence collections.
1078 Converting the set to a list usually gives the desired behavior.
1079
    Using ``kind='table'`` tends to be faster than ``kind='sort'`` if the
1081 following relationship is true:
1082 ``log10(len(test_elements)) >
1083 (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,
1084 but may use greater memory. The default value for `kind` will
1085 be automatically selected based only on memory usage, so one may
1086 manually set ``kind='table'`` if memory constraints can be relaxed.
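
    The sketch below illustrates the idea behind ``kind='table'`` for bounded
    integer data. It is an illustration only: ``isin_table`` is not a NumPy
    function, and the real implementation additionally guards against integer
    overflow and excessive memory use::

        import numpy as np

        def isin_table(element, test_elements):
            element = np.asarray(element)
            test = np.asarray(test_elements).ravel()
            lo, hi = int(test.min()), int(test.max())
            # one flag per possible value in [lo, hi]
            table = np.zeros(hi - lo + 1, dtype=bool)
            table[test - lo] = True
            out = np.zeros(element.shape, dtype=bool)
            # values outside [lo, hi] cannot be present in `test_elements`
            in_range = (element >= lo) & (element <= hi)
            out[in_range] = table[element[in_range] - lo]
            return out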
1087
1088 Examples
1089 --------
1090 >>> import numpy as np
1091 >>> element = 2*np.arange(4).reshape((2, 2))
1092 >>> element
1093 array([[0, 2],
1094 [4, 6]])
1095 >>> test_elements = [1, 2, 4, 8]
1096 >>> mask = np.isin(element, test_elements)
1097 >>> mask
1098 array([[False, True],
1099 [ True, False]])
1100 >>> element[mask]
1101 array([2, 4])
1102
1103 The indices of the matched values can be obtained with `nonzero`:
1104
1105 >>> np.nonzero(mask)
1106 (array([0, 1]), array([1, 0]))
1107
1108 The test can also be inverted:
1109
1110 >>> mask = np.isin(element, test_elements, invert=True)
1111 >>> mask
1112 array([[ True, False],
1113 [False, True]])
1114 >>> element[mask]
1115 array([0, 6])
1116
1117 Because of how `array` handles sets, the following does not
1118 work as expected:
1119
1120 >>> test_set = {1, 2, 4, 8}
1121 >>> np.isin(element, test_set)
1122 array([[False, False],
1123 [False, False]])
1124
1125 Casting the set to a list gives the expected result:
1126
1127 >>> np.isin(element, list(test_set))
1128 array([[False, True],
1129 [ True, False]])
1130 """
1131 element = np.asarray(element)
1132 return _in1d(element, test_elements, assume_unique=assume_unique,
1133 invert=invert, kind=kind).reshape(element.shape)
1134
1135
1136def _union1d_dispatcher(ar1, ar2):
1137 return (ar1, ar2)
1138
1139
1140@array_function_dispatch(_union1d_dispatcher)
1141def union1d(ar1, ar2):
1142 """
1143 Find the union of two arrays.
1144
1145 Return the unique, sorted array of values that are in either of the two
1146 input arrays.
1147
1148 Parameters
1149 ----------
1150 ar1, ar2 : array_like
1151 Input arrays. They are flattened if they are not already 1D.
1152
1153 Returns
1154 -------
1155 union1d : ndarray
1156 Unique, sorted union of the input arrays.
1157
1158 Examples
1159 --------
1160 >>> import numpy as np
1161 >>> np.union1d([-1, 0, 1], [-2, 0, 2])
1162 array([-2, -1, 0, 1, 2])
1163
1164 To find the union of more than two arrays, use functools.reduce:
1165
1166 >>> from functools import reduce
1167 >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))
1168 array([1, 2, 3, 4, 6])
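
    Inputs that are not already 1D are flattened first:

    >>> np.union1d([[1, 2], [3, 4]], [2, 5])
    array([1, 2, 3, 4, 5])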
1169 """
1170 return unique(np.concatenate((ar1, ar2), axis=None))
1171
1172
1173def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):
1174 return (ar1, ar2)
1175
1176
1177@array_function_dispatch(_setdiff1d_dispatcher)
1178def setdiff1d(ar1, ar2, assume_unique=False):
1179 """
1180 Find the set difference of two arrays.
1181
1182 Return the unique values in `ar1` that are not in `ar2`.
1183
1184 Parameters
1185 ----------
1186 ar1 : array_like
1187 Input array.
1188 ar2 : array_like
1189 Input comparison array.
1190 assume_unique : bool
1191 If True, the input arrays are both assumed to be unique, which
1192 can speed up the calculation. Default is False.
1193
1194 Returns
1195 -------
1196 setdiff1d : ndarray
        1D array of values in `ar1` that are not in `ar2`. The result
        is sorted when ``assume_unique=False``; otherwise it is sorted
        only if the input is sorted.
1200
1201 Examples
1202 --------
1203 >>> import numpy as np
1204 >>> a = np.array([1, 2, 3, 2, 4, 1])
1205 >>> b = np.array([3, 4, 5, 6])
1206 >>> np.setdiff1d(a, b)
1207 array([1, 2])
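
    With ``assume_unique=True``, the order of `ar1` is preserved rather than
    sorted:

    >>> np.setdiff1d([4, 1, 3], [3], assume_unique=True)
    array([4, 1])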
1208
1209 """
1210 if assume_unique:
1211 ar1 = np.asarray(ar1).ravel()
1212 else:
1213 ar1 = unique(ar1)
1214 ar2 = unique(ar2)
1215 return ar1[_in1d(ar1, ar2, assume_unique=True, invert=True)]