Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/numpy/lib/_arraysetops

1"""

2Set operations for arrays based on sorting.

4Notes

5-----

7For floating point arrays, inaccurate results may appear due to usual round-off

8and floating point comparison issues.

10Speed could be gained in some operations by an implementation of

11`numpy.sort`, that can provide directly the permutation vectors, thus avoiding

12calls to `numpy.argsort`.

14Original author: Robert Cimrman

16"""

17import functools

18from typing import NamedTuple

20import numpy as np

21from numpy._core import overrides

22from numpy._core._multiarray_umath import _array_converter, _unique_hash

23from numpy.lib.array_utils import normalize_axis_index

25array_function_dispatch = functools.partial(

26 overrides.array_function_dispatch, module='numpy')

29__all__ = [

30 "ediff1d", "intersect1d", "isin", "setdiff1d", "setxor1d",

31 "union1d", "unique", "unique_all", "unique_counts", "unique_inverse",

32 "unique_values"

33]

36def _ediff1d_dispatcher(ary, to_end=None, to_begin=None):

37 return (ary, to_end, to_begin)

40@array_function_dispatch(_ediff1d_dispatcher)

41def ediff1d(ary, to_end=None, to_begin=None):

42 """

43 The differences between consecutive elements of an array.

45 Parameters

46 ----------

47 ary : array_like

48 If necessary, will be flattened before the differences are taken.

49 to_end : array_like, optional

50 Number(s) to append at the end of the returned differences.

51 to_begin : array_like, optional

52 Number(s) to prepend at the beginning of the returned differences.

54 Returns

55 -------

56 ediff1d : ndarray

57 The differences. Loosely, this is ``ary.flat[1:] - ary.flat[:-1]``.

59 See Also

60 --------

61 diff, gradient

63 Notes

64 -----

65 When applied to masked arrays, this function drops the mask information

66 if the `to_begin` and/or `to_end` parameters are used.

68 Examples

69 --------

70 >>> import numpy as np

71 >>> x = np.array([1, 2, 4, 7, 0])

72 >>> np.ediff1d(x)

73 array([ 1, 2, 3, -7])

75 >>> np.ediff1d(x, to_begin=-99, to_end=np.array([88, 99]))

76 array([-99, 1, 2, ..., -7, 88, 99])

78 The returned array is always 1D.

80 >>> y = [[1, 2, 4], [1, 6, 24]]

81 >>> np.ediff1d(y)

82 array([ 1, 2, -3, 5, 18])

84 """

85 conv = _array_converter(ary)

86 # Convert to (any) array and ravel:

87 ary = conv[0].ravel()

89 # enforce that the dtype of `ary` is used for the output

90 dtype_req = ary.dtype

92 # fast track default case

93 if to_begin is None and to_end is None:

94 return ary[1:] - ary[:-1]

96 if to_begin is None:

97 l_begin = 0

98 else:

99 to_begin = np.asanyarray(to_begin)

100 if not np.can_cast(to_begin, dtype_req, casting="same_kind"):

101 raise TypeError("dtype of `to_begin` must be compatible "

102 "with input `ary` under the `same_kind` rule.")

103

104 to_begin = to_begin.ravel()

105 l_begin = len(to_begin)

106

107 if to_end is None:

108 l_end = 0

109 else:

110 to_end = np.asanyarray(to_end)

111 if not np.can_cast(to_end, dtype_req, casting="same_kind"):

112 raise TypeError("dtype of `to_end` must be compatible "

113 "with input `ary` under the `same_kind` rule.")

114

115 to_end = to_end.ravel()

116 l_end = len(to_end)

117

118 # do the calculation in place and copy to_begin and to_end

119 l_diff = max(len(ary) - 1, 0)

120 result = np.empty_like(ary, shape=l_diff + l_begin + l_end)

121

122 if l_begin > 0:

123 result[:l_begin] = to_begin

124 if l_end > 0:

125 result[l_begin + l_diff:] = to_end

126 np.subtract(ary[1:], ary[:-1], result[l_begin:l_begin + l_diff])

127

128 return conv.wrap(result)

129

130

131def _unpack_tuple(x):

132 """ Unpacks one-element tuples for use as return values """

133 if len(x) == 1:

134 return x[0]

135 else:

136 return x

137

138

139def _unique_dispatcher(ar, return_index=None, return_inverse=None,

140 return_counts=None, axis=None, *, equal_nan=None,

141 sorted=True):

142 return (ar,)

143

144

145@array_function_dispatch(_unique_dispatcher)

146def unique(ar, return_index=False, return_inverse=False,

147 return_counts=False, axis=None, *, equal_nan=True,

148 sorted=True):

149 """

150 Find the unique elements of an array.

151

152 Returns the sorted unique elements of an array. There are three optional

153 outputs in addition to the unique elements:

154

155 * the indices of the input array that give the unique values

156 * the indices of the unique array that reconstruct the input array

157 * the number of times each unique value comes up in the input array

158

159 Parameters

160 ----------

161 ar : array_like

162 Input array. Unless `axis` is specified, this will be flattened if it

163 is not already 1-D.

164 return_index : bool, optional

165 If True, also return the indices of `ar` (along the specified axis,

166 if provided, or in the flattened array) that result in the unique array.

167 return_inverse : bool, optional

168 If True, also return the indices of the unique array (for the specified

169 axis, if provided) that can be used to reconstruct `ar`.

170 return_counts : bool, optional

171 If True, also return the number of times each unique item appears

172 in `ar`.

173 axis : int or None, optional

174 The axis to operate on. If None, `ar` will be flattened. If an integer,

175 the subarrays indexed by the given axis will be flattened and treated

176 as the elements of a 1-D array with the dimension of the given axis,

177 see the notes for more details. Object arrays or structured arrays

178 that contain objects are not supported if the `axis` kwarg is used. The

179 default is None.

180

181 equal_nan : bool, optional

182 If True, collapses multiple NaN values in the return array into one.

183

184 .. versionadded:: 1.24

185

186 sorted : bool, optional

187 If True, the unique elements are sorted. Elements may be sorted in

188 practice even if ``sorted=False``, but this could change without

189 notice.

190

191 .. versionadded:: 2.3

192

193 Returns

194 -------

195 unique : ndarray

196 The sorted unique values.

197 unique_indices : ndarray, optional

198 The indices of the first occurrences of the unique values in the

199 original array. Only provided if `return_index` is True.

200 unique_inverse : ndarray, optional

201 The indices to reconstruct the original array from the

202 unique array. Only provided if `return_inverse` is True.

203 unique_counts : ndarray, optional

204 The number of times each of the unique values comes up in the

205 original array. Only provided if `return_counts` is True.

206

207 See Also

208 --------

209 repeat : Repeat elements of an array.

210 sort : Return a sorted copy of an array.

211

212 Notes

213 -----

214 When an axis is specified the subarrays indexed by the axis are sorted.

215 This is done by making the specified axis the first dimension of the array

216 (move the axis to the first dimension to keep the order of the other axes)

217 and then flattening the subarrays in C order. The flattened subarrays are

218 then viewed as a structured type with each element given a label, with the

219 effect that we end up with a 1-D array of structured types that can be

220 treated in the same way as any other 1-D array. The result is that the

221 flattened subarrays are sorted in lexicographic order starting with the

222 first element.

223

224 .. versionchanged:: 1.21

225 Like np.sort, NaN will sort to the end of the values.

226 For complex arrays all NaN values are considered equivalent

227 (no matter whether the NaN is in the real or imaginary part).

228 As the representant for the returned array the smallest one in the

229 lexicographical order is chosen - see np.sort for how the lexicographical

230 order is defined for complex arrays.

231

232 .. versionchanged:: 2.0

233 For multi-dimensional inputs, ``unique_inverse`` is reshaped

234 such that the input can be reconstructed using

235 ``np.take(unique, unique_inverse, axis=axis)``. The result is

236 now not 1-dimensional when ``axis=None``.

237

238 Note that in NumPy 2.0.0 a higher dimensional array was returned also

239 when ``axis`` was not ``None``. This was reverted, but

240 ``inverse.reshape(-1)`` can be used to ensure compatibility with both

241 versions.

242

243 Examples

244 --------

245 >>> import numpy as np

246 >>> np.unique([1, 1, 2, 2, 3, 3])

247 array([1, 2, 3])

248 >>> a = np.array([[1, 1], [2, 3]])

249 >>> np.unique(a)

250 array([1, 2, 3])

251

252 Return the unique rows of a 2D array

253

254 >>> a = np.array([[1, 0, 0], [1, 0, 0], [2, 3, 4]])

255 >>> np.unique(a, axis=0)

256 array([[1, 0, 0], [2, 3, 4]])

257

258 Return the indices of the original array that give the unique values:

259

260 >>> a = np.array(['a', 'b', 'b', 'c', 'a'])

261 >>> u, indices = np.unique(a, return_index=True)

262 >>> u

263 array(['a', 'b', 'c'], dtype='<U1')

264 >>> indices

265 array([0, 1, 3])

266 >>> a[indices]

267 array(['a', 'b', 'c'], dtype='<U1')

268

269 Reconstruct the input array from the unique values and inverse:

270

271 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])

272 >>> u, indices = np.unique(a, return_inverse=True)

273 >>> u

274 array([1, 2, 3, 4, 6])

275 >>> indices

276 array([0, 1, 4, 3, 1, 2, 1])

277 >>> u[indices]

278 array([1, 2, 6, 4, 2, 3, 2])

279

280 Reconstruct the input values from the unique values and counts:

281

282 >>> a = np.array([1, 2, 6, 4, 2, 3, 2])

283 >>> values, counts = np.unique(a, return_counts=True)

284 >>> values

285 array([1, 2, 3, 4, 6])

286 >>> counts

287 array([1, 3, 1, 1, 1])

288 >>> np.repeat(values, counts)

289 array([1, 2, 2, 2, 3, 4, 6]) # original order not preserved

290

291 """

292 ar = np.asanyarray(ar)

293 if axis is None or ar.ndim == 1:

294 if axis is not None:

295 normalize_axis_index(axis, ar.ndim)

296 ret = _unique1d(ar, return_index, return_inverse, return_counts,

297 equal_nan=equal_nan, inverse_shape=ar.shape, axis=None,

298 sorted=sorted)

299 return _unpack_tuple(ret)

300

301 # axis was specified and not None

302 try:

303 ar = np.moveaxis(ar, axis, 0)

304 except np.exceptions.AxisError:

305 # this removes the "axis1" or "axis2" prefix from the error message

306 raise np.exceptions.AxisError(axis, ar.ndim) from None

307 inverse_shape = [1] * ar.ndim

308 inverse_shape[axis] = ar.shape[0]

309

310 # Must reshape to a contiguous 2D array for this to work...

311 orig_shape, orig_dtype = ar.shape, ar.dtype

312 ar = ar.reshape(orig_shape[0], np.prod(orig_shape[1:], dtype=np.intp))

313 ar = np.ascontiguousarray(ar)

314 dtype = [(f'f{i}', ar.dtype) for i in range(ar.shape[1])]

315

316 # At this point, `ar` has shape `(n, m)`, and `dtype` is a structured

317 # data type with `m` fields where each field has the data type of `ar`.

318 # In the following, we create the array `consolidated`, which has

319 # shape `(n,)` with data type `dtype`.

320 try:

321 if ar.shape[1] > 0:

322 consolidated = ar.view(dtype)

323 else:

324 # If ar.shape[1] == 0, then dtype will be `np.dtype([])`, which is

325 # a data type with itemsize 0, and the call `ar.view(dtype)` will

326 # fail. Instead, we'll use `np.empty` to explicitly create the

327 # array with shape `(len(ar),)`. Because `dtype` in this case has

328 # itemsize 0, the total size of the result is still 0 bytes.

329 consolidated = np.empty(len(ar), dtype=dtype)

330 except TypeError as e:

331 # There's no good way to do this for object arrays, etc...

332 msg = 'The axis argument to unique is not supported for dtype {dt}'

333 raise TypeError(msg.format(dt=ar.dtype)) from e

334

335 def reshape_uniq(uniq):

336 n = len(uniq)

337 uniq = uniq.view(orig_dtype)

338 uniq = uniq.reshape(n, *orig_shape[1:])

339 uniq = np.moveaxis(uniq, 0, axis)

340 return uniq

341

342 output = _unique1d(consolidated, return_index,

343 return_inverse, return_counts,

344 equal_nan=equal_nan, inverse_shape=inverse_shape,

345 axis=axis, sorted=sorted)

346 output = (reshape_uniq(output[0]),) + output[1:]

347 return _unpack_tuple(output)

348

349

350def _unique1d(ar, return_index=False, return_inverse=False,

351 return_counts=False, *, equal_nan=True, inverse_shape=None,

352 axis=None, sorted=True):

353 """

354 Find the unique elements of an array, ignoring shape.

355

356 Uses a hash table to find the unique elements if possible.

357 """

358 ar = np.asanyarray(ar).flatten()

359 if len(ar.shape) != 1:

360 # np.matrix, and maybe some other array subclasses, insist on keeping

361 # two dimensions for all operations. Coerce to an ndarray in such cases.

362 ar = np.asarray(ar).flatten()

363

364 optional_indices = return_index or return_inverse

365

366 # masked arrays are not supported yet.

367 if not optional_indices and not return_counts and not np.ma.is_masked(ar):

368 # First we convert the array to a numpy array, later we wrap it back

369 # in case it was a subclass of numpy.ndarray.

370 conv = _array_converter(ar)

371 ar_, = conv

372

373 if (hash_unique := _unique_hash(ar_, equal_nan=equal_nan)) \

374 is not NotImplemented:

375 if sorted:

376 hash_unique.sort()

377 # We wrap the result back in case it was a subclass of numpy.ndarray.

378 return (conv.wrap(hash_unique),)

379

380 # If we don't use the hash map, we use the slower sorting method.

381 if optional_indices:

382 perm = ar.argsort(kind='mergesort' if return_index else 'quicksort')

383 aux = ar[perm]

384 else:

385 ar.sort()

386 aux = ar

387 mask = np.empty(aux.shape, dtype=np.bool)

388 mask[:1] = True

389 if (equal_nan and aux.shape[0] > 0 and aux.dtype.kind in "cfmM" and

390 np.isnan(aux[-1])):

391 if aux.dtype.kind == "c": # for complex all NaNs are considered equivalent

392 aux_firstnan = np.searchsorted(np.isnan(aux), True, side='left')

393 else:

394 aux_firstnan = np.searchsorted(aux, aux[-1], side='left')

395 if aux_firstnan > 0:

396 mask[1:aux_firstnan] = (

397 aux[1:aux_firstnan] != aux[:aux_firstnan - 1])

398 mask[aux_firstnan] = True

399 mask[aux_firstnan + 1:] = False

400 else:

401 mask[1:] = aux[1:] != aux[:-1]

402

403 ret = (aux[mask],)

404 if return_index:

405 ret += (perm[mask],)

406 if return_inverse:

407 imask = np.cumsum(mask) - 1

408 inv_idx = np.empty(mask.shape, dtype=np.intp)

409 inv_idx[perm] = imask

410 ret += (inv_idx.reshape(inverse_shape) if axis is None else inv_idx,)

411 if return_counts:

412 idx = np.concatenate(np.nonzero(mask) + ([mask.size],))

413 ret += (np.diff(idx),)

414 return ret

415

416

417# Array API set functions

418

419class UniqueAllResult(NamedTuple):

420 values: np.ndarray

421 indices: np.ndarray

422 inverse_indices: np.ndarray

423 counts: np.ndarray

424

425

426class UniqueCountsResult(NamedTuple):

427 values: np.ndarray

428 counts: np.ndarray

429

430

431class UniqueInverseResult(NamedTuple):

432 values: np.ndarray

433 inverse_indices: np.ndarray

434

435

436def _unique_all_dispatcher(x, /):

437 return (x,)

438

439

440@array_function_dispatch(_unique_all_dispatcher)

441def unique_all(x):

442 """

443 Find the unique elements of an array, and counts, inverse, and indices.

444

445 This function is an Array API compatible alternative to::

446

447 np.unique(x, return_index=True, return_inverse=True,

448 return_counts=True, equal_nan=False, sorted=False)

449

450 but returns a namedtuple for easier access to each output.

451

452 .. note::

453 This function currently always returns a sorted result, however,

454 this could change in any NumPy minor release.

455

456 Parameters

457 ----------

458 x : array_like

459 Input array. It will be flattened if it is not already 1-D.

460

461 Returns

462 -------

463 out : namedtuple

464 The result containing:

465

466 * values - The unique elements of an input array.

467 * indices - The first occurring indices for each unique element.

468 * inverse_indices - The indices from the set of unique elements

469 that reconstruct `x`.

470 * counts - The corresponding counts for each unique element.

471

472 See Also

473 --------

474 unique : Find the unique elements of an array.

475

476 Examples

477 --------

478 >>> import numpy as np

479 >>> x = [1, 1, 2]

480 >>> uniq = np.unique_all(x)

481 >>> uniq.values

482 array([1, 2])

483 >>> uniq.indices

484 array([0, 2])

485 >>> uniq.inverse_indices

486 array([0, 0, 1])

487 >>> uniq.counts

488 array([2, 1])

489 """

490 result = unique(

491 x,

492 return_index=True,

493 return_inverse=True,

494 return_counts=True,

495 equal_nan=False,

496 )

497 return UniqueAllResult(*result)

498

499

500def _unique_counts_dispatcher(x, /):

501 return (x,)

502

503

504@array_function_dispatch(_unique_counts_dispatcher)

505def unique_counts(x):

506 """

507 Find the unique elements and counts of an input array `x`.

508

509 This function is an Array API compatible alternative to::

510

511 np.unique(x, return_counts=True, equal_nan=False, sorted=False)

512

513 but returns a namedtuple for easier access to each output.

514

515 .. note::

516 This function currently always returns a sorted result, however,

517 this could change in any NumPy minor release.

518

519 Parameters

520 ----------

521 x : array_like

522 Input array. It will be flattened if it is not already 1-D.

523

524 Returns

525 -------

526 out : namedtuple

527 The result containing:

528

529 * values - The unique elements of an input array.

530 * counts - The corresponding counts for each unique element.

531

532 See Also

533 --------

534 unique : Find the unique elements of an array.

535

536 Examples

537 --------

538 >>> import numpy as np

539 >>> x = [1, 1, 2]

540 >>> uniq = np.unique_counts(x)

541 >>> uniq.values

542 array([1, 2])

543 >>> uniq.counts

544 array([2, 1])

545 """

546 result = unique(

547 x,

548 return_index=False,

549 return_inverse=False,

550 return_counts=True,

551 equal_nan=False,

552 )

553 return UniqueCountsResult(*result)

554

555

556def _unique_inverse_dispatcher(x, /):

557 return (x,)

558

559

560@array_function_dispatch(_unique_inverse_dispatcher)

561def unique_inverse(x):

562 """

563 Find the unique elements of `x` and indices to reconstruct `x`.

564

565 This function is an Array API compatible alternative to::

566

567 np.unique(x, return_inverse=True, equal_nan=False, sorted=False)

568

569 but returns a namedtuple for easier access to each output.

570

571 .. note::

572 This function currently always returns a sorted result, however,

573 this could change in any NumPy minor release.

574

575 Parameters

576 ----------

577 x : array_like

578 Input array. It will be flattened if it is not already 1-D.

579

580 Returns

581 -------

582 out : namedtuple

583 The result containing:

584

585 * values - The unique elements of an input array.

586 * inverse_indices - The indices from the set of unique elements

587 that reconstruct `x`.

588

589 See Also

590 --------

591 unique : Find the unique elements of an array.

592

593 Examples

594 --------

595 >>> import numpy as np

596 >>> x = [1, 1, 2]

597 >>> uniq = np.unique_inverse(x)

598 >>> uniq.values

599 array([1, 2])

600 >>> uniq.inverse_indices

601 array([0, 0, 1])

602 """

603 result = unique(

604 x,

605 return_index=False,

606 return_inverse=True,

607 return_counts=False,

608 equal_nan=False,

609 )

610 return UniqueInverseResult(*result)

611

612

613def _unique_values_dispatcher(x, /):

614 return (x,)

615

616

617@array_function_dispatch(_unique_values_dispatcher)

618def unique_values(x):

619 """

620 Returns the unique elements of an input array `x`.

621

622 This function is an Array API compatible alternative to::

623

624 np.unique(x, equal_nan=False, sorted=False)

625

626 .. versionchanged:: 2.3

627 The algorithm was changed to a faster one that does not rely on

628 sorting, and hence the results are no longer implicitly sorted.

629

630 Parameters

631 ----------

632 x : array_like

633 Input array. It will be flattened if it is not already 1-D.

634

635 Returns

636 -------

637 out : ndarray

638 The unique elements of an input array.

639

640 See Also

641 --------

642 unique : Find the unique elements of an array.

643

644 Examples

645 --------

646 >>> import numpy as np

647 >>> np.unique_values([1, 1, 2])

648 array([1, 2]) # may vary

649

650 """

651 return unique(

652 x,

653 return_index=False,

654 return_inverse=False,

655 return_counts=False,

656 equal_nan=False,

657 sorted=False,

658 )

659

660

661def _intersect1d_dispatcher(

662 ar1, ar2, assume_unique=None, return_indices=None):

663 return (ar1, ar2)

664

665

666@array_function_dispatch(_intersect1d_dispatcher)

667def intersect1d(ar1, ar2, assume_unique=False, return_indices=False):

668 """

669 Find the intersection of two arrays.

670

671 Return the sorted, unique values that are in both of the input arrays.

672

673 Parameters

674 ----------

675 ar1, ar2 : array_like

676 Input arrays. Will be flattened if not already 1D.

677 assume_unique : bool

678 If True, the input arrays are both assumed to be unique, which

679 can speed up the calculation. If True but ``ar1`` or ``ar2`` are not

680 unique, incorrect results and out-of-bounds indices could result.

681 Default is False.

682 return_indices : bool

683 If True, the indices which correspond to the intersection of the two

684 arrays are returned. The first instance of a value is used if there are

685 multiple. Default is False.

686

687 Returns

688 -------

689 intersect1d : ndarray

690 Sorted 1D array of common and unique elements.

691 comm1 : ndarray

692 The indices of the first occurrences of the common values in `ar1`.

693 Only provided if `return_indices` is True.

694 comm2 : ndarray

695 The indices of the first occurrences of the common values in `ar2`.

696 Only provided if `return_indices` is True.

697

698 Examples

699 --------

700 >>> import numpy as np

701 >>> np.intersect1d([1, 3, 4, 3], [3, 1, 2, 1])

702 array([1, 3])

703

704 To intersect more than two arrays, use functools.reduce:

705

706 >>> from functools import reduce

707 >>> reduce(np.intersect1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))

708 array([3])

709

710 To return the indices of the values common to the input arrays

711 along with the intersected values:

712

713 >>> x = np.array([1, 1, 2, 3, 4])

714 >>> y = np.array([2, 1, 4, 6])

715 >>> xy, x_ind, y_ind = np.intersect1d(x, y, return_indices=True)

716 >>> x_ind, y_ind

717 (array([0, 2, 4]), array([1, 0, 2]))

718 >>> xy, x[x_ind], y[y_ind]

719 (array([1, 2, 4]), array([1, 2, 4]), array([1, 2, 4]))

720

721 """

722 ar1 = np.asanyarray(ar1)

723 ar2 = np.asanyarray(ar2)

724

725 if not assume_unique:

726 if return_indices:

727 ar1, ind1 = unique(ar1, return_index=True)

728 ar2, ind2 = unique(ar2, return_index=True)

729 else:

730 ar1 = unique(ar1)

731 ar2 = unique(ar2)

732 else:

733 ar1 = ar1.ravel()

734 ar2 = ar2.ravel()

735

736 aux = np.concatenate((ar1, ar2))

737 if return_indices:

738 aux_sort_indices = np.argsort(aux, kind='mergesort')

739 aux = aux[aux_sort_indices]

740 else:

741 aux.sort()

742

743 mask = aux[1:] == aux[:-1]

744 int1d = aux[:-1][mask]

745

746 if return_indices:

747 ar1_indices = aux_sort_indices[:-1][mask]

748 ar2_indices = aux_sort_indices[1:][mask] - ar1.size

749 if not assume_unique:

750 ar1_indices = ind1[ar1_indices]

751 ar2_indices = ind2[ar2_indices]

752

753 return int1d, ar1_indices, ar2_indices

754 else:

755 return int1d

756

757

758def _setxor1d_dispatcher(ar1, ar2, assume_unique=None):

759 return (ar1, ar2)

760

761

762@array_function_dispatch(_setxor1d_dispatcher)

763def setxor1d(ar1, ar2, assume_unique=False):

764 """

765 Find the set exclusive-or of two arrays.

766

767 Return the sorted, unique values that are in only one (not both) of the

768 input arrays.

769

770 Parameters

771 ----------

772 ar1, ar2 : array_like

773 Input arrays.

774 assume_unique : bool

775 If True, the input arrays are both assumed to be unique, which

776 can speed up the calculation. Default is False.

777

778 Returns

779 -------

780 setxor1d : ndarray

781 Sorted 1D array of unique values that are in only one of the input

782 arrays.

783

784 Examples

785 --------

786 >>> import numpy as np

787 >>> a = np.array([1, 2, 3, 2, 4])

788 >>> b = np.array([2, 3, 5, 7, 5])

789 >>> np.setxor1d(a,b)

790 array([1, 4, 5, 7])

791

792 """

793 if not assume_unique:

794 ar1 = unique(ar1)

795 ar2 = unique(ar2)

796

797 aux = np.concatenate((ar1, ar2), axis=None)

798 if aux.size == 0:

799 return aux

800

801 aux.sort()

802 flag = np.concatenate(([True], aux[1:] != aux[:-1], [True]))

803 return aux[flag[1:] & flag[:-1]]

804

805

806def _isin(ar1, ar2, assume_unique=False, invert=False, *, kind=None):

807 # Ravel both arrays, behavior for the first array could be different

808 ar1 = np.asarray(ar1).ravel()

809 ar2 = np.asarray(ar2).ravel()

810

811 # Ensure that iteration through object arrays yields size-1 arrays

812 if ar2.dtype == object:

813 ar2 = ar2.reshape(-1, 1)

814

815 if kind not in {None, 'sort', 'table'}:

816 raise ValueError(

817 f"Invalid kind: '{kind}'. Please use None, 'sort' or 'table'.")

818

819 # Can use the table method if all arrays are integers or boolean:

820 is_int_arrays = all(ar.dtype.kind in ("u", "i", "b") for ar in (ar1, ar2))

821 use_table_method = is_int_arrays and kind in {None, 'table'}

822

823 if use_table_method:

824 if ar2.size == 0:

825 if invert:

826 return np.ones_like(ar1, dtype=bool)

827 else:

828 return np.zeros_like(ar1, dtype=bool)

829

830 # Convert booleans to uint8 so we can use the fast integer algorithm

831 if ar1.dtype == bool:

832 ar1 = ar1.astype(np.uint8)

833 if ar2.dtype == bool:

834 ar2 = ar2.astype(np.uint8)

835

836 ar2_min = int(np.min(ar2))

837 ar2_max = int(np.max(ar2))

838

839 ar2_range = ar2_max - ar2_min

840

841 # Constraints on whether we can actually use the table method:

842 # 1. Assert memory usage is not too large

843 below_memory_constraint = ar2_range <= 6 * (ar1.size + ar2.size)

844 # 2. Check overflows for (ar2 - ar2_min); dtype=ar2.dtype

845 range_safe_from_overflow = ar2_range <= np.iinfo(ar2.dtype).max

846

847 # Optimal performance is for approximately

848 # log10(size) > (log10(range) - 2.27) / 0.927.

849 # However, here we set the requirement that by default

850 # the intermediate array can only be 6x

851 # the combined memory allocation of the original

852 # arrays. See discussion on

853 # https://github.com/numpy/numpy/pull/12065.

854

855 if (

856 range_safe_from_overflow and

857 (below_memory_constraint or kind == 'table')

858 ):

859

860 if invert:

861 outgoing_array = np.ones_like(ar1, dtype=bool)

862 else:

863 outgoing_array = np.zeros_like(ar1, dtype=bool)

864

865 # Make elements 1 where the integer exists in ar2

866 if invert:

867 isin_helper_ar = np.ones(ar2_range + 1, dtype=bool)

868 isin_helper_ar[ar2 - ar2_min] = 0

869 else:

870 isin_helper_ar = np.zeros(ar2_range + 1, dtype=bool)

871 isin_helper_ar[ar2 - ar2_min] = 1

872

873 # Mask out elements we know won't work

874 basic_mask = (ar1 <= ar2_max) & (ar1 >= ar2_min)

875 in_range_ar1 = ar1[basic_mask]

876 if in_range_ar1.size == 0:

877 # Nothing more to do, since all values are out of range.

878 return outgoing_array

879

880 # Unfortunately, ar2_min can be out of range for `intp` even

881 # if the calculation result must fit in range (and be positive).

882 # In that case, use ar2.dtype which must work for all unmasked

883 # values.

884 try:

885 ar2_min = np.array(ar2_min, dtype=np.intp)

886 dtype = np.intp

887 except OverflowError:

888 dtype = ar2.dtype

889

890 out = np.empty_like(in_range_ar1, dtype=np.intp)

891 outgoing_array[basic_mask] = isin_helper_ar[

892 np.subtract(in_range_ar1, ar2_min, dtype=dtype,

893 out=out, casting="unsafe")]

894

895 return outgoing_array

896 elif kind == 'table': # not range_safe_from_overflow

897 raise RuntimeError(

898 "You have specified kind='table', "

899 "but the range of values in `ar2` or `ar1` exceed the "

900 "maximum integer of the datatype. "

901 "Please set `kind` to None or 'sort'."

902 )

903 elif kind == 'table':

904 raise ValueError(

905 "The 'table' method is only "

906 "supported for boolean or integer arrays. "

907 "Please select 'sort' or None for kind."

908 )

909

910 # Check if one of the arrays may contain arbitrary objects

911 contains_object = ar1.dtype.hasobject or ar2.dtype.hasobject

912

913 # This code is run when

914 # a) the first condition is true, making the code significantly faster

915 # b) the second condition is true (i.e. `ar1` or `ar2` may contain

916 # arbitrary objects), since then sorting is not guaranteed to work

917 if len(ar2) < 10 * len(ar1) ** 0.145 or contains_object:

918 if invert:

919 mask = np.ones(len(ar1), dtype=bool)

920 for a in ar2:

921 mask &= (ar1 != a)

922 else:

923 mask = np.zeros(len(ar1), dtype=bool)

924 for a in ar2:

925 mask |= (ar1 == a)

926 return mask

927

928 # Otherwise use sorting

929 if not assume_unique:

930 ar1, rev_idx = np.unique(ar1, return_inverse=True)

931 ar2 = np.unique(ar2)

932

933 ar = np.concatenate((ar1, ar2))

934 # We need this to be a stable sort, so always use 'mergesort'

935 # here. The values from the first array should always come before

936 # the values from the second array.

937 order = ar.argsort(kind='mergesort')

938 sar = ar[order]

939 if invert:

940 bool_ar = (sar[1:] != sar[:-1])

941 else:

942 bool_ar = (sar[1:] == sar[:-1])

943 flag = np.concatenate((bool_ar, [invert]))

944 ret = np.empty(ar.shape, dtype=bool)

945 ret[order] = flag

946

947 if assume_unique:

948 return ret[:len(ar1)]

949 else:

950 return ret[rev_idx]

951

952

953def _isin_dispatcher(element, test_elements, assume_unique=None, invert=None,

954 *, kind=None):

955 return (element, test_elements)

956

957

958@array_function_dispatch(_isin_dispatcher)

959def isin(element, test_elements, assume_unique=False, invert=False, *,

960 kind=None):

961 """

962 Calculates ``element in test_elements``, broadcasting over `element` only.

963 Returns a boolean array of the same shape as `element` that is True

964 where an element of `element` is in `test_elements` and False otherwise.

965

966 Parameters

967 ----------

968 element : array_like

969 Input array.

970 test_elements : array_like

971 The values against which to test each value of `element`.

972 This argument is flattened if it is an array or array_like.

973 See notes for behavior with non-array-like parameters.

974 assume_unique : bool, optional

975 If True, the input arrays are both assumed to be unique, which

976 can speed up the calculation. Default is False.

977 invert : bool, optional

978 If True, the values in the returned array are inverted, as if

979 calculating `element not in test_elements`. Default is False.

980 ``np.isin(a, b, invert=True)`` is equivalent to (but faster

981 than) ``np.invert(np.isin(a, b))``.

982 kind : {None, 'sort', 'table'}, optional

983 The algorithm to use. This will not affect the final result,

984 but will affect the speed and memory use. The default, None,

985 will select automatically based on memory considerations.

986

987 * If 'sort', will use a mergesort-based approach. This will have

988 a memory usage of roughly 6 times the sum of the sizes of

989 `element` and `test_elements`, not accounting for size of dtypes.

990 * If 'table', will use a lookup table approach similar

991 to a counting sort. This is only available for boolean and

992 integer arrays. This will have a memory usage of the

993 size of `element` plus the max-min value of `test_elements`.

994 `assume_unique` has no effect when the 'table' option is used.

995 * If None, will automatically choose 'table' if

996 the required memory allocation is less than or equal to

997 6 times the sum of the sizes of `element` and `test_elements`,

998 otherwise will use 'sort'. This is done to not use

999 a large amount of memory by default, even though

1000 'table' may be faster in most cases. If 'table' is chosen,

1001 `assume_unique` will have no effect.

1002

1003

1004 Returns

1005 -------

1006 isin : ndarray, bool

1007 Has the same shape as `element`. The values `element[isin]`

1008 are in `test_elements`.

1009

1010 Notes

1011 -----

1012 `isin` is an element-wise function version of the python keyword `in`.

1013 ``isin(a, b)`` is roughly equivalent to

1014 ``np.array([item in b for item in a])`` if `a` and `b` are 1-D sequences.

1015

1016 `element` and `test_elements` are converted to arrays if they are not

1017 already. If `test_elements` is a set (or other non-sequence collection)

1018 it will be converted to an object array with one element, rather than an

1019 array of the values contained in `test_elements`. This is a consequence

1020 of the `array` constructor's way of handling non-sequence collections.

1021 Converting the set to a list usually gives the desired behavior.

1022

1023 Using ``kind='table'`` tends to be faster than `kind='sort'` if the

1024 following relationship is true:

1025 ``log10(len(test_elements)) >

1026 (log10(max(test_elements)-min(test_elements)) - 2.27) / 0.927``,

1027 but may use greater memory. The default value for `kind` will

1028 be automatically selected based only on memory usage, so one may

1029 manually set ``kind='table'`` if memory constraints can be relaxed.

1030

1031 Examples

1032 --------

1033 >>> import numpy as np

1034 >>> element = 2*np.arange(4).reshape((2, 2))

1035 >>> element

1036 array([[0, 2],

1037 [4, 6]])

1038 >>> test_elements = [1, 2, 4, 8]

1039 >>> mask = np.isin(element, test_elements)

1040 >>> mask

1041 array([[False, True],

1042 [ True, False]])

1043 >>> element[mask]

1044 array([2, 4])

1045

1046 The indices of the matched values can be obtained with `nonzero`:

1047

1048 >>> np.nonzero(mask)

1049 (array([0, 1]), array([1, 0]))

1050

1051 The test can also be inverted:

1052

1053 >>> mask = np.isin(element, test_elements, invert=True)

1054 >>> mask

1055 array([[ True, False],

1056 [False, True]])

1057 >>> element[mask]

1058 array([0, 6])

1059

1060 Because of how `array` handles sets, the following does not

1061 work as expected:

1062

1063 >>> test_set = {1, 2, 4, 8}

1064 >>> np.isin(element, test_set)

1065 array([[False, False],

1066 [False, False]])

1067

1068 Casting the set to a list gives the expected result:

1069

1070 >>> np.isin(element, list(test_set))

1071 array([[False, True],

1072 [ True, False]])

1073 """

1074 element = np.asarray(element)

1075 return _isin(element, test_elements, assume_unique=assume_unique,

1076 invert=invert, kind=kind).reshape(element.shape)

1077

1078

1079def _union1d_dispatcher(ar1, ar2):

1080 return (ar1, ar2)

1081

1082

1083@array_function_dispatch(_union1d_dispatcher)

1084def union1d(ar1, ar2):

1085 """

1086 Find the union of two arrays.

1087

1088 Return the unique, sorted array of values that are in either of the two

1089 input arrays.

1090

1091 Parameters

1092 ----------

1093 ar1, ar2 : array_like

1094 Input arrays. They are flattened if they are not already 1D.

1095

1096 Returns

1097 -------

1098 union1d : ndarray

1099 Unique, sorted union of the input arrays.

1100

1101 Examples

1102 --------

1103 >>> import numpy as np

1104 >>> np.union1d([-1, 0, 1], [-2, 0, 2])

1105 array([-2, -1, 0, 1, 2])

1106

1107 To find the union of more than two arrays, use functools.reduce:

1108

1109 >>> from functools import reduce

1110 >>> reduce(np.union1d, ([1, 3, 4, 3], [3, 1, 2, 1], [6, 3, 4, 2]))

1111 array([1, 2, 3, 4, 6])

1112 """

1113 return unique(np.concatenate((ar1, ar2), axis=None))

1114

1115

1116def _setdiff1d_dispatcher(ar1, ar2, assume_unique=None):

1117 return (ar1, ar2)

1118

1119

1120@array_function_dispatch(_setdiff1d_dispatcher)

1121def setdiff1d(ar1, ar2, assume_unique=False):

1122 """

1123 Find the set difference of two arrays.

1124

1125 Return the unique values in `ar1` that are not in `ar2`.

1126

1127 Parameters

1128 ----------

1129 ar1 : array_like

1130 Input array.

1131 ar2 : array_like

1132 Input comparison array.

1133 assume_unique : bool

1134 If True, the input arrays are both assumed to be unique, which

1135 can speed up the calculation. Default is False.

1136

1137 Returns

1138 -------

1139 setdiff1d : ndarray

1140 1D array of values in `ar1` that are not in `ar2`. The result

1141 is sorted when `assume_unique=False`, but otherwise only sorted

1142 if the input is sorted.

1143

1144 Examples

1145 --------

1146 >>> import numpy as np

1147 >>> a = np.array([1, 2, 3, 2, 4, 1])

1148 >>> b = np.array([3, 4, 5, 6])

1149 >>> np.setdiff1d(a, b)

1150 array([1, 2])

1151

1152 """

1153 if assume_unique:

1154 ar1 = np.asarray(ar1).ravel()

1155 else:

1156 ar1 = unique(ar1)

1157 ar2 = unique(ar2)

1158 return ar1[_isin(ar1, ar2, assume_unique=True, invert=True)]

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/numpy/lib/_arraysetops_impl.py: 19%

289 statements