Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexers/utils.py: 26%

1"""

2Low-dependency indexing utilities.

3"""

4from __future__ import annotations

6from typing import (

7 TYPE_CHECKING,

8 Any,

11import numpy as np

13from pandas._libs import lib

15from pandas.core.dtypes.common import (

16 is_array_like,

17 is_bool_dtype,

18 is_integer,

19 is_integer_dtype,

20 is_list_like,

21)

22from pandas.core.dtypes.dtypes import ExtensionDtype

23from pandas.core.dtypes.generic import (

24 ABCIndex,

25 ABCSeries,

26)

28if TYPE_CHECKING:

29 from pandas._typing import AnyArrayLike

31 from pandas.core.frame import DataFrame

32 from pandas.core.indexes.base import Index

34# -----------------------------------------------------------

35# Indexer Identification

38def is_valid_positional_slice(slc: slice) -> bool:

39 """

40 Check if a slice object can be interpreted as a positional indexer.

42 Parameters

43 ----------

44 slc : slice

46 Returns

47 -------

48 bool

50 Notes

51 -----

52 A valid positional slice may also be interpreted as a label-based slice

53 depending on the index being sliced.

54 """

55 return (

56 lib.is_int_or_none(slc.start)

57 and lib.is_int_or_none(slc.stop)

58 and lib.is_int_or_none(slc.step)

59 )

def is_list_like_indexer(key) -> bool:
    """
    Check whether ``key`` is a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
    """
    if not is_list_like(key):
        return False

    # A tuple subclass (i.e. a NamedTuple) can itself be an indexer, so it is
    # excluded even though it is list-like; a plain tuple is accepted.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass

def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Report whether every component of ``indexer`` is a scalar integer.

    Parameters
    ----------
    indexer : object
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
    """
    if isinstance(indexer, tuple):
        # One integer per dimension is required for a fully scalar lookup.
        if len(indexer) != ndim:
            return False
        for component in indexer:
            if not is_integer(component):
                return False
        return True

    # GH37748: allow indexer to be an integer for Series
    return bool(ndim == 1 and is_integer(indexer))

def is_empty_indexer(indexer) -> bool:
    """
    Check whether ``indexer`` selects nothing.

    Parameters
    ----------
    indexer : object

    Returns
    -------
    bool
        True for an empty list-like, or when the indexer (or any component of
        a tuple indexer) is a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    # Treat a lone indexer like a 1-tuple so both shapes share one scan.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False

117

118

119# -----------------------------------------------------------

120# Indexer Validation

121

122

def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that value and indexer are the same length.

    A special case is allowed when the indexer is a boolean array and the
    number of true values equals the length of ``value``. In this case, no
    exception is raised.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``values`` is 1-D and
        the lengths don't match.
    """
    if not is_list_like(value):
        # Scalar values are never length-checked and never a no-op.
        return False

    if isinstance(indexer, (np.ndarray, list)):
        # We can ignore other listlikes because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if len(indexer) != len(value) and values.ndim == 1:
            if isinstance(indexer, list):
                indexer = np.array(indexer)
            # boolean with truth values == len of the value is ok too
            if indexer.dtype != np.bool_ or indexer.sum() != len(value):
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return len(indexer) == 0

    if isinstance(indexer, slice):
        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            # In case of two dimensional value is used row-wise and broadcasted
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return len(value) == 0

    return False

185

186

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If any value in ``indices`` is smaller than -1.
    IndexError
        If any value in ``indices`` is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    if len(indices) == 0:
        # min()/max() are undefined on an empty array; nothing to check.
        return

    min_idx = indices.min()
    if min_idx < -1:
        msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
        raise ValueError(msg)

    max_idx = indices.max()
    if max_idx >= n:
        raise IndexError("indices are out-of-bounds")

233

234

235# -----------------------------------------------------------

236# Indexer Conversion

237

238

def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Attempt to convert indices into valid, positive indices.

    Negative indices are translated to their positive equivalent by adding
    ``n``. When ``verify`` is set, indices that are still out-of-bounds after
    translation raise an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        An array-like of positive indices that correspond to the ones
        that were passed in initially to this function.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        indices = np.array(indices)
        if len(indices) == 0:
            # If `indices` is empty, np.array will return a float,
            # and will cause indexing errors.
            return np.empty(0, dtype=np.intp)

    negative = indices < 0
    if negative.any():
        # Copy first so the caller's array is never modified in place.
        indices = indices.copy()
        indices[negative] += n

    if verify and ((indices >= n) | (indices < 0)).any():
        raise IndexError("indices are out-of-bounds")
    return indices

284

285

286# -----------------------------------------------------------

287# Unsorted

288

289

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer]

    Parameters
    ----------
    indexer : object
        A slice, Series, Index, ndarray, list, range or scalar indexer.
    target : sized object, optional
        The object being indexed. Only its length is used, and only when
        ``indexer`` is a slice.

    Returns
    -------
    int

    Raises
    ------
    AssertionError
        If ``indexer`` is a list-like whose length cannot be determined here.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices applies Python's own slicing rules (clipping of
        # out-of-range bounds, defaults that depend on the sign of the step),
        # so negative steps and empty slices get the right, non-negative length.
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774
            # A boolean mask selects one element per True entry.
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() handles steps that do not divide the span evenly as well as
        # empty ranges, which plain floor division gets wrong.
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")

330

331

def disallow_ndim_indexing(result) -> None:
    """
    Reject an indexing result that has more than one dimension.

    Helper used for 1D Series/Index. GH#27125 indexer like idx[:, None]
    expands dim, but we cannot do that and keep an index, so we used to
    return ndarray, which was deprecated in GH#30588.

    Raises
    ------
    ValueError
        If ``np.ndim(result)`` is greater than 1.
    """
    if np.ndim(result) <= 1:
        return

    raise ValueError(
        "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
        "supported. Convert to a numpy array before indexing instead."
    )

345

346

def unpack_1tuple(tup):
    """
    Unpack a length-1 tuple that contains a slice to just the slice.

    Any other input is returned unchanged.

    Raises
    ------
    ValueError
        If ``tup`` is a length-1 list containing a slice (GH#31299).
    """
    if len(tup) != 1:
        return tup

    sole_item = tup[0]
    if not isinstance(sole_item, slice):
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        raise ValueError(
            "Indexing with a single-item list containing a "
            "slice is not allowed. Pass a tuple instead.",
        )
    return sole_item

369

370

def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Check that a key used as indexer matches the number of columns in value.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        Keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If ``columns`` is unique and the length of ``key`` differs from the
        number of columns in ``value``, or if ``columns`` is not unique and
        the number of positions ``key`` resolves to differs from the number
        of columns in ``value``.
    """
    if columns.is_unique:
        n_targets = len(key)
    else:
        # With duplicate labels one key can resolve to several positions.
        # Missing keys in columns are represented as -1
        n_targets = len(columns.get_indexer_non_unique(key)[0])

    if n_targets != len(value.columns):
        raise ValueError("Columns must be same length as key")

395

396

def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n]

    A single leading or trailing Ellipsis is dropped, after which exactly one
    element must remain; that element is returned.

    Raises
    ------
    IndexError
        If more than one element remains after dropping the Ellipsis.
    """
    remaining = item
    if len(item) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        leading, trailing = item[0], item[-1]
        if leading is Ellipsis:
            remaining = item[1:]
        elif trailing is Ellipsis:
            remaining = item[:-1]

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]

413

414

415# -----------------------------------------------------------

416# Public indexer validation

417

418

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.

    Raises
    ------
    IndexError
        When the lengths don't match, or when the indexer's dtype is neither
        integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None)
    # In this context, tuples are not considered as array-like, as they have
    # a specific meaning in indexing (multi-dimensional indexing)
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype
    if is_bool_dtype(dtype):
        if isinstance(dtype, ExtensionDtype):
            # Missing entries in a masked boolean array select nothing.
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
        return indexer

    if is_integer_dtype(dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")

552

553 return indexer