1"""
2Low-dependency indexing utilities.
3"""
4from __future__ import annotations
5
6from typing import (
7 TYPE_CHECKING,
8 Any,
9)
10
11import numpy as np
12
13from pandas._libs import lib
14
15from pandas.core.dtypes.common import (
16 is_array_like,
17 is_bool_dtype,
18 is_integer,
19 is_integer_dtype,
20 is_list_like,
21)
22from pandas.core.dtypes.dtypes import ExtensionDtype
23from pandas.core.dtypes.generic import (
24 ABCIndex,
25 ABCSeries,
26)
27
28if TYPE_CHECKING:
29 from pandas._typing import AnyArrayLike
30
31 from pandas.core.frame import DataFrame
32 from pandas.core.indexes.base import Index
33
34# -----------------------------------------------------------
35# Indexer Identification
36
37
38def is_valid_positional_slice(slc: slice) -> bool:
39 """
40 Check if a slice object can be interpreted as a positional indexer.
41
42 Parameters
43 ----------
44 slc : slice
45
46 Returns
47 -------
48 bool
49
50 Notes
51 -----
52 A valid positional slice may also be interpreted as a label-based slice
53 depending on the index being sliced.
54 """
55 return (
56 lib.is_int_or_none(slc.start)
57 and lib.is_int_or_none(slc.stop)
58 and lib.is_int_or_none(slc.step)
59 )
60
61
def is_list_like_indexer(key) -> bool:
    """
    Report whether ``key`` is a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object
        Candidate indexer.

    Returns
    -------
    bool
        True if ``key`` is list-like and is not an instance of a tuple
        subclass; plain tuples still count as list-like indexers.
    """
    if not is_list_like(key):
        return False
    # Tuple subclasses (e.g. NamedTuples) can themselves be indexers, so they
    # are excluded here.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass
76
77
def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Report whether ``indexer`` consists solely of scalar (integer) indexers.

    Parameters
    ----------
    indexer : object
        Candidate indexer.
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
        True for an integer when ``ndim`` is 1, or for a tuple of length
        ``ndim`` whose entries are all integers; False otherwise.
    """
    if isinstance(indexer, tuple):
        if len(indexer) != ndim:
            return False
        for element in indexer:
            if not is_integer(element):
                return False
        return True

    # GH37748: allow indexer to be an integer for Series
    return bool(ndim == 1 and is_integer(indexer))
98
99
def is_empty_indexer(indexer) -> bool:
    """
    Report whether ``indexer`` is empty.

    Parameters
    ----------
    indexer : object
        Candidate indexer.

    Returns
    -------
    bool
        True if ``indexer`` is a list-like of length zero, or if it (or any
        element of it, when it is a tuple) is a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    # Treat a lone indexer as a 1-tuple so both cases share one scan.
    parts = indexer if isinstance(indexer, tuple) else (indexer,)
    for part in parts:
        if isinstance(part, np.ndarray) and len(part) == 0:
            return True
    return False
117
118
119# -----------------------------------------------------------
120# Indexer Validation
121
122
def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that ``value`` and ``indexer`` have compatible lengths.

    A special case is allowed when the indexer is a boolean array and the
    number of True entries equals the length of ``value``; no exception is
    raised in that case.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem. Only ndarray, list and slice keys are checked.
    value : array-like
        Value for the setitem. Non-list-like values are never checked.
    values : array-like
        Values being set into. Length mismatches are only an error when
        ``values.ndim == 1``.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``values`` is 1-D,
        and the lengths don't match.
    """
    if isinstance(indexer, (np.ndarray, list)):
        # We can ignore other listlikes because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if not is_list_like(value):
            return False

        if len(indexer) != len(value) and values.ndim == 1:
            # A boolean mask selecting exactly len(value) entries is fine too.
            mask = np.array(indexer) if isinstance(indexer, list) else indexer
            selects_all_of_value = (
                mask.dtype == np.bool_ and mask.sum() == len(value)
            )
            if not selects_all_of_value:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return len(indexer) == 0

    if isinstance(indexer, slice) and is_list_like(value):
        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            # In the 2-D case the value is used row-wise and broadcast.
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return len(value) == 0

    return False
185
186
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Bounds-check an indexer against an array of length ``n``.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
        Positions to validate. An empty array is always accepted.
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If the smallest entry is less than -1.
    IndexError
        If the largest entry is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    if not len(indices):
        # Nothing to check; min()/max() would fail on an empty array.
        return

    lowest = indices.min()
    if lowest < -1:
        raise ValueError(
            f"'indices' contains values less than allowed ({lowest} < -1)"
        )

    highest = indices.max()
    if highest >= n:
        raise IndexError("indices are out-of-bounds")
233
234
235# -----------------------------------------------------------
236# Indexer Conversion
237
238
def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative indices into non-negative positions.

    Negative entries are shifted by ``n``. With ``verify`` enabled, any
    entry that is still outside ``[0, n)`` afterwards raises an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert. Lists are converted to
        ndarrays first.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        The converted indices. The input object itself is returned when it
        is not a list and contains no negative entries; otherwise a copy is
        modified and returned.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        if len(indices) == 0:
            # np.array([]) would be float64, which causes indexing errors;
            # hand back an empty platform-integer array instead.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    is_negative = indices < 0
    if is_negative.any():
        # Copy so that the caller's array is left untouched.
        indices = indices.copy()
        indices[is_negative] += n

    if verify:
        out_of_bounds = (indices < 0) | (indices >= n)
        if out_of_bounds.any():
            raise IndexError("indices are out-of-bounds")

    return indices
284
285
286# -----------------------------------------------------------
287# Unsorted
288
289
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer]

    Parameters
    ----------
    indexer : object
        The indexer to measure: a slice (measured only when ``target`` is
        given), a Series, Index, ndarray or list, a range, or a
        non-list-like object.
    target : object with a length, optional
        The object being indexed. Only its length is used, and only when
        ``indexer`` is a slice.

    Returns
    -------
    int
        Number of elements ``indexer`` selects. For a boolean array-like
        this is the number of True entries; for a non-list-like indexer
        it is 1.

    Raises
    ------
    AssertionError
        If ``indexer`` matches none of the handled kinds.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices resolves None bounds, negative bounds, out-of-range
        # bounds and negative steps exactly as sequence slicing does, so the
        # resulting range has the same length as target[indexer].
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774: a boolean mask selects only its True entries
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() is exact even when the span is not a multiple of the step,
        # e.g. range(0, 5, 2) has 3 elements.
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")
330
331
def disallow_ndim_indexing(result) -> None:
    """
    Reject indexing results that have more than one dimension.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we used to return ndarray, which was deprecated
    in GH#30588.

    Parameters
    ----------
    result : object
        Outcome of an indexing operation; its dimensionality is measured
        with ``np.ndim``.

    Raises
    ------
    ValueError
        If ``np.ndim(result)`` is greater than 1.
    """
    if np.ndim(result) <= 1:
        return

    raise ValueError(
        "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
        "supported. Convert to a numpy array before indexing instead."
    )
345
346
def unpack_1tuple(tup):
    """
    Unpack a length-1 tuple that contains a slice to just the slice.

    Parameters
    ----------
    tup : tuple or list
        Candidate key.

    Returns
    -------
    object
        The contained slice when ``tup`` has exactly one element and that
        element is a slice; otherwise ``tup`` itself, unchanged.

    Raises
    ------
    ValueError
        If ``tup`` is a list holding a single slice (GH#31299).
    """
    if len(tup) != 1:
        return tup

    sole_item = tup[0]
    if not isinstance(sole_item, slice):
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        raise ValueError(
            "Indexing with a single-item list containing a "
            "slice is not allowed. Pass a tuple instead.",
        )

    return sole_item
369
370
def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Check that a key used as an indexer matches the number of columns in
    ``value``.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        Keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If ``columns`` is unique and ``len(key)`` differs from the number of
        columns in ``value``, or if ``columns`` is not unique and the number
        of positions returned by ``columns.get_indexer_non_unique(key)``
        differs from the number of columns in ``value``.
    """
    if columns.is_unique:
        n_referenced = len(key)
    else:
        # Missing keys in columns are represented as -1
        positions = columns.get_indexer_non_unique(key)[0]
        n_referenced = len(positions)

    if n_referenced != len(value.columns):
        raise ValueError("Columns must be same length as key")
395
396
def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n]

    Parameters
    ----------
    item : tuple
        Indexing key; at most one leading or trailing Ellipsis is stripped.

    Returns
    -------
    object
        The single element left after stripping the Ellipsis.

    Raises
    ------
    IndexError
        If more than one element remains after stripping.
    """
    remaining = item
    if len(remaining) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        first, last = remaining[0], remaining[-1]
        if first is Ellipsis:
            remaining = remaining[1:]
        elif last is Ellipsis:
            remaining = remaining[:-1]

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]
413
414
415# -----------------------------------------------------------
416# Public indexer validation
417
418
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Validate ``indexer`` as an array indexer for ``array``.

    A boolean mask must have the same length as `array`. The dtype of the
    indexer is validated: integer and boolean indexers (including
    ExtensionArrays of those dtypes) are converted to the corresponding
    numpy array, with missing values handled as described below. Any other
    dtype raises an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.
        Tuples and non-list-like indexers are returned unchanged instead.

    Raises
    ------
    IndexError
        When the lengths don't match, or when the indexer is neither of
        integer nor of boolean dtype.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2.

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Anything that is not array-like is handed back untouched (integer,
    # slice, Ellipsis, None are all possible valid indexers). Tuples are
    # list-like but carry a specific meaning in indexing (multi-dimensional
    # indexing), so they are passed through as well.
    if isinstance(indexer, tuple) or not is_list_like(indexer):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    indexer_dtype = indexer.dtype

    if is_bool_dtype(indexer_dtype):
        if isinstance(indexer_dtype, ExtensionDtype):
            # NA entries in a boolean extension mask are treated as False.
            bool_mask = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            bool_mask = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(bool_mask) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(bool_mask)} instead of {len(array)}"
            )
        return bool_mask

    if is_integer_dtype(indexer_dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")