1"""
2Low-dependency indexing utilities.
3"""
4from __future__ import annotations
5
6from typing import (
7 TYPE_CHECKING,
8 Any,
9)
10
11import numpy as np
12
13from pandas._typing import AnyArrayLike
14
15from pandas.core.dtypes.common import (
16 is_array_like,
17 is_bool_dtype,
18 is_extension_array_dtype,
19 is_integer,
20 is_integer_dtype,
21 is_list_like,
22)
23from pandas.core.dtypes.generic import (
24 ABCIndex,
25 ABCSeries,
26)
27
28if TYPE_CHECKING:
29 from pandas.core.frame import DataFrame
30 from pandas.core.indexes.base import Index
31
32# -----------------------------------------------------------
33# Indexer Identification
34
35
def is_valid_positional_slice(slc: slice) -> bool:
    """
    Report whether every component of a slice is usable positionally.

    A component qualifies when it is either ``None`` or an integer as
    judged by ``is_integer``.

    Parameters
    ----------
    slc : slice
        The slice whose ``start``, ``stop`` and ``step`` are inspected.

    Returns
    -------
    bool
        True when all three components are ``None`` or integers.

    Notes
    -----
    A valid positional slice may also be interpreted as a label-based slice
    depending on the index being sliced.
    """
    components = (slc.start, slc.stop, slc.step)
    return all(part is None or is_integer(part) for part in components)
62
63
def is_list_like_indexer(key) -> bool:
    """
    Decide whether ``key`` is a list-like indexer other than a NamedTuple.

    Parameters
    ----------
    key : object
        Candidate indexer.

    Returns
    -------
    bool
        True when ``key`` is list-like and is not an instance of a tuple
        subclass (such as a NamedTuple); plain tuples are accepted.
    """
    if not is_list_like(key):
        return False

    # NamedTuples can themselves be indexers, so a tuple *subclass* is
    # excluded here, while an exact ``tuple`` still counts as list-like.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass
78
79
def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Tell whether the indexer addresses a single element.

    Parameters
    ----------
    indexer : object
        Either a bare integer (accepted only when ``ndim`` is 1) or a tuple
        with one entry per dimension.
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
        True for an integer with ``ndim == 1``, or for a tuple of length
        ``ndim`` whose entries are all integers; False otherwise.
    """
    # GH37748: allow indexer to be an integer for Series
    if ndim == 1 and is_integer(indexer):
        return True

    if not isinstance(indexer, tuple) or len(indexer) != ndim:
        return False

    for component in indexer:
        if not is_integer(component):
            return False
    return True
100
101
def is_empty_indexer(indexer) -> bool:
    """
    Determine whether an indexer selects nothing.

    Parameters
    ----------
    indexer : object
        A single indexer or a tuple of per-dimension indexers.

    Returns
    -------
    bool
        True when ``indexer`` is a zero-length list-like, or when it (or any
        entry of it, if it is a tuple) is a zero-length ``numpy.ndarray``.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    # Treat a lone indexer as a 1-tuple so both shapes share one scan.
    parts = indexer if isinstance(indexer, tuple) else (indexer,)
    for part in parts:
        if isinstance(part, np.ndarray) and len(part) == 0:
            return True
    return False
119
120
121# -----------------------------------------------------------
122# Indexer Validation
123
124
def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that value and indexer are the same length.

    A special case is allowed when the indexer is a boolean array and the
    number of True entries equals the length of ``value``. In that case no
    exception is raised.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem. Only ``numpy.ndarray``, ``list`` and ``slice``
        indexers are checked; anything else is accepted without validation.
    value : array-like
        Value for the setitem. Non-list-like values are never rejected.
    values : array-like
        Values being set into. Length mismatches are only an error when
        ``values.ndim == 1``.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and the lengths don't
        match for 1-dimensional ``values``.
    """
    if isinstance(indexer, (np.ndarray, list)):
        # We can ignore other listlikes because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if not is_list_like(value):
            return False

        if len(indexer) != len(value) and values.ndim == 1:
            # A boolean mask whose count of True entries equals
            # len(value) is acceptable despite the differing lengths.
            mask = np.array(indexer) if isinstance(indexer, list) else indexer
            counts_match = (
                isinstance(mask, np.ndarray)
                and mask.dtype == np.bool_
                and mask.sum() == len(value)
            )
            if not counts_match:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return len(indexer) == 0

    if isinstance(indexer, slice):
        if not is_list_like(value):
            return False

        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            # For two-dimensional ``values`` the value is used row-wise and
            # broadcast, so only the 1-D case is rejected.
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return len(value) == 0

    return False
187
188
def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Bounds-check an indexer against an array of length ``n``.

    The value -1 is permitted (it indicates a missing value); anything
    smaller is rejected. An empty ``indices`` is always accepted.

    Parameters
    ----------
    indices : ndarray
        Positions to validate.
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If the smallest entry of ``indices`` is less than -1.
    IndexError
        If the largest entry of ``indices`` is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    if len(indices) == 0:
        # Nothing to check; min()/max() are only taken on non-empty input.
        return

    min_idx = indices.min()
    if min_idx < -1:
        raise ValueError(
            f"'indices' contains values less than allowed ({min_idx} < -1)"
        )

    if indices.max() >= n:
        raise IndexError("indices are out-of-bounds")
235
236
237# -----------------------------------------------------------
238# Indexer Conversion
239
240
def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative positions into non-negative ones.

    Negative entries are shifted by ``n``. With ``verify`` enabled, any
    entry that still falls outside ``[0, n)`` triggers an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert. A ``list`` is converted to
        a ``numpy.ndarray`` first.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        The converted indices. When no entry is negative the input array is
        returned as-is; otherwise a modified copy is returned so the
        caller's array is left untouched. An empty list yields an empty
        ``intp`` array.

    Raises
    ------
    IndexError
        With ``verify=True``, when a converted index is ``>= n`` or is
        still negative.
    """
    if isinstance(indices, list):
        if not indices:
            # np.array([]) would be a float array, which causes indexing
            # errors, so hand back an explicitly integer-typed empty array.
            return np.empty(0, dtype=np.intp)
        indices = np.array(indices)

    negative = indices < 0
    if negative.any():
        # Work on a copy so the caller's array is not mutated.
        indices = indices.copy()
        indices[negative] += n

    if verify:
        out_of_range = (indices >= n) | (indices < 0)
        if out_of_range.any():
            raise IndexError("indices are out-of-bounds")

    return indices
286
287
288# -----------------------------------------------------------
289# Unsorted
290
291
def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer].

    Parameters
    ----------
    indexer : slice, Series, Index, ndarray, list, range or scalar
        The indexer whose result length is wanted.
    target : sized object, optional
        The object being indexed. Only consulted (via ``len``) when
        ``indexer`` is a slice.

    Returns
    -------
    int
        * slice with a ``target``: the number of positions the slice selects
          from a sequence of ``len(target)`` elements, following normal
          Python slicing rules (clamping, negative bounds, negative steps).
        * Series/Index/ndarray/list: the count of True entries for a
          boolean dtype, otherwise ``len(indexer)``.
        * range: ``len(indexer)``.
        * anything that ``is_list_like_indexer`` rejects: 1.

    Raises
    ------
    AssertionError
        If none of the cases above applies.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices() normalises None/negative/out-of-range bounds for
        # either step sign exactly as sequence slicing does, so the length
        # of the resulting range is the length of target[indexer].
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774: a boolean mask selects one element per True entry
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() handles steps that do not evenly divide the span as well as
        # empty ranges, which plain floor division gets wrong.
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")
332
333
def disallow_ndim_indexing(result) -> None:
    """
    Reject an indexing result that has more than one dimension.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we used to return ndarray, which was deprecated
    in GH#30588.

    Parameters
    ----------
    result : object
        The outcome of an indexing operation; its dimensionality is
        measured with ``np.ndim``.

    Raises
    ------
    ValueError
        If ``np.ndim(result)`` is greater than 1.
    """
    if np.ndim(result) <= 1:
        return

    raise ValueError(
        "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
        "supported. Convert to a numpy array before indexing instead."
    )
347
348
def unpack_1tuple(tup):
    """
    Unwrap a length-1 tuple that holds a slice into the bare slice.

    Parameters
    ----------
    tup : tuple or list
        The key to inspect.

    Returns
    -------
    object
        The contained slice when ``tup`` is a one-element container whose
        only element is a slice; otherwise ``tup`` itself, unchanged.

    Raises
    ------
    ValueError
        If ``tup`` is a one-element *list* containing a slice (GH#31299).
    """
    if len(tup) != 1:
        return tup

    sole = tup[0]
    if not isinstance(sole, slice):
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        raise ValueError(
            "Indexing with a single-item list containing a "
            "slice is not allowed. Pass a tuple instead.",
        )

    return sole
371
372
def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Verify that a column key lines up with the columns of ``value``.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        Keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        When ``columns`` is unique and ``len(key)`` differs from the number
        of columns in ``value``; or, when ``columns`` has duplicates, when
        the number of positions ``key`` resolves to in ``columns`` differs
        from the number of columns in ``value``.
    """
    if columns.is_unique:
        mismatch = len(value.columns) != len(key)
    else:
        # Missing keys in columns are represented as -1
        positions = columns.get_indexer_non_unique(key)[0]
        mismatch = len(positions) != len(value.columns)

    if mismatch:
        raise ValueError("Columns must be same length as key")
397
398
def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n].

    Parameters
    ----------
    item : tuple
        Indexing key. When it has more than one entry, a single leading
        Ellipsis (or, failing that, a single trailing Ellipsis) is dropped.

    Returns
    -------
    object
        The one remaining entry of ``item``.

    Raises
    ------
    IndexError
        If more than one entry remains after dropping the Ellipsis.
    """
    remaining = item
    if len(remaining) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        if remaining[0] is Ellipsis:
            remaining = remaining[1:]
        elif remaining[-1] is Ellipsis:
            remaining = remaining[:-1]

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]
415
416
417# -----------------------------------------------------------
418# Public indexer validation
419
420
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.
        Indexers that are not list-like, and tuples, are returned unchanged.

    Raises
    ------
    IndexError
        When the lengths of a boolean mask and `array` don't match, or when
        the indexer's dtype is neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Anything that is not array-like is handed back untouched (possible
    # valid indexers of that kind: integer, slice, Ellipsis, None).
    # Tuples are also passed through: in indexing they carry a specific
    # meaning (multi-dimensional indexing) rather than acting as an array.
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype

    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # NA entries in a boolean mask are treated as False
            mask = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            mask = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(mask) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(mask)} instead of {len(array)}"
            )
        return mask

    if is_integer_dtype(dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")