Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexing.py: 16%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3from contextlib import suppress
4import sys
5from typing import (
6 TYPE_CHECKING,
7 Hashable,
8 Sequence,
9 TypeVar,
10 cast,
11 final,
12)
13import warnings
15import numpy as np
17from pandas._config import using_copy_on_write
19from pandas._libs.indexing import NDFrameIndexerBase
20from pandas._libs.lib import item_from_zerodim
21from pandas._typing import (
22 Axis,
23 AxisInt,
24)
25from pandas.compat import PYPY
26from pandas.errors import (
27 AbstractMethodError,
28 ChainedAssignmentError,
29 IndexingError,
30 InvalidIndexError,
31 LossySetitemError,
32 _chained_assignment_msg,
33)
34from pandas.util._decorators import doc
36from pandas.core.dtypes.cast import (
37 can_hold_element,
38 maybe_promote,
39)
40from pandas.core.dtypes.common import (
41 is_array_like,
42 is_bool_dtype,
43 is_extension_array_dtype,
44 is_hashable,
45 is_integer,
46 is_iterator,
47 is_list_like,
48 is_numeric_dtype,
49 is_object_dtype,
50 is_scalar,
51 is_sequence,
52)
53from pandas.core.dtypes.concat import concat_compat
54from pandas.core.dtypes.generic import (
55 ABCDataFrame,
56 ABCSeries,
57)
58from pandas.core.dtypes.missing import (
59 infer_fill_value,
60 is_valid_na_for_dtype,
61 isna,
62 na_value_for_dtype,
63)
65from pandas.core import algorithms as algos
66import pandas.core.common as com
67from pandas.core.construction import (
68 array as pd_array,
69 extract_array,
70)
71from pandas.core.indexers import (
72 check_array_indexer,
73 is_list_like_indexer,
74 is_scalar_indexer,
75 length_of_indexer,
76)
77from pandas.core.indexes.api import (
78 Index,
79 MultiIndex,
80)
82if TYPE_CHECKING:
83 from pandas import (
84 DataFrame,
85 Series,
86 )
88_LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer")
90# "null slice"
91_NS = slice(None, None)
92_one_ellipsis_message = "indexer may only contain one '...' entry"
# the public IndexSlicerMaker
class _IndexSlice:
    """
    Create an object to more easily perform multi-index slicing.

    Subscripting the object simply hands back whatever was written between
    the brackets, so slice syntax (``a:b``) can be used where only a
    ``slice(...)`` call would otherwise be possible.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------
    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11
    """

    def __getitem__(self, arg):
        # Identity: the interpreter has already turned ``a:b`` into a slice
        # (and ``x, y`` into a tuple) by the time we are called.
        return arg


# Module-level singleton; this is the object users subscript.
IndexSlice = _IndexSlice()
class IndexingMixin:
    """
    Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
    """

    @property
    def iloc(self) -> _iLocIndexer:
        """
        Purely integer-location based indexing for selection by position.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on some value.
        - A tuple of row and column indexes. The tuple elements consist of one of the
          above inputs, e.g. ``(0, 1)``.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
                       selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index label is even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        return _iLocIndexer("iloc", self)

    @property
    def loc(self) -> _LocIndexer:
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - An alignable boolean Series. The index of the key will be aligned before
          masking.
        - An alignable Index. The Index of the returned selection will be the input.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above)

        See more at :ref:`Selection by Label <indexing.label>`.

        Raises
        ------
        KeyError
            If any items are not found.
        IndexingError
            If an indexed key is passed and its index is unalignable to the frame index.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
                       Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=['cobra', 'viper', 'sidewinder'],
        ...                   columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Alignable boolean Series:

        >>> df.loc[pd.Series([False, True, False],
        ...                  index=['viper', 'sidewinder', 'cobra'])]
                    max_speed  shield
        sidewinder          7       8

        Index (same behavior as ``df.reindex``)

        >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
               max_speed  shield
        foo
        cobra          1       2
        viper          4       5

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching a boolean condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...     ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...     ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...     ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...           [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1

        Please see the :ref:`user guide<advanced.advanced_hierarchical>`
        for more details and explanations of advanced indexing.
        """
        return _LocIndexer("loc", self)

    @property
    def at(self) -> _AtIndexer:
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            * If getting a value and 'label' does not exist in a DataFrame or
                Series.
        ValueError
            * If row/column label pair is not a tuple or if any label from
                the pair is not a scalar for DataFrame.
            * If label is list-like (*excluding* NamedTuple) for Series.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column pair by label.
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer
            position(s).
        Series.at : Access a single value by label.
        Series.iat : Access a single value by integer position.
        Series.loc : Access a group of rows by label(s).
        Series.iloc : Access a group of rows by integer position(s).

        Notes
        -----
        See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        return _AtIndexer("at", self)

    @property
    def iat(self) -> _iAtIndexer:
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a series

        >>> df.loc[0].iat[1]
        2
        """
        return _iAtIndexer("iat", self)
664class _LocationIndexer(NDFrameIndexerBase):
665 _valid_types: str
666 axis: AxisInt | None = None
668 # sub-classes need to set _takeable
669 _takeable: bool
671 @final
672 def __call__(
673 self: _LocationIndexerT, axis: Axis | None = None
674 ) -> _LocationIndexerT:
675 # we need to return a copy of ourselves
676 new_self = type(self)(self.name, self.obj)
678 if axis is not None:
679 axis_int_none = self.obj._get_axis_number(axis)
680 else:
681 axis_int_none = axis
682 new_self.axis = axis_int_none
683 return new_self
685 def _get_setitem_indexer(self, key):
686 """
687 Convert a potentially-label-based key into a positional indexer.
688 """
689 if self.name == "loc":
690 # always holds here bc iloc overrides _get_setitem_indexer
691 self._ensure_listlike_indexer(key)
693 if isinstance(key, tuple):
694 for x in key:
695 check_dict_or_set_indexers(x)
697 if self.axis is not None:
698 key = _tupleize_axis_indexer(self.ndim, self.axis, key)
700 ax = self.obj._get_axis(0)
702 if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
703 with suppress(KeyError, InvalidIndexError):
704 # TypeError e.g. passed a bool
705 return ax.get_loc(key)
707 if isinstance(key, tuple):
708 with suppress(IndexingError):
709 # suppress "Too many indexers"
710 return self._convert_tuple(key)
712 if isinstance(key, range):
713 # GH#45479 test_loc_setitem_range_key
714 key = list(key)
716 return self._convert_to_indexer(key, axis=0)
718 @final
719 def _maybe_mask_setitem_value(self, indexer, value):
720 """
721 If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
722 same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
723 similar to Series.__setitem__.
725 Note this is only for loc, not iloc.
726 """
728 if (
729 isinstance(indexer, tuple)
730 and len(indexer) == 2
731 and isinstance(value, (ABCSeries, ABCDataFrame))
732 ):
733 pi, icols = indexer
734 ndim = value.ndim
735 if com.is_bool_indexer(pi) and len(value) == len(pi):
736 newkey = pi.nonzero()[0]
738 if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
739 # e.g. test_loc_setitem_boolean_mask_allfalse
740 if len(newkey) == 0:
741 # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
742 # TODO(GH#45333): may be fixed when deprecation is enforced
744 value = value.iloc[:0]
745 else:
746 # test_loc_setitem_ndframe_values_alignment
747 value = self.obj.iloc._align_series(indexer, value)
748 indexer = (newkey, icols)
750 elif (
751 isinstance(icols, np.ndarray)
752 and icols.dtype.kind == "i"
753 and len(icols) == 1
754 ):
755 if ndim == 1:
756 # We implicitly broadcast, though numpy does not, see
757 # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
758 # test_loc_setitem_ndframe_values_alignment
759 value = self.obj.iloc._align_series(indexer, value)
760 indexer = (newkey, icols)
762 elif ndim == 2 and value.shape[1] == 1:
763 if len(newkey) == 0:
764 # FIXME: kludge for
765 # test_loc_setitem_all_false_boolean_two_blocks
766 # TODO(GH#45333): may be fixed when deprecation is enforced
767 value = value.iloc[:0]
768 else:
769 # test_loc_setitem_ndframe_values_alignment
770 value = self.obj.iloc._align_frame(indexer, value)
771 indexer = (newkey, icols)
772 elif com.is_bool_indexer(indexer):
773 indexer = indexer.nonzero()[0]
775 return indexer, value
777 @final
778 def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
779 """
780 Ensure that a list-like of column labels are all present by adding them if
781 they do not already exist.
783 Parameters
784 ----------
785 key : list-like of column labels
786 Target labels.
787 axis : key axis if known
788 """
789 column_axis = 1
791 # column only exists in 2-dimensional DataFrame
792 if self.ndim != 2:
793 return
795 orig_key = key
796 if isinstance(key, tuple) and len(key) > 1:
797 # key may be a tuple if we are .loc
798 # if length of key is > 1 set key to column part
799 key = key[column_axis]
800 axis = column_axis
802 if (
803 axis == column_axis
804 and not isinstance(self.obj.columns, MultiIndex)
805 and is_list_like_indexer(key)
806 and not com.is_bool_indexer(key)
807 and all(is_hashable(k) for k in key)
808 ):
809 # GH#38148
810 keys = self.obj.columns.union(key, sort=False)
811 diff = Index(key).difference(self.obj.columns, sort=False)
813 if len(diff) and com.is_null_slice(orig_key[0]):
814 # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
815 # is a new column, add the new columns with dtype=np.void
816 # so that later when we go through setitem_single_column
817 # we will use isetitem. Without this, the reindex_axis
818 # below would create float64 columns in this example, which
819 # would successfully hold 7, so we would end up with the wrong
820 # dtype.
821 indexer = np.arange(len(keys), dtype=np.intp)
822 indexer[len(self.obj.columns) :] = -1
823 new_mgr = self.obj._mgr.reindex_indexer(
824 keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
825 )
826 self.obj._mgr = new_mgr
827 return
829 self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
831 @final
832 def __setitem__(self, key, value) -> None:
833 if not PYPY and using_copy_on_write():
834 if sys.getrefcount(self.obj) <= 2:
835 warnings.warn(
836 _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
837 )
839 check_dict_or_set_indexers(key)
840 if isinstance(key, tuple):
841 key = tuple(list(x) if is_iterator(x) else x for x in key)
842 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
843 else:
844 key = com.apply_if_callable(key, self.obj)
845 indexer = self._get_setitem_indexer(key)
846 self._has_valid_setitem_indexer(key)
848 iloc = self if self.name == "iloc" else self.obj.iloc
849 iloc._setitem_with_indexer(indexer, value, self.name)
851 def _validate_key(self, key, axis: AxisInt):
852 """
853 Ensure that key is valid for current indexer.
855 Parameters
856 ----------
857 key : scalar, slice or list-like
858 Key requested.
859 axis : int
860 Dimension on which the indexing is being made.
862 Raises
863 ------
864 TypeError
865 If the key (or some element of it) has wrong type.
866 IndexError
867 If the key (or some element of it) is out of bounds.
868 KeyError
869 If the key was not found.
870 """
871 raise AbstractMethodError(self)
873 @final
874 def _expand_ellipsis(self, tup: tuple) -> tuple:
875 """
876 If a tuple key includes an Ellipsis, replace it with an appropriate
877 number of null slices.
878 """
879 if any(x is Ellipsis for x in tup):
880 if tup.count(Ellipsis) > 1:
881 raise IndexingError(_one_ellipsis_message)
883 if len(tup) == self.ndim:
884 # It is unambiguous what axis this Ellipsis is indexing,
885 # treat as a single null slice.
886 i = tup.index(Ellipsis)
887 # FIXME: this assumes only one Ellipsis
888 new_key = tup[:i] + (_NS,) + tup[i + 1 :]
889 return new_key
891 # TODO: other cases? only one test gets here, and that is covered
892 # by _validate_key_length
893 return tup
895 @final
896 def _validate_tuple_indexer(self, key: tuple) -> tuple:
897 """
898 Check the key for valid keys across my indexer.
899 """
900 key = self._validate_key_length(key)
901 key = self._expand_ellipsis(key)
902 for i, k in enumerate(key):
903 try:
904 self._validate_key(k, i)
905 except ValueError as err:
906 raise ValueError(
907 "Location based indexing can only have "
908 f"[{self._valid_types}] types"
909 ) from err
910 return key
912 @final
913 def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
914 """
915 Returns
916 -------
917 bool
918 """
919 if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
920 return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
921 return False
923 @final
924 def _convert_tuple(self, key: tuple) -> tuple:
925 # Note: we assume _tupleize_axis_indexer has been called, if necessary.
926 self._validate_key_length(key)
927 keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
928 return tuple(keyidx)
930 @final
931 def _validate_key_length(self, key: tuple) -> tuple:
932 if len(key) > self.ndim:
933 if key[0] is Ellipsis:
934 # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
935 key = key[1:]
936 if Ellipsis in key:
937 raise IndexingError(_one_ellipsis_message)
938 return self._validate_key_length(key)
939 raise IndexingError("Too many indexers")
940 return key
942 @final
943 def _getitem_tuple_same_dim(self, tup: tuple):
944 """
945 Index with indexers that should return an object of the same dimension
946 as self.obj.
948 This is only called after a failed call to _getitem_lowerdim.
949 """
950 retval = self.obj
951 for i, key in enumerate(tup):
952 if com.is_null_slice(key):
953 continue
955 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
956 # We should never have retval.ndim < self.ndim, as that should
957 # be handled by the _getitem_lowerdim call above.
958 assert retval.ndim == self.ndim
960 if retval is self.obj:
961 # if all axes were a null slice (`df.loc[:, :]`), ensure we still
962 # return a new object (https://github.com/pandas-dev/pandas/pull/49469)
963 retval = retval.copy(deep=False)
965 return retval
967 @final
968 def _getitem_lowerdim(self, tup: tuple):
969 # we can directly get the axis result since the axis is specified
970 if self.axis is not None:
971 axis = self.obj._get_axis_number(self.axis)
972 return self._getitem_axis(tup, axis=axis)
974 # we may have a nested tuples indexer here
975 if self._is_nested_tuple_indexer(tup):
976 return self._getitem_nested_tuple(tup)
978 # we maybe be using a tuple to represent multiple dimensions here
979 ax0 = self.obj._get_axis(0)
980 # ...but iloc should handle the tuple as simple integer-location
981 # instead of checking it as multiindex representation (GH 13797)
982 if (
983 isinstance(ax0, MultiIndex)
984 and self.name != "iloc"
985 and not any(isinstance(x, slice) for x in tup)
986 ):
987 # Note: in all extant test cases, replacing the slice condition with
988 # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
989 # is equivalent.
990 # (see the other place where we call _handle_lowerdim_multi_index_axis0)
991 with suppress(IndexingError):
992 return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)
994 tup = self._validate_key_length(tup)
996 for i, key in enumerate(tup):
997 if is_label_like(key):
998 # We don't need to check for tuples here because those are
999 # caught by the _is_nested_tuple_indexer check above.
1000 section = self._getitem_axis(key, axis=i)
1002 # We should never have a scalar section here, because
1003 # _getitem_lowerdim is only called after a check for
1004 # is_scalar_access, which that would be.
1005 if section.ndim == self.ndim:
1006 # we're in the middle of slicing through a MultiIndex
1007 # revise the key wrt to `section` by inserting an _NS
1008 new_key = tup[:i] + (_NS,) + tup[i + 1 :]
1010 else:
1011 # Note: the section.ndim == self.ndim check above
1012 # rules out having DataFrame here, so we dont need to worry
1013 # about transposing.
1014 new_key = tup[:i] + tup[i + 1 :]
1016 if len(new_key) == 1:
1017 new_key = new_key[0]
1019 # Slices should return views, but calling iloc/loc with a null
1020 # slice returns a new object.
1021 if com.is_null_slice(new_key):
1022 return section
1023 # This is an elided recursive call to iloc/loc
1024 return getattr(section, self.name)[new_key]
1026 raise IndexingError("not applicable")
1028 @final
1029 def _getitem_nested_tuple(self, tup: tuple):
1030 # we have a nested tuple so have at least 1 multi-index level
1031 # we should be able to match up the dimensionality here
1033 for key in tup:
1034 check_dict_or_set_indexers(key)
1036 # we have too many indexers for our dim, but have at least 1
1037 # multi-index dimension, try to see if we have something like
1038 # a tuple passed to a series with a multi-index
1039 if len(tup) > self.ndim:
1040 if self.name != "loc":
1041 # This should never be reached, but let's be explicit about it
1042 raise ValueError("Too many indices") # pragma: no cover
1043 if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
1044 # GH#10521 Series should reduce MultiIndex dimensions instead of
1045 # DataFrame, IndexingError is not raised when slice(None,None,None)
1046 # with one row.
1047 with suppress(IndexingError):
1048 return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
1049 tup
1050 )
1051 elif isinstance(self.obj, ABCSeries) and any(
1052 isinstance(k, tuple) for k in tup
1053 ):
1054 # GH#35349 Raise if tuple in tuple for series
1055 # Do this after the all-hashable-or-null-slice check so that
1056 # we are only getting non-hashable tuples, in particular ones
1057 # that themselves contain a slice entry
1058 # See test_loc_series_getitem_too_many_dimensions
1059 raise IndexingError("Too many indexers")
1061 # this is a series with a multi-index specified a tuple of
1062 # selectors
1063 axis = self.axis or 0
1064 return self._getitem_axis(tup, axis=axis)
1066 # handle the multi-axis by taking sections and reducing
1067 # this is iterative
1068 obj = self.obj
1069 # GH#41369 Loop in reverse order ensures indexing along columns before rows
1070 # which selects only necessary blocks which avoids dtype conversion if possible
1071 axis = len(tup) - 1
1072 for key in tup[::-1]:
1073 if com.is_null_slice(key):
1074 axis -= 1
1075 continue
1077 obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
1078 axis -= 1
1080 # if we have a scalar, we are done
1081 if is_scalar(obj) or not hasattr(obj, "ndim"):
1082 break
1084 return obj
1086 def _convert_to_indexer(self, key, axis: AxisInt):
1087 raise AbstractMethodError(self)
1089 @final
1090 def __getitem__(self, key):
1091 check_dict_or_set_indexers(key)
1092 if type(key) is tuple:
1093 key = tuple(list(x) if is_iterator(x) else x for x in key)
1094 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1095 if self._is_scalar_access(key):
1096 return self.obj._get_value(*key, takeable=self._takeable)
1097 return self._getitem_tuple(key)
1098 else:
1099 # we by definition only have the 0th axis
1100 axis = self.axis or 0
1102 maybe_callable = com.apply_if_callable(key, self.obj)
1103 return self._getitem_axis(maybe_callable, axis=axis)
1105 def _is_scalar_access(self, key: tuple):
1106 raise NotImplementedError()
1108 def _getitem_tuple(self, tup: tuple):
1109 raise AbstractMethodError(self)
1111 def _getitem_axis(self, key, axis: AxisInt):
1112 raise NotImplementedError()
1114 def _has_valid_setitem_indexer(self, indexer) -> bool:
1115 raise AbstractMethodError(self)
1117 @final
1118 def _getbool_axis(self, key, axis: AxisInt):
1119 # caller is responsible for ensuring non-None axis
1120 labels = self.obj._get_axis(axis)
1121 key = check_bool_indexer(labels, key)
1122 inds = key.nonzero()[0]
1123 return self.obj._take_with_is_copy(inds, axis=axis)
1126@doc(IndexingMixin.loc)
1127class _LocIndexer(_LocationIndexer):
1128 _takeable: bool = False
1129 _valid_types = (
1130 "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
1131 "endpoints included! Can be slices of integers if the "
1132 "index is integers), listlike of labels, boolean"
1133 )
1135 # -------------------------------------------------------------------
1136 # Key Checks
@doc(_LocationIndexer._validate_key)
def _validate_key(self, key, axis: Axis):
    # valid for a collection of labels (we check their presence later)
    # slice of labels (where start-end in labels)
    # slice of integers (only if in the labels)
    # boolean not in slice and with boolean index
    labels = self.obj._get_axis(axis)

    if isinstance(key, bool):
        # A bare True/False is only a legal label when the axis (or, for a
        # MultiIndex, its first level) is itself boolean.
        axis_is_boolean = (
            is_bool_dtype(labels)
            or labels.dtype.name == "boolean"
            or (
                isinstance(labels, MultiIndex)
                and is_bool_dtype(labels.get_level_values(0))
            )
        )
        if not axis_is_boolean:
            raise KeyError(
                f"{key}: boolean label can not be used without a boolean index"
            )

    if isinstance(key, slice):
        endpoints = (key.start, key.stop)
        if any(isinstance(bound, bool) for bound in endpoints):
            raise TypeError(f"{key}: boolean values can not be used in a slice")
1160 def _has_valid_setitem_indexer(self, indexer) -> bool:
1161 return True
1163 def _is_scalar_access(self, key: tuple) -> bool:
1164 """
1165 Returns
1166 -------
1167 bool
1168 """
1169 # this is a shortcut accessor to both .loc and .iloc
1170 # that provide the equivalent access of .at and .iat
1171 # a) avoid getting things via sections and (to minimize dtype changes)
1172 # b) provide a performant path
1173 if len(key) != self.ndim:
1174 return False
1176 for i, k in enumerate(key):
1177 if not is_scalar(k):
1178 return False
1180 ax = self.obj.axes[i]
1181 if isinstance(ax, MultiIndex):
1182 return False
1184 if isinstance(k, str) and ax._supports_partial_string_indexing:
1185 # partial string indexing, df.loc['2000', 'A']
1186 # should not be considered scalar
1187 return False
1189 if not ax._index_as_unique:
1190 return False
1192 return True
1194 # -------------------------------------------------------------------
1195 # MultiIndex Handling
1197 def _multi_take_opportunity(self, tup: tuple) -> bool:
1198 """
1199 Check whether there is the possibility to use ``_multi_take``.
1201 Currently the limit is that all axes being indexed, must be indexed with
1202 list-likes.
1204 Parameters
1205 ----------
1206 tup : tuple
1207 Tuple of indexers, one per axis.
1209 Returns
1210 -------
1211 bool
1212 Whether the current indexing,
1213 can be passed through `_multi_take`.
1214 """
1215 if not all(is_list_like_indexer(x) for x in tup):
1216 return False
1218 # just too complicated
1219 return not any(com.is_bool_indexer(x) for x in tup)
1221 def _multi_take(self, tup: tuple):
1222 """
1223 Create the indexers for the passed tuple of keys, and
1224 executes the take operation. This allows the take operation to be
1225 executed all at once, rather than once for each dimension.
1226 Improving efficiency.
1228 Parameters
1229 ----------
1230 tup : tuple
1231 Tuple of indexers, one per axis.
1233 Returns
1234 -------
1235 values: same type as the object being indexed
1236 """
1237 # GH 836
1238 d = {
1239 axis: self._get_listlike_indexer(key, axis)
1240 for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
1241 }
1242 return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
1244 # -------------------------------------------------------------------
1246 def _getitem_iterable(self, key, axis: AxisInt):
1247 """
1248 Index current object with an iterable collection of keys.
1250 Parameters
1251 ----------
1252 key : iterable
1253 Targeted labels.
1254 axis : int
1255 Dimension on which the indexing is being made.
1257 Raises
1258 ------
1259 KeyError
1260 If no key was found. Will change in the future to raise if not all
1261 keys were found.
1263 Returns
1264 -------
1265 scalar, DataFrame, or Series: indexed value(s).
1266 """
1267 # we assume that not com.is_bool_indexer(key), as that is
1268 # handled before we get here.
1269 self._validate_key(key, axis)
1271 # A collection of keys
1272 keyarr, indexer = self._get_listlike_indexer(key, axis)
1273 return self.obj._reindex_with_indexers(
1274 {axis: [keyarr, indexer]}, copy=True, allow_dups=True
1275 )
1277 def _getitem_tuple(self, tup: tuple):
1278 with suppress(IndexingError):
1279 tup = self._expand_ellipsis(tup)
1280 return self._getitem_lowerdim(tup)
1282 # no multi-index, so validate all of the indexers
1283 tup = self._validate_tuple_indexer(tup)
1285 # ugly hack for GH #836
1286 if self._multi_take_opportunity(tup):
1287 return self._multi_take(tup)
1289 return self._getitem_tuple_same_dim(tup)
1291 def _get_label(self, label, axis: AxisInt):
1292 # GH#5567 this will fail if the label is not present in the axis.
1293 return self.obj.xs(label, axis=axis)
1295 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
1296 # we have an axis0 multi-index, handle or raise
1297 axis = self.axis or 0
1298 try:
1299 # fast path for series or for tup devoid of slices
1300 return self._get_label(tup, axis=axis)
1302 except KeyError as ek:
1303 # raise KeyError if number of indexers match
1304 # else IndexingError will be raised
1305 if self.ndim < len(tup) <= self.obj.index.nlevels:
1306 raise ek
1307 raise IndexingError("No label returned") from ek
1309 def _getitem_axis(self, key, axis: AxisInt):
1310 key = item_from_zerodim(key)
1311 if is_iterator(key):
1312 key = list(key)
1313 if key is Ellipsis:
1314 key = slice(None)
1316 labels = self.obj._get_axis(axis)
1318 if isinstance(key, tuple) and isinstance(labels, MultiIndex):
1319 key = tuple(key)
1321 if isinstance(key, slice):
1322 self._validate_key(key, axis)
1323 return self._get_slice_axis(key, axis=axis)
1324 elif com.is_bool_indexer(key):
1325 return self._getbool_axis(key, axis=axis)
1326 elif is_list_like_indexer(key):
1327 # an iterable multi-selection
1328 if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
1329 if hasattr(key, "ndim") and key.ndim > 1:
1330 raise ValueError("Cannot index with multidimensional key")
1332 return self._getitem_iterable(key, axis=axis)
1334 # nested tuple slicing
1335 if is_nested_tuple(key, labels):
1336 locs = labels.get_locs(key)
1337 indexer = [slice(None)] * self.ndim
1338 indexer[axis] = locs
1339 return self.obj.iloc[tuple(indexer)]
1341 # fall thru to straight lookup
1342 self._validate_key(key, axis)
1343 return self._get_label(key, axis=axis)
1345 def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
1346 """
1347 This is pretty simple as we just have to deal with labels.
1348 """
1349 # caller is responsible for ensuring non-None axis
1350 obj = self.obj
1351 if not need_slice(slice_obj):
1352 return obj.copy(deep=False)
1354 labels = obj._get_axis(axis)
1355 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
1357 if isinstance(indexer, slice):
1358 return self.obj._slice(indexer, axis=axis)
1359 else:
1360 # DatetimeIndex overrides Index.slice_indexer and may
1361 # return a DatetimeIndex instead of a slice object.
1362 return self.obj.take(indexer, axis=axis)
1364 def _convert_to_indexer(self, key, axis: AxisInt):
1365 """
1366 Convert indexing key into something we can use to do actual fancy
1367 indexing on a ndarray.
1369 Examples
1370 ix[:5] -> slice(0, 5)
1371 ix[[1,2,3]] -> [1,2,3]
1372 ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)
1374 Going by Zen of Python?
1375 'In the face of ambiguity, refuse the temptation to guess.'
1376 raise AmbiguousIndexError with integer labels?
1377 - No, prefer label-based indexing
1378 """
1379 labels = self.obj._get_axis(axis)
1381 if isinstance(key, slice):
1382 return labels._convert_slice_indexer(key, kind="loc")
1384 if (
1385 isinstance(key, tuple)
1386 and not isinstance(labels, MultiIndex)
1387 and self.ndim < 2
1388 and len(key) > 1
1389 ):
1390 raise IndexingError("Too many indexers")
1392 if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
1393 # Otherwise get_loc will raise InvalidIndexError
1395 # if we are a label return me
1396 try:
1397 return labels.get_loc(key)
1398 except LookupError:
1399 if isinstance(key, tuple) and isinstance(labels, MultiIndex):
1400 if len(key) == labels.nlevels:
1401 return {"key": key}
1402 raise
1403 except InvalidIndexError:
1404 # GH35015, using datetime as column indices raises exception
1405 if not isinstance(labels, MultiIndex):
1406 raise
1407 except ValueError:
1408 if not is_integer(key):
1409 raise
1410 return {"key": key}
1412 if is_nested_tuple(key, labels):
1413 if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
1414 # GH#35349 Raise if tuple in tuple for series
1415 raise IndexingError("Too many indexers")
1416 return labels.get_locs(key)
1418 elif is_list_like_indexer(key):
1419 if is_iterator(key):
1420 key = list(key)
1422 if com.is_bool_indexer(key):
1423 key = check_bool_indexer(labels, key)
1424 return key
1425 else:
1426 return self._get_listlike_indexer(key, axis)[1]
1427 else:
1428 try:
1429 return labels.get_loc(key)
1430 except LookupError:
1431 # allow a not found key only if we are a setter
1432 if not is_list_like_indexer(key):
1433 return {"key": key}
1434 raise
1436 def _get_listlike_indexer(self, key, axis: AxisInt):
1437 """
1438 Transform a list-like of keys into a new index and an indexer.
1440 Parameters
1441 ----------
1442 key : list-like
1443 Targeted labels.
1444 axis: int
1445 Dimension on which the indexing is being made.
1447 Raises
1448 ------
1449 KeyError
1450 If at least one key was requested but none was found.
1452 Returns
1453 -------
1454 keyarr: Index
1455 New index (coinciding with 'key' if the axis is unique).
1456 values : array-like
1457 Indexer for the return object, -1 denotes keys not found.
1458 """
1459 ax = self.obj._get_axis(axis)
1460 axis_name = self.obj._get_axis_name(axis)
1462 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
1464 return keyarr, indexer
1467@doc(IndexingMixin.iloc)
1468class _iLocIndexer(_LocationIndexer):
1469 _valid_types = (
1470 "integer, integer slice (START point is INCLUDED, END "
1471 "point is EXCLUDED), listlike of integers, boolean array"
1472 )
1473 _takeable = True
1475 # -------------------------------------------------------------------
1476 # Key Checks
def _validate_key(self, key, axis: AxisInt):
    """
    Check that ``key`` is an acceptable positional indexer for ``axis``.

    Raises
    ------
    NotImplementedError
        Boolean indexer carrying an integer-typed Index.
    ValueError
        Boolean indexer carrying any other Index, or a key of an
        unsupported kind.
    IndexingError
        ``key`` is a tuple.
    IndexError
        List-like key that is non-numeric or out of bounds; integer keys
        are checked by ``_validate_integer``.
    """
    if com.is_bool_indexer(key):
        # a mask that brings its own Index cannot be applied positionally
        if hasattr(key, "index") and isinstance(key.index, Index):
            if key.index.inferred_type == "integer":
                raise NotImplementedError(
                    "iLocation based boolean "
                    "indexing on an integer type "
                    "is not available"
                )
            raise ValueError(
                "iLocation based boolean indexing cannot use "
                "an indexable as a mask"
            )
        return

    if isinstance(key, slice):
        return

    if is_integer(key):
        self._validate_integer(key, axis)
        return

    if isinstance(key, tuple):
        # a tuple should already have been caught by this point
        # so don't treat a tuple as a valid indexer
        raise IndexingError("Too many indexers")

    if not is_list_like_indexer(key):
        raise ValueError(f"Can only index by location with a [{self._valid_types}]")

    if isinstance(key, ABCSeries):
        positions = key._values
    elif is_array_like(key):
        positions = key
    else:
        positions = np.array(key)
    axis_len = len(self.obj._get_axis(axis))

    # the key must have a numeric dtype
    if not is_numeric_dtype(positions.dtype):
        raise IndexError(f".iloc requires numeric indexers, got {positions}")

    # the key must stay within [-axis_len, axis_len)
    if len(positions) and (
        positions.max() >= axis_len or positions.min() < -axis_len
    ):
        raise IndexError("positional indexers are out-of-bounds")
def _has_valid_setitem_indexer(self, indexer) -> bool:
    """
    Validate that a positional indexer cannot enlarge its target.

    Raises if needed; the caller's indexer is not modified.

    Parameters
    ----------
    indexer : object
        Indexer about to be used for setting values; a non-tuple is
        expanded to one entry per axis with ``_tuplify``.

    Returns
    -------
    bool
        True when no problem was found.

    Raises
    ------
    IndexError
        The indexer, or one of its per-axis entries, is a dict, or an
        integer entry is ``>=`` the length of its axis.
    TypeError
        The indexer is a DataFrame.
    """
    if isinstance(indexer, dict):
        raise IndexError("iloc cannot enlarge its target object")

    if isinstance(indexer, ABCDataFrame):
        raise TypeError(
            "DataFrame indexer for .iloc is not supported. "
            "Consider using .loc with a DataFrame indexer for automatic alignment.",
        )

    if not isinstance(indexer, tuple):
        indexer = _tuplify(self.ndim, indexer)

    for ax, i in zip(self.obj.axes, indexer):
        if isinstance(i, dict):
            # Must be tested before the list-like check: a dict is itself
            # list-like, so testing it afterwards would never trigger.
            raise IndexError("iloc cannot enlarge its target object")
        if isinstance(i, slice) or is_list_like_indexer(i):
            # neither the slice bounds nor the list elements are checked here
            continue
        if is_integer(i) and i >= len(ax):
            raise IndexError("iloc cannot enlarge its target object")

    return True
def _is_scalar_access(self, key: tuple) -> bool:
    """
    Tell whether ``key`` can be served by the scalar fast path.

    That requires exactly one entry per dimension, each of them an integer.

    Returns
    -------
    bool
    """
    if len(key) != self.ndim:
        return False

    for k in key:
        if not is_integer(k):
            return False
    return True
def _validate_integer(self, key: int, axis: AxisInt) -> None:
    """
    Ensure ``key`` is a valid position on ``axis``.

    Valid positions are ``-n <= key < n`` where ``n`` is the axis length.

    Parameters
    ----------
    key : int
        Requested position.
    axis : int
        Desired axis.

    Raises
    ------
    IndexError
        If 'key' is not a valid position in axis 'axis'.
    """
    n = len(self.obj._get_axis(axis))
    if key < -n or key >= n:
        raise IndexError("single positional indexer is out-of-bounds")
1591 # -------------------------------------------------------------------
def _getitem_tuple(self, tup: tuple):
    """
    Resolve a tuple of positional indexers.

    The tuple is validated first; the lower-dimensional path is tried and,
    if it raises IndexingError, same-dimension selection is used instead.
    """
    tup = self._validate_tuple_indexer(tup)
    try:
        return self._getitem_lowerdim(tup)
    except IndexingError:
        pass

    return self._getitem_tuple_same_dim(tup)
def _get_list_axis(self, key, axis: AxisInt):
    """
    Take the positions listed in ``key`` along ``axis``.

    Parameters
    ----------
    key : list-like positional indexer
    axis : int

    Returns
    -------
    Result of ``self.obj._take_with_is_copy(key, axis=axis)``.

    Raises
    ------
    IndexError
        Re-raised with a uniform message when the take fails with an
        IndexError; the original error is kept as the cause.
    """
    try:
        taken = self.obj._take_with_is_copy(key, axis=axis)
    except IndexError as err:
        # re-raise with different error message
        raise IndexError("positional indexers are out-of-bounds") from err
    return taken
def _getitem_axis(self, key, axis: AxisInt):
    """
    Select along a single axis by position, dispatching on the kind of
    ``key``: Ellipsis/slice, boolean indexer, list-like or single integer.

    Raises
    ------
    IndexError
        ``key`` is a DataFrame.
    TypeError
        ``key`` is a scalar that is not an integer.
    """
    if key is Ellipsis:
        key = slice(None)
    elif isinstance(key, ABCDataFrame):
        raise IndexError(
            "DataFrame indexer is not allowed for .iloc\n"
            "Consider using .loc for automatic alignment."
        )

    if isinstance(key, slice):
        return self._get_slice_axis(key, axis=axis)

    # normalise iterators and lists to an ndarray
    if is_iterator(key):
        key = list(key)
    if isinstance(key, list):
        key = np.asarray(key)

    if com.is_bool_indexer(key):
        self._validate_key(key, axis)
        return self._getbool_axis(key, axis=axis)

    if is_list_like_indexer(key):
        # a list of integers
        return self._get_list_axis(key, axis=axis)

    # a single integer
    key = item_from_zerodim(key)
    if not is_integer(key):
        raise TypeError("Cannot index by location index with a non-integer key")

    # validate the location
    self._validate_integer(key, axis)

    return self.obj._ixs(key, axis=axis)
def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
    """
    Select a positional slice along ``axis``.

    A slice for which ``need_slice`` is false yields a shallow copy of the
    whole object. The caller is responsible for passing a non-None axis.
    """
    target = self.obj

    if not need_slice(slice_obj):
        return target.copy(deep=False)

    axis_labels = target._get_axis(axis)
    axis_labels._validate_positional_slice(slice_obj)
    return self.obj._slice(slice_obj, axis=axis)
def _convert_to_indexer(self, key, axis: AxisInt):
    """
    Return ``key`` unchanged: positional keys need no conversion.
    """
    return key
def _get_setitem_indexer(self, key):
    """
    Prepare ``key`` for assignment.

    Iterators are materialised into a list; when this indexer is bound to a
    specific axis, the key is expanded with ``_tupleize_axis_indexer``.
    """
    # GH#32257 Fall through to let numpy do validation
    if is_iterator(key):
        key = list(key)

    if self.axis is None:
        return key
    return _tupleize_axis_indexer(self.ndim, self.axis, key)
1687 # -------------------------------------------------------------------
def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
    """
    Set ``value`` on ``self.obj`` at the positions given by ``indexer``.

    A dict inside ``indexer`` marks a key that is missing from its axis; in
    that case the object is enlarged before the value is written.

    This method is currently broken when dealing with non-unique Indexes,
    since it goes from positional indexers back to labels when calling
    BlockManager methods, see GH#12991, GH#22046, GH#15686.

    Parameters
    ----------
    indexer : tuple or single-axis indexer
    value : object
    name : str, default "iloc"
        Name of the calling indexer; "loc" enables value masking.
    """
    info_axis = self.obj._info_axis_number

    # ----- decide between the column-wise path and the single-block path
    take_split_path = not self.obj._mgr.is_single_block

    if not take_split_path and isinstance(value, ABCDataFrame):
        # Avoid cast of values
        take_split_path = not value._mgr.is_single_block

    # with a single block we still go column-wise unless the block is
    # one-dimensional or is able to hold the value
    if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
        # in case of dict, keys are indices
        if isinstance(value, dict):
            val = list(value.values())
        else:
            val = value
        first_block = self.obj._mgr.arrays[0]
        take_split_path = not can_hold_element(
            first_block, extract_array(val, extract_numpy=True)
        )

    # a MultiIndex axis indexed with anything other than an integer or a
    # null slice forces the column-wise path, xref GH 10360, GH 27841
    if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
        if any(
            isinstance(ax, MultiIndex)
            and not (is_integer(sub) or com.is_null_slice(sub))
            for sub, ax in zip(indexer, self.obj.axes)
        ):
            take_split_path = True

    # ----- resolve missing keys, enlarging the object where needed
    if isinstance(indexer, tuple):
        resolved = []
        for i, idx in enumerate(indexer):
            if not isinstance(idx, dict):
                resolved.append(idx)
                continue

            # reindex the axis to the new value and set inplace
            key, _ = convert_missing_indexer(idx)

            # On the items axis the new item is created first; this sets
            # the dtype correctly and isolates the block that may have to
            # be modified.
            if self.ndim > 1 and i == info_axis:
                # an empty frame can only take a list-like value
                if not len(self.obj):
                    if not is_list_like_indexer(value):
                        raise ValueError(
                            "cannot set a frame with no "
                            "defined index and a scalar"
                        )
                    self.obj[key] = value
                    return

                # add a new item with the dtype setup
                if com.is_null_slice(indexer[0]):
                    # We are setting an entire column
                    self.obj[key] = value
                    return
                elif is_array_like(value):
                    # GH#42099
                    arr = extract_array(value, extract_numpy=True)
                    all_missing = np.full(len(self.obj), -1, dtype=np.intp)
                    empty_value = algos.take_nd(arr, all_missing)
                    if not isinstance(value, ABCSeries):
                        # A Series needs alignment; anything else can be
                        # written directly.
                        if (
                            isinstance(arr, np.ndarray)
                            and arr.ndim == 1
                            and len(arr) == 1
                        ):
                            # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                            arr = arr[0, ...]
                        empty_value[indexer[0]] = arr
                        self.obj[key] = empty_value
                        return

                    self.obj[key] = empty_value

                else:
                    # FIXME: GH#42099#issuecomment-864326014
                    self.obj[key] = infer_fill_value(value)

                new_indexer = convert_from_missing_indexer_tuple(
                    indexer, self.obj.axes
                )
                self._setitem_with_indexer(new_indexer, value, name)

                return

            # Enlarge the axis by one label. Only the block manager is
            # swapped, so the cache has to be cleared.
            index = self.obj._get_axis(i)
            labels = index.insert(len(index), key)

            # Expand the values to the length of the new index `labels`;
            # GH#40096 ensure this is valid even if the index has duplicates.
            taker = np.arange(len(index) + 1, dtype=np.intp)
            taker[-1] = -1
            reindexers = {i: (labels, taker)}
            new_obj = self.obj._reindex_with_indexers(reindexers, allow_dups=True)
            self.obj._mgr = new_obj._mgr
            self.obj._maybe_update_cacher(clear=True)
            self.obj._is_copy = None

            resolved.append(labels.get_loc(key))

        indexer = tuple(resolved)
    else:
        indexer, missing = convert_missing_indexer(indexer)

        if missing:
            self._setitem_with_indexer_missing(indexer, value)
            return

    if name == "loc":
        # must come after setting of missing
        indexer, value = self._maybe_mask_setitem_value(indexer, value)

    # ----- align and set the values
    if take_split_path:
        # We have to operate column-wise
        self._setitem_with_indexer_split_path(indexer, value, name)
    else:
        self._setitem_single_block(indexer, value, name)
def _setitem_with_indexer_split_path(self, indexer, value, name: str):
    """
    Set ``value`` column by column.

    Parameters
    ----------
    indexer : tuple or single-axis indexer
        Row indexer and column indexer; a non-tuple is expanded with
        ``_tuplify``.
    value : object
    name : str
        Name of the calling indexer ("loc" or "iloc").

    Raises
    ------
    IndexError
        More indexers than dimensions.
    ValueError
        Row indexer with more than two dimensions, or a list-like value
        whose length matches neither the rows nor the columns selected.
    """
    # the split path is only ever chosen for 2D objects
    assert self.ndim == 2

    if not isinstance(indexer, tuple):
        indexer = _tuplify(self.ndim, indexer)
    if len(indexer) > self.ndim:
        raise IndexError("too many indices for array")
    if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
        raise ValueError(r"Cannot set values with ndim > 2")

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        from pandas import Series

        value = self._align_series(indexer, Series(value))

    # column positions as something we can iterate over
    col_indexer = indexer[1]
    ilocs = self._ensure_iterable_column_indexer(col_indexer)

    row_indexer = indexer[0]
    # expected length of obj[row_indexer]
    n_rows = length_of_indexer(row_indexer, self.obj.index)

    # Anything that is not an iterable with ndim >= 1 (e.g. np.array(0))
    # is treated as a scalar and written to every selected column.
    if not (is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0):
        for loc in ilocs:
            self._setitem_single_column(loc, value, row_indexer)
        return

    if isinstance(value, ABCDataFrame):
        self._setitem_with_indexer_frame_value(indexer, value, name)

    elif np.ndim(value) == 2:
        # TODO: avoid np.ndim call in case it isn't an ndarray, since
        #  that will construct an ndarray, which will be wasteful
        self._setitem_with_indexer_2d_value(indexer, value)

    elif len(ilocs) == 1 and n_rows == len(value) and not is_scalar(row_indexer):
        # multiple rows of a single column
        self._setitem_single_column(ilocs[0], value, row_indexer)

    elif len(ilocs) == 1 and 0 != n_rows != len(value):
        # N values into M entries of a single column is invalid for N != M;
        # zero-length is excluded for e.g. an all-False boolean mask

        if len(value) == 1 and not is_integer(col_indexer):
            # This is a case like df.iloc[:3, [1]] = [0]
            #  where we treat as df.iloc[:3, 1] = 0
            return self._setitem_with_indexer((row_indexer, col_indexer[0]), value[0])

        raise ValueError(
            "Must have equal len keys and value "
            "when setting with an iterable"
        )

    elif n_rows == 0 and len(value) == len(self.obj.index):
        # We get here in one case via .loc with a all-False mask
        pass

    elif self._is_scalar_access(indexer) and is_object_dtype(
        self.obj.dtypes[ilocs[0]]
    ):
        # nested data can only be stored in an object-dtype column
        self._setitem_single_column(indexer[1], value, row_indexer)

    elif len(ilocs) == len(value):
        # one value per selected column
        for loc, v in zip(ilocs, value):
            self._setitem_single_column(loc, v, row_indexer)

    elif len(ilocs) == 1 and com.is_null_slice(row_indexer) and len(self.obj) == 0:
        # This is a setitem-with-expansion, see
        #  test_loc_setitem_empty_append_expands_rows_mixed_dtype
        # e.g. df = DataFrame(columns=["x", "y"])
        #  df["x"] = df["x"].astype(np.int64)
        #  df.loc[:, "x"] = [1, 2, 3]
        self._setitem_single_column(ilocs[0], value, row_indexer)

    else:
        raise ValueError(
            "Must have equal len keys and value "
            "when setting with an iterable"
        )
def _setitem_with_indexer_2d_value(self, indexer, value):
    """
    Set a two-dimensional ``value`` one column at a time.

    Used when ``np.ndim(value) == 2``; DataFrame values are handled by
    ``_setitem_with_indexer_frame_value`` instead.

    Raises
    ------
    ValueError
        The number of selected columns differs from ``value.shape[1]``.
    """
    row_indexer = indexer[0]

    ilocs = self._ensure_iterable_column_indexer(indexer[1])

    if not is_array_like(value):
        # cast lists to array
        value = np.array(value, dtype=object)
    if len(ilocs) != value.shape[1]:
        raise ValueError(
            "Must have equal len keys and value when setting with an ndarray"
        )

    for pos, loc in enumerate(ilocs):
        column = value[:, pos]
        # object columns are passed on as lists so that type inference
        # happens in _setitem_single_column
        column = column.tolist() if is_object_dtype(column.dtype) else column
        self._setitem_single_column(loc, column, row_indexer)
def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
    """
    Set a DataFrame ``value`` one column at a time.

    With ``name == "iloc"`` the columns of ``value`` are used by position
    and no alignment takes place. Otherwise each target column is matched by
    label and aligned with ``_align_series``; target columns that are absent
    from ``value`` are set to NaN.

    Raises
    ------
    ValueError
        ``value`` has non-unique columns that differ from the columns of
        the object being set.
    """
    ilocs = self._ensure_iterable_column_indexer(indexer[1])

    sub_indexer = list(indexer)
    row_indexer = indexer[0]

    multiindex_indexer = isinstance(self.obj.columns, MultiIndex)

    unique_cols = value.columns.is_unique

    # We do not want to align the value in case of iloc GH#37728
    if name == "iloc":
        for pos, loc in enumerate(ilocs):
            self._setitem_single_column(loc, value.iloc[:, pos], row_indexer)
        return

    # With duplicate column labels the frames must share identical columns;
    # columns are then picked by position, see
    # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
    by_position = False
    if not unique_cols:
        if not value.columns.equals(self.obj.columns):
            raise ValueError("Setting with non-unique columns is not allowed.")
        by_position = True

    for loc in ilocs:
        item = self.obj.columns[loc]
        if item in value:
            sub_indexer[1] = item
            source = value.iloc[:, loc] if by_position else value[item]
            val = self._align_series(
                tuple(sub_indexer), source, multiindex_indexer
            )
        else:
            val = np.nan

        self._setitem_single_column(loc, val, row_indexer)
def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
    """
    Set ``value`` into one column of ``self.obj``.

    Parameters
    ----------
    loc : int
        Indexer for column position
    value : object
        Value(s) to write.
    plane_indexer : int, slice, listlike[int]
        The indexer we use for setitem along axis=0.
    """
    rows = plane_indexer

    covers_all_rows = com.is_null_slice(rows) or com.is_full_slice(
        rows, len(self.obj)
    )
    selects_nothing = com.is_empty_slice(rows) or (
        is_array_like(rows) and len(rows) == 0
    )

    if selects_nothing:
        # no-op, don't cast dtype later
        return

    if not covers_all_rows:
        # try to operate inplace first; the manager falls back to casting
        # if necessary
        self.obj._mgr.column_setitem(loc, plane_indexer, value)
    else:
        try:
            self.obj._mgr.column_setitem(
                loc, plane_indexer, value, inplace_only=True
            )
        except (ValueError, TypeError, LossySetitemError):
            # The whole column is replaced and cannot be set inplace, so
            # the value's (possibly inferred) dtype is used rather than
            # object.
            self.obj.isetitem(loc, value)

    self.obj._clear_item_cache()
def _setitem_single_block(self, indexer, value, name: str) -> None:
    """
    Set ``value`` at ``indexer`` for an object backed by a single Block.

    Parameters
    ----------
    indexer : tuple or single-axis indexer
    value : object
        Series and dict values are aligned with ``_align_series`` and
        DataFrame values with ``_align_frame``, except that Series and
        DataFrame values are not aligned when ``name`` is "iloc".
    name : str
        Name of the calling indexer ("loc" or "iloc").
    """
    from pandas import Series

    info_axis = self.obj._info_axis_number
    item_labels = self.obj._get_axis(info_axis)
    if isinstance(indexer, tuple):
        # Setting on the info axis ONLY goes through the single-column
        # setter, which avoids the block-splitting logic here.
        whole_column = (
            self.ndim == len(indexer) == 2
            and is_integer(indexer[1])
            and com.is_null_slice(indexer[0])
        )
        if whole_column:
            col = item_labels[indexer[info_axis]]
            if len(item_labels.get_indexer_for([col])) == 1:
                # e.g. test_loc_setitem_empty_append_expands_rows
                loc = item_labels.get_loc(col)
                self._setitem_single_column(loc, value, indexer[0])
                return

        indexer = maybe_convert_ix(*indexer)  # e.g. test_setitem_frame_align

    align_as_series = (
        isinstance(value, ABCSeries) and name != "iloc"
    ) or isinstance(value, dict)
    if align_as_series:
        # TODO(EA): ExtensionBlock.setitem this causes issues with
        # setting for extensionarrays that store dicts. Need to decide
        # if it's worth supporting that.
        value = self._align_series(indexer, Series(value))

    elif isinstance(value, ABCDataFrame) and name != "iloc":
        value = self._align_frame(indexer, value)._values

    # check for chained assignment
    self.obj._check_is_chained_assignment_possible()

    # actually do the set
    self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
    self.obj._maybe_update_cacher(clear=True, inplace=True)
def _setitem_with_indexer_missing(self, indexer, value):
    """
    Insert a new entry (Series) or a new row (DataFrame) labelled ``indexer``.

    Raises
    ------
    ValueError
        For a DataFrame without columns, or when a list-like ``value`` does
        not have one element per column.
    """
    from pandas import Series

    # reindex the axis to the new value and set inplace
    if self.ndim == 1:
        index = self.obj.index
        new_index = index.insert(len(index), indexer)

        # A coerced indexer (e.g. the float 0.0 against an int64 Index)
        # may match an existing element; in that case set that element
        # instead of creating a duplicate label. GH#12246
        if index.is_unique:
            # new_index[-1:] rather than [new_index[-1]] retains the dtype
            new_indexer = index.get_indexer(new_index[-1:])
            if (new_indexer != -1).any():
                # We get only here with loc, so can hard code
                return self._setitem_with_indexer(new_indexer, value, "loc")

        # pick a dtype that preserves both the value and the object
        new_dtype = None
        if is_scalar(value):
            if is_valid_na_for_dtype(value, self.obj.dtype):
                if not is_object_dtype(self.obj.dtype):
                    # Every NA value is suitable for object, no conversion needed
                    value = na_value_for_dtype(self.obj.dtype, compat=False)

                new_dtype = maybe_promote(self.obj.dtype, value)[0]

            elif (
                not isna(value)
                and not self.obj.empty
                and not is_object_dtype(self.obj.dtype)
            ):
                # We should not cast, if we have object dtype because we can
                # set timedeltas into object series
                curr_dtype = self.obj.dtype
                curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
                new_dtype = maybe_promote(curr_dtype, value)[0]

        new_values = Series([value], dtype=new_dtype)._values

        if len(self.obj._values):
            # GH#22717 handle casting compatibility that np.concatenate
            #  does incorrectly
            new_values = concat_compat([self.obj._values, new_values])
        self.obj._mgr = self.obj._constructor(
            new_values, index=new_index, name=self.obj.name
        )._mgr
        self.obj._maybe_update_cacher(clear=True)

    elif self.ndim == 2:
        if not len(self.obj.columns):
            # no columns and scalar
            raise ValueError("cannot set a frame with no defined columns")

        has_dtype = hasattr(value, "dtype")
        if isinstance(value, ABCSeries):
            # append a Series
            value = value.reindex(index=self.obj.columns, copy=True)
            value.name = indexer
        elif isinstance(value, dict):
            value = Series(
                value, index=self.obj.columns, name=indexer, dtype=object
            )
        else:
            # a list-like must provide exactly one element per column
            if is_list_like_indexer(value):
                if len(value) != len(self.obj.columns):
                    raise ValueError("cannot set a row with mismatched columns")

            value = Series(value, index=self.obj.columns, name=indexer)

        if not len(self.obj):
            # We will ignore the existing dtypes instead of using
            #  internals.concat logic
            df = value.to_frame().T

            idx = self.obj.index
            name = idx.names if isinstance(idx, MultiIndex) else idx.name

            df.index = Index([indexer], name=name)
            if not has_dtype:
                # A value that came with a dtype (Series, ndarray) keeps
                # it; for a list or dict the dtypes are inferred.
                df = df.infer_objects(copy=False)
            self.obj._mgr = df._mgr
        else:
            self.obj._mgr = self.obj._append(value)._mgr
        self.obj._maybe_update_cacher(clear=True)
def _ensure_iterable_column_indexer(self, column_indexer):
    """
    Turn a column indexer into something that can be iterated over.

    An integer becomes a one-element list, a slice or boolean ndarray
    becomes an array of the selected positions, and anything else is
    returned unchanged.
    """
    if is_integer(column_indexer):
        return [column_indexer]

    if isinstance(column_indexer, slice):
        return np.arange(len(self.obj.columns))[column_indexer]

    is_bool_mask = isinstance(column_indexer, np.ndarray) and is_bool_dtype(
        column_indexer.dtype
    )
    if is_bool_mask:
        return np.arange(len(column_indexer))[column_indexer]

    return column_indexer
def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
    """
    Align `ser` with the locations selected by `indexer` and return the
    values to assign there.

    Parameters
    ----------
    indexer : tuple, slice, scalar
        Indexer used to get the locations that will be set to `ser`.
    ser : pd.Series
        Values to assign to the locations specified by `indexer`.
    multiindex_indexer : bool, optional
        Defaults to False. Should be set to True if `indexer` was from
        a `pd.MultiIndex`, to avoid unnecessary broadcasting.

    Returns
    -------
    `np.array` of `ser` broadcast to the appropriate shape for assignment
    to the locations selected by `indexer`

    Raises
    ------
    ValueError
        If none of the handled indexer shapes produced a result.

    Notes
    -----
    For an integer indexer on a 1-dimensional object-dtype `self.obj`,
    `ser` itself is returned rather than an array of its values.
    """
    # a lone slice / array / list / Index is handled as a 1-tuple
    if isinstance(indexer, (slice, np.ndarray, list, Index)):
        indexer = (indexer,)

    if isinstance(indexer, tuple):
        # flatten np.ndarray indexers
        def ravel(i):
            return i.ravel() if isinstance(i, np.ndarray) else i

        indexer = tuple(map(ravel, indexer))

        # True for every position whose indexer is not a null slice
        aligners = [not com.is_null_slice(idx) for idx in indexer]
        sum_aligners = sum(aligners)
        single_aligner = sum_aligners == 1
        is_frame = self.ndim == 2
        obj = self.obj

        # are we a single alignable value on a non-primary
        # dim (e.g. panel: 1,2, or frame: 0) ?
        # hence need to align to a single axis dimension
        # rather than find all valid dims

        # frame
        if is_frame:
            # for a frame, only a lone aligner in the first position counts
            single_aligner = single_aligner and aligners[0]

        # we have a frame, with multiple indexers on both axes; and a
        # series, so need to broadcast (see GH5206)
        if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
            ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

            # single indexer
            if len(indexer) > 1 and not multiindex_indexer:
                # repeat the aligned values once per entry of indexer[1]
                len_indexer = len(indexer[1])
                ser_values = (
                    np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
                )

            return ser_values

        # otherwise return from the first axis whose indexer can be aligned
        for i, idx in enumerate(indexer):
            ax = obj.axes[i]

            # multiple aligners (or null slices)
            if is_sequence(idx) or isinstance(idx, slice):
                if single_aligner and com.is_null_slice(idx):
                    continue
                new_ix = ax[idx]
                if not is_list_like_indexer(new_ix):
                    new_ix = Index([new_ix])
                else:
                    new_ix = Index(new_ix)
                # already aligned (or nothing selected): copy, no reindex
                if ser.index.equals(new_ix) or not len(new_ix):
                    return ser._values.copy()

                return ser.reindex(new_ix)._values

            # 2 dims
            elif single_aligner:
                # reindex along index
                ax = self.obj.axes[1]
                if ser.index.equals(ax) or not len(ax):
                    return ser._values.copy()
                return ser.reindex(ax)._values

    elif is_integer(indexer) and self.ndim == 1:
        # object-dtype target: hand back the Series itself
        if is_object_dtype(self.obj):
            return ser
        ax = self.obj._get_axis(0)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values[indexer]

    elif is_integer(indexer):
        # integer indexer on a non-1-dimensional object: align to axis 1
        ax = self.obj._get_axis(1)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values

    # reached when no branch above returned
    raise ValueError("Incompatible indexer with Series")
2301 def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
2302 is_frame = self.ndim == 2
2304 if isinstance(indexer, tuple):
2305 idx, cols = None, None
2306 sindexers = []
2307 for i, ix in enumerate(indexer):
2308 ax = self.obj.axes[i]
2309 if is_sequence(ix) or isinstance(ix, slice):
2310 if isinstance(ix, np.ndarray):
2311 ix = ix.ravel()
2312 if idx is None:
2313 idx = ax[ix]
2314 elif cols is None:
2315 cols = ax[ix]
2316 else:
2317 break
2318 else:
2319 sindexers.append(i)
2321 if idx is not None and cols is not None:
2322 if df.index.equals(idx) and df.columns.equals(cols):
2323 val = df.copy()
2324 else:
2325 val = df.reindex(idx, columns=cols)
2326 return val
2328 elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
2329 ax = self.obj.index[indexer]
2330 if df.index.equals(ax):
2331 val = df.copy()
2332 else:
2333 # we have a multi-index and are trying to align
2334 # with a particular, level GH3738
2335 if (
2336 isinstance(ax, MultiIndex)
2337 and isinstance(df.index, MultiIndex)
2338 and ax.nlevels != df.index.nlevels
2339 ):
2340 raise TypeError(
2341 "cannot align on a multi-index with out "
2342 "specifying the join levels"
2343 )
2345 val = df.reindex(index=ax)
2346 return val
2348 raise ValueError("Incompatible indexer with DataFrame")
2351class _ScalarAccessIndexer(NDFrameIndexerBase):
2352 """
2353 Access scalars quickly.
2354 """
2356 # sub-classes need to set _takeable
2357 _takeable: bool
2359 def _convert_key(self, key):
2360 raise AbstractMethodError(self)
2362 def __getitem__(self, key):
2363 if not isinstance(key, tuple):
2364 # we could have a convertible item here (e.g. Timestamp)
2365 if not is_list_like_indexer(key):
2366 key = (key,)
2367 else:
2368 raise ValueError("Invalid call for scalar access (getting)!")
2370 key = self._convert_key(key)
2371 return self.obj._get_value(*key, takeable=self._takeable)
2373 def __setitem__(self, key, value) -> None:
2374 if isinstance(key, tuple):
2375 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
2376 else:
2377 # scalar callable may return tuple
2378 key = com.apply_if_callable(key, self.obj)
2380 if not isinstance(key, tuple):
2381 key = _tuplify(self.ndim, key)
2382 key = list(self._convert_key(key))
2383 if len(key) != self.ndim:
2384 raise ValueError("Not enough indexers for scalar access (setting)!")
2386 self.obj._set_value(*key, value=value, takeable=self._takeable)
@doc(IndexingMixin.at)
class _AtIndexer(_ScalarAccessIndexer):
    _takeable = False

    def _convert_key(self, key):
        """
        Adapt ``key`` for scalar access.

        On a 1-dimensional object a key with more than one element is
        wrapped in a 1-tuple; every other key is returned unchanged.
        """
        # GH 26989
        # For series, unpacking key needs to result in the label.
        # A one-element key such as (1,) already does, so only longer
        # keys need wrapping.
        needs_wrapping = self.ndim == 1 and len(key) > 1
        return (key,) if needs_wrapping else key

    @property
    def _axes_are_unique(self) -> bool:
        # Only relevant for self.ndim == 2
        assert self.ndim == 2
        frame = self.obj
        return frame.index.is_unique and frame.columns.is_unique

    def __getitem__(self, key):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__getitem__(key)

        # GH#33041 fall back to .loc
        all_scalars = isinstance(key, tuple) and all(is_scalar(part) for part in key)
        if not all_scalars:
            raise ValueError("Invalid call for scalar access (getting)!")
        return self.obj.loc[key]

    def __setitem__(self, key, value):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__setitem__(key, value)

        # GH#33041 fall back to .loc
        all_scalars = isinstance(key, tuple) and all(is_scalar(part) for part in key)
        if not all_scalars:
            raise ValueError("Invalid call for scalar access (setting)!")

        self.obj.loc[key] = value
        return
@doc(IndexingMixin.iat)
class _iAtIndexer(_ScalarAccessIndexer):
    _takeable = True

    def _convert_key(self, key):
        """
        Check that every element of ``key`` is an integer and return
        ``key`` unchanged.
        """
        if not all(is_integer(loc) for loc in key):
            raise ValueError("iAt based indexing can only have integer indexers")
        return key
2447def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
2448 """
2449 Given an indexer for the first dimension, create an equivalent tuple
2450 for indexing over all dimensions.
2452 Parameters
2453 ----------
2454 ndim : int
2455 loc : object
2457 Returns
2458 -------
2459 tuple
2460 """
2461 _tup: list[Hashable | slice]
2462 _tup = [slice(None, None) for _ in range(ndim)]
2463 _tup[0] = loc
2464 return tuple(_tup)
2467def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:
2468 """
2469 If we have an axis, adapt the given key to be axis-independent.
2470 """
2471 new_key = [slice(None)] * ndim
2472 new_key[axis] = key
2473 return tuple(new_key)
def check_bool_indexer(index: Index, key) -> np.ndarray:
    """
    Validate a boolean indexer against ``index``, reindexing or converting
    it where necessary.

    This function assumes that is_bool_indexer(key) == True.

    Parameters
    ----------
    index : Index
        Index of the object on which the indexing is done.
    key : list-like
        Boolean indexer to check.

    Returns
    -------
    np.array
        Resulting key.

    Raises
    ------
    IndexError
        If the key does not have the same length as index.
    IndexingError
        If the index of the key is unalignable to index.
    """
    aligned = key
    needs_alignment = isinstance(key, ABCSeries) and not key.index.equals(index)
    if needs_alignment:
        positions = key.index.get_indexer_for(index)
        if -1 in positions:
            raise IndexingError(
                "Unalignable boolean Series provided as "
                "indexer (index of the boolean Series and of "
                "the indexed object do not match)."
            )

        aligned = key.take(positions)

        # non-extension dtypes are finished here; extension dtypes fall
        # through to the conversion below
        if not is_extension_array_dtype(aligned.dtype):
            return aligned.astype(bool)._values

    if is_object_dtype(key):
        # key might be object-dtype bool, check_array_indexer needs bool array
        aligned = np.asarray(aligned, dtype=bool)
    elif not is_array_like(aligned):
        # GH 33924
        # key may contain nan elements, check_array_indexer needs bool array
        aligned = pd_array(aligned, dtype=bool)
    return check_array_indexer(index, aligned)
def convert_missing_indexer(indexer):
    """
    Unwrap a missing indexer, which is represented as a dict.

    Returns the scalar indexer together with a boolean telling whether
    a conversion took place.
    """
    if not isinstance(indexer, dict):
        return indexer, False

    # a missing key (but not a tuple indexer)
    missing_key = indexer["key"]
    if isinstance(missing_key, bool):
        raise KeyError("cannot use a single bool to index into setitem")
    return missing_key, True
def convert_from_missing_indexer_tuple(indexer, axes):
    """
    Build an indexer tuple in which every missing (dict) entry has been
    replaced by the location of its "key" on the matching axis.
    """
    return tuple(
        axes[pos].get_loc(item["key"]) if isinstance(item, dict) else item
        for pos, item in enumerate(indexer)
    )
def maybe_convert_ix(*args):
    """
    Take the cross-product of the indexers when every one of them is an
    ndarray, list, Series or Index; otherwise return them untouched.
    """
    array_types = (np.ndarray, list, ABCSeries, Index)
    if all(isinstance(arg, array_types) for arg in args):
        return np.ix_(*args)
    return args
def is_nested_tuple(tup, labels) -> bool:
    """
    Tell whether ``tup`` is a tuple holding a list-like or slice while
    ``labels`` is a MultiIndex.

    Returns
    -------
    bool
    """
    if not isinstance(tup, tuple):
        return False

    # check for a compatible nested tuple and multiindexes among the axes
    has_nested_part = any(is_list_like(part) or isinstance(part, slice) for part in tup)
    return has_nested_part and isinstance(labels, MultiIndex)
def is_label_like(key) -> bool:
    """
    Tell whether ``key`` selects a single label or row, i.e. it is not a
    slice, not a list-like indexer and not Ellipsis.

    Returns
    -------
    bool
    """
    return not (
        isinstance(key, slice) or is_list_like_indexer(key) or key is Ellipsis
    )
def need_slice(obj: slice) -> bool:
    """
    Tell whether ``obj`` has a start, a stop, or a step other than 1.

    Returns
    -------
    bool
    """
    if obj.start is not None or obj.stop is not None:
        return True

    step = obj.step
    return step is not None and step != 1
def check_dict_or_set_indexers(key) -> None:
    """
    Raise if the indexer is a set or dict, or a tuple containing one;
    these are no longer allowed.
    """
    # only the elements of a tuple key are inspected individually
    parts = key if isinstance(key, tuple) else ()

    if isinstance(key, set) or any(isinstance(part, set) for part in parts):
        raise TypeError(
            "Passing a set as an indexer is not supported. Use a list instead."
        )

    if isinstance(key, dict) or any(isinstance(part, dict) for part in parts):
        raise TypeError(
            "Passing a dict as an indexer is not supported. Use a list instead."
        )