1from __future__ import annotations
2
3from contextlib import suppress
4import sys
5from typing import (
6 TYPE_CHECKING,
7 Hashable,
8 Sequence,
9 TypeVar,
10 cast,
11 final,
12)
13import warnings
14
15import numpy as np
16
17from pandas._config import using_copy_on_write
18
19from pandas._libs.indexing import NDFrameIndexerBase
20from pandas._libs.lib import item_from_zerodim
21from pandas._typing import (
22 Axis,
23 AxisInt,
24)
25from pandas.compat import PYPY
26from pandas.errors import (
27 AbstractMethodError,
28 ChainedAssignmentError,
29 IndexingError,
30 InvalidIndexError,
31 LossySetitemError,
32 _chained_assignment_msg,
33)
34from pandas.util._decorators import doc
35
36from pandas.core.dtypes.cast import (
37 can_hold_element,
38 maybe_promote,
39)
40from pandas.core.dtypes.common import (
41 is_array_like,
42 is_bool_dtype,
43 is_extension_array_dtype,
44 is_hashable,
45 is_integer,
46 is_iterator,
47 is_list_like,
48 is_numeric_dtype,
49 is_object_dtype,
50 is_scalar,
51 is_sequence,
52)
53from pandas.core.dtypes.concat import concat_compat
54from pandas.core.dtypes.generic import (
55 ABCDataFrame,
56 ABCSeries,
57)
58from pandas.core.dtypes.missing import (
59 infer_fill_value,
60 is_valid_na_for_dtype,
61 isna,
62 na_value_for_dtype,
63)
64
65from pandas.core import algorithms as algos
66import pandas.core.common as com
67from pandas.core.construction import (
68 array as pd_array,
69 extract_array,
70)
71from pandas.core.indexers import (
72 check_array_indexer,
73 is_list_like_indexer,
74 is_scalar_indexer,
75 length_of_indexer,
76)
77from pandas.core.indexes.api import (
78 Index,
79 MultiIndex,
80)
81
82if TYPE_CHECKING:
83 from pandas import (
84 DataFrame,
85 Series,
86 )
87
88_LocationIndexerT = TypeVar("_LocationIndexerT", bound="_LocationIndexer")
89
90# "null slice"
91_NS = slice(None, None)
92_one_ellipsis_message = "indexer may only contain one '...' entry"
93
94
95# the public IndexSlicerMaker
96class _IndexSlice:
97 """
98 Create an object to more easily perform multi-index slicing.
99
100 See Also
101 --------
102 MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.
103
104 Notes
105 -----
106 See :ref:`Defined Levels <advanced.shown_levels>`
107 for further info on slicing a MultiIndex.
108
109 Examples
110 --------
111 >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
112 >>> columns = ['foo', 'bar']
113 >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
114 ... index=midx, columns=columns)
115
116 Using the default slice command:
117
118 >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
119 foo bar
120 A0 B0 0 1
121 B1 2 3
122 A1 B0 8 9
123 B1 10 11
124
125 Using the IndexSlice class for a more intuitive command:
126
127 >>> idx = pd.IndexSlice
128 >>> dfmi.loc[idx[:, 'B0':'B1'], :]
129 foo bar
130 A0 B0 0 1
131 B1 2 3
132 A1 B0 8 9
133 B1 10 11
134 """
135
136 def __getitem__(self, arg):
137 return arg
138
139
140IndexSlice = _IndexSlice()
141
142
143class IndexingMixin:
144 """
145 Mixin for adding .loc/.iloc/.at/.iat to Dataframes and Series.
146 """
147
148 @property
149 def iloc(self) -> _iLocIndexer:
150 """
151 Purely integer-location based indexing for selection by position.
152
153 ``.iloc[]`` is primarily integer position based (from ``0`` to
154 ``length-1`` of the axis), but may also be used with a boolean
155 array.
156
157 Allowed inputs are:
158
159 - An integer, e.g. ``5``.
160 - A list or array of integers, e.g. ``[4, 3, 0]``.
161 - A slice object with ints, e.g. ``1:7``.
162 - A boolean array.
163 - A ``callable`` function with one argument (the calling Series or
164 DataFrame) and that returns valid output for indexing (one of the above).
165 This is useful in method chains, when you don't have a reference to the
166 calling object, but would like to base your selection on some value.
167 - A tuple of row and column indexes. The tuple elements consist of one of the
168 above inputs, e.g. ``(0, 1)``.
169
170 ``.iloc`` will raise ``IndexError`` if a requested indexer is
171 out-of-bounds, except *slice* indexers which allow out-of-bounds
172 indexing (this conforms with python/numpy *slice* semantics).
173
174 See more at :ref:`Selection by Position <indexing.integer>`.
175
176 See Also
177 --------
178 DataFrame.iat : Fast integer location scalar accessor.
179 DataFrame.loc : Purely label-location based indexer for selection by label.
180 Series.iloc : Purely integer-location based indexing for
181 selection by position.
182
183 Examples
184 --------
185 >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
186 ... {'a': 100, 'b': 200, 'c': 300, 'd': 400},
187 ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }]
188 >>> df = pd.DataFrame(mydict)
189 >>> df
190 a b c d
191 0 1 2 3 4
192 1 100 200 300 400
193 2 1000 2000 3000 4000
194
195 **Indexing just the rows**
196
197 With a scalar integer.
198
199 >>> type(df.iloc[0])
200 <class 'pandas.core.series.Series'>
201 >>> df.iloc[0]
202 a 1
203 b 2
204 c 3
205 d 4
206 Name: 0, dtype: int64
207
208 With a list of integers.
209
210 >>> df.iloc[[0]]
211 a b c d
212 0 1 2 3 4
213 >>> type(df.iloc[[0]])
214 <class 'pandas.core.frame.DataFrame'>
215
216 >>> df.iloc[[0, 1]]
217 a b c d
218 0 1 2 3 4
219 1 100 200 300 400
220
221 With a `slice` object.
222
223 >>> df.iloc[:3]
224 a b c d
225 0 1 2 3 4
226 1 100 200 300 400
227 2 1000 2000 3000 4000
228
229 With a boolean mask the same length as the index.
230
231 >>> df.iloc[[True, False, True]]
232 a b c d
233 0 1 2 3 4
234 2 1000 2000 3000 4000
235
236 With a callable, useful in method chains. The `x` passed
237 to the ``lambda`` is the DataFrame being sliced. This selects
238 the rows whose index label even.
239
240 >>> df.iloc[lambda x: x.index % 2 == 0]
241 a b c d
242 0 1 2 3 4
243 2 1000 2000 3000 4000
244
245 **Indexing both axes**
246
247 You can mix the indexer types for the index and columns. Use ``:`` to
248 select the entire axis.
249
250 With scalar integers.
251
252 >>> df.iloc[0, 1]
253 2
254
255 With lists of integers.
256
257 >>> df.iloc[[0, 2], [1, 3]]
258 b d
259 0 2 4
260 2 2000 4000
261
262 With `slice` objects.
263
264 >>> df.iloc[1:3, 0:3]
265 a b c
266 1 100 200 300
267 2 1000 2000 3000
268
269 With a boolean array whose length matches the columns.
270
271 >>> df.iloc[:, [True, False, True, False]]
272 a c
273 0 1 3
274 1 100 300
275 2 1000 3000
276
277 With a callable function that expects the Series or DataFrame.
278
279 >>> df.iloc[:, lambda df: [0, 2]]
280 a c
281 0 1 3
282 1 100 300
283 2 1000 3000
284 """
285 return _iLocIndexer("iloc", self)
286
287 @property
288 def loc(self) -> _LocIndexer:
289 """
290 Access a group of rows and columns by label(s) or a boolean array.
291
292 ``.loc[]`` is primarily label based, but may also be used with a
293 boolean array.
294
295 Allowed inputs are:
296
297 - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
298 interpreted as a *label* of the index, and **never** as an
299 integer position along the index).
300 - A list or array of labels, e.g. ``['a', 'b', 'c']``.
301 - A slice object with labels, e.g. ``'a':'f'``.
302
303 .. warning:: Note that contrary to usual python slices, **both** the
304 start and the stop are included
305
306 - A boolean array of the same length as the axis being sliced,
307 e.g. ``[True, False, True]``.
308 - An alignable boolean Series. The index of the key will be aligned before
309 masking.
310 - An alignable Index. The Index of the returned selection will be the input.
311 - A ``callable`` function with one argument (the calling Series or
312 DataFrame) and that returns valid output for indexing (one of the above)
313
314 See more at :ref:`Selection by Label <indexing.label>`.
315
316 Raises
317 ------
318 KeyError
319 If any items are not found.
320 IndexingError
321 If an indexed key is passed and its index is unalignable to the frame index.
322
323 See Also
324 --------
325 DataFrame.at : Access a single value for a row/column label pair.
326 DataFrame.iloc : Access group of rows and columns by integer position(s).
327 DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
328 Series/DataFrame.
329 Series.loc : Access group of values using labels.
330
331 Examples
332 --------
333 **Getting values**
334
335 >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
336 ... index=['cobra', 'viper', 'sidewinder'],
337 ... columns=['max_speed', 'shield'])
338 >>> df
339 max_speed shield
340 cobra 1 2
341 viper 4 5
342 sidewinder 7 8
343
344 Single label. Note this returns the row as a Series.
345
346 >>> df.loc['viper']
347 max_speed 4
348 shield 5
349 Name: viper, dtype: int64
350
351 List of labels. Note using ``[[]]`` returns a DataFrame.
352
353 >>> df.loc[['viper', 'sidewinder']]
354 max_speed shield
355 viper 4 5
356 sidewinder 7 8
357
358 Single label for row and column
359
360 >>> df.loc['cobra', 'shield']
361 2
362
363 Slice with labels for row and single label for column. As mentioned
364 above, note that both the start and stop of the slice are included.
365
366 >>> df.loc['cobra':'viper', 'max_speed']
367 cobra 1
368 viper 4
369 Name: max_speed, dtype: int64
370
371 Boolean list with the same length as the row axis
372
373 >>> df.loc[[False, False, True]]
374 max_speed shield
375 sidewinder 7 8
376
377 Alignable boolean Series:
378
379 >>> df.loc[pd.Series([False, True, False],
380 ... index=['viper', 'sidewinder', 'cobra'])]
381 max_speed shield
382 sidewinder 7 8
383
384 Index (same behavior as ``df.reindex``)
385
386 >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
387 max_speed shield
388 foo
389 cobra 1 2
390 viper 4 5
391
392 Conditional that returns a boolean Series
393
394 >>> df.loc[df['shield'] > 6]
395 max_speed shield
396 sidewinder 7 8
397
398 Conditional that returns a boolean Series with column labels specified
399
400 >>> df.loc[df['shield'] > 6, ['max_speed']]
401 max_speed
402 sidewinder 7
403
404 Callable that returns a boolean Series
405
406 >>> df.loc[lambda df: df['shield'] == 8]
407 max_speed shield
408 sidewinder 7 8
409
410 **Setting values**
411
412 Set value for all items matching the list of labels
413
414 >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
415 >>> df
416 max_speed shield
417 cobra 1 2
418 viper 4 50
419 sidewinder 7 50
420
421 Set value for an entire row
422
423 >>> df.loc['cobra'] = 10
424 >>> df
425 max_speed shield
426 cobra 10 10
427 viper 4 50
428 sidewinder 7 50
429
430 Set value for an entire column
431
432 >>> df.loc[:, 'max_speed'] = 30
433 >>> df
434 max_speed shield
435 cobra 30 10
436 viper 30 50
437 sidewinder 30 50
438
439 Set value for rows matching callable condition
440
441 >>> df.loc[df['shield'] > 35] = 0
442 >>> df
443 max_speed shield
444 cobra 30 10
445 viper 0 0
446 sidewinder 0 0
447
448 **Getting values on a DataFrame with an index that has integer labels**
449
450 Another example using integers for the index
451
452 >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
453 ... index=[7, 8, 9], columns=['max_speed', 'shield'])
454 >>> df
455 max_speed shield
456 7 1 2
457 8 4 5
458 9 7 8
459
460 Slice with integer labels for rows. As mentioned above, note that both
461 the start and stop of the slice are included.
462
463 >>> df.loc[7:9]
464 max_speed shield
465 7 1 2
466 8 4 5
467 9 7 8
468
469 **Getting values with a MultiIndex**
470
471 A number of examples using a DataFrame with a MultiIndex
472
473 >>> tuples = [
474 ... ('cobra', 'mark i'), ('cobra', 'mark ii'),
475 ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
476 ... ('viper', 'mark ii'), ('viper', 'mark iii')
477 ... ]
478 >>> index = pd.MultiIndex.from_tuples(tuples)
479 >>> values = [[12, 2], [0, 4], [10, 20],
480 ... [1, 4], [7, 1], [16, 36]]
481 >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
482 >>> df
483 max_speed shield
484 cobra mark i 12 2
485 mark ii 0 4
486 sidewinder mark i 10 20
487 mark ii 1 4
488 viper mark ii 7 1
489 mark iii 16 36
490
491 Single label. Note this returns a DataFrame with a single index.
492
493 >>> df.loc['cobra']
494 max_speed shield
495 mark i 12 2
496 mark ii 0 4
497
498 Single index tuple. Note this returns a Series.
499
500 >>> df.loc[('cobra', 'mark ii')]
501 max_speed 0
502 shield 4
503 Name: (cobra, mark ii), dtype: int64
504
505 Single label for row and column. Similar to passing in a tuple, this
506 returns a Series.
507
508 >>> df.loc['cobra', 'mark i']
509 max_speed 12
510 shield 2
511 Name: (cobra, mark i), dtype: int64
512
513 Single tuple. Note using ``[[]]`` returns a DataFrame.
514
515 >>> df.loc[[('cobra', 'mark ii')]]
516 max_speed shield
517 cobra mark ii 0 4
518
519 Single tuple for the index with a single label for the column
520
521 >>> df.loc[('cobra', 'mark i'), 'shield']
522 2
523
524 Slice from index tuple to single label
525
526 >>> df.loc[('cobra', 'mark i'):'viper']
527 max_speed shield
528 cobra mark i 12 2
529 mark ii 0 4
530 sidewinder mark i 10 20
531 mark ii 1 4
532 viper mark ii 7 1
533 mark iii 16 36
534
535 Slice from index tuple to index tuple
536
537 >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
538 max_speed shield
539 cobra mark i 12 2
540 mark ii 0 4
541 sidewinder mark i 10 20
542 mark ii 1 4
543 viper mark ii 7 1
544
545 Please see the :ref:`user guide<advanced.advanced_hierarchical>`
546 for more details and explanations of advanced indexing.
547 """
548 return _LocIndexer("loc", self)
549
550 @property
551 def at(self) -> _AtIndexer:
552 """
553 Access a single value for a row/column label pair.
554
555 Similar to ``loc``, in that both provide label-based lookups. Use
556 ``at`` if you only need to get or set a single value in a DataFrame
557 or Series.
558
559 Raises
560 ------
561 KeyError
562 * If getting a value and 'label' does not exist in a DataFrame or
563 Series.
564 ValueError
565 * If row/column label pair is not a tuple or if any label from
566 the pair is not a scalar for DataFrame.
567 * If label is list-like (*excluding* NamedTuple) for Series.
568
569 See Also
570 --------
571 DataFrame.at : Access a single value for a row/column pair by label.
572 DataFrame.iat : Access a single value for a row/column pair by integer
573 position.
574 DataFrame.loc : Access a group of rows and columns by label(s).
575 DataFrame.iloc : Access a group of rows and columns by integer
576 position(s).
577 Series.at : Access a single value by label.
578 Series.iat : Access a single value by integer position.
579 Series.loc : Access a group of rows by label(s).
580 Series.iloc : Access a group of rows by integer position(s).
581
582 Notes
583 -----
584 See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
585 for more details.
586
587 Examples
588 --------
589 >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
590 ... index=[4, 5, 6], columns=['A', 'B', 'C'])
591 >>> df
592 A B C
593 4 0 2 3
594 5 0 4 1
595 6 10 20 30
596
597 Get value at specified row/column pair
598
599 >>> df.at[4, 'B']
600 2
601
602 Set value at specified row/column pair
603
604 >>> df.at[4, 'B'] = 10
605 >>> df.at[4, 'B']
606 10
607
608 Get value within a Series
609
610 >>> df.loc[5].at['B']
611 4
612 """
613 return _AtIndexer("at", self)
614
615 @property
616 def iat(self) -> _iAtIndexer:
617 """
618 Access a single value for a row/column pair by integer position.
619
620 Similar to ``iloc``, in that both provide integer-based lookups. Use
621 ``iat`` if you only need to get or set a single value in a DataFrame
622 or Series.
623
624 Raises
625 ------
626 IndexError
627 When integer position is out of bounds.
628
629 See Also
630 --------
631 DataFrame.at : Access a single value for a row/column label pair.
632 DataFrame.loc : Access a group of rows and columns by label(s).
633 DataFrame.iloc : Access a group of rows and columns by integer position(s).
634
635 Examples
636 --------
637 >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
638 ... columns=['A', 'B', 'C'])
639 >>> df
640 A B C
641 0 0 2 3
642 1 0 4 1
643 2 10 20 30
644
645 Get value at specified row/column pair
646
647 >>> df.iat[1, 2]
648 1
649
650 Set value at specified row/column pair
651
652 >>> df.iat[1, 2] = 10
653 >>> df.iat[1, 2]
654 10
655
656 Get value within a series
657
658 >>> df.loc[0].iat[1]
659 2
660 """
661 return _iAtIndexer("iat", self)
662
663
664class _LocationIndexer(NDFrameIndexerBase):
665 _valid_types: str
666 axis: AxisInt | None = None
667
668 # sub-classes need to set _takeable
669 _takeable: bool
670
671 @final
672 def __call__(
673 self: _LocationIndexerT, axis: Axis | None = None
674 ) -> _LocationIndexerT:
675 # we need to return a copy of ourselves
676 new_self = type(self)(self.name, self.obj)
677
678 if axis is not None:
679 axis_int_none = self.obj._get_axis_number(axis)
680 else:
681 axis_int_none = axis
682 new_self.axis = axis_int_none
683 return new_self
684
685 def _get_setitem_indexer(self, key):
686 """
687 Convert a potentially-label-based key into a positional indexer.
688 """
689 if self.name == "loc":
690 # always holds here bc iloc overrides _get_setitem_indexer
691 self._ensure_listlike_indexer(key)
692
693 if isinstance(key, tuple):
694 for x in key:
695 check_dict_or_set_indexers(x)
696
697 if self.axis is not None:
698 key = _tupleize_axis_indexer(self.ndim, self.axis, key)
699
700 ax = self.obj._get_axis(0)
701
702 if isinstance(ax, MultiIndex) and self.name != "iloc" and is_hashable(key):
703 with suppress(KeyError, InvalidIndexError):
704 # TypeError e.g. passed a bool
705 return ax.get_loc(key)
706
707 if isinstance(key, tuple):
708 with suppress(IndexingError):
709 # suppress "Too many indexers"
710 return self._convert_tuple(key)
711
712 if isinstance(key, range):
713 # GH#45479 test_loc_setitem_range_key
714 key = list(key)
715
716 return self._convert_to_indexer(key, axis=0)
717
718 @final
719 def _maybe_mask_setitem_value(self, indexer, value):
720 """
721 If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
722 same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
723 similar to Series.__setitem__.
724
725 Note this is only for loc, not iloc.
726 """
727
728 if (
729 isinstance(indexer, tuple)
730 and len(indexer) == 2
731 and isinstance(value, (ABCSeries, ABCDataFrame))
732 ):
733 pi, icols = indexer
734 ndim = value.ndim
735 if com.is_bool_indexer(pi) and len(value) == len(pi):
736 newkey = pi.nonzero()[0]
737
738 if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
739 # e.g. test_loc_setitem_boolean_mask_allfalse
740 if len(newkey) == 0:
741 # FIXME: kludge for test_loc_setitem_boolean_mask_allfalse
742 # TODO(GH#45333): may be fixed when deprecation is enforced
743
744 value = value.iloc[:0]
745 else:
746 # test_loc_setitem_ndframe_values_alignment
747 value = self.obj.iloc._align_series(indexer, value)
748 indexer = (newkey, icols)
749
750 elif (
751 isinstance(icols, np.ndarray)
752 and icols.dtype.kind == "i"
753 and len(icols) == 1
754 ):
755 if ndim == 1:
756 # We implicitly broadcast, though numpy does not, see
757 # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
758 # test_loc_setitem_ndframe_values_alignment
759 value = self.obj.iloc._align_series(indexer, value)
760 indexer = (newkey, icols)
761
762 elif ndim == 2 and value.shape[1] == 1:
763 if len(newkey) == 0:
764 # FIXME: kludge for
765 # test_loc_setitem_all_false_boolean_two_blocks
766 # TODO(GH#45333): may be fixed when deprecation is enforced
767 value = value.iloc[:0]
768 else:
769 # test_loc_setitem_ndframe_values_alignment
770 value = self.obj.iloc._align_frame(indexer, value)
771 indexer = (newkey, icols)
772 elif com.is_bool_indexer(indexer):
773 indexer = indexer.nonzero()[0]
774
775 return indexer, value
776
777 @final
778 def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
779 """
780 Ensure that a list-like of column labels are all present by adding them if
781 they do not already exist.
782
783 Parameters
784 ----------
785 key : list-like of column labels
786 Target labels.
787 axis : key axis if known
788 """
789 column_axis = 1
790
791 # column only exists in 2-dimensional DataFrame
792 if self.ndim != 2:
793 return
794
795 orig_key = key
796 if isinstance(key, tuple) and len(key) > 1:
797 # key may be a tuple if we are .loc
798 # if length of key is > 1 set key to column part
799 key = key[column_axis]
800 axis = column_axis
801
802 if (
803 axis == column_axis
804 and not isinstance(self.obj.columns, MultiIndex)
805 and is_list_like_indexer(key)
806 and not com.is_bool_indexer(key)
807 and all(is_hashable(k) for k in key)
808 ):
809 # GH#38148
810 keys = self.obj.columns.union(key, sort=False)
811 diff = Index(key).difference(self.obj.columns, sort=False)
812
813 if len(diff) and com.is_null_slice(orig_key[0]):
814 # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
815 # is a new column, add the new columns with dtype=np.void
816 # so that later when we go through setitem_single_column
817 # we will use isetitem. Without this, the reindex_axis
818 # below would create float64 columns in this example, which
819 # would successfully hold 7, so we would end up with the wrong
820 # dtype.
821 indexer = np.arange(len(keys), dtype=np.intp)
822 indexer[len(self.obj.columns) :] = -1
823 new_mgr = self.obj._mgr.reindex_indexer(
824 keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
825 )
826 self.obj._mgr = new_mgr
827 return
828
829 self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
830
831 @final
832 def __setitem__(self, key, value) -> None:
833 if not PYPY and using_copy_on_write():
834 if sys.getrefcount(self.obj) <= 2:
835 warnings.warn(
836 _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
837 )
838
839 check_dict_or_set_indexers(key)
840 if isinstance(key, tuple):
841 key = tuple(list(x) if is_iterator(x) else x for x in key)
842 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
843 else:
844 key = com.apply_if_callable(key, self.obj)
845 indexer = self._get_setitem_indexer(key)
846 self._has_valid_setitem_indexer(key)
847
848 iloc = self if self.name == "iloc" else self.obj.iloc
849 iloc._setitem_with_indexer(indexer, value, self.name)
850
851 def _validate_key(self, key, axis: AxisInt):
852 """
853 Ensure that key is valid for current indexer.
854
855 Parameters
856 ----------
857 key : scalar, slice or list-like
858 Key requested.
859 axis : int
860 Dimension on which the indexing is being made.
861
862 Raises
863 ------
864 TypeError
865 If the key (or some element of it) has wrong type.
866 IndexError
867 If the key (or some element of it) is out of bounds.
868 KeyError
869 If the key was not found.
870 """
871 raise AbstractMethodError(self)
872
873 @final
874 def _expand_ellipsis(self, tup: tuple) -> tuple:
875 """
876 If a tuple key includes an Ellipsis, replace it with an appropriate
877 number of null slices.
878 """
879 if any(x is Ellipsis for x in tup):
880 if tup.count(Ellipsis) > 1:
881 raise IndexingError(_one_ellipsis_message)
882
883 if len(tup) == self.ndim:
884 # It is unambiguous what axis this Ellipsis is indexing,
885 # treat as a single null slice.
886 i = tup.index(Ellipsis)
887 # FIXME: this assumes only one Ellipsis
888 new_key = tup[:i] + (_NS,) + tup[i + 1 :]
889 return new_key
890
891 # TODO: other cases? only one test gets here, and that is covered
892 # by _validate_key_length
893 return tup
894
895 @final
896 def _validate_tuple_indexer(self, key: tuple) -> tuple:
897 """
898 Check the key for valid keys across my indexer.
899 """
900 key = self._validate_key_length(key)
901 key = self._expand_ellipsis(key)
902 for i, k in enumerate(key):
903 try:
904 self._validate_key(k, i)
905 except ValueError as err:
906 raise ValueError(
907 "Location based indexing can only have "
908 f"[{self._valid_types}] types"
909 ) from err
910 return key
911
912 @final
913 def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
914 """
915 Returns
916 -------
917 bool
918 """
919 if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
920 return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
921 return False
922
923 @final
924 def _convert_tuple(self, key: tuple) -> tuple:
925 # Note: we assume _tupleize_axis_indexer has been called, if necessary.
926 self._validate_key_length(key)
927 keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
928 return tuple(keyidx)
929
930 @final
931 def _validate_key_length(self, key: tuple) -> tuple:
932 if len(key) > self.ndim:
933 if key[0] is Ellipsis:
934 # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
935 key = key[1:]
936 if Ellipsis in key:
937 raise IndexingError(_one_ellipsis_message)
938 return self._validate_key_length(key)
939 raise IndexingError("Too many indexers")
940 return key
941
942 @final
943 def _getitem_tuple_same_dim(self, tup: tuple):
944 """
945 Index with indexers that should return an object of the same dimension
946 as self.obj.
947
948 This is only called after a failed call to _getitem_lowerdim.
949 """
950 retval = self.obj
951 for i, key in enumerate(tup):
952 if com.is_null_slice(key):
953 continue
954
955 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
956 # We should never have retval.ndim < self.ndim, as that should
957 # be handled by the _getitem_lowerdim call above.
958 assert retval.ndim == self.ndim
959
960 if retval is self.obj:
961 # if all axes were a null slice (`df.loc[:, :]`), ensure we still
962 # return a new object (https://github.com/pandas-dev/pandas/pull/49469)
963 retval = retval.copy(deep=False)
964
965 return retval
966
967 @final
968 def _getitem_lowerdim(self, tup: tuple):
969 # we can directly get the axis result since the axis is specified
970 if self.axis is not None:
971 axis = self.obj._get_axis_number(self.axis)
972 return self._getitem_axis(tup, axis=axis)
973
974 # we may have a nested tuples indexer here
975 if self._is_nested_tuple_indexer(tup):
976 return self._getitem_nested_tuple(tup)
977
978 # we maybe be using a tuple to represent multiple dimensions here
979 ax0 = self.obj._get_axis(0)
980 # ...but iloc should handle the tuple as simple integer-location
981 # instead of checking it as multiindex representation (GH 13797)
982 if (
983 isinstance(ax0, MultiIndex)
984 and self.name != "iloc"
985 and not any(isinstance(x, slice) for x in tup)
986 ):
987 # Note: in all extant test cases, replacing the slice condition with
988 # `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
989 # is equivalent.
990 # (see the other place where we call _handle_lowerdim_multi_index_axis0)
991 with suppress(IndexingError):
992 return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)
993
994 tup = self._validate_key_length(tup)
995
996 for i, key in enumerate(tup):
997 if is_label_like(key):
998 # We don't need to check for tuples here because those are
999 # caught by the _is_nested_tuple_indexer check above.
1000 section = self._getitem_axis(key, axis=i)
1001
1002 # We should never have a scalar section here, because
1003 # _getitem_lowerdim is only called after a check for
1004 # is_scalar_access, which that would be.
1005 if section.ndim == self.ndim:
1006 # we're in the middle of slicing through a MultiIndex
1007 # revise the key wrt to `section` by inserting an _NS
1008 new_key = tup[:i] + (_NS,) + tup[i + 1 :]
1009
1010 else:
1011 # Note: the section.ndim == self.ndim check above
1012 # rules out having DataFrame here, so we dont need to worry
1013 # about transposing.
1014 new_key = tup[:i] + tup[i + 1 :]
1015
1016 if len(new_key) == 1:
1017 new_key = new_key[0]
1018
1019 # Slices should return views, but calling iloc/loc with a null
1020 # slice returns a new object.
1021 if com.is_null_slice(new_key):
1022 return section
1023 # This is an elided recursive call to iloc/loc
1024 return getattr(section, self.name)[new_key]
1025
1026 raise IndexingError("not applicable")
1027
1028 @final
1029 def _getitem_nested_tuple(self, tup: tuple):
1030 # we have a nested tuple so have at least 1 multi-index level
1031 # we should be able to match up the dimensionality here
1032
1033 for key in tup:
1034 check_dict_or_set_indexers(key)
1035
1036 # we have too many indexers for our dim, but have at least 1
1037 # multi-index dimension, try to see if we have something like
1038 # a tuple passed to a series with a multi-index
1039 if len(tup) > self.ndim:
1040 if self.name != "loc":
1041 # This should never be reached, but let's be explicit about it
1042 raise ValueError("Too many indices") # pragma: no cover
1043 if all(is_hashable(x) or com.is_null_slice(x) for x in tup):
1044 # GH#10521 Series should reduce MultiIndex dimensions instead of
1045 # DataFrame, IndexingError is not raised when slice(None,None,None)
1046 # with one row.
1047 with suppress(IndexingError):
1048 return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
1049 tup
1050 )
1051 elif isinstance(self.obj, ABCSeries) and any(
1052 isinstance(k, tuple) for k in tup
1053 ):
1054 # GH#35349 Raise if tuple in tuple for series
1055 # Do this after the all-hashable-or-null-slice check so that
1056 # we are only getting non-hashable tuples, in particular ones
1057 # that themselves contain a slice entry
1058 # See test_loc_series_getitem_too_many_dimensions
1059 raise IndexingError("Too many indexers")
1060
1061 # this is a series with a multi-index specified a tuple of
1062 # selectors
1063 axis = self.axis or 0
1064 return self._getitem_axis(tup, axis=axis)
1065
1066 # handle the multi-axis by taking sections and reducing
1067 # this is iterative
1068 obj = self.obj
1069 # GH#41369 Loop in reverse order ensures indexing along columns before rows
1070 # which selects only necessary blocks which avoids dtype conversion if possible
1071 axis = len(tup) - 1
1072 for key in tup[::-1]:
1073 if com.is_null_slice(key):
1074 axis -= 1
1075 continue
1076
1077 obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
1078 axis -= 1
1079
1080 # if we have a scalar, we are done
1081 if is_scalar(obj) or not hasattr(obj, "ndim"):
1082 break
1083
1084 return obj
1085
1086 def _convert_to_indexer(self, key, axis: AxisInt):
1087 raise AbstractMethodError(self)
1088
1089 @final
1090 def __getitem__(self, key):
1091 check_dict_or_set_indexers(key)
1092 if type(key) is tuple:
1093 key = tuple(list(x) if is_iterator(x) else x for x in key)
1094 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1095 if self._is_scalar_access(key):
1096 return self.obj._get_value(*key, takeable=self._takeable)
1097 return self._getitem_tuple(key)
1098 else:
1099 # we by definition only have the 0th axis
1100 axis = self.axis or 0
1101
1102 maybe_callable = com.apply_if_callable(key, self.obj)
1103 return self._getitem_axis(maybe_callable, axis=axis)
1104
1105 def _is_scalar_access(self, key: tuple):
1106 raise NotImplementedError()
1107
1108 def _getitem_tuple(self, tup: tuple):
1109 raise AbstractMethodError(self)
1110
1111 def _getitem_axis(self, key, axis: AxisInt):
1112 raise NotImplementedError()
1113
1114 def _has_valid_setitem_indexer(self, indexer) -> bool:
1115 raise AbstractMethodError(self)
1116
1117 @final
1118 def _getbool_axis(self, key, axis: AxisInt):
1119 # caller is responsible for ensuring non-None axis
1120 labels = self.obj._get_axis(axis)
1121 key = check_bool_indexer(labels, key)
1122 inds = key.nonzero()[0]
1123 return self.obj._take_with_is_copy(inds, axis=axis)
1124
1125
1126@doc(IndexingMixin.loc)
1127class _LocIndexer(_LocationIndexer):
1128 _takeable: bool = False
1129 _valid_types = (
1130 "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
1131 "endpoints included! Can be slices of integers if the "
1132 "index is integers), listlike of labels, boolean"
1133 )
1134
1135 # -------------------------------------------------------------------
1136 # Key Checks
1137
1138 @doc(_LocationIndexer._validate_key)
1139 def _validate_key(self, key, axis: Axis):
1140 # valid for a collection of labels (we check their presence later)
1141 # slice of labels (where start-end in labels)
1142 # slice of integers (only if in the labels)
1143 # boolean not in slice and with boolean index
1144 ax = self.obj._get_axis(axis)
1145 if isinstance(key, bool) and not (
1146 is_bool_dtype(ax)
1147 or ax.dtype.name == "boolean"
1148 or isinstance(ax, MultiIndex)
1149 and is_bool_dtype(ax.get_level_values(0))
1150 ):
1151 raise KeyError(
1152 f"{key}: boolean label can not be used without a boolean index"
1153 )
1154
1155 if isinstance(key, slice) and (
1156 isinstance(key.start, bool) or isinstance(key.stop, bool)
1157 ):
1158 raise TypeError(f"{key}: boolean values can not be used in a slice")
1159
1160 def _has_valid_setitem_indexer(self, indexer) -> bool:
1161 return True
1162
1163 def _is_scalar_access(self, key: tuple) -> bool:
1164 """
1165 Returns
1166 -------
1167 bool
1168 """
1169 # this is a shortcut accessor to both .loc and .iloc
1170 # that provide the equivalent access of .at and .iat
1171 # a) avoid getting things via sections and (to minimize dtype changes)
1172 # b) provide a performant path
1173 if len(key) != self.ndim:
1174 return False
1175
1176 for i, k in enumerate(key):
1177 if not is_scalar(k):
1178 return False
1179
1180 ax = self.obj.axes[i]
1181 if isinstance(ax, MultiIndex):
1182 return False
1183
1184 if isinstance(k, str) and ax._supports_partial_string_indexing:
1185 # partial string indexing, df.loc['2000', 'A']
1186 # should not be considered scalar
1187 return False
1188
1189 if not ax._index_as_unique:
1190 return False
1191
1192 return True
1193
1194 # -------------------------------------------------------------------
1195 # MultiIndex Handling
1196
1197 def _multi_take_opportunity(self, tup: tuple) -> bool:
1198 """
1199 Check whether there is the possibility to use ``_multi_take``.
1200
1201 Currently the limit is that all axes being indexed, must be indexed with
1202 list-likes.
1203
1204 Parameters
1205 ----------
1206 tup : tuple
1207 Tuple of indexers, one per axis.
1208
1209 Returns
1210 -------
1211 bool
1212 Whether the current indexing,
1213 can be passed through `_multi_take`.
1214 """
1215 if not all(is_list_like_indexer(x) for x in tup):
1216 return False
1217
1218 # just too complicated
1219 return not any(com.is_bool_indexer(x) for x in tup)
1220
1221 def _multi_take(self, tup: tuple):
1222 """
1223 Create the indexers for the passed tuple of keys, and
1224 executes the take operation. This allows the take operation to be
1225 executed all at once, rather than once for each dimension.
1226 Improving efficiency.
1227
1228 Parameters
1229 ----------
1230 tup : tuple
1231 Tuple of indexers, one per axis.
1232
1233 Returns
1234 -------
1235 values: same type as the object being indexed
1236 """
1237 # GH 836
1238 d = {
1239 axis: self._get_listlike_indexer(key, axis)
1240 for (key, axis) in zip(tup, self.obj._AXIS_ORDERS)
1241 }
1242 return self.obj._reindex_with_indexers(d, copy=True, allow_dups=True)
1243
1244 # -------------------------------------------------------------------
1245
1246 def _getitem_iterable(self, key, axis: AxisInt):
1247 """
1248 Index current object with an iterable collection of keys.
1249
1250 Parameters
1251 ----------
1252 key : iterable
1253 Targeted labels.
1254 axis : int
1255 Dimension on which the indexing is being made.
1256
1257 Raises
1258 ------
1259 KeyError
1260 If no key was found. Will change in the future to raise if not all
1261 keys were found.
1262
1263 Returns
1264 -------
1265 scalar, DataFrame, or Series: indexed value(s).
1266 """
1267 # we assume that not com.is_bool_indexer(key), as that is
1268 # handled before we get here.
1269 self._validate_key(key, axis)
1270
1271 # A collection of keys
1272 keyarr, indexer = self._get_listlike_indexer(key, axis)
1273 return self.obj._reindex_with_indexers(
1274 {axis: [keyarr, indexer]}, copy=True, allow_dups=True
1275 )
1276
1277 def _getitem_tuple(self, tup: tuple):
1278 with suppress(IndexingError):
1279 tup = self._expand_ellipsis(tup)
1280 return self._getitem_lowerdim(tup)
1281
1282 # no multi-index, so validate all of the indexers
1283 tup = self._validate_tuple_indexer(tup)
1284
1285 # ugly hack for GH #836
1286 if self._multi_take_opportunity(tup):
1287 return self._multi_take(tup)
1288
1289 return self._getitem_tuple_same_dim(tup)
1290
1291 def _get_label(self, label, axis: AxisInt):
1292 # GH#5567 this will fail if the label is not present in the axis.
1293 return self.obj.xs(label, axis=axis)
1294
1295 def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
1296 # we have an axis0 multi-index, handle or raise
1297 axis = self.axis or 0
1298 try:
1299 # fast path for series or for tup devoid of slices
1300 return self._get_label(tup, axis=axis)
1301
1302 except KeyError as ek:
1303 # raise KeyError if number of indexers match
1304 # else IndexingError will be raised
1305 if self.ndim < len(tup) <= self.obj.index.nlevels:
1306 raise ek
1307 raise IndexingError("No label returned") from ek
1308
1309 def _getitem_axis(self, key, axis: AxisInt):
1310 key = item_from_zerodim(key)
1311 if is_iterator(key):
1312 key = list(key)
1313 if key is Ellipsis:
1314 key = slice(None)
1315
1316 labels = self.obj._get_axis(axis)
1317
1318 if isinstance(key, tuple) and isinstance(labels, MultiIndex):
1319 key = tuple(key)
1320
1321 if isinstance(key, slice):
1322 self._validate_key(key, axis)
1323 return self._get_slice_axis(key, axis=axis)
1324 elif com.is_bool_indexer(key):
1325 return self._getbool_axis(key, axis=axis)
1326 elif is_list_like_indexer(key):
1327 # an iterable multi-selection
1328 if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):
1329 if hasattr(key, "ndim") and key.ndim > 1:
1330 raise ValueError("Cannot index with multidimensional key")
1331
1332 return self._getitem_iterable(key, axis=axis)
1333
1334 # nested tuple slicing
1335 if is_nested_tuple(key, labels):
1336 locs = labels.get_locs(key)
1337 indexer = [slice(None)] * self.ndim
1338 indexer[axis] = locs
1339 return self.obj.iloc[tuple(indexer)]
1340
1341 # fall thru to straight lookup
1342 self._validate_key(key, axis)
1343 return self._get_label(key, axis=axis)
1344
1345 def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
1346 """
1347 This is pretty simple as we just have to deal with labels.
1348 """
1349 # caller is responsible for ensuring non-None axis
1350 obj = self.obj
1351 if not need_slice(slice_obj):
1352 return obj.copy(deep=False)
1353
1354 labels = obj._get_axis(axis)
1355 indexer = labels.slice_indexer(slice_obj.start, slice_obj.stop, slice_obj.step)
1356
1357 if isinstance(indexer, slice):
1358 return self.obj._slice(indexer, axis=axis)
1359 else:
1360 # DatetimeIndex overrides Index.slice_indexer and may
1361 # return a DatetimeIndex instead of a slice object.
1362 return self.obj.take(indexer, axis=axis)
1363
1364 def _convert_to_indexer(self, key, axis: AxisInt):
1365 """
1366 Convert indexing key into something we can use to do actual fancy
1367 indexing on a ndarray.
1368
1369 Examples
1370 ix[:5] -> slice(0, 5)
1371 ix[[1,2,3]] -> [1,2,3]
1372 ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)
1373
1374 Going by Zen of Python?
1375 'In the face of ambiguity, refuse the temptation to guess.'
1376 raise AmbiguousIndexError with integer labels?
1377 - No, prefer label-based indexing
1378 """
1379 labels = self.obj._get_axis(axis)
1380
1381 if isinstance(key, slice):
1382 return labels._convert_slice_indexer(key, kind="loc")
1383
1384 if (
1385 isinstance(key, tuple)
1386 and not isinstance(labels, MultiIndex)
1387 and self.ndim < 2
1388 and len(key) > 1
1389 ):
1390 raise IndexingError("Too many indexers")
1391
1392 if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
1393 # Otherwise get_loc will raise InvalidIndexError
1394
1395 # if we are a label return me
1396 try:
1397 return labels.get_loc(key)
1398 except LookupError:
1399 if isinstance(key, tuple) and isinstance(labels, MultiIndex):
1400 if len(key) == labels.nlevels:
1401 return {"key": key}
1402 raise
1403 except InvalidIndexError:
1404 # GH35015, using datetime as column indices raises exception
1405 if not isinstance(labels, MultiIndex):
1406 raise
1407 except ValueError:
1408 if not is_integer(key):
1409 raise
1410 return {"key": key}
1411
1412 if is_nested_tuple(key, labels):
1413 if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
1414 # GH#35349 Raise if tuple in tuple for series
1415 raise IndexingError("Too many indexers")
1416 return labels.get_locs(key)
1417
1418 elif is_list_like_indexer(key):
1419 if is_iterator(key):
1420 key = list(key)
1421
1422 if com.is_bool_indexer(key):
1423 key = check_bool_indexer(labels, key)
1424 return key
1425 else:
1426 return self._get_listlike_indexer(key, axis)[1]
1427 else:
1428 try:
1429 return labels.get_loc(key)
1430 except LookupError:
1431 # allow a not found key only if we are a setter
1432 if not is_list_like_indexer(key):
1433 return {"key": key}
1434 raise
1435
1436 def _get_listlike_indexer(self, key, axis: AxisInt):
1437 """
1438 Transform a list-like of keys into a new index and an indexer.
1439
1440 Parameters
1441 ----------
1442 key : list-like
1443 Targeted labels.
1444 axis: int
1445 Dimension on which the indexing is being made.
1446
1447 Raises
1448 ------
1449 KeyError
1450 If at least one key was requested but none was found.
1451
1452 Returns
1453 -------
1454 keyarr: Index
1455 New index (coinciding with 'key' if the axis is unique).
1456 values : array-like
1457 Indexer for the return object, -1 denotes keys not found.
1458 """
1459 ax = self.obj._get_axis(axis)
1460 axis_name = self.obj._get_axis_name(axis)
1461
1462 keyarr, indexer = ax._get_indexer_strict(key, axis_name)
1463
1464 return keyarr, indexer
1465
1466
1467@doc(IndexingMixin.iloc)
1468class _iLocIndexer(_LocationIndexer):
1469 _valid_types = (
1470 "integer, integer slice (START point is INCLUDED, END "
1471 "point is EXCLUDED), listlike of integers, boolean array"
1472 )
1473 _takeable = True
1474
1475 # -------------------------------------------------------------------
1476 # Key Checks
1477
1478 def _validate_key(self, key, axis: AxisInt):
1479 if com.is_bool_indexer(key):
1480 if hasattr(key, "index") and isinstance(key.index, Index):
1481 if key.index.inferred_type == "integer":
1482 raise NotImplementedError(
1483 "iLocation based boolean "
1484 "indexing on an integer type "
1485 "is not available"
1486 )
1487 raise ValueError(
1488 "iLocation based boolean indexing cannot use "
1489 "an indexable as a mask"
1490 )
1491 return
1492
1493 if isinstance(key, slice):
1494 return
1495 elif is_integer(key):
1496 self._validate_integer(key, axis)
1497 elif isinstance(key, tuple):
1498 # a tuple should already have been caught by this point
1499 # so don't treat a tuple as a valid indexer
1500 raise IndexingError("Too many indexers")
1501 elif is_list_like_indexer(key):
1502 if isinstance(key, ABCSeries):
1503 arr = key._values
1504 elif is_array_like(key):
1505 arr = key
1506 else:
1507 arr = np.array(key)
1508 len_axis = len(self.obj._get_axis(axis))
1509
1510 # check that the key has a numeric dtype
1511 if not is_numeric_dtype(arr.dtype):
1512 raise IndexError(f".iloc requires numeric indexers, got {arr}")
1513
1514 # check that the key does not exceed the maximum size of the index
1515 if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
1516 raise IndexError("positional indexers are out-of-bounds")
1517 else:
1518 raise ValueError(f"Can only index by location with a [{self._valid_types}]")
1519
1520 def _has_valid_setitem_indexer(self, indexer) -> bool:
1521 """
1522 Validate that a positional indexer cannot enlarge its target
1523 will raise if needed, does not modify the indexer externally.
1524
1525 Returns
1526 -------
1527 bool
1528 """
1529 if isinstance(indexer, dict):
1530 raise IndexError("iloc cannot enlarge its target object")
1531
1532 if isinstance(indexer, ABCDataFrame):
1533 raise TypeError(
1534 "DataFrame indexer for .iloc is not supported. "
1535 "Consider using .loc with a DataFrame indexer for automatic alignment.",
1536 )
1537
1538 if not isinstance(indexer, tuple):
1539 indexer = _tuplify(self.ndim, indexer)
1540
1541 for ax, i in zip(self.obj.axes, indexer):
1542 if isinstance(i, slice):
1543 # should check the stop slice?
1544 pass
1545 elif is_list_like_indexer(i):
1546 # should check the elements?
1547 pass
1548 elif is_integer(i):
1549 if i >= len(ax):
1550 raise IndexError("iloc cannot enlarge its target object")
1551 elif isinstance(i, dict):
1552 raise IndexError("iloc cannot enlarge its target object")
1553
1554 return True
1555
1556 def _is_scalar_access(self, key: tuple) -> bool:
1557 """
1558 Returns
1559 -------
1560 bool
1561 """
1562 # this is a shortcut accessor to both .loc and .iloc
1563 # that provide the equivalent access of .at and .iat
1564 # a) avoid getting things via sections and (to minimize dtype changes)
1565 # b) provide a performant path
1566 if len(key) != self.ndim:
1567 return False
1568
1569 return all(is_integer(k) for k in key)
1570
1571 def _validate_integer(self, key: int, axis: AxisInt) -> None:
1572 """
1573 Check that 'key' is a valid position in the desired axis.
1574
1575 Parameters
1576 ----------
1577 key : int
1578 Requested position.
1579 axis : int
1580 Desired axis.
1581
1582 Raises
1583 ------
1584 IndexError
1585 If 'key' is not a valid position in axis 'axis'.
1586 """
1587 len_axis = len(self.obj._get_axis(axis))
1588 if key >= len_axis or key < -len_axis:
1589 raise IndexError("single positional indexer is out-of-bounds")
1590
1591 # -------------------------------------------------------------------
1592
1593 def _getitem_tuple(self, tup: tuple):
1594 tup = self._validate_tuple_indexer(tup)
1595 with suppress(IndexingError):
1596 return self._getitem_lowerdim(tup)
1597
1598 return self._getitem_tuple_same_dim(tup)
1599
1600 def _get_list_axis(self, key, axis: AxisInt):
1601 """
1602 Return Series values by list or array of integers.
1603
1604 Parameters
1605 ----------
1606 key : list-like positional indexer
1607 axis : int
1608
1609 Returns
1610 -------
1611 Series object
1612
1613 Notes
1614 -----
1615 `axis` can only be zero.
1616 """
1617 try:
1618 return self.obj._take_with_is_copy(key, axis=axis)
1619 except IndexError as err:
1620 # re-raise with different error message
1621 raise IndexError("positional indexers are out-of-bounds") from err
1622
1623 def _getitem_axis(self, key, axis: AxisInt):
1624 if key is Ellipsis:
1625 key = slice(None)
1626 elif isinstance(key, ABCDataFrame):
1627 raise IndexError(
1628 "DataFrame indexer is not allowed for .iloc\n"
1629 "Consider using .loc for automatic alignment."
1630 )
1631
1632 if isinstance(key, slice):
1633 return self._get_slice_axis(key, axis=axis)
1634
1635 if is_iterator(key):
1636 key = list(key)
1637
1638 if isinstance(key, list):
1639 key = np.asarray(key)
1640
1641 if com.is_bool_indexer(key):
1642 self._validate_key(key, axis)
1643 return self._getbool_axis(key, axis=axis)
1644
1645 # a list of integers
1646 elif is_list_like_indexer(key):
1647 return self._get_list_axis(key, axis=axis)
1648
1649 # a single integer
1650 else:
1651 key = item_from_zerodim(key)
1652 if not is_integer(key):
1653 raise TypeError("Cannot index by location index with a non-integer key")
1654
1655 # validate the location
1656 self._validate_integer(key, axis)
1657
1658 return self.obj._ixs(key, axis=axis)
1659
1660 def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
1661 # caller is responsible for ensuring non-None axis
1662 obj = self.obj
1663
1664 if not need_slice(slice_obj):
1665 return obj.copy(deep=False)
1666
1667 labels = obj._get_axis(axis)
1668 labels._validate_positional_slice(slice_obj)
1669 return self.obj._slice(slice_obj, axis=axis)
1670
1671 def _convert_to_indexer(self, key, axis: AxisInt):
1672 """
1673 Much simpler as we only have to deal with our valid types.
1674 """
1675 return key
1676
1677 def _get_setitem_indexer(self, key):
1678 # GH#32257 Fall through to let numpy do validation
1679 if is_iterator(key):
1680 key = list(key)
1681
1682 if self.axis is not None:
1683 key = _tupleize_axis_indexer(self.ndim, self.axis, key)
1684
1685 return key
1686
1687 # -------------------------------------------------------------------
1688
1689 def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
1690 """
1691 _setitem_with_indexer is for setting values on a Series/DataFrame
1692 using positional indexers.
1693
1694 If the relevant keys are not present, the Series/DataFrame may be
1695 expanded.
1696
1697 This method is currently broken when dealing with non-unique Indexes,
1698 since it goes from positional indexers back to labels when calling
1699 BlockManager methods, see GH#12991, GH#22046, GH#15686.
1700 """
1701 info_axis = self.obj._info_axis_number
1702
1703 # maybe partial set
1704 take_split_path = not self.obj._mgr.is_single_block
1705
1706 if not take_split_path and isinstance(value, ABCDataFrame):
1707 # Avoid cast of values
1708 take_split_path = not value._mgr.is_single_block
1709
1710 # if there is only one block/type, still have to take split path
1711 # unless the block is one-dimensional or it can hold the value
1712 if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
1713 # in case of dict, keys are indices
1714 val = list(value.values()) if isinstance(value, dict) else value
1715 arr = self.obj._mgr.arrays[0]
1716 take_split_path = not can_hold_element(
1717 arr, extract_array(val, extract_numpy=True)
1718 )
1719
1720 # if we have any multi-indexes that have non-trivial slices
1721 # (not null slices) then we must take the split path, xref
1722 # GH 10360, GH 27841
1723 if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
1724 for i, ax in zip(indexer, self.obj.axes):
1725 if isinstance(ax, MultiIndex) and not (
1726 is_integer(i) or com.is_null_slice(i)
1727 ):
1728 take_split_path = True
1729 break
1730
1731 if isinstance(indexer, tuple):
1732 nindexer = []
1733 for i, idx in enumerate(indexer):
1734 if isinstance(idx, dict):
1735 # reindex the axis to the new value
1736 # and set inplace
1737 key, _ = convert_missing_indexer(idx)
1738
1739 # if this is the items axes, then take the main missing
1740 # path first
1741 # this correctly sets the dtype and avoids cache issues
1742 # essentially this separates out the block that is needed
1743 # to possibly be modified
1744 if self.ndim > 1 and i == info_axis:
1745 # add the new item, and set the value
1746 # must have all defined axes if we have a scalar
1747 # or a list-like on the non-info axes if we have a
1748 # list-like
1749 if not len(self.obj):
1750 if not is_list_like_indexer(value):
1751 raise ValueError(
1752 "cannot set a frame with no "
1753 "defined index and a scalar"
1754 )
1755 self.obj[key] = value
1756 return
1757
1758 # add a new item with the dtype setup
1759 if com.is_null_slice(indexer[0]):
1760 # We are setting an entire column
1761 self.obj[key] = value
1762 return
1763 elif is_array_like(value):
1764 # GH#42099
1765 arr = extract_array(value, extract_numpy=True)
1766 taker = -1 * np.ones(len(self.obj), dtype=np.intp)
1767 empty_value = algos.take_nd(arr, taker)
1768 if not isinstance(value, ABCSeries):
1769 # if not Series (in which case we need to align),
1770 # we can short-circuit
1771 if (
1772 isinstance(arr, np.ndarray)
1773 and arr.ndim == 1
1774 and len(arr) == 1
1775 ):
1776 # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
1777 arr = arr[0, ...]
1778 empty_value[indexer[0]] = arr
1779 self.obj[key] = empty_value
1780 return
1781
1782 self.obj[key] = empty_value
1783
1784 else:
1785 # FIXME: GH#42099#issuecomment-864326014
1786 self.obj[key] = infer_fill_value(value)
1787
1788 new_indexer = convert_from_missing_indexer_tuple(
1789 indexer, self.obj.axes
1790 )
1791 self._setitem_with_indexer(new_indexer, value, name)
1792
1793 return
1794
1795 # reindex the axis
1796 # make sure to clear the cache because we are
1797 # just replacing the block manager here
1798 # so the object is the same
1799 index = self.obj._get_axis(i)
1800 labels = index.insert(len(index), key)
1801
1802 # We are expanding the Series/DataFrame values to match
1803 # the length of thenew index `labels`. GH#40096 ensure
1804 # this is valid even if the index has duplicates.
1805 taker = np.arange(len(index) + 1, dtype=np.intp)
1806 taker[-1] = -1
1807 reindexers = {i: (labels, taker)}
1808 new_obj = self.obj._reindex_with_indexers(
1809 reindexers, allow_dups=True
1810 )
1811 self.obj._mgr = new_obj._mgr
1812 self.obj._maybe_update_cacher(clear=True)
1813 self.obj._is_copy = None
1814
1815 nindexer.append(labels.get_loc(key))
1816
1817 else:
1818 nindexer.append(idx)
1819
1820 indexer = tuple(nindexer)
1821 else:
1822 indexer, missing = convert_missing_indexer(indexer)
1823
1824 if missing:
1825 self._setitem_with_indexer_missing(indexer, value)
1826 return
1827
1828 if name == "loc":
1829 # must come after setting of missing
1830 indexer, value = self._maybe_mask_setitem_value(indexer, value)
1831
1832 # align and set the values
1833 if take_split_path:
1834 # We have to operate column-wise
1835 self._setitem_with_indexer_split_path(indexer, value, name)
1836 else:
1837 self._setitem_single_block(indexer, value, name)
1838
1839 def _setitem_with_indexer_split_path(self, indexer, value, name: str):
1840 """
1841 Setitem column-wise.
1842 """
1843 # Above we only set take_split_path to True for 2D cases
1844 assert self.ndim == 2
1845
1846 if not isinstance(indexer, tuple):
1847 indexer = _tuplify(self.ndim, indexer)
1848 if len(indexer) > self.ndim:
1849 raise IndexError("too many indices for array")
1850 if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
1851 raise ValueError(r"Cannot set values with ndim > 2")
1852
1853 if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
1854 from pandas import Series
1855
1856 value = self._align_series(indexer, Series(value))
1857
1858 # Ensure we have something we can iterate over
1859 info_axis = indexer[1]
1860 ilocs = self._ensure_iterable_column_indexer(info_axis)
1861
1862 pi = indexer[0]
1863 lplane_indexer = length_of_indexer(pi, self.obj.index)
1864 # lplane_indexer gives the expected length of obj[indexer[0]]
1865
1866 # we need an iterable, with a ndim of at least 1
1867 # eg. don't pass through np.array(0)
1868 if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
1869 if isinstance(value, ABCDataFrame):
1870 self._setitem_with_indexer_frame_value(indexer, value, name)
1871
1872 elif np.ndim(value) == 2:
1873 # TODO: avoid np.ndim call in case it isn't an ndarray, since
1874 # that will construct an ndarray, which will be wasteful
1875 self._setitem_with_indexer_2d_value(indexer, value)
1876
1877 elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
1878 # We are setting multiple rows in a single column.
1879 self._setitem_single_column(ilocs[0], value, pi)
1880
1881 elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
1882 # We are trying to set N values into M entries of a single
1883 # column, which is invalid for N != M
1884 # Exclude zero-len for e.g. boolean masking that is all-false
1885
1886 if len(value) == 1 and not is_integer(info_axis):
1887 # This is a case like df.iloc[:3, [1]] = [0]
1888 # where we treat as df.iloc[:3, 1] = 0
1889 return self._setitem_with_indexer((pi, info_axis[0]), value[0])
1890
1891 raise ValueError(
1892 "Must have equal len keys and value "
1893 "when setting with an iterable"
1894 )
1895
1896 elif lplane_indexer == 0 and len(value) == len(self.obj.index):
1897 # We get here in one case via .loc with a all-False mask
1898 pass
1899
1900 elif self._is_scalar_access(indexer) and is_object_dtype(
1901 self.obj.dtypes[ilocs[0]]
1902 ):
1903 # We are setting nested data, only possible for object dtype data
1904 self._setitem_single_column(indexer[1], value, pi)
1905
1906 elif len(ilocs) == len(value):
1907 # We are setting multiple columns in a single row.
1908 for loc, v in zip(ilocs, value):
1909 self._setitem_single_column(loc, v, pi)
1910
1911 elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
1912 # This is a setitem-with-expansion, see
1913 # test_loc_setitem_empty_append_expands_rows_mixed_dtype
1914 # e.g. df = DataFrame(columns=["x", "y"])
1915 # df["x"] = df["x"].astype(np.int64)
1916 # df.loc[:, "x"] = [1, 2, 3]
1917 self._setitem_single_column(ilocs[0], value, pi)
1918
1919 else:
1920 raise ValueError(
1921 "Must have equal len keys and value "
1922 "when setting with an iterable"
1923 )
1924
1925 else:
1926 # scalar value
1927 for loc in ilocs:
1928 self._setitem_single_column(loc, value, pi)
1929
1930 def _setitem_with_indexer_2d_value(self, indexer, value):
1931 # We get here with np.ndim(value) == 2, excluding DataFrame,
1932 # which goes through _setitem_with_indexer_frame_value
1933 pi = indexer[0]
1934
1935 ilocs = self._ensure_iterable_column_indexer(indexer[1])
1936
1937 if not is_array_like(value):
1938 # cast lists to array
1939 value = np.array(value, dtype=object)
1940 if len(ilocs) != value.shape[1]:
1941 raise ValueError(
1942 "Must have equal len keys and value when setting with an ndarray"
1943 )
1944
1945 for i, loc in enumerate(ilocs):
1946 value_col = value[:, i]
1947 if is_object_dtype(value_col.dtype):
1948 # casting to list so that we do type inference in setitem_single_column
1949 value_col = value_col.tolist()
1950 self._setitem_single_column(loc, value_col, pi)
1951
1952 def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
1953 ilocs = self._ensure_iterable_column_indexer(indexer[1])
1954
1955 sub_indexer = list(indexer)
1956 pi = indexer[0]
1957
1958 multiindex_indexer = isinstance(self.obj.columns, MultiIndex)
1959
1960 unique_cols = value.columns.is_unique
1961
1962 # We do not want to align the value in case of iloc GH#37728
1963 if name == "iloc":
1964 for i, loc in enumerate(ilocs):
1965 val = value.iloc[:, i]
1966 self._setitem_single_column(loc, val, pi)
1967
1968 elif not unique_cols and value.columns.equals(self.obj.columns):
1969 # We assume we are already aligned, see
1970 # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
1971 for loc in ilocs:
1972 item = self.obj.columns[loc]
1973 if item in value:
1974 sub_indexer[1] = item
1975 val = self._align_series(
1976 tuple(sub_indexer),
1977 value.iloc[:, loc],
1978 multiindex_indexer,
1979 )
1980 else:
1981 val = np.nan
1982
1983 self._setitem_single_column(loc, val, pi)
1984
1985 elif not unique_cols:
1986 raise ValueError("Setting with non-unique columns is not allowed.")
1987
1988 else:
1989 for loc in ilocs:
1990 item = self.obj.columns[loc]
1991 if item in value:
1992 sub_indexer[1] = item
1993 val = self._align_series(
1994 tuple(sub_indexer), value[item], multiindex_indexer
1995 )
1996 else:
1997 val = np.nan
1998
1999 self._setitem_single_column(loc, val, pi)
2000
2001 def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
2002 """
2003
2004 Parameters
2005 ----------
2006 loc : int
2007 Indexer for column position
2008 plane_indexer : int, slice, listlike[int]
2009 The indexer we use for setitem along axis=0.
2010 """
2011 pi = plane_indexer
2012
2013 is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))
2014
2015 is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0
2016
2017 if is_null_setter:
2018 # no-op, don't cast dtype later
2019 return
2020
2021 elif is_full_setter:
2022 try:
2023 self.obj._mgr.column_setitem(
2024 loc, plane_indexer, value, inplace_only=True
2025 )
2026 except (ValueError, TypeError, LossySetitemError):
2027 # If we're setting an entire column and we can't do it inplace,
2028 # then we can use value's dtype (or inferred dtype)
2029 # instead of object
2030 self.obj.isetitem(loc, value)
2031 else:
2032 # set value into the column (first attempting to operate inplace, then
2033 # falling back to casting if necessary)
2034 self.obj._mgr.column_setitem(loc, plane_indexer, value)
2035
2036 self.obj._clear_item_cache()
2037
2038 def _setitem_single_block(self, indexer, value, name: str) -> None:
2039 """
2040 _setitem_with_indexer for the case when we have a single Block.
2041 """
2042 from pandas import Series
2043
2044 info_axis = self.obj._info_axis_number
2045 item_labels = self.obj._get_axis(info_axis)
2046 if isinstance(indexer, tuple):
2047 # if we are setting on the info axis ONLY
2048 # set using those methods to avoid block-splitting
2049 # logic here
2050 if (
2051 self.ndim == len(indexer) == 2
2052 and is_integer(indexer[1])
2053 and com.is_null_slice(indexer[0])
2054 ):
2055 col = item_labels[indexer[info_axis]]
2056 if len(item_labels.get_indexer_for([col])) == 1:
2057 # e.g. test_loc_setitem_empty_append_expands_rows
2058 loc = item_labels.get_loc(col)
2059 self._setitem_single_column(loc, value, indexer[0])
2060 return
2061
2062 indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align
2063
2064 if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
2065 # TODO(EA): ExtensionBlock.setitem this causes issues with
2066 # setting for extensionarrays that store dicts. Need to decide
2067 # if it's worth supporting that.
2068 value = self._align_series(indexer, Series(value))
2069
2070 elif isinstance(value, ABCDataFrame) and name != "iloc":
2071 value = self._align_frame(indexer, value)._values
2072
2073 # check for chained assignment
2074 self.obj._check_is_chained_assignment_possible()
2075
2076 # actually do the set
2077 self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)
2078 self.obj._maybe_update_cacher(clear=True, inplace=True)
2079
2080 def _setitem_with_indexer_missing(self, indexer, value):
2081 """
2082 Insert new row(s) or column(s) into the Series or DataFrame.
2083 """
2084 from pandas import Series
2085
2086 # reindex the axis to the new value
2087 # and set inplace
2088 if self.ndim == 1:
2089 index = self.obj.index
2090 new_index = index.insert(len(index), indexer)
2091
2092 # we have a coerced indexer, e.g. a float
2093 # that matches in an int64 Index, so
2094 # we will not create a duplicate index, rather
2095 # index to that element
2096 # e.g. 0.0 -> 0
2097 # GH#12246
2098 if index.is_unique:
2099 # pass new_index[-1:] instead if [new_index[-1]]
2100 # so that we retain dtype
2101 new_indexer = index.get_indexer(new_index[-1:])
2102 if (new_indexer != -1).any():
2103 # We get only here with loc, so can hard code
2104 return self._setitem_with_indexer(new_indexer, value, "loc")
2105
2106 # this preserves dtype of the value and of the object
2107 if not is_scalar(value):
2108 new_dtype = None
2109
2110 elif is_valid_na_for_dtype(value, self.obj.dtype):
2111 if not is_object_dtype(self.obj.dtype):
2112 # Every NA value is suitable for object, no conversion needed
2113 value = na_value_for_dtype(self.obj.dtype, compat=False)
2114
2115 new_dtype = maybe_promote(self.obj.dtype, value)[0]
2116
2117 elif isna(value):
2118 new_dtype = None
2119 elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
2120 # We should not cast, if we have object dtype because we can
2121 # set timedeltas into object series
2122 curr_dtype = self.obj.dtype
2123 curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
2124 new_dtype = maybe_promote(curr_dtype, value)[0]
2125 else:
2126 new_dtype = None
2127
2128 new_values = Series([value], dtype=new_dtype)._values
2129
2130 if len(self.obj._values):
2131 # GH#22717 handle casting compatibility that np.concatenate
2132 # does incorrectly
2133 new_values = concat_compat([self.obj._values, new_values])
2134 self.obj._mgr = self.obj._constructor(
2135 new_values, index=new_index, name=self.obj.name
2136 )._mgr
2137 self.obj._maybe_update_cacher(clear=True)
2138
2139 elif self.ndim == 2:
2140 if not len(self.obj.columns):
2141 # no columns and scalar
2142 raise ValueError("cannot set a frame with no defined columns")
2143
2144 has_dtype = hasattr(value, "dtype")
2145 if isinstance(value, ABCSeries):
2146 # append a Series
2147 value = value.reindex(index=self.obj.columns, copy=True)
2148 value.name = indexer
2149 elif isinstance(value, dict):
2150 value = Series(
2151 value, index=self.obj.columns, name=indexer, dtype=object
2152 )
2153 else:
2154 # a list-list
2155 if is_list_like_indexer(value):
2156 # must have conforming columns
2157 if len(value) != len(self.obj.columns):
2158 raise ValueError("cannot set a row with mismatched columns")
2159
2160 value = Series(value, index=self.obj.columns, name=indexer)
2161
2162 if not len(self.obj):
2163 # We will ignore the existing dtypes instead of using
2164 # internals.concat logic
2165 df = value.to_frame().T
2166
2167 idx = self.obj.index
2168 if isinstance(idx, MultiIndex):
2169 name = idx.names
2170 else:
2171 name = idx.name
2172
2173 df.index = Index([indexer], name=name)
2174 if not has_dtype:
2175 # i.e. if we already had a Series or ndarray, keep that
2176 # dtype. But if we had a list or dict, then do inference
2177 df = df.infer_objects(copy=False)
2178 self.obj._mgr = df._mgr
2179 else:
2180 self.obj._mgr = self.obj._append(value)._mgr
2181 self.obj._maybe_update_cacher(clear=True)
2182
2183 def _ensure_iterable_column_indexer(self, column_indexer):
2184 """
2185 Ensure that our column indexer is something that can be iterated over.
2186 """
2187 ilocs: Sequence[int] | np.ndarray
2188 if is_integer(column_indexer):
2189 ilocs = [column_indexer]
2190 elif isinstance(column_indexer, slice):
2191 ilocs = np.arange(len(self.obj.columns))[column_indexer]
2192 elif isinstance(column_indexer, np.ndarray) and is_bool_dtype(
2193 column_indexer.dtype
2194 ):
2195 ilocs = np.arange(len(column_indexer))[column_indexer]
2196 else:
2197 ilocs = column_indexer
2198 return ilocs
2199
2200 def _align_series(self, indexer, ser: Series, multiindex_indexer: bool = False):
2201 """
2202 Parameters
2203 ----------
2204 indexer : tuple, slice, scalar
2205 Indexer used to get the locations that will be set to `ser`.
2206 ser : pd.Series
2207 Values to assign to the locations specified by `indexer`.
2208 multiindex_indexer : bool, optional
2209 Defaults to False. Should be set to True if `indexer` was from
2210 a `pd.MultiIndex`, to avoid unnecessary broadcasting.
2211
2212 Returns
2213 -------
2214 `np.array` of `ser` broadcast to the appropriate shape for assignment
2215 to the locations selected by `indexer`
2216 """
2217 if isinstance(indexer, (slice, np.ndarray, list, Index)):
2218 indexer = (indexer,)
2219
2220 if isinstance(indexer, tuple):
2221 # flatten np.ndarray indexers
2222 def ravel(i):
2223 return i.ravel() if isinstance(i, np.ndarray) else i
2224
2225 indexer = tuple(map(ravel, indexer))
2226
2227 aligners = [not com.is_null_slice(idx) for idx in indexer]
2228 sum_aligners = sum(aligners)
2229 single_aligner = sum_aligners == 1
2230 is_frame = self.ndim == 2
2231 obj = self.obj
2232
2233 # are we a single alignable value on a non-primary
2234 # dim (e.g. panel: 1,2, or frame: 0) ?
2235 # hence need to align to a single axis dimension
2236 # rather that find all valid dims
2237
2238 # frame
2239 if is_frame:
2240 single_aligner = single_aligner and aligners[0]
2241
2242 # we have a frame, with multiple indexers on both axes; and a
2243 # series, so need to broadcast (see GH5206)
2244 if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
2245 ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values
2246
2247 # single indexer
2248 if len(indexer) > 1 and not multiindex_indexer:
2249 len_indexer = len(indexer[1])
2250 ser_values = (
2251 np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
2252 )
2253
2254 return ser_values
2255
2256 for i, idx in enumerate(indexer):
2257 ax = obj.axes[i]
2258
2259 # multiple aligners (or null slices)
2260 if is_sequence(idx) or isinstance(idx, slice):
2261 if single_aligner and com.is_null_slice(idx):
2262 continue
2263 new_ix = ax[idx]
2264 if not is_list_like_indexer(new_ix):
2265 new_ix = Index([new_ix])
2266 else:
2267 new_ix = Index(new_ix)
2268 if ser.index.equals(new_ix) or not len(new_ix):
2269 return ser._values.copy()
2270
2271 return ser.reindex(new_ix)._values
2272
2273 # 2 dims
2274 elif single_aligner:
2275 # reindex along index
2276 ax = self.obj.axes[1]
2277 if ser.index.equals(ax) or not len(ax):
2278 return ser._values.copy()
2279 return ser.reindex(ax)._values
2280
2281 elif is_integer(indexer) and self.ndim == 1:
2282 if is_object_dtype(self.obj):
2283 return ser
2284 ax = self.obj._get_axis(0)
2285
2286 if ser.index.equals(ax):
2287 return ser._values.copy()
2288
2289 return ser.reindex(ax)._values[indexer]
2290
2291 elif is_integer(indexer):
2292 ax = self.obj._get_axis(1)
2293
2294 if ser.index.equals(ax):
2295 return ser._values.copy()
2296
2297 return ser.reindex(ax)._values
2298
2299 raise ValueError("Incompatible indexer with Series")
2300
2301 def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
2302 is_frame = self.ndim == 2
2303
2304 if isinstance(indexer, tuple):
2305 idx, cols = None, None
2306 sindexers = []
2307 for i, ix in enumerate(indexer):
2308 ax = self.obj.axes[i]
2309 if is_sequence(ix) or isinstance(ix, slice):
2310 if isinstance(ix, np.ndarray):
2311 ix = ix.ravel()
2312 if idx is None:
2313 idx = ax[ix]
2314 elif cols is None:
2315 cols = ax[ix]
2316 else:
2317 break
2318 else:
2319 sindexers.append(i)
2320
2321 if idx is not None and cols is not None:
2322 if df.index.equals(idx) and df.columns.equals(cols):
2323 val = df.copy()
2324 else:
2325 val = df.reindex(idx, columns=cols)
2326 return val
2327
2328 elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
2329 ax = self.obj.index[indexer]
2330 if df.index.equals(ax):
2331 val = df.copy()
2332 else:
2333 # we have a multi-index and are trying to align
2334 # with a particular, level GH3738
2335 if (
2336 isinstance(ax, MultiIndex)
2337 and isinstance(df.index, MultiIndex)
2338 and ax.nlevels != df.index.nlevels
2339 ):
2340 raise TypeError(
2341 "cannot align on a multi-index with out "
2342 "specifying the join levels"
2343 )
2344
2345 val = df.reindex(index=ax)
2346 return val
2347
2348 raise ValueError("Incompatible indexer with DataFrame")
2349
2350
2351class _ScalarAccessIndexer(NDFrameIndexerBase):
2352 """
2353 Access scalars quickly.
2354 """
2355
2356 # sub-classes need to set _takeable
2357 _takeable: bool
2358
2359 def _convert_key(self, key):
2360 raise AbstractMethodError(self)
2361
2362 def __getitem__(self, key):
2363 if not isinstance(key, tuple):
2364 # we could have a convertible item here (e.g. Timestamp)
2365 if not is_list_like_indexer(key):
2366 key = (key,)
2367 else:
2368 raise ValueError("Invalid call for scalar access (getting)!")
2369
2370 key = self._convert_key(key)
2371 return self.obj._get_value(*key, takeable=self._takeable)
2372
2373 def __setitem__(self, key, value) -> None:
2374 if isinstance(key, tuple):
2375 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
2376 else:
2377 # scalar callable may return tuple
2378 key = com.apply_if_callable(key, self.obj)
2379
2380 if not isinstance(key, tuple):
2381 key = _tuplify(self.ndim, key)
2382 key = list(self._convert_key(key))
2383 if len(key) != self.ndim:
2384 raise ValueError("Not enough indexers for scalar access (setting)!")
2385
2386 self.obj._set_value(*key, value=value, takeable=self._takeable)
2387
2388
2389@doc(IndexingMixin.at)
2390class _AtIndexer(_ScalarAccessIndexer):
2391 _takeable = False
2392
2393 def _convert_key(self, key):
2394 """
2395 Require they keys to be the same type as the index. (so we don't
2396 fallback)
2397 """
2398 # GH 26989
2399 # For series, unpacking key needs to result in the label.
2400 # This is already the case for len(key) == 1; e.g. (1,)
2401 if self.ndim == 1 and len(key) > 1:
2402 key = (key,)
2403
2404 return key
2405
2406 @property
2407 def _axes_are_unique(self) -> bool:
2408 # Only relevant for self.ndim == 2
2409 assert self.ndim == 2
2410 return self.obj.index.is_unique and self.obj.columns.is_unique
2411
2412 def __getitem__(self, key):
2413 if self.ndim == 2 and not self._axes_are_unique:
2414 # GH#33041 fall back to .loc
2415 if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
2416 raise ValueError("Invalid call for scalar access (getting)!")
2417 return self.obj.loc[key]
2418
2419 return super().__getitem__(key)
2420
2421 def __setitem__(self, key, value):
2422 if self.ndim == 2 and not self._axes_are_unique:
2423 # GH#33041 fall back to .loc
2424 if not isinstance(key, tuple) or not all(is_scalar(x) for x in key):
2425 raise ValueError("Invalid call for scalar access (setting)!")
2426
2427 self.obj.loc[key] = value
2428 return
2429
2430 return super().__setitem__(key, value)
2431
2432
2433@doc(IndexingMixin.iat)
2434class _iAtIndexer(_ScalarAccessIndexer):
2435 _takeable = True
2436
2437 def _convert_key(self, key):
2438 """
2439 Require integer args. (and convert to label arguments)
2440 """
2441 for i in key:
2442 if not is_integer(i):
2443 raise ValueError("iAt based indexing can only have integer indexers")
2444 return key
2445
2446
2447def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
2448 """
2449 Given an indexer for the first dimension, create an equivalent tuple
2450 for indexing over all dimensions.
2451
2452 Parameters
2453 ----------
2454 ndim : int
2455 loc : object
2456
2457 Returns
2458 -------
2459 tuple
2460 """
2461 _tup: list[Hashable | slice]
2462 _tup = [slice(None, None) for _ in range(ndim)]
2463 _tup[0] = loc
2464 return tuple(_tup)
2465
2466
2467def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:
2468 """
2469 If we have an axis, adapt the given key to be axis-independent.
2470 """
2471 new_key = [slice(None)] * ndim
2472 new_key[axis] = key
2473 return tuple(new_key)
2474
2475
2476def check_bool_indexer(index: Index, key) -> np.ndarray:
2477 """
2478 Check if key is a valid boolean indexer for an object with such index and
2479 perform reindexing or conversion if needed.
2480
2481 This function assumes that is_bool_indexer(key) == True.
2482
2483 Parameters
2484 ----------
2485 index : Index
2486 Index of the object on which the indexing is done.
2487 key : list-like
2488 Boolean indexer to check.
2489
2490 Returns
2491 -------
2492 np.array
2493 Resulting key.
2494
2495 Raises
2496 ------
2497 IndexError
2498 If the key does not have the same length as index.
2499 IndexingError
2500 If the index of the key is unalignable to index.
2501 """
2502 result = key
2503 if isinstance(key, ABCSeries) and not key.index.equals(index):
2504 indexer = result.index.get_indexer_for(index)
2505 if -1 in indexer:
2506 raise IndexingError(
2507 "Unalignable boolean Series provided as "
2508 "indexer (index of the boolean Series and of "
2509 "the indexed object do not match)."
2510 )
2511
2512 result = result.take(indexer)
2513
2514 # fall through for boolean
2515 if not is_extension_array_dtype(result.dtype):
2516 return result.astype(bool)._values
2517
2518 if is_object_dtype(key):
2519 # key might be object-dtype bool, check_array_indexer needs bool array
2520 result = np.asarray(result, dtype=bool)
2521 elif not is_array_like(result):
2522 # GH 33924
2523 # key may contain nan elements, check_array_indexer needs bool array
2524 result = pd_array(result, dtype=bool)
2525 return check_array_indexer(index, result)
2526
2527
2528def convert_missing_indexer(indexer):
2529 """
2530 Reverse convert a missing indexer, which is a dict
2531 return the scalar indexer and a boolean indicating if we converted
2532 """
2533 if isinstance(indexer, dict):
2534 # a missing key (but not a tuple indexer)
2535 indexer = indexer["key"]
2536
2537 if isinstance(indexer, bool):
2538 raise KeyError("cannot use a single bool to index into setitem")
2539 return indexer, True
2540
2541 return indexer, False
2542
2543
2544def convert_from_missing_indexer_tuple(indexer, axes):
2545 """
2546 Create a filtered indexer that doesn't have any missing indexers.
2547 """
2548
2549 def get_indexer(_i, _idx):
2550 return axes[_i].get_loc(_idx["key"]) if isinstance(_idx, dict) else _idx
2551
2552 return tuple(get_indexer(_i, _idx) for _i, _idx in enumerate(indexer))
2553
2554
2555def maybe_convert_ix(*args):
2556 """
2557 We likely want to take the cross-product.
2558 """
2559 for arg in args:
2560 if not isinstance(arg, (np.ndarray, list, ABCSeries, Index)):
2561 return args
2562 return np.ix_(*args)
2563
2564
2565def is_nested_tuple(tup, labels) -> bool:
2566 """
2567 Returns
2568 -------
2569 bool
2570 """
2571 # check for a compatible nested tuple and multiindexes among the axes
2572 if not isinstance(tup, tuple):
2573 return False
2574
2575 for k in tup:
2576 if is_list_like(k) or isinstance(k, slice):
2577 return isinstance(labels, MultiIndex)
2578
2579 return False
2580
2581
2582def is_label_like(key) -> bool:
2583 """
2584 Returns
2585 -------
2586 bool
2587 """
2588 # select a label or row
2589 return (
2590 not isinstance(key, slice)
2591 and not is_list_like_indexer(key)
2592 and key is not Ellipsis
2593 )
2594
2595
2596def need_slice(obj: slice) -> bool:
2597 """
2598 Returns
2599 -------
2600 bool
2601 """
2602 return (
2603 obj.start is not None
2604 or obj.stop is not None
2605 or (obj.step is not None and obj.step != 1)
2606 )
2607
2608
2609def check_dict_or_set_indexers(key) -> None:
2610 """
2611 Check if the indexer is or contains a dict or set, which is no longer allowed.
2612 """
2613 if (
2614 isinstance(key, set)
2615 or isinstance(key, tuple)
2616 and any(isinstance(x, set) for x in key)
2617 ):
2618 raise TypeError(
2619 "Passing a set as an indexer is not supported. Use a list instead."
2620 )
2621
2622 if (
2623 isinstance(key, dict)
2624 or isinstance(key, tuple)
2625 and any(isinstance(x, dict) for x in key)
2626 ):
2627 raise TypeError(
2628 "Passing a dict as an indexer is not supported. Use a list instead."
2629 )