1from __future__ import annotations
2
3from contextlib import suppress
4import sys
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 TypeVar,
9 cast,
10 final,
11)
12import warnings
13
14import numpy as np
15
16from pandas._config import (
17 using_copy_on_write,
18 warn_copy_on_write,
19)
20
21from pandas._libs.indexing import NDFrameIndexerBase
22from pandas._libs.lib import item_from_zerodim
23from pandas.compat import PYPY
24from pandas.errors import (
25 AbstractMethodError,
26 ChainedAssignmentError,
27 IndexingError,
28 InvalidIndexError,
29 LossySetitemError,
30 _chained_assignment_msg,
31 _chained_assignment_warning_msg,
32 _check_cacher,
33)
34from pandas.util._decorators import doc
35from pandas.util._exceptions import find_stack_level
36
37from pandas.core.dtypes.cast import (
38 can_hold_element,
39 maybe_promote,
40)
41from pandas.core.dtypes.common import (
42 is_array_like,
43 is_bool_dtype,
44 is_hashable,
45 is_integer,
46 is_iterator,
47 is_list_like,
48 is_numeric_dtype,
49 is_object_dtype,
50 is_scalar,
51 is_sequence,
52)
53from pandas.core.dtypes.concat import concat_compat
54from pandas.core.dtypes.dtypes import ExtensionDtype
55from pandas.core.dtypes.generic import (
56 ABCDataFrame,
57 ABCSeries,
58)
59from pandas.core.dtypes.missing import (
60 construct_1d_array_from_inferred_fill_value,
61 infer_fill_value,
62 is_valid_na_for_dtype,
63 isna,
64 na_value_for_dtype,
65)
66
67from pandas.core import algorithms as algos
68import pandas.core.common as com
69from pandas.core.construction import (
70 array as pd_array,
71 extract_array,
72)
73from pandas.core.indexers import (
74 check_array_indexer,
75 is_list_like_indexer,
76 is_scalar_indexer,
77 length_of_indexer,
78)
79from pandas.core.indexes.api import (
80 Index,
81 MultiIndex,
82)
83
84if TYPE_CHECKING:
85 from collections.abc import (
86 Hashable,
87 Sequence,
88 )
89
90 from pandas._typing import (
91 Axis,
92 AxisInt,
93 Self,
94 npt,
95 )
96
97 from pandas import (
98 DataFrame,
99 Series,
100 )
101
# Generic type variable used in annotations within this module.
T = TypeVar("T")
# "null slice": the slice object equivalent to a bare ``:``
_NS = slice(None, None)
# Message used when an indexer key contains more than one ``...`` entry.
_one_ellipsis_message = "indexer may only contain one '...' entry"
106
107
# Implementation class for the module-level ``IndexSlice`` instance defined below
class _IndexSlice:
    """
    Create an object to more easily perform multi-index slicing.

    Indexing an instance with ``[...]`` simply returns the key that was
    passed, so slice syntax such as ``idx[:, 'B0':'B1']`` can be used to
    build the tuple-of-slices that ``.loc`` expects.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------
    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11
    """

    def __getitem__(self, arg):
        # Return the key exactly as Python built it from the ``[...]`` syntax.
        return arg
151
152
# Shared module-level instance of ``_IndexSlice``.
IndexSlice = _IndexSlice()
154
155
class IndexingMixin:
    """
    Mixin for adding .loc/.iloc/.at/.iat to DataFrames and Series.

    Each accessor is a property that builds a fresh indexer object bound to
    ``self`` on every access.
    """

    @property
    def iloc(self) -> _iLocIndexer:
        """
        Purely integer-location based indexing for selection by position.

        .. deprecated:: 2.2.0

           Returning a tuple from a callable is deprecated.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on
          some value.
        - A tuple of row and column indexes. The tuple elements consist of one of the
          above inputs, e.g. ``(0, 1)``.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
                       selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index labels are even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        return _iLocIndexer("iloc", self)

    @property
    def loc(self) -> _LocIndexer:
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - An alignable boolean Series. The index of the key will be aligned before
          masking.
        - An alignable Index. The Index of the returned selection will be the input.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).

        See more at :ref:`Selection by Label <indexing.label>`.

        Raises
        ------
        KeyError
            If any items are not found.
        IndexingError
            If an indexed key is passed and its index is unalignable to the frame index.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
                       Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=['cobra', 'viper', 'sidewinder'],
        ...                   columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Alignable boolean Series:

        >>> df.loc[pd.Series([False, True, False],
        ...                  index=['viper', 'sidewinder', 'cobra'])]
                    max_speed  shield
        sidewinder          7       8

        Index (same behavior as ``df.reindex``)

        >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
               max_speed  shield
        foo
        cobra          1       2
        viper          4       5

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Multiple conditional using ``&`` that returns a boolean Series

        >>> df.loc[(df['max_speed'] > 1) & (df['shield'] < 8)]
               max_speed  shield
        viper          4       5

        Multiple conditional using ``|`` that returns a boolean Series

        >>> df.loc[(df['max_speed'] > 4) | (df['shield'] < 5)]
                    max_speed  shield
        cobra               1       2
        sidewinder          7       8

        Please ensure that each condition is wrapped in parentheses ``()``.
        See the :ref:`user guide<indexing.boolean>`
        for more details and explanations of Boolean indexing.

        .. note::
            If you find yourself using 3 or more conditionals in ``.loc[]``,
            consider using :ref:`advanced indexing<advanced.advanced_hierarchical>`.

            See below for using ``.loc[]`` on MultiIndex DataFrames.

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching a boolean condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        Add value matching location

        >>> df.loc["viper", "shield"] += 5
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       5
        sidewinder          0       0

        Setting using a ``Series`` or a ``DataFrame`` sets the values matching the
        index labels, not the index positions.

        >>> shuffled_df = df.loc[["viper", "cobra", "sidewinder"]]
        >>> df.loc[:] += shuffled_df
        >>> df
                    max_speed  shield
        cobra              60      20
        viper               0      10
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...     ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...     ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...     ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...           [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1

        Please see the :ref:`user guide<advanced.advanced_hierarchical>`
        for more details and explanations of advanced indexing.
        """
        return _LocIndexer("loc", self)

    @property
    def at(self) -> _AtIndexer:
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            If getting a value and 'label' does not exist in a DataFrame or Series.

        ValueError
            If row/column label pair is not a tuple or if any label
            from the pair is not a scalar for DataFrame.
            If label is list-like (*excluding* NamedTuple) for Series.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column pair by label.
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer
            position(s).
        Series.at : Access a single value by label.
        Series.iat : Access a single value by integer position.
        Series.loc : Access a group of rows by label(s).
        Series.iloc : Access a group of rows by integer position(s).

        Notes
        -----
        See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        return _AtIndexer("at", self)

    @property
    def iat(self) -> _iAtIndexer:
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a Series

        >>> df.loc[0].iat[1]
        2
        """
        return _iAtIndexer("iat", self)
723
724
725class _LocationIndexer(NDFrameIndexerBase):
726 _valid_types: str
727 axis: AxisInt | None = None
728
729 # sub-classes need to set _takeable
730 _takeable: bool
731
732 @final
733 def __call__(self, axis: Axis | None = None) -> Self:
734 # we need to return a copy of ourselves
735 new_self = type(self)(self.name, self.obj)
736
737 if axis is not None:
738 axis_int_none = self.obj._get_axis_number(axis)
739 else:
740 axis_int_none = axis
741 new_self.axis = axis_int_none
742 return new_self
743
744 def _get_setitem_indexer(self, key):
745 """
746 Convert a potentially-label-based key into a positional indexer.
747 """
748 if self.name == "loc":
749 # always holds here bc iloc overrides _get_setitem_indexer
750 self._ensure_listlike_indexer(key)
751
752 if isinstance(key, tuple):
753 for x in key:
754 check_dict_or_set_indexers(x)
755
756 if self.axis is not None:
757 key = _tupleize_axis_indexer(self.ndim, self.axis, key)
758
759 ax = self.obj._get_axis(0)
760
761 if (
762 isinstance(ax, MultiIndex)
763 and self.name != "iloc"
764 and is_hashable(key)
765 and not isinstance(key, slice)
766 ):
767 with suppress(KeyError, InvalidIndexError):
768 # TypeError e.g. passed a bool
769 return ax.get_loc(key)
770
771 if isinstance(key, tuple):
772 with suppress(IndexingError):
773 # suppress "Too many indexers"
774 return self._convert_tuple(key)
775
776 if isinstance(key, range):
777 # GH#45479 test_loc_setitem_range_key
778 key = list(key)
779
780 return self._convert_to_indexer(key, axis=0)
781
782 @final
783 def _maybe_mask_setitem_value(self, indexer, value):
784 """
785 If we have obj.iloc[mask] = series_or_frame and series_or_frame has the
786 same length as obj, we treat this as obj.iloc[mask] = series_or_frame[mask],
787 similar to Series.__setitem__.
788
789 Note this is only for loc, not iloc.
790 """
791
792 if (
793 isinstance(indexer, tuple)
794 and len(indexer) == 2
795 and isinstance(value, (ABCSeries, ABCDataFrame))
796 ):
797 pi, icols = indexer
798 ndim = value.ndim
799 if com.is_bool_indexer(pi) and len(value) == len(pi):
800 newkey = pi.nonzero()[0]
801
802 if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
803 # e.g. test_loc_setitem_boolean_mask_allfalse
804 # test_loc_setitem_ndframe_values_alignment
805 value = self.obj.iloc._align_series(indexer, value)
806 indexer = (newkey, icols)
807
808 elif (
809 isinstance(icols, np.ndarray)
810 and icols.dtype.kind == "i"
811 and len(icols) == 1
812 ):
813 if ndim == 1:
814 # We implicitly broadcast, though numpy does not, see
815 # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
816 # test_loc_setitem_ndframe_values_alignment
817 value = self.obj.iloc._align_series(indexer, value)
818 indexer = (newkey, icols)
819
820 elif ndim == 2 and value.shape[1] == 1:
821 # test_loc_setitem_ndframe_values_alignment
822 value = self.obj.iloc._align_frame(indexer, value)
823 indexer = (newkey, icols)
824 elif com.is_bool_indexer(indexer):
825 indexer = indexer.nonzero()[0]
826
827 return indexer, value
828
829 @final
830 def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
831 """
832 Ensure that a list-like of column labels are all present by adding them if
833 they do not already exist.
834
835 Parameters
836 ----------
837 key : list-like of column labels
838 Target labels.
839 axis : key axis if known
840 """
841 column_axis = 1
842
843 # column only exists in 2-dimensional DataFrame
844 if self.ndim != 2:
845 return
846
847 if isinstance(key, tuple) and len(key) > 1:
848 # key may be a tuple if we are .loc
849 # if length of key is > 1 set key to column part
850 key = key[column_axis]
851 axis = column_axis
852
853 if (
854 axis == column_axis
855 and not isinstance(self.obj.columns, MultiIndex)
856 and is_list_like_indexer(key)
857 and not com.is_bool_indexer(key)
858 and all(is_hashable(k) for k in key)
859 ):
860 # GH#38148
861 keys = self.obj.columns.union(key, sort=False)
862 diff = Index(key).difference(self.obj.columns, sort=False)
863
864 if len(diff):
865 # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
866 # is a new column, add the new columns with dtype=np.void
867 # so that later when we go through setitem_single_column
868 # we will use isetitem. Without this, the reindex_axis
869 # below would create float64 columns in this example, which
870 # would successfully hold 7, so we would end up with the wrong
871 # dtype.
872 indexer = np.arange(len(keys), dtype=np.intp)
873 indexer[len(self.obj.columns) :] = -1
874 new_mgr = self.obj._mgr.reindex_indexer(
875 keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
876 )
877 self.obj._mgr = new_mgr
878 return
879
880 self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)
881
    @final
    def __setitem__(self, key, value) -> None:
        """
        Set ``value`` at the location(s) selected by ``key``.

        The key is normalised (iterators inside a tuple key are materialised
        into lists, callables are applied to ``self.obj``), converted to an
        indexer via ``_get_setitem_indexer`` and passed to the ``iloc``
        indexer's ``_setitem_with_indexer`` together with ``self.name``.

        Before that, on non-PyPy interpreters, the reference count of
        ``self.obj`` is inspected and a chained-assignment warning is emitted
        when it is at or below a small threshold.
        """
        # NOTE(review): the checks below call ``sys.getrefcount(self.obj)``
        # directly and compare against hard-coded thresholds; binding
        # ``self.obj`` to a local first would add a reference and shift the
        # comparison, and moving the ``warnings.warn`` calls into a helper
        # would change what ``stacklevel=2`` points at.
        if not PYPY and using_copy_on_write():
            if sys.getrefcount(self.obj) <= 2:
                warnings.warn(
                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
                )
        elif not PYPY and not using_copy_on_write():
            ctr = sys.getrefcount(self.obj)
            ref_count = 2
            if not warn_copy_on_write() and _check_cacher(self.obj):
                # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
                ref_count += 1
            if ctr <= ref_count:
                warnings.warn(
                    _chained_assignment_warning_msg, FutureWarning, stacklevel=2
                )

        check_dict_or_set_indexers(key)
        if isinstance(key, tuple):
            # materialise iterators first, then resolve callables, per element
            key = tuple(list(x) if is_iterator(x) else x for x in key)
            key = tuple(com.apply_if_callable(x, self.obj) for x in key)
        else:
            maybe_callable = com.apply_if_callable(key, self.obj)
            key = self._check_deprecated_callable_usage(key, maybe_callable)
        indexer = self._get_setitem_indexer(key)
        # called for validation only; the return value is not used
        self._has_valid_setitem_indexer(key)

        # the actual write always goes through the iloc indexer
        iloc = self if self.name == "iloc" else self.obj.iloc
        iloc._setitem_with_indexer(indexer, value, self.name)
912
    def _validate_key(self, key, axis: AxisInt):
        """
        Ensure that key is valid for current indexer.

        This base implementation is abstract and always raises
        ``AbstractMethodError``; the exceptions listed under Raises describe
        what an implementation is expected to raise for an invalid key.

        Parameters
        ----------
        key : scalar, slice or list-like
            Key requested.
        axis : int
            Dimension on which the indexing is being made.

        Raises
        ------
        TypeError
            If the key (or some element of it) has wrong type.
        IndexError
            If the key (or some element of it) is out of bounds.
        KeyError
            If the key was not found.
        """
        raise AbstractMethodError(self)
934
935 @final
936 def _expand_ellipsis(self, tup: tuple) -> tuple:
937 """
938 If a tuple key includes an Ellipsis, replace it with an appropriate
939 number of null slices.
940 """
941 if any(x is Ellipsis for x in tup):
942 if tup.count(Ellipsis) > 1:
943 raise IndexingError(_one_ellipsis_message)
944
945 if len(tup) == self.ndim:
946 # It is unambiguous what axis this Ellipsis is indexing,
947 # treat as a single null slice.
948 i = tup.index(Ellipsis)
949 # FIXME: this assumes only one Ellipsis
950 new_key = tup[:i] + (_NS,) + tup[i + 1 :]
951 return new_key
952
953 # TODO: other cases? only one test gets here, and that is covered
954 # by _validate_key_length
955 return tup
956
957 @final
958 def _validate_tuple_indexer(self, key: tuple) -> tuple:
959 """
960 Check the key for valid keys across my indexer.
961 """
962 key = self._validate_key_length(key)
963 key = self._expand_ellipsis(key)
964 for i, k in enumerate(key):
965 try:
966 self._validate_key(k, i)
967 except ValueError as err:
968 raise ValueError(
969 "Location based indexing can only have "
970 f"[{self._valid_types}] types"
971 ) from err
972 return key
973
974 @final
975 def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
976 """
977 Returns
978 -------
979 bool
980 """
981 if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):
982 return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)
983 return False
984
985 @final
986 def _convert_tuple(self, key: tuple) -> tuple:
987 # Note: we assume _tupleize_axis_indexer has been called, if necessary.
988 self._validate_key_length(key)
989 keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
990 return tuple(keyidx)
991
992 @final
993 def _validate_key_length(self, key: tuple) -> tuple:
994 if len(key) > self.ndim:
995 if key[0] is Ellipsis:
996 # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]
997 key = key[1:]
998 if Ellipsis in key:
999 raise IndexingError(_one_ellipsis_message)
1000 return self._validate_key_length(key)
1001 raise IndexingError("Too many indexers")
1002 return key
1003
1004 @final
1005 def _getitem_tuple_same_dim(self, tup: tuple):
1006 """
1007 Index with indexers that should return an object of the same dimension
1008 as self.obj.
1009
1010 This is only called after a failed call to _getitem_lowerdim.
1011 """
1012 retval = self.obj
1013 # Selecting columns before rows is significantly faster
1014 start_val = (self.ndim - len(tup)) + 1
1015 for i, key in enumerate(reversed(tup)):
1016 i = self.ndim - i - start_val
1017 if com.is_null_slice(key):
1018 continue
1019
1020 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)
1021 # We should never have retval.ndim < self.ndim, as that should
1022 # be handled by the _getitem_lowerdim call above.
1023 assert retval.ndim == self.ndim
1024
1025 if retval is self.obj:
1026 # if all axes were a null slice (`df.loc[:, :]`), ensure we still
1027 # return a new object (https://github.com/pandas-dev/pandas/pull/49469)
1028 retval = retval.copy(deep=False)
1029
1030 return retval
1031
    @final
    def _getitem_lowerdim(self, tup: tuple):
        """
        Resolve a tuple key by taking a section along one axis and then
        indexing that section with the remainder of the key.

        Raises
        ------
        IndexingError
            With message "not applicable" when no entry of ``tup`` is
            label-like (per ``is_label_like``), or from the key-length check.
        """
        # we can directly get the axis result since the axis is specified
        if self.axis is not None:
            axis = self.obj._get_axis_number(self.axis)
            return self._getitem_axis(tup, axis=axis)

        # we may have a nested tuples indexer here
        if self._is_nested_tuple_indexer(tup):
            return self._getitem_nested_tuple(tup)

        # we may be using a tuple to represent multiple dimensions here
        ax0 = self.obj._get_axis(0)
        # ...but iloc should handle the tuple as simple integer-location
        # instead of checking it as multiindex representation (GH 13797)
        if (
            isinstance(ax0, MultiIndex)
            and self.name != "iloc"
            and not any(isinstance(x, slice) for x in tup)
        ):
            # Note: in all extant test cases, replacing the slice condition with
            #  `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
            #  is equivalent.
            #  (see the other place where we call _handle_lowerdim_multi_index_axis0)
            # An IndexingError here is swallowed and we fall through to the
            # generic per-axis handling below.
            with suppress(IndexingError):
                return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)

        tup = self._validate_key_length(tup)

        # Only the first label-like entry is processed by this loop: both
        # branches below return.
        for i, key in enumerate(tup):
            if is_label_like(key):
                # We don't need to check for tuples here because those are
                #  caught by the _is_nested_tuple_indexer check above.
                section = self._getitem_axis(key, axis=i)

                # We should never have a scalar section here, because
                #  _getitem_lowerdim is only called after a check for
                #  is_scalar_access, which that would be.
                if section.ndim == self.ndim:
                    # we're in the middle of slicing through a MultiIndex
                    # revise the key with respect to `section` by inserting an _NS
                    new_key = tup[:i] + (_NS,) + tup[i + 1 :]

                else:
                    # Note: the section.ndim == self.ndim check above
                    #  rules out having DataFrame here, so we don't need to worry
                    #  about transposing.
                    new_key = tup[:i] + tup[i + 1 :]

                    if len(new_key) == 1:
                        new_key = new_key[0]

                # Slices should return views, but calling iloc/loc with a null
                # slice returns a new object.
                if com.is_null_slice(new_key):
                    return section
                # This is an elided recursive call to iloc/loc
                return getattr(section, self.name)[new_key]

        raise IndexingError("not applicable")
1092
    @final
    def _getitem_nested_tuple(self, tup: tuple):
        """
        Resolve a nested tuple key on an object with at least one MultiIndex axis.

        If the key has more entries than ``self.ndim`` it is handled as a
        single-axis key; otherwise each entry is applied to its own axis,
        last axis first.

        Raises
        ------
        ValueError
            If the key is longer than ``self.ndim`` and this is not ``loc``.
        IndexingError
            If the key is longer than ``self.ndim``, the object is a Series
            and the key contains a tuple that did not pass the
            all-hashable-or-null-slice check.
        """
        # we have a nested tuple so have at least 1 multi-index level
        # we should be able to match up the dimensionality here

        def _contains_slice(x: object) -> bool:
            # Check if object is a slice or a tuple containing a slice
            if isinstance(x, tuple):
                return any(isinstance(v, slice) for v in x)
            elif isinstance(x, slice):
                return True
            return False

        for key in tup:
            check_dict_or_set_indexers(key)

        # we have too many indexers for our dim, but have at least 1
        # multi-index dimension, try to see if we have something like
        # a tuple passed to a series with a multi-index
        if len(tup) > self.ndim:
            if self.name != "loc":
                # This should never be reached, but let's be explicit about it
                raise ValueError("Too many indices")  # pragma: no cover
            if all(
                (is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x)
                for x in tup
            ):
                # GH#10521 Series should reduce MultiIndex dimensions instead of
                #  DataFrame, IndexingError is not raised when slice(None,None,None)
                #  with one row.
                with suppress(IndexingError):
                    return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
                        tup
                    )
            elif isinstance(self.obj, ABCSeries) and any(
                isinstance(k, tuple) for k in tup
            ):
                # GH#35349 Raise if tuple in tuple for series
                # Do this after the all-hashable-or-null-slice check so that
                #  we are only getting non-hashable tuples, in particular ones
                #  that themselves contain a slice entry
                # See test_loc_series_getitem_too_many_dimensions
                raise IndexingError("Too many indexers")

            # this is a series with a multi-index specified as a tuple of
            # selectors
            axis = self.axis or 0
            return self._getitem_axis(tup, axis=axis)

        # handle the multi-axis by taking sections and reducing
        # this is iterative
        obj = self.obj
        # GH#41369 Loop in reverse order ensures indexing along columns before rows
        # which selects only necessary blocks which avoids dtype conversion if possible
        axis = len(tup) - 1
        for key in tup[::-1]:
            if com.is_null_slice(key):
                # nothing to select on this axis; just move to the previous one
                axis -= 1
                continue

            obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
            axis -= 1

            # if we have a scalar, we are done
            if is_scalar(obj) or not hasattr(obj, "ndim"):
                break

        return obj
1161
    def _convert_to_indexer(self, key, axis: AxisInt):
        """
        Convert ``key`` into an indexer usable along ``axis``.

        Abstract hook: this base implementation does no work and always
        raises.

        Raises
        ------
        AbstractMethodError
            Always.
        """
        raise AbstractMethodError(self)
1164
1165 def _check_deprecated_callable_usage(self, key: Any, maybe_callable: T) -> T:
1166 # GH53533
1167 if self.name == "iloc" and callable(key) and isinstance(maybe_callable, tuple):
1168 warnings.warn(
1169 "Returning a tuple from a callable with iloc "
1170 "is deprecated and will be removed in a future version",
1171 FutureWarning,
1172 stacklevel=find_stack_level(),
1173 )
1174 return maybe_callable
1175
1176 @final
1177 def __getitem__(self, key):
1178 check_dict_or_set_indexers(key)
1179 if type(key) is tuple:
1180 key = tuple(list(x) if is_iterator(x) else x for x in key)
1181 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
1182 if self._is_scalar_access(key):
1183 return self.obj._get_value(*key, takeable=self._takeable)
1184 return self._getitem_tuple(key)
1185 else:
1186 # we by definition only have the 0th axis
1187 axis = self.axis or 0
1188
1189 maybe_callable = com.apply_if_callable(key, self.obj)
1190 maybe_callable = self._check_deprecated_callable_usage(key, maybe_callable)
1191 return self._getitem_axis(maybe_callable, axis=axis)
1192
1193 def _is_scalar_access(self, key: tuple):
1194 raise NotImplementedError()
1195
    def _getitem_tuple(self, tup: tuple):
        """
        Index with a tuple of per-axis indexers.

        Abstract hook: this base implementation always raises.

        Raises
        ------
        AbstractMethodError
            Always.
        """
        raise AbstractMethodError(self)
1198
1199 def _getitem_axis(self, key, axis: AxisInt):
1200 raise NotImplementedError()
1201
    def _has_valid_setitem_indexer(self, indexer) -> bool:
        """
        Check that ``indexer`` may be used for setting values.

        Abstract hook: this base implementation always raises.

        Raises
        ------
        AbstractMethodError
            Always.
        """
        raise AbstractMethodError(self)
1204
1205 @final
1206 def _getbool_axis(self, key, axis: AxisInt):
1207 # caller is responsible for ensuring non-None axis
1208 labels = self.obj._get_axis(axis)
1209 key = check_bool_indexer(labels, key)
1210 inds = key.nonzero()[0]
1211 return self.obj._take_with_is_copy(inds, axis=axis)
1212
1213
@doc(IndexingMixin.loc)
class _LocIndexer(_LocationIndexer):
    # NOTE: deliberately no class docstring here; the ``@doc`` decorator above
    # is given ``IndexingMixin.loc`` to build the documentation from.
    _takeable: bool = False
    _valid_types = (
        "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
        "endpoints included! Can be slices of integers if the "
        "index is integers), listlike of labels, boolean"
    )

    # -------------------------------------------------------------------
    # Key Checks

    @doc(_LocationIndexer._validate_key)
    def _validate_key(self, key, axis: Axis):
        # Accepted without further checks here:
        #   - a collection of labels (presence is checked later)
        #   - a slice of labels / integers
        # Rejected here:
        #   - a boolean label when the axis is not boolean
        #   - a slice whose start or stop is a boolean
        ax = self.obj._get_axis(axis)

        if isinstance(key, bool):
            axis_is_boolean = (
                is_bool_dtype(ax.dtype)
                or ax.dtype.name == "boolean"
                or (
                    isinstance(ax, MultiIndex)
                    and is_bool_dtype(ax.get_level_values(0).dtype)
                )
            )
            if not axis_is_boolean:
                raise KeyError(
                    f"{key}: boolean label can not be used without a boolean index"
                )

        if isinstance(key, slice):
            endpoints = (key.start, key.stop)
            if any(isinstance(bound, bool) for bound in endpoints):
                raise TypeError(f"{key}: boolean values can not be used in a slice")

    def _has_valid_setitem_indexer(self, indexer) -> bool:
        """
        Accept every indexer for setting; no validation is done here.

        Returns
        -------
        bool
            Always True.
        """
        return True

    def _is_scalar_access(self, key: tuple) -> bool:
        """
        Decide whether ``key`` can take the direct single-value path.

        This is the shortcut that gives ``.loc`` the equivalent access of
        ``.at``: it avoids fetching sections (minimising dtype changes) and
        is the performant path.

        Returns
        -------
        bool
            True only if ``key`` has one entry per dimension and every entry
            is a scalar on an axis that is not a MultiIndex, is not subject
            to partial string indexing for that entry, and is indexed as
            unique.
        """
        if len(key) != self.ndim:
            return False

        for axis_no, label in enumerate(key):
            if not is_scalar(label):
                return False

            ax = self.obj.axes[axis_no]
            if isinstance(ax, MultiIndex):
                return False

            # partial string indexing, df.loc['2000', 'A']
            # should not be considered scalar
            partial_string = (
                isinstance(label, str) and ax._supports_partial_string_indexing
            )
            if partial_string or not ax._index_as_unique:
                return False

        return True

    # -------------------------------------------------------------------
    # MultiIndex Handling

    def _multi_take_opportunity(self, tup: tuple) -> bool:
        """
        Tell whether ``_multi_take`` may be used for ``tup``.

        The current requirement is that every entry is a list-like indexer
        and that none of them is a boolean indexer.

        Parameters
        ----------
        tup : tuple
            Tuple of indexers, one per axis.

        Returns
        -------
        bool
            Whether the indexing can be passed through ``_multi_take``.
        """
        for indexer in tup:
            if not is_list_like_indexer(indexer):
                return False

        # boolean indexers: just too complicated
        return all(not com.is_bool_indexer(indexer) for indexer in tup)

    def _multi_take(self, tup: tuple):
        """
        Build the indexers for every key in ``tup`` and reindex once.

        Doing a single reindex for all axes, instead of one take per
        dimension, is the point of this method (GH 836).

        Parameters
        ----------
        tup : tuple
            Tuple of indexers, one per axis.

        Returns
        -------
        values: same type as the object being indexed
        """
        # GH 836
        reindexers = {}
        for key, axis_name in zip(tup, self.obj._AXIS_ORDERS):
            reindexers[axis_name] = self._get_listlike_indexer(key, axis_name)
        return self.obj._reindex_with_indexers(reindexers, copy=True, allow_dups=True)

    # -------------------------------------------------------------------

    def _getitem_iterable(self, key, axis: AxisInt):
        """
        Index the object with an iterable collection of labels.

        Parameters
        ----------
        key : iterable
            Targeted labels.
        axis : int
            Dimension on which the indexing is being made.

        Raises
        ------
        KeyError
            Propagated from the label lookup (presumably when requested
            labels are missing; the lookup itself is not defined here).

        Returns
        -------
        scalar, DataFrame, or Series: indexed value(s).
        """
        # we assume that not com.is_bool_indexer(key), as that is
        # handled before we get here.
        self._validate_key(key, axis)

        # A collection of keys
        new_labels, positions = self._get_listlike_indexer(key, axis)
        reindexers = {axis: [new_labels, positions]}
        return self.obj._reindex_with_indexers(reindexers, copy=True, allow_dups=True)

    def _getitem_tuple(self, tup: tuple):
        """
        Index with a tuple of per-axis indexers.

        The lower-dimensional path is tried first; if it signals
        ``IndexingError`` the tuple is validated and handled either by
        ``_multi_take`` or by the same-dimension path.
        """
        try:
            tup = self._expand_ellipsis(tup)
            return self._getitem_lowerdim(tup)
        except IndexingError:
            # fall through to the generic handling below
            pass

        # no multi-index, so validate all of the indexers
        tup = self._validate_tuple_indexer(tup)

        # ugly hack for GH #836
        take_all_at_once = self._multi_take_opportunity(tup)
        if take_all_at_once:
            return self._multi_take(tup)

        return self._getitem_tuple_same_dim(tup)

    def _get_label(self, label, axis: AxisInt):
        """
        Return the cross-section of ``self.obj`` for ``label`` along ``axis``.
        """
        # GH#5567 this will fail if the label is not present in the axis.
        return self.obj.xs(label, axis=axis)

    def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
        """
        Look ``tup`` up as a single label on an axis-0 MultiIndex.

        Raises
        ------
        KeyError
            If the lookup fails and ``ndim < len(tup) <= index.nlevels``.
        IndexingError
            If the lookup fails in any other case.
        """
        # we have an axis0 multi-index, handle or raise
        axis = self.axis or 0
        try:
            # fast path for series or for tup devoid of slices
            return self._get_label(tup, axis=axis)

        except KeyError as ek:
            # raise KeyError if number of indexers match
            # else IndexingError will be raised
            n_keys = len(tup)
            counts_match = self.ndim < n_keys and n_keys <= self.obj.index.nlevels
            if not counts_match:
                raise IndexingError("No label returned") from ek
            raise ek

    def _getitem_axis(self, key, axis: AxisInt):
        """
        Index with ``key`` along a single ``axis``.

        Dispatches, in order, on: slice, boolean indexer, list-like indexer
        (including nested tuples on a MultiIndex), and finally a plain label
        lookup.
        """
        key = item_from_zerodim(key)
        if is_iterator(key):
            key = list(key)
        if key is Ellipsis:
            key = slice(None)

        labels = self.obj._get_axis(axis)

        tuple_on_multiindex = isinstance(key, tuple) and isinstance(labels, MultiIndex)
        if tuple_on_multiindex:
            # rebuild as a plain ``tuple``
            key = tuple(key)

        if isinstance(key, slice):
            self._validate_key(key, axis)
            return self._get_slice_axis(key, axis=axis)

        if com.is_bool_indexer(key):
            return self._getbool_axis(key, axis=axis)

        if is_list_like_indexer(key):
            # an iterable multi-selection
            if not tuple_on_multiindex:
                if hasattr(key, "ndim") and key.ndim > 1:
                    raise ValueError("Cannot index with multidimensional key")

                return self._getitem_iterable(key, axis=axis)

            # nested tuple slicing
            if is_nested_tuple(key, labels):
                locs = labels.get_locs(key)
                indexer: list[slice | npt.NDArray[np.intp]] = [slice(None)] * self.ndim
                indexer[axis] = locs
                return self.obj.iloc[tuple(indexer)]

        # fall thru to straight lookup
        self._validate_key(key, axis)
        return self._get_label(key, axis=axis)

    def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
        """
        Slice ``self.obj`` by labels along ``axis``.

        When ``need_slice`` reports that no slicing is needed a shallow copy
        is returned. Otherwise the label slice is converted by the axis'
        ``slice_indexer``; a ``slice`` result goes through ``_slice`` and any
        other result through ``take``.
        """
        # caller is responsible for ensuring non-None axis
        target = self.obj
        if not need_slice(slice_obj):
            return target.copy(deep=False)

        start, stop, step = slice_obj.start, slice_obj.stop, slice_obj.step
        positions = target._get_axis(axis).slice_indexer(start, stop, step)

        if not isinstance(positions, slice):
            # DatetimeIndex overrides Index.slice_indexer and may
            # return a DatetimeIndex instead of a slice object.
            return target.take(positions, axis=axis)
        return target._slice(positions, axis=axis)

    def _convert_to_indexer(self, key, axis: AxisInt):
        """
        Convert indexing key into something we can use to do actual fancy
        indexing on a ndarray.

        Examples
        ix[:5] -> slice(0, 5)
        ix[[1,2,3]] -> [1,2,3]
        ix[['foo', 'bar', 'baz']] -> [i, j, k] (indices of foo, bar, baz)

        Going by Zen of Python?
        'In the face of ambiguity, refuse the temptation to guess.'
        raise AmbiguousIndexError with integer labels?
        - No, prefer label-based indexing

        A label that cannot be located may be returned as ``{"key": key}``
        instead of raising (see the individual branches below).
        """
        labels = self.obj._get_axis(axis)

        if isinstance(key, slice):
            return labels._convert_slice_indexer(key, kind="loc")

        key_is_tuple = isinstance(key, tuple)
        on_multiindex = isinstance(labels, MultiIndex)

        if key_is_tuple and not on_multiindex and self.ndim < 2 and len(key) > 1:
            raise IndexingError("Too many indexers")

        # Slices are not valid keys passed in by the user,
        # even though they are hashable in Python 3.12
        contains_slice = key_is_tuple and any(isinstance(part, slice) for part in key)

        if is_scalar(key) or (
            on_multiindex and is_hashable(key) and not contains_slice
        ):
            # Otherwise get_loc will raise InvalidIndexError

            # if we are a label return me
            try:
                return labels.get_loc(key)
            except LookupError:
                if key_is_tuple and on_multiindex and len(key) == labels.nlevels:
                    return {"key": key}
                raise
            except InvalidIndexError:
                # GH35015, using datetime as column indices raises exception
                if not on_multiindex:
                    raise
            except ValueError:
                if is_integer(key):
                    return {"key": key}
                raise

        if is_nested_tuple(key, labels):
            if self.ndim == 1 and any(isinstance(part, tuple) for part in key):
                # GH#35349 Raise if tuple in tuple for series
                raise IndexingError("Too many indexers")
            return labels.get_locs(key)

        if is_list_like_indexer(key):
            if is_iterator(key):
                key = list(key)

            if not com.is_bool_indexer(key):
                return self._get_listlike_indexer(key, axis)[1]
            return check_bool_indexer(labels, key)

        try:
            return labels.get_loc(key)
        except LookupError:
            # allow a not found key only if we are a setter
            if not is_list_like_indexer(key):
                return {"key": key}
            raise

    def _get_listlike_indexer(self, key, axis: AxisInt):
        """
        Transform a list-like of keys into a new index and an indexer.

        The work is delegated to the axis' ``_get_indexer_strict``.

        Parameters
        ----------
        key : list-like
            Targeted labels.
        axis: int
            Dimension on which the indexing is being made.

        Raises
        ------
        KeyError
            Propagated from ``_get_indexer_strict`` (presumably for labels
            that cannot be found; that method is not defined here).

        Returns
        -------
        keyarr: Index
            New index (coinciding with 'key' if the axis is unique).
        values : array-like
            Indexer for the return object.
        """
        ax = self.obj._get_axis(axis)
        axis_name = self.obj._get_axis_name(axis)

        new_labels, positions = ax._get_indexer_strict(key, axis_name)

        return new_labels, positions
1561
1562
1563@doc(IndexingMixin.iloc)
1564class _iLocIndexer(_LocationIndexer):
1565 _valid_types = (
1566 "integer, integer slice (START point is INCLUDED, END "
1567 "point is EXCLUDED), listlike of integers, boolean array"
1568 )
1569 _takeable = True
1570
1571 # -------------------------------------------------------------------
1572 # Key Checks
1573
1574 def _validate_key(self, key, axis: AxisInt):
1575 if com.is_bool_indexer(key):
1576 if hasattr(key, "index") and isinstance(key.index, Index):
1577 if key.index.inferred_type == "integer":
1578 raise NotImplementedError(
1579 "iLocation based boolean "
1580 "indexing on an integer type "
1581 "is not available"
1582 )
1583 raise ValueError(
1584 "iLocation based boolean indexing cannot use "
1585 "an indexable as a mask"
1586 )
1587 return
1588
1589 if isinstance(key, slice):
1590 return
1591 elif is_integer(key):
1592 self._validate_integer(key, axis)
1593 elif isinstance(key, tuple):
1594 # a tuple should already have been caught by this point
1595 # so don't treat a tuple as a valid indexer
1596 raise IndexingError("Too many indexers")
1597 elif is_list_like_indexer(key):
1598 if isinstance(key, ABCSeries):
1599 arr = key._values
1600 elif is_array_like(key):
1601 arr = key
1602 else:
1603 arr = np.array(key)
1604 len_axis = len(self.obj._get_axis(axis))
1605
1606 # check that the key has a numeric dtype
1607 if not is_numeric_dtype(arr.dtype):
1608 raise IndexError(f".iloc requires numeric indexers, got {arr}")
1609
1610 # check that the key does not exceed the maximum size of the index
1611 if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):
1612 raise IndexError("positional indexers are out-of-bounds")
1613 else:
1614 raise ValueError(f"Can only index by location with a [{self._valid_types}]")
1615
1616 def _has_valid_setitem_indexer(self, indexer) -> bool:
1617 """
1618 Validate that a positional indexer cannot enlarge its target
1619 will raise if needed, does not modify the indexer externally.
1620
1621 Returns
1622 -------
1623 bool
1624 """
1625 if isinstance(indexer, dict):
1626 raise IndexError("iloc cannot enlarge its target object")
1627
1628 if isinstance(indexer, ABCDataFrame):
1629 raise TypeError(
1630 "DataFrame indexer for .iloc is not supported. "
1631 "Consider using .loc with a DataFrame indexer for automatic alignment.",
1632 )
1633
1634 if not isinstance(indexer, tuple):
1635 indexer = _tuplify(self.ndim, indexer)
1636
1637 for ax, i in zip(self.obj.axes, indexer):
1638 if isinstance(i, slice):
1639 # should check the stop slice?
1640 pass
1641 elif is_list_like_indexer(i):
1642 # should check the elements?
1643 pass
1644 elif is_integer(i):
1645 if i >= len(ax):
1646 raise IndexError("iloc cannot enlarge its target object")
1647 elif isinstance(i, dict):
1648 raise IndexError("iloc cannot enlarge its target object")
1649
1650 return True
1651
1652 def _is_scalar_access(self, key: tuple) -> bool:
1653 """
1654 Returns
1655 -------
1656 bool
1657 """
1658 # this is a shortcut accessor to both .loc and .iloc
1659 # that provide the equivalent access of .at and .iat
1660 # a) avoid getting things via sections and (to minimize dtype changes)
1661 # b) provide a performant path
1662 if len(key) != self.ndim:
1663 return False
1664
1665 return all(is_integer(k) for k in key)
1666
1667 def _validate_integer(self, key: int | np.integer, axis: AxisInt) -> None:
1668 """
1669 Check that 'key' is a valid position in the desired axis.
1670
1671 Parameters
1672 ----------
1673 key : int
1674 Requested position.
1675 axis : int
1676 Desired axis.
1677
1678 Raises
1679 ------
1680 IndexError
1681 If 'key' is not a valid position in axis 'axis'.
1682 """
1683 len_axis = len(self.obj._get_axis(axis))
1684 if key >= len_axis or key < -len_axis:
1685 raise IndexError("single positional indexer is out-of-bounds")
1686
1687 # -------------------------------------------------------------------
1688
1689 def _getitem_tuple(self, tup: tuple):
1690 tup = self._validate_tuple_indexer(tup)
1691 with suppress(IndexingError):
1692 return self._getitem_lowerdim(tup)
1693
1694 return self._getitem_tuple_same_dim(tup)
1695
1696 def _get_list_axis(self, key, axis: AxisInt):
1697 """
1698 Return Series values by list or array of integers.
1699
1700 Parameters
1701 ----------
1702 key : list-like positional indexer
1703 axis : int
1704
1705 Returns
1706 -------
1707 Series object
1708
1709 Notes
1710 -----
1711 `axis` can only be zero.
1712 """
1713 try:
1714 return self.obj._take_with_is_copy(key, axis=axis)
1715 except IndexError as err:
1716 # re-raise with different error message, e.g. test_getitem_ndarray_3d
1717 raise IndexError("positional indexers are out-of-bounds") from err
1718
1719 def _getitem_axis(self, key, axis: AxisInt):
1720 if key is Ellipsis:
1721 key = slice(None)
1722 elif isinstance(key, ABCDataFrame):
1723 raise IndexError(
1724 "DataFrame indexer is not allowed for .iloc\n"
1725 "Consider using .loc for automatic alignment."
1726 )
1727
1728 if isinstance(key, slice):
1729 return self._get_slice_axis(key, axis=axis)
1730
1731 if is_iterator(key):
1732 key = list(key)
1733
1734 if isinstance(key, list):
1735 key = np.asarray(key)
1736
1737 if com.is_bool_indexer(key):
1738 self._validate_key(key, axis)
1739 return self._getbool_axis(key, axis=axis)
1740
1741 # a list of integers
1742 elif is_list_like_indexer(key):
1743 return self._get_list_axis(key, axis=axis)
1744
1745 # a single integer
1746 else:
1747 key = item_from_zerodim(key)
1748 if not is_integer(key):
1749 raise TypeError("Cannot index by location index with a non-integer key")
1750
1751 # validate the location
1752 self._validate_integer(key, axis)
1753
1754 return self.obj._ixs(key, axis=axis)
1755
1756 def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
1757 # caller is responsible for ensuring non-None axis
1758 obj = self.obj
1759
1760 if not need_slice(slice_obj):
1761 return obj.copy(deep=False)
1762
1763 labels = obj._get_axis(axis)
1764 labels._validate_positional_slice(slice_obj)
1765 return self.obj._slice(slice_obj, axis=axis)
1766
    def _convert_to_indexer(self, key, axis: AxisInt):
        """
        Much simpler as we only have to deal with our valid types.

        ``key`` is handed back unchanged and ``axis`` is not used.
        """
        return key
1772
1773 def _get_setitem_indexer(self, key):
1774 # GH#32257 Fall through to let numpy do validation
1775 if is_iterator(key):
1776 key = list(key)
1777
1778 if self.axis is not None:
1779 key = _tupleize_axis_indexer(self.ndim, self.axis, key)
1780
1781 return key
1782
1783 # -------------------------------------------------------------------
1784
    def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
        """
        _setitem_with_indexer is for setting values on a Series/DataFrame
        using positional indexers.

        If the relevant keys are not present, the Series/DataFrame may be
        expanded.

        This method is currently broken when dealing with non-unique Indexes,
        since it goes from positional indexers back to labels when calling
        BlockManager methods, see GH#12991, GH#22046, GH#15686.

        Parameters
        ----------
        indexer : object
            Positional indexer, or a tuple with one indexer per axis. A dict
            entry stands for a key that is not present yet; it is unpacked
            with ``convert_missing_indexer`` and triggers expansion.
        value : object
            The value(s) to set.
        name : str, default "iloc"
            Name of the calling indexer. With ``"loc"`` the indexer and value
            are additionally passed through ``_maybe_mask_setitem_value``.

        Notes
        -----
        The method first decides between the column-wise "split path" and
        the single-block path, then handles any expansion, and finally
        dispatches to the chosen path.
        """
        info_axis = self.obj._info_axis_number

        # maybe partial set
        take_split_path = not self.obj._mgr.is_single_block

        if not take_split_path and isinstance(value, ABCDataFrame):
            # Avoid cast of values
            take_split_path = not value._mgr.is_single_block

        # if there is only one block/type, still have to take split path
        # unless the block is one-dimensional or it can hold the value
        if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
            # in case of dict, keys are indices
            val = list(value.values()) if isinstance(value, dict) else value
            arr = self.obj._mgr.arrays[0]
            take_split_path = not can_hold_element(
                arr, extract_array(val, extract_numpy=True)
            )

        # if we have any multi-indexes that have non-trivial slices
        # (not null slices) then we must take the split path, xref
        # GH 10360, GH 27841
        if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
            for i, ax in zip(indexer, self.obj.axes):
                if isinstance(ax, MultiIndex) and not (
                    is_integer(i) or com.is_null_slice(i)
                ):
                    take_split_path = True
                    break

        if isinstance(indexer, tuple):
            # Rebuild the tuple, replacing every dict entry by the position
            # of the newly added key.
            nindexer = []
            for i, idx in enumerate(indexer):
                if isinstance(idx, dict):
                    # reindex the axis to the new value
                    # and set inplace
                    key, _ = convert_missing_indexer(idx)

                    # if this is the items axes, then take the main missing
                    # path first
                    # this correctly sets the dtype and avoids cache issues
                    # essentially this separates out the block that is needed
                    # to possibly be modified
                    if self.ndim > 1 and i == info_axis:
                        # add the new item, and set the value
                        # must have all defined axes if we have a scalar
                        # or a list-like on the non-info axes if we have a
                        # list-like
                        if not len(self.obj):
                            if not is_list_like_indexer(value):
                                raise ValueError(
                                    "cannot set a frame with no "
                                    "defined index and a scalar"
                                )
                            self.obj[key] = value
                            return

                        # add a new item with the dtype setup
                        if com.is_null_slice(indexer[0]):
                            # We are setting an entire column
                            self.obj[key] = value
                            return
                        elif is_array_like(value):
                            # GH#42099
                            arr = extract_array(value, extract_numpy=True)
                            # taking with all -1 positions is used to build
                            # the "empty" column that is filled in below
                            taker = -1 * np.ones(len(self.obj), dtype=np.intp)
                            empty_value = algos.take_nd(arr, taker)
                            if not isinstance(value, ABCSeries):
                                # if not Series (in which case we need to align),
                                # we can short-circuit
                                if (
                                    isinstance(arr, np.ndarray)
                                    and arr.ndim == 1
                                    and len(arr) == 1
                                ):
                                    # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                                    arr = arr[0, ...]
                                empty_value[indexer[0]] = arr
                                self.obj[key] = empty_value
                                return

                            self.obj[key] = empty_value
                        elif not is_list_like(value):
                            self.obj[key] = construct_1d_array_from_inferred_fill_value(
                                value, len(self.obj)
                            )
                        else:
                            # FIXME: GH#42099#issuecomment-864326014
                            self.obj[key] = infer_fill_value(value)

                        # The new column exists now: translate the indexer
                        # and set the value by recursing once.
                        new_indexer = convert_from_missing_indexer_tuple(
                            indexer, self.obj.axes
                        )
                        self._setitem_with_indexer(new_indexer, value, name)

                        return

                    # reindex the axis
                    # make sure to clear the cache because we are
                    # just replacing the block manager here
                    # so the object is the same
                    index = self.obj._get_axis(i)
                    with warnings.catch_warnings():
                        # TODO: re-issue this with setitem-specific message?
                        warnings.filterwarnings(
                            "ignore",
                            "The behavior of Index.insert with object-dtype "
                            "is deprecated",
                            category=FutureWarning,
                        )
                        labels = index.insert(len(index), key)

                    # We are expanding the Series/DataFrame values to match
                    # the length of the new index `labels`. GH#40096 ensure
                    # this is valid even if the index has duplicates.
                    taker = np.arange(len(index) + 1, dtype=np.intp)
                    taker[-1] = -1
                    reindexers = {i: (labels, taker)}
                    new_obj = self.obj._reindex_with_indexers(
                        reindexers, allow_dups=True
                    )
                    self.obj._mgr = new_obj._mgr
                    self.obj._maybe_update_cacher(clear=True)
                    self.obj._is_copy = None

                    nindexer.append(labels.get_loc(key))

                else:
                    nindexer.append(idx)

            indexer = tuple(nindexer)
        else:
            indexer, missing = convert_missing_indexer(indexer)

            if missing:
                self._setitem_with_indexer_missing(indexer, value)
                return

        if name == "loc":
            # must come after setting of missing
            indexer, value = self._maybe_mask_setitem_value(indexer, value)

        # align and set the values
        if take_split_path:
            # We have to operate column-wise
            self._setitem_with_indexer_split_path(indexer, value, name)
        else:
            self._setitem_single_block(indexer, value, name)
1945
    def _setitem_with_indexer_split_path(self, indexer, value, name: str):
        """
        Setitem column-wise.

        Parameters
        ----------
        indexer : object
            Positional indexer; a non-tuple is first expanded with
            ``_tuplify``. Element 0 indexes the rows, element 1 the columns.
        value : object
            The value(s) to set.
        name : str
            Name of the calling indexer; a Series value is aligned unless
            this is ``"iloc"`` (dict values are always wrapped and aligned).

        Raises
        ------
        IndexError
            If ``indexer`` has more entries than ``self.ndim``.
        ValueError
            If the row indexer is an ndarray with ``ndim > 2``, or the
            lengths of keys and value cannot be reconciled.
        """
        # Above we only set take_split_path to True for 2D cases
        assert self.ndim == 2

        if not isinstance(indexer, tuple):
            indexer = _tuplify(self.ndim, indexer)
        if len(indexer) > self.ndim:
            raise IndexError("too many indices for array")
        if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
            raise ValueError(r"Cannot set values with ndim > 2")

        if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
            from pandas import Series

            value = self._align_series(indexer, Series(value))

        # Ensure we have something we can iterate over
        info_axis = indexer[1]
        ilocs = self._ensure_iterable_column_indexer(info_axis)

        pi = indexer[0]
        lplane_indexer = length_of_indexer(pi, self.obj.index)
        # lplane_indexer gives the expected length of obj[indexer[0]]

        # we need an iterable, with a ndim of at least 1
        # eg. don't pass through np.array(0)
        if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
            # NOTE: the order of the branches below matters; the first
            # matching case wins.
            if isinstance(value, ABCDataFrame):
                self._setitem_with_indexer_frame_value(indexer, value, name)

            elif np.ndim(value) == 2:
                # TODO: avoid np.ndim call in case it isn't an ndarray, since
                # that will construct an ndarray, which will be wasteful
                self._setitem_with_indexer_2d_value(indexer, value)

            elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
                # We are setting multiple rows in a single column.
                self._setitem_single_column(ilocs[0], value, pi)

            elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
                # We are trying to set N values into M entries of a single
                # column, which is invalid for N != M
                # Exclude zero-len for e.g. boolean masking that is all-false

                if len(value) == 1 and not is_integer(info_axis):
                    # This is a case like df.iloc[:3, [1]] = [0]
                    # where we treat as df.iloc[:3, 1] = 0
                    return self._setitem_with_indexer((pi, info_axis[0]), value[0])

                raise ValueError(
                    "Must have equal len keys and value "
                    "when setting with an iterable"
                )

            elif lplane_indexer == 0 and len(value) == len(self.obj.index):
                # We get here in one case via .loc with a all-False mask
                pass

            elif self._is_scalar_access(indexer) and is_object_dtype(
                self.obj.dtypes._values[ilocs[0]]
            ):
                # We are setting nested data, only possible for object dtype data
                self._setitem_single_column(indexer[1], value, pi)

            elif len(ilocs) == len(value):
                # We are setting multiple columns in a single row.
                for loc, v in zip(ilocs, value):
                    self._setitem_single_column(loc, v, pi)

            elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
                # This is a setitem-with-expansion, see
                # test_loc_setitem_empty_append_expands_rows_mixed_dtype
                # e.g. df = DataFrame(columns=["x", "y"])
                # df["x"] = df["x"].astype(np.int64)
                # df.loc[:, "x"] = [1, 2, 3]
                self._setitem_single_column(ilocs[0], value, pi)

            else:
                raise ValueError(
                    "Must have equal len keys and value "
                    "when setting with an iterable"
                )

        else:
            # scalar value
            for loc in ilocs:
                self._setitem_single_column(loc, value, pi)
2036
2037 def _setitem_with_indexer_2d_value(self, indexer, value):
2038 # We get here with np.ndim(value) == 2, excluding DataFrame,
2039 # which goes through _setitem_with_indexer_frame_value
2040 pi = indexer[0]
2041
2042 ilocs = self._ensure_iterable_column_indexer(indexer[1])
2043
2044 if not is_array_like(value):
2045 # cast lists to array
2046 value = np.array(value, dtype=object)
2047 if len(ilocs) != value.shape[1]:
2048 raise ValueError(
2049 "Must have equal len keys and value when setting with an ndarray"
2050 )
2051
2052 for i, loc in enumerate(ilocs):
2053 value_col = value[:, i]
2054 if is_object_dtype(value_col.dtype):
2055 # casting to list so that we do type inference in setitem_single_column
2056 value_col = value_col.tolist()
2057 self._setitem_single_column(loc, value_col, pi)
2058
    def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):
        """
        Set a DataFrame ``value`` one column at a time.

        Parameters
        ----------
        indexer : sequence
            Element 0 indexes the rows, element 1 the columns.
        value : DataFrame
            The frame holding the new values.
        name : str
            Name of the calling indexer. With ``"iloc"`` the columns of
            ``value`` are used by position, without alignment; otherwise
            they are matched by label and aligned with ``_align_series``.

        Raises
        ------
        ValueError
            If ``name`` is not ``"iloc"`` and ``value`` has non-unique
            columns that do not equal the columns of ``self.obj``.
        """
        ilocs = self._ensure_iterable_column_indexer(indexer[1])

        # mutable copy; slot 1 is overwritten with each column label below
        sub_indexer = list(indexer)
        pi = indexer[0]

        multiindex_indexer = isinstance(self.obj.columns, MultiIndex)

        unique_cols = value.columns.is_unique

        # We do not want to align the value in case of iloc GH#37728
        if name == "iloc":
            for i, loc in enumerate(ilocs):
                val = value.iloc[:, i]
                self._setitem_single_column(loc, val, pi)

        elif not unique_cols and value.columns.equals(self.obj.columns):
            # We assume we are already aligned, see
            # test_iloc_setitem_frame_duplicate_columns_multiple_blocks
            for loc in ilocs:
                item = self.obj.columns[loc]
                if item in value:
                    sub_indexer[1] = item
                    val = self._align_series(
                        tuple(sub_indexer),
                        value.iloc[:, loc],
                        multiindex_indexer,
                    )
                else:
                    # column absent from ``value``: fill with NaN
                    val = np.nan

                self._setitem_single_column(loc, val, pi)

        elif not unique_cols:
            raise ValueError("Setting with non-unique columns is not allowed.")

        else:
            for loc in ilocs:
                item = self.obj.columns[loc]
                if item in value:
                    sub_indexer[1] = item
                    val = self._align_series(
                        tuple(sub_indexer),
                        value[item],
                        multiindex_indexer,
                        using_cow=using_copy_on_write(),
                    )
                else:
                    # column absent from ``value``: fill with NaN
                    val = np.nan

                self._setitem_single_column(loc, val, pi)
2110
    def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:
        """
        Set ``value`` into the rows ``plane_indexer`` of the column at ``loc``.

        Three cases are distinguished: an empty row selection (nothing is
        done), a selection covering the whole column (an inplace set is
        tried first, falling back to replacing the column), and a partial
        selection.

        Parameters
        ----------
        loc : int
            Indexer for column position
        value : object
            The value(s) to set.
        plane_indexer : int, slice, listlike[int]
            The indexer we use for setitem along axis=0.
        """
        pi = plane_indexer

        is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))

        is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0

        if is_null_setter:
            # no-op, don't cast dtype later
            # (note: this returns before the item cache is cleared below)
            return

        elif is_full_setter:
            try:
                self.obj._mgr.column_setitem(
                    loc, plane_indexer, value, inplace_only=True
                )
            except (ValueError, TypeError, LossySetitemError):
                # If we're setting an entire column and we can't do it inplace,
                # then we can use value's dtype (or inferred dtype)
                # instead of object
                dtype = self.obj.dtypes.iloc[loc]
                if dtype not in (np.void, object) and not self.obj.empty:
                    # - Exclude np.void, as that is a special case for expansion.
                    # We want to warn for
                    # df = pd.DataFrame({'a': [1, 2]})
                    # df.loc[:, 'a'] = .3
                    # but not for
                    # df = pd.DataFrame({'a': [1, 2]})
                    # df.loc[:, 'b'] = .3
                    # - Exclude `object`, as then no upcasting happens.
                    # - Exclude empty initial object with enlargement,
                    # as then there's nothing to be inconsistent with.
                    warnings.warn(
                        f"Setting an item of incompatible dtype is deprecated "
                        "and will raise in a future error of pandas. "
                        f"Value '{value}' has dtype incompatible with {dtype}, "
                        "please explicitly cast to a compatible dtype first.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )
                # replace the whole column with ``value``
                self.obj.isetitem(loc, value)
        else:
            # set value into the column (first attempting to operate inplace, then
            # falling back to casting if necessary)
            dtype = self.obj.dtypes.iloc[loc]
            if dtype == np.void:
                # This means we're expanding, with multiple columns, e.g.
                # df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})
                # df.loc[df.index <= 2, ['F', 'G']] = (1, 'abc')
                # Columns F and G will initially be set to np.void.
                # Here, we replace those temporary `np.void` columns with
                # columns of the appropriate dtype, based on `value`.
                self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(
                    value, len(self.obj)
                )
            self.obj._mgr.column_setitem(loc, plane_indexer, value)

        self.obj._clear_item_cache()
2178
def _setitem_single_block(self, indexer, value, name: str) -> None:
    """
    Variant of _setitem_with_indexer used when there is only one Block.

    Parameters
    ----------
    indexer : object
        Where to set. A tuple indexer made up entirely of arrays, lists,
        Series or Index objects is turned into an open mesh via
        ``maybe_convert_ix``.
    value : object
        What to set. Series and DataFrame values are aligned against
        ``indexer`` first unless ``name`` is "iloc"; dict values are always
        wrapped in a Series and aligned.
    name : str
        Name of the indexer doing the set; "iloc" disables alignment.
    """
    from pandas import Series

    target = self.obj
    aligns_labels = name != "iloc"

    if isinstance(value, dict) or (aligns_labels and isinstance(value, ABCSeries)):
        # TODO(EA): ExtensionBlock.setitem this causes issues with
        # setting for extensionarrays that store dicts. Need to decide
        # if it's worth supporting that.
        value = self._align_series(indexer, Series(value))

    info_axis = target._info_axis_number
    item_labels = target._get_axis(info_axis)

    if isinstance(indexer, tuple):
        # When only the info axis is being set (null slice on the rows and a
        # single integer column), use the single-column path so that the
        # block-splitting logic further down is avoided.
        whole_single_column = (
            self.ndim == 2
            and len(indexer) == 2
            and is_integer(indexer[1])
            and com.is_null_slice(indexer[0])
        )
        if whole_single_column:
            col = item_labels[indexer[info_axis]]
            matches = item_labels.get_indexer_for([col])
            if len(matches) == 1:
                # e.g. test_loc_setitem_empty_append_expands_rows
                self._setitem_single_column(
                    item_labels.get_loc(col), value, indexer[0]
                )
                return

        # e.g. test_setitem_frame_align
        indexer = maybe_convert_ix(*indexer)

    if aligns_labels and isinstance(value, ABCDataFrame):
        value = self._align_frame(indexer, value)._values

    # check for chained assignment
    target._check_is_chained_assignment_possible()

    # actually do the set
    target._mgr = target._mgr.setitem(indexer=indexer, value=value)
    target._maybe_update_cacher(clear=True, inplace=True)
2220
def _setitem_with_indexer_missing(self, indexer, value):
    """
    Insert new row(s) or column(s) into the Series or DataFrame.

    The object is enlarged by one entry labelled ``indexer`` and its manager
    is replaced with the enlarged one.

    Parameters
    ----------
    indexer : object
        Label of the entry to add. For a 1-D object it is inserted at the
        end of the index; for a 2-D object it becomes the label of the new
        row.
    value : object
        Value(s) for the new entry. For a 2-D object a Series is reindexed
        to the existing columns, a dict is turned into an object-dtype
        Series over the columns, and a list-like must have one element per
        column.

    Raises
    ------
    ValueError
        If the 2-D object has no columns, or a list-like ``value`` has a
        length different from the number of columns.
    """
    from pandas import Series

    # reindex the axis to the new value
    # and set inplace
    if self.ndim == 1:
        index = self.obj.index
        with warnings.catch_warnings():
            # TODO: re-issue this with setitem-specific message?
            warnings.filterwarnings(
                "ignore",
                "The behavior of Index.insert with object-dtype is deprecated",
                category=FutureWarning,
            )
            new_index = index.insert(len(index), indexer)

        # we have a coerced indexer, e.g. a float
        # that matches in an int64 Index, so
        # we will not create a duplicate index, rather
        # index to that element
        # e.g. 0.0 -> 0
        # GH#12246
        if index.is_unique:
            # pass new_index[-1:] instead of [new_index[-1]]
            # so that we retain dtype
            new_indexer = index.get_indexer(new_index[-1:])
            if (new_indexer != -1).any():
                # The inserted label already exists after coercion, so set
                # at the existing position instead of enlarging.
                # We get only here with loc, so can hard code
                return self._setitem_with_indexer(new_indexer, value, "loc")

        # Pick the dtype for the one-element Series holding `value`;
        # this preserves dtype of the value and of the object
        if not is_scalar(value):
            new_dtype = None

        elif is_valid_na_for_dtype(value, self.obj.dtype):
            if not is_object_dtype(self.obj.dtype):
                # Every NA value is suitable for object, no conversion needed
                value = na_value_for_dtype(self.obj.dtype, compat=False)

            new_dtype = maybe_promote(self.obj.dtype, value)[0]

        elif isna(value):
            new_dtype = None
        elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
            # We should not cast, if we have object dtype because we can
            # set timedeltas into object series
            curr_dtype = self.obj.dtype
            # use the dtype's `numpy_dtype` attribute when it has one
            curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
            new_dtype = maybe_promote(curr_dtype, value)[0]
        else:
            new_dtype = None

        new_values = Series([value], dtype=new_dtype)._values

        if len(self.obj._values):
            # GH#22717 handle casting compatibility that np.concatenate
            # does incorrectly
            new_values = concat_compat([self.obj._values, new_values])
        # Rebuild via the constructor and take over the resulting manager.
        self.obj._mgr = self.obj._constructor(
            new_values, index=new_index, name=self.obj.name
        )._mgr
        self.obj._maybe_update_cacher(clear=True)

    elif self.ndim == 2:
        if not len(self.obj.columns):
            # no columns and scalar
            raise ValueError("cannot set a frame with no defined columns")

        # Recorded before `value` is wrapped in a Series below; decides
        # whether dtype inference runs in the empty-frame case.
        has_dtype = hasattr(value, "dtype")
        if isinstance(value, ABCSeries):
            # append a Series
            value = value.reindex(index=self.obj.columns, copy=True)
            value.name = indexer
        elif isinstance(value, dict):
            value = Series(
                value, index=self.obj.columns, name=indexer, dtype=object
            )
        else:
            # a list-list
            if is_list_like_indexer(value):
                # must have conforming columns
                if len(value) != len(self.obj.columns):
                    raise ValueError("cannot set a row with mismatched columns")

            value = Series(value, index=self.obj.columns, name=indexer)

        if not len(self.obj):
            # We will ignore the existing dtypes instead of using
            # internals.concat logic
            df = value.to_frame().T

            # carry the existing index name(s) over to the new one-row index
            idx = self.obj.index
            if isinstance(idx, MultiIndex):
                name = idx.names
            else:
                name = idx.name

            df.index = Index([indexer], name=name)
            if not has_dtype:
                # i.e. if we already had a Series or ndarray, keep that
                # dtype. But if we had a list or dict, then do inference
                df = df.infer_objects(copy=False)
            self.obj._mgr = df._mgr
        else:
            self.obj._mgr = self.obj._append(value)._mgr
        self.obj._maybe_update_cacher(clear=True)
2330
2331 def _ensure_iterable_column_indexer(self, column_indexer):
2332 """
2333 Ensure that our column indexer is something that can be iterated over.
2334 """
2335 ilocs: Sequence[int | np.integer] | np.ndarray
2336 if is_integer(column_indexer):
2337 ilocs = [column_indexer]
2338 elif isinstance(column_indexer, slice):
2339 ilocs = np.arange(len(self.obj.columns))[column_indexer]
2340 elif (
2341 isinstance(column_indexer, np.ndarray) and column_indexer.dtype.kind == "b"
2342 ):
2343 ilocs = np.arange(len(column_indexer))[column_indexer]
2344 else:
2345 ilocs = column_indexer
2346 return ilocs
2347
def _align_series(
    self,
    indexer,
    ser: Series,
    multiindex_indexer: bool = False,
    using_cow: bool = False,
):
    """
    Align ``ser`` with the labels of ``self.obj`` selected by ``indexer``.

    Parameters
    ----------
    indexer : tuple, slice, scalar
        Indexer used to get the locations that will be set to `ser`.
    ser : pd.Series
        Values to assign to the locations specified by `indexer`.
    multiindex_indexer : bool, optional
        Defaults to False. Should be set to True if `indexer` was from
        a `pd.MultiIndex`, to avoid unnecessary broadcasting.
    using_cow : bool, optional
        Defaults to False. When True and the index of `ser` already equals
        the labels selected along an axis, `ser` itself is returned instead
        of a copy of its values.

    Returns
    -------
    `np.array` of `ser` broadcast to the appropriate shape for assignment
    to the locations selected by `indexer`

    Notes
    -----
    Two branches return `ser` itself rather than its values: the
    ``using_cow`` case above, and an integer indexer on a 1-D object with
    object dtype.

    Raises
    ------
    ValueError
        If no branch below is able to produce aligned values for `indexer`.
    """
    # normalize a single non-scalar indexer to a 1-tuple
    if isinstance(indexer, (slice, np.ndarray, list, Index)):
        indexer = (indexer,)

    if isinstance(indexer, tuple):
        # flatten np.ndarray indexers
        def ravel(i):
            return i.ravel() if isinstance(i, np.ndarray) else i

        indexer = tuple(map(ravel, indexer))

        # one flag per axis: True where that axis's indexer is not a null slice
        aligners = [not com.is_null_slice(idx) for idx in indexer]
        sum_aligners = sum(aligners)
        single_aligner = sum_aligners == 1
        is_frame = self.ndim == 2
        obj = self.obj

        # are we a single alignable value on a non-primary
        # dim (e.g. panel: 1,2, or frame: 0) ?
        # hence need to align to a single axis dimension
        # rather than find all valid dims

        # frame
        if is_frame:
            # for a frame, only a lone aligner on axis 0 counts
            single_aligner = single_aligner and aligners[0]

        # we have a frame, with multiple indexers on both axes; and a
        # series, so need to broadcast (see GH5206)
        if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
            ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

            # single indexer
            if len(indexer) > 1 and not multiindex_indexer:
                # repeat the aligned values once per entry of indexer[1],
                # giving one column per entry
                len_indexer = len(indexer[1])
                ser_values = (
                    np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
                )

            return ser_values

        for i, idx in enumerate(indexer):
            ax = obj.axes[i]

            # multiple aligners (or null slices)
            if is_sequence(idx) or isinstance(idx, slice):
                if single_aligner and com.is_null_slice(idx):
                    continue
                new_ix = ax[idx]
                if not is_list_like_indexer(new_ix):
                    new_ix = Index([new_ix])
                else:
                    new_ix = Index(new_ix)
                if ser.index.equals(new_ix):
                    # already aligned: no reindex needed
                    if using_cow:
                        return ser
                    return ser._values.copy()

                return ser.reindex(new_ix)._values

            # 2 dims
            elif single_aligner:
                # reindex along index
                ax = self.obj.axes[1]
                if ser.index.equals(ax) or not len(ax):
                    return ser._values.copy()
                return ser.reindex(ax)._values

    elif is_integer(indexer) and self.ndim == 1:
        if is_object_dtype(self.obj.dtype):
            return ser
        ax = self.obj._get_axis(0)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values[indexer]

    elif is_integer(indexer):
        ax = self.obj._get_axis(1)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values

    raise ValueError("Incompatible indexer with Series")
2456
2457 def _align_frame(self, indexer, df: DataFrame) -> DataFrame:
2458 is_frame = self.ndim == 2
2459
2460 if isinstance(indexer, tuple):
2461 idx, cols = None, None
2462 sindexers = []
2463 for i, ix in enumerate(indexer):
2464 ax = self.obj.axes[i]
2465 if is_sequence(ix) or isinstance(ix, slice):
2466 if isinstance(ix, np.ndarray):
2467 ix = ix.ravel()
2468 if idx is None:
2469 idx = ax[ix]
2470 elif cols is None:
2471 cols = ax[ix]
2472 else:
2473 break
2474 else:
2475 sindexers.append(i)
2476
2477 if idx is not None and cols is not None:
2478 if df.index.equals(idx) and df.columns.equals(cols):
2479 val = df.copy()
2480 else:
2481 val = df.reindex(idx, columns=cols)
2482 return val
2483
2484 elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:
2485 ax = self.obj.index[indexer]
2486 if df.index.equals(ax):
2487 val = df.copy()
2488 else:
2489 # we have a multi-index and are trying to align
2490 # with a particular, level GH3738
2491 if (
2492 isinstance(ax, MultiIndex)
2493 and isinstance(df.index, MultiIndex)
2494 and ax.nlevels != df.index.nlevels
2495 ):
2496 raise TypeError(
2497 "cannot align on a multi-index with out "
2498 "specifying the join levels"
2499 )
2500
2501 val = df.reindex(index=ax)
2502 return val
2503
2504 raise ValueError("Incompatible indexer with DataFrame")
2505
2506
2507class _ScalarAccessIndexer(NDFrameIndexerBase):
2508 """
2509 Access scalars quickly.
2510 """
2511
2512 # sub-classes need to set _takeable
2513 _takeable: bool
2514
2515 def _convert_key(self, key):
2516 raise AbstractMethodError(self)
2517
2518 def __getitem__(self, key):
2519 if not isinstance(key, tuple):
2520 # we could have a convertible item here (e.g. Timestamp)
2521 if not is_list_like_indexer(key):
2522 key = (key,)
2523 else:
2524 raise ValueError("Invalid call for scalar access (getting)!")
2525
2526 key = self._convert_key(key)
2527 return self.obj._get_value(*key, takeable=self._takeable)
2528
2529 def __setitem__(self, key, value) -> None:
2530 if isinstance(key, tuple):
2531 key = tuple(com.apply_if_callable(x, self.obj) for x in key)
2532 else:
2533 # scalar callable may return tuple
2534 key = com.apply_if_callable(key, self.obj)
2535
2536 if not isinstance(key, tuple):
2537 key = _tuplify(self.ndim, key)
2538 key = list(self._convert_key(key))
2539 if len(key) != self.ndim:
2540 raise ValueError("Not enough indexers for scalar access (setting)!")
2541
2542 self.obj._set_value(*key, value=value, takeable=self._takeable)
2543
2544
@doc(IndexingMixin.at)
class _AtIndexer(_ScalarAccessIndexer):
    # NOTE(review): no class docstring is written here on purpose; presumably
    # the `doc` decorator derives it from IndexingMixin.at.
    _takeable = False

    def _convert_key(self, key):
        """
        Require the keys to be the same type as the index (so we don't
        fall back).
        """
        # GH 26989
        # For series, unpacking key needs to result in the label.
        # This is already the case for len(key) == 1; e.g. (1,)
        return (key,) if self.ndim == 1 and len(key) > 1 else key

    @property
    def _axes_are_unique(self) -> bool:
        # Only relevant for self.ndim == 2
        assert self.ndim == 2
        frame = self.obj
        return frame.index.is_unique and frame.columns.is_unique

    def __getitem__(self, key):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__getitem__(key)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(part) for part in key)):
            raise ValueError("Invalid call for scalar access (getting)!")
        return self.obj.loc[key]

    def __setitem__(self, key, value) -> None:
        if self.ndim != 2 or self._axes_are_unique:
            return super().__setitem__(key, value)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(part) for part in key)):
            raise ValueError("Invalid call for scalar access (setting)!")
        self.obj.loc[key] = value
2587
2588
@doc(IndexingMixin.iat)
class _iAtIndexer(_ScalarAccessIndexer):
    # NOTE(review): no class docstring is written here on purpose; presumably
    # the `doc` decorator derives it from IndexingMixin.iat.
    _takeable = True

    def _convert_key(self, key):
        """
        Check that every element of ``key`` is an integer.

        The key is handed back unchanged when the check passes.
        """
        if not all(is_integer(part) for part in key):
            raise ValueError("iAt based indexing can only have integer indexers")
        return key
2601
2602
2603def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:
2604 """
2605 Given an indexer for the first dimension, create an equivalent tuple
2606 for indexing over all dimensions.
2607
2608 Parameters
2609 ----------
2610 ndim : int
2611 loc : object
2612
2613 Returns
2614 -------
2615 tuple
2616 """
2617 _tup: list[Hashable | slice]
2618 _tup = [slice(None, None) for _ in range(ndim)]
2619 _tup[0] = loc
2620 return tuple(_tup)
2621
2622
2623def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:
2624 """
2625 If we have an axis, adapt the given key to be axis-independent.
2626 """
2627 new_key = [slice(None)] * ndim
2628 new_key[axis] = key
2629 return tuple(new_key)
2630
2631
def check_bool_indexer(index: Index, key) -> np.ndarray:
    """
    Validate a boolean indexer against ``index``, reindexing or converting
    it where required.

    Callers are expected to have established is_bool_indexer(key) == True.

    Parameters
    ----------
    index : Index
        Index of the object on which the indexing is done.
    key : list-like
        Boolean indexer to check.

    Returns
    -------
    np.array
        Resulting key.

    Raises
    ------
    IndexError
        If the key does not have the same length as index.
    IndexingError
        If the index of the key is unalignable to index.
    """
    mask = key
    needs_alignment = isinstance(key, ABCSeries) and not key.index.equals(index)

    if needs_alignment:
        positions = mask.index.get_indexer_for(index)
        if -1 in positions:
            raise IndexingError(
                "Unalignable boolean Series provided as "
                "indexer (index of the boolean Series and of "
                "the indexed object do not match)."
            )

        mask = mask.take(positions)

        # A non-extension dtype can be cast to bool and returned right away;
        # extension dtypes continue through the generic checks below.
        if not isinstance(mask.dtype, ExtensionDtype):
            return mask.astype(bool)._values

    if is_object_dtype(key):
        # key might be object-dtype bool, check_array_indexer needs bool array
        mask = np.asarray(mask, dtype=bool)
    elif not is_array_like(mask):
        # GH 33924
        # key may contain nan elements, check_array_indexer needs bool array
        mask = pd_array(mask, dtype=bool)

    return check_array_indexer(index, mask)
2682
2683
def convert_missing_indexer(indexer):
    """
    Unwrap a "missing" indexer, which is represented as a dict.

    Returns a pair: the scalar indexer (the dict's "key" entry, or the input
    itself when it is not a dict) and a boolean telling whether unwrapping
    took place.
    """
    if not isinstance(indexer, dict):
        return indexer, False

    # a missing key (but not a tuple indexer)
    label = indexer["key"]
    if isinstance(label, bool):
        raise KeyError("cannot use a single bool to index into setitem")
    return label, True
2698
2699
def convert_from_missing_indexer_tuple(indexer, axes):
    """
    Build an indexer tuple in which no entry is a "missing" (dict) indexer.

    Every dict entry is replaced by the location of its "key" on the axis at
    the same position; all other entries are kept unchanged.
    """
    return tuple(
        axes[pos].get_loc(entry["key"]) if isinstance(entry, dict) else entry
        for pos, entry in enumerate(indexer)
    )
2709
2710
def maybe_convert_ix(*args):
    """
    Take the cross-product of the indexers when that is likely intended.

    If every argument is an ndarray, list, Series or Index, the result of
    ``np.ix_`` over them is returned; otherwise the arguments come back
    untouched as a tuple.
    """
    crossable = (np.ndarray, list, ABCSeries, Index)
    if all(isinstance(arg, crossable) for arg in args):
        return np.ix_(*args)
    return args
2719
2720
def is_nested_tuple(tup, labels) -> bool:
    """
    Check for a nested tuple used against MultiIndex labels.

    Returns
    -------
    bool
        True only if ``tup`` is a tuple, at least one of its elements is
        list-like or a slice, and ``labels`` is a MultiIndex.
    """
    if not isinstance(tup, tuple):
        return False

    has_nested_part = any(
        is_list_like(part) or isinstance(part, slice) for part in tup
    )
    return has_nested_part and isinstance(labels, MultiIndex)
2736
2737
def is_label_like(key) -> bool:
    """
    Tell whether ``key`` selects a single label or row.

    Returns
    -------
    bool
        False for slices, list-like indexers and Ellipsis; True otherwise.
    """
    return not (
        isinstance(key, slice) or is_list_like_indexer(key) or key is Ellipsis
    )
2750
2751
def need_slice(obj: slice) -> bool:
    """
    Tell whether ``obj`` selects anything other than "everything, in order".

    Returns
    -------
    bool
        True if start or stop is set, or if step is set to something other
        than 1.
    """
    if obj.start is not None or obj.stop is not None:
        return True
    return obj.step is not None and obj.step != 1
2763
2764
def check_dict_or_set_indexers(key) -> None:
    """
    Reject indexers that are, or (for tuples) directly contain, a set or a
    dict; neither is allowed any longer.

    Raises
    ------
    TypeError
        If ``key`` is a set/dict or a tuple with a set/dict element. Sets
        are checked before dicts.
    """
    elements = key if isinstance(key, tuple) else ()

    def _is_or_holds(kind) -> bool:
        return isinstance(key, kind) or any(isinstance(el, kind) for el in elements)

    if _is_or_holds(set):
        raise TypeError(
            "Passing a set as an indexer is not supported. Use a list instead."
        )

    if _is_or_holds(dict):
        raise TypeError(
            "Passing a dict as an indexer is not supported. Use a list instead."
        )