Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexing.py: 29%

1from __future__ import annotations

3from contextlib import suppress

4import sys

5from typing import (

6 TYPE_CHECKING,

7 Any,

8 TypeVar,

9 cast,

10 final,

11)

12import warnings

14import numpy as np

16from pandas._config import (

17 using_copy_on_write,

18 warn_copy_on_write,

19)

21from pandas._libs.indexing import NDFrameIndexerBase

22from pandas._libs.lib import item_from_zerodim

23from pandas.compat import PYPY

24from pandas.errors import (

25 AbstractMethodError,

26 ChainedAssignmentError,

27 IndexingError,

28 InvalidIndexError,

29 LossySetitemError,

30 _chained_assignment_msg,

31 _chained_assignment_warning_msg,

32 _check_cacher,

33)

34from pandas.util._decorators import doc

35from pandas.util._exceptions import find_stack_level

37from pandas.core.dtypes.cast import (

38 can_hold_element,

39 maybe_promote,

40)

41from pandas.core.dtypes.common import (

42 is_array_like,

43 is_bool_dtype,

44 is_hashable,

45 is_integer,

46 is_iterator,

47 is_list_like,

48 is_numeric_dtype,

49 is_object_dtype,

50 is_scalar,

51 is_sequence,

52)

53from pandas.core.dtypes.concat import concat_compat

54from pandas.core.dtypes.dtypes import ExtensionDtype

55from pandas.core.dtypes.generic import (

56 ABCDataFrame,

57 ABCSeries,

58)

59from pandas.core.dtypes.missing import (

60 construct_1d_array_from_inferred_fill_value,

61 infer_fill_value,

62 is_valid_na_for_dtype,

63 isna,

64 na_value_for_dtype,

65)

67from pandas.core import algorithms as algos

68import pandas.core.common as com

69from pandas.core.construction import (

70 array as pd_array,

71 extract_array,

72)

73from pandas.core.indexers import (

74 check_array_indexer,

75 is_list_like_indexer,

76 is_scalar_indexer,

77 length_of_indexer,

78)

79from pandas.core.indexes.api import (

80 Index,

81 MultiIndex,

82)

84if TYPE_CHECKING:

85 from collections.abc import (

86 Hashable,

87 Sequence,

88 )

90 from pandas._typing import (

91 Axis,

92 AxisInt,

93 Self,

94 npt,

95 )

97 from pandas import (

98 DataFrame,

99 Series,

100 )

101

# Generic type variable, used to annotate helpers that hand back a value of
# the same type they were given.
T = TypeVar("T")
# "null slice": a slice with no start and no stop, i.e. a bare ``:``
_NS = slice(None, None)
# Message for the IndexingError raised when a key holds more than one Ellipsis.
_one_ellipsis_message = "indexer may only contain one '...' entry"

106

107

# the public IndexSlicerMaker
class _IndexSlice:
    """
    Create an object to more easily perform multi-index slicing.

    See Also
    --------
    MultiIndex.remove_unused_levels : New MultiIndex with no unused levels.

    Notes
    -----
    See :ref:`Defined Levels <advanced.shown_levels>`
    for further info on slicing a MultiIndex.

    Examples
    --------
    >>> midx = pd.MultiIndex.from_product([['A0','A1'], ['B0','B1','B2','B3']])
    >>> columns = ['foo', 'bar']
    >>> dfmi = pd.DataFrame(np.arange(16).reshape((len(midx), len(columns))),
    ...                     index=midx, columns=columns)

    Using the default slice command:

    >>> dfmi.loc[(slice(None), slice('B0', 'B1')), :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11

    Using the IndexSlice class for a more intuitive command:

    >>> idx = pd.IndexSlice
    >>> dfmi.loc[idx[:, 'B0':'B1'], :]
           foo  bar
    A0 B0    0    1
       B1    2    3
    A1 B0    8    9
       B1   10   11
    """

    def __getitem__(self, arg):
        # Identity on purpose: subscripting (``idx[:, 'B0':'B1']``) makes Python
        # build the slice objects / tuple, and that key is returned unchanged.
        return arg


# Module-level instance of _IndexSlice (used as ``pd.IndexSlice`` in the
# docstring example above).
IndexSlice = _IndexSlice()

154

155

class IndexingMixin:
    """
    Mixin for adding .loc/.iloc/.at/.iat to DataFrames and Series.
    """

    @property
    def iloc(self) -> _iLocIndexer:
        """
        Purely integer-location based indexing for selection by position.

        .. deprecated:: 2.2.0

           Returning a tuple from a callable is deprecated.

        ``.iloc[]`` is primarily integer position based (from ``0`` to
        ``length-1`` of the axis), but may also be used with a boolean
        array.

        Allowed inputs are:

        - An integer, e.g. ``5``.
        - A list or array of integers, e.g. ``[4, 3, 0]``.
        - A slice object with ints, e.g. ``1:7``.
        - A boolean array.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above).
          This is useful in method chains, when you don't have a reference to the
          calling object, but would like to base your selection on
          some value.
        - A tuple of row and column indexes. The tuple elements consist of one of the
          above inputs, e.g. ``(0, 1)``.

        ``.iloc`` will raise ``IndexError`` if a requested indexer is
        out-of-bounds, except *slice* indexers which allow out-of-bounds
        indexing (this conforms with python/numpy *slice* semantics).

        See more at :ref:`Selection by Position <indexing.integer>`.

        See Also
        --------
        DataFrame.iat : Fast integer location scalar accessor.
        DataFrame.loc : Purely label-location based indexer for selection by label.
        Series.iloc : Purely integer-location based indexing for
                       selection by position.

        Examples
        --------
        >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
        ...           {'a': 100, 'b': 200, 'c': 300, 'd': 400},
        ...           {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}]
        >>> df = pd.DataFrame(mydict)
        >>> df
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        **Indexing just the rows**

        With a scalar integer.

        >>> type(df.iloc[0])
        <class 'pandas.core.series.Series'>
        >>> df.iloc[0]
        a    1
        b    2
        c    3
        d    4
        Name: 0, dtype: int64

        With a list of integers.

        >>> df.iloc[[0]]
           a  b  c  d
        0  1  2  3  4
        >>> type(df.iloc[[0]])
        <class 'pandas.core.frame.DataFrame'>

        >>> df.iloc[[0, 1]]
             a    b    c    d
        0    1    2    3    4
        1  100  200  300  400

        With a `slice` object.

        >>> df.iloc[:3]
              a     b     c     d
        0     1     2     3     4
        1   100   200   300   400
        2  1000  2000  3000  4000

        With a boolean mask the same length as the index.

        >>> df.iloc[[True, False, True]]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        With a callable, useful in method chains. The `x` passed
        to the ``lambda`` is the DataFrame being sliced. This selects
        the rows whose index label is even.

        >>> df.iloc[lambda x: x.index % 2 == 0]
              a     b     c     d
        0     1     2     3     4
        2  1000  2000  3000  4000

        **Indexing both axes**

        You can mix the indexer types for the index and columns. Use ``:`` to
        select the entire axis.

        With scalar integers.

        >>> df.iloc[0, 1]
        2

        With lists of integers.

        >>> df.iloc[[0, 2], [1, 3]]
              b     d
        0     2     4
        2  2000  4000

        With `slice` objects.

        >>> df.iloc[1:3, 0:3]
              a     b     c
        1   100   200   300
        2  1000  2000  3000

        With a boolean array whose length matches the columns.

        >>> df.iloc[:, [True, False, True, False]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000

        With a callable function that expects the Series or DataFrame.

        >>> df.iloc[:, lambda df: [0, 2]]
              a     c
        0     1     3
        1   100   300
        2  1000  3000
        """
        return _iLocIndexer("iloc", self)

    @property
    def loc(self) -> _LocIndexer:
        """
        Access a group of rows and columns by label(s) or a boolean array.

        ``.loc[]`` is primarily label based, but may also be used with a
        boolean array.

        Allowed inputs are:

        - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
          interpreted as a *label* of the index, and **never** as an
          integer position along the index).
        - A list or array of labels, e.g. ``['a', 'b', 'c']``.
        - A slice object with labels, e.g. ``'a':'f'``.

          .. warning:: Note that contrary to usual python slices, **both** the
              start and the stop are included

        - A boolean array of the same length as the axis being sliced,
          e.g. ``[True, False, True]``.
        - An alignable boolean Series. The index of the key will be aligned before
          masking.
        - An alignable Index. The Index of the returned selection will be the input.
        - A ``callable`` function with one argument (the calling Series or
          DataFrame) and that returns valid output for indexing (one of the above)

        See more at :ref:`Selection by Label <indexing.label>`.

        Raises
        ------
        KeyError
            If any items are not found.
        IndexingError
            If an indexed key is passed and its index is unalignable to the frame index.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.iloc : Access group of rows and columns by integer position(s).
        DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
                       Series/DataFrame.
        Series.loc : Access group of values using labels.

        Examples
        --------
        **Getting values**

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=['cobra', 'viper', 'sidewinder'],
        ...                   columns=['max_speed', 'shield'])
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4       5
        sidewinder          7       8

        Single label. Note this returns the row as a Series.

        >>> df.loc['viper']
        max_speed    4
        shield       5
        Name: viper, dtype: int64

        List of labels. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[['viper', 'sidewinder']]
                    max_speed  shield
        viper               4       5
        sidewinder          7       8

        Single label for row and column

        >>> df.loc['cobra', 'shield']
        2

        Slice with labels for row and single label for column. As mentioned
        above, note that both the start and stop of the slice are included.

        >>> df.loc['cobra':'viper', 'max_speed']
        cobra    1
        viper    4
        Name: max_speed, dtype: int64

        Boolean list with the same length as the row axis

        >>> df.loc[[False, False, True]]
                    max_speed  shield
        sidewinder          7       8

        Alignable boolean Series:

        >>> df.loc[pd.Series([False, True, False],
        ...                  index=['viper', 'sidewinder', 'cobra'])]
                    max_speed  shield
        sidewinder          7       8

        Index (same behavior as ``df.reindex``)

        >>> df.loc[pd.Index(["cobra", "viper"], name="foo")]
               max_speed  shield
        foo
        cobra          1       2
        viper          4       5

        Conditional that returns a boolean Series

        >>> df.loc[df['shield'] > 6]
                    max_speed  shield
        sidewinder          7       8

        Conditional that returns a boolean Series with column labels specified

        >>> df.loc[df['shield'] > 6, ['max_speed']]
                    max_speed
        sidewinder          7

        Multiple conditional using ``&`` that returns a boolean Series

        >>> df.loc[(df['max_speed'] > 1) & (df['shield'] < 8)]
               max_speed  shield
        viper          4       5

        Multiple conditional using ``|`` that returns a boolean Series

        >>> df.loc[(df['max_speed'] > 4) | (df['shield'] < 5)]
                    max_speed  shield
        cobra               1       2
        sidewinder          7       8

        Please ensure that each condition is wrapped in parentheses ``()``.
        See the :ref:`user guide<indexing.boolean>`
        for more details and explanations of Boolean indexing.

        .. note::
            If you find yourself using 3 or more conditionals in ``.loc[]``,
            consider using :ref:`advanced indexing<advanced.advanced_hierarchical>`.

            See below for using ``.loc[]`` on MultiIndex DataFrames.

        Callable that returns a boolean Series

        >>> df.loc[lambda df: df['shield'] == 8]
                    max_speed  shield
        sidewinder          7       8

        **Setting values**

        Set value for all items matching the list of labels

        >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
        >>> df
                    max_speed  shield
        cobra               1       2
        viper               4      50
        sidewinder          7      50

        Set value for an entire row

        >>> df.loc['cobra'] = 10
        >>> df
                    max_speed  shield
        cobra              10      10
        viper               4      50
        sidewinder          7      50

        Set value for an entire column

        >>> df.loc[:, 'max_speed'] = 30
        >>> df
                    max_speed  shield
        cobra              30      10
        viper              30      50
        sidewinder         30      50

        Set value for rows matching a boolean condition

        >>> df.loc[df['shield'] > 35] = 0
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       0
        sidewinder          0       0

        Add value matching location

        >>> df.loc["viper", "shield"] += 5
        >>> df
                    max_speed  shield
        cobra              30      10
        viper               0       5
        sidewinder          0       0

        Setting using a ``Series`` or a ``DataFrame`` sets the values matching the
        index labels, not the index positions.

        >>> shuffled_df = df.loc[["viper", "cobra", "sidewinder"]]
        >>> df.loc[:] += shuffled_df
        >>> df
                    max_speed  shield
        cobra              60      20
        viper               0      10
        sidewinder          0       0

        **Getting values on a DataFrame with an index that has integer labels**

        Another example using integers for the index

        >>> df = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
        ...                   index=[7, 8, 9], columns=['max_speed', 'shield'])
        >>> df
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        Slice with integer labels for rows. As mentioned above, note that both
        the start and stop of the slice are included.

        >>> df.loc[7:9]
           max_speed  shield
        7          1       2
        8          4       5
        9          7       8

        **Getting values with a MultiIndex**

        A number of examples using a DataFrame with a MultiIndex

        >>> tuples = [
        ...     ('cobra', 'mark i'), ('cobra', 'mark ii'),
        ...     ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
        ...     ('viper', 'mark ii'), ('viper', 'mark iii')
        ... ]
        >>> index = pd.MultiIndex.from_tuples(tuples)
        >>> values = [[12, 2], [0, 4], [10, 20],
        ...           [1, 4], [7, 1], [16, 36]]
        >>> df = pd.DataFrame(values, columns=['max_speed', 'shield'], index=index)
        >>> df
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Single label. Note this returns a DataFrame with a single index.

        >>> df.loc['cobra']
                 max_speed  shield
        mark i          12       2
        mark ii          0       4

        Single index tuple. Note this returns a Series.

        >>> df.loc[('cobra', 'mark ii')]
        max_speed    0
        shield       4
        Name: (cobra, mark ii), dtype: int64

        Single label for row and column. Similar to passing in a tuple, this
        returns a Series.

        >>> df.loc['cobra', 'mark i']
        max_speed    12
        shield        2
        Name: (cobra, mark i), dtype: int64

        Single tuple. Note using ``[[]]`` returns a DataFrame.

        >>> df.loc[[('cobra', 'mark ii')]]
                       max_speed  shield
        cobra mark ii          0       4

        Single tuple for the index with a single label for the column

        >>> df.loc[('cobra', 'mark i'), 'shield']
        2

        Slice from index tuple to single label

        >>> df.loc[('cobra', 'mark i'):'viper']
                             max_speed  shield
        cobra      mark i           12       2
                   mark ii           0       4
        sidewinder mark i           10      20
                   mark ii           1       4
        viper      mark ii           7       1
                   mark iii         16      36

        Slice from index tuple to index tuple

        >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')]
                            max_speed  shield
        cobra      mark i          12       2
                   mark ii          0       4
        sidewinder mark i          10      20
                   mark ii          1       4
        viper      mark ii          7       1

        Please see the :ref:`user guide<advanced.advanced_hierarchical>`
        for more details and explanations of advanced indexing.
        """
        return _LocIndexer("loc", self)

    @property
    def at(self) -> _AtIndexer:
        """
        Access a single value for a row/column label pair.

        Similar to ``loc``, in that both provide label-based lookups. Use
        ``at`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        KeyError
            If getting a value and 'label' does not exist in a DataFrame or Series.

        ValueError
            If row/column label pair is not a tuple or if any label
            from the pair is not a scalar for DataFrame.
            If label is list-like (*excluding* NamedTuple) for Series.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column pair by label.
        DataFrame.iat : Access a single value for a row/column pair by integer
            position.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer
            position(s).
        Series.at : Access a single value by label.
        Series.iat : Access a single value by integer position.
        Series.loc : Access a group of rows by label(s).
        Series.iloc : Access a group of rows by integer position(s).

        Notes
        -----
        See :ref:`Fast scalar value getting and setting <indexing.basics.get_value>`
        for more details.

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   index=[4, 5, 6], columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        4   0   2   3
        5   0   4   1
        6  10  20  30

        Get value at specified row/column pair

        >>> df.at[4, 'B']
        2

        Set value at specified row/column pair

        >>> df.at[4, 'B'] = 10
        >>> df.at[4, 'B']
        10

        Get value within a Series

        >>> df.loc[5].at['B']
        4
        """
        return _AtIndexer("at", self)

    @property
    def iat(self) -> _iAtIndexer:
        """
        Access a single value for a row/column pair by integer position.

        Similar to ``iloc``, in that both provide integer-based lookups. Use
        ``iat`` if you only need to get or set a single value in a DataFrame
        or Series.

        Raises
        ------
        IndexError
            When integer position is out of bounds.

        See Also
        --------
        DataFrame.at : Access a single value for a row/column label pair.
        DataFrame.loc : Access a group of rows and columns by label(s).
        DataFrame.iloc : Access a group of rows and columns by integer position(s).

        Examples
        --------
        >>> df = pd.DataFrame([[0, 2, 3], [0, 4, 1], [10, 20, 30]],
        ...                   columns=['A', 'B', 'C'])
        >>> df
            A   B   C
        0   0   2   3
        1   0   4   1
        2  10  20  30

        Get value at specified row/column pair

        >>> df.iat[1, 2]
        1

        Set value at specified row/column pair

        >>> df.iat[1, 2] = 10
        >>> df.iat[1, 2]
        10

        Get value within a series

        >>> df.loc[0].iat[1]
        2
        """
        return _iAtIndexer("iat", self)

723

724

725class _LocationIndexer(NDFrameIndexerBase):

726 _valid_types: str

727 axis: AxisInt | None = None

728

729 # sub-classes need to set _takeable

730 _takeable: bool

731

732 @final

733 def __call__(self, axis: Axis | None = None) -> Self:

734 # we need to return a copy of ourselves

735 new_self = type(self)(self.name, self.obj)

736

737 if axis is not None:

738 axis_int_none = self.obj._get_axis_number(axis)

739 else:

740 axis_int_none = axis

741 new_self.axis = axis_int_none

742 return new_self

743

744 def _get_setitem_indexer(self, key):

745 """

746 Convert a potentially-label-based key into a positional indexer.

747 """

748 if self.name == "loc":

749 # always holds here bc iloc overrides _get_setitem_indexer

750 self._ensure_listlike_indexer(key)

751

752 if isinstance(key, tuple):

753 for x in key:

754 check_dict_or_set_indexers(x)

755

756 if self.axis is not None:

757 key = _tupleize_axis_indexer(self.ndim, self.axis, key)

758

759 ax = self.obj._get_axis(0)

760

761 if (

762 isinstance(ax, MultiIndex)

763 and self.name != "iloc"

764 and is_hashable(key)

765 and not isinstance(key, slice)

766 ):

767 with suppress(KeyError, InvalidIndexError):

768 # TypeError e.g. passed a bool

769 return ax.get_loc(key)

770

771 if isinstance(key, tuple):

772 with suppress(IndexingError):

773 # suppress "Too many indexers"

774 return self._convert_tuple(key)

775

776 if isinstance(key, range):

777 # GH#45479 test_loc_setitem_range_key

778 key = list(key)

779

780 return self._convert_to_indexer(key, axis=0)

781

@final
def _maybe_mask_setitem_value(self, indexer, value):
    """
    If we have obj.loc[mask] = series_or_frame and series_or_frame has the
    same length as obj, we treat this as obj.loc[mask] = series_or_frame[mask],
    similar to Series.__setitem__.

    Note this is only for loc, not iloc.

    Parameters
    ----------
    indexer : tuple, boolean indexer or other
        Already-converted indexer. Only a 2-tuple ``(rows, cols)`` paired with
        a Series/DataFrame ``value``, or a bare boolean indexer, is rewritten.
    value : object
        Value being assigned.

    Returns
    -------
    tuple
        ``(indexer, value)``, either unchanged or with the boolean row mask
        replaced by integer positions and ``value`` aligned to the target.
    """

    if (
        isinstance(indexer, tuple)
        and len(indexer) == 2
        and isinstance(value, (ABCSeries, ABCDataFrame))
    ):
        pi, icols = indexer
        ndim = value.ndim
        if com.is_bool_indexer(pi) and len(value) == len(pi):
            # integer positions of the True entries of the row mask
            newkey = pi.nonzero()[0]

            if is_scalar_indexer(icols, self.ndim - 1) and ndim == 1:
                # e.g. test_loc_setitem_boolean_mask_allfalse
                #  test_loc_setitem_ndframe_values_alignment
                value = self.obj.iloc._align_series(indexer, value)
                indexer = (newkey, icols)

            elif (
                isinstance(icols, np.ndarray)
                and icols.dtype.kind == "i"
                and len(icols) == 1
            ):
                # a single column selected through a length-1 integer array
                if ndim == 1:
                    # We implicitly broadcast, though numpy does not, see
                    # github.com/pandas-dev/pandas/pull/45501#discussion_r789071825
                    #  test_loc_setitem_ndframe_values_alignment
                    value = self.obj.iloc._align_series(indexer, value)
                    indexer = (newkey, icols)

                elif ndim == 2 and value.shape[1] == 1:
                    #  test_loc_setitem_ndframe_values_alignment
                    value = self.obj.iloc._align_frame(indexer, value)
                    indexer = (newkey, icols)
    elif com.is_bool_indexer(indexer):
        # bare boolean mask: convert to integer positions, value untouched
        indexer = indexer.nonzero()[0]

    return indexer, value

828

@final
def _ensure_listlike_indexer(self, key, axis=None, value=None) -> None:
    """
    Ensure that a list-like of column labels are all present by adding them if
    they do not already exist.

    Works by replacing ``self.obj._mgr`` in place; nothing is returned.

    Parameters
    ----------
    key : list-like of column labels
        Target labels.
    axis : key axis if known
    value : object, optional
        Not referenced anywhere in this method's body.
    """
    column_axis = 1

    # column only exists in 2-dimensional DataFrame
    if self.ndim != 2:
        return

    if isinstance(key, tuple) and len(key) > 1:
        # key may be a tuple if we are .loc
        # if length of key is > 1 set key to column part
        key = key[column_axis]
        axis = column_axis

    if (
        axis == column_axis
        and not isinstance(self.obj.columns, MultiIndex)
        and is_list_like_indexer(key)
        and not com.is_bool_indexer(key)
        and all(is_hashable(k) for k in key)
    ):
        # GH#38148
        # existing columns first, then any labels from key not yet present
        keys = self.obj.columns.union(key, sort=False)
        diff = Index(key).difference(self.obj.columns, sort=False)

        if len(diff):
            # e.g. if we are doing df.loc[:, ["A", "B"]] = 7 and "B"
            #  is a new column, add the new columns with dtype=np.void
            #  so that later when we go through setitem_single_column
            #  we will use isetitem.  Without this, the reindex_axis
            #  below would create float64 columns in this example, which
            #  would successfully hold 7, so we would end up with the wrong
            #  dtype.
            indexer = np.arange(len(keys), dtype=np.intp)
            # -1 marks the appended positions, which have no source column
            indexer[len(self.obj.columns) :] = -1
            new_mgr = self.obj._mgr.reindex_indexer(
                keys, indexer=indexer, axis=0, only_slice=True, use_na_proxy=True
            )
            self.obj._mgr = new_mgr
            return

        self.obj._mgr = self.obj._mgr.reindex_axis(keys, axis=0, only_slice=True)

881

@final
def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key`` through this indexer.

    Steps: optionally emit a chained-assignment warning based on the
    reference count of ``self.obj``, normalise ``key`` (materialise
    iterators, call callables with ``self.obj``), convert it to a positional
    indexer, and delegate to ``iloc._setitem_with_indexer``.
    """
    # NOTE(review): the thresholds below depend on exactly how many references
    # to self.obj exist at this point; do not introduce extra locals bound to
    # self.obj or wrap this in helper frames without re-checking the counts
    # and the stacklevel.
    if not PYPY and using_copy_on_write():
        if sys.getrefcount(self.obj) <= 2:
            warnings.warn(
                _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
            )
    elif not PYPY and not using_copy_on_write():
        ctr = sys.getrefcount(self.obj)
        ref_count = 2
        if not warn_copy_on_write() and _check_cacher(self.obj):
            # see https://github.com/pandas-dev/pandas/pull/56060#discussion_r1399245221
            ref_count += 1
        if ctr <= ref_count:
            warnings.warn(
                _chained_assignment_warning_msg, FutureWarning, stacklevel=2
            )

    check_dict_or_set_indexers(key)
    if isinstance(key, tuple):
        # materialise iterators first, then resolve callables element-wise
        key = tuple(list(x) if is_iterator(x) else x for x in key)
        key = tuple(com.apply_if_callable(x, self.obj) for x in key)
    else:
        maybe_callable = com.apply_if_callable(key, self.obj)
        key = self._check_deprecated_callable_usage(key, maybe_callable)
    indexer = self._get_setitem_indexer(key)
    self._has_valid_setitem_indexer(key)

    # the actual write always goes through the iloc indexer; self.name is
    # passed along as the third argument
    iloc = self if self.name == "iloc" else self.obj.iloc
    iloc._setitem_with_indexer(indexer, value, self.name)

912

def _validate_key(self, key, axis: AxisInt):
    """
    Ensure that key is valid for current indexer.

    This base implementation only raises AbstractMethodError; the checks
    described below have to be supplied by an overriding implementation.

    Parameters
    ----------
    key : scalar, slice or list-like
        Key requested.
    axis : int
        Dimension on which the indexing is being made.

    Raises
    ------
    TypeError
        If the key (or some element of it) has wrong type.
    IndexError
        If the key (or some element of it) is out of bounds.
    KeyError
        If the key was not found.
    AbstractMethodError
        Always, when this base implementation is called directly.
    """
    raise AbstractMethodError(self)

934

935 @final

936 def _expand_ellipsis(self, tup: tuple) -> tuple:

937 """

938 If a tuple key includes an Ellipsis, replace it with an appropriate

939 number of null slices.

940 """

941 if any(x is Ellipsis for x in tup):

942 if tup.count(Ellipsis) > 1:

943 raise IndexingError(_one_ellipsis_message)

944

945 if len(tup) == self.ndim:

946 # It is unambiguous what axis this Ellipsis is indexing,

947 # treat as a single null slice.

948 i = tup.index(Ellipsis)

949 # FIXME: this assumes only one Ellipsis

950 new_key = tup[:i] + (_NS,) + tup[i + 1 :]

951 return new_key

952

953 # TODO: other cases? only one test gets here, and that is covered

954 # by _validate_key_length

955 return tup

956

957 @final

958 def _validate_tuple_indexer(self, key: tuple) -> tuple:

959 """

960 Check the key for valid keys across my indexer.

961 """

962 key = self._validate_key_length(key)

963 key = self._expand_ellipsis(key)

964 for i, k in enumerate(key):

965 try:

966 self._validate_key(k, i)

967 except ValueError as err:

968 raise ValueError(

969 "Location based indexing can only have "

970 f"[{self._valid_types}] types"

971 ) from err

972 return key

973

974 @final

975 def _is_nested_tuple_indexer(self, tup: tuple) -> bool:

976 """

977 Returns

978 -------

979 bool

980 """

981 if any(isinstance(ax, MultiIndex) for ax in self.obj.axes):

982 return any(is_nested_tuple(tup, ax) for ax in self.obj.axes)

983 return False

984

985 @final

986 def _convert_tuple(self, key: tuple) -> tuple:

987 # Note: we assume _tupleize_axis_indexer has been called, if necessary.

988 self._validate_key_length(key)

989 keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]

990 return tuple(keyidx)

991

992 @final

993 def _validate_key_length(self, key: tuple) -> tuple:

994 if len(key) > self.ndim:

995 if key[0] is Ellipsis:

996 # e.g. Series.iloc[..., 3] reduces to just Series.iloc[3]

997 key = key[1:]

998 if Ellipsis in key:

999 raise IndexingError(_one_ellipsis_message)

1000 return self._validate_key_length(key)

1001 raise IndexingError("Too many indexers")

1002 return key

1003

1004 @final

1005 def _getitem_tuple_same_dim(self, tup: tuple):

1006 """

1007 Index with indexers that should return an object of the same dimension

1008 as self.obj.

1009

1010 This is only called after a failed call to _getitem_lowerdim.

1011 """

1012 retval = self.obj

1013 # Selecting columns before rows is significantly faster

1014 start_val = (self.ndim - len(tup)) + 1

1015 for i, key in enumerate(reversed(tup)):

1016 i = self.ndim - i - start_val

1017 if com.is_null_slice(key):

1018 continue

1019

1020 retval = getattr(retval, self.name)._getitem_axis(key, axis=i)

1021 # We should never have retval.ndim < self.ndim, as that should

1022 # be handled by the _getitem_lowerdim call above.

1023 assert retval.ndim == self.ndim

1024

1025 if retval is self.obj:

1026 # if all axes were a null slice (`df.loc[:, :]`), ensure we still

1027 # return a new object (https://github.com/pandas-dev/pandas/pull/49469)

1028 retval = retval.copy(deep=False)

1029

1030 return retval

1031

@final
def _getitem_lowerdim(self, tup: tuple):
    """
    Resolve a tuple key by taking a section along the first label-like entry.

    The remaining entries are then applied to that section through the same
    indexer (``self.name``).

    Raises
    ------
    IndexingError
        "not applicable" when no entry of ``tup`` is label-like; also
        whatever ``_validate_key_length`` raises for an over-long key.
    """
    # we can directly get the axis result since the axis is specified
    if self.axis is not None:
        axis = self.obj._get_axis_number(self.axis)
        return self._getitem_axis(tup, axis=axis)

    # we may have a nested tuples indexer here
    if self._is_nested_tuple_indexer(tup):
        return self._getitem_nested_tuple(tup)

    # we maybe be using a tuple to represent multiple dimensions here
    ax0 = self.obj._get_axis(0)
    # ...but iloc should handle the tuple as simple integer-location
    # instead of checking it as multiindex representation (GH 13797)
    if (
        isinstance(ax0, MultiIndex)
        and self.name != "iloc"
        and not any(isinstance(x, slice) for x in tup)
    ):
        # Note: in all extant test cases, replacing the slice condition with
        #  `all(is_hashable(x) or com.is_null_slice(x) for x in tup)`
        #  is equivalent.
        #  (see the other place where we call _handle_lowerdim_multi_index_axis0)
        # An IndexingError here is swallowed and the generic path below runs.
        with suppress(IndexingError):
            return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(tup)

    tup = self._validate_key_length(tup)

    for i, key in enumerate(tup):
        if is_label_like(key):
            # We don't need to check for tuples here because those are
            #  caught by the _is_nested_tuple_indexer check above.
            section = self._getitem_axis(key, axis=i)

            # We should never have a scalar section here, because
            #  _getitem_lowerdim is only called after a check for
            #  is_scalar_access, which that would be.
            if section.ndim == self.ndim:
                # we're in the middle of slicing through a MultiIndex
                # revise the key wrt to `section` by inserting an _NS
                new_key = tup[:i] + (_NS,) + tup[i + 1 :]

            else:
                # Note: the section.ndim == self.ndim check above
                #  rules out having DataFrame here, so we dont need to worry
                #  about transposing.
                # the axis just consumed is gone, so drop its entry
                new_key = tup[:i] + tup[i + 1 :]

                if len(new_key) == 1:
                    new_key = new_key[0]

            # Slices should return views, but calling iloc/loc with a null
            #  slice returns a new object.
            if com.is_null_slice(new_key):
                return section
            # This is an elided recursive call to iloc/loc
            return getattr(section, self.name)[new_key]

    raise IndexingError("not applicable")

1092

@final
def _getitem_nested_tuple(self, tup: tuple):
    """
    Resolve a tuple key whose entries may themselves be tuples, lists or slices.

    When ``tup`` has more entries than ``self.ndim``, the whole tuple is
    treated as a selector for one axis. Otherwise each non-null entry is
    applied to its axis, last axis first, until a scalar is reached.

    Raises
    ------
    ValueError
        "Too many indices" if the key is over-long and this is not ``loc``.
    IndexingError
        "Too many indexers" for an over-long key on a Series that contains a
        tuple entry and did not qualify for the MultiIndex axis-0 path.
    """
    # we have a nested tuple so have at least 1 multi-index level
    # we should be able to match up the dimensionality here

    def _contains_slice(x: object) -> bool:
        # Check if object is a slice or a tuple containing a slice
        if isinstance(x, tuple):
            return any(isinstance(v, slice) for v in x)
        elif isinstance(x, slice):
            return True
        return False

    for key in tup:
        check_dict_or_set_indexers(key)

    # we have too many indexers for our dim, but have at least 1
    # multi-index dimension, try to see if we have something like
    # a tuple passed to a series with a multi-index
    if len(tup) > self.ndim:
        if self.name != "loc":
            # This should never be reached, but let's be explicit about it
            raise ValueError("Too many indices")  # pragma: no cover
        if all(
            (is_hashable(x) and not _contains_slice(x)) or com.is_null_slice(x)
            for x in tup
        ):
            # GH#10521 Series should reduce MultiIndex dimensions instead of
            #  DataFrame, IndexingError is not raised when slice(None,None,None)
            #  with one row.
            with suppress(IndexingError):
                return cast(_LocIndexer, self)._handle_lowerdim_multi_index_axis0(
                    tup
                )
        elif isinstance(self.obj, ABCSeries) and any(
            isinstance(k, tuple) for k in tup
        ):
            # GH#35349 Raise if tuple in tuple for series
            # Do this after the all-hashable-or-null-slice check so that
            #  we are only getting non-hashable tuples, in particular ones
            #  that themselves contain a slice entry
            # See test_loc_series_getitem_too_many_dimensions
            raise IndexingError("Too many indexers")

        # this is a series with a multi-index specified a tuple of
        # selectors
        # ``or 0`` also maps an unset (None) axis to axis 0
        axis = self.axis or 0
        return self._getitem_axis(tup, axis=axis)

    # handle the multi-axis by taking sections and reducing
    # this is iterative
    obj = self.obj
    # GH#41369 Loop in reverse order ensures indexing along columns before rows
    # which selects only necessary blocks which avoids dtype conversion if possible
    axis = len(tup) - 1
    for key in tup[::-1]:
        if com.is_null_slice(key):
            # nothing to select on this axis; just move to the previous one
            axis -= 1
            continue

        obj = getattr(obj, self.name)._getitem_axis(key, axis=axis)
        axis -= 1

        # if we have a scalar, we are done
        if is_scalar(obj) or not hasattr(obj, "ndim"):
            break

    return obj

1161

1162 def _convert_to_indexer(self, key, axis: AxisInt):

1163 raise AbstractMethodError(self)

1164

1165 def _check_deprecated_callable_usage(self, key: Any, maybe_callable: T) -> T:

1166 # GH53533

1167 if self.name == "iloc" and callable(key) and isinstance(maybe_callable, tuple):

1168 warnings.warn(

1169 "Returning a tuple from a callable with iloc "

1170 "is deprecated and will be removed in a future version",

1171 FutureWarning,

1172 stacklevel=find_stack_level(),

1173 )

1174 return maybe_callable

1175

@final
def __getitem__(self, key):
    """
    Entry point for ``obj.loc[key]`` / ``obj.iloc[key]`` style lookups.

    A plain ``tuple`` key is treated as one indexer per axis; anything
    else is applied to ``self.axis`` (or axis 0 when that is unset).
    """
    check_dict_or_set_indexers(key)

    if type(key) is not tuple:
        # we by definition only have the 0th axis
        axis = self.axis or 0

        resolved = com.apply_if_callable(key, self.obj)
        resolved = self._check_deprecated_callable_usage(key, resolved)
        return self._getitem_axis(resolved, axis=axis)

    # materialize iterators first, then evaluate callables against the object
    materialized = [list(x) if is_iterator(x) else x for x in key]
    key = tuple(com.apply_if_callable(x, self.obj) for x in materialized)

    if self._is_scalar_access(key):
        # shortcut to a single value lookup
        return self.obj._get_value(*key, takeable=self._takeable)
    return self._getitem_tuple(key)

1192

1193 def _is_scalar_access(self, key: tuple):

1194 raise NotImplementedError()

1195

1196 def _getitem_tuple(self, tup: tuple):

1197 raise AbstractMethodError(self)

1198

1199 def _getitem_axis(self, key, axis: AxisInt):

1200 raise NotImplementedError()

1201

1202 def _has_valid_setitem_indexer(self, indexer) -> bool:

1203 raise AbstractMethodError(self)

1204

@final
def _getbool_axis(self, key, axis: AxisInt):
    """
    Select along ``axis`` using a boolean mask.

    The mask is passed through ``check_bool_indexer`` together with the
    axis labels, and the positions where it is set are then taken.
    The caller is responsible for passing a non-None ``axis``.
    """
    axis_labels = self.obj._get_axis(axis)
    mask = check_bool_indexer(axis_labels, key)
    positions = mask.nonzero()[0]
    return self.obj._take_with_is_copy(positions, axis=axis)

1212

1213

@doc(IndexingMixin.loc)
class _LocIndexer(_LocationIndexer):
    # Label-based indexer. No class docstring is written here on purpose:
    # the ``doc`` decorator above supplies the documentation.
    _takeable: bool = False
    _valid_types = (
        "labels (MUST BE IN THE INDEX), slices of labels (BOTH "
        "endpoints included! Can be slices of integers if the "
        "index is integers), listlike of labels, boolean"
    )

    # -------------------------------------------------------------------
    # Key Checks

    @doc(_LocationIndexer._validate_key)
    def _validate_key(self, key, axis: Axis):
        # Accepted keys:
        #  - a collection of labels (their presence is checked later)
        #  - a slice of labels (where start-end in labels)
        #  - a slice of integers (only if in the labels)
        #  - a boolean, but never inside a slice and only with a boolean index
        ax = self.obj._get_axis(axis)

        if isinstance(key, bool):
            axis_is_boolean = (
                is_bool_dtype(ax.dtype)
                or ax.dtype.name == "boolean"
                or (
                    isinstance(ax, MultiIndex)
                    and is_bool_dtype(ax.get_level_values(0).dtype)
                )
            )
            if not axis_is_boolean:
                raise KeyError(
                    f"{key}: boolean label can not be used without a boolean index"
                )

        if isinstance(key, slice):
            if isinstance(key.start, bool) or isinstance(key.stop, bool):
                raise TypeError(f"{key}: boolean values can not be used in a slice")

    def _has_valid_setitem_indexer(self, indexer) -> bool:
        """Accept every indexer for setting; always returns True."""
        return True

    def _is_scalar_access(self, key: tuple) -> bool:
        """
        Decide whether ``key`` can take the single-value shortcut.

        This shortcut gives ``.loc`` the equivalent access of ``.at``:
        it (a) avoids getting things via sections, to minimize dtype
        changes, and (b) provides a performant path.

        Returns
        -------
        bool
        """
        if len(key) != self.ndim:
            return False

        for label, ax in zip(key, self.obj.axes):
            if not is_scalar(label) or isinstance(ax, MultiIndex):
                return False

            if isinstance(label, str) and ax._supports_partial_string_indexing:
                # partial string indexing, df.loc['2000', 'A']
                # should not be considered scalar
                return False

            if not ax._index_as_unique:
                return False

        return True

    # -------------------------------------------------------------------
    # MultiIndex Handling

    def _multi_take_opportunity(self, tup: tuple) -> bool:
        """
        Check whether ``_multi_take`` can be used for ``tup``.

        Currently the limit is that all axes being indexed must be indexed
        with list-likes, none of which is a boolean indexer.

        Parameters
        ----------
        tup : tuple
            Tuple of indexers, one per axis.

        Returns
        -------
        bool
            Whether the current indexing can be passed through `_multi_take`.
        """
        every_axis_listlike = all(is_list_like_indexer(x) for x in tup)
        # boolean indexers are just too complicated for this path
        return every_axis_listlike and not any(com.is_bool_indexer(x) for x in tup)

    def _multi_take(self, tup: tuple):
        """
        Build the indexers for every key in ``tup`` and take them in one go.

        Doing the take once, rather than once per dimension, improves
        efficiency.

        Parameters
        ----------
        tup : tuple
            Tuple of indexers, one per axis.

        Returns
        -------
        values: same type as the object being indexed
        """
        # GH 836
        reindexers = {}
        for key, axis in zip(tup, self.obj._AXIS_ORDERS):
            reindexers[axis] = self._get_listlike_indexer(key, axis)
        return self.obj._reindex_with_indexers(reindexers, copy=True, allow_dups=True)

    # -------------------------------------------------------------------

    def _getitem_iterable(self, key, axis: AxisInt):
        """
        Index the current object with an iterable collection of labels.

        Parameters
        ----------
        key : iterable
            Targeted labels.
        axis : int
            Dimension on which the indexing is being made.

        Raises
        ------
        KeyError
            Propagated from ``_get_listlike_indexer`` when labels cannot
            be resolved.

        Returns
        -------
        scalar, DataFrame, or Series: indexed value(s).
        """
        # boolean indexers are assumed to have been handled before this point
        self._validate_key(key, axis)

        new_labels, positions = self._get_listlike_indexer(key, axis)
        reindexers = {axis: [new_labels, positions]}
        return self.obj._reindex_with_indexers(reindexers, copy=True, allow_dups=True)

    def _getitem_tuple(self, tup: tuple):
        """
        Look up a tuple key holding one indexer per axis.

        The lower-dimensional path is tried first; if it raises
        ``IndexingError`` the tuple is validated and handled at full
        dimensionality instead.
        """
        try:
            tup = self._expand_ellipsis(tup)
            return self._getitem_lowerdim(tup)
        except IndexingError:
            pass

        # no multi-index, so validate all of the indexers
        tup = self._validate_tuple_indexer(tup)

        # ugly hack for GH #836
        if self._multi_take_opportunity(tup):
            return self._multi_take(tup)

        return self._getitem_tuple_same_dim(tup)

    def _get_label(self, label, axis: AxisInt):
        """Return the cross-section for ``label`` along ``axis``."""
        # GH#5567 this will fail if the label is not present in the axis.
        return self.obj.xs(label, axis=axis)

    def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
        """
        Look up ``tup`` as a single label on an axis-0 MultiIndex.

        Raises
        ------
        KeyError
            If the lookup fails and the number of indexers is above
            ``self.ndim`` but within the number of index levels.
        IndexingError
            If the lookup fails in any other case.
        """
        axis = self.axis or 0
        try:
            # fast path for series or for tup devoid of slices
            return self._get_label(tup, axis=axis)

        except KeyError as ek:
            # raise KeyError if number of indexers match
            # else IndexingError will be raised
            n_indexers = len(tup)
            if self.ndim < n_indexers and n_indexers <= self.obj.index.nlevels:
                raise ek
            raise IndexingError("No label returned") from ek

    def _getitem_axis(self, key, axis: AxisInt):
        """
        Look up ``key`` along a single ``axis`` using labels.

        Dispatches on the kind of key: slice, boolean indexer, list-like,
        nested tuple on a MultiIndex, or a single label.
        """
        key = item_from_zerodim(key)
        if is_iterator(key):
            key = list(key)
        if key is Ellipsis:
            key = slice(None)

        labels = self.obj._get_axis(axis)

        tuple_on_multi = isinstance(key, tuple) and isinstance(labels, MultiIndex)
        if tuple_on_multi:
            key = tuple(key)

        if isinstance(key, slice):
            self._validate_key(key, axis)
            return self._get_slice_axis(key, axis=axis)

        if com.is_bool_indexer(key):
            return self._getbool_axis(key, axis=axis)

        if is_list_like_indexer(key):
            if not tuple_on_multi:
                # an iterable multi-selection
                if hasattr(key, "ndim") and key.ndim > 1:
                    raise ValueError("Cannot index with multidimensional key")

                return self._getitem_iterable(key, axis=axis)

            # nested tuple slicing
            if is_nested_tuple(key, labels):
                locs = labels.get_locs(key)
                indexer: list[slice | npt.NDArray[np.intp]]
                indexer = [slice(None)] * self.ndim
                indexer[axis] = locs
                return self.obj.iloc[tuple(indexer)]

        # fall thru to straight lookup
        self._validate_key(key, axis)
        return self._get_label(key, axis=axis)

    def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
        """
        Slice along ``axis`` by label.

        The caller is responsible for passing a non-None ``axis``.
        """
        target = self.obj
        if not need_slice(slice_obj):
            return target.copy(deep=False)

        axis_labels = target._get_axis(axis)
        indexer = axis_labels.slice_indexer(
            slice_obj.start, slice_obj.stop, slice_obj.step
        )

        if not isinstance(indexer, slice):
            # DatetimeIndex overrides Index.slice_indexer and may
            # return a DatetimeIndex instead of a slice object.
            return target.take(indexer, axis=axis)
        return target._slice(indexer, axis=axis)

    def _convert_to_indexer(self, key, axis: AxisInt):
        """
        Convert a label-based key into an indexer for ``axis``.

        Label-based interpretation is always preferred. A label that
        cannot be found may be returned as ``{"key": key}`` rather than
        raising.

        Raises
        ------
        IndexingError
            If a multi-element tuple is used on a one-dimensional object
            without a MultiIndex, or a nested tuple is used on a
            one-dimensional object.
        """
        labels = self.obj._get_axis(axis)

        if isinstance(key, slice):
            return labels._convert_slice_indexer(key, kind="loc")

        on_multi = isinstance(labels, MultiIndex)
        key_is_tuple = isinstance(key, tuple)

        if key_is_tuple and not on_multi and self.ndim < 2 and len(key) > 1:
            raise IndexingError("Too many indexers")

        # Slices are not valid keys passed in by the user,
        # even though they are hashable in Python 3.12
        contains_slice = key_is_tuple and any(isinstance(v, slice) for v in key)

        if is_scalar(key) or (on_multi and is_hashable(key) and not contains_slice):
            # Otherwise get_loc will raise InvalidIndexError

            # if we are a label return me
            try:
                return labels.get_loc(key)
            except LookupError:
                if key_is_tuple and on_multi and len(key) == labels.nlevels:
                    return {"key": key}
                raise
            except InvalidIndexError:
                # GH35015, using datetime as column indices raises exception
                if not on_multi:
                    raise
            except ValueError:
                if is_integer(key):
                    return {"key": key}
                raise

        if is_nested_tuple(key, labels):
            if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
                # GH#35349 Raise if tuple in tuple for series
                raise IndexingError("Too many indexers")
            return labels.get_locs(key)

        if is_list_like_indexer(key):
            if is_iterator(key):
                key = list(key)

            if com.is_bool_indexer(key):
                return check_bool_indexer(labels, key)
            return self._get_listlike_indexer(key, axis)[1]

        try:
            return labels.get_loc(key)
        except LookupError:
            # allow a not found key only if we are a setter
            if not is_list_like_indexer(key):
                return {"key": key}
            raise

    def _get_listlike_indexer(self, key, axis: AxisInt):
        """
        Transform a list-like of keys into a new index and an indexer.

        Parameters
        ----------
        key : list-like
            Targeted labels.
        axis: int
            Dimension on which the indexing is being made.

        Raises
        ------
        KeyError
            If at least one key was requested but none was found.

        Returns
        -------
        keyarr: Index
            New index (coinciding with 'key' if the axis is unique).
        values : array-like
            Indexer for the return object, -1 denotes keys not found.
        """
        target_axis = self.obj._get_axis(axis)
        name_of_axis = self.obj._get_axis_name(axis)

        new_index, positions = target_axis._get_indexer_strict(key, name_of_axis)

        return new_index, positions

1561

1562

1563@doc(IndexingMixin.iloc)

1564class _iLocIndexer(_LocationIndexer):

1565 _valid_types = (

1566 "integer, integer slice (START point is INCLUDED, END "

1567 "point is EXCLUDED), listlike of integers, boolean array"

1568 )

1569 _takeable = True

1570

1571 # -------------------------------------------------------------------

1572 # Key Checks

1573

1574 def _validate_key(self, key, axis: AxisInt):

1575 if com.is_bool_indexer(key):

1576 if hasattr(key, "index") and isinstance(key.index, Index):

1577 if key.index.inferred_type == "integer":

1578 raise NotImplementedError(

1579 "iLocation based boolean "

1580 "indexing on an integer type "

1581 "is not available"

1582 )

1583 raise ValueError(

1584 "iLocation based boolean indexing cannot use "

1585 "an indexable as a mask"

1586 )

1587 return

1588

1589 if isinstance(key, slice):

1590 return

1591 elif is_integer(key):

1592 self._validate_integer(key, axis)

1593 elif isinstance(key, tuple):

1594 # a tuple should already have been caught by this point

1595 # so don't treat a tuple as a valid indexer

1596 raise IndexingError("Too many indexers")

1597 elif is_list_like_indexer(key):

1598 if isinstance(key, ABCSeries):

1599 arr = key._values

1600 elif is_array_like(key):

1601 arr = key

1602 else:

1603 arr = np.array(key)

1604 len_axis = len(self.obj._get_axis(axis))

1605

1606 # check that the key has a numeric dtype

1607 if not is_numeric_dtype(arr.dtype):

1608 raise IndexError(f".iloc requires numeric indexers, got {arr}")

1609

1610 # check that the key does not exceed the maximum size of the index

1611 if len(arr) and (arr.max() >= len_axis or arr.min() < -len_axis):

1612 raise IndexError("positional indexers are out-of-bounds")

1613 else:

1614 raise ValueError(f"Can only index by location with a [{self._valid_types}]")

1615

1616 def _has_valid_setitem_indexer(self, indexer) -> bool:

1617 """

1618 Validate that a positional indexer cannot enlarge its target

1619 will raise if needed, does not modify the indexer externally.

1620

1621 Returns

1622 -------

1623 bool

1624 """

1625 if isinstance(indexer, dict):

1626 raise IndexError("iloc cannot enlarge its target object")

1627

1628 if isinstance(indexer, ABCDataFrame):

1629 raise TypeError(

1630 "DataFrame indexer for .iloc is not supported. "

1631 "Consider using .loc with a DataFrame indexer for automatic alignment.",

1632 )

1633

1634 if not isinstance(indexer, tuple):

1635 indexer = _tuplify(self.ndim, indexer)

1636

1637 for ax, i in zip(self.obj.axes, indexer):

1638 if isinstance(i, slice):

1639 # should check the stop slice?

1640 pass

1641 elif is_list_like_indexer(i):

1642 # should check the elements?

1643 pass

1644 elif is_integer(i):

1645 if i >= len(ax):

1646 raise IndexError("iloc cannot enlarge its target object")

1647 elif isinstance(i, dict):

1648 raise IndexError("iloc cannot enlarge its target object")

1649

1650 return True

1651

1652 def _is_scalar_access(self, key: tuple) -> bool:

1653 """

1654 Returns

1655 -------

1656 bool

1657 """

1658 # this is a shortcut accessor to both .loc and .iloc

1659 # that provide the equivalent access of .at and .iat

1660 # a) avoid getting things via sections and (to minimize dtype changes)

1661 # b) provide a performant path

1662 if len(key) != self.ndim:

1663 return False

1664

1665 return all(is_integer(k) for k in key)

1666

1667 def _validate_integer(self, key: int | np.integer, axis: AxisInt) -> None:

1668 """

1669 Check that 'key' is a valid position in the desired axis.

1670

1671 Parameters

1672 ----------

1673 key : int

1674 Requested position.

1675 axis : int

1676 Desired axis.

1677

1678 Raises

1679 ------

1680 IndexError

1681 If 'key' is not a valid position in axis 'axis'.

1682 """

1683 len_axis = len(self.obj._get_axis(axis))

1684 if key >= len_axis or key < -len_axis:

1685 raise IndexError("single positional indexer is out-of-bounds")

1686

1687 # -------------------------------------------------------------------

1688

1689 def _getitem_tuple(self, tup: tuple):

1690 tup = self._validate_tuple_indexer(tup)

1691 with suppress(IndexingError):

1692 return self._getitem_lowerdim(tup)

1693

1694 return self._getitem_tuple_same_dim(tup)

1695

1696 def _get_list_axis(self, key, axis: AxisInt):

1697 """

1698 Return Series values by list or array of integers.

1699

1700 Parameters

1701 ----------

1702 key : list-like positional indexer

1703 axis : int

1704

1705 Returns

1706 -------

1707 Series object

1708

1709 Notes

1710 -----

1711 `axis` can only be zero.

1712 """

1713 try:

1714 return self.obj._take_with_is_copy(key, axis=axis)

1715 except IndexError as err:

1716 # re-raise with different error message, e.g. test_getitem_ndarray_3d

1717 raise IndexError("positional indexers are out-of-bounds") from err

1718

1719 def _getitem_axis(self, key, axis: AxisInt):

1720 if key is Ellipsis:

1721 key = slice(None)

1722 elif isinstance(key, ABCDataFrame):

1723 raise IndexError(

1724 "DataFrame indexer is not allowed for .iloc\n"

1725 "Consider using .loc for automatic alignment."

1726 )

1727

1728 if isinstance(key, slice):

1729 return self._get_slice_axis(key, axis=axis)

1730

1731 if is_iterator(key):

1732 key = list(key)

1733

1734 if isinstance(key, list):

1735 key = np.asarray(key)

1736

1737 if com.is_bool_indexer(key):

1738 self._validate_key(key, axis)

1739 return self._getbool_axis(key, axis=axis)

1740

1741 # a list of integers

1742 elif is_list_like_indexer(key):

1743 return self._get_list_axis(key, axis=axis)

1744

1745 # a single integer

1746 else:

1747 key = item_from_zerodim(key)

1748 if not is_integer(key):

1749 raise TypeError("Cannot index by location index with a non-integer key")

1750

1751 # validate the location

1752 self._validate_integer(key, axis)

1753

1754 return self.obj._ixs(key, axis=axis)

1755

def _get_slice_axis(self, slice_obj: slice, axis: AxisInt):
    """
    Slice along ``axis`` by position.

    The caller is responsible for passing a non-None ``axis``.
    """
    target = self.obj

    if not need_slice(slice_obj):
        # nothing to slice: hand back a shallow copy
        return target.copy(deep=False)

    axis_labels = target._get_axis(axis)
    axis_labels._validate_positional_slice(slice_obj)
    return target._slice(slice_obj, axis=axis)

1766

1767 def _convert_to_indexer(self, key, axis: AxisInt):

1768 """

1769 Much simpler as we only have to deal with our valid types.

1770 """

1771 return key

1772

1773 def _get_setitem_indexer(self, key):

1774 # GH#32257 Fall through to let numpy do validation

1775 if is_iterator(key):

1776 key = list(key)

1777

1778 if self.axis is not None:

1779 key = _tupleize_axis_indexer(self.ndim, self.axis, key)

1780

1781 return key

1782

1783 # -------------------------------------------------------------------

1784

def _setitem_with_indexer(self, indexer, value, name: str = "iloc"):
    """
    _setitem_with_indexer is for setting values on a Series/DataFrame
    using positional indexers.

    If the relevant keys are not present, the Series/DataFrame may be
    expanded.

    This method is currently broken when dealing with non-unique Indexes,
    since it goes from positional indexers back to labels when calling
    BlockManager methods, see GH#12991, GH#22046, GH#15686.

    Parameters
    ----------
    indexer : object
        A positional indexer, or a tuple with one entry per axis. A dict
        entry marks a key that is missing from the axis and triggers
        expansion.
    value : object
        The value(s) to set.
    name : str, default "iloc"
        Name of the calling indexer. ``"loc"`` additionally runs
        ``_maybe_mask_setitem_value`` before setting.
    """
    info_axis = self.obj._info_axis_number

    # maybe partial set
    take_split_path = not self.obj._mgr.is_single_block

    if not take_split_path and isinstance(value, ABCDataFrame):
        # Avoid cast of values
        take_split_path = not value._mgr.is_single_block

    # if there is only one block/type, still have to take split path
    # unless the block is one-dimensional or it can hold the value
    if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
        # in case of dict, keys are indices
        val = list(value.values()) if isinstance(value, dict) else value
        arr = self.obj._mgr.arrays[0]
        take_split_path = not can_hold_element(
            arr, extract_array(val, extract_numpy=True)
        )

    # if we have any multi-indexes that have non-trivial slices
    # (not null slices) then we must take the split path, xref
    # GH 10360, GH 27841
    if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes):
        for i, ax in zip(indexer, self.obj.axes):
            if isinstance(ax, MultiIndex) and not (
                is_integer(i) or com.is_null_slice(i)
            ):
                take_split_path = True
                break

    if isinstance(indexer, tuple):
        # Rebuild the tuple, replacing each missing-key dict with the
        # position of the newly added label.
        nindexer = []
        for i, idx in enumerate(indexer):
            if isinstance(idx, dict):
                # reindex the axis to the new value
                # and set inplace
                key, _ = convert_missing_indexer(idx)

                # if this is the items axes, then take the main missing
                # path first
                # this correctly sets the dtype and avoids cache issues
                # essentially this separates out the block that is needed
                # to possibly be modified
                if self.ndim > 1 and i == info_axis:
                    # add the new item, and set the value
                    # must have all defined axes if we have a scalar
                    # or a list-like on the non-info axes if we have a
                    # list-like
                    if not len(self.obj):
                        if not is_list_like_indexer(value):
                            raise ValueError(
                                "cannot set a frame with no "
                                "defined index and a scalar"
                            )
                        self.obj[key] = value
                        return

                    # add a new item with the dtype setup
                    if com.is_null_slice(indexer[0]):
                        # We are setting an entire column
                        self.obj[key] = value
                        return
                    elif is_array_like(value):
                        # GH#42099
                        arr = extract_array(value, extract_numpy=True)
                        # an all -1 taker yields a column with one entry
                        # per row of self.obj, taken from arr
                        taker = -1 * np.ones(len(self.obj), dtype=np.intp)
                        empty_value = algos.take_nd(arr, taker)
                        if not isinstance(value, ABCSeries):
                            # if not Series (in which case we need to align),
                            # we can short-circuit
                            if (
                                isinstance(arr, np.ndarray)
                                and arr.ndim == 1
                                and len(arr) == 1
                            ):
                                # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
                                arr = arr[0, ...]
                            empty_value[indexer[0]] = arr
                            self.obj[key] = empty_value
                            return

                        self.obj[key] = empty_value
                    elif not is_list_like(value):
                        self.obj[key] = construct_1d_array_from_inferred_fill_value(
                            value, len(self.obj)
                        )
                    else:
                        # FIXME: GH#42099#issuecomment-864326014
                        self.obj[key] = infer_fill_value(value)

                    # the new column now exists; redo the set with an
                    # indexer that no longer contains missing-key dicts
                    new_indexer = convert_from_missing_indexer_tuple(
                        indexer, self.obj.axes
                    )
                    self._setitem_with_indexer(new_indexer, value, name)

                    return

                # reindex the axis
                # make sure to clear the cache because we are
                # just replacing the block manager here
                # so the object is the same
                index = self.obj._get_axis(i)
                with warnings.catch_warnings():
                    # TODO: re-issue this with setitem-specific message?
                    warnings.filterwarnings(
                        "ignore",
                        "The behavior of Index.insert with object-dtype "
                        "is deprecated",
                        category=FutureWarning,
                    )
                    labels = index.insert(len(index), key)

                # We are expanding the Series/DataFrame values to match
                # the length of the new index `labels`. GH#40096 ensure
                # this is valid even if the index has duplicates.
                taker = np.arange(len(index) + 1, dtype=np.intp)
                taker[-1] = -1
                reindexers = {i: (labels, taker)}
                new_obj = self.obj._reindex_with_indexers(
                    reindexers, allow_dups=True
                )
                self.obj._mgr = new_obj._mgr
                self.obj._maybe_update_cacher(clear=True)
                self.obj._is_copy = None

                nindexer.append(labels.get_loc(key))

            else:
                nindexer.append(idx)

        indexer = tuple(nindexer)
    else:
        indexer, missing = convert_missing_indexer(indexer)

        if missing:
            self._setitem_with_indexer_missing(indexer, value)
            return

    if name == "loc":
        # must come after setting of missing
        indexer, value = self._maybe_mask_setitem_value(indexer, value)

    # align and set the values
    if take_split_path:
        # We have to operate column-wise
        self._setitem_with_indexer_split_path(indexer, value, name)
    else:
        self._setitem_single_block(indexer, value, name)

1945

def _setitem_with_indexer_split_path(self, indexer, value, name: str):
    """
    Setitem column-wise.

    Parameters
    ----------
    indexer : object
        Positional indexer; a non-tuple is expanded with ``_tuplify``.
        Entry 0 addresses rows and entry 1 addresses columns.
    value : object
        The value(s) to set.
    name : str
        Name of the calling indexer. Series values are aligned unless
        this is ``"iloc"``.

    Raises
    ------
    IndexError
        If the indexer has more entries than ``self.ndim``.
    ValueError
        If the row indexer is an ndarray with more than 2 dimensions, or
        the lengths of keys and value do not match.
    """
    # Above we only set take_split_path to True for 2D cases
    assert self.ndim == 2

    if not isinstance(indexer, tuple):
        indexer = _tuplify(self.ndim, indexer)
    if len(indexer) > self.ndim:
        raise IndexError("too many indices for array")
    if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2:
        raise ValueError(r"Cannot set values with ndim > 2")

    if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):
        from pandas import Series

        value = self._align_series(indexer, Series(value))

    # Ensure we have something we can iterate over
    info_axis = indexer[1]
    ilocs = self._ensure_iterable_column_indexer(info_axis)

    pi = indexer[0]
    lplane_indexer = length_of_indexer(pi, self.obj.index)
    # lplane_indexer gives the expected length of obj[indexer[0]]

    # we need an iterable, with a ndim of at least 1
    # eg. don't pass through np.array(0)
    # NOTE: the order of the branches below is significant; each one
    # assumes that the earlier conditions did not match.
    if is_list_like_indexer(value) and getattr(value, "ndim", 1) > 0:
        if isinstance(value, ABCDataFrame):
            self._setitem_with_indexer_frame_value(indexer, value, name)

        elif np.ndim(value) == 2:
            # TODO: avoid np.ndim call in case it isn't an ndarray, since
            # that will construct an ndarray, which will be wasteful
            self._setitem_with_indexer_2d_value(indexer, value)

        elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi):
            # We are setting multiple rows in a single column.
            self._setitem_single_column(ilocs[0], value, pi)

        elif len(ilocs) == 1 and 0 != lplane_indexer != len(value):
            # We are trying to set N values into M entries of a single
            # column, which is invalid for N != M
            # Exclude zero-len for e.g. boolean masking that is all-false

            if len(value) == 1 and not is_integer(info_axis):
                # This is a case like df.iloc[:3, [1]] = [0]
                # where we treat as df.iloc[:3, 1] = 0
                return self._setitem_with_indexer((pi, info_axis[0]), value[0])

            raise ValueError(
                "Must have equal len keys and value "
                "when setting with an iterable"
            )

        elif lplane_indexer == 0 and len(value) == len(self.obj.index):
            # We get here in one case via .loc with a all-False mask
            pass

        elif self._is_scalar_access(indexer) and is_object_dtype(
            self.obj.dtypes._values[ilocs[0]]
        ):
            # We are setting nested data, only possible for object dtype data
            self._setitem_single_column(indexer[1], value, pi)

        elif len(ilocs) == len(value):
            # We are setting multiple columns in a single row.
            for loc, v in zip(ilocs, value):
                self._setitem_single_column(loc, v, pi)

        elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0:
            # This is a setitem-with-expansion, see
            # test_loc_setitem_empty_append_expands_rows_mixed_dtype
            # e.g. df = DataFrame(columns=["x", "y"])
            # df["x"] = df["x"].astype(np.int64)
            # df.loc[:, "x"] = [1, 2, 3]
            self._setitem_single_column(ilocs[0], value, pi)

        else:
            raise ValueError(
                "Must have equal len keys and value "
                "when setting with an iterable"
            )

    else:
        # scalar value
        for loc in ilocs:
            self._setitem_single_column(loc, value, pi)

2036

2037 def _setitem_with_indexer_2d_value(self, indexer, value):

2038 # We get here with np.ndim(value) == 2, excluding DataFrame,

2039 # which goes through _setitem_with_indexer_frame_value

2040 pi = indexer[0]

2041

2042 ilocs = self._ensure_iterable_column_indexer(indexer[1])

2043

2044 if not is_array_like(value):

2045 # cast lists to array

2046 value = np.array(value, dtype=object)

2047 if len(ilocs) != value.shape[1]:

2048 raise ValueError(

2049 "Must have equal len keys and value when setting with an ndarray"

2050 )

2051

2052 for i, loc in enumerate(ilocs):

2053 value_col = value[:, i]

2054 if is_object_dtype(value_col.dtype):

2055 # casting to list so that we do type inference in setitem_single_column

2056 value_col = value_col.tolist()

2057 self._setitem_single_column(loc, value_col, pi)

2058

2059 def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str):

2060 ilocs = self._ensure_iterable_column_indexer(indexer[1])

2061

2062 sub_indexer = list(indexer)

2063 pi = indexer[0]

2064

2065 multiindex_indexer = isinstance(self.obj.columns, MultiIndex)

2066

2067 unique_cols = value.columns.is_unique

2068

2069 # We do not want to align the value in case of iloc GH#37728

2070 if name == "iloc":

2071 for i, loc in enumerate(ilocs):

2072 val = value.iloc[:, i]

2073 self._setitem_single_column(loc, val, pi)

2074

2075 elif not unique_cols and value.columns.equals(self.obj.columns):

2076 # We assume we are already aligned, see

2077 # test_iloc_setitem_frame_duplicate_columns_multiple_blocks

2078 for loc in ilocs:

2079 item = self.obj.columns[loc]

2080 if item in value:

2081 sub_indexer[1] = item

2082 val = self._align_series(

2083 tuple(sub_indexer),

2084 value.iloc[:, loc],

2085 multiindex_indexer,

2086 )

2087 else:

2088 val = np.nan

2089

2090 self._setitem_single_column(loc, val, pi)

2091

2092 elif not unique_cols:

2093 raise ValueError("Setting with non-unique columns is not allowed.")

2094

2095 else:

2096 for loc in ilocs:

2097 item = self.obj.columns[loc]

2098 if item in value:

2099 sub_indexer[1] = item

2100 val = self._align_series(

2101 tuple(sub_indexer),

2102 value[item],

2103 multiindex_indexer,

2104 using_cow=using_copy_on_write(),

2105 )

2106 else:

2107 val = np.nan

2108

2109 self._setitem_single_column(loc, val, pi)

2110

2111 def _setitem_single_column(self, loc: int, value, plane_indexer) -> None:

2112 """

2113

2114 Parameters

2115 ----------

2116 loc : int

2117 Indexer for column position

2118 plane_indexer : int, slice, listlike[int]

2119 The indexer we use for setitem along axis=0.

2120 """

2121 pi = plane_indexer

2122

2123 is_full_setter = com.is_null_slice(pi) or com.is_full_slice(pi, len(self.obj))

2124

2125 is_null_setter = com.is_empty_slice(pi) or is_array_like(pi) and len(pi) == 0

2126

2127 if is_null_setter:

2128 # no-op, don't cast dtype later

2129 return

2130

2131 elif is_full_setter:

2132 try:

2133 self.obj._mgr.column_setitem(

2134 loc, plane_indexer, value, inplace_only=True

2135 )

2136 except (ValueError, TypeError, LossySetitemError):

2137 # If we're setting an entire column and we can't do it inplace,

2138 # then we can use value's dtype (or inferred dtype)

2139 # instead of object

2140 dtype = self.obj.dtypes.iloc[loc]

2141 if dtype not in (np.void, object) and not self.obj.empty:

2142 # - Exclude np.void, as that is a special case for expansion.

2143 # We want to warn for

2144 # df = pd.DataFrame({'a': [1, 2]})

2145 # df.loc[:, 'a'] = .3

2146 # but not for

2147 # df = pd.DataFrame({'a': [1, 2]})

2148 # df.loc[:, 'b'] = .3

2149 # - Exclude `object`, as then no upcasting happens.

2150 # - Exclude empty initial object with enlargement,

2151 # as then there's nothing to be inconsistent with.

2152 warnings.warn(

2153 f"Setting an item of incompatible dtype is deprecated "

2154 "and will raise in a future error of pandas. "

2155 f"Value '{value}' has dtype incompatible with {dtype}, "

2156 "please explicitly cast to a compatible dtype first.",

2157 FutureWarning,

2158 stacklevel=find_stack_level(),

2159 )

2160 self.obj.isetitem(loc, value)

2161 else:

2162 # set value into the column (first attempting to operate inplace, then

2163 # falling back to casting if necessary)

2164 dtype = self.obj.dtypes.iloc[loc]

2165 if dtype == np.void:

2166 # This means we're expanding, with multiple columns, e.g.

2167 # df = pd.DataFrame({'A': [1,2,3], 'B': [4,5,6]})

2168 # df.loc[df.index <= 2, ['F', 'G']] = (1, 'abc')

2169 # Columns F and G will initially be set to np.void.

2170 # Here, we replace those temporary `np.void` columns with

2171 # columns of the appropriate dtype, based on `value`.

2172 self.obj.iloc[:, loc] = construct_1d_array_from_inferred_fill_value(

2173 value, len(self.obj)

2174 )

2175 self.obj._mgr.column_setitem(loc, plane_indexer, value)

2176

2177 self.obj._clear_item_cache()

2178

2179 def _setitem_single_block(self, indexer, value, name: str) -> None:

2180 """

2181 _setitem_with_indexer for the case when we have a single Block.

2182 """

2183 from pandas import Series

2184

2185 if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict):

2186 # TODO(EA): ExtensionBlock.setitem this causes issues with

2187 # setting for extensionarrays that store dicts. Need to decide

2188 # if it's worth supporting that.

2189 value = self._align_series(indexer, Series(value))

2190

2191 info_axis = self.obj._info_axis_number

2192 item_labels = self.obj._get_axis(info_axis)

2193 if isinstance(indexer, tuple):

2194 # if we are setting on the info axis ONLY

2195 # set using those methods to avoid block-splitting

2196 # logic here

2197 if (

2198 self.ndim == len(indexer) == 2

2199 and is_integer(indexer[1])

2200 and com.is_null_slice(indexer[0])

2201 ):

2202 col = item_labels[indexer[info_axis]]

2203 if len(item_labels.get_indexer_for([col])) == 1:

2204 # e.g. test_loc_setitem_empty_append_expands_rows

2205 loc = item_labels.get_loc(col)

2206 self._setitem_single_column(loc, value, indexer[0])

2207 return

2208

2209 indexer = maybe_convert_ix(*indexer) # e.g. test_setitem_frame_align

2210

2211 if isinstance(value, ABCDataFrame) and name != "iloc":

2212 value = self._align_frame(indexer, value)._values

2213

2214 # check for chained assignment

2215 self.obj._check_is_chained_assignment_possible()

2216

2217 # actually do the set

2218 self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value)

2219 self.obj._maybe_update_cacher(clear=True, inplace=True)

2220

def _setitem_with_indexer_missing(self, indexer, value):
    """
    Insert new row(s) or column(s) into the Series or DataFrame.

    For a 1-dimensional object, ``indexer`` is inserted at the end of the
    index. If the index is unique and the inserted label resolves to an
    existing position, the set is delegated to ``_setitem_with_indexer``
    and its result is returned. Otherwise a one-element array holding
    ``value`` is built, concatenated onto the existing values (when there
    are any) and the object's manager is replaced.

    For a 2-dimensional object, ``value`` is turned into a Series indexed
    by the object's columns and named ``indexer``. When the object has no
    rows its manager is replaced by that of a one-row frame; otherwise the
    Series is appended through ``self.obj._append``.

    Parameters
    ----------
    indexer : object
        The new label to add.
    value : object
        The value to store under the new label.

    Raises
    ------
    ValueError
        For a 2-dimensional object with no columns, or when a list-like
        ``value`` does not have one entry per column.
    """
    from pandas import Series

    # reindex the axis to the new value
    # and set inplace
    if self.ndim == 1:
        index = self.obj.index
        with warnings.catch_warnings():
            # TODO: re-issue this with setitem-specific message?
            warnings.filterwarnings(
                "ignore",
                "The behavior of Index.insert with object-dtype is deprecated",
                category=FutureWarning,
            )
            new_index = index.insert(len(index), indexer)

        # we have a coerced indexer, e.g. a float
        # that matches in an int64 Index, so
        # we will not create a duplicate index, rather
        # index to that element
        # e.g. 0.0 -> 0
        # GH#12246
        if index.is_unique:
            # pass new_index[-1:] instead of [new_index[-1]]
            # so that we retain dtype
            new_indexer = index.get_indexer(new_index[-1:])
            if (new_indexer != -1).any():
                # We get only here with loc, so can hard code
                return self._setitem_with_indexer(new_indexer, value, "loc")

        # this preserves dtype of the value and of the object
        if not is_scalar(value):
            new_dtype = None

        elif is_valid_na_for_dtype(value, self.obj.dtype):
            if not is_object_dtype(self.obj.dtype):
                # Every NA value is suitable for object, no conversion needed
                value = na_value_for_dtype(self.obj.dtype, compat=False)

            new_dtype = maybe_promote(self.obj.dtype, value)[0]

        elif isna(value):
            new_dtype = None
        elif not self.obj.empty and not is_object_dtype(self.obj.dtype):
            # We should not cast, if we have object dtype because we can
            # set timedeltas into object series
            curr_dtype = self.obj.dtype
            # use the dtype's ``numpy_dtype`` attribute when it has one
            curr_dtype = getattr(curr_dtype, "numpy_dtype", curr_dtype)
            new_dtype = maybe_promote(curr_dtype, value)[0]
        else:
            new_dtype = None

        # one-element array holding the new value, with the dtype chosen above
        new_values = Series([value], dtype=new_dtype)._values

        if len(self.obj._values):
            # GH#22717 handle casting compatibility that np.concatenate
            #  does incorrectly
            new_values = concat_compat([self.obj._values, new_values])
        # swap in the manager of a freshly constructed, enlarged object
        self.obj._mgr = self.obj._constructor(
            new_values, index=new_index, name=self.obj.name
        )._mgr
        self.obj._maybe_update_cacher(clear=True)

    elif self.ndim == 2:
        if not len(self.obj.columns):
            # no columns and scalar
            raise ValueError("cannot set a frame with no defined columns")

        # remembered before ``value`` is replaced by a Series below
        has_dtype = hasattr(value, "dtype")
        if isinstance(value, ABCSeries):
            # append a Series
            value = value.reindex(index=self.obj.columns, copy=True)
            value.name = indexer
        elif isinstance(value, dict):
            value = Series(
                value, index=self.obj.columns, name=indexer, dtype=object
            )
        else:
            # a list-list
            if is_list_like_indexer(value):
                # must have conforming columns
                if len(value) != len(self.obj.columns):
                    raise ValueError("cannot set a row with mismatched columns")

            value = Series(value, index=self.obj.columns, name=indexer)

        if not len(self.obj):
            # We will ignore the existing dtypes instead of using
            #  internals.concat logic
            df = value.to_frame().T

            # carry the existing index name(s) over to the new one-row index
            idx = self.obj.index
            if isinstance(idx, MultiIndex):
                name = idx.names
            else:
                name = idx.name

            df.index = Index([indexer], name=name)
            if not has_dtype:
                # i.e. if we already had a Series or ndarray, keep that
                #  dtype. But if we had a list or dict, then do inference
                df = df.infer_objects(copy=False)
            self.obj._mgr = df._mgr
        else:
            self.obj._mgr = self.obj._append(value)._mgr
        self.obj._maybe_update_cacher(clear=True)

2330

2331 def _ensure_iterable_column_indexer(self, column_indexer):

2332 """

2333 Ensure that our column indexer is something that can be iterated over.

2334 """

2335 ilocs: Sequence[int | np.integer] | np.ndarray

2336 if is_integer(column_indexer):

2337 ilocs = [column_indexer]

2338 elif isinstance(column_indexer, slice):

2339 ilocs = np.arange(len(self.obj.columns))[column_indexer]

2340 elif (

2341 isinstance(column_indexer, np.ndarray) and column_indexer.dtype.kind == "b"

2342 ):

2343 ilocs = np.arange(len(column_indexer))[column_indexer]

2344 else:

2345 ilocs = column_indexer

2346 return ilocs

2347

def _align_series(
    self,
    indexer,
    ser: Series,
    multiindex_indexer: bool = False,
    using_cow: bool = False,
):
    """
    Align ``ser`` to the labels selected by ``indexer`` on ``self.obj``.

    Parameters
    ----------
    indexer : tuple, slice, scalar
        Indexer used to get the locations that will be set to `ser`.
    ser : pd.Series
        Values to assign to the locations specified by `indexer`.
    multiindex_indexer : bool, optional
        Defaults to False. Should be set to True if `indexer` was from
        a `pd.MultiIndex`, to avoid unnecessary broadcasting.
    using_cow : bool, optional
        Defaults to False. When True and the index of `ser` already equals
        the labels selected by a sequence or slice indexer, `ser` itself
        is returned instead of a copy of its values.

    Returns
    -------
    `np.array` of `ser` broadcast to the appropriate shape for assignment
    to the locations selected by `indexer`

    Raises
    ------
    ValueError
        If no branch can align `ser` with the given `indexer`.
    """
    # normalise a lone non-scalar indexer to a 1-tuple
    if isinstance(indexer, (slice, np.ndarray, list, Index)):
        indexer = (indexer,)

    if isinstance(indexer, tuple):
        # flatten np.ndarray indexers
        def ravel(i):
            return i.ravel() if isinstance(i, np.ndarray) else i

        indexer = tuple(map(ravel, indexer))

        # one flag per indexer entry: True when it is not a null slice
        aligners = [not com.is_null_slice(idx) for idx in indexer]
        sum_aligners = sum(aligners)
        single_aligner = sum_aligners == 1
        is_frame = self.ndim == 2
        obj = self.obj

        # are we a single alignable value on a non-primary
        # dim (e.g. panel: 1,2, or frame: 0) ?
        # hence need to align to a single axis dimension
        # rather than find all valid dims

        # frame
        if is_frame:
            single_aligner = single_aligner and aligners[0]

        # we have a frame, with multiple indexers on both axes; and a
        # series, so need to broadcast (see GH5206)
        if sum_aligners == self.ndim and all(is_sequence(_) for _ in indexer):
            ser_values = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values

            # single indexer
            if len(indexer) > 1 and not multiindex_indexer:
                # repeat the aligned values once per entry of indexer[1]
                len_indexer = len(indexer[1])
                ser_values = (
                    np.tile(ser_values, len_indexer).reshape(len_indexer, -1).T
                )

            return ser_values

        for i, idx in enumerate(indexer):
            ax = obj.axes[i]

            # multiple aligners (or null slices)
            if is_sequence(idx) or isinstance(idx, slice):
                if single_aligner and com.is_null_slice(idx):
                    continue
                new_ix = ax[idx]
                if not is_list_like_indexer(new_ix):
                    new_ix = Index([new_ix])
                else:
                    new_ix = Index(new_ix)
                if ser.index.equals(new_ix):
                    if using_cow:
                        return ser
                    return ser._values.copy()

                return ser.reindex(new_ix)._values

            # 2 dims
            elif single_aligner:
                # reindex along index
                ax = self.obj.axes[1]
                if ser.index.equals(ax) or not len(ax):
                    return ser._values.copy()
                return ser.reindex(ax)._values

    elif is_integer(indexer) and self.ndim == 1:
        if is_object_dtype(self.obj.dtype):
            return ser
        ax = self.obj._get_axis(0)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values[indexer]

    elif is_integer(indexer):
        ax = self.obj._get_axis(1)

        if ser.index.equals(ax):
            return ser._values.copy()

        return ser.reindex(ax)._values

    # reached when no branch above returned
    raise ValueError("Incompatible indexer with Series")

2456

2457 def _align_frame(self, indexer, df: DataFrame) -> DataFrame:

2458 is_frame = self.ndim == 2

2459

2460 if isinstance(indexer, tuple):

2461 idx, cols = None, None

2462 sindexers = []

2463 for i, ix in enumerate(indexer):

2464 ax = self.obj.axes[i]

2465 if is_sequence(ix) or isinstance(ix, slice):

2466 if isinstance(ix, np.ndarray):

2467 ix = ix.ravel()

2468 if idx is None:

2469 idx = ax[ix]

2470 elif cols is None:

2471 cols = ax[ix]

2472 else:

2473 break

2474 else:

2475 sindexers.append(i)

2476

2477 if idx is not None and cols is not None:

2478 if df.index.equals(idx) and df.columns.equals(cols):

2479 val = df.copy()

2480 else:

2481 val = df.reindex(idx, columns=cols)

2482 return val

2483

2484 elif (isinstance(indexer, slice) or is_list_like_indexer(indexer)) and is_frame:

2485 ax = self.obj.index[indexer]

2486 if df.index.equals(ax):

2487 val = df.copy()

2488 else:

2489 # we have a multi-index and are trying to align

2490 # with a particular, level GH3738

2491 if (

2492 isinstance(ax, MultiIndex)

2493 and isinstance(df.index, MultiIndex)

2494 and ax.nlevels != df.index.nlevels

2495 ):

2496 raise TypeError(

2497 "cannot align on a multi-index with out "

2498 "specifying the join levels"

2499 )

2500

2501 val = df.reindex(index=ax)

2502 return val

2503

2504 raise ValueError("Incompatible indexer with DataFrame")

2505

2506

2507class _ScalarAccessIndexer(NDFrameIndexerBase):

2508 """

2509 Access scalars quickly.

2510 """

2511

2512 # sub-classes need to set _takeable

2513 _takeable: bool

2514

2515 def _convert_key(self, key):

2516 raise AbstractMethodError(self)

2517

2518 def __getitem__(self, key):

2519 if not isinstance(key, tuple):

2520 # we could have a convertible item here (e.g. Timestamp)

2521 if not is_list_like_indexer(key):

2522 key = (key,)

2523 else:

2524 raise ValueError("Invalid call for scalar access (getting)!")

2525

2526 key = self._convert_key(key)

2527 return self.obj._get_value(*key, takeable=self._takeable)

2528

2529 def __setitem__(self, key, value) -> None:

2530 if isinstance(key, tuple):

2531 key = tuple(com.apply_if_callable(x, self.obj) for x in key)

2532 else:

2533 # scalar callable may return tuple

2534 key = com.apply_if_callable(key, self.obj)

2535

2536 if not isinstance(key, tuple):

2537 key = _tuplify(self.ndim, key)

2538 key = list(self._convert_key(key))

2539 if len(key) != self.ndim:

2540 raise ValueError("Not enough indexers for scalar access (setting)!")

2541

2542 self.obj._set_value(*key, value=value, takeable=self._takeable)

2543

2544

@doc(IndexingMixin.at)
class _AtIndexer(_ScalarAccessIndexer):
    # Label-based scalar access: keys are labels, not positions.
    _takeable = False

    def _convert_key(self, key):
        """
        Require the keys to be the same type as the index. (so we don't
        fallback)
        """
        # GH 26989
        # For series, unpacking key needs to result in the label.
        # This is already the case for len(key) == 1; e.g. (1,)
        needs_wrapping = self.ndim == 1 and len(key) > 1
        return (key,) if needs_wrapping else key

    @property
    def _axes_are_unique(self) -> bool:
        # Only relevant for self.ndim == 2
        assert self.ndim == 2
        frame = self.obj
        return frame.index.is_unique and frame.columns.is_unique

    def __getitem__(self, key):
        if self.ndim != 2 or self._axes_are_unique:
            return super().__getitem__(key)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(x) for x in key)):
            raise ValueError("Invalid call for scalar access (getting)!")
        return self.obj.loc[key]

    def __setitem__(self, key, value) -> None:
        if self.ndim != 2 or self._axes_are_unique:
            return super().__setitem__(key, value)

        # GH#33041 fall back to .loc
        if not (isinstance(key, tuple) and all(is_scalar(x) for x in key)):
            raise ValueError("Invalid call for scalar access (setting)!")

        self.obj.loc[key] = value
        return

2587

2588

@doc(IndexingMixin.iat)
class _iAtIndexer(_ScalarAccessIndexer):
    # Position-based scalar access: keys are integer positions.
    _takeable = True

    def _convert_key(self, key):
        """
        Require integer args; the key is returned unchanged.
        """
        if not all(is_integer(part) for part in key):
            raise ValueError("iAt based indexing can only have integer indexers")
        return key

2601

2602

2603def _tuplify(ndim: int, loc: Hashable) -> tuple[Hashable | slice, ...]:

2604 """

2605 Given an indexer for the first dimension, create an equivalent tuple

2606 for indexing over all dimensions.

2607

2608 Parameters

2609 ----------

2610 ndim : int

2611 loc : object

2612

2613 Returns

2614 -------

2615 tuple

2616 """

2617 _tup: list[Hashable | slice]

2618 _tup = [slice(None, None) for _ in range(ndim)]

2619 _tup[0] = loc

2620 return tuple(_tup)

2621

2622

2623def _tupleize_axis_indexer(ndim: int, axis: AxisInt, key) -> tuple:

2624 """

2625 If we have an axis, adapt the given key to be axis-independent.

2626 """

2627 new_key = [slice(None)] * ndim

2628 new_key[axis] = key

2629 return tuple(new_key)

2630

2631

def check_bool_indexer(index: Index, key) -> np.ndarray:
    """
    Validate a boolean indexer against ``index``, reindexing or converting
    it where necessary.

    This function assumes that is_bool_indexer(key) == True.

    Parameters
    ----------
    index : Index
        Index of the object on which the indexing is done.
    key : list-like
        Boolean indexer to check.

    Returns
    -------
    np.array
        Resulting key.

    Raises
    ------
    IndexError
        If the key does not have the same length as index.
    IndexingError
        If the index of the key is unalignable to index.
    """
    aligned = key

    if isinstance(key, ABCSeries) and not key.index.equals(index):
        # Reorder the Series so that it follows ``index``.
        positions = key.index.get_indexer_for(index)
        if (positions == -1).any():
            raise IndexingError(
                "Unalignable boolean Series provided as "
                "indexer (index of the boolean Series and of "
                "the indexed object do not match)."
            )

        aligned = key.take(positions)

        # fall through for boolean
        if not isinstance(aligned.dtype, ExtensionDtype):
            return aligned.astype(bool)._values

    if is_object_dtype(key):
        # key might be object-dtype bool, check_array_indexer needs bool array
        aligned = np.asarray(aligned, dtype=bool)
    elif not is_array_like(aligned):
        # GH 33924
        # key may contain nan elements, check_array_indexer needs bool array
        aligned = pd_array(aligned, dtype=bool)

    return check_array_indexer(index, aligned)

2682

2683

def convert_missing_indexer(indexer):
    """
    Reverse convert a missing indexer, which is a dict.

    Returns the scalar indexer and a boolean indicating if we converted.
    A converted indexer that is a plain ``bool`` raises ``KeyError``.
    """
    if not isinstance(indexer, dict):
        return indexer, False

    # a missing key (but not a tuple indexer)
    label = indexer["key"]
    if isinstance(label, bool):
        raise KeyError("cannot use a single bool to index into setitem")
    return label, True

2698

2699

def convert_from_missing_indexer_tuple(indexer, axes):
    """
    Create a filtered indexer that doesn't have any missing indexers.

    Every dict entry of ``indexer`` is replaced by the location of its
    ``"key"`` on the axis at the same position; other entries are kept.
    """
    converted = []
    for axis_no, part in enumerate(indexer):
        if isinstance(part, dict):
            part = axes[axis_no].get_loc(part["key"])
        converted.append(part)
    return tuple(converted)

2709

2710

def maybe_convert_ix(*args):
    """
    We likely want to take the cross-product.

    When every argument is an ndarray, list, Series or Index the result of
    ``np.ix_`` is returned; otherwise the arguments come back unchanged.
    """
    cross_product_types = (np.ndarray, list, ABCSeries, Index)
    if all(isinstance(arg, cross_product_types) for arg in args):
        return np.ix_(*args)
    return args

2719

2720

def is_nested_tuple(tup, labels) -> bool:
    """
    Check for a compatible nested tuple and multiindexes among the axes.

    Returns
    -------
    bool
        True only when ``tup`` is a tuple holding at least one list-like
        or slice element and ``labels`` is a MultiIndex.
    """
    if not isinstance(tup, tuple):
        return False

    has_nested_part = any(
        is_list_like(part) or isinstance(part, slice) for part in tup
    )
    return has_nested_part and isinstance(labels, MultiIndex)

2736

2737

def is_label_like(key) -> bool:
    """
    Whether ``key`` selects a single label or row.

    Returns
    -------
    bool
        False for slices, list-like indexers and ``Ellipsis``; True
        otherwise.
    """
    if isinstance(key, slice):
        return False
    if is_list_like_indexer(key):
        return False
    return key is not Ellipsis

2750

2751

def need_slice(obj: slice) -> bool:
    """
    Whether ``obj`` has a start, a stop, or a step other than None or 1.

    Returns
    -------
    bool
    """
    if obj.start is not None or obj.stop is not None:
        return True
    step = obj.step
    return step is not None and step != 1

2763

2764

2765def check_dict_or_set_indexers(key) -> None:

2766 """

2767 Check if the indexer is or contains a dict or set, which is no longer allowed.

2768 """

2769 if (

2770 isinstance(key, set)

2771 or isinstance(key, tuple)

2772 and any(isinstance(x, set) for x in key)

2773 ):

2774 raise TypeError(

2775 "Passing a set as an indexer is not supported. Use a list instead."

2776 )

2777

2778 if (

2779 isinstance(key, dict)

2780 or isinstance(key, tuple)

2781 and any(isinstance(x, dict) for x in key)

2782 ):

2783 raise TypeError(

2784 "Passing a dict as an indexer is not supported. Use a list instead."

2785 )