Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexers/utils.py: 14%

1"""

2Low-dependency indexing utilities.

3"""

4from __future__ import annotations

6from typing import (

7 TYPE_CHECKING,

8 Any,

11import numpy as np

13from pandas._typing import AnyArrayLike

15from pandas.core.dtypes.common import (

16 is_array_like,

17 is_bool_dtype,

18 is_extension_array_dtype,

19 is_integer,

20 is_integer_dtype,

21 is_list_like,

22)

23from pandas.core.dtypes.generic import (

24 ABCIndex,

25 ABCSeries,

26)

28if TYPE_CHECKING:

29 from pandas.core.frame import DataFrame

30 from pandas.core.indexes.base import Index

32# -----------------------------------------------------------

33# Indexer Identification

def is_valid_positional_slice(slc: slice) -> bool:
    """
    Check if a slice object can be interpreted as a positional indexer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool
        True when each of ``start``, ``stop`` and ``step`` is either None
        or an integer.

    Notes
    -----
    A valid positional slice may also be interpreted as a label-based slice
    depending on the index being sliced.
    """
    # Every component must be usable as a position: missing (None) or integer.
    return all(
        part is None or is_integer(part)
        for part in (slc.start, slc.stop, slc.step)
    )

def is_list_like_indexer(key) -> bool:
    """
    Check if we have a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
        True for list-likes, including plain tuples; False for tuple
        subclasses and for anything that is not list-like.
    """
    # allow a list_like, but exclude NamedTuples which can be indexers;
    # a NamedTuple is detected as "a tuple whose exact type is not tuple".
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return is_list_like(key) and not is_tuple_subclass

def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Return True if we are all scalar indexers.

    Parameters
    ----------
    indexer : object
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
        True for a single integer when ``ndim == 1``, or for a tuple of
        exactly ``ndim`` integers; False otherwise.
    """
    if ndim == 1 and is_integer(indexer):
        # GH37748: allow indexer to be an integer for Series
        return True
    if isinstance(indexer, tuple) and len(indexer) == ndim:
        # One entry per dimension, and every entry must be an integer.
        return all(is_integer(x) for x in indexer)
    return False

100

101

def is_empty_indexer(indexer) -> bool:
    """
    Check if we have an empty indexer.

    Parameters
    ----------
    indexer : object

    Returns
    -------
    bool
        True when ``indexer`` is a zero-length list-like, or when it is
        (or is a tuple containing) a zero-length ndarray.
    """
    if is_list_like(indexer) and not len(indexer):
        return True
    # Normalise to a tuple so a lone indexer and a per-axis tuple of indexers
    # are inspected the same way.
    if not isinstance(indexer, tuple):
        indexer = (indexer,)
    return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer)

119

120

121# -----------------------------------------------------------

122# Indexer Validation

123

124

def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that value and indexer are the same length.

    A special case is allowed for when the indexer is a boolean array
    and the number of true values equals the length of ``value``. In
    this case, no exception is raised.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``values`` is
        one-dimensional, and the lengths don't match.
    """
    no_op = False

    if isinstance(indexer, (np.ndarray, list)):
        # We can ignore other listlikes because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if is_list_like(value):
            if len(indexer) != len(value) and values.ndim == 1:
                # boolean with truth values == len of the value is ok too
                if isinstance(indexer, list):
                    indexer = np.array(indexer)
                # ``indexer`` is guaranteed to be an ndarray at this point.
                if not (
                    indexer.dtype == np.bool_ and indexer.sum() == len(value)
                ):
                    raise ValueError(
                        "cannot set using a list-like indexer "
                        "with a different length than the value"
                    )
            if not len(indexer):
                no_op = True

    elif isinstance(indexer, slice):
        if is_list_like(value):
            if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
                # In case of two dimensional value is used row-wise and broadcasted
                raise ValueError(
                    "cannot set using a slice indexer with a "
                    "different length than the value"
                )
            if not len(value):
                no_op = True

    return no_op

187

188

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If any index is smaller than -1.
    IndexError
        If any index is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # An empty indexer is trivially valid (and has no min/max to inspect).
    if len(indices):
        min_idx = indices.min()
        if min_idx < -1:
            msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
            raise ValueError(msg)

        max_idx = indices.max()
        if max_idx >= n:
            raise IndexError("indices are out-of-bounds")

235

236

237# -----------------------------------------------------------

238# Indexer Conversion

239

240

def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Attempt to convert indices into valid, positive indices.

    If we have negative indices, translate to positive here.
    If we have indices that are out-of-bounds, raise an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        An array-like of positive indices that correspond to the ones
        that were passed in initially to this function. The input is
        copied before being modified, so the caller's array is unchanged.

    Raises
    ------
    IndexError
        One of the converted indices either exceeded the number of
        elements (specified by `n`), or was still negative.
    """
    if isinstance(indices, list):
        indices = np.array(indices)
        if len(indices) == 0:
            # If `indices` is empty, np.array will return a float,
            # and will cause indexing errors.
            return np.empty(0, dtype=np.intp)

    mask = indices < 0
    if mask.any():
        # Copy first so the wrap-around is not written into the caller's data.
        indices = indices.copy()
        indices[mask] += n

    if verify:
        mask = (indices >= n) | (indices < 0)
        if mask.any():
            raise IndexError("indices are out-of-bounds")
    return indices

286

287

288# -----------------------------------------------------------

289# Unsorted

290

291

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer]

    Parameters
    ----------
    indexer : slice, Series, Index, ndarray, list, range or scalar
        The indexer whose resulting length is wanted.
    target : sized object, optional
        The object being indexed. Only its length is used, and only when
        ``indexer`` is a slice.

    Returns
    -------
    int

    Raises
    ------
    AssertionError
        If the length cannot be determined for this kind of indexer.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices clips start/stop to the target length and handles
        # negative values and negative steps; the length of the equivalent
        # range is exactly the number of positions selected.
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774
            # a boolean mask selects one element per True entry
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() accounts for steps that do not divide the span evenly and
        # for empty ranges.
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        # anything that is not list-like selects a single element
        return 1
    raise AssertionError("cannot find the length of the indexer")

332

333

def disallow_ndim_indexing(result) -> None:
    """
    Helper function to disallow multi-dimensional indexing on 1D Series/Index.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we used to return ndarray, which was deprecated
    in GH#30588.

    Parameters
    ----------
    result : object
        The outcome of an indexing operation.

    Raises
    ------
    ValueError
        If ``result`` has more than one dimension.
    """
    if np.ndim(result) > 1:
        raise ValueError(
            "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
            "supported. Convert to a numpy array before indexing instead."
        )

347

348

def unpack_1tuple(tup):
    """
    If we have a length-1 tuple that contains a slice, unpack to just
    the slice.

    Parameters
    ----------
    tup : tuple or list

    Returns
    -------
    slice, tuple or list
        The contained slice when ``tup`` is a 1-tuple holding a slice;
        otherwise ``tup`` unchanged.

    Raises
    ------
    ValueError
        If ``tup`` is a single-item list containing a slice (GH#31299).
    """
    if len(tup) == 1 and isinstance(tup[0], slice):
        # if we don't have a MultiIndex, we may still be able to handle
        #  a 1-tuple.  see test_1tuple_without_multiindex

        if isinstance(tup, list):
            # GH#31299
            raise ValueError(
                "Indexing with a single-item list containing a "
                "slice is not allowed. Pass a tuple instead.",
            )

        return tup[0]
    return tup

371

372

def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Checks if a key used as indexer has the same length as the columns it is
    associated with.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        A list-like of keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If the length of key is not equal to the number of columns in value
        or if the number of columns referenced by key is not equal to number
        of columns.
    """
    if columns.is_unique:
        if len(value.columns) != len(key):
            raise ValueError("Columns must be same length as key")
    else:
        # With duplicate labels one key can reference several columns, so
        # compare the number of referenced positions instead of len(key).
        # Missing keys in columns are represented as -1
        if len(columns.get_indexer_non_unique(key)[0]) != len(value.columns):
            raise ValueError("Columns must be same length as key")

397

398

def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n]

    Parameters
    ----------
    item : tuple
        Indexing key; a leading or trailing Ellipsis is dropped.

    Returns
    -------
    object
        The single remaining element of ``item``.

    Raises
    ------
    IndexError
        If more than one element remains after dropping the Ellipsis.
    """
    if len(item) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        if item[0] is Ellipsis:
            item = item[1:]
        elif item[-1] is Ellipsis:
            item = item[:-1]

    if len(item) > 1:
        raise IndexError("too many indices for array.")

    item = item[0]
    return item

415

416

417# -----------------------------------------------------------

418# Public indexer validation

419

420

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.
        Non-list-like indexers and tuples are returned unchanged.

    Raises
    ------
    IndexError
        When the lengths don't match, or when the indexer is an array of a
        dtype other than integer or boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    # Imported locally, as in the original block, instead of at module level.
    from pandas.core.construction import array as pd_array

    # whatever is not an array-like is returned as-is (possible valid array
    # indexers that are not array-like: integer, slice, Ellipsis, None)
    # In this context, tuples are not considered as array-like, as they have
    # a specific meaning in indexing (multi-dimensional indexing)
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype
    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # missing values in a boolean mask select nothing
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
    elif is_integer_dtype(dtype):
        try:
            indexer = np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err
    else:
        raise IndexError("arrays used as indices must be of integer or boolean type")

    return indexer