Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexers/utils.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

148 statements  

1""" 

2Low-dependency indexing utilities. 

3""" 

4from __future__ import annotations 

5 

6from typing import ( 

7 TYPE_CHECKING, 

8 Any, 

9) 

10 

11import numpy as np 

12 

13from pandas._libs import lib 

14 

15from pandas.core.dtypes.common import ( 

16 is_array_like, 

17 is_bool_dtype, 

18 is_integer, 

19 is_integer_dtype, 

20 is_list_like, 

21) 

22from pandas.core.dtypes.dtypes import ExtensionDtype 

23from pandas.core.dtypes.generic import ( 

24 ABCIndex, 

25 ABCSeries, 

26) 

27 

28if TYPE_CHECKING: 

29 from pandas._typing import AnyArrayLike 

30 

31 from pandas.core.frame import DataFrame 

32 from pandas.core.indexes.base import Index 

33 

34# ----------------------------------------------------------- 

35# Indexer Identification 

36 

37 

38def is_valid_positional_slice(slc: slice) -> bool: 

39 """ 

40 Check if a slice object can be interpreted as a positional indexer. 

41 

42 Parameters 

43 ---------- 

44 slc : slice 

45 

46 Returns 

47 ------- 

48 bool 

49 

50 Notes 

51 ----- 

52 A valid positional slice may also be interpreted as a label-based slice 

53 depending on the index being sliced. 

54 """ 

55 return ( 

56 lib.is_int_or_none(slc.start) 

57 and lib.is_int_or_none(slc.stop) 

58 and lib.is_int_or_none(slc.step) 

59 ) 

60 

61 

def is_list_like_indexer(key) -> bool:
    """
    Report whether ``key`` is a list-like indexer that is *not* a NamedTuple.

    Parameters
    ----------
    key : object
        Candidate indexer.

    Returns
    -------
    bool
        True for list-likes, except for tuple subclasses (e.g. NamedTuples).
    """
    if not is_list_like(key):
        return False

    # A NamedTuple is a tuple subclass; it can itself be an indexer, so it is
    # not treated as a list-like of indexers. Plain tuples are still allowed.
    is_tuple_subclass = isinstance(key, tuple) and type(key) is not tuple
    return not is_tuple_subclass

76 

77 

def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Report whether the indexer selects a single element.

    Parameters
    ----------
    indexer : object
        Candidate indexer.
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
        True for a bare integer when ``ndim`` is 1, or for a tuple of
        exactly ``ndim`` integers; False otherwise.
    """
    # GH37748: allow indexer to be an integer for Series
    if ndim == 1 and is_integer(indexer):
        return True

    if not isinstance(indexer, tuple) or len(indexer) != ndim:
        return False

    # One integer per dimension is required
    for element in indexer:
        if not is_integer(element):
            return False
    return True

98 

99 

def is_empty_indexer(indexer) -> bool:
    """
    Report whether the indexer selects nothing.

    Parameters
    ----------
    indexer : object
        Candidate indexer.

    Returns
    -------
    bool
        True when ``indexer`` is a zero-length list-like, or when it (or any
        element of it, if it is a tuple) is a zero-length ndarray.
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    # Treat a non-tuple as a one-element tuple so both cases share one scan
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False

117 

118 

119# ----------------------------------------------------------- 

120# Indexer Validation 

121 

122 

def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that value and indexer are the same length.

    A special case is allowed when the indexer is a boolean array and the
    number of true values equals the length of ``value``. In that case no
    exception is raised.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice, ``values`` is
        1-dimensional and the lengths don't match.
    """
    if isinstance(indexer, (np.ndarray, list)):
        # Other list-likes are ignored here because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if not is_list_like(value):
            return False

        if len(indexer) != len(value) and values.ndim == 1:
            # A boolean mask whose number of True entries equals
            # len(value) is acceptable even though the lengths differ
            mask = np.array(indexer) if isinstance(indexer, list) else indexer
            mask_matches = mask.dtype == np.bool_ and mask.sum() == len(value)
            if not mask_matches:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return len(indexer) == 0

    if isinstance(indexer, slice):
        if not is_list_like(value):
            return False

        if len(value) != length_of_indexer(indexer, values) and values.ndim == 1:
            # In case of two dimensional value is used row-wise and broadcasted
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return len(value) == 0

    return False

185 

186 

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
        Positions to check.
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        If any entry is smaller than -1.
    IndexError
        If any entry is greater than or equal to ``n``.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # Nothing to check (and min/max are undefined) for an empty indexer
    if not len(indices):
        return

    min_idx = indices.min()
    if min_idx < -1:
        msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
        raise ValueError(msg)

    if indices.max() >= n:
        raise IndexError("indices are out-of-bounds")

233 

234 

235# ----------------------------------------------------------- 

236# Indexer Conversion 

237 

238 

def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative indices into non-negative positions.

    Negative entries are shifted by ``n``. When ``verify`` is set, entries
    that are still out of bounds afterwards raise an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert. A list is converted to an
        ndarray first.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        The converted indices. The input is returned as-is when it holds no
        negative entries; otherwise a modified copy is returned.

    Raises
    ------
    IndexError
        With ``verify=True``, when a converted index is greater than or
        equal to ``n`` or is still negative.
    """
    if isinstance(indices, list):
        indices = np.array(indices)
        if not len(indices):
            # np.array([]) has a float dtype, which would cause indexing
            # errors, so hand back an empty integer array instead
            return np.empty(0, dtype=np.intp)

    negative = indices < 0
    if negative.any():
        # Copy before shifting so the caller's array is left untouched
        indices = indices.copy()
        indices[negative] += n

    if verify:
        out_of_bounds = (indices >= n) | (indices < 0)
        if out_of_bounds.any():
            raise IndexError("indices are out-of-bounds")
    return indices

284 

285 

286# ----------------------------------------------------------- 

287# Unsorted 

288 

289 

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of ``target[indexer]``.

    Parameters
    ----------
    indexer : slice, range, list, ndarray, Series, Index or scalar
        The indexer whose result length is wanted.
    target : sized object, optional
        The object being indexed. It is only consulted (for its length) when
        ``indexer`` is a slice; a slice without a target is treated like any
        other non-list-like indexer.

    Returns
    -------
    int
        Number of elements the indexer selects: the count of True entries
        for a boolean list/array, the number of entries for other
        lists/arrays and ranges, and 1 for non-list-like indexers.

    Raises
    ------
    AssertionError
        If the indexer is list-like but not one of the supported types.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices resolves None, negative and out-of-range bounds (for
        # positive and negative steps) exactly the way sequence slicing does,
        # so the resulting range has the same length as target[indexer].
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774: a boolean mask selects one element per True entry
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() is exact for steps that do not divide the span and for empty
        # ranges, where (stop - start) // step is off or negative
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        return 1
    raise AssertionError("cannot find the length of the indexer")

330 

331 

def disallow_ndim_indexing(result) -> None:
    """
    Reject indexing results that have more than one dimension.

    GH#27125 indexer like idx[:, None] expands dim, but we cannot do that
    and keep an index, so we used to return ndarray, which was deprecated
    in GH#30588.

    Parameters
    ----------
    result : object
        The result of an indexing operation on a 1D Series/Index.

    Raises
    ------
    ValueError
        If ``np.ndim(result)`` is greater than 1.
    """
    if np.ndim(result) <= 1:
        return

    raise ValueError(
        "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
        "supported. Convert to a numpy array before indexing instead."
    )

345 

346 

def unpack_1tuple(tup):
    """
    Unpack a length-1 tuple that contains a slice to just the slice.

    Parameters
    ----------
    tup : tuple or list
        Candidate indexer.

    Returns
    -------
    object
        The contained slice when ``tup`` is a length-1 tuple holding a
        slice; otherwise ``tup`` unchanged.

    Raises
    ------
    ValueError
        If ``tup`` is a length-1 list holding a slice (GH#31299).
    """
    if len(tup) != 1:
        return tup

    sole_item = tup[0]
    if not isinstance(sole_item, slice):
        return tup

    # Even without a MultiIndex a 1-tuple may still be handled;
    # see test_1tuple_without_multiindex. Lists are not accepted.
    if isinstance(tup, list):
        # GH#31299
        raise ValueError(
            "Indexing with a single-item list containing a "
            "slice is not allowed. Pass a tuple instead.",
        )

    return sole_item

369 

370 

def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Check that a key used as an indexer matches the number of columns in
    the value being set.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        Keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If ``columns`` is unique and the length of ``key`` differs from the
        number of columns in ``value``, or if ``columns`` is not unique and
        the number of positions ``key`` resolves to differs from the number
        of columns in ``value``.
    """
    if columns.is_unique:
        n_selected = len(key)
    else:
        # Missing keys in columns are represented as -1
        n_selected = len(columns.get_indexer_non_unique(key)[0])

    if n_selected != len(value.columns):
        raise ValueError("Columns must be same length as key")

395 

396 

def unpack_tuple_and_ellipses(item: tuple):
    """
    Possibly unpack arr[..., n] to arr[n].

    Parameters
    ----------
    item : tuple
        Indexing key. One leading Ellipsis, or failing that one trailing
        Ellipsis, is dropped when the tuple has more than one element.

    Returns
    -------
    object
        The single remaining element of the tuple.

    Raises
    ------
    IndexError
        If more than one element remains after dropping the Ellipsis.
    """
    remaining = item
    if len(remaining) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        if remaining[0] is Ellipsis:
            remaining = remaining[1:]
        elif remaining[-1] is Ellipsis:
            remaining = remaining[:-1]

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]

413 

414 

415# ----------------------------------------------------------- 

416# Public indexer validation 

417 

418 

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Check if `indexer` is a valid array indexer for `array`.

    For a boolean mask, `array` and `indexer` are checked to have the same
    length. The dtype is validated, and if it is an integer or boolean
    ExtensionArray, it is checked if there are missing values present, and
    it is converted to the appropriate numpy array. Other dtypes will raise
    an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.
        Input that is not list-like, or that is a tuple, is returned
        unchanged instead.

    Raises
    ------
    IndexError
        When a boolean indexer's length doesn't match the array's, or when
        the indexer has a dtype that is neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    When checking a boolean mask, a boolean ndarray is returned when the
    arguments are all valid.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    An IndexError is raised when the lengths don't match.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask will get passed through (if the length is correct):

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Similarly for integer indexers, an integer ndarray is returned when it is
    a valid indexer, otherwise an error is raised (for integer indexers, a
    matching length is not required):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    For non-integer/boolean dtypes, an appropriate error is raised:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Anything that is not array-like is handed back untouched (possible valid
    # array indexers that are not array-like: integer, slice, Ellipsis, None).
    # Tuples are passed through as well: in indexing they carry a specific
    # meaning (multi-dimensional indexing) and are not treated as array-like.
    if isinstance(indexer, tuple) or not is_list_like(indexer):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    indexer_dtype = indexer.dtype

    if is_bool_dtype(indexer_dtype):
        if isinstance(indexer_dtype, ExtensionDtype):
            # missing values in an extension boolean mask count as False
            mask = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            mask = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(mask) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(mask)} instead of {len(array)}"
            )
        return mask

    if not is_integer_dtype(indexer_dtype):
        raise IndexError("arrays used as indices must be of integer or boolean type")

    try:
        return np.asarray(indexer, dtype=np.intp)
    except ValueError as err:
        raise ValueError(
            "Cannot index with an integer indexer containing NA values"
        ) from err