Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexers/utils.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

148 statements  

1""" 

2Low-dependency indexing utilities. 

3""" 

4from __future__ import annotations 

5 

6from typing import ( 

7 TYPE_CHECKING, 

8 Any, 

9) 

10 

11import numpy as np 

12 

13from pandas._typing import AnyArrayLike 

14 

15from pandas.core.dtypes.common import ( 

16 is_array_like, 

17 is_bool_dtype, 

18 is_extension_array_dtype, 

19 is_integer, 

20 is_integer_dtype, 

21 is_list_like, 

22) 

23from pandas.core.dtypes.generic import ( 

24 ABCIndex, 

25 ABCSeries, 

26) 

27 

28if TYPE_CHECKING: 

29 from pandas.core.frame import DataFrame 

30 from pandas.core.indexes.base import Index 

31 

32# ----------------------------------------------------------- 

33# Indexer Identification 

34 

35 

def is_valid_positional_slice(slc: slice) -> bool:
    """
    Tell whether a slice could be used as a positional indexer.

    A slice qualifies when each of ``start``, ``stop`` and ``step`` is
    either ``None`` or an integer.

    Parameters
    ----------
    slc : slice

    Returns
    -------
    bool

    Notes
    -----
    A slice that passes this check is not necessarily positional: depending
    on the index being sliced it may also be read as a label-based slice.
    """
    bounds = (slc.start, slc.stop, slc.step)
    return all(bound is None or is_integer(bound) for bound in bounds)

62 

63 

def is_list_like_indexer(key) -> bool:
    """
    Tell whether ``key`` is a list-like indexer other than a NamedTuple.

    Parameters
    ----------
    key : object

    Returns
    -------
    bool
    """
    if not is_list_like(key):
        return False
    # Plain tuples count as list-like indexers; tuple *subclasses*
    # (i.e. NamedTuples) are excluded because they can be indexers themselves.
    return type(key) is tuple or not isinstance(key, tuple)

78 

79 

def is_scalar_indexer(indexer, ndim: int) -> bool:
    """
    Tell whether the indexer consists solely of scalar (integer) positions.

    Parameters
    ----------
    indexer : object
    ndim : int
        Number of dimensions in the object being indexed.

    Returns
    -------
    bool
        True for a bare integer when ``ndim == 1``, or for a tuple of exactly
        ``ndim`` integers; False otherwise.
    """
    # GH37748: allow indexer to be an integer for Series
    if ndim == 1 and is_integer(indexer):
        return True

    if not isinstance(indexer, tuple) or len(indexer) != ndim:
        return False

    for element in indexer:
        if not is_integer(element):
            return False
    return True

100 

101 

def is_empty_indexer(indexer) -> bool:
    """
    Tell whether the indexer is empty.

    An indexer is empty when it is a zero-length list-like, or when it is
    (or, for a tuple, contains) a zero-length ``numpy.ndarray``.

    Parameters
    ----------
    indexer : object

    Returns
    -------
    bool
    """
    if is_list_like(indexer) and len(indexer) == 0:
        return True

    # Treat a lone indexer as a 1-tuple so both cases share the scan below.
    components = indexer if isinstance(indexer, tuple) else (indexer,)
    for component in components:
        if isinstance(component, np.ndarray) and len(component) == 0:
            return True
    return False

119 

120 

121# ----------------------------------------------------------- 

122# Indexer Validation 

123 

124 

def check_setitem_lengths(indexer, value, values) -> bool:
    """
    Validate that a setitem key and the value being assigned agree in length.

    The check only applies when ``value`` is list-like, ``values`` is
    one-dimensional, and ``indexer`` is an ndarray, a list or a slice.
    As a special case, a boolean array indexer is accepted when its number
    of True entries equals ``len(value)``.

    Parameters
    ----------
    indexer : sequence
        Key for the setitem.
    value : array-like
        Value for the setitem.
    values : array-like
        Values being set into.

    Returns
    -------
    bool
        Whether this is an empty listlike setting which is a no-op.

    Raises
    ------
    ValueError
        When the indexer is an ndarray, list or slice and the lengths
        don't match.
    """
    # Scalars are broadcast, so there is nothing to validate.
    if not is_list_like(value):
        return False

    if isinstance(indexer, (np.ndarray, list)):
        # We can ignore other listlikes because they are either
        #  a) not necessarily 1-D indexers, e.g. tuple
        #  b) boolean indexers e.g. BoolArray
        if len(indexer) != len(value) and values.ndim == 1:
            if isinstance(indexer, list):
                indexer = np.array(indexer)
            # A boolean mask whose True-count equals len(value) is fine too.
            mask_matches = indexer.dtype == np.bool_ and indexer.sum() == len(value)
            if not mask_matches:
                raise ValueError(
                    "cannot set using a list-like indexer "
                    "with a different length than the value"
                )
        return len(indexer) == 0

    if isinstance(indexer, slice):
        # A two-dimensional ``values`` is set row-wise with broadcasting,
        # hence the length comparison is only enforced for 1-D targets.
        if values.ndim == 1 and len(value) != length_of_indexer(indexer, values):
            raise ValueError(
                "cannot set using a slice indexer with a "
                "different length than the value"
            )
        return len(value) == 0

    return False

187 

188 

def validate_indices(indices: np.ndarray, n: int) -> None:
    """
    Perform bounds-checking for an indexer.

    -1 is allowed for indicating missing values.

    Parameters
    ----------
    indices : ndarray
    n : int
        Length of the array being indexed.

    Raises
    ------
    ValueError
        When the smallest entry of `indices` is less than -1.
    IndexError
        When the largest entry of `indices` is greater than or equal to `n`.

    Examples
    --------
    >>> validate_indices(np.array([1, 2]), 3)  # OK

    >>> validate_indices(np.array([1, -2]), 3)
    Traceback (most recent call last):
        ...
    ValueError: 'indices' contains values less than allowed (-2 < -1)

    >>> validate_indices(np.array([1, 2, 3]), 3)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds

    >>> validate_indices(np.array([-1, -1]), 0)  # OK

    >>> validate_indices(np.array([0, 1]), 0)
    Traceback (most recent call last):
        ...
    IndexError: indices are out-of-bounds
    """
    # min()/max() are undefined on an empty array; nothing to check anyway.
    if not len(indices):
        return

    min_idx = indices.min()
    if min_idx < -1:
        msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
        raise ValueError(msg)

    max_idx = indices.max()
    if max_idx >= n:
        raise IndexError("indices are out-of-bounds")

235 

236 

237# ----------------------------------------------------------- 

238# Indexer Conversion 

239 

240 

def maybe_convert_indices(indices, n: int, verify: bool = True) -> np.ndarray:
    """
    Translate possibly-negative indices into non-negative positions.

    Negative entries are shifted by ``n``. When ``verify`` is set, any entry
    that ends up outside ``[0, n - 1]`` triggers an IndexError.

    Parameters
    ----------
    indices : array-like
        Array of indices that we are to convert.
    n : int
        Number of elements in the array that we are indexing.
    verify : bool, default True
        Check that all entries are between 0 and n - 1, inclusive.

    Returns
    -------
    array-like
        The converted indices. The input is copied before being modified,
        so a passed array is never changed in place.

    Raises
    ------
    IndexError
        With ``verify=True``, when a converted index is greater than or
        equal to `n`, or is still negative.
    """
    if isinstance(indices, list):
        indices = np.array(indices)
        if not len(indices):
            # np.array([]) is float-typed, which would break indexing later;
            # hand back an explicitly integer-typed empty array instead.
            return np.empty(0, dtype=np.intp)

    is_negative = indices < 0
    if is_negative.any():
        indices = indices.copy()
        indices[is_negative] += n

    if verify:
        out_of_bounds = (indices >= n) | (indices < 0)
        if out_of_bounds.any():
            raise IndexError("indices are out-of-bounds")

    return indices

286 

287 

288# ----------------------------------------------------------- 

289# Unsorted 

290 

291 

def length_of_indexer(indexer, target=None) -> int:
    """
    Return the expected length of target[indexer]

    Parameters
    ----------
    indexer : object
        A slice, Series, Index, ndarray, list, range or scalar.
    target : sized object, optional
        The object being indexed. Only consulted (for its length) when
        `indexer` is a slice.

    Returns
    -------
    int

    Raises
    ------
    AssertionError
        When `indexer` is a list-like of a kind not handled here.
    """
    if target is not None and isinstance(indexer, slice):
        # slice.indices() normalises None/negative/out-of-range bounds for a
        # sequence of the given length exactly as Python slicing does, so the
        # resulting range has the same length as target[indexer]. This also
        # covers negative steps and empty selections, which hand-rolled
        # arithmetic gets wrong (e.g. slice(None, None, -1)).
        return len(range(*indexer.indices(len(target))))
    elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)):
        if isinstance(indexer, list):
            indexer = np.array(indexer)

        if indexer.dtype == bool:
            # GH#25774
            # a boolean mask selects one element per True entry
            return indexer.sum()
        return len(indexer)
    elif isinstance(indexer, range):
        # len() is exact for every start/stop/step combination, including
        # spans that are not a multiple of the step and empty ranges.
        return len(indexer)
    elif not is_list_like_indexer(indexer):
        # scalar indexer selects a single element
        return 1
    raise AssertionError("cannot find the length of the indexer")

332 

333 

def disallow_ndim_indexing(result) -> None:
    """
    Reject the result of multi-dimensional indexing on a 1D Series/Index.

    GH#27125 an indexer such as idx[:, None] expands the dimensions, which
    cannot be done while keeping an index. An ndarray used to be returned in
    that situation; that behaviour was deprecated in GH#30588.

    Raises
    ------
    ValueError
        When `result` has more than one dimension.
    """
    if np.ndim(result) <= 1:
        return

    raise ValueError(
        "Multi-dimensional indexing (e.g. `obj[:, None]`) is no longer "
        "supported. Convert to a numpy array before indexing instead."
    )

347 

348 

def unpack_1tuple(tup):
    """
    Unwrap a length-1 tuple holding a slice into the bare slice.

    Anything else is returned unchanged.

    Raises
    ------
    ValueError
        When `tup` is a single-item *list* containing a slice; only the
        tuple form is accepted.
    """
    holds_single_slice = len(tup) == 1 and isinstance(tup[0], slice)
    if not holds_single_slice:
        return tup

    # if we don't have a MultiIndex, we may still be able to handle
    #  a 1-tuple.  see test_1tuple_without_multiindex
    if isinstance(tup, list):
        # GH#31299
        raise ValueError(
            "Indexing with a single-item list containing a "
            "slice is not allowed. Pass a tuple instead.",
        )

    return tup[0]

371 

372 

def check_key_length(columns: Index, key, value: DataFrame) -> None:
    """
    Verify that a list-like key lines up with the columns of `value`.

    Parameters
    ----------
    columns : Index
        The columns of the DataFrame to index.
    key : list-like
        Keys to index with.
    value : DataFrame
        The value to set for the keys.

    Raises
    ------
    ValueError
        If `columns` is unique and the length of `key` differs from the
        number of columns in `value`, or if `columns` is not unique and the
        number of columns referenced by `key` differs from the number of
        columns in `value`.
    """
    if columns.is_unique:
        mismatch = len(value.columns) != len(key)
    else:
        # Missing keys in columns are represented as -1
        positions = columns.get_indexer_non_unique(key)[0]
        mismatch = len(positions) != len(value.columns)

    if mismatch:
        raise ValueError("Columns must be same length as key")

397 

398 

def unpack_tuple_and_ellipses(item: tuple):
    """
    Reduce a tuple indexer such as arr[..., n] or arr[n, ...] to plain n.

    Raises
    ------
    IndexError
        When more than one entry remains after dropping a leading or
        trailing Ellipsis.
    """
    remaining = item
    if len(remaining) > 1:
        # Note: we are assuming this indexing is being done on a 1D arraylike
        # At most one Ellipsis is dropped, preferring the leading one.
        if remaining[0] is Ellipsis:
            remaining = remaining[1:]
        elif remaining[-1] is Ellipsis:
            remaining = remaining[:-1]

    if len(remaining) > 1:
        raise IndexError("too many indices for array.")

    return remaining[0]

415 

416 

417# ----------------------------------------------------------- 

418# Public indexer validation 

419 

420 

def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any:
    """
    Validate `indexer` as an array indexer for `array`.

    Boolean indexers must have the same length as `array`. Boolean and
    integer indexers, including ExtensionArray-backed ones, are converted to
    the matching numpy array; missing values in a boolean ExtensionArray are
    treated as False, while an integer indexer containing missing values is
    rejected. Array indexers of any other dtype raise an error.

    Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed
    through as is.

    Parameters
    ----------
    array : array-like
        The array that is being indexed (only used for the length).
    indexer : array-like or list-like
        The array-like that's used to index. List-like input that is not yet
        a numpy array or an ExtensionArray is converted to one. Other input
        types are passed through as is.

    Returns
    -------
    numpy.ndarray
        The validated indexer as a numpy array that can be used to index.

    Raises
    ------
    IndexError
        When a boolean indexer's length does not match that of `array`, or
        when the indexer's dtype is neither integer nor boolean.
    ValueError
        When `indexer` cannot be converted to a numpy ndarray to index
        (e.g. presence of missing values).

    See Also
    --------
    api.types.is_bool_dtype : Check if `key` is of boolean dtype.

    Examples
    --------
    A valid boolean mask comes back as a boolean ndarray.

    >>> mask = pd.array([True, False])
    >>> arr = pd.array([1, 2])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A mask of the wrong length raises an IndexError.

    >>> mask = pd.array([True, False, True])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    Traceback (most recent call last):
    ...
    IndexError: Boolean index has wrong length: 3 instead of 2

    NA values in a boolean array are treated as False.

    >>> mask = pd.array([True, pd.NA])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    A numpy boolean mask is passed through when its length is correct:

    >>> mask = np.array([True, False])
    >>> pd.api.indexers.check_array_indexer(arr, mask)
    array([ True, False])

    Integer indexers likewise come back as an integer ndarray when valid
    (their length does not need to match):

    >>> indexer = pd.array([0, 2], dtype="Int64")
    >>> arr = pd.array([1, 2, 3])
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    array([0, 2])

    >>> indexer = pd.array([0, pd.NA], dtype="Int64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    ValueError: Cannot index with an integer indexer containing NA values

    Any other dtype is rejected:

    >>> indexer = np.array([0., 2.], dtype="float64")
    >>> pd.api.indexers.check_array_indexer(arr, indexer)
    Traceback (most recent call last):
    ...
    IndexError: arrays used as indices must be of integer or boolean type
    """
    from pandas.core.construction import array as pd_array

    # Anything that is not list-like (integer, slice, Ellipsis, None) is a
    # potentially valid indexer we do not inspect. Tuples are handed back
    # too: in indexing they mean multi-dimensional indexing, not an array.
    if not is_list_like(indexer) or isinstance(indexer, tuple):
        return indexer

    # convert list-likes to array
    if not is_array_like(indexer):
        indexer = pd_array(indexer)
        if len(indexer) == 0:
            # empty list is converted to float array by pd.array
            indexer = np.array([], dtype=np.intp)

    dtype = indexer.dtype

    if is_bool_dtype(dtype):
        if is_extension_array_dtype(dtype):
            # missing values in the mask select nothing
            indexer = indexer.to_numpy(dtype=bool, na_value=False)
        else:
            indexer = np.asarray(indexer, dtype=bool)

        # GH26658
        if len(indexer) != len(array):
            raise IndexError(
                f"Boolean index has wrong length: "
                f"{len(indexer)} instead of {len(array)}"
            )
        return indexer

    if is_integer_dtype(dtype):
        try:
            return np.asarray(indexer, dtype=np.intp)
        except ValueError as err:
            raise ValueError(
                "Cannot index with an integer indexer containing NA values"
            ) from err

    raise IndexError("arrays used as indices must be of integer or boolean type")