Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/common.py: 55%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

202 statements  

1""" 

2Misc tools for implementing data structures 

3 

4Note: pandas.core.common is *not* part of the public API. 

5""" 

6from __future__ import annotations 

7 

8import builtins 

9from collections import ( 

10 abc, 

11 defaultdict, 

12) 

13from collections.abc import ( 

14 Collection, 

15 Generator, 

16 Hashable, 

17 Iterable, 

18 Sequence, 

19) 

20import contextlib 

21from functools import partial 

22import inspect 

23from typing import ( 

24 TYPE_CHECKING, 

25 Any, 

26 Callable, 

27 cast, 

28 overload, 

29) 

30import warnings 

31 

32import numpy as np 

33 

34from pandas._libs import lib 

35from pandas.compat.numpy import np_version_gte1p24 

36 

37from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

38from pandas.core.dtypes.common import ( 

39 is_bool_dtype, 

40 is_integer, 

41) 

42from pandas.core.dtypes.generic import ( 

43 ABCExtensionArray, 

44 ABCIndex, 

45 ABCMultiIndex, 

46 ABCSeries, 

47) 

48from pandas.core.dtypes.inference import iterable_not_string 

49 

50if TYPE_CHECKING: 

51 from pandas._typing import ( 

52 AnyArrayLike, 

53 ArrayLike, 

54 NpDtype, 

55 RandomState, 

56 T, 

57 ) 

58 

59 from pandas import Index 

60 

61 

def flatten(line):
    """
    Lazily flatten an arbitrarily nested sequence.

    Parameters
    ----------
    line : sequence
        The non-string sequence to flatten.

    Yields
    ------
    object
        The leaf elements, visited depth-first from left to right.

    Notes
    -----
    Strings are never descended into; they are yielded as single elements.
    """
    for item in line:
        if not iterable_not_string(item):
            # Leaf: a scalar or a string.
            yield item
            continue
        yield from flatten(item)

84 

85 

def consensus_name_attr(objs):
    """
    Return the ``name`` shared by all of ``objs``, or None if they disagree.

    Parameters
    ----------
    objs : sequence
        Objects exposing a ``name`` attribute. Must contain at least one item.

    Returns
    -------
    object or None
        The name of the first object when every other object compares equal
        to it, otherwise None.
    """
    consensus = objs[0].name
    for other in objs[1:]:
        try:
            # The truth test stays inside the ``try``: comparing names may
            # produce an object whose truth value raises ValueError.
            differs = bool(other.name != consensus)
        except ValueError:
            differs = True
        if differs:
            consensus = None
    return consensus

95 

96 

def is_bool_indexer(key: Any) -> bool:
    """
    Decide whether `key` should be treated as a boolean indexer.

    Parameters
    ----------
    key : Any
        Candidate indexer. Only list-likes can qualify; every other type
        is reported as not being a boolean indexer. Array-like input
        qualifies when it is a boolean ndarray or an ExtensionArray with
        ``_is_boolean`` set.

    Returns
    -------
    bool
        Whether `key` is a valid boolean indexer.

    Raises
    ------
    ValueError
        When the array is an object-dtype ndarray or ExtensionArray
        and contains missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    array_types = (ABCSeries, np.ndarray, ABCIndex, ABCExtensionArray)
    if isinstance(key, array_types) and not isinstance(key, ABCMultiIndex):
        if key.dtype == np.object_:
            values = np.asarray(key)
            if lib.is_bool_array(values):
                return True
            if lib.is_bool_array(values, skipna=True):
                # Booleans mixed with missing values cannot act as a mask.
                # Object arrays that are not boolean at all, e.g.
                # ["A", "B", np.nan], do not raise; see
                # test_loc_getitem_list_of_labels_categoricalindex_with_na
                na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
                raise ValueError(na_msg)
            return False
        if is_bool_dtype(key.dtype):
            return True
        return False

    if isinstance(key, list) and len(key) > 0:
        # check if np.array(key).dtype would be bool
        if type(key) is not list:  # noqa: E721
            # GH#42461 cython will raise TypeError if we pass a subclass
            key = list(key)
        return lib.is_bool_list(key)

    return False

150 

151 

def cast_scalar_indexer(val):
    """
    Reject float keys that hold a whole number; pass everything else through.

    Parameters
    ----------
    val : scalar
        Assumed to satisfy ``lib.is_scalar(val)``.

    Returns
    -------
    outval : scalar
        ``val`` itself, unchanged.

    Raises
    ------
    IndexError
        If ``val`` is a float with an integral value.
    """
    is_round_float = lib.is_float(val) and val.is_integer()
    if not is_round_float:
        return val
    raise IndexError(
        # GH#34193
        "Indexing with a float is no longer supported. Manually convert "
        "to an integer key instead."
    )

172 

173 

def not_none(*args):
    """
    Build a generator over the positional arguments, skipping every None.
    """
    return (candidate for candidate in args if candidate is not None)

179 

180 

def any_none(*args) -> bool:
    """
    Report whether at least one of the arguments is None.
    """
    for candidate in args:
        if candidate is None:
            return True
    return False

186 

187 

def all_none(*args) -> bool:
    """
    Report whether every argument is None (True when called with no arguments).
    """
    for candidate in args:
        if candidate is not None:
            return False
    return True

193 

194 

def any_not_none(*args) -> bool:
    """
    Report whether at least one of the arguments is something other than None.
    """
    for candidate in args:
        if candidate is not None:
            return True
    return False

200 

201 

def all_not_none(*args) -> bool:
    """
    Report whether no argument is None (True when called with no arguments).
    """
    for candidate in args:
        if candidate is None:
            return False
    return True

207 

208 

def count_not_none(*args) -> int:
    """
    Count how many of the arguments are not None.
    """
    present = [candidate for candidate in args if candidate is not None]
    return len(present)

214 

215 

@overload
def asarray_tuplesafe(
    values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
) -> np.ndarray:
    # An ExtensionArray can only come back when values is an Index; every other
    # iterable yields np.ndarray. "Every other" cannot be expressed in a type
    # signature, so a few common input types are special-cased here.
    ...


@overload
def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
    ...


def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
    """
    Convert ``values`` to an array, keeping tuples intact as single elements.

    Parameters
    ----------
    values : iterable
        Index and Series input is answered with its ``_values``. Anything
        that is not a list, a tuple or an ``__array__`` provider is first
        consumed into a list.
    dtype : dtype, optional
        Passed on to ``np.asarray``. An object dtype combined with list
        input goes straight to ``construct_1d_object_array_from_listlike``.

    Returns
    -------
    ArrayLike
        The converted array. A conversion that would come out 2-dimensional
        is rebuilt as a 1-dimensional object array of tuples, and a
        string-typed result is re-created with object dtype.
    """
    is_arraylike = isinstance(values, (list, tuple)) or hasattr(values, "__array__")
    if not is_arraylike:
        values = list(values)
    elif isinstance(values, (ABCIndex, ABCSeries)):
        return values._values

    wants_object = dtype in [np.object_, object]
    if isinstance(values, list) and wants_object:
        return construct_1d_object_array_from_listlike(values)

    try:
        with warnings.catch_warnings():
            # Can remove warning filter once NumPy 1.24 is min version
            if not np_version_gte1p24:
                warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
            converted = np.asarray(values, dtype=dtype)
    except ValueError:
        # try/except is used because it is more performant than checking
        # is_list_like over each element
        # error: Argument 1 to "construct_1d_object_array_from_listlike"
        # has incompatible type "Iterable[Any]"; expected "Sized"
        return construct_1d_object_array_from_listlike(values)  # type: ignore[arg-type]

    if issubclass(converted.dtype.type, str):
        converted = np.asarray(values, dtype=object)

    if converted.ndim != 2:
        return converted

    # Avoid building an array of arrays:
    as_tuples = [tuple(row) for row in values]
    return construct_1d_object_array_from_listlike(as_tuples)

264 

265 

def index_labels_to_array(
    labels: np.ndarray | Iterable, dtype: NpDtype | None = None
) -> np.ndarray:
    """
    Turn a single label or an iterable of labels into an array for an Index.

    Parameters
    ----------
    labels : label, ndarray or iterable
        A string or tuple counts as one label. A non-iterable object is
        likewise wrapped as a single label.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        # One label, not a collection of labels.
        labels = [labels]
    elif not isinstance(labels, (list, np.ndarray)):
        try:
            labels = list(labels)
        except TypeError:  # non-iterable
            labels = [labels]

    return asarray_tuplesafe(labels, dtype=dtype)

293 

294 

def maybe_make_list(obj):
    """
    Wrap ``obj`` in a one-element list unless it is None, a tuple or a list.
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]

299 

300 

def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
    """
    Consume ``obj`` into a list when it is iterable but has no length.

    Anything else (sized containers as well as non-iterables) is returned
    unchanged.
    """
    is_unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    if is_unsized_iterable:
        return list(obj)
    return cast(Collection, obj)

309 

310 

def is_null_slice(obj) -> bool:
    """
    Check for a slice whose start, stop and step are all None.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start is None and obj.stop is None and obj.step is None

321 

322 

def is_empty_slice(obj) -> bool:
    """
    Check for a slice that selects nothing: start and stop are both set and
    compare equal.
    """
    if not isinstance(obj, slice):
        return False
    if obj.start is None or obj.stop is None:
        return False
    return obj.start == obj.stop

333 

334 

def is_true_slices(line) -> list[bool]:
    """
    Flag each element of "line" that is a slice other than the null slice.

    The returned list has the same length as "line".
    """

    def _is_nontrivial_slice(key) -> bool:
        return isinstance(key, slice) and not is_null_slice(key)

    return [_is_nontrivial_slice(key) for key in line]

340 

341 

# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, line: int) -> bool:
    """
    Check for a slice running from 0 to ``line`` with no explicit step.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == line and obj.step is None

353 

354 

def get_callable_name(obj):
    """
    Find a display name for ``obj``.

    The ``__name__`` attribute is preferred. A ``functools.partial`` is
    unwrapped to the function it wraps. Any other callable is named after
    its class.

    Returns
    -------
    str or None
        None when ``obj`` has no ``__name__`` and is not callable. None is
        used rather than the empty string so that "no name" can be told
        apart from a name of ''.
    """
    if hasattr(obj, "__name__"):
        # typical case
        return obj.__name__
    if isinstance(obj, partial):
        return get_callable_name(obj.func)
    return type(obj).__name__ if callable(obj) else None

370 

371 

def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable`` with ``obj`` and ``kwargs`` when it is callable;
    otherwise hand it back untouched.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)

387 

388 

def standardize_mapping(into):
    """
    Normalise a user-supplied mapping specification into a constructor.

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If the resolved class is not a Mapping subclass, or if it is the
        bare ``defaultdict`` class rather than an initialized instance.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        mapping_cls = into
    elif isinstance(into, defaultdict):
        # Carry over the default factory of the instance we were given.
        return partial(defaultdict, into.default_factory)
    else:
        mapping_cls = type(into)

    if not issubclass(mapping_cls, abc.Mapping):
        raise TypeError(f"unsupported type: {mapping_cls}")
    if mapping_cls == defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return mapping_cls

419 

420 

@overload
def random_state(state: np.random.Generator) -> np.random.Generator:
    ...


@overload
def random_state(
    state: int | np.ndarray | np.random.BitGenerator | np.random.RandomState | None,
) -> np.random.RandomState:
    ...


def random_state(state: RandomState | None = None):
    """
    Normalise a ``random_state`` argument into a usable random source.

    Parameters
    ----------
    state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
        An int, ndarray or BitGenerator is used as the seed of a new
        np.random.RandomState.
        An existing np.random.RandomState or Generator is returned unchanged.
        `None` yields the np.random module.
        Anything else raises an informative ValueError.

        Default None.

    Returns
    -------
    np.random.RandomState or np.random.Generator. If state is None, returns np.random

    Raises
    ------
    ValueError
        If ``state`` is none of the accepted kinds.
    """
    if isinstance(state, (np.random.RandomState, np.random.Generator)):
        # Already a random source.
        return state
    if state is None:
        return np.random
    if is_integer(state) or isinstance(state, (np.ndarray, np.random.BitGenerator)):
        return np.random.RandomState(state)
    raise ValueError(
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )

466 

467 

def pipe(
    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
) -> T:
    """
    Apply ``func`` to ``obj``.

    With a plain callable, ``obj`` is passed as the first positional
    argument. With a ``(callable, data_keyword)`` tuple, ``obj`` is passed
    to the callable as the keyword argument named by ``data_keyword``.

    Parameters
    ----------
    obj : object
        The object handed to ``func``.
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If ``data_keyword`` is also present in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    func, target = func
    if target in kwargs:
        raise ValueError(f"{target} is both the pipe target and a keyword argument")
    kwargs[target] = obj
    return func(*args, **kwargs)

503 

504 

def get_rename_function(mapper):
    """
    Build the function used to map names/labels.

    A dict-like or Series ``mapper`` is wrapped in a lookup that leaves
    labels not found in it unchanged. Any other ``mapper`` is returned
    as is.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def lookup(label):
        return mapper[label] if label in mapper else label

    return lookup

518 

519 

def convert_to_list_like(
    values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
    """
    Coerce list-like or scalar input to something list-like.

    Lists, numpy arrays and pandas array-likes are returned unmodified.
    Other non-string iterables are consumed into a list, and everything
    else (strings included) is wrapped in a one-element list.
    """
    passthrough_types = (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)
    if isinstance(values, passthrough_types):
        return values
    if isinstance(values, str) or not isinstance(values, abc.Iterable):
        # A single value; strings are deliberately not split into characters.
        return [values]
    return list(values)

533 

534 

@contextlib.contextmanager
def temp_setattr(
    obj, attr: str, value, condition: bool = True
) -> Generator[None, None, None]:
    """
    Set an attribute on an object for the duration of a ``with`` block.

    Parameters
    ----------
    obj : object
        Object whose attribute will be modified.
    attr : str
        Attribute to modify.
    value : Any
        Value to temporarily set attribute to.
    condition : bool, default True
        Whether to set the attribute. Provided in order to not have to
        conditionally use this context manager.

    Yields
    ------
    object : obj with modified attribute.
    """
    if not condition:
        # Nothing to set and nothing to restore.
        yield obj
        return

    previous = getattr(obj, attr)
    setattr(obj, attr, value)
    try:
        yield obj
    finally:
        # Put the original value back even if the block raised.
        setattr(obj, attr, previous)

566 

567 

def require_length_match(data, index: Index) -> None:
    """
    Raise a ValueError unless ``data`` and ``index`` have the same length.
    """
    n_values = len(data)
    n_index = len(index)
    if n_values == n_index:
        return
    raise ValueError(
        f"Length of values ({n_values}) "
        f"does not match length of index ({n_index})"
    )

579 

580 

# Replacements for Python builtins, looked up by ``is_builtin_func``.
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.maximum.reduce,
    builtins.min: np.minimum.reduce,
}

# GH#53425: Only for deprecation
# Same keys as ``_builtin_table``, with each replacement spelled out as a string.
_builtin_table_alias = {
    builtins.sum: "np.sum",
    builtins.max: "np.maximum.reduce",
    builtins.min: "np.minimum.reduce",
}

# Maps builtin and numpy reduction/accumulation callables to a string name,
# looked up by ``get_cython_func``. Each nan-aware numpy variant shares the
# name of its plain counterpart.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}

624 

625 

def get_cython_func(arg: Callable) -> str | None:
    """
    Look up the internal function name registered for ``arg``.

    Returns
    -------
    str or None
        The entry for ``arg`` in ``_cython_table``, or None if there is none.
    """
    internal_name = _cython_table.get(arg, None)
    return internal_name

631 

632 

def is_builtin_func(arg):
    """
    Swap ``arg`` for its entry in ``_builtin_table`` when it has one.

    Returns
    -------
    object
        The registered replacement, or ``arg`` itself when there is none.
    """
    replacement = _builtin_table.get(arg, arg)
    return replacement

639 

640 

def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Replace each missing name with ``level_n``, where n is its position.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    return [
        name if name is not None else f"level_{position}"
        for position, name in enumerate(names)
    ]