Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/common.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

195 statements  

1""" 

2Misc tools for implementing data structures 

3 

4Note: pandas.core.common is *not* part of the public API. 

5""" 

6from __future__ import annotations 

7 

8import builtins 

9from collections import ( 

10 abc, 

11 defaultdict, 

12) 

13import contextlib 

14from functools import partial 

15import inspect 

16from typing import ( 

17 TYPE_CHECKING, 

18 Any, 

19 Callable, 

20 Collection, 

21 Generator, 

22 Hashable, 

23 Iterable, 

24 Sequence, 

25 cast, 

26 overload, 

27) 

28import warnings 

29 

30import numpy as np 

31 

32from pandas._libs import lib 

33from pandas._typing import ( 

34 AnyArrayLike, 

35 ArrayLike, 

36 NpDtype, 

37 RandomState, 

38 T, 

39) 

40 

41from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike 

42from pandas.core.dtypes.common import ( 

43 is_array_like, 

44 is_bool_dtype, 

45 is_extension_array_dtype, 

46 is_integer, 

47) 

48from pandas.core.dtypes.generic import ( 

49 ABCExtensionArray, 

50 ABCIndex, 

51 ABCSeries, 

52) 

53from pandas.core.dtypes.inference import iterable_not_string 

54from pandas.core.dtypes.missing import isna 

55 

56if TYPE_CHECKING: 

57 from pandas import Index 

58 

59 

def flatten(line):
    """
    Lazily yield the leaves of an arbitrarily nested sequence.

    Parameters
    ----------
    line : sequence
        The (non-string) sequence to walk.

    Yields
    ------
    object
        Each element for which ``iterable_not_string`` is false, in
        depth-first, left-to-right order.

    Notes
    -----
    Strings are treated as leaves; they are never split into characters.
    """
    for item in line:
        if not iterable_not_string(item):
            # Leaf (scalar or string): hand it back as-is.
            yield item
            continue
        # Nested container: descend and forward everything it produces.
        yield from flatten(item)

82 

83 

def consensus_name_attr(objs):
    """
    Return the ``name`` shared by every object in ``objs``, else None.

    Parameters
    ----------
    objs : sequence
        Indexable, sliceable sequence of objects exposing a ``name`` attribute.

    Returns
    -------
    object or None
        The first object's name if all other names compare equal to it,
        otherwise None. A comparison that raises ValueError counts as a
        mismatch.
    """
    consensus = objs[0].name
    for other in objs[1:]:
        try:
            # bool() stays inside the try: evaluating the truth value of the
            # comparison result is itself what may raise ValueError.
            differs = bool(other.name != consensus)
        except ValueError:
            differs = True
        if differs:
            consensus = None
    return consensus

93 

94 

def is_bool_indexer(key: Any) -> bool:
    """
    Decide whether ``key`` can be used as a boolean mask.

    Parameters
    ----------
    key : Any
        Only list-likes may qualify. Series, Index, ndarray and
        extension-dtype array-likes qualify when their dtype is boolean, or
        when they are object-dtype and hold only booleans. A non-empty
        ``list`` qualifies when ``lib.is_bool_list`` accepts it. Anything
        else is not a boolean indexer.

    Returns
    -------
    bool
        Whether ``key`` is a valid boolean indexer.

    Raises
    ------
    ValueError
        When an object-dtype array is inferred as boolean but contains
        missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    has_dtype = isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
        is_array_like(key) and is_extension_array_dtype(key.dtype)
    )

    if has_dtype:
        if key.dtype == np.object_:
            values = np.asarray(key)
            if lib.is_bool_array(values):
                return True
            # Only complain about NAs when the remaining values are booleans;
            # e.g. ["A", "B", np.nan] is just a list of labels, see
            # test_loc_getitem_list_of_labels_categoricalindex_with_na
            if lib.infer_dtype(values) == "boolean" and isna(values).any():
                raise ValueError(
                    "Cannot mask with non-boolean array containing NA / NaN values"
                )
            return False
        if is_bool_dtype(key.dtype):
            return True
        return False

    if isinstance(key, list) and len(key) > 0:
        # Mirrors checking whether np.array(key).dtype would be bool.
        # GH#42461 cython will raise TypeError if we pass a list subclass,
        # so hand it a plain list.
        plain = key if type(key) is list else list(key)
        return lib.is_bool_list(plain)

    return False

148 

149 

def cast_scalar_indexer(val):
    """
    Reject float keys that hold a whole number; pass everything else through.

    Parameters
    ----------
    val : scalar
        Assumed to satisfy ``lib.is_scalar(val)``.

    Returns
    -------
    outval : scalar
        ``val`` unchanged.

    Raises
    ------
    IndexError
        If ``val`` is a float whose value is integral (e.g. ``2.0``).
    """
    integral_float = lib.is_float(val) and val.is_integer()
    if not integral_float:
        return val

    # GH#34193
    raise IndexError(
        "Indexing with a float is no longer supported. Manually convert "
        "to an integer key instead."
    )

170 

171 

def not_none(*args):
    """
    Return a generator over the arguments that are not None, in order.
    """

    def _present():
        for candidate in args:
            if candidate is not None:
                yield candidate

    return _present()

177 

178 

def any_none(*args) -> bool:
    """
    Report whether at least one argument is None.
    """
    for candidate in args:
        if candidate is None:
            return True
    return False

184 

185 

def all_none(*args) -> bool:
    """
    Report whether every argument is None (True when called with no arguments).
    """
    for candidate in args:
        if candidate is not None:
            return False
    return True

191 

192 

def any_not_none(*args) -> bool:
    """
    Report whether at least one argument is something other than None.
    """
    for candidate in args:
        if candidate is not None:
            return True
    return False

198 

199 

def all_not_none(*args) -> bool:
    """
    Report whether no argument is None (True when called with no arguments).
    """
    for candidate in args:
        if candidate is None:
            return False
    return True

205 

206 

def count_not_none(*args) -> int:
    """
    Count how many of the arguments are not None.
    """
    present = [candidate for candidate in args if candidate is not None]
    return len(present)

212 

213 

@overload
def asarray_tuplesafe(
    values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
) -> np.ndarray:
    # ExtensionArray can only be returned when values is an Index, all other iterables
    # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
    # signature, so instead we special-case some common types.
    ...


@overload
def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
    ...


def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
    """
    Convert ``values`` to a 1-D array, keeping tuples as single elements.

    Parameters
    ----------
    values : Iterable
        Input to convert. Anything that is not a list, a tuple, or an object
        exposing ``__array__`` is first consumed into a list. An Index is
        short-circuited to its underlying ``_values``.
    dtype : dtype, optional
        Passed to ``np.asarray``. For list input with an object dtype, the
        elements are placed directly into a 1-D object array.

    Returns
    -------
    np.ndarray or ExtensionArray
        ExtensionArray only when ``values`` is an Index backed by one.
        String results are stored as object dtype, and input that NumPy would
        turn into a 2-D array is returned as a 1-D object array of tuples.
    """
    if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
        values = list(values)
    elif isinstance(values, ABCIndex):
        return values._values

    if isinstance(values, list) and dtype in [np.object_, object]:
        return construct_1d_object_array_from_listlike(values)

    # NumPy 2.0 removed ``np.VisibleDeprecationWarning`` from the top-level
    # namespace (it lives in ``np.exceptions``), so resolve it defensively
    # instead of raising AttributeError on newer NumPy.
    ragged_warning = getattr(np, "VisibleDeprecationWarning", None)
    if ragged_warning is None:
        ragged_warning = np.exceptions.VisibleDeprecationWarning

    try:
        with warnings.catch_warnings():
            # Can remove warning filter once NumPy 1.24 is min version
            warnings.simplefilter("ignore", ragged_warning)
            result = np.asarray(values, dtype=dtype)
    except ValueError:
        # Using try/except since it's more performant than checking is_list_like
        # over each element
        # error: Argument 1 to "construct_1d_object_array_from_listlike"
        # has incompatible type "Iterable[Any]"; expected "Sized"
        return construct_1d_object_array_from_listlike(values)  # type: ignore[arg-type]

    if issubclass(result.dtype.type, str):
        # Keep strings as Python objects rather than fixed-width unicode.
        result = np.asarray(values, dtype=object)

    if result.ndim == 2:
        # Avoid building an array of arrays:
        values = [tuple(x) for x in values]
        result = construct_1d_object_array_from_listlike(values)

    return result

259 

260 

def index_labels_to_array(
    labels: np.ndarray | Iterable, dtype: NpDtype | None = None
) -> np.ndarray:
    """
    Turn a single label or an iterable of labels into an array for an Index.

    Parameters
    ----------
    labels : label, np.ndarray or Iterable
        A string or tuple is treated as one label. Lists and ndarrays are
        used as given. Other iterables are consumed into a list, and
        non-iterables are wrapped in a one-element list.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        # A str/tuple is one label, not a collection of labels.
        listlike = [labels]
    elif isinstance(labels, (list, np.ndarray)):
        listlike = labels
    else:
        try:
            listlike = list(labels)
        except TypeError:  # non-iterable
            listlike = [labels]

    return asarray_tuplesafe(listlike, dtype=dtype)

288 

289 

def maybe_make_list(obj):
    """
    Wrap ``obj`` in a list unless it is None, a tuple, or already a list.
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]

294 

295 

def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
    """
    Consume ``obj`` into a list when it is iterable but has no length.

    Sized objects and non-iterables are returned unchanged.
    """
    unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    if not unsized_iterable:
        # cast() is a no-op at runtime; it only narrows the type for checkers.
        return cast(Collection, obj)
    return list(obj)

304 

305 

def is_null_slice(obj) -> bool:
    """
    Report whether ``obj`` is the null slice, i.e. ``slice(None, None, None)``.
    """
    if not isinstance(obj, slice):
        return False
    return all(part is None for part in (obj.start, obj.stop, obj.step))

316 

317 

def is_empty_slice(obj) -> bool:
    """
    Report whether ``obj`` is a slice whose start and stop are both set and equal.

    Such a slice selects no values.
    """
    if not isinstance(obj, slice):
        return False
    lower, upper = obj.start, obj.stop
    if lower is None or upper is None:
        return False
    return lower == upper

328 

329 

def is_true_slices(line) -> list[bool]:
    """
    Flag the non-trivial slices in ``line``.

    Returns a list of booleans with the same length as ``line``; an entry is
    True only where the element is a slice other than the null slice.
    """
    flags = []
    for entry in line:
        flags.append(isinstance(entry, slice) and not is_null_slice(entry))
    return flags

335 

336 

# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, line: int) -> bool:
    """
    Report whether ``obj`` is a slice from 0 to ``line`` with no step.

    Parameters
    ----------
    obj : object
        Candidate slice.
    line : int
        Length that the slice's stop must equal.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == line and obj.step is None

348 

349 

def get_callable_name(obj):
    """
    Best-effort name for a callable.

    Returns
    -------
    str or None
        ``obj.__name__`` when present; for a ``functools.partial`` without a
        name, the name of the wrapped function (found recursively); for any
        other callable, its class name. None when ``obj`` has no name and is
        not callable, so that "no name" can be told apart from a name of ''.
    """
    if hasattr(obj, "__name__"):
        # typical case has name
        label = obj.__name__
    elif isinstance(obj, partial):
        # partials carry no __name__; look through to what they wrap
        label = get_callable_name(obj.func)
    elif callable(obj):
        # fall back to class name
        label = type(obj).__name__
    else:
        # everything failed, probably because obj is not actually callable
        label = None
    return label

365 

366 

def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable(obj, **kwargs)`` if it is callable; otherwise return
    ``maybe_callable`` untouched.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)

382 

383 

def standardize_mapping(into):
    """
    Normalize a user-supplied mapping spec into a mapping constructor.

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        a callable object that can accept an iterator to create
        the desired Mapping.

    Raises
    ------
    TypeError
        If the resolved class is not a Mapping subclass, or if the bare
        ``defaultdict`` class is given instead of an initialized instance.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        mapping_cls = into
    elif isinstance(into, defaultdict):
        # Keep the instance's default_factory in the returned constructor.
        return partial(defaultdict, into.default_factory)
    else:
        mapping_cls = type(into)

    if not issubclass(mapping_cls, abc.Mapping):
        raise TypeError(f"unsupported type: {mapping_cls}")
    if mapping_cls == defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return mapping_cls

414 

415 

@overload
def random_state(state: np.random.Generator) -> np.random.Generator:
    ...


@overload
def random_state(
    state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
) -> np.random.RandomState:
    ...


def random_state(state: RandomState | None = None):
    """
    Resolve a ``random_state`` argument into an object to draw numbers from.

    Parameters
    ----------
    state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
        An int, array-like, or BitGenerator is used as the seed of a new
        np.random.RandomState.
        An np.random RandomState or Generator is returned unchanged.
        `None` yields the np.random module itself.
        Anything else raises an informative ValueError.

        .. versionchanged:: 1.1.0

            array-like and BitGenerator object now passed to np.random.RandomState()
            as seed

        Default None.

    Returns
    -------
    np.random.RandomState or np.random.Generator. If state is None, returns np.random

    Raises
    ------
    ValueError
        If ``state`` is none of the accepted kinds.
    """
    if state is None:
        return np.random

    if isinstance(state, (np.random.RandomState, np.random.Generator)):
        # Already a usable source of randomness.
        return state

    usable_as_seed = (
        is_integer(state)
        or is_array_like(state)
        or isinstance(state, np.random.BitGenerator)
    )
    if usable_as_seed:
        # mypy cannot reconcile pandas' RandomState alias with the seed types
        # numpy declares for RandomState(), hence the ignore.
        return np.random.RandomState(state)  # type: ignore[arg-type]

    raise ValueError(
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )

482 

483 

def pipe(
    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
) -> T:
    """
    Call ``func`` with ``obj`` supplied either positionally or by keyword.

    When ``func`` is a plain callable, ``obj`` is passed as its first
    positional argument. When ``func`` is a ``(callable, data_keyword)``
    tuple, ``obj`` is passed to ``callable`` as the keyword argument named
    ``data_keyword``.

    Parameters
    ----------
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If the data keyword is also present in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    callee, target = func
    if target in kwargs:
        raise ValueError(f"{target} is both the pipe target and a keyword argument")
    kwargs[target] = obj
    return callee(*args, **kwargs)

519 

520 

def get_rename_function(mapper):
    """
    Build the function used to map names/labels from ``mapper``.

    A Mapping or Series is wrapped in a lookup that leaves unknown labels
    unchanged; anything else (e.g. a function) is returned as-is.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def _rename(x):
        # Labels absent from the mapper pass through untouched.
        return mapper[x] if x in mapper else x

    return _rename

534 

535 

def convert_to_list_like(
    values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
    """
    Coerce list-like or scalar input to something list-like.

    Lists, numpy arrays and pandas array-likes (Index, Series,
    ExtensionArray) are returned unmodified. Other non-string iterables are
    consumed into a list, and everything else is wrapped in a one-element list.
    """
    passthrough = (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)
    if isinstance(values, passthrough):
        return values

    if isinstance(values, str) or not isinstance(values, abc.Iterable):
        # Scalars, including strings, become a single-element list.
        return [values]

    return list(values)

549 

550 

@contextlib.contextmanager
def temp_setattr(obj, attr: str, value) -> Generator[Any, None, None]:
    """
    Temporarily set an attribute on an object.

    Parameters
    ----------
    obj : object
        Object whose attribute will be modified. The attribute must already
        exist, since its current value is read before being replaced.
    attr : str
        Attribute to modify.
    value : object
        Value to temporarily set the attribute to.

    Yields
    ------
    object
        ``obj`` with the attribute set to ``value``.

    Notes
    -----
    The previous value is restored when the ``with`` block exits, including
    when it exits through an exception.
    """
    old_value = getattr(obj, attr)
    setattr(obj, attr, value)
    try:
        yield obj
    finally:
        # Runs on normal exit and on exceptions alike.
        setattr(obj, attr, old_value)

569 

570 

def require_length_match(data, index: Index) -> None:
    """
    Raise if ``data`` and ``index`` differ in length.

    Raises
    ------
    ValueError
        If ``len(data) != len(index)``.
    """
    n_values, n_index = len(data), len(index)
    if n_values == n_index:
        return
    raise ValueError(
        "Length of values "
        f"({n_values}) "
        "does not match length of index "
        f"({n_index})"
    )

582 

583 

584# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0, 

585# whereas np.min and np.max (which directly call obj.min and obj.max) 

586# default to axis=None. 

587_builtin_table = { 

588 builtins.sum: np.sum, 

589 builtins.max: np.maximum.reduce, 

590 builtins.min: np.minimum.reduce, 

591} 

592 

593_cython_table = { 

594 builtins.sum: "sum", 

595 builtins.max: "max", 

596 builtins.min: "min", 

597 np.all: "all", 

598 np.any: "any", 

599 np.sum: "sum", 

600 np.nansum: "sum", 

601 np.mean: "mean", 

602 np.nanmean: "mean", 

603 np.prod: "prod", 

604 np.nanprod: "prod", 

605 np.std: "std", 

606 np.nanstd: "std", 

607 np.var: "var", 

608 np.nanvar: "var", 

609 np.median: "median", 

610 np.nanmedian: "median", 

611 np.max: "max", 

612 np.nanmax: "max", 

613 np.min: "min", 

614 np.nanmin: "min", 

615 np.cumprod: "cumprod", 

616 np.nancumprod: "cumprod", 

617 np.cumsum: "cumsum", 

618 np.nancumsum: "cumsum", 

619} 

620 

621 

def get_cython_func(arg: Callable) -> str | None:
    """
    Look up the internal function name registered for ``arg``.

    Returns the name stored in ``_cython_table``, or None when ``arg`` has no
    entry there.
    """
    try:
        return _cython_table[arg]
    except KeyError:
        return None

627 

628 

def is_builtin_func(arg):
    """
    Swap ``arg`` for its registered replacement, if it has one.

    Returns the value stored for ``arg`` in ``_builtin_table``; when there is
    no entry, ``arg`` itself is returned.
    """
    try:
        return _builtin_table[arg]
    except KeyError:
        return arg

635 

636 

def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Replace each missing name with ``level_n``, where n is its position.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    filled: list[Hashable] = []
    for position, name in enumerate(names):
        if name is None:
            filled.append(f"level_{position}")
        else:
            filled.append(name)
    return filled