1"""
2Misc tools for implementing data structures
3
4Note: pandas.core.common is *not* part of the public API.
5"""
6from __future__ import annotations
7
8import builtins
9from collections import (
10 abc,
11 defaultdict,
12)
13import contextlib
14from functools import partial
15import inspect
16from typing import (
17 TYPE_CHECKING,
18 Any,
19 Callable,
20 Collection,
21 Generator,
22 Hashable,
23 Iterable,
24 Sequence,
25 cast,
26 overload,
27)
28import warnings
29
30import numpy as np
31
32from pandas._libs import lib
33from pandas._typing import (
34 AnyArrayLike,
35 ArrayLike,
36 NpDtype,
37 RandomState,
38 T,
39)
40
41from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
42from pandas.core.dtypes.common import (
43 is_array_like,
44 is_bool_dtype,
45 is_extension_array_dtype,
46 is_integer,
47)
48from pandas.core.dtypes.generic import (
49 ABCExtensionArray,
50 ABCIndex,
51 ABCSeries,
52)
53from pandas.core.dtypes.inference import iterable_not_string
54from pandas.core.dtypes.missing import isna
55
56if TYPE_CHECKING:
57 from pandas import Index
58
59
def flatten(line):
    """
    Lazily flatten an arbitrarily nested, non-string sequence.

    Parameters
    ----------
    line : sequence
        The non-string sequence to flatten.

    Yields
    ------
    object
        Every leaf element, visited depth-first from left to right.

    Notes
    -----
    Strings are never descended into; they are yielded as single elements.
    """
    for item in line:
        if not iterable_not_string(item):
            # Leaf (scalar or string): emit as-is.
            yield item
        else:
            # Nested iterable: recurse and splice its leaves in place.
            yield from flatten(item)
82
83
def consensus_name_attr(objs):
    """
    Return the ``name`` shared by every object in ``objs``, else None.

    Parameters
    ----------
    objs : sequence
        Non-empty, indexable sequence of objects exposing a ``name`` attribute.

    Returns
    -------
    object or None
        The first object's ``name`` if no later object's name compares
        unequal to the running result; otherwise None.
    """
    shared = objs[0].name
    for other in objs[1:]:
        try:
            # The truth-value test must stay inside the try: names whose
            # comparison has no single truth value raise ValueError here.
            mismatch = bool(other.name != shared)
        except ValueError:
            mismatch = True
        if mismatch:
            shared = None
    return shared
93
94
def is_bool_indexer(key: Any) -> bool:
    """
    Determine whether ``key`` can be used as a boolean mask.

    Parameters
    ----------
    key : Any
        Candidate indexer. Only list-likes can qualify; anything else is
        rejected. Array-likes qualify when they are boolean ndarrays or
        have a boolean dtype; plain lists qualify when non-empty and made
        up of booleans.

    Returns
    -------
    bool
        True if ``key`` is a valid boolean indexer, False otherwise.

    Raises
    ------
    ValueError
        If ``key`` is object-dtype, is inferred as boolean, and contains
        missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    has_array_dtype = isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
        is_array_like(key) and is_extension_array_dtype(key.dtype)
    )

    if has_array_dtype:
        if key.dtype == np.object_:
            values = np.asarray(key)
            if lib.is_bool_array(values):
                return True

            # Object array that is not purely boolean: only complain when it
            # is "boolean with holes". Don't raise on e.g. ["A", "B", np.nan],
            # see test_loc_getitem_list_of_labels_categoricalindex_with_na
            if lib.infer_dtype(values) == "boolean" and isna(values).any():
                raise ValueError(
                    "Cannot mask with non-boolean array containing NA / NaN values"
                )
            return False

        if is_bool_dtype(key.dtype):
            return True
        return False

    if isinstance(key, list) and len(key) > 0:
        # Mirrors "would np.array(key).dtype be bool?"
        if type(key) is not list:
            # GH#42461 cython will raise TypeError if we pass a subclass
            key = list(key)
        return lib.is_bool_list(key)

    return False
148
149
def cast_scalar_indexer(val):
    """
    Reject float keys that hold a whole number; pass anything else through.

    Parameters
    ----------
    val : scalar
        Assumed to satisfy ``lib.is_scalar(val)``.

    Returns
    -------
    outval : scalar
        ``val`` unchanged.

    Raises
    ------
    IndexError
        If ``val`` is a float whose value is integral (e.g. ``2.0``).
    """
    whole_number_float = lib.is_float(val) and val.is_integer()
    if not whole_number_float:
        return val

    raise IndexError(
        # GH#34193
        "Indexing with a float is no longer supported. Manually convert "
        "to an integer key instead."
    )
170
171
def not_none(*args):
    """
    Return a generator over the arguments that are not None, in order.
    """
    for candidate in args:
        if candidate is not None:
            yield candidate
177
178
def any_none(*args) -> bool:
    """
    Return True if at least one argument is None, else False.
    """
    return not all(value is not None for value in args)
184
185
def all_none(*args) -> bool:
    """
    Return True if every argument is None (vacuously True with no arguments).
    """
    return not any(value is not None for value in args)
191
192
def any_not_none(*args) -> bool:
    """
    Return True if at least one argument is not None, else False.
    """
    return not all(value is None for value in args)
198
199
def all_not_none(*args) -> bool:
    """
    Return True if no argument is None (vacuously True with no arguments).
    """
    return not any(value is None for value in args)
205
206
def count_not_none(*args) -> int:
    """
    Return how many of the arguments are not None.
    """
    return sum(1 for value in args if value is not None)
212
213
@overload
def asarray_tuplesafe(
    values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
) -> np.ndarray:
    # ExtensionArray can only be returned when values is an Index, all other iterables
    # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
    # signature, so instead we special-case some common types.
    ...


@overload
def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
    ...


def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
    """
    Convert an iterable to a 1-D array, keeping tuples as single elements.

    Parameters
    ----------
    values : iterable
        Lists, tuples and objects exposing ``__array__`` are used directly;
        any other iterable is first consumed into a list. For an Index its
        underlying ``_values`` are returned as-is.
    dtype : dtype, optional
        Passed to ``np.asarray``. An object dtype combined with list input
        builds a 1-D object array directly.

    Returns
    -------
    np.ndarray or ExtensionArray
        String results are converted to object dtype, and 2-D results are
        rebuilt as a 1-D object array of tuples.
    """
    if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
        values = list(values)
    elif isinstance(values, ABCIndex):
        return values._values

    if isinstance(values, list) and dtype in [np.object_, object]:
        return construct_1d_object_array_from_listlike(values)

    try:
        with warnings.catch_warnings():
            # np.VisibleDeprecationWarning is no longer exposed in the
            # top-level NumPy namespace on NumPy >= 2.0, so look it up
            # defensively instead of failing with AttributeError. On those
            # versions ragged input raises ValueError (handled below), so no
            # filter is needed. Can remove the filter entirely once
            # NumPy 1.24 is min version.
            ragged_warning = getattr(np, "VisibleDeprecationWarning", None)
            if ragged_warning is not None:
                warnings.simplefilter("ignore", ragged_warning)
            result = np.asarray(values, dtype=dtype)
    except ValueError:
        # Using try/except since it's more performant than checking is_list_like
        # over each element
        # error: Argument 1 to "construct_1d_object_array_from_listlike"
        # has incompatible type "Iterable[Any]"; expected "Sized"
        return construct_1d_object_array_from_listlike(values)  # type: ignore[arg-type]

    if issubclass(result.dtype.type, str):
        result = np.asarray(values, dtype=object)

    if result.ndim == 2:
        # Avoid building an array of arrays:
        values = [tuple(x) for x in values]
        result = construct_1d_object_array_from_listlike(values)

    return result
259
260
def index_labels_to_array(
    labels: np.ndarray | Iterable, dtype: NpDtype | None = None
) -> np.ndarray:
    """
    Transform label or iterable of labels to array, for use in Index.

    Parameters
    ----------
    labels : label or iterable of labels
        A single str or tuple is treated as one label. Lists and ndarrays
        are used as given; other iterables are consumed into a list, and
        non-iterables are wrapped in a one-element list.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        # A lone string/tuple is a single label, not a collection of labels.
        labels = [labels]
    elif not isinstance(labels, (list, np.ndarray)):
        try:
            labels = list(labels)
        except TypeError:  # non-iterable
            labels = [labels]

    return asarray_tuplesafe(labels, dtype=dtype)
288
289
def maybe_make_list(obj):
    """
    Wrap ``obj`` in a list unless it is None, a tuple, or already a list.
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]
294
295
def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
    """
    Consume an unsized iterable (e.g. a generator) into a list.

    Anything that is not iterable, or that is iterable and also sized,
    is returned unchanged.
    """
    is_unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    if not is_unsized_iterable:
        return cast(Collection, obj)
    return list(obj)
304
305
def is_null_slice(obj) -> bool:
    """
    Return True if ``obj`` is a slice whose start, stop and step are all None.
    """
    if not isinstance(obj, slice):
        return False
    return all(part is None for part in (obj.start, obj.stop, obj.step))
316
317
def is_empty_slice(obj) -> bool:
    """
    Return True if ``obj`` is a slice with explicit, equal start and stop,
    i.e. one that selects no values.
    """
    if not isinstance(obj, slice):
        return False
    if obj.start is None or obj.stop is None:
        return False
    return obj.start == obj.stop
328
329
def is_true_slices(line) -> list[bool]:
    """
    Flag the non-trivial slices in ``line``.

    Returns a list with one boolean per element: True where the element is
    a slice that is not a null slice, False everywhere else.
    """
    return [
        not is_null_slice(item) if isinstance(item, slice) else False
        for item in line
    ]
335
336
337# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, line: int) -> bool:
    """
    Return True if ``obj`` is ``slice(0, line)`` with no step, i.e. a slice
    spanning the full length ``line``.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == line and obj.step is None
348
349
def get_callable_name(obj):
    """
    Best-effort name for a callable.

    Resolution order: the object's own ``__name__``; for a
    ``functools.partial`` without one, the name of the wrapped function
    (unwrapping repeatedly); for any other callable, its class name.

    Returns
    -------
    str or None
        None when no name can be found (most likely ``obj`` is not
        callable). None rather than ``''`` so that "no name" stays
        distinguishable from an empty name.
    """
    target = obj
    while True:
        # typical case has name
        if hasattr(target, "__name__"):
            return target.__name__
        if not isinstance(target, partial):
            break
        # partial objects carry no __name__; look at what they wrap
        target = target.func

    if callable(target):
        # fall back to class name
        return type(target).__name__
    return None
365
366
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable`` with ``obj`` and ``kwargs`` when it is callable;
    otherwise hand it back untouched.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
    **kwargs
        Forwarded to ``maybe_callable`` when it is invoked.
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)
382
383
def standardize_mapping(into):
    """
    Normalize a user-supplied mapping specification into a constructor.

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        A callable that can accept an iterator to create the desired
        Mapping. For an initialized defaultdict this is a ``partial`` that
        carries over its ``default_factory``.

    Raises
    ------
    TypeError
        If the resolved type is not a Mapping subclass, or if the bare
        ``defaultdict`` class (uninitialized) is given.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    if inspect.isclass(into):
        mapping_type = into
    elif isinstance(into, defaultdict):
        # Keep the instance's default_factory when rebuilding.
        return partial(defaultdict, into.default_factory)
    else:
        mapping_type = type(into)

    if not issubclass(mapping_type, abc.Mapping):
        raise TypeError(f"unsupported type: {mapping_type}")
    if mapping_type == defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return mapping_type
414
415
@overload
def random_state(state: np.random.Generator) -> np.random.Generator:
    ...


@overload
def random_state(
    state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
) -> np.random.RandomState:
    ...


def random_state(state: RandomState | None = None):
    """
    Helper function for processing random_state arguments.

    Parameters
    ----------
    state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
        If receives an int, array-like, or BitGenerator, passes to
        np.random.RandomState() as seed.
        If receives an np.random RandomState or Generator, just returns that unchanged.
        If receives `None`, returns np.random.
        If receives anything else, raises an informative ValueError.
        Default None.

        .. versionchanged:: 1.1.0

            array-like and BitGenerator object now passed to np.random.RandomState()
            as seed

    Returns
    -------
    np.random.RandomState or np.random.Generator. If state is None, returns np.random

    Raises
    ------
    ValueError
        If ``state`` is none of the accepted kinds.
    """
    seed_like = (
        is_integer(state)
        or is_array_like(state)
        or isinstance(state, np.random.BitGenerator)
    )
    if seed_like:
        # mypy cannot match the RandomState | None union against the seed
        # signature of np.random.RandomState, hence the ignore.
        return np.random.RandomState(state)  # type: ignore[arg-type]

    if isinstance(state, np.random.RandomState):
        return state
    if isinstance(state, np.random.Generator):
        return state
    if state is None:
        return np.random

    raise ValueError(
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )
482
483
def pipe(
    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
) -> T:
    """
    Apply ``func`` to ``obj``.

    When ``func`` is a plain callable, ``obj`` is passed as its first
    positional argument. When ``func`` is a ``(callable, data_keyword)``
    tuple, ``obj`` is passed to ``callable`` as the keyword argument named
    by ``data_keyword``.

    Parameters
    ----------
    obj : object
        The object handed to ``func``.
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If ``data_keyword`` is also supplied in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    callee, data_keyword = func
    if data_keyword in kwargs:
        raise ValueError(
            f"{data_keyword} is both the pipe target and a keyword argument"
        )
    kwargs[data_keyword] = obj
    return callee(*args, **kwargs)
519
520
def get_rename_function(mapper):
    """
    Build the function used to map names/labels.

    If ``mapper`` is a Mapping or a Series, the returned function looks each
    label up in it and falls back to the label itself when it is absent.
    Any other ``mapper`` is returned unchanged.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def rename(label):
        return mapper[label] if label in mapper else label

    return rename
534
535
def convert_to_list_like(
    values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
    """
    Coerce list-like or scalar input to something list-like.

    Lists, ndarrays, Index, Series and ExtensionArray inputs are returned
    unmodified. Other non-string iterables are consumed into a list, and
    everything else (including strings) is wrapped in a one-element list.
    """
    passthrough_types = (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)
    if isinstance(values, passthrough_types):
        return values

    # Strings are iterable but are treated as scalars here.
    if isinstance(values, str) or not isinstance(values, abc.Iterable):
        return [values]
    return list(values)
549
550
@contextlib.contextmanager
def temp_setattr(obj, attr: str, value) -> Generator[None, None, None]:
    """
    Temporarily set an attribute on an object.

    Parameters
    ----------
    obj : object
        Object whose attribute will be modified.
    attr : str
        Attribute to modify. It is read before being overwritten so that
        it can be restored afterwards.
    value : object
        Value to temporarily set the attribute to.

    Yields
    ------
    object
        ``obj`` with the attribute modified.
    """
    previous = getattr(obj, attr)
    setattr(obj, attr, value)
    try:
        yield obj
    finally:
        # Restore the saved value even if the with-body raised.
        setattr(obj, attr, previous)
569
570
def require_length_match(data, index: Index) -> None:
    """
    Raise ValueError unless ``data`` and ``index`` have the same length.
    """
    n_values = len(data)
    n_index = len(index)
    if n_values == n_index:
        return
    raise ValueError(
        f"Length of values ({n_values}) does not match length of index ({n_index})"
    )
582
583
# Python builtin reducers mapped to NumPy replacements; this is the table
# consulted by ``is_builtin_func``.
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.maximum.reduce,
    builtins.min: np.minimum.reduce,
}

# Builtin and NumPy callables mapped to an operation name; this is the table
# consulted by ``get_cython_func``. Each NaN-aware NumPy variant (np.nansum,
# np.nanmean, ...) maps to the same name as its plain counterpart.
# NOTE(review): the names presumably match pandas' own reduction /
# accumulation methods -- confirm against the callers of get_cython_func.
_cython_table = {
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}
620
621
def get_cython_func(arg: Callable) -> str | None:
    """
    Return the name registered for ``arg`` in ``_cython_table``, or None if
    ``arg`` has no entry there.
    """
    return _cython_table[arg] if arg in _cython_table else None
627
628
def is_builtin_func(arg):
    """
    Return the replacement registered for ``arg`` in ``_builtin_table``;
    if there is none, return ``arg`` itself.
    """
    if arg in _builtin_table:
        return _builtin_table[arg]
    return arg
635
636
def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Replace each missing (None) name by ``level_n``, where n is the
    position of that name in ``names``.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    return [
        name if name is not None else f"level_{position}"
        for position, name in enumerate(names)
    ]