1"""
2Misc tools for implementing data structures
3
4Note: pandas.core.common is *not* part of the public API.
5"""
6from __future__ import annotations
7
8import builtins
9from collections import (
10 abc,
11 defaultdict,
12)
13from collections.abc import (
14 Collection,
15 Generator,
16 Hashable,
17 Iterable,
18 Sequence,
19)
20import contextlib
21from functools import partial
22import inspect
23from typing import (
24 TYPE_CHECKING,
25 Any,
26 Callable,
27 cast,
28 overload,
29)
30import warnings
31
32import numpy as np
33
34from pandas._libs import lib
35from pandas.compat.numpy import np_version_gte1p24
36
37from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
38from pandas.core.dtypes.common import (
39 is_bool_dtype,
40 is_integer,
41)
42from pandas.core.dtypes.generic import (
43 ABCExtensionArray,
44 ABCIndex,
45 ABCMultiIndex,
46 ABCSeries,
47)
48from pandas.core.dtypes.inference import iterable_not_string
49
50if TYPE_CHECKING:
51 from pandas._typing import (
52 AnyArrayLike,
53 ArrayLike,
54 NpDtype,
55 RandomState,
56 T,
57 )
58
59 from pandas import Index
60
61
def flatten(line):
    """
    Lazily yield the leaf elements of an arbitrarily nested sequence.

    Parameters
    ----------
    line : sequence
        The non-string sequence to flatten.

    Returns
    -------
    flattened : generator
        Yields the leaves depth-first, in their original order.

    Notes
    -----
    Strings are not treated as sequences: they are yielded whole.
    """
    for item in line:
        if not iterable_not_string(item):
            # Leaf (scalar or string): emit it as-is.
            yield item
            continue
        # Nested container: descend and emit its leaves in place.
        yield from flatten(item)
84
85
def consensus_name_attr(objs):
    """
    Return the ``name`` shared by all of ``objs``, or None if they disagree.

    Parameters
    ----------
    objs : sequence
        Objects exposing a ``name`` attribute; must be non-empty and support
        indexing and slicing.

    Returns
    -------
    object or None
        ``objs[0].name`` when every later object compares equal to it,
        otherwise None. A comparison that raises ValueError counts as a
        mismatch.
    """
    consensus = objs[0].name
    for other in objs[1:]:
        try:
            # Truth-testing stays inside the try: it is what raises for
            # names whose comparison result has no single truth value.
            mismatch = bool(other.name != consensus)
        except ValueError:
            mismatch = True
        if mismatch:
            consensus = None
    return consensus
95
96
def is_bool_indexer(key: Any) -> bool:
    """
    Decide whether `key` can be used as a boolean mask.

    Parameters
    ----------
    key : Any
        Candidate indexer. Only list-likes can qualify: Series, Index
        (MultiIndex excluded), ndarray and ExtensionArray are inspected via
        their dtype, plain lists (and list subclasses) via their elements.
        Anything else is never a boolean indexer.

    Returns
    -------
    bool
        Whether `key` is a valid boolean indexer.

    Raises
    ------
    ValueError
        When `key` is an object-dtype array whose values are all booleans
        apart from missing values.

    See Also
    --------
    check_array_indexer : Check that `key` is a valid array to index,
        and convert to an ndarray.
    """
    array_types = (ABCSeries, np.ndarray, ABCIndex, ABCExtensionArray)

    if isinstance(key, array_types) and not isinstance(key, ABCMultiIndex):
        if key.dtype == np.object_:
            values = np.asarray(key)
            if lib.is_bool_array(values):
                return True
            if lib.is_bool_array(values, skipna=True):
                # Booleans mixed with NA are rejected loudly. Other object
                # arrays with NA, e.g. ["A", "B", np.nan], are simply not
                # boolean indexers, see
                # test_loc_getitem_list_of_labels_categoricalindex_with_na
                raise ValueError(
                    "Cannot mask with non-boolean array containing NA / NaN values"
                )
            return False
        if is_bool_dtype(key.dtype):
            return True
        return False

    if isinstance(key, list) and len(key) > 0:
        # Mirrors checking whether np.array(key).dtype would be bool.
        # GH#42461 cython will raise TypeError if we pass a subclass
        plain = key if type(key) is list else list(key)  # noqa: E721
        return lib.is_bool_list(plain)

    return False
150
151
def cast_scalar_indexer(val):
    """
    Reject float keys that hold a round number; pass everything else through.

    Parameters
    ----------
    val : scalar
        The indexing key. Assumed to already satisfy ``lib.is_scalar``.

    Returns
    -------
    outval : scalar
        ``val`` itself, unchanged.

    Raises
    ------
    IndexError
        If ``val`` is a float whose value is integral (e.g. ``2.0``).
    """
    round_float = lib.is_float(val) and val.is_integer()
    if not round_float:
        return val

    raise IndexError(
        # GH#34193
        "Indexing with a float is no longer supported. Manually convert "
        "to an integer key instead."
    )
172
173
def not_none(*args):
    """
    Return a generator over the arguments that are not None, in order.
    """

    def _present():
        for candidate in args:
            if candidate is not None:
                yield candidate

    return _present()
179
180
def any_none(*args) -> bool:
    """
    Return True if at least one argument is None, else False.
    """
    for candidate in args:
        if candidate is None:
            return True
    return False
186
187
def all_none(*args) -> bool:
    """
    Return True if every argument is None (vacuously True with no arguments).
    """
    for candidate in args:
        if candidate is not None:
            return False
    return True
193
194
def any_not_none(*args) -> bool:
    """
    Return True if at least one argument is not None, else False.
    """
    for candidate in args:
        if candidate is not None:
            return True
    return False
200
201
def all_not_none(*args) -> bool:
    """
    Return True if no argument is None (vacuously True with no arguments).
    """
    for candidate in args:
        if candidate is None:
            return False
    return True
207
208
def count_not_none(*args) -> int:
    """
    Return how many of the arguments are not None.
    """
    present = 0
    for candidate in args:
        if candidate is not None:
            present += 1
    return present
214
215
@overload
def asarray_tuplesafe(
    values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
) -> np.ndarray:
    # ExtensionArray can only be returned when values is an Index, all other iterables
    # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
    # signature, so instead we special-case some common types.
    ...


@overload
def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
    ...


def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
    """
    Convert ``values`` to an array without letting tuples become a 2-D array.

    Parameters
    ----------
    values : iterable
        Input data. Anything that is neither a list/tuple nor exposes
        ``__array__`` is first materialized with ``list(values)``.
    dtype : dtype, optional
        Passed to ``np.asarray``. For list input, ``object``/``np.object_``
        short-circuits to ``construct_1d_object_array_from_listlike``.

    Returns
    -------
    ArrayLike
        ``values._values`` for Index and Series input; otherwise the array
        built by the steps commented below.
    """
    if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
        # Generic iterable (e.g. generator, zip): consume it once up front.
        values = list(values)
    elif isinstance(values, ABCIndex):
        # Only reachable for list/tuple/__array__ inputs; Index returns its
        # backing values directly (this is where an ExtensionArray can come out).
        return values._values
    elif isinstance(values, ABCSeries):
        return values._values

    if isinstance(values, list) and dtype in [np.object_, object]:
        # Explicit object dtype on a list: skip np.asarray entirely.
        return construct_1d_object_array_from_listlike(values)

    try:
        with warnings.catch_warnings():
            # Can remove warning filter once NumPy 1.24 is min version
            if not np_version_gte1p24:
                warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
            result = np.asarray(values, dtype=dtype)
    except ValueError:
        # Using try/except since it's more performant than checking is_list_like
        # over each element
        # error: Argument 1 to "construct_1d_object_array_from_listlike"
        # has incompatible type "Iterable[Any]"; expected "Sized"
        return construct_1d_object_array_from_listlike(values)  # type: ignore[arg-type]

    if issubclass(result.dtype.type, str):
        # NumPy produced a str-typed array; redo the conversion as object dtype.
        # Note the ``dtype`` argument is not consulted here.
        result = np.asarray(values, dtype=object)

    if result.ndim == 2:
        # Avoid building an array of arrays:
        # turn each row into a tuple and store the tuples as single elements.
        values = [tuple(x) for x in values]
        result = construct_1d_object_array_from_listlike(values)

    return result
264
265
def index_labels_to_array(
    labels: np.ndarray | Iterable, dtype: NpDtype | None = None
) -> np.ndarray:
    """
    Turn a single label or an iterable of labels into an array for Index use.

    Parameters
    ----------
    labels : label or iterable of labels
        A ``str`` or ``tuple`` is treated as one label. Lists and ndarrays
        are used as given. Any other iterable is consumed into a list, and
        a non-iterable object is treated as one label.
    dtype : dtype
        If specified, use as dtype of the resulting array, otherwise infer.

    Returns
    -------
    array
    """
    if isinstance(labels, (str, tuple)):
        # A string or tuple is one label, not a collection of labels.
        labels = [labels]
    elif not isinstance(labels, (list, np.ndarray)):
        try:
            labels = list(labels)
        except TypeError:  # non-iterable
            labels = [labels]

    return asarray_tuplesafe(labels, dtype=dtype)
293
294
def maybe_make_list(obj):
    """
    Wrap ``obj`` in a one-element list unless it is None, a tuple or a list.

    None, tuples and lists are returned unchanged (same object).
    """
    if obj is None or isinstance(obj, (tuple, list)):
        return obj
    return [obj]
299
300
def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
    """
    Consume ``obj`` into a list if it is iterable but has no length.

    Sized objects and non-iterables are returned unchanged.
    """
    unsized_iterable = isinstance(obj, abc.Iterable) and not isinstance(
        obj, abc.Sized
    )
    return list(obj) if unsized_iterable else cast(Collection, obj)
309
310
def is_null_slice(obj) -> bool:
    """
    Check whether ``obj`` is a slice whose start, stop and step are all None.
    """
    if not isinstance(obj, slice):
        return False
    return all(part is None for part in (obj.start, obj.stop, obj.step))
321
322
def is_empty_slice(obj) -> bool:
    """
    Check whether ``obj`` is a slice with equal, non-None start and stop.

    Such a slice selects no values.
    """
    if not isinstance(obj, slice):
        return False
    if obj.start is None or obj.stop is None:
        return False
    return obj.start == obj.stop
333
334
def is_true_slices(line) -> list[bool]:
    """
    Flag the non-trivial slices in ``line``.

    Returns a list of booleans with the same length as ``line``; an entry is
    True only for a slice that is not the null slice.
    """
    return [
        False if not isinstance(k, slice) else not is_null_slice(k) for k in line
    ]
340
341
342# TODO: used only once in indexing; belongs elsewhere?
def is_full_slice(obj, line: int) -> bool:
    """
    Check whether ``obj`` is the slice ``0:line`` with no step.
    """
    if not isinstance(obj, slice):
        return False
    return obj.start == 0 and obj.stop == line and obj.step is None
353
354
def get_callable_name(obj):
    """
    Best-effort name for a callable.

    Returns ``obj.__name__`` when present. A ``functools.partial`` without
    one is unwrapped via ``.func`` and retried. Any other callable reports
    its class name. Returns None (rather than the empty string) for
    non-callables, so that "no name" can be told apart from a name of ''.
    """
    target = obj
    while True:
        # typical case has name
        if hasattr(target, "__name__"):
            return target.__name__
        if not isinstance(target, partial):
            break
        # partial objects carry no name of their own; look at what they wrap
        target = target.func

    # fall back to class name, or None when the argument isn't callable
    return type(target).__name__ if callable(target) else None
370
371
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable(obj, **kwargs)`` if it is callable; otherwise
    return ``maybe_callable`` unchanged.

    Parameters
    ----------
    maybe_callable : possibly a callable
    obj : NDFrame
        Passed as the sole positional argument when calling.
    **kwargs
        Forwarded as keyword arguments when calling.
    """
    return maybe_callable(obj, **kwargs) if callable(maybe_callable) else maybe_callable
387
388
def standardize_mapping(into):
    """
    Normalize a user-supplied mapping spec into a constructor.

    Parameters
    ----------
    into : instance or subclass of collections.abc.Mapping
        Must be a class, an initialized collections.defaultdict,
        or an instance of a collections.abc.Mapping subclass.

    Returns
    -------
    mapping : a collections.abc.Mapping subclass or other constructor
        A callable that accepts an iterator and builds the desired Mapping.
        For an initialized defaultdict this is a ``partial`` that carries
        over its ``default_factory``.

    Raises
    ------
    TypeError
        If the resolved type is not a Mapping subclass, or if the bare
        ``defaultdict`` class (uninitialized) is given.

    See Also
    --------
    DataFrame.to_dict
    Series.to_dict
    """
    given_instance = not inspect.isclass(into)
    if given_instance and isinstance(into, defaultdict):
        # Keep the factory so the rebuilt mapping defaults the same way.
        return partial(defaultdict, into.default_factory)
    if given_instance:
        into = type(into)

    if not issubclass(into, abc.Mapping):
        raise TypeError(f"unsupported type: {into}")
    if into == defaultdict:
        raise TypeError("to_dict() only accepts initialized defaultdicts")
    return into
419
420
@overload
def random_state(state: np.random.Generator) -> np.random.Generator:
    ...


@overload
def random_state(
    state: int | np.ndarray | np.random.BitGenerator | np.random.RandomState | None,
) -> np.random.RandomState:
    ...


def random_state(state: RandomState | None = None):
    """
    Resolve a ``random_state`` argument into a NumPy random source.

    Parameters
    ----------
    state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
        * None: the ``np.random`` module is returned.
        * np.random.RandomState or np.random.Generator: returned unchanged.
        * int, ndarray or BitGenerator: used as the seed of a new
          ``np.random.RandomState``.
        * anything else: an informative ValueError is raised.

        Default None.

    Returns
    -------
    np.random.RandomState or np.random.Generator. If state is None, returns np.random

    Raises
    ------
    ValueError
        If ``state`` is none of the accepted kinds.
    """
    if state is None:
        return np.random

    if isinstance(state, (np.random.RandomState, np.random.Generator)):
        # Already a usable source of randomness; hand it back untouched.
        return state

    seedable = is_integer(state) or isinstance(
        state, (np.ndarray, np.random.BitGenerator)
    )
    if seedable:
        return np.random.RandomState(state)

    raise ValueError(
        "random_state must be an integer, array-like, a BitGenerator, Generator, "
        "a numpy RandomState, or None"
    )
466
467
def pipe(
    obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
) -> T:
    """
    Call ``func`` with ``obj`` supplied either positionally or by keyword.

    When ``func`` is a plain callable, ``obj`` is passed as its first
    positional argument. When ``func`` is a ``(callable, data_keyword)``
    tuple, ``obj`` is passed to ``callable`` as the keyword argument named
    ``data_keyword``.

    Parameters
    ----------
    obj : object
        The object to feed into ``func``.
    func : callable or tuple of (callable, str)
        Function to apply to this object or, alternatively, a
        ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
        string indicating the keyword of ``callable`` that expects the
        object.
    *args : iterable, optional
        Positional arguments passed into ``func``.
    **kwargs : dict, optional
        A dictionary of keyword arguments passed into ``func``.

    Returns
    -------
    object : the return type of ``func``.

    Raises
    ------
    ValueError
        If ``data_keyword`` is also present in ``kwargs``.
    """
    if not isinstance(func, tuple):
        return func(obj, *args, **kwargs)

    func, target = func
    if target in kwargs:
        raise ValueError(f"{target} is both the pipe target and a keyword argument")
    kwargs[target] = obj
    return func(*args, **kwargs)
503
504
def get_rename_function(mapper):
    """
    Return a callable that maps names/labels according to ``mapper``.

    If ``mapper`` is a Mapping or a Series, the returned function looks each
    label up in it and leaves labels that are not present unchanged.
    Otherwise ``mapper`` itself is returned and is expected to already be a
    function.
    """
    if not isinstance(mapper, (abc.Mapping, ABCSeries)):
        return mapper

    def f(label):
        return mapper[label] if label in mapper else label

    return f
518
519
def convert_to_list_like(
    values: Hashable | Iterable | AnyArrayLike,
) -> list | AnyArrayLike:
    """
    Coerce scalar or list-like input to something list-like.

    Lists, numpy arrays and pandas Index/Series/ExtensionArray inputs are
    returned unmodified. Other non-string iterables are consumed into a
    list. Everything else, strings included, is wrapped in a one-element
    list.
    """
    passthrough = (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)
    if isinstance(values, passthrough):
        return values

    if isinstance(values, str) or not isinstance(values, abc.Iterable):
        # Scalars, with strings counted as scalars.
        return [values]

    return list(values)
533
534
@contextlib.contextmanager
def temp_setattr(
    obj, attr: str, value, condition: bool = True
) -> Generator[Any, None, None]:
    """
    Temporarily set attribute on an object.

    Parameters
    ----------
    obj : object
        Object whose attribute will be modified.
    attr : str
        Attribute to modify. It must already exist on ``obj`` when
        ``condition`` is true, because its current value is read first.
    value : Any
        Value to temporarily set attribute to.
    condition : bool, default True
        Whether to set the attribute. Provided in order to not have to
        conditionally use this context manager.

    Yields
    ------
    object : obj with modified attribute.
    """
    if not condition:
        # Nothing to patch: act as a no-op context manager that still
        # yields obj, so ``with ... as x`` behaves the same either way.
        yield obj
        return

    old_value = getattr(obj, attr)
    setattr(obj, attr, value)
    try:
        yield obj
    finally:
        # Restore the original value even if the managed block raised.
        setattr(obj, attr, old_value)
566
567
def require_length_match(data, index: Index) -> None:
    """
    Raise if ``data`` and ``index`` do not have the same length.

    Raises
    ------
    ValueError
        When ``len(data) != len(index)``; the message reports both lengths.
    """
    if len(data) == len(index):
        return

    msg = (
        "Length of values "
        f"({len(data)}) "
        "does not match length of index "
        f"({len(index)})"
    )
    raise ValueError(msg)
579
580
# Replacement table consulted by ``is_builtin_func``: each Python builtin
# reduction maps to the NumPy callable used in its place.
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
# whereas np.min and np.max (which directly call obj.min and obj.max)
# default to axis=None.
_builtin_table = {
    builtins.sum: np.sum,
    builtins.max: np.maximum.reduce,
    builtins.min: np.minimum.reduce,
}
589
# GH#53425: Only for deprecation
# Same keys as ``_builtin_table``, but the values are the textual spelling of
# each replacement rather than the callable itself.
# NOTE(review): presumably used to name the replacement in deprecation
# messages; not referenced in this part of the file, so confirm at call sites.
_builtin_table_alias = {
    builtins.sum: "np.sum",
    builtins.max: "np.maximum.reduce",
    builtins.min: "np.minimum.reduce",
}
596
# Lookup table used by ``get_cython_func``: maps a Python builtin or NumPy
# callable to a short operation name (a string). Each NaN-aware NumPy variant
# (np.nansum, np.nanmean, ...) maps to the same name as its plain counterpart.
# NOTE(review): the names look like pandas method / kernel names; confirm
# against the callers of ``get_cython_func``.
_cython_table = {
    # Python builtins
    builtins.sum: "sum",
    builtins.max: "max",
    builtins.min: "min",
    # NumPy reductions
    np.all: "all",
    np.any: "any",
    np.sum: "sum",
    np.nansum: "sum",
    np.mean: "mean",
    np.nanmean: "mean",
    np.prod: "prod",
    np.nanprod: "prod",
    np.std: "std",
    np.nanstd: "std",
    np.var: "var",
    np.nanvar: "var",
    np.median: "median",
    np.nanmedian: "median",
    np.max: "max",
    np.nanmax: "max",
    np.min: "min",
    np.nanmin: "min",
    # NumPy cumulative operations
    np.cumprod: "cumprod",
    np.nancumprod: "cumprod",
    np.cumsum: "cumsum",
    np.nancumsum: "cumsum",
}
624
625
def get_cython_func(arg: Callable) -> str | None:
    """
    Look up ``arg`` in ``_cython_table``.

    Returns the registered operation name for ``arg``, or None when ``arg``
    has no entry in the table.
    """
    op_name = _cython_table.get(arg, None)
    return op_name
631
632
def is_builtin_func(arg):
    """
    Swap a Python builtin for its registered replacement.

    Returns the ``_builtin_table`` entry for ``arg`` when there is one;
    otherwise returns ``arg`` itself unchanged.
    """
    replacement = _builtin_table.get(arg, arg)
    return replacement
639
640
def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
    """
    Replace each missing (None) name by ``level_n``, n being its position.

    .. versionadded:: 1.4.0

    Parameters
    ----------
    names : list-like
        list of column names or None values.

    Returns
    -------
    list
        list of column names with the None values replaced.
    """
    filled: list[Hashable] = []
    for i, name in enumerate(names):
        if name is None:
            filled.append(f"level_{i}")
        else:
            filled.append(name)
    return filled