1"""
2Module that contains many useful utilities
3for validating data or function arguments
4"""
5from __future__ import annotations
6
7from collections.abc import (
8 Iterable,
9 Sequence,
10)
11from typing import (
12 TypeVar,
13 overload,
14)
15
16import numpy as np
17
18from pandas._libs import lib
19
20from pandas.core.dtypes.common import (
21 is_bool,
22 is_integer,
23)
24
# Constrained type variable that binds to either ``bool`` or ``int``; used in
# this module's annotations to express "returns the same type it was given".
BoolishT = TypeVar("BoolishT", bool, int)
# Same constraints as ``BoolishT`` with ``None`` additionally permitted.
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
27
28
def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Ensure that 'args' holds no more values than 'compat_args' has entries.

    When it holds more, a TypeError is raised, similar to what Python
    does when a function is called with too many arguments. Both counts
    reported in the message are offset by 'max_fname_arg_count'.

    A negative 'max_fname_arg_count' is rejected with a ValueError.
    """
    if max_fname_arg_count < 0:
        raise ValueError("'max_fname_arg_count' must be non-negative")

    n_given = len(args)
    n_allowed = len(compat_args)
    if n_given <= n_allowed:
        return

    max_arg_count = n_allowed + max_fname_arg_count
    actual_arg_count = n_given + max_fname_arg_count
    noun = "arguments" if max_arg_count != 1 else "argument"

    raise TypeError(
        f"{fname}() takes at most {max_arg_count} {noun} "
        f"({actual_arg_count} given)"
    )
47
48
def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
    """
    Verify that every value in `arg_val_dict` equals the default stored
    under the same key in `compat_args`.

    This function must only be called once it has been established that
    the keys of `arg_val_dict` are a subset of the keys of `compat_args`.

    Raises
    ------
    ValueError
        If a value does not match its default.
    """
    for key in arg_val_dict:
        try:
            given = arg_val_dict[key]
            default = compat_args[key]

            if (given is None) != (default is None):
                # Exactly one side is None: skip '==' so that we never end
                # up comparing e.g. a numpy array against None.
                is_default = False
            else:
                # Use '==' directly, since the left-hand object may have
                # overridden comparison.
                is_default = given == default

            if not is_bool(is_default):
                raise ValueError("'match' is not a boolean")

        except ValueError:
            # The direct comparison was not usable, so fall back to an
            # identity check.
            is_default = arg_val_dict[key] is compat_args[key]

        if is_default:
            continue

        raise ValueError(
            f"the '{key}' parameter is not supported in "
            f"the pandas implementation of {fname}()"
        )
85
86
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` passed into a function against `compat_args`.

    Two things are checked: that `args` contains at most `len(compat_args)`
    values, and that each of those values is equal to the corresponding
    default value.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        Some versions of `numpy` displayed keyword arguments in a signature
        but then passed them **positionally** when calling downstream
        implementations; using a dict ensures that the original order of
        the keyword arguments is enforced.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the name it occupies in 'compat_args'
    # so that the error message can name the parameter that is not
    # supported in the pandas implementation of 'fname'.
    positional_by_name = dict(zip(compat_args, args))
    _check_for_default_values(fname, positional_by_name, compat_args)
126
127
def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
    """
    Check whether 'kwargs' contains any key that is not in 'compat_args'.

    Raises a TypeError naming the first such key, in the iteration order
    of 'kwargs'. Reporting the first offending key (rather than an
    arbitrary element of a set difference) keeps the error message
    deterministic when several invalid keys are present.
    """
    # set(dict) --> set of the dictionary's keys
    allowed = set(compat_args)

    for key in kwargs:
        if key in allowed:
            continue
        raise TypeError(
            f"{fname}() got an unexpected keyword argument '{key}'"
        )
139
140
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` passed into a function against `compat_args`.

    Every key must appear in `compat_args`, and every value must be equal
    to the default value recorded there.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # The default-value check runs on a copy taken up front; the key check
    # runs on the dictionary that was passed in.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
166
167
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `compat_args`
    and whether or not they are set to their default values.

    The `kwargs` dictionary passed in is not modified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `**kwargs` parameter
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values.

    Raises
    ------
    TypeError
        If `args` and `kwargs` together contain more values than there are
        `compat_args`, if a parameter is supplied both positionally and by
        keyword, or if `kwargs` contains keys not in `compat_args`
    ValueError
        If a value in `args` or `kwargs` does not match the default value
        specified for it in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args
    _check_arg_length(
        fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy instead of updating `kwargs` in place, so the
    # caller's dictionary is left untouched. Key order matches what
    # `kwargs.update(args_dict)` would have produced.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
224
225
def validate_bool_kwarg(
    value: BoolishNoneT,
    arg_name: str,
    none_allowed: bool = True,
    int_allowed: bool = False,
) -> BoolishNoneT:
    """
    Check that the value given for ``arg_name`` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether None is accepted as a valid value.
    int_allowed : bool, default False
        Whether an integer is accepted as a valid value.

    Returns
    -------
    value
        The input, returned unchanged.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = (
        is_bool(value)
        or (none_allowed and value is None)
        or (int_allowed and isinstance(value, int))
    )
    if acceptable:
        return value  # pyright: ignore[reportGeneralTypeIssues]

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
269
270
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the keyword arguments to 'fillna'.

    Exactly one of 'value' and 'method' has to be given. A given 'method'
    is passed through ``clean_fill_method`` and the result is returned in
    its place.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    if not (has_value or has_method):
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method
308
309
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    The input is converted with ``np.asarray`` and every percentile is
    required to lie in the closed interval [0, 1].

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    percentiles = np.asarray(q)

    if percentiles.ndim == 0:
        # A single percentile: compare the 0-d array directly.
        in_range = bool(0 <= percentiles <= 1)
    else:
        in_range = all(0 <= item <= 1 for item in percentiles)

    if not in_range:
        # Keep this a plain string rather than an f-string; string
        # formatting is too expensive for cases where we don't need it.
        raise ValueError("percentiles should all be in the interval [0, 1]")
    return percentiles
342
343
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a new list;
    anything else is validated as a single value and returned as-is.
    Integers are accepted, None is not.
    """
    if isinstance(ascending, Sequence):
        return [
            validate_bool_kwarg(
                flag, "ascending", none_allowed=False, int_allowed=True
            )
            for flag in ascending
        ]

    return validate_bool_kwarg(
        ascending, "ascending", none_allowed=False, int_allowed=True
    )
363
364
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    None closes both sides, "left" only the left side and "right" only
    the right side.

    Parameters
    ----------
    closed : {None, "left", "right"}

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")
396
397
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into (left, right) inclusion flags.

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Anything that is not a string (including None) is invalid.
    if isinstance(inclusive, str) and inclusive in flags_by_name:
        return flags_by_name[inclusive]

    raise ValueError(
        "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
    )
430
431
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position against a container of size `length`.

    `loc` has to be an integer between -length and length, inclusive.
    A negative `loc` is shifted by `length` so that the returned value
    lies within [0, length].

    The exceptions we raise on failure match np.insert.
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    position = loc + length if loc < 0 else loc
    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position  # pyright: ignore[reportGeneralTypeIssues]
448
449
def check_dtype_backend(dtype_backend) -> None:
    """
    Validate the ``dtype_backend`` argument.

    ``lib.no_default`` is accepted without further checks; any other
    value has to be "numpy_nullable" or "pyarrow".

    Raises
    ------
    ValueError
        If ``dtype_backend`` is neither ``lib.no_default`` nor one of the
        two allowed names.
    """
    if dtype_backend is lib.no_default:
        return

    if dtype_backend in ("numpy_nullable", "pyarrow"):
        return

    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )