1"""
2Module that contains many useful utilities
3for validating data or function arguments
4"""
5from __future__ import annotations
6
7from typing import (
8 Iterable,
9 Sequence,
10 TypeVar,
11 overload,
12)
13
14import numpy as np
15
16from pandas._libs import lib
17
18from pandas.core.dtypes.common import (
19 is_bool,
20 is_integer,
21)
22
# Constrained type variables used to annotate the validators below so that the
# validated value keeps its input type: ``BoolishT`` is limited to ``bool`` or
# ``int``; ``BoolishNoneT`` additionally admits ``None``.
BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
25
26
27def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
28 """
29 Checks whether 'args' has length of at most 'compat_args'. Raises
30 a TypeError if that is not the case, similar to in Python when a
31 function is called with too many arguments.
32 """
33 if max_fname_arg_count < 0:
34 raise ValueError("'max_fname_arg_count' must be non-negative")
35
36 if len(args) > len(compat_args):
37 max_arg_count = len(compat_args) + max_fname_arg_count
38 actual_arg_count = len(args) + max_fname_arg_count
39 argument = "argument" if max_arg_count == 1 else "arguments"
40
41 raise TypeError(
42 f"{fname}() takes at most {max_arg_count} {argument} "
43 f"({actual_arg_count} given)"
44 )
45
46
47def _check_for_default_values(fname, arg_val_dict, compat_args):
48 """
49 Check that the keys in `arg_val_dict` are mapped to their
50 default values as specified in `compat_args`.
51
52 Note that this function is to be called only when it has been
53 checked that arg_val_dict.keys() is a subset of compat_args
54 """
55 for key in arg_val_dict:
56 # try checking equality directly with '=' operator,
57 # as comparison may have been overridden for the left
58 # hand object
59 try:
60 v1 = arg_val_dict[key]
61 v2 = compat_args[key]
62
63 # check for None-ness otherwise we could end up
64 # comparing a numpy array vs None
65 if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
66 match = False
67 else:
68 match = v1 == v2
69
70 if not is_bool(match):
71 raise ValueError("'match' is not a boolean")
72
73 # could not compare them directly, so try comparison
74 # using the 'is' operator
75 except ValueError:
76 match = arg_val_dict[key] is compat_args[key]
77
78 if not match:
79 raise ValueError(
80 f"the '{key}' parameter is not supported in "
81 f"the pandas implementation of {fname}()"
82 )
83
84
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
    """
    Validate the `*args` passed to a function against `compat_args`.

    The check passes when `args` has at most `len(compat_args)` entries and
    each entry equals the default value of the parameter at the same
    position in `compat_args`.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `*args` parameter
    args : tuple
        The `*args` parameter passed into a function
    max_fname_arg_count : int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args : dict
        A dictionary of keys and their associated default values.
        Its order is significant: some versions of `numpy` displayed
        keyword arguments in a signature but then passed them
        **positionally** to downstream implementations, so positional
        values are matched to parameter names by the order of this dict.

    Raises
    ------
    TypeError
        If `args` contains more values than there are `compat_args`
    ValueError
        If `args` contains values that do not correspond to those
        of the default values specified in `compat_args`
    """
    _check_arg_length(fname, args, max_fname_arg_count, compat_args)

    # Pair each positional value with the parameter name it stands for, so
    # that a failure can name the unsupported parameter of 'fname'.
    named_args = {name: value for name, value in zip(compat_args, args)}
    _check_for_default_values(fname, named_args, compat_args)
124
125
126def _check_for_invalid_keys(fname, kwargs, compat_args):
127 """
128 Checks whether 'kwargs' contains any keys that are not
129 in 'compat_args' and raises a TypeError if there is one.
130 """
131 # set(dict) --> set of the dictionary's keys
132 diff = set(kwargs) - set(compat_args)
133
134 if diff:
135 bad_arg = list(diff)[0]
136 raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
137
138
def validate_kwargs(fname, kwargs, compat_args) -> None:
    """
    Validate the `**kwargs` passed to `fname` against `compat_args`.

    Every key of `kwargs` must be a key of `compat_args`, and every value
    must equal the corresponding default value.

    Parameters
    ----------
    fname : str
        The name of the function being passed the `**kwargs` parameter
    kwargs : dict
        The `**kwargs` parameter passed into `fname`
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to have and their
        associated default values

    Raises
    ------
    TypeError if `kwargs` contains keys not in `compat_args`
    ValueError if `kwargs` contains keys in `compat_args` that do not
    map to the default values specified in `compat_args`
    """
    # The default-value comparison works on a snapshot of the keyword
    # arguments taken before any check runs.
    snapshot = kwargs.copy()
    _check_for_invalid_keys(fname, kwargs, compat_args)
    _check_for_default_values(fname, snapshot, compat_args)
164
165
def validate_args_and_kwargs(
    fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
    """
    Checks whether parameters passed to the *args and **kwargs argument in a
    function `fname` are valid parameters as specified in `compat_args`
    and whether or not they are set to their default values.

    Neither `args` nor `kwargs` is modified.

    Parameters
    ----------
    fname: str
        The name of the function being passed the `*args` and `**kwargs`
        parameters
    args: tuple
        The `*args` parameter passed into a function
    kwargs: dict
        The `**kwargs` parameter passed into `fname`
    max_fname_arg_count: int
        The maximum number of arguments that the function `fname`
        can accept, excluding those in `args`. Used for displaying
        appropriate error messages. Must be non-negative.
    compat_args: dict
        A dictionary of keys that `kwargs` is allowed to
        have and their associated default values. Positional values in
        `args` are matched to these keys in order.

    Raises
    ------
    TypeError if `args` and `kwargs` together contain more values than
    there are `compat_args`, if a parameter is given both positionally and
    by keyword, OR if `kwargs` contains keys not in `compat_args`
    ValueError if `args` or `kwargs` contain values that do not match the
    default value as specified in `compat_args`

    See Also
    --------
    validate_args : Purely args validation.
    validate_kwargs : Purely kwargs validation.

    """
    # Check that the total number of arguments passed in (i.e.
    # args and kwargs) does not exceed the length of compat_args.
    # `tuple(args)` lets any sequence of positional values be concatenated.
    _check_arg_length(
        fname, tuple(args) + tuple(kwargs.values()), max_fname_arg_count, compat_args
    )

    # Check there is no overlap with the positional and keyword
    # arguments, similar to what is done in actual Python functions
    args_dict = dict(zip(compat_args, args))

    for key in args_dict:
        if key in kwargs:
            raise TypeError(
                f"{fname}() got multiple values for keyword argument '{key}'"
            )

    # Validate a merged copy so the caller's `kwargs` dict is left untouched;
    # keyword entries come first, then the positional ones.
    combined = {**kwargs, **args_dict}
    validate_kwargs(fname, combined, compat_args)
222
223
def validate_bool_kwarg(
    value: BoolishNoneT, arg_name, none_allowed: bool = True, int_allowed: bool = False
) -> BoolishNoneT:
    """
    Check that the value given for `arg_name` is acceptable as a boolean.

    Parameters
    ----------
    value : bool
        Value to be validated.
    arg_name : str
        Name of the argument. To be reflected in the error message.
    none_allowed : bool, default True
        Whether to consider None to be a valid boolean.
    int_allowed : bool, default False
        Whether to consider integer value to be a valid boolean.

    Returns
    -------
    value
        The same value as input.

    Raises
    ------
    ValueError
        If the value is not a valid boolean.
    """
    acceptable = is_bool(value)

    # Each relaxation is only consulted when the value has not already
    # been accepted.
    if none_allowed and not acceptable:
        acceptable = value is None
    if int_allowed and not acceptable:
        acceptable = isinstance(value, int)

    if acceptable:
        return value

    raise ValueError(
        f'For argument "{arg_name}" expected type bool, received '
        f"type {type(value).__name__}."
    )
264
265
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
    """
    Validate the 'value' and 'method' keyword arguments of 'fillna'.

    Exactly one of the two has to be given. A given 'method' is passed
    through ``clean_fill_method``; a given 'value' is optionally rejected
    when it is a list or tuple.

    Parameters
    ----------
    value, method : object
        The 'value' and 'method' keyword arguments for 'fillna'.
    validate_scalar_dict_value : bool, default True
        Whether to validate that 'value' is a scalar or dict. Specifically,
        validate that it is not a list or tuple.

    Returns
    -------
    value, method : object

    Raises
    ------
    ValueError
        If both or neither of 'value' and 'method' are given.
    TypeError
        If 'value' is a list or tuple while `validate_scalar_dict_value`
        is True.
    """
    from pandas.core.missing import clean_fill_method

    has_value = value is not None
    has_method = method is not None

    # Reject the two invalid combinations up front.
    if not (has_value or has_method):
        raise ValueError("Must specify a fill 'value' or 'method'.")
    if has_value and has_method:
        raise ValueError("Cannot specify both 'value' and 'method'.")

    if has_method:
        method = clean_fill_method(method)
    elif validate_scalar_dict_value and isinstance(value, (list, tuple)):
        raise TypeError(
            '"value" parameter must be a scalar or dict, but '
            f'you passed a "{type(value).__name__}"'
        )

    return value, method
303
304
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
    """
    Validate percentiles (used by describe and quantile).

    The input is converted with ``np.asarray`` and every entry must lie in
    the closed interval [0, 1]; otherwise a ValueError is raised.

    Parameters
    ----------
    q: float or iterable of floats
        A single percentile or an iterable of percentiles.

    Returns
    -------
    ndarray
        An ndarray of the percentiles if valid.

    Raises
    ------
    ValueError if percentiles are not in given interval([0, 1]).
    """
    percentiles = np.asarray(q)

    if percentiles.ndim == 0:
        # A scalar input becomes a zero-dimensional array; compare it directly.
        within_bounds = 0 <= percentiles <= 1
    else:
        within_bounds = all(0 <= entry <= 1 for entry in percentiles)

    if not within_bounds:
        # Don't change this to an f-string. The string formatting
        # is too expensive for cases where we don't need it.
        msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
        raise ValueError(msg.format(percentiles / 100.0))
    return percentiles
337
338
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
    ...


@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
    ...


def validate_ascending(
    ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
    """
    Validate ``ascending`` kwargs for ``sort_index`` method.

    A ``Sequence`` is validated item by item and returned as a list; any
    other input is validated and returned as a single value. In both cases
    integers are accepted and ``None`` is rejected.
    """

    def _checked(flag):
        return validate_bool_kwarg(
            flag, "ascending", none_allowed=False, int_allowed=True
        )

    if isinstance(ascending, Sequence):
        return [_checked(flag) for flag in ascending]

    return _checked(ascending)
358
359
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
    """
    Translate the `closed` argument into a pair of endpoint flags.

    `closed` must be one of None, "left" or "right"; None closes both ends.

    Parameters
    ----------
    closed : {None, "left", "right"}

    Returns
    -------
    left_closed : bool
    right_closed : bool

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    if closed is None:
        return True, True
    if closed == "left":
        return True, False
    if closed == "right":
        return False, True

    raise ValueError("Closed has to be either 'left', 'right' or None")
391
392
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
    """
    Translate the `inclusive` argument into ``(left, right)`` inclusion flags.

    `inclusive` must be one of "both", "neither", "left" or "right".

    Parameters
    ----------
    inclusive : {"both", "neither", "left", "right"}

    Returns
    -------
    left_right_inclusive : tuple[bool, bool]

    Raises
    ------
    ValueError : if argument is not among valid values
    """
    flags_by_name = {
        "both": (True, True),
        "left": (True, False),
        "right": (False, True),
        "neither": (False, False),
    }

    # Non-string input (including None) is rejected in the same way as an
    # unknown string.
    if not isinstance(inclusive, str) or inclusive not in flags_by_name:
        raise ValueError(
            "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
        )

    return flags_by_name[inclusive]
425
426
def validate_insert_loc(loc: int, length: int) -> int:
    """
    Validate an insertion position and return it as a non-negative integer.

    `loc` must be an integer between -length and length, inclusive. A
    negative `loc` is shifted by `length` so the result lies in [0, length].

    The exceptions we raise on failure match np.insert.

    Raises
    ------
    TypeError
        If `loc` is not an integer.
    IndexError
        If `loc` lies outside of [-length, length].
    """
    if not is_integer(loc):
        raise TypeError(f"loc must be an integer between -{length} and {length}")

    # Count a negative position from the end.
    position = loc + length if loc < 0 else loc

    if position < 0 or position > length:
        raise IndexError(f"loc must be an integer between -{length} and {length}")
    return position
443
444
def check_dtype_backend(dtype_backend) -> None:
    """
    Check that `dtype_backend` is either unset or a supported backend name.

    Parameters
    ----------
    dtype_backend : {"numpy_nullable", "pyarrow"} or lib.no_default
        The value to check; ``lib.no_default`` is accepted without further
        checks.

    Raises
    ------
    ValueError
        If `dtype_backend` is neither ``lib.no_default`` nor one of the
        supported names.
    """
    if dtype_backend is lib.no_default:
        return
    if dtype_backend in ("numpy_nullable", "pyarrow"):
        return

    raise ValueError(
        f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
        f"'pyarrow' are allowed.",
    )