1"""
2For compatibility with numpy libraries, pandas functions or methods have to
3accept '*args' and '**kwargs' parameters to accommodate numpy arguments that
4are not actually used or respected in the pandas implementation.
5
6To ensure that users do not abuse these parameters, validation is performed in
7'validators.py' to make sure that any extra parameters passed correspond ONLY
8to those in the numpy signature. Part of that validation includes whether or
9not the user attempted to pass in non-default values for these extraneous
10parameters. As we want to discourage users from relying on these parameters
11when calling the pandas implementation, we want them only to pass in the
12default values for these parameters.
13
14This module provides a set of commonly used default arguments for functions and
15methods that are spread throughout the codebase. This module will make it
16easier to adjust to future upstream changes in the analogous numpy signatures.
17"""
18from __future__ import annotations
19
20from typing import (
21 Any,
22 TypeVar,
23 cast,
24 overload,
25)
26
27from numpy import ndarray
28
29from pandas._libs.lib import (
30 is_bool,
31 is_integer,
32)
33from pandas._typing import (
34 Axis,
35 AxisInt,
36)
37from pandas.errors import UnsupportedFunctionCall
38from pandas.util._validators import (
39 validate_args,
40 validate_args_and_kwargs,
41 validate_kwargs,
42)
43
# Constrained type variable (either ``Axis`` or ``None``).  It is used in the
# ``validate_clip_with_axis`` signatures so that the annotated return type
# follows the type of the ``axis`` argument that was passed in.
AxisNoneT = TypeVar("AxisNoneT", Axis, None)
45
46
class CompatValidator:
    """
    Callable that validates numpy-compat ``*args`` / ``**kwargs``.

    An instance stores a mapping of default values together with optional
    settings (function name, validation method, maximum positional argument
    count).  Calling the instance forwards the supplied ``args`` / ``kwargs``
    to one of the helpers imported from ``pandas.util._validators``.

    Parameters
    ----------
    defaults : dict
        Mapping handed to the validation helper as the set of accepted
        arguments and their default values.
    fname : str, optional
        Function name forwarded to the validation helper.
    method : {"args", "kwargs", "both"}, optional
        Which helper to dispatch to.
    max_fname_arg_count : int, optional
        Forwarded to the helpers that validate positional arguments.
    """

    def __init__(
        self,
        defaults,
        fname=None,
        method: str | None = None,
        max_fname_arg_count=None,
    ) -> None:
        self.defaults = defaults
        self.fname = fname
        self.method = method
        self.max_fname_arg_count = max_fname_arg_count

    def __call__(
        self,
        args,
        kwargs,
        fname=None,
        max_fname_arg_count=None,
        method: str | None = None,
    ) -> None:
        """
        Validate ``args`` and ``kwargs`` against the stored defaults.

        ``fname``, ``max_fname_arg_count`` and ``method`` override the values
        given at construction time when they are not None.

        Raises
        ------
        ValueError
            If something was passed and the effective ``method`` is not one
            of "args", "kwargs" or "both".
        """
        # Nothing extra was passed, so there is nothing to validate; note that
        # the method name is not checked in this case either.
        if not args and not kwargs:
            return None

        # Per-call values win over the ones stored on the instance.
        if fname is None:
            fname = self.fname
        if max_fname_arg_count is None:
            max_fname_arg_count = self.max_fname_arg_count
        if method is None:
            method = self.method

        if method == "args":
            validate_args(fname, args, max_fname_arg_count, self.defaults)
        elif method == "kwargs":
            validate_kwargs(fname, kwargs, self.defaults)
        elif method == "both":
            validate_args_and_kwargs(
                fname, args, kwargs, max_fname_arg_count, self.defaults
            )
        else:
            raise ValueError(f"invalid validation method '{method}'")
87
88
# ``argmin`` and ``argmax`` share one defaults mapping: the only extra
# argument accepted is ``out``, and it must be left at None.
ARGMINMAX_DEFAULTS = {"out": None}
validate_argmin = CompatValidator(
    ARGMINMAX_DEFAULTS,
    fname="argmin",
    max_fname_arg_count=1,
    method="both",
)
validate_argmax = CompatValidator(
    ARGMINMAX_DEFAULTS,
    fname="argmax",
    max_fname_arg_count=1,
    method="both",
)
96
97
def process_skipna(skipna: bool | ndarray | None, args) -> tuple[bool, Any]:
    """
    Separate a genuine ``skipna`` flag from a misplaced positional argument.

    When ``skipna`` is None or an ndarray it is treated as a positional
    argument rather than a flag: it is prepended to ``args`` and ``skipna``
    falls back to True.  Any other value is returned unchanged together with
    the untouched ``args``.

    Returns
    -------
    tuple
        ``(skipna, args)`` after the adjustment described above.
    """
    if skipna is not None and not isinstance(skipna, ndarray):
        return skipna, args

    return True, (skipna,) + args
104
105
def validate_argmin_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for 'argmin' and resolve 'skipna'.

    When 'Series.argmin' is reached through the 'numpy' library, the value
    arriving in the 'skipna' position is really numpy's 'out' argument, which
    is either an ndarray or 'None'.  Such a value is moved to the front of
    'args' (see 'process_skipna') before validation, and 'skipna' is reported
    as True; a real boolean is passed through unchanged.
    """
    resolved_skipna, numpy_args = process_skipna(skipna, args)
    validate_argmin(numpy_args, kwargs)
    return resolved_skipna
116
117
def validate_argmax_with_skipna(skipna: bool | ndarray | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for 'argmax' and resolve 'skipna'.

    When 'Series.argmax' is reached through the 'numpy' library, the value
    arriving in the 'skipna' position is really numpy's 'out' argument, which
    is either an ndarray or 'None'.  Such a value is moved to the front of
    'args' (see 'process_skipna') before validation, and 'skipna' is reported
    as True; a real boolean is passed through unchanged.
    """
    resolved_skipna, numpy_args = process_skipna(skipna, args)
    validate_argmax(numpy_args, kwargs)
    return resolved_skipna
128
129
# Defaults accepted for the numpy-only ``argsort`` arguments.  ``kind`` is
# listed exactly once with its effective default of None; the key order
# (axis, kind, order) is kept as is.
ARGSORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": None,
    "order": None,
}
135
136
validate_argsort = CompatValidator(
    ARGSORT_DEFAULTS,
    fname="argsort",
    method="both",
    max_fname_arg_count=0,
)

# ``argsort`` has two different signatures.  This second validator is for
# the case where the ``kind`` parameter is supported, so ``kind`` is left
# out of its defaults mapping.
ARGSORT_DEFAULTS_KIND: dict[str, int | None] = {"axis": -1, "order": None}
validate_argsort_kind = CompatValidator(
    ARGSORT_DEFAULTS_KIND,
    fname="argsort",
    method="both",
    max_fname_arg_count=0,
)
149
150
def validate_argsort_with_ascending(ascending: bool | int | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for 'argsort' and resolve 'ascending'.

    When 'Categorical.argsort' is reached through the 'numpy' library, the
    first positional value is numpy's 'axis' (an integer or 'None') rather
    than the boolean 'ascending'.  A value that is 'None' or of integer type
    is therefore moved to the front of 'args' and 'ascending' falls back to
    True before the arguments are validated.
    """
    if ascending is None or is_integer(ascending):
        args = (ascending,) + args
        ascending = True

    validate_argsort_kind(args, kwargs, max_fname_arg_count=3)
    # ``cast`` only informs the type checker; the value is returned as is.
    return cast(bool, ascending)
165
166
# ``clip``: the only extra argument accepted is ``out``, left at None.
CLIP_DEFAULTS: dict[str, Any] = {"out": None}
validate_clip = CompatValidator(
    CLIP_DEFAULTS,
    fname="clip",
    max_fname_arg_count=3,
    method="both",
)
171
172
@overload
def validate_clip_with_axis(axis: ndarray, args, kwargs) -> None: ...


@overload
def validate_clip_with_axis(axis: AxisNoneT, args, kwargs) -> AxisNoneT: ...


def validate_clip_with_axis(
    axis: ndarray | AxisNoneT, args, kwargs
) -> AxisNoneT | None:
    """
    Validate numpy-style arguments for 'clip' and resolve 'axis'.

    When 'NDFrame.clip' is reached through the numpy library, the value
    arriving in the 'axis' position is really numpy's 'out' argument, which
    can take an ndarray.  An ndarray is therefore moved to the front of
    'args' for validation and None is returned as the axis; any other value
    is returned unchanged once the arguments have been validated.
    """
    if not isinstance(axis, ndarray):
        validate_clip(args, kwargs)
        return axis

    # The ndarray is numpy's ``out``: validate it with the rest of the
    # positional arguments and report that no axis was given.
    validate_clip((axis,) + args, kwargs)
    return None
202
203
# Cumulative functions: ``dtype`` and ``out`` must both stay at None.
CUM_FUNC_DEFAULTS: dict[str, Any] = {"dtype": None, "out": None}
# No ``fname`` here: it is supplied at call time.
validate_cum_func = CompatValidator(
    CUM_FUNC_DEFAULTS,
    max_fname_arg_count=1,
    method="both",
)
validate_cumsum = CompatValidator(
    CUM_FUNC_DEFAULTS,
    fname="cumsum",
    max_fname_arg_count=1,
    method="both",
)
213
214
def validate_cum_func_with_skipna(skipna, args, kwargs, name) -> bool:
    """
    Validate numpy-style arguments for a cumulative function.

    When the function is reached through the 'numpy' library, the value
    arriving in the 'skipna' position is really numpy's 'dtype' argument (a
    'numpy' dtype or 'None').  Anything that is not a boolean is therefore
    moved to the front of 'args' and 'skipna' falls back to True.  'name' is
    forwarded to the validator as the function name.
    """
    if is_bool(skipna):
        numpy_args = args
    else:
        numpy_args = (skipna,) + args
        skipna = True

    validate_cum_func(numpy_args, kwargs, fname=name)
    return skipna
227
228
# ``all`` / ``any``
ALLANY_DEFAULTS: dict[str, bool | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
    "axis": None,
}
validate_all = CompatValidator(
    ALLANY_DEFAULTS, fname="all", max_fname_arg_count=1, method="both"
)
validate_any = CompatValidator(
    ALLANY_DEFAULTS, fname="any", max_fname_arg_count=1, method="both"
)

# Generic logical functions: keyword validation only, no fixed ``fname``.
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False}
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")

# ``min`` / ``max``
MINMAX_DEFAULTS = {"axis": None, "out": None, "keepdims": False}
validate_min = CompatValidator(
    MINMAX_DEFAULTS, fname="min", max_fname_arg_count=1, method="both"
)
validate_max = CompatValidator(
    MINMAX_DEFAULTS, fname="max", max_fname_arg_count=1, method="both"
)

# ``reshape``
RESHAPE_DEFAULTS: dict[str, str] = {"order": "C"}
validate_reshape = CompatValidator(
    RESHAPE_DEFAULTS, fname="reshape", max_fname_arg_count=1, method="both"
)

# ``repeat``
REPEAT_DEFAULTS: dict[str, Any] = {"axis": None}
validate_repeat = CompatValidator(
    REPEAT_DEFAULTS, fname="repeat", max_fname_arg_count=1, method="both"
)

# ``round``
ROUND_DEFAULTS: dict[str, Any] = {"out": None}
validate_round = CompatValidator(
    ROUND_DEFAULTS, fname="round", max_fname_arg_count=1, method="both"
)

# ``sort``: keyword validation only.
SORT_DEFAULTS: dict[str, int | str | None] = {
    "axis": -1,
    "kind": "quicksort",
    "order": None,
}
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
272
# Base mapping shared by the statistical functions.  ``keepdims`` is added
# to it only after the derived mappings below have been built, so each of
# them places ``keepdims`` at its own position.
STAT_FUNC_DEFAULTS: dict[str, Any | None] = {"dtype": None, "out": None}

SUM_DEFAULTS = {
    **STAT_FUNC_DEFAULTS,
    "axis": None,
    "keepdims": False,
    "initial": None,
}

PROD_DEFAULTS = {
    **STAT_FUNC_DEFAULTS,
    "axis": None,
    "keepdims": False,
    "initial": None,
}

MEDIAN_DEFAULTS = {
    **STAT_FUNC_DEFAULTS,
    "overwrite_input": False,
    "keepdims": False,
}

STAT_FUNC_DEFAULTS["keepdims"] = False

validate_stat_func = CompatValidator(STAT_FUNC_DEFAULTS, method="kwargs")
validate_sum = CompatValidator(
    SUM_DEFAULTS, fname="sum", max_fname_arg_count=1, method="both"
)
validate_prod = CompatValidator(
    PROD_DEFAULTS, fname="prod", max_fname_arg_count=1, method="both"
)
validate_mean = CompatValidator(
    STAT_FUNC_DEFAULTS, fname="mean", max_fname_arg_count=1, method="both"
)
validate_median = CompatValidator(
    MEDIAN_DEFAULTS, fname="median", max_fname_arg_count=1, method="both"
)

# Statistical functions that take ``ddof``: keyword validation only.
STAT_DDOF_FUNC_DEFAULTS: dict[str, bool | None] = {
    "dtype": None,
    "out": None,
    "keepdims": False,
}
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")

# ``take``
TAKE_DEFAULTS: dict[str, str | None] = {"out": None, "mode": "raise"}
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
317
318
def validate_take_with_convert(convert: ndarray | bool | None, args, kwargs) -> bool:
    """
    Validate numpy-style arguments for 'take' and resolve 'convert'.

    When the function is reached through the 'numpy' library, the value
    arriving in the 'convert' position belongs to the numpy signature
    instead.  A value that is 'None' or an ndarray is therefore moved to the
    front of 'args' and 'convert' falls back to True before both positional
    and keyword arguments are validated.
    """
    if convert is None or isinstance(convert, ndarray):
        args = (convert,) + args
        convert = True

    validate_take(args, kwargs, method="both", max_fname_arg_count=3)
    return convert
331
332
# ``transpose``: the only extra argument accepted is ``axes``, left at None.
TRANSPOSE_DEFAULTS = {"axes": None}
validate_transpose = CompatValidator(
    TRANSPOSE_DEFAULTS,
    fname="transpose",
    max_fname_arg_count=0,
    method="both",
)
337
338
def validate_groupby_func(name, args, kwargs, allowed=None) -> None:
    """
    Reject numpy-style extra arguments passed to a groupby method.

    'args' must be empty and 'kwargs' may only contain names listed in
    'allowed', because every parameter the method needs is spelled out in
    its own signature.

    Raises
    ------
    UnsupportedFunctionCall
        If any positional argument or any keyword outside 'allowed' is given.
    """
    permitted = set() if allowed is None else set(allowed)
    unexpected = set(kwargs) - permitted

    if len(args) == 0 and len(unexpected) == 0:
        return

    raise UnsupportedFunctionCall(
        "numpy operations are not valid with groupby. "
        f"Use .groupby(...).{name}() instead"
    )
355
356
# Method names for which a stray argument gets the dedicated
# "numpy operations are not valid with resample" error.
RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")


def validate_resampler_func(method: str, args, kwargs) -> None:
    """
    Reject any extra arguments passed to a resampler method.

    'args' and 'kwargs' must both be empty because every parameter the
    method needs is spelled out in its own signature.

    Raises
    ------
    UnsupportedFunctionCall
        If arguments were passed and 'method' is in RESAMPLER_NUMPY_OPS.
    TypeError
        If arguments were passed for any other 'method'.
    """
    if len(args) + len(kwargs) == 0:
        return

    if method not in RESAMPLER_NUMPY_OPS:
        raise TypeError("too many arguments passed in")

    raise UnsupportedFunctionCall(
        "numpy operations are not valid with resample. "
        f"Use .resample(...).{method}() instead"
    )
372
373
def validate_minmax_axis(axis: AxisInt | None, ndim: int = 1) -> None:
    """
    Check that an axis passed to min, max, argmin, or argmax is within range.

    An out-of-range axis would otherwise be silently ignored, so it is
    rejected here.  None is always accepted.

    Parameters
    ----------
    axis : int or None
    ndim : int, default 1

    Raises
    ------
    ValueError
        If ``axis`` is not None and either ``axis >= ndim`` or ``axis`` is
        negative with ``ndim + axis < 0``.
    """
    if axis is None:
        return

    beyond_last = axis >= ndim
    before_first = axis < 0 and ndim + axis < 0
    if beyond_last or before_first:
        raise ValueError(f"`axis` must be fewer than the number of dimensions ({ndim})")