1"""
2Common type operations.
3"""
4from __future__ import annotations
5
6from typing import (
7 TYPE_CHECKING,
8 Any,
9 Callable,
10)
11import warnings
12
13import numpy as np
14
15from pandas._libs import (
16 Interval,
17 Period,
18 algos,
19 lib,
20)
21from pandas._libs.tslibs import conversion
22from pandas.util._exceptions import find_stack_level
23
24from pandas.core.dtypes.base import _registry as registry
25from pandas.core.dtypes.dtypes import (
26 CategoricalDtype,
27 DatetimeTZDtype,
28 ExtensionDtype,
29 IntervalDtype,
30 PeriodDtype,
31 SparseDtype,
32)
33from pandas.core.dtypes.generic import ABCIndex
34from pandas.core.dtypes.inference import (
35 is_array_like,
36 is_bool,
37 is_complex,
38 is_dataclass,
39 is_decimal,
40 is_dict_like,
41 is_file_like,
42 is_float,
43 is_hashable,
44 is_integer,
45 is_interval,
46 is_iterator,
47 is_list_like,
48 is_named_tuple,
49 is_nested_list_like,
50 is_number,
51 is_re,
52 is_re_compilable,
53 is_scalar,
54 is_sequence,
55)
56
57if TYPE_CHECKING:
58 from pandas._typing import (
59 ArrayLike,
60 DtypeObj,
61 )
62
# Reference dtypes used by the checks in this module: the datetime64 and
# timedelta64 dtypes re-exported from the tslibs conversion module (the
# ``*_ns_dtype`` checks compare against them), plus NumPy's int64 dtype.
DT64NS_DTYPE = conversion.DT64NS_DTYPE
TD64NS_DTYPE = conversion.TD64NS_DTYPE
INT64_DTYPE = np.dtype(np.int64)

# oh the troubles to reduce import time
# Placeholder for the scipy sparse checker; is_scipy_sparse() fills it in on
# its first call so that scipy is not imported when this module is loaded.
_is_scipy_sparse = None

# Module-level aliases for the ``ensure_*`` helpers implemented in
# ``pandas._libs.algos``.
ensure_float64 = algos.ensure_float64
ensure_int64 = algos.ensure_int64
ensure_int32 = algos.ensure_int32
ensure_int16 = algos.ensure_int16
ensure_int8 = algos.ensure_int8
ensure_platform_int = algos.ensure_platform_int
ensure_object = algos.ensure_object
ensure_uint64 = algos.ensure_uint64
78
79
def ensure_str(value: bytes | Any) -> str:
    """
    Coerce ``value`` to a ``str``.

    ``str`` inputs are returned unchanged, ``bytes`` are decoded as UTF-8 and
    anything else is passed through ``str()``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)
89
90
def ensure_python_int(value: int | np.integer) -> int:
    """
    Ensure that a value is a python int.

    Integer-valued floats (e.g. ``3.0``) are accepted and converted; floats
    that cannot be represented exactly as an int (``1.5``, ``nan``, ``inf``)
    are rejected.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one.
    """
    if not (is_integer(value) or is_float(value)):
        if not is_scalar(value):
            raise TypeError(
                f"Value needs to be a scalar value, was type {type(value).__name__}"
            )
        raise TypeError(f"Wrong type {type(value)} for value {value}")
    try:
        new_value = int(value)
    except (TypeError, ValueError, OverflowError) as err:
        # int(nan) raises ValueError and int(inf) raises OverflowError; both
        # are reported as the documented TypeError.
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err
    if new_value != value:
        # Explicit check rather than ``assert`` so that lossy conversions
        # (e.g. 1.5 -> 1) are still rejected when running under ``python -O``.
        raise TypeError(f"Wrong type {type(value)} for value {value}")
    return new_value
119
120
def classes(*klasses) -> Callable:
    """Build a predicate that checks if a tipo is a subclass of the klasses."""

    def is_subclass_of_klasses(tipo):
        return issubclass(tipo, klasses)

    return is_subclass_of_klasses
124
125
def _classes_and_not_datetimelike(*klasses) -> Callable:
    """
    Build a predicate that checks if a tipo is a subclass of the klasses
    while not being a datetime64/timedelta64 scalar type.
    """
    datetimelike = (np.datetime64, np.timedelta64)

    def predicate(tipo):
        if not issubclass(tipo, klasses):
            return False
        return not issubclass(tipo, datetimelike)

    return predicate
135
136
def is_object_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype has the object dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of the object dtype.

    Examples
    --------
    >>> from pandas.api.types import is_object_dtype
    >>> is_object_dtype(object)
    True
    >>> is_object_dtype(int)
    False
    >>> is_object_dtype(np.array([], dtype=object))
    True
    >>> is_object_dtype(np.array([], dtype=int))
    False
    >>> is_object_dtype([1, 2, 3])
    False
    """
    is_object_type = classes(np.object_)
    return _is_dtype_type(arr_or_dtype, is_object_type)
166
167
def is_sparse(arr) -> bool:
    """
    Check whether an array-like is a pandas sparse array.

    .. deprecated:: 2.1.0
        Use isinstance(dtype, pd.SparseDtype) instead.

    The check looks at the ``dtype`` attribute of ``arr`` (or at ``arr``
    itself when it has none) and reports whether it is a ``SparseDtype``.
    Other kinds of sparse containers, such as scipy sparse matrices, are
    not considered sparse by this function.

    Parameters
    ----------
    arr : array-like
        Array-like to check.

    Returns
    -------
    bool
        Whether or not the array-like is a pandas sparse array.

    Examples
    --------
    Returns `True` if the parameter is a pandas sparse array.

    >>> from pandas.api.types import is_sparse
    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))
    True
    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))
    True

    Returns `False` if the parameter is not sparse.

    >>> is_sparse(np.array([0, 0, 1, 0]))
    False
    >>> is_sparse(pd.Series([0, 1, 0, 0]))
    False

    Returns `False` if the parameter is not a pandas sparse array.

    >>> from scipy.sparse import bsr_matrix
    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))
    False
    """
    message = (
        "is_sparse is deprecated and will be removed in a future "
        "version. Check `isinstance(dtype, pd.SparseDtype)` instead."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    # Fall back to the object itself so that dtype instances can be passed.
    return isinstance(getattr(arr, "dtype", arr), SparseDtype)
223
224
def is_scipy_sparse(arr) -> bool:
    """
    Check whether an array-like is recognised as sparse by scipy.

    The check is delegated to ``scipy.sparse.issparse``, which is imported
    lazily on the first call and cached in a module-level global.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is a scipy.sparse.spmatrix instance.

    Notes
    -----
    If scipy is not installed, this function will always return False.

    Examples
    --------
    >>> from scipy.sparse import bsr_matrix
    >>> is_scipy_sparse(bsr_matrix([1, 2, 3]))
    True
    >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))
    False
    """
    global _is_scipy_sparse

    if _is_scipy_sparse is None:  # pylint: disable=used-before-assignment
        try:
            from scipy.sparse import issparse
        except ImportError:
            # scipy is unavailable, so nothing can be a scipy sparse object
            _is_scipy_sparse = lambda _: False
        else:
            _is_scipy_sparse = issparse

    # narrows the Optional global for the type checker
    assert _is_scipy_sparse is not None
    return _is_scipy_sparse(arr)
261
262
def is_datetime64_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the datetime64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> from pandas.api.types import is_datetime64_dtype
    >>> is_datetime64_dtype(object)
    False
    >>> is_datetime64_dtype(np.datetime64)
    True
    >>> is_datetime64_dtype(np.array([], dtype=int))
    False
    >>> is_datetime64_dtype(np.array([], dtype=np.datetime64))
    True
    >>> is_datetime64_dtype([1, 2, 3])
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        is_dt64_type = classes(np.datetime64)
        return _is_dtype_type(arr_or_dtype, is_dt64_type)
    # GH#33400 fastpath: a NumPy dtype object can be answered from its kind
    return arr_or_dtype.kind == "M"
295
296
def is_datetime64tz_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of a DatetimeTZDtype dtype.

    .. deprecated:: 2.1.0
        Use isinstance(dtype, pd.DatetimeTZDtype) instead.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of a DatetimeTZDtype dtype.

    Examples
    --------
    >>> from pandas.api.types import is_datetime64tz_dtype
    >>> is_datetime64tz_dtype(object)
    False
    >>> is_datetime64tz_dtype([1, 2, 3])
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3]))  # tz-naive
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True

    >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype
    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    >>> s = pd.Series([], dtype=dtype)
    >>> is_datetime64tz_dtype(dtype)
    True
    >>> is_datetime64tz_dtype(s)
    True
    """
    # GH#52607
    message = (
        "is_datetime64tz_dtype is deprecated and will be removed in a future "
        "version. Check `isinstance(dtype, pd.DatetimeTZDtype)` instead."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    if arr_or_dtype is None:
        return False
    # GH#33400, GH 34986: a dtype instance is answered without the generic
    # is_dtype lookup
    return isinstance(
        arr_or_dtype, DatetimeTZDtype
    ) or DatetimeTZDtype.is_dtype(arr_or_dtype)
349
350
def is_timedelta64_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the timedelta64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of the timedelta64 dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_timedelta64_dtype
    >>> is_timedelta64_dtype(object)
    False
    >>> is_timedelta64_dtype(np.timedelta64)
    True
    >>> is_timedelta64_dtype([1, 2, 3])
    False
    >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> is_timedelta64_dtype('0 days')
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        is_td64_type = classes(np.timedelta64)
        return _is_dtype_type(arr_or_dtype, is_td64_type)
    # GH#33400 fastpath: a NumPy dtype object can be answered from its kind
    return arr_or_dtype.kind == "m"
384
385
def is_period_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the Period dtype.

    .. deprecated:: 2.2.0
        Use isinstance(dtype, pd.PeriodDtype) instead.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of the Period dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_period_dtype
    >>> is_period_dtype(object)
    False
    >>> is_period_dtype(pd.PeriodDtype(freq="D"))
    True
    >>> is_period_dtype([1, 2, 3])
    False
    >>> is_period_dtype(pd.Period("2017-01-01"))
    False
    >>> is_period_dtype(pd.PeriodIndex([], freq="Y"))
    True
    """
    message = (
        "is_period_dtype is deprecated and will be removed in a future version. "
        "Use `isinstance(dtype, pd.PeriodDtype)` instead"
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    if arr_or_dtype is None:
        return False
    if not isinstance(arr_or_dtype, ExtensionDtype):
        return PeriodDtype.is_dtype(arr_or_dtype)
    # GH#33400 fastpath: an extension dtype is identified by its scalar type
    return arr_or_dtype.type is Period
430
431
def is_interval_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the Interval dtype.

    .. deprecated:: 2.2.0
        Use isinstance(dtype, pd.IntervalDtype) instead.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of the Interval dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_interval_dtype
    >>> is_interval_dtype(object)
    False
    >>> is_interval_dtype(pd.IntervalDtype())
    True
    >>> is_interval_dtype([1, 2, 3])
    False
    >>>
    >>> interval = pd.Interval(1, 2, closed="right")
    >>> is_interval_dtype(interval)
    False
    >>> is_interval_dtype(pd.IntervalIndex([interval]))
    True
    """
    # GH#52607
    message = (
        "is_interval_dtype is deprecated and will be removed in a future version. "
        "Use `isinstance(dtype, pd.IntervalDtype)` instead"
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    if arr_or_dtype is None:
        return False
    if not isinstance(arr_or_dtype, ExtensionDtype):
        return IntervalDtype.is_dtype(arr_or_dtype)
    # GH#33400 fastpath: an extension dtype is identified by its scalar type
    return arr_or_dtype.type is Interval
479
480
def is_categorical_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the Categorical dtype.

    .. deprecated:: 2.2.0
        Use isinstance(dtype, pd.CategoricalDtype) instead.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True if the array-like or dtype is of the Categorical dtype.

    Examples
    --------
    >>> from pandas.api.types import is_categorical_dtype
    >>> from pandas import CategoricalDtype
    >>> is_categorical_dtype(object)
    False
    >>> is_categorical_dtype(CategoricalDtype())
    True
    >>> is_categorical_dtype([1, 2, 3])
    False
    >>> is_categorical_dtype(pd.Categorical([1, 2, 3]))
    True
    >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
    True
    """
    # GH#52527
    message = (
        "is_categorical_dtype is deprecated and will be removed in a future "
        "version. Use isinstance(dtype, pd.CategoricalDtype) instead"
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    if arr_or_dtype is None:
        return False
    if not isinstance(arr_or_dtype, ExtensionDtype):
        return CategoricalDtype.is_dtype(arr_or_dtype)
    # GH#33400 fastpath: an extension dtype is identified by its name
    return arr_or_dtype.name == "category"
527
528
def is_string_or_object_np_dtype(dtype: np.dtype) -> bool:
    """
    Quick check for object or bytes/unicode string dtypes.

    Faster alternative to is_string_dtype, assumes we have a np.dtype object.
    """
    is_object = dtype == object
    return is_object or dtype.kind in "SU"
534
535
536def is_string_dtype(arr_or_dtype) -> bool:
537 """
538 Check whether the provided array or dtype is of the string dtype.
539
540 If an array is passed with an object dtype, the elements must be
541 inferred as strings.
542
543 Parameters
544 ----------
545 arr_or_dtype : array-like or dtype
546 The array or dtype to check.
547
548 Returns
549 -------
550 boolean
551 Whether or not the array or dtype is of the string dtype.
552
553 Examples
554 --------
555 >>> from pandas.api.types import is_string_dtype
556 >>> is_string_dtype(str)
557 True
558 >>> is_string_dtype(object)
559 True
560 >>> is_string_dtype(int)
561 False
562 >>> is_string_dtype(np.array(['a', 'b']))
563 True
564 >>> is_string_dtype(pd.Series([1, 2]))
565 False
566 >>> is_string_dtype(pd.Series([1, 2], dtype=object))
567 False
568 """
569 if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
570 return is_all_strings(arr_or_dtype)
571
572 def condition(dtype) -> bool:
573 if is_string_or_object_np_dtype(dtype):
574 return True
575 try:
576 return dtype == "string"
577 except TypeError:
578 return False
579
580 return _is_dtype(arr_or_dtype, condition)
581
582
583def is_dtype_equal(source, target) -> bool:
584 """
585 Check if two dtypes are equal.
586
587 Parameters
588 ----------
589 source : The first dtype to compare
590 target : The second dtype to compare
591
592 Returns
593 -------
594 boolean
595 Whether or not the two dtypes are equal.
596
597 Examples
598 --------
599 >>> is_dtype_equal(int, float)
600 False
601 >>> is_dtype_equal("int", int)
602 True
603 >>> is_dtype_equal(object, "category")
604 False
605 >>> is_dtype_equal(CategoricalDtype(), "category")
606 True
607 >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64")
608 False
609 """
610 if isinstance(target, str):
611 if not isinstance(source, str):
612 # GH#38516 ensure we get the same behavior from
613 # is_dtype_equal(CDT, "category") and CDT == "category"
614 try:
615 src = _get_dtype(source)
616 if isinstance(src, ExtensionDtype):
617 return src == target
618 except (TypeError, AttributeError, ImportError):
619 return False
620 elif isinstance(source, str):
621 return is_dtype_equal(target, source)
622
623 try:
624 source = _get_dtype(source)
625 target = _get_dtype(target)
626 return source == target
627 except (TypeError, AttributeError, ImportError):
628 # invalid comparison
629 # object == category will hit this
630 return False
631
632
633def is_integer_dtype(arr_or_dtype) -> bool:
634 """
635 Check whether the provided array or dtype is of an integer dtype.
636
637 Unlike in `is_any_int_dtype`, timedelta64 instances will return False.
638
639 The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
640 as integer by this function.
641
642 Parameters
643 ----------
644 arr_or_dtype : array-like or dtype
645 The array or dtype to check.
646
647 Returns
648 -------
649 boolean
650 Whether or not the array or dtype is of an integer dtype and
651 not an instance of timedelta64.
652
653 Examples
654 --------
655 >>> from pandas.api.types import is_integer_dtype
656 >>> is_integer_dtype(str)
657 False
658 >>> is_integer_dtype(int)
659 True
660 >>> is_integer_dtype(float)
661 False
662 >>> is_integer_dtype(np.uint64)
663 True
664 >>> is_integer_dtype('int8')
665 True
666 >>> is_integer_dtype('Int8')
667 True
668 >>> is_integer_dtype(pd.Int8Dtype)
669 True
670 >>> is_integer_dtype(np.datetime64)
671 False
672 >>> is_integer_dtype(np.timedelta64)
673 False
674 >>> is_integer_dtype(np.array(['a', 'b']))
675 False
676 >>> is_integer_dtype(pd.Series([1, 2]))
677 True
678 >>> is_integer_dtype(np.array([], dtype=np.timedelta64))
679 False
680 >>> is_integer_dtype(pd.Index([1, 2.])) # float
681 False
682 """
683 return _is_dtype_type(
684 arr_or_dtype, _classes_and_not_datetimelike(np.integer)
685 ) or _is_dtype(
686 arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ.kind in "iu"
687 )
688
689
690def is_signed_integer_dtype(arr_or_dtype) -> bool:
691 """
692 Check whether the provided array or dtype is of a signed integer dtype.
693
694 Unlike in `is_any_int_dtype`, timedelta64 instances will return False.
695
696 The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
697 as integer by this function.
698
699 Parameters
700 ----------
701 arr_or_dtype : array-like or dtype
702 The array or dtype to check.
703
704 Returns
705 -------
706 boolean
707 Whether or not the array or dtype is of a signed integer dtype
708 and not an instance of timedelta64.
709
710 Examples
711 --------
712 >>> from pandas.core.dtypes.common import is_signed_integer_dtype
713 >>> is_signed_integer_dtype(str)
714 False
715 >>> is_signed_integer_dtype(int)
716 True
717 >>> is_signed_integer_dtype(float)
718 False
719 >>> is_signed_integer_dtype(np.uint64) # unsigned
720 False
721 >>> is_signed_integer_dtype('int8')
722 True
723 >>> is_signed_integer_dtype('Int8')
724 True
725 >>> is_signed_integer_dtype(pd.Int8Dtype)
726 True
727 >>> is_signed_integer_dtype(np.datetime64)
728 False
729 >>> is_signed_integer_dtype(np.timedelta64)
730 False
731 >>> is_signed_integer_dtype(np.array(['a', 'b']))
732 False
733 >>> is_signed_integer_dtype(pd.Series([1, 2]))
734 True
735 >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64))
736 False
737 >>> is_signed_integer_dtype(pd.Index([1, 2.])) # float
738 False
739 >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32)) # unsigned
740 False
741 """
742 return _is_dtype_type(
743 arr_or_dtype, _classes_and_not_datetimelike(np.signedinteger)
744 ) or _is_dtype(
745 arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ.kind == "i"
746 )
747
748
749def is_unsigned_integer_dtype(arr_or_dtype) -> bool:
750 """
751 Check whether the provided array or dtype is of an unsigned integer dtype.
752
753 The nullable Integer dtypes (e.g. pandas.UInt64Dtype) are also
754 considered as integer by this function.
755
756 Parameters
757 ----------
758 arr_or_dtype : array-like or dtype
759 The array or dtype to check.
760
761 Returns
762 -------
763 boolean
764 Whether or not the array or dtype is of an unsigned integer dtype.
765
766 Examples
767 --------
768 >>> from pandas.api.types import is_unsigned_integer_dtype
769 >>> is_unsigned_integer_dtype(str)
770 False
771 >>> is_unsigned_integer_dtype(int) # signed
772 False
773 >>> is_unsigned_integer_dtype(float)
774 False
775 >>> is_unsigned_integer_dtype(np.uint64)
776 True
777 >>> is_unsigned_integer_dtype('uint8')
778 True
779 >>> is_unsigned_integer_dtype('UInt8')
780 True
781 >>> is_unsigned_integer_dtype(pd.UInt8Dtype)
782 True
783 >>> is_unsigned_integer_dtype(np.array(['a', 'b']))
784 False
785 >>> is_unsigned_integer_dtype(pd.Series([1, 2])) # signed
786 False
787 >>> is_unsigned_integer_dtype(pd.Index([1, 2.])) # float
788 False
789 >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))
790 True
791 """
792 return _is_dtype_type(
793 arr_or_dtype, _classes_and_not_datetimelike(np.unsignedinteger)
794 ) or _is_dtype(
795 arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ.kind == "u"
796 )
797
798
def is_int64_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of the int64 dtype.

    .. deprecated:: 2.1.0

       is_int64_dtype is deprecated and will be removed in a future
       version. Use dtype == np.int64 instead.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        True if the array or dtype is of the int64 dtype.

    Notes
    -----
    Depending on system architecture, the return value of `is_int64_dtype(
    int)` will be True if the OS uses 64-bit integers and False if the OS
    uses 32-bit integers.

    Examples
    --------
    >>> from pandas.api.types import is_int64_dtype
    >>> is_int64_dtype(str)  # doctest: +SKIP
    False
    >>> is_int64_dtype(np.int32)  # doctest: +SKIP
    False
    >>> is_int64_dtype(np.int64)  # doctest: +SKIP
    True
    >>> is_int64_dtype('int8')  # doctest: +SKIP
    False
    >>> is_int64_dtype('Int8')  # doctest: +SKIP
    False
    >>> is_int64_dtype(pd.Int64Dtype)  # doctest: +SKIP
    True
    >>> is_int64_dtype(float)  # doctest: +SKIP
    False
    >>> is_int64_dtype(np.uint64)  # unsigned  # doctest: +SKIP
    False
    >>> is_int64_dtype(np.array(['a', 'b']))  # doctest: +SKIP
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.int64))  # doctest: +SKIP
    True
    >>> is_int64_dtype(pd.Index([1, 2.]))  # float  # doctest: +SKIP
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned  # doctest: +SKIP
    False
    """
    # GH#52564
    message = (
        "is_int64_dtype is deprecated and will be removed in a future "
        "version. Use dtype == np.int64 instead."
    )
    warnings.warn(message, DeprecationWarning, stacklevel=2)

    is_int64_type = classes(np.int64)
    return _is_dtype_type(arr_or_dtype, is_int64_type)
860
861
862def is_datetime64_any_dtype(arr_or_dtype) -> bool:
863 """
864 Check whether the provided array or dtype is of the datetime64 dtype.
865
866 Parameters
867 ----------
868 arr_or_dtype : array-like or dtype
869 The array or dtype to check.
870
871 Returns
872 -------
873 bool
874 Whether or not the array or dtype is of the datetime64 dtype.
875
876 Examples
877 --------
878 >>> from pandas.api.types import is_datetime64_any_dtype
879 >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype
880 >>> is_datetime64_any_dtype(str)
881 False
882 >>> is_datetime64_any_dtype(int)
883 False
884 >>> is_datetime64_any_dtype(np.datetime64) # can be tz-naive
885 True
886 >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))
887 True
888 >>> is_datetime64_any_dtype(np.array(['a', 'b']))
889 False
890 >>> is_datetime64_any_dtype(np.array([1, 2]))
891 False
892 >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]"))
893 True
894 >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
895 True
896 """
897 if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
898 # GH#33400 fastpath for dtype object
899 return arr_or_dtype.kind == "M"
900
901 if arr_or_dtype is None:
902 return False
903
904 try:
905 tipo = _get_dtype(arr_or_dtype)
906 except TypeError:
907 return False
908 return lib.is_np_dtype(tipo, "M") or isinstance(tipo, DatetimeTZDtype)
909
910
911def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
912 """
913 Check whether the provided array or dtype is of the datetime64[ns] dtype.
914
915 Parameters
916 ----------
917 arr_or_dtype : array-like or dtype
918 The array or dtype to check.
919
920 Returns
921 -------
922 bool
923 Whether or not the array or dtype is of the datetime64[ns] dtype.
924
925 Examples
926 --------
927 >>> from pandas.api.types import is_datetime64_ns_dtype
928 >>> from pandas.core.dtypes.dtypes import DatetimeTZDtype
929 >>> is_datetime64_ns_dtype(str)
930 False
931 >>> is_datetime64_ns_dtype(int)
932 False
933 >>> is_datetime64_ns_dtype(np.datetime64) # no unit
934 False
935 >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern"))
936 True
937 >>> is_datetime64_ns_dtype(np.array(['a', 'b']))
938 False
939 >>> is_datetime64_ns_dtype(np.array([1, 2]))
940 False
941 >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64")) # no unit
942 False
943 >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]")) # wrong unit
944 False
945 >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
946 True
947 """
948 if arr_or_dtype is None:
949 return False
950 try:
951 tipo = _get_dtype(arr_or_dtype)
952 except TypeError:
953 return False
954 return tipo == DT64NS_DTYPE or (
955 isinstance(tipo, DatetimeTZDtype) and tipo.unit == "ns"
956 )
957
958
def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of the timedelta64[ns] dtype.

    This is a very specific dtype, so generic ones like `np.timedelta64`
    will return False if passed into this function.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        True if the array or dtype is of the timedelta64[ns] dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_timedelta64_ns_dtype
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]'))  # Wrong frequency
    False
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64))
    False
    """

    def is_td64_ns(dtype):
        return dtype == TD64NS_DTYPE

    return _is_dtype(arr_or_dtype, is_td64_ns)
989
990
991# This exists to silence numpy deprecation warnings, see GH#29553
def is_numeric_v_string_like(a: ArrayLike, b) -> bool:
    """
    Detect a comparison between a numeric ndarray and something string-like.

    NumPy doesn't like to compare such objects, especially numeric arrays
    and scalar string-likes.

    Parameters
    ----------
    a : array-like, scalar
        The first object to check.
    b : array-like, scalar
        The second object to check.

    Returns
    -------
    boolean
        True when ``a`` is a numeric ndarray and ``b`` is a ``str`` scalar or
        a string ndarray, or when ``a`` is a string ndarray and ``b`` is a
        numeric ndarray.

    Examples
    --------
    >>> is_numeric_v_string_like(np.array([1]), "foo")
    True
    >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
    True
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
    True
    >>> is_numeric_v_string_like(np.array([1]), np.array([2]))
    False
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))
    False
    """
    numeric_kinds = ("u", "i", "f", "c", "b")
    string_kinds = ("S", "U")

    # ``None`` marks "not an ndarray"; it is a member of neither kind tuple
    a_kind = a.dtype.kind if isinstance(a, np.ndarray) else None
    b_kind = b.dtype.kind if isinstance(b, np.ndarray) else None

    if a_kind in numeric_kinds:
        if b_kind is None:
            # numeric ndarray against a non-array: only str scalars count
            return isinstance(b, str)
        return b_kind in string_kinds

    return a_kind in string_kinds and b_kind in numeric_kinds
1038
1039
def needs_i8_conversion(dtype: DtypeObj | None) -> bool:
    """
    Determine whether the dtype should be converted to int64.

    Dtype "needs" such a conversion if the dtype is of a datetime-like dtype.
    Only dtype objects are recognised; arrays and dtype classes return False.

    Parameters
    ----------
    dtype : np.dtype, ExtensionDtype, or None

    Returns
    -------
    boolean
        True if the dtype should be converted to int64.

    Examples
    --------
    >>> needs_i8_conversion(str)
    False
    >>> needs_i8_conversion(np.int64)
    False
    >>> needs_i8_conversion(np.datetime64)
    False
    >>> needs_i8_conversion(np.dtype(np.datetime64))
    True
    >>> needs_i8_conversion(np.array(['a', 'b']))
    False
    >>> needs_i8_conversion(pd.Series([1, 2]))
    False
    >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]"))
    False
    >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    False
    >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern").dtype)
    True
    """
    if not isinstance(dtype, np.dtype):
        # of everything else, only Period and tz-aware datetime dtypes qualify
        return isinstance(dtype, (PeriodDtype, DatetimeTZDtype))
    return dtype.kind in "mM"
1079
1080
1081def is_numeric_dtype(arr_or_dtype) -> bool:
1082 """
1083 Check whether the provided array or dtype is of a numeric dtype.
1084
1085 Parameters
1086 ----------
1087 arr_or_dtype : array-like or dtype
1088 The array or dtype to check.
1089
1090 Returns
1091 -------
1092 boolean
1093 Whether or not the array or dtype is of a numeric dtype.
1094
1095 Examples
1096 --------
1097 >>> from pandas.api.types import is_numeric_dtype
1098 >>> is_numeric_dtype(str)
1099 False
1100 >>> is_numeric_dtype(int)
1101 True
1102 >>> is_numeric_dtype(float)
1103 True
1104 >>> is_numeric_dtype(np.uint64)
1105 True
1106 >>> is_numeric_dtype(np.datetime64)
1107 False
1108 >>> is_numeric_dtype(np.timedelta64)
1109 False
1110 >>> is_numeric_dtype(np.array(['a', 'b']))
1111 False
1112 >>> is_numeric_dtype(pd.Series([1, 2]))
1113 True
1114 >>> is_numeric_dtype(pd.Index([1, 2.]))
1115 True
1116 >>> is_numeric_dtype(np.array([], dtype=np.timedelta64))
1117 False
1118 """
1119 return _is_dtype_type(
1120 arr_or_dtype, _classes_and_not_datetimelike(np.number, np.bool_)
1121 ) or _is_dtype(
1122 arr_or_dtype, lambda typ: isinstance(typ, ExtensionDtype) and typ._is_numeric
1123 )
1124
1125
def is_any_real_numeric_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of a real number dtype.

    A dtype qualifies when it is numeric but neither complex nor boolean.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        True if the array or dtype is of a real number dtype.

    Examples
    --------
    >>> from pandas.api.types import is_any_real_numeric_dtype
    >>> is_any_real_numeric_dtype(int)
    True
    >>> is_any_real_numeric_dtype(float)
    True
    >>> is_any_real_numeric_dtype(object)
    False
    >>> is_any_real_numeric_dtype(str)
    False
    >>> is_any_real_numeric_dtype(complex(1, 2))
    False
    >>> is_any_real_numeric_dtype(bool)
    False
    """
    if not is_numeric_dtype(arr_or_dtype):
        return False
    if is_complex_dtype(arr_or_dtype):
        return False
    return not is_bool_dtype(arr_or_dtype)
1161
1162
def is_float_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a float dtype.

    Extension dtypes whose ``kind`` is ``"f"`` (e.g. the nullable
    ``Float64Dtype``) are also considered float by this function.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a float dtype.

    Examples
    --------
    >>> from pandas.api.types import is_float_dtype
    >>> is_float_dtype(str)
    False
    >>> is_float_dtype(int)
    False
    >>> is_float_dtype(float)
    True
    >>> is_float_dtype(np.array(['a', 'b']))
    False
    >>> is_float_dtype(pd.Series([1, 2]))
    False
    >>> is_float_dtype(pd.Index([1, 2.]))
    True
    """
    return _is_dtype_type(arr_or_dtype, classes(np.floating)) or _is_dtype(
        arr_or_dtype,
        # Equality, not ``in "f"``: a substring test would also accept an
        # extension dtype whose kind is the empty string.
        lambda typ: isinstance(typ, ExtensionDtype) and typ.kind == "f",
    )
1196
1197
1198def is_bool_dtype(arr_or_dtype) -> bool:
1199 """
1200 Check whether the provided array or dtype is of a boolean dtype.
1201
1202 Parameters
1203 ----------
1204 arr_or_dtype : array-like or dtype
1205 The array or dtype to check.
1206
1207 Returns
1208 -------
1209 boolean
1210 Whether or not the array or dtype is of a boolean dtype.
1211
1212 Notes
1213 -----
1214 An ExtensionArray is considered boolean when the ``_is_boolean``
1215 attribute is set to True.
1216
1217 Examples
1218 --------
1219 >>> from pandas.api.types import is_bool_dtype
1220 >>> is_bool_dtype(str)
1221 False
1222 >>> is_bool_dtype(int)
1223 False
1224 >>> is_bool_dtype(bool)
1225 True
1226 >>> is_bool_dtype(np.bool_)
1227 True
1228 >>> is_bool_dtype(np.array(['a', 'b']))
1229 False
1230 >>> is_bool_dtype(pd.Series([1, 2]))
1231 False
1232 >>> is_bool_dtype(np.array([True, False]))
1233 True
1234 >>> is_bool_dtype(pd.Categorical([True, False]))
1235 True
1236 >>> is_bool_dtype(pd.arrays.SparseArray([True, False]))
1237 True
1238 """
1239 if arr_or_dtype is None:
1240 return False
1241 try:
1242 dtype = _get_dtype(arr_or_dtype)
1243 except (TypeError, ValueError):
1244 return False
1245
1246 if isinstance(dtype, CategoricalDtype):
1247 arr_or_dtype = dtype.categories
1248 # now we use the special definition for Index
1249
1250 if isinstance(arr_or_dtype, ABCIndex):
1251 # Allow Index[object] that is all-bools or Index["boolean"]
1252 if arr_or_dtype.inferred_type == "boolean":
1253 if not is_bool_dtype(arr_or_dtype.dtype):
1254 # GH#52680
1255 warnings.warn(
1256 "The behavior of is_bool_dtype with an object-dtype Index "
1257 "of bool objects is deprecated. In a future version, "
1258 "this will return False. Cast the Index to a bool dtype instead.",
1259 DeprecationWarning,
1260 stacklevel=2,
1261 )
1262 return True
1263 return False
1264 elif isinstance(dtype, ExtensionDtype):
1265 return getattr(dtype, "_is_boolean", False)
1266
1267 return issubclass(dtype.type, np.bool_)
1268
1269
def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool:
    """
    Check for an ExtensionDtype that does not support 2D data.

    Returns False for anything that is not an ExtensionDtype instance;
    otherwise returns the negation of the dtype's ``_supports_2d`` flag.
    """
    if not isinstance(dtype, ExtensionDtype):
        return False
    return not dtype._supports_2d
1275
1276
def is_extension_array_dtype(arr_or_dtype) -> bool:
    """
    Check if an object is a pandas extension array type.

    See the :ref:`User Guide <extending.extension-types>` for more.

    Parameters
    ----------
    arr_or_dtype : object
        For array-like input, the ``.dtype`` attribute will
        be extracted.

    Returns
    -------
    bool
        Whether the `arr_or_dtype` is an extension array type.

    Notes
    -----
    This checks whether an object implements the pandas extension
    array interface. In pandas, this includes:

    * Categorical
    * Sparse
    * Interval
    * Period
    * DatetimeArray
    * TimedeltaArray

    Third-party libraries may implement arrays or types satisfying
    this interface as well.

    Examples
    --------
    >>> from pandas.api.types import is_extension_array_dtype
    >>> arr = pd.Categorical(['a', 'b'])
    >>> is_extension_array_dtype(arr)
    True
    >>> is_extension_array_dtype(arr.dtype)
    True

    >>> arr = np.array(['a', 'b'])
    >>> is_extension_array_dtype(arr.dtype)
    False
    """
    # Use the object's ``dtype`` when it has one, else the object itself.
    candidate = getattr(arr_or_dtype, "dtype", arr_or_dtype)

    if isinstance(candidate, ExtensionDtype):
        return True
    if isinstance(candidate, np.dtype):
        return False

    # Neither kind of dtype instance (e.g. a string or a class): it is an
    # extension dtype only if the registry can resolve it.
    found = registry.find(candidate)
    return found is not None
1329
1330
1331def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
1332 """
1333 Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype.
1334
1335 Notes
1336 -----
1337 Checks only for dtype objects, not dtype-castable strings or types.
1338 """
1339 return isinstance(dtype, ExtensionDtype) or (lib.is_np_dtype(dtype, "mM"))
1340
1341
def is_complex_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a complex dtype.

    The check is performed on the scalar type of the resolved dtype,
    which must be a subclass of ``np.complexfloating``.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a complex dtype.

    Examples
    --------
    >>> from pandas.api.types import is_complex_dtype
    >>> is_complex_dtype(str)
    False
    >>> is_complex_dtype(int)
    False
    >>> is_complex_dtype(np.complex128)
    True
    >>> is_complex_dtype(np.array(['a', 'b']))
    False
    >>> is_complex_dtype(pd.Series([1, 2]))
    False
    >>> is_complex_dtype(np.array([1 + 1j, 5]))
    True
    """
    is_complex_type = classes(np.complexfloating)
    return _is_dtype_type(arr_or_dtype, is_complex_type)
1373
1374
1375def _is_dtype(arr_or_dtype, condition) -> bool:
1376 """
1377 Return true if the condition is satisfied for the arr_or_dtype.
1378
1379 Parameters
1380 ----------
1381 arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType
1382 The array-like or dtype object whose dtype we want to extract.
1383 condition : callable[Union[np.dtype, ExtensionDtype]]
1384
1385 Returns
1386 -------
1387 bool
1388
1389 """
1390 if arr_or_dtype is None:
1391 return False
1392 try:
1393 dtype = _get_dtype(arr_or_dtype)
1394 except (TypeError, ValueError):
1395 return False
1396 return condition(dtype)
1397
1398
def _get_dtype(arr_or_dtype) -> DtypeObj:
    """
    Extract the dtype instance from an array or dtype-like object.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype object whose dtype we want to extract.

    Returns
    -------
    obj_dtype : The dtype instance extracted from the
                passed in array or dtype object.

    Raises
    ------
    TypeError : The passed in object is None.
    """
    if arr_or_dtype is None:
        raise TypeError("Cannot deduce dtype from null object")

    # Fast paths: already a numpy dtype, or a class numpy can convert.
    if isinstance(arr_or_dtype, np.dtype):
        return arr_or_dtype
    if isinstance(arr_or_dtype, type):
        return np.dtype(arr_or_dtype)

    # Array-likes contribute their ``dtype``; anything else is handed to
    # ``pandas_dtype`` unchanged.
    source = arr_or_dtype.dtype if hasattr(arr_or_dtype, "dtype") else arr_or_dtype
    return pandas_dtype(source)
1432
1433
def _is_dtype_type(arr_or_dtype, condition) -> bool:
    """
    Evaluate ``condition`` on the scalar type of ``arr_or_dtype``'s dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtypeType]]
        Predicate applied to the ``type`` of the resolved dtype. It is
        called with ``type(None)`` when the input is None, is list-like
        without a ``dtype`` attribute, or is a scalar that cannot be
        converted to a dtype.

    Returns
    -------
    bool : if the condition is satisfied for the arr_or_dtype
    """
    none_type = type(None)

    if arr_or_dtype is None:
        return condition(none_type)

    # Fast paths: numpy dtype instances and classes.
    if isinstance(arr_or_dtype, np.dtype):
        return condition(arr_or_dtype.type)
    if isinstance(arr_or_dtype, type):
        if issubclass(arr_or_dtype, ExtensionDtype):
            scalar_cls = arr_or_dtype.type
        else:
            scalar_cls = arr_or_dtype
        return condition(np.dtype(scalar_cls).type)

    if hasattr(arr_or_dtype, "dtype"):
        # Array-like: work with its dtype.
        candidate = arr_or_dtype.dtype
    elif is_list_like(arr_or_dtype):
        # A plain container cannot be a dtype.
        return condition(none_type)
    else:
        candidate = arr_or_dtype

    try:
        resolved_type = pandas_dtype(candidate).type
    except (TypeError, ValueError):
        return condition(none_type) if is_scalar(candidate) else False

    return condition(resolved_type)
1476
1477
def infer_dtype_from_object(dtype) -> type:
    """
    Get a numpy dtype.type-style object for a dtype object.

    Besides dtype instances and numpy scalar classes, this accepts
    anything ``pandas_dtype`` understands plus a handful of legacy
    string aliases ("datetimetz", "datetime64tz", "datetime",
    "timedelta").

    Parameters
    ----------
    dtype : dtype, type
        The dtype object whose numpy dtype.type-style
        object we want to extract.

    Returns
    -------
    type
    """
    # A numpy scalar class is already the answer.
    if isinstance(dtype, type) and issubclass(dtype, np.generic):
        return dtype

    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        # dtype instance
        try:
            _validate_date_like_dtype(dtype)
        except TypeError:
            # Not date-like, which is fine here.
            pass
        if hasattr(dtype, "numpy_dtype"):
            # TODO: Implement this properly
            # https://github.com/pandas-dev/pandas/issues/52576
            return dtype.numpy_dtype.type
        return dtype.type

    try:
        dtype = pandas_dtype(dtype)
    except TypeError:
        # Leave ``dtype`` untouched; the string aliases below may apply.
        pass

    if isinstance(dtype, ExtensionDtype):
        return dtype.type

    if isinstance(dtype, str):
        # TODO(jreback)
        # should deprecate these
        if dtype in ("datetimetz", "datetime64tz"):
            return DatetimeTZDtype.type
        if dtype == "period":
            raise NotImplementedError

        if dtype in ("datetime", "timedelta"):
            dtype = dtype + "64"

        try:
            np_attr = getattr(np, dtype)
            return infer_dtype_from_object(np_attr)
        except (AttributeError, TypeError):
            # Handles cases like _get_dtype(int) i.e.,
            # Python objects that are valid dtypes
            # (unlike user-defined types, in general)
            #
            # TypeError handles the float16 type code of 'e'
            # further handle internal types
            pass

    return infer_dtype_from_object(np.dtype(dtype))
1543
1544
def _validate_date_like_dtype(dtype) -> None:
    """
    Validate that ``dtype`` is an acceptable date-like dtype.

    Parameters
    ----------
    dtype : dtype, type
        The dtype to check.

    Raises
    ------
    TypeError : The dtype could not be cast to a date-like dtype.
    ValueError : The dtype is an illegal date-like dtype (e.g. the
                 frequency provided is too specific)
    """
    try:
        metadata = np.datetime_data(dtype)
    except ValueError as err:
        # Report numpy's ValueError as a TypeError, keeping the cause.
        raise TypeError(err) from err

    unit = metadata[0]
    if unit in ("generic", "ns"):
        return

    raise ValueError(
        f"{repr(dtype.name)} is too specific of a frequency, "
        f"try passing {repr(dtype.type.__name__)}"
    )
1569
1570
def validate_all_hashable(*args, error_name: str | None = None) -> None:
    """
    Raise a TypeError unless every positional argument is hashable.

    Parameters
    ----------
    *args
        Arguments to validate.
    error_name : str, optional
        Name used at the start of the error message. When omitted (or
        empty) a generic message is used instead.

    Raises
    ------
    TypeError : If an argument is not hashable

    Returns
    -------
    None
    """
    if all(map(is_hashable, args)):
        return

    if error_name:
        raise TypeError(f"{error_name} must be a hashable type")
    raise TypeError("All elements must be hashable")
1594
1595
def pandas_dtype(dtype) -> DtypeObj:
    """
    Convert input into a pandas only dtype object or a numpy dtype object.

    Resolution order: an ndarray yields its own dtype; dtype instances
    are returned unchanged; anything the extension-dtype registry
    recognises is returned from there; everything else is passed to
    ``np.dtype``.

    Parameters
    ----------
    dtype : object to be converted

    Returns
    -------
    np.dtype or a pandas dtype

    Raises
    ------
    TypeError if not a dtype

    Examples
    --------
    >>> pd.api.types.pandas_dtype(int)
    dtype('int64')
    """
    # short-circuit
    if isinstance(dtype, np.ndarray):
        return dtype.dtype
    elif isinstance(dtype, (np.dtype, ExtensionDtype)):
        return dtype

    # registered extension types
    result = registry.find(dtype)
    if result is not None:
        if isinstance(result, type):
            # GH 31356, GH 54592
            # The registry handed back a dtype *class*; instantiate it with
            # no arguments and tell the caller to pass an instance instead.
            warnings.warn(
                f"Instantiating {result.__name__} without any arguments. "
                f"Pass a {result.__name__} instance to silence this warning.",
                UserWarning,
                stacklevel=find_stack_level(),
            )
            result = result()
        return result

    # try a numpy dtype
    # raise a consistent TypeError if failed
    try:
        with warnings.catch_warnings():
            # GH#51523 - Series.astype(np.integer) doesn't show
            # numpy deprecation warning of np.integer
            # Hence enabling DeprecationWarning
            warnings.simplefilter("always", DeprecationWarning)
            npdtype = np.dtype(dtype)
    except SyntaxError as err:
        # np.dtype uses `eval` which can raise SyntaxError
        raise TypeError(f"data type '{dtype}' not understood") from err

    # Any invalid dtype (such as pd.Timestamp) should raise an error.
    # np.dtype(invalid_type).kind = 0 for such objects. However, this will
    # also catch some valid dtypes such as object, np.object_ and 'object'
    # which we safeguard against by catching them earlier and returning
    # np.dtype(valid_dtype) before this condition is evaluated.
    if is_hashable(dtype) and dtype in [
        object,
        np.object_,
        "object",
        "O",
        "object_",
    ]:
        # check hashability to avoid errors/DeprecationWarning when we get
        # here and `dtype` is an array
        return npdtype
    elif npdtype.kind == "O":
        raise TypeError(f"dtype '{dtype}' not understood")

    return npdtype
1669
1670
def is_all_strings(value: ArrayLike) -> bool:
    """
    Check if this is an array of strings that we should try parsing.

    True for an object-dtype ndarray whose elements are all strings (an
    empty object-dtype ndarray also qualifies), for a Categorical whose
    categories are inferred as strings, and for any other dtype that
    compares equal to "string". Other numpy dtypes, including numpy
    string dtypes, give False.
    """
    dtype = value.dtype

    if isinstance(dtype, CategoricalDtype):
        return dtype.categories.inferred_type == "string"

    if not isinstance(dtype, np.dtype):
        return dtype == "string"

    is_object = dtype == np.dtype("object")
    if len(value) == 0:
        # Nothing to inspect: the dtype alone decides.
        return is_object
    return is_object and lib.is_string_array(np.asarray(value), skipna=False)
1691
1692
# Names exported by ``from <this module> import *``.
__all__ = [
    "classes",
    "DT64NS_DTYPE",
    "ensure_float64",
    "ensure_python_int",
    "ensure_str",
    "infer_dtype_from_object",
    "INT64_DTYPE",
    "is_1d_only_ea_dtype",
    "is_all_strings",
    "is_any_real_numeric_dtype",
    "is_array_like",
    "is_bool",
    "is_bool_dtype",
    "is_categorical_dtype",
    "is_complex",
    "is_complex_dtype",
    "is_dataclass",
    "is_datetime64_any_dtype",
    "is_datetime64_dtype",
    "is_datetime64_ns_dtype",
    "is_datetime64tz_dtype",
    "is_decimal",
    "is_dict_like",
    "is_dtype_equal",
    "is_ea_or_datetimelike_dtype",
    "is_extension_array_dtype",
    "is_file_like",
    "is_float_dtype",
    "is_int64_dtype",
    "is_integer_dtype",
    "is_interval",
    "is_interval_dtype",
    "is_iterator",
    "is_named_tuple",
    "is_nested_list_like",
    "is_number",
    "is_numeric_dtype",
    "is_object_dtype",
    "is_period_dtype",
    "is_re",
    "is_re_compilable",
    "is_scipy_sparse",
    "is_sequence",
    "is_signed_integer_dtype",
    "is_sparse",
    "is_string_dtype",
    "is_string_or_object_np_dtype",
    "is_timedelta64_dtype",
    "is_timedelta64_ns_dtype",
    "is_unsigned_integer_dtype",
    "needs_i8_conversion",
    "pandas_dtype",
    "TD64NS_DTYPE",
    "validate_all_hashable",
]