1"""
2Common type operations.
3"""
4from __future__ import annotations
5
6from typing import (
7 Any,
8 Callable,
9)
10import warnings
11
12import numpy as np
13
14from pandas._libs import (
15 Interval,
16 Period,
17 algos,
18 lib,
19)
20from pandas._libs.tslibs import conversion
21from pandas._typing import (
22 ArrayLike,
23 DtypeObj,
24)
25
26from pandas.core.dtypes.base import _registry as registry
27from pandas.core.dtypes.dtypes import (
28 CategoricalDtype,
29 DatetimeTZDtype,
30 ExtensionDtype,
31 IntervalDtype,
32 PeriodDtype,
33)
34from pandas.core.dtypes.generic import ABCIndex
35from pandas.core.dtypes.inference import (
36 is_array_like,
37 is_bool,
38 is_complex,
39 is_dataclass,
40 is_decimal,
41 is_dict_like,
42 is_file_like,
43 is_float,
44 is_hashable,
45 is_integer,
46 is_interval,
47 is_iterator,
48 is_list_like,
49 is_named_tuple,
50 is_nested_list_like,
51 is_number,
52 is_re,
53 is_re_compilable,
54 is_scalar,
55 is_sequence,
56)
57
# Reference datetime64 / timedelta64 dtype objects, re-exported from the
# tslibs ``conversion`` module (presumably the nanosecond-unit dtypes, going
# by the names and by how the ``*_ns_dtype`` checks in this module use them).
DT64NS_DTYPE = conversion.DT64NS_DTYPE
TD64NS_DTYPE = conversion.TD64NS_DTYPE
# NumPy dtype object for 64-bit signed integers.
INT64_DTYPE = np.dtype(np.int64)

# oh the troubles to reduce import time
# Cache for the scipy sparse checker: stays ``None`` until ``is_scipy_sparse``
# is first called, which then stores ``scipy.sparse.issparse`` (or a stub
# returning False when scipy cannot be imported).
_is_scipy_sparse = None

# Alias of the ``algos`` implementation.
ensure_float64 = algos.ensure_float64
66
67
def ensure_float(arr):
    """
    Convert an array to a floating-point dtype when that is sensible.

    Parameters
    ----------
    arr : array-like
        Array whose dtype should be coerced to float. It must expose a
        ``dtype`` attribute.

    Returns
    -------
    array-like
        A float version of ``arr`` for extension-dtype, integer and boolean
        inputs. Any other input is handed back unchanged.
    """
    dtype = arr.dtype

    if not is_extension_array_dtype(dtype):
        # Plain NumPy data: only integers and booleans are upcast.
        if issubclass(dtype.type, (np.integer, np.bool_)):
            return arr.astype(float)
        return arr

    # Extension arrays: float-backed dtypes keep their own NumPy float
    # width, everything else is converted to float64. Missing values are
    # filled with NaN in both cases.
    target = dtype.numpy_dtype if is_float_dtype(dtype) else "float64"
    return arr.to_numpy(dtype=target, na_value=np.nan)
90
91
# Module-level aliases of the ``ensure_*`` helpers implemented in ``algos``,
# so they can be imported from this module alongside the dtype checks.
ensure_int64 = algos.ensure_int64
ensure_int32 = algos.ensure_int32
ensure_int16 = algos.ensure_int16
ensure_int8 = algos.ensure_int8
ensure_platform_int = algos.ensure_platform_int
ensure_object = algos.ensure_object
ensure_uint64 = algos.ensure_uint64
99
100
def ensure_str(value: bytes | Any) -> str:
    """
    Coerce ``value`` to a ``str`` object.

    ``bytes`` are decoded as UTF-8, existing ``str`` objects pass through
    untouched, and anything else is converted with ``str()``.
    """
    if isinstance(value, str):
        return value
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return str(value)
110
111
def ensure_python_int(value: int | np.integer) -> int:
    """
    Ensure that a value is a python int.

    Integers and integer-valued floats are accepted; the result is always a
    built-in ``int``.

    Parameters
    ----------
    value: int or numpy.integer

    Returns
    -------
    int

    Raises
    ------
    TypeError: if the value isn't an int or can't be converted to one
        without loss (non-scalars, non-numeric scalars, fractional floats,
        NaN and infinities).
    """
    if not (is_integer(value) or is_float(value)):
        if not is_scalar(value):
            raise TypeError(
                f"Value needs to be a scalar value, was type {type(value).__name__}"
            )
        raise TypeError(f"Wrong type {type(value)} for value {value}")

    try:
        new_value = int(value)
    except (TypeError, ValueError, OverflowError) as err:
        # int() raises ValueError for NaN and OverflowError for +/-inf;
        # both must surface as the documented TypeError.
        raise TypeError(f"Wrong type {type(value)} for value {value}") from err

    # Explicit check rather than ``assert`` so that lossy conversions
    # (e.g. 1.5 -> 1) are still rejected when Python runs with -O.
    if new_value != value:
        raise TypeError(f"Wrong type {type(value)} for value {value}")
    return new_value
140
141
def classes(*klasses) -> Callable:
    """
    Build a predicate that tests a type against ``klasses``.

    The returned callable takes a single type ``tipo`` and reports whether
    it is a subclass of any of the given classes.
    """

    def _is_subclass(tipo):
        return issubclass(tipo, klasses)

    return _is_subclass
145
146
def classes_and_not_datetimelike(*klasses) -> Callable:
    """
    Build a predicate that tests a type against ``klasses`` while
    rejecting datetimelike types.

    The returned callable takes a single type ``tipo`` and is True only
    when ``tipo`` is a subclass of one of ``klasses`` and is not a subclass
    of ``np.datetime64`` or ``np.timedelta64``.
    """
    datetimelike = (np.datetime64, np.timedelta64)

    def _is_subclass_not_datetimelike(tipo):
        return issubclass(tipo, klasses) and not issubclass(tipo, datetimelike)

    return _is_subclass_not_datetimelike
156
157
def is_object_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype has the object dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        True when the scalar type resolved from the input is a subclass of
        ``np.object_``, False otherwise.

    Examples
    --------
    >>> from pandas.api.types import is_object_dtype
    >>> is_object_dtype(object)
    True
    >>> is_object_dtype(int)
    False
    >>> is_object_dtype(np.array([], dtype=object))
    True
    >>> is_object_dtype(np.array([], dtype=int))
    False
    >>> is_object_dtype([1, 2, 3])
    False
    """
    is_object_type = classes(np.object_)
    return _is_dtype_type(arr_or_dtype, is_object_type)
187
188
def is_sparse(arr) -> bool:
    """
    Determine whether an array-like is backed by a pandas sparse dtype.

    Only pandas' own ``SparseDtype`` counts; sparse containers from other
    libraries (for example scipy) are not recognised. The decision is made
    solely from the object's ``dtype`` attribute, or from the object itself
    when it has no such attribute.

    Parameters
    ----------
    arr : array-like
        Array-like to check.

    Returns
    -------
    bool
        Whether or not the array-like is a pandas sparse array.

    Examples
    --------
    Pandas sparse data is recognised, whether bare or wrapped in a Series.

    >>> is_sparse(pd.arrays.SparseArray([0, 0, 1, 0]))
    True
    >>> is_sparse(pd.Series(pd.arrays.SparseArray([0, 0, 1, 0])))
    True

    Dense data is not sparse.

    >>> is_sparse(np.array([0, 0, 1, 0]))
    False
    >>> is_sparse(pd.Series([0, 1, 0, 0]))
    False

    Sparse matrices that do not come from pandas are rejected as well.

    >>> from scipy.sparse import bsr_matrix
    >>> is_sparse(bsr_matrix([0, 1, 0, 0]))
    False
    """
    # Imported here rather than at module level, as in the original code.
    from pandas.core.arrays.sparse import SparseDtype

    try:
        candidate = arr.dtype
    except AttributeError:
        # No dtype attribute: treat the argument itself as the dtype.
        candidate = arr
    return isinstance(candidate, SparseDtype)
235
236
def is_scipy_sparse(arr) -> bool:
    """
    Determine whether an array-like is a scipy sparse matrix.

    Parameters
    ----------
    arr : array-like
        The array-like to check.

    Returns
    -------
    boolean
        Whether or not the array-like is a scipy.sparse.spmatrix instance.

    Notes
    -----
    scipy is imported on the first call only, and the resolved checker is
    cached at module level. When scipy cannot be imported the function
    always returns False.

    Examples
    --------
    >>> from scipy.sparse import bsr_matrix
    >>> is_scipy_sparse(bsr_matrix([1, 2, 3]))
    True
    >>> is_scipy_sparse(pd.arrays.SparseArray([1, 2, 3]))
    False
    """
    global _is_scipy_sparse

    if _is_scipy_sparse is None:  # pylint: disable=used-before-assignment
        # First call: resolve the checker now and remember it.
        try:
            from scipy.sparse import issparse
        except ImportError:
            # Without scipy nothing can be a scipy sparse matrix.
            _is_scipy_sparse = lambda _: False
        else:
            _is_scipy_sparse = issparse

    assert _is_scipy_sparse is not None
    return _is_scipy_sparse(arr)
273
274
def is_datetime64_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is a NumPy datetime64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> from pandas.api.types import is_datetime64_dtype
    >>> is_datetime64_dtype(object)
    False
    >>> is_datetime64_dtype(np.datetime64)
    True
    >>> is_datetime64_dtype(np.array([], dtype=int))
    False
    >>> is_datetime64_dtype(np.array([], dtype=np.datetime64))
    True
    >>> is_datetime64_dtype([1, 2, 3])
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        # General path: resolve the scalar type and test it.
        return _is_dtype_type(arr_or_dtype, classes(np.datetime64))

    # GH#33400 fastpath: an actual np.dtype only needs its kind inspected.
    return arr_or_dtype.kind == "M"
307
308
def is_datetime64tz_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is a DatetimeTZDtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of a DatetimeTZDtype dtype.

    Examples
    --------
    >>> is_datetime64tz_dtype(object)
    False
    >>> is_datetime64tz_dtype([1, 2, 3])
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3]))  # tz-naive
    False
    >>> is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True

    >>> dtype = DatetimeTZDtype("ns", tz="US/Eastern")
    >>> s = pd.Series([], dtype=dtype)
    >>> is_datetime64tz_dtype(dtype)
    True
    >>> is_datetime64tz_dtype(s)
    True
    """
    if arr_or_dtype is None:
        return False

    # GH#33400 / GH 34986 fastpath: an actual dtype instance needs no
    # further resolution.
    if isinstance(arr_or_dtype, DatetimeTZDtype):
        return True

    return DatetimeTZDtype.is_dtype(arr_or_dtype)
349
350
def is_timedelta64_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is a NumPy timedelta64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the timedelta64 dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_timedelta64_dtype
    >>> is_timedelta64_dtype(object)
    False
    >>> is_timedelta64_dtype(np.timedelta64)
    True
    >>> is_timedelta64_dtype([1, 2, 3])
    False
    >>> is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> is_timedelta64_dtype('0 days')
    False
    """
    if not isinstance(arr_or_dtype, np.dtype):
        # General path: resolve the scalar type and test it.
        return _is_dtype_type(arr_or_dtype, classes(np.timedelta64))

    # GH#33400 fastpath: an actual np.dtype only needs its kind inspected.
    return arr_or_dtype.kind == "m"
384
385
def is_period_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the Period dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Period dtype.

    Examples
    --------
    >>> is_period_dtype(object)
    False
    >>> is_period_dtype(PeriodDtype(freq="D"))
    True
    >>> is_period_dtype([1, 2, 3])
    False
    >>> is_period_dtype(pd.Period("2017-01-01"))
    False
    >>> is_period_dtype(pd.PeriodIndex([], freq="A"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath: decide from the dtype's scalar type alone.
        return arr_or_dtype.type is Period

    return PeriodDtype.is_dtype(arr_or_dtype)
420
421
def is_interval_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the Interval dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Interval dtype.

    Examples
    --------
    >>> is_interval_dtype(object)
    False
    >>> is_interval_dtype(IntervalDtype())
    True
    >>> is_interval_dtype([1, 2, 3])
    False
    >>>
    >>> interval = pd.Interval(1, 2, closed="right")
    >>> is_interval_dtype(interval)
    False
    >>> is_interval_dtype(pd.IntervalIndex([interval]))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath: decide from the dtype's scalar type alone.
        return arr_or_dtype.type is Interval

    return IntervalDtype.is_dtype(arr_or_dtype)
458
459
def is_categorical_dtype(arr_or_dtype) -> bool:
    """
    Determine whether an array-like or dtype is of the Categorical dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array-like or dtype is of the Categorical dtype.

    Examples
    --------
    >>> from pandas.api.types import is_categorical_dtype
    >>> from pandas import CategoricalDtype
    >>> is_categorical_dtype(object)
    False
    >>> is_categorical_dtype(CategoricalDtype())
    True
    >>> is_categorical_dtype([1, 2, 3])
    False
    >>> is_categorical_dtype(pd.Categorical([1, 2, 3]))
    True
    >>> is_categorical_dtype(pd.CategoricalIndex([1, 2, 3]))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, ExtensionDtype):
        # GH#33400 fastpath: extension dtypes are identified by name.
        return arr_or_dtype.name == "category"

    return CategoricalDtype.is_dtype(arr_or_dtype)
496
497
def is_string_or_object_np_dtype(dtype: np.dtype) -> bool:
    """
    Cheap variant of is_string_dtype for callers that already hold a
    ``np.dtype``: True for the object dtype and for the "S" / "U" kinds.
    """
    if dtype == object:
        return True
    return dtype.kind in "SU"
503
504
def is_string_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of the string dtype.

    When the argument carries an object dtype (that is, it is an array-like
    rather than a bare dtype), the answer is delegated to ``is_all_strings``
    instead of being decided from the dtype alone.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the string dtype.

    Examples
    --------
    >>> is_string_dtype(str)
    True
    >>> is_string_dtype(object)
    True
    >>> is_string_dtype(int)
    False
    >>> is_string_dtype(np.array(['a', 'b']))
    True
    >>> is_string_dtype(pd.Series([1, 2]))
    False
    >>> is_string_dtype(pd.Series([1, 2], dtype=object))
    False
    """
    carries_dtype = hasattr(arr_or_dtype, "dtype")
    if carries_dtype and get_dtype(arr_or_dtype).kind == "O":
        return is_all_strings(arr_or_dtype)

    def _looks_like_string(dtype) -> bool:
        # NumPy object / bytes / unicode dtypes qualify outright.
        if is_string_or_object_np_dtype(dtype):
            return True
        # Otherwise compare against the "string" alias; a dtype whose
        # comparison raises TypeError is simply not a string dtype.
        try:
            matches = dtype == "string"
        except TypeError:
            return False
        return matches

    return _is_dtype(arr_or_dtype, _looks_like_string)
549
550
def is_dtype_equal(source, target) -> bool:
    """
    Compare two dtype specifications for equality.

    Parameters
    ----------
    source : The first dtype to compare
    target : The second dtype to compare

    Returns
    -------
    boolean
        Whether or not the two dtypes are equal. Inputs that cannot be
        resolved or compared yield False rather than raising.

    Examples
    --------
    >>> is_dtype_equal(int, float)
    False
    >>> is_dtype_equal("int", int)
    True
    >>> is_dtype_equal(object, "category")
    False
    >>> is_dtype_equal(CategoricalDtype(), "category")
    True
    >>> is_dtype_equal(DatetimeTZDtype(tz="UTC"), "datetime64")
    False
    """
    swallowed = (TypeError, AttributeError, ImportError)
    source_is_str = isinstance(source, str)

    if isinstance(target, str):
        if not source_is_str:
            # GH#38516 ensure we get the same behavior from
            #  is_dtype_equal(CDT, "category") and CDT == "category"
            try:
                resolved = get_dtype(source)
                if isinstance(resolved, ExtensionDtype):
                    return resolved == target
            except swallowed:
                return False
    elif source_is_str:
        # Only ``source`` is a string: swap the arguments so the string
        # ends up in the ``target`` slot handled above.
        return is_dtype_equal(target, source)

    try:
        return get_dtype(source) == get_dtype(target)
    except swallowed:
        # invalid comparison
        # object == category will hit this
        return False
599
600
def is_any_int_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is integer-like.

    Here timedelta64 is treated as an "any-integer" type as well, so it
    yields True. Nullable Integer dtypes (e.g. pandas.Int64Dtype) are
    likewise treated as integers.

    This function is internal and should not be exposed in the public API.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype.

    Examples
    --------
    >>> is_any_int_dtype(str)
    False
    >>> is_any_int_dtype(int)
    True
    >>> is_any_int_dtype(float)
    False
    >>> is_any_int_dtype(np.uint64)
    True
    >>> is_any_int_dtype(np.datetime64)
    False
    >>> is_any_int_dtype(np.timedelta64)
    True
    >>> is_any_int_dtype(np.array(['a', 'b']))
    False
    >>> is_any_int_dtype(pd.Series([1, 2]))
    True
    >>> is_any_int_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_any_int_dtype(pd.Index([1, 2.]))  # float
    False
    """

    def _is_ea_integer(typ) -> bool:
        # Extension dtypes reporting a signed or unsigned integer kind.
        return isinstance(typ, ExtensionDtype) and typ.kind in "iu"

    numpy_match = _is_dtype_type(arr_or_dtype, classes(np.integer, np.timedelta64))
    return numpy_match or _is_dtype(arr_or_dtype, _is_ea_integer)
651
652
def is_integer_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of an integer dtype.

    In contrast to `is_any_int_dtype`, timedelta64 yields False here.
    Nullable Integer dtypes (e.g. pandas.Int64Dtype) are treated as
    integers.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an integer dtype and
        not an instance of timedelta64.

    Examples
    --------
    >>> is_integer_dtype(str)
    False
    >>> is_integer_dtype(int)
    True
    >>> is_integer_dtype(float)
    False
    >>> is_integer_dtype(np.uint64)
    True
    >>> is_integer_dtype('int8')
    True
    >>> is_integer_dtype('Int8')
    True
    >>> is_integer_dtype(pd.Int8Dtype)
    True
    >>> is_integer_dtype(np.datetime64)
    False
    >>> is_integer_dtype(np.timedelta64)
    False
    >>> is_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_integer_dtype(pd.Index([1, 2.]))  # float
    False
    """

    def _is_ea_integer(typ) -> bool:
        # Extension dtypes reporting a signed or unsigned integer kind.
        return isinstance(typ, ExtensionDtype) and typ.kind in "iu"

    numpy_match = _is_dtype_type(
        arr_or_dtype, classes_and_not_datetimelike(np.integer)
    )
    return numpy_match or _is_dtype(arr_or_dtype, _is_ea_integer)
707
708
def is_signed_integer_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is a signed integer dtype.

    In contrast to `is_any_int_dtype`, timedelta64 yields False here.
    Nullable Integer dtypes (e.g. pandas.Int64Dtype) are treated as
    integers.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a signed integer dtype
        and not an instance of timedelta64.

    Examples
    --------
    >>> is_signed_integer_dtype(str)
    False
    >>> is_signed_integer_dtype(int)
    True
    >>> is_signed_integer_dtype(float)
    False
    >>> is_signed_integer_dtype(np.uint64)  # unsigned
    False
    >>> is_signed_integer_dtype('int8')
    True
    >>> is_signed_integer_dtype('Int8')
    True
    >>> is_signed_integer_dtype(pd.Int8Dtype)
    True
    >>> is_signed_integer_dtype(np.datetime64)
    False
    >>> is_signed_integer_dtype(np.timedelta64)
    False
    >>> is_signed_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_signed_integer_dtype(pd.Series([1, 2]))
    True
    >>> is_signed_integer_dtype(np.array([], dtype=np.timedelta64))
    False
    >>> is_signed_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_signed_integer_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """

    def _is_ea_signed(typ) -> bool:
        # Extension dtypes reporting the signed integer kind.
        return isinstance(typ, ExtensionDtype) and typ.kind == "i"

    numpy_match = _is_dtype_type(
        arr_or_dtype, classes_and_not_datetimelike(np.signedinteger)
    )
    return numpy_match or _is_dtype(arr_or_dtype, _is_ea_signed)
765
766
def is_unsigned_integer_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is an unsigned integer
    dtype.

    Nullable Integer dtypes (e.g. pandas.UInt64Dtype) are treated as
    integers.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of an unsigned integer dtype.

    Examples
    --------
    >>> is_unsigned_integer_dtype(str)
    False
    >>> is_unsigned_integer_dtype(int)  # signed
    False
    >>> is_unsigned_integer_dtype(float)
    False
    >>> is_unsigned_integer_dtype(np.uint64)
    True
    >>> is_unsigned_integer_dtype('uint8')
    True
    >>> is_unsigned_integer_dtype('UInt8')
    True
    >>> is_unsigned_integer_dtype(pd.UInt8Dtype)
    True
    >>> is_unsigned_integer_dtype(np.array(['a', 'b']))
    False
    >>> is_unsigned_integer_dtype(pd.Series([1, 2]))  # signed
    False
    >>> is_unsigned_integer_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_unsigned_integer_dtype(np.array([1, 2], dtype=np.uint32))
    True
    """

    def _is_ea_unsigned(typ) -> bool:
        # Extension dtypes reporting the unsigned integer kind.
        return isinstance(typ, ExtensionDtype) and typ.kind == "u"

    numpy_match = _is_dtype_type(
        arr_or_dtype, classes_and_not_datetimelike(np.unsignedinteger)
    )
    return numpy_match or _is_dtype(arr_or_dtype, _is_ea_unsigned)
814
815
def is_int64_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of the int64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the int64 dtype.

    Notes
    -----
    The result of `is_int64_dtype(int)` is platform dependent: True where
    the OS uses 64-bit integers and False where it uses 32-bit integers.

    Examples
    --------
    >>> from pandas.api.types import is_int64_dtype
    >>> is_int64_dtype(str)
    False
    >>> is_int64_dtype(np.int32)
    False
    >>> is_int64_dtype(np.int64)
    True
    >>> is_int64_dtype('int8')
    False
    >>> is_int64_dtype('Int8')
    False
    >>> is_int64_dtype(pd.Int64Dtype)
    True
    >>> is_int64_dtype(float)
    False
    >>> is_int64_dtype(np.uint64)  # unsigned
    False
    >>> is_int64_dtype(np.array(['a', 'b']))
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.int64))
    True
    >>> is_int64_dtype(pd.Index([1, 2.]))  # float
    False
    >>> is_int64_dtype(np.array([1, 2], dtype=np.uint32))  # unsigned
    False
    """
    is_int64_type = classes(np.int64)
    return _is_dtype_type(arr_or_dtype, is_int64_type)
865
866
def is_datetime64_any_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is any datetime64 dtype,
    timezone-naive or timezone-aware.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    bool
        Whether or not the array or dtype is of the datetime64 dtype.

    Examples
    --------
    >>> is_datetime64_any_dtype(str)
    False
    >>> is_datetime64_any_dtype(int)
    False
    >>> is_datetime64_any_dtype(np.datetime64)  # can be tz-naive
    True
    >>> is_datetime64_any_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    True
    >>> is_datetime64_any_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime64_any_dtype(np.array([1, 2]))
    False
    >>> is_datetime64_any_dtype(np.array([], dtype="datetime64[ns]"))
    True
    >>> is_datetime64_any_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
        # GH#33400 fastpath: dtype objects are decided by their kind.
        return arr_or_dtype.kind == "M"

    return is_datetime64_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype)
907
908
def is_datetime64_ns_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is datetime64[ns],
    timezone-naive or timezone-aware.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    bool
        Whether or not the array or dtype is of the datetime64[ns] dtype.

    Examples
    --------
    >>> is_datetime64_ns_dtype(str)
    False
    >>> is_datetime64_ns_dtype(int)
    False
    >>> is_datetime64_ns_dtype(np.datetime64)  # no unit
    False
    >>> is_datetime64_ns_dtype(DatetimeTZDtype("ns", "US/Eastern"))
    True
    >>> is_datetime64_ns_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime64_ns_dtype(np.array([1, 2]))
    False
    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64"))  # no unit
    False
    >>> is_datetime64_ns_dtype(np.array([], dtype="datetime64[ps]"))  # wrong unit
    False
    >>> is_datetime64_ns_dtype(pd.DatetimeIndex([1, 2, 3], dtype="datetime64[ns]"))
    True
    """
    if arr_or_dtype is None:
        return False

    try:
        resolved = get_dtype(arr_or_dtype)
    except TypeError:
        # Resolution failed: retry through the object's own ``dtype``
        # attribute, but only for tz-aware datetime input.
        if not is_datetime64tz_dtype(arr_or_dtype):
            return False
        resolved = get_dtype(arr_or_dtype.dtype)

    is_naive_ns = resolved == DT64NS_DTYPE
    return is_naive_ns or (
        isinstance(resolved, DatetimeTZDtype) and resolved.unit == "ns"
    )
956
957
def is_timedelta64_ns_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is exactly
    timedelta64[ns].

    The match is on the specific dtype, so the generic `np.timedelta64`
    yields False.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of the timedelta64[ns] dtype.

    Examples
    --------
    >>> from pandas.core.dtypes.common import is_timedelta64_ns_dtype
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.dtype('m8[ps]'))  # Wrong frequency
    False
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype='m8[ns]'))
    True
    >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64))
    False
    """

    def _equals_td64ns(dtype) -> bool:
        return dtype == TD64NS_DTYPE

    return _is_dtype(arr_or_dtype, _equals_td64ns)
988
989
def is_datetime_or_timedelta_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is either a datetime64
    or a timedelta64 dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a timedelta64,
        or datetime64 dtype.

    Examples
    --------
    >>> is_datetime_or_timedelta_dtype(str)
    False
    >>> is_datetime_or_timedelta_dtype(int)
    False
    >>> is_datetime_or_timedelta_dtype(np.datetime64)
    True
    >>> is_datetime_or_timedelta_dtype(np.timedelta64)
    True
    >>> is_datetime_or_timedelta_dtype(np.array(['a', 'b']))
    False
    >>> is_datetime_or_timedelta_dtype(pd.Series([1, 2]))
    False
    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.timedelta64))
    True
    >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64))
    True
    """
    is_dt_or_td_type = classes(np.datetime64, np.timedelta64)
    return _is_dtype_type(arr_or_dtype, is_dt_or_td_type)
1026
1027
1028# This exists to silence numpy deprecation warnings, see GH#29553
def is_numeric_v_string_like(a: ArrayLike, b) -> bool:
    """
    Detect a comparison between a numeric ndarray and something string-like.

    NumPy handles such comparisons poorly, in particular a numeric array
    against a scalar string.

    Parameters
    ----------
    a : array-like, scalar
        The first object to check.
    b : array-like, scalar
        The second object to check.

    Returns
    -------
    boolean
        True when ``a`` is a numeric ndarray and ``b`` is a ``str`` or a
        string ndarray, or when ``b`` is a numeric ndarray and ``a`` is a
        string ndarray. False otherwise.

    Examples
    --------
    >>> is_numeric_v_string_like(np.array([1]), "foo")
    True
    >>> is_numeric_v_string_like(np.array([1, 2]), np.array(["foo"]))
    True
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2]))
    True
    >>> is_numeric_v_string_like(np.array([1]), np.array([2]))
    False
    >>> is_numeric_v_string_like(np.array(["foo"]), np.array(["foo"]))
    False
    """
    numeric_kinds = ("u", "i", "f", "c", "b")
    string_kinds = ("S", "U")

    a_is_ndarray = isinstance(a, np.ndarray)
    b_is_ndarray = isinstance(b, np.ndarray)

    if a_is_ndarray and a.dtype.kind in numeric_kinds:
        # Numeric left operand: the right one may be a str scalar or a
        # string ndarray.
        if b_is_ndarray:
            return b.dtype.kind in string_kinds
        return isinstance(b, str)

    # Otherwise only "string ndarray vs numeric ndarray" qualifies.
    b_numeric = b_is_ndarray and b.dtype.kind in numeric_kinds
    a_string = a_is_ndarray and a.dtype.kind in string_kinds
    return b_numeric and a_string
1075
1076
def needs_i8_conversion(arr_or_dtype) -> bool:
    """
    Determine whether the array or dtype should be converted to int64.

    That is the case for datetime-like dtypes: NumPy datetime64 and
    timedelta64, plus the Period and tz-aware datetime extension dtypes.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype should be converted to int64.

    Examples
    --------
    >>> needs_i8_conversion(str)
    False
    >>> needs_i8_conversion(np.int64)
    False
    >>> needs_i8_conversion(np.datetime64)
    True
    >>> needs_i8_conversion(np.array(['a', 'b']))
    False
    >>> needs_i8_conversion(pd.Series([1, 2]))
    False
    >>> needs_i8_conversion(pd.Series([], dtype="timedelta64[ns]"))
    True
    >>> needs_i8_conversion(pd.DatetimeIndex([1, 2, 3], tz="US/Eastern"))
    True
    """
    if arr_or_dtype is None:
        return False

    if isinstance(arr_or_dtype, (np.dtype, ExtensionDtype)):
        # Already a dtype object: no resolution needed.
        dtype = arr_or_dtype
    else:
        try:
            dtype = get_dtype(arr_or_dtype)
        except (TypeError, ValueError):
            return False

    if isinstance(dtype, np.dtype):
        return dtype.kind in ("m", "M")
    return isinstance(dtype, (PeriodDtype, DatetimeTZDtype))
1125
1126
def is_numeric_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is of a numeric dtype.

    NumPy number and boolean types qualify, datetimelike types do not.
    Extension dtypes qualify when they flag themselves via ``_is_numeric``.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a numeric dtype.

    Examples
    --------
    >>> from pandas.api.types import is_numeric_dtype
    >>> is_numeric_dtype(str)
    False
    >>> is_numeric_dtype(int)
    True
    >>> is_numeric_dtype(float)
    True
    >>> is_numeric_dtype(np.uint64)
    True
    >>> is_numeric_dtype(np.datetime64)
    False
    >>> is_numeric_dtype(np.timedelta64)
    False
    >>> is_numeric_dtype(np.array(['a', 'b']))
    False
    >>> is_numeric_dtype(pd.Series([1, 2]))
    True
    >>> is_numeric_dtype(pd.Index([1, 2.]))
    True
    >>> is_numeric_dtype(np.array([], dtype=np.timedelta64))
    False
    """

    def _is_ea_numeric(typ):
        # Extension dtypes declare numeric-ness themselves.
        return isinstance(typ, ExtensionDtype) and typ._is_numeric

    numpy_match = _is_dtype_type(
        arr_or_dtype, classes_and_not_datetimelike(np.number, np.bool_)
    )
    return numpy_match or _is_dtype(arr_or_dtype, _is_ea_numeric)
1170
1171
def is_any_real_numeric_dtype(arr_or_dtype) -> bool:
    """
    Determine whether the provided array or dtype is a real number dtype.

    A real number dtype is a numeric dtype that is neither complex nor
    boolean.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to inspect.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a real number dtype.

    Examples
    --------
    >>> from pandas.api.types import is_any_real_numeric_dtype
    >>> is_any_real_numeric_dtype(int)
    True
    >>> is_any_real_numeric_dtype(float)
    True
    >>> is_any_real_numeric_dtype(object)
    False
    >>> is_any_real_numeric_dtype(str)
    False
    >>> is_any_real_numeric_dtype(complex(1, 2))
    False
    >>> is_any_real_numeric_dtype(bool)
    False
    """
    if not is_numeric_dtype(arr_or_dtype):
        return False
    if is_complex_dtype(arr_or_dtype):
        return False
    return not is_bool_dtype(arr_or_dtype)
1207
1208
def is_float_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a float dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a float dtype.

    Notes
    -----
    An ExtensionDtype is considered a float dtype when its ``kind`` is
    exactly ``"f"``.

    Examples
    --------
    >>> from pandas.api.types import is_float_dtype
    >>> is_float_dtype(str)
    False
    >>> is_float_dtype(int)
    False
    >>> is_float_dtype(float)
    True
    >>> is_float_dtype(np.array(['a', 'b']))
    False
    >>> is_float_dtype(pd.Series([1, 2]))
    False
    >>> is_float_dtype(pd.Index([1, 2.]))
    True
    """
    return _is_dtype_type(arr_or_dtype, classes(np.floating)) or _is_dtype(
        arr_or_dtype,
        # Equality rather than ``kind in "f"``: a substring test would also
        # accept an ExtensionDtype whose ``kind`` is the empty string.
        lambda typ: isinstance(typ, ExtensionDtype) and typ.kind == "f",
    )
1242
1243
def is_bool_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a boolean dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a boolean dtype.

    Notes
    -----
    An ExtensionDtype is considered boolean when its ``_is_boolean``
    attribute is True. A categorical dtype is judged by its categories,
    and an Index is judged by its ``inferred_type``.

    Examples
    --------
    >>> from pandas.api.types import is_bool_dtype
    >>> is_bool_dtype(str)
    False
    >>> is_bool_dtype(int)
    False
    >>> is_bool_dtype(bool)
    True
    >>> is_bool_dtype(np.bool_)
    True
    >>> is_bool_dtype(np.array(['a', 'b']))
    False
    >>> is_bool_dtype(pd.Series([1, 2]))
    False
    >>> is_bool_dtype(np.array([True, False]))
    True
    >>> is_bool_dtype(pd.Categorical([True, False]))
    True
    >>> is_bool_dtype(pd.arrays.SparseArray([True, False]))
    True
    """
    if arr_or_dtype is None:
        return False
    try:
        resolved = get_dtype(arr_or_dtype)
    except (TypeError, ValueError):
        # Anything that cannot be resolved to a dtype is not boolean.
        return False

    # For a categorical, inspect the categories (an Index) instead of the
    # object that was passed in, so the Index rule below applies to them.
    target = (
        resolved.categories
        if isinstance(resolved, CategoricalDtype)
        else arr_or_dtype
    )

    if isinstance(target, ABCIndex):
        # Allow Index[object] that is all-bools or Index["boolean"]
        return target.inferred_type == "boolean"
    if isinstance(resolved, ExtensionDtype):
        return getattr(resolved, "_is_boolean", False)

    return issubclass(resolved.type, np.bool_)
1303
1304
def is_1d_only_ea_obj(obj: Any) -> bool:
    """
    Check for an ExtensionArray that does not support 2D, or more
    specifically one that does not use HybridBlock.

    Returns True for any ExtensionArray instance other than DatetimeArray,
    TimedeltaArray and PeriodArray, and False for everything else.
    """
    # Imported here rather than at module level, as in the original code.
    from pandas.core.arrays import (
        DatetimeArray,
        ExtensionArray,
        PeriodArray,
        TimedeltaArray,
    )

    if not isinstance(obj, ExtensionArray):
        return False
    return not isinstance(obj, (DatetimeArray, TimedeltaArray, PeriodArray))
1320
1321
def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool:
    """
    Analogue to is_extension_array_dtype but excluding DatetimeTZDtype
    and PeriodDtype.

    Only dtype instances are recognised; anything that is not an
    ExtensionDtype instance (including None) gives False.
    """
    # Note: if other EA dtypes are ever held in HybridBlock, exclude those
    #  here too.
    # NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype
    #  to exclude ArrowTimestampUSDtype
    if not isinstance(dtype, ExtensionDtype):
        return False
    return not isinstance(dtype, (DatetimeTZDtype, PeriodDtype))
1333
1334
def is_extension_array_dtype(arr_or_dtype) -> bool:
    """
    Check if an object is a pandas extension array type.

    See the :ref:`User Guide <extending.extension-types>` for more.

    Parameters
    ----------
    arr_or_dtype : object
        For array-like input, the ``.dtype`` attribute will
        be extracted.

    Returns
    -------
    bool
        Whether the `arr_or_dtype` is an extension array type.

    Notes
    -----
    This checks whether an object implements the pandas extension
    array interface. In pandas, this includes:

    * Categorical
    * Sparse
    * Interval
    * Period
    * DatetimeArray
    * TimedeltaArray

    Third-party libraries may implement arrays or types satisfying
    this interface as well.

    Examples
    --------
    >>> from pandas.api.types import is_extension_array_dtype
    >>> arr = pd.Categorical(['a', 'b'])
    >>> is_extension_array_dtype(arr)
    True
    >>> is_extension_array_dtype(arr.dtype)
    True

    >>> arr = np.array(['a', 'b'])
    >>> is_extension_array_dtype(arr.dtype)
    False
    """
    # Use the object's ``dtype`` attribute when it has one, else the
    # object itself.
    candidate = getattr(arr_or_dtype, "dtype", arr_or_dtype)

    if isinstance(candidate, ExtensionDtype):
        return True
    if isinstance(candidate, np.dtype):
        return False

    # Neither kind of dtype instance (e.g. a string or a class): ask the
    # extension dtype registry whether it recognises it.
    found = registry.find(candidate)
    return found is not None
1387
1388
def is_ea_or_datetimelike_dtype(dtype: DtypeObj | None) -> bool:
    """
    Check for ExtensionDtype, datetime64 dtype, or timedelta64 dtype.

    Notes
    -----
    Checks only for dtype objects, not dtype-castable strings or types.
    """
    if isinstance(dtype, ExtensionDtype):
        return True
    # NumPy kind codes: "m" is timedelta64, "M" is datetime64.
    return isinstance(dtype, np.dtype) and dtype.kind in ("m", "M")
1400
1401
def is_complex_dtype(arr_or_dtype) -> bool:
    """
    Check whether the provided array or dtype is of a complex dtype.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array or dtype to check.

    Returns
    -------
    boolean
        Whether or not the array or dtype is of a complex dtype.

    Examples
    --------
    >>> from pandas.api.types import is_complex_dtype
    >>> is_complex_dtype(str)
    False
    >>> is_complex_dtype(int)
    False
    >>> is_complex_dtype(np.complex128)
    True
    >>> is_complex_dtype(np.array(['a', 'b']))
    False
    >>> is_complex_dtype(pd.Series([1, 2]))
    False
    >>> is_complex_dtype(np.array([1 + 1j, 5]))
    True
    """
    # Predicate over the dtype's scalar type, built once and handed to the
    # shared dtype-type checker.
    is_complex_type = classes(np.complexfloating)
    return _is_dtype_type(arr_or_dtype, is_complex_type)
1433
1434
def _is_dtype(arr_or_dtype, condition) -> bool:
    """
    Return true if the condition is satisfied for the arr_or_dtype.

    Parameters
    ----------
    arr_or_dtype : array-like, str, np.dtype, or ExtensionArrayType
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtype]]
        Predicate applied to the extracted dtype instance.

    Returns
    -------
    bool
        False when ``arr_or_dtype`` is None or no dtype can be extracted
        from it; otherwise whatever ``condition`` returns.
    """
    if arr_or_dtype is not None:
        try:
            resolved = get_dtype(arr_or_dtype)
        except (TypeError, ValueError):
            # No dtype could be extracted; fall through to False.
            pass
        else:
            # Evaluated outside the ``try`` so that errors raised by
            # ``condition`` itself are not swallowed.
            return condition(resolved)
    return False
1457
1458
def get_dtype(arr_or_dtype) -> DtypeObj:
    """
    Get the dtype instance associated with an array
    or dtype object.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype object whose dtype we want to extract.

    Returns
    -------
    obj_dtype : The extracted dtype instance from the
                passed in array or dtype object.

    Raises
    ------
    TypeError : The passed in object is None.
    """
    if arr_or_dtype is None:
        raise TypeError("Cannot deduce dtype from null object")

    # Fast paths: a NumPy dtype is returned untouched, a bare class is
    # converted by NumPy directly.
    if isinstance(arr_or_dtype, np.dtype):
        return arr_or_dtype
    if isinstance(arr_or_dtype, type):
        return np.dtype(arr_or_dtype)

    # Array-likes contribute their ``dtype`` attribute; anything else is
    # passed to ``pandas_dtype`` as-is.
    source = arr_or_dtype.dtype if hasattr(arr_or_dtype, "dtype") else arr_or_dtype
    return pandas_dtype(source)
1492
1493
def _is_dtype_type(arr_or_dtype, condition) -> bool:
    """
    Return true if the condition is satisfied for the arr_or_dtype.

    Unlike ``_is_dtype``, the predicate receives the dtype's scalar
    ``type`` rather than the dtype instance.

    Parameters
    ----------
    arr_or_dtype : array-like or dtype
        The array-like or dtype object whose dtype we want to extract.
    condition : callable[Union[np.dtype, ExtensionDtypeType]]
        Predicate applied to the extracted type. When no dtype applies
        (None, a list-like without ``dtype``, or an unconvertible
        scalar) it is called with ``type(None)`` instead.

    Returns
    -------
    bool : if the condition is satisfied for the arr_or_dtype
    """
    none_type = type(None)

    if arr_or_dtype is None:
        return condition(none_type)

    # fastpath
    if isinstance(arr_or_dtype, np.dtype):
        return condition(arr_or_dtype.type)
    if isinstance(arr_or_dtype, type):
        # For an ExtensionDtype subclass, convert its ``type`` attribute
        # rather than the class itself.
        klass = (
            arr_or_dtype.type
            if issubclass(arr_or_dtype, ExtensionDtype)
            else arr_or_dtype
        )
        return condition(np.dtype(klass).type)

    if hasattr(arr_or_dtype, "dtype"):
        # if we have an array-like
        candidate = arr_or_dtype.dtype
    elif is_list_like(arr_or_dtype):
        # we are not possibly a dtype
        return condition(none_type)
    else:
        candidate = arr_or_dtype

    try:
        scalar_type = pandas_dtype(candidate).type
    except (TypeError, ValueError):
        return condition(none_type) if is_scalar(candidate) else False

    return condition(scalar_type)
1536
1537
def infer_dtype_from_object(dtype) -> type:
    """
    Get a numpy dtype.type-style object for a dtype object.

    This method also includes handling of the datetime64[ns] and
    datetime64[ns, TZ] objects.

    If no dtype can be found, we return ``object``.

    Parameters
    ----------
    dtype : dtype, type
        The dtype object whose numpy dtype.type-style
        object we want to extract.

    Returns
    -------
    type
    """
    if isinstance(dtype, type) and issubclass(dtype, np.generic):
        # Already a NumPy scalar type object.
        return dtype

    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        # dtype object
        try:
            _validate_date_like_dtype(dtype)
        except TypeError:
            # Should still pass if we don't have a date-like
            pass
        if hasattr(dtype, "numpy_dtype"):
            # TODO: Implement this properly
            # https://github.com/pandas-dev/pandas/issues/52576
            return dtype.numpy_dtype.type
        return dtype.type

    try:
        dtype = pandas_dtype(dtype)
    except TypeError:
        # Keep the raw input; the string aliases below may still apply.
        pass

    if is_extension_array_dtype(dtype):
        return dtype.type

    if isinstance(dtype, str):
        # TODO(jreback)
        # should deprecate these
        if dtype in ["datetimetz", "datetime64tz"]:
            return DatetimeTZDtype.type
        if dtype in ["period"]:
            raise NotImplementedError

        if dtype in ["datetime", "timedelta"]:
            dtype += "64"
        try:
            return infer_dtype_from_object(getattr(np, dtype))
        except (AttributeError, TypeError):
            # Handles cases like get_dtype(int) i.e.,
            # Python objects that are valid dtypes
            # (unlike user-defined types, in general)
            #
            # TypeError handles the float16 type code of 'e'
            # further handle internal types
            pass

    return infer_dtype_from_object(np.dtype(dtype))
1603
1604
def _validate_date_like_dtype(dtype) -> None:
    """
    Check whether the dtype is a date-like dtype. Raises an error if invalid.

    Parameters
    ----------
    dtype : dtype, type
        The dtype to check.

    Raises
    ------
    TypeError : The dtype could not be casted to a date-like dtype.
    ValueError : The dtype is an illegal date-like dtype (e.g. the
                 frequency provided is too specific)
    """
    try:
        # First element of the datetime metadata is the unit string.
        unit = np.datetime_data(dtype)[0]
    except ValueError as err:
        raise TypeError(err) from err

    if unit in ("generic", "ns"):
        return

    raise ValueError(
        f"{repr(dtype.name)} is too specific of a frequency, "
        f"try passing {repr(dtype.type.__name__)}"
    )
1629
1630
def validate_all_hashable(*args, error_name: str | None = None) -> None:
    """
    Return None if all args are hashable, else raise a TypeError.

    Parameters
    ----------
    *args
        Arguments to validate.
    error_name : str, optional
        The name to use if error

    Raises
    ------
    TypeError : If an argument is not hashable

    Returns
    -------
    None
    """
    for arg in args:
        if is_hashable(arg):
            continue
        # First unhashable argument: a non-empty ``error_name`` selects
        # the named message, otherwise the generic one is used.
        if error_name:
            raise TypeError(f"{error_name} must be a hashable type")
        raise TypeError("All elements must be hashable")
1654
1655
def pandas_dtype(dtype) -> DtypeObj:
    """
    Convert input into a pandas only dtype object or a numpy dtype object.

    Parameters
    ----------
    dtype : object to be converted

    Returns
    -------
    np.dtype or a pandas dtype

    Raises
    ------
    TypeError if not a dtype
    """
    # short-circuit
    if isinstance(dtype, np.ndarray):
        return dtype.dtype
    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        return dtype

    # registered extension types
    registered = registry.find(dtype)
    if registered is not None:
        return registered

    # try a numpy dtype
    # raise a consistent TypeError if failed
    try:
        with warnings.catch_warnings():
            # GH#51523 - Series.astype(np.integer) doesn't show
            # numpy deprecation warning of np.integer
            # Hence enabling DeprecationWarning
            warnings.simplefilter("always", DeprecationWarning)
            npdtype = np.dtype(dtype)
    except SyntaxError as err:
        # np.dtype uses `eval` which can raise SyntaxError
        raise TypeError(f"data type '{dtype}' not understood") from err

    # Any invalid dtype (such as pd.Timestamp) should raise an error.
    # np.dtype(invalid_type).kind = 0 for such objects. However, this will
    # also catch some valid dtypes such as object, np.object_ and 'object'
    # which we safeguard against by recognising those spellings first.
    # Hashability is checked to avoid errors/DeprecationWarning when
    # `dtype` is an array.
    is_object_spelling = is_hashable(dtype) and dtype in [
        object,
        np.object_,
        "object",
        "O",
    ]
    if npdtype.kind == "O" and not is_object_spelling:
        raise TypeError(f"dtype '{dtype}' not understood")

    return npdtype
1709
1710
def is_all_strings(value: ArrayLike) -> bool:
    """
    Check if this is an array of strings that we should try parsing.

    Includes object-dtype ndarray containing all-strings, StringArray,
    and Categorical with all-string categories.
    Does not include numpy string dtypes.
    """
    values_dtype = value.dtype

    if isinstance(values_dtype, CategoricalDtype):
        # Categoricals are judged by their categories.
        return values_dtype.categories.inferred_type == "string"

    if not isinstance(values_dtype, np.dtype):
        # Remaining extension dtypes: compare against the "string" alias.
        return values_dtype == "string"

    # NumPy arrays qualify only as object dtype whose elements all infer
    # as strings; the inference is skipped for any other NumPy dtype.
    return (
        values_dtype == np.dtype("object")
        and lib.infer_dtype(value, skipna=False) == "string"
    )
1729
1730
# Names exported by ``from <this module> import *``.
# Entries are kept in case-insensitive alphabetical order.
__all__ = [
    "classes",
    "classes_and_not_datetimelike",
    "DT64NS_DTYPE",
    "ensure_float",
    "ensure_float64",
    "ensure_python_int",
    "ensure_str",
    "get_dtype",
    "infer_dtype_from_object",
    "INT64_DTYPE",
    "is_1d_only_ea_dtype",
    "is_1d_only_ea_obj",
    "is_all_strings",
    "is_any_int_dtype",
    "is_any_real_numeric_dtype",
    "is_array_like",
    "is_bool",
    "is_bool_dtype",
    "is_categorical_dtype",
    "is_complex",
    "is_complex_dtype",
    "is_dataclass",
    "is_datetime64_any_dtype",
    "is_datetime64_dtype",
    "is_datetime64_ns_dtype",
    "is_datetime64tz_dtype",
    "is_datetime_or_timedelta_dtype",
    "is_decimal",
    "is_dict_like",
    "is_dtype_equal",
    "is_ea_or_datetimelike_dtype",
    "is_extension_array_dtype",
    "is_file_like",
    "is_float_dtype",
    "is_int64_dtype",
    "is_integer_dtype",
    "is_interval",
    "is_interval_dtype",
    "is_iterator",
    "is_named_tuple",
    "is_nested_list_like",
    "is_number",
    "is_numeric_dtype",
    "is_object_dtype",
    "is_period_dtype",
    "is_re",
    "is_re_compilable",
    "is_scipy_sparse",
    "is_sequence",
    "is_signed_integer_dtype",
    "is_sparse",
    "is_string_dtype",
    "is_string_or_object_np_dtype",
    "is_timedelta64_dtype",
    "is_timedelta64_ns_dtype",
    "is_unsigned_integer_dtype",
    "needs_i8_conversion",
    "pandas_dtype",
    "TD64NS_DTYPE",
    "validate_all_hashable",
]