1from __future__ import annotations
2
3import numpy as np
4
5from pandas.core.dtypes.base import register_extension_dtype
6from pandas.core.dtypes.common import is_integer_dtype
7
8from pandas.core.arrays.numeric import (
9 NumericArray,
10 NumericDtype,
11)
12
13
class IntegerDtype(NumericDtype):
    """
    Base ExtensionDtype describing one width and signedness of integer.

    This class is non-public. The concrete dtypes derive from it, e.g.
    Int8Dtype represents signed 8-bit integers.

    Each subclass supplies its own ``name`` and ``type`` class attributes.
    """

    # Default NumPy dtype for this dtype family.
    _default_np_dtype = np.dtype(np.int64)
    # Dtype predicate for this dtype family.
    _checker = is_integer_dtype

    @classmethod
    def construct_array_type(cls) -> type[IntegerArray]:
        """
        Return the array class that pairs with this dtype.

        Returns
        -------
        type
        """
        return IntegerArray

    @classmethod
    def _str_to_dtype_mapping(cls):
        """
        Return the module-level ``INT_STR_TO_DTYPE`` mapping.
        """
        return INT_STR_TO_DTYPE

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Cast ``values`` to ``dtype`` only if no information is lost.

        A cast that NumPy itself classifies as "safe" is returned directly.
        Otherwise the values are converted without that restriction and the
        result is accepted only when it compares equal to the input
        element-wise, e.g. a floating array whose entries are all whole
        numbers.

        Raises
        ------
        TypeError
            If the converted values are not all equal to the originals.
        """
        try:
            return values.astype(dtype, casting="safe", copy=copy)
        except TypeError as err:
            # NumPy refused the cast under its "safe" rule; convert anyway and
            # verify that the round trip kept every value intact.
            converted = values.astype(dtype, copy=copy)
            if not (converted == values).all():
                raise TypeError(
                    f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
                ) from err
            return converted
60
61
class IntegerArray(NumericArray):
    """
    Array of integer values in which entries may be missing.

    Missing entries are represented by :attr:`pandas.NA`.

    .. warning::

       IntegerArray is currently experimental, and its API or internal
       implementation may change without warning.

    An IntegerArray is backed by 2 numpy arrays:

    - data: a numpy integer array of the appropriate dtype holding the values
    - mask: a boolean array marking the data, where True means missing

    To build an IntegerArray from generic array-like input, call
    :func:`pandas.array` with one of the integer dtypes (see examples).

    See :ref:`integer_na` for more.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d integer-dtype array.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values.
    copy : bool, default False
        Whether to copy the `values` and `mask`.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    IntegerArray

    Examples
    --------
    Create an IntegerArray with :func:`pandas.array`.

    >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
    >>> int_array
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: Int32

    The dtypes can also be given by their string aliases, which are
    capitalized.

    >>> pd.array([1, None, 3], dtype='Int32')
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: Int32

    >>> pd.array([1, None, 3], dtype='UInt16')
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: UInt16
    """

    # Dtype class paired with this array type.
    _dtype_cls = IntegerDtype

    # Placeholder written into '_data' so that filling does not upcast.
    _internal_fill_value = 1

    # Fill values used by any/all.
    # The ignores silence mypy's "Incompatible types in assignment" error: the
    # expression has type "int" while base class "BaseMaskedArray" defined the
    # type as "<typing special form>".
    _truthy_value = 1  # type: ignore[assignment]
    _falsey_value = 0  # type: ignore[assignment]
136
137
# Docstring template for the concrete integer dtype classes; the ``{dtype}``
# placeholder is filled in with ``str.format``.
_dtype_docstring = (
    "\n"
    "An ExtensionDtype for {dtype} integer data.\n"
    "\n"
    "Uses :attr:`pandas.NA` as its missing value, rather than :attr:`numpy.nan`.\n"
    "\n"
    "Attributes\n"
    "----------\n"
    "None\n"
    "\n"
    "Methods\n"
    "-------\n"
    "None\n"
)
151
# Create the concrete dtype classes, one per integer width and signedness.
153
154
@register_extension_dtype
class Int8Dtype(IntegerDtype):
    # Nullable dtype for signed 8-bit integers.
    name = "Int8"
    type = np.int8
    __doc__ = _dtype_docstring.format(dtype="int8")
160
161
@register_extension_dtype
class Int16Dtype(IntegerDtype):
    # Nullable dtype for signed 16-bit integers.
    name = "Int16"
    type = np.int16
    __doc__ = _dtype_docstring.format(dtype="int16")
167
168
@register_extension_dtype
class Int32Dtype(IntegerDtype):
    # Nullable dtype for signed 32-bit integers.
    name = "Int32"
    type = np.int32
    __doc__ = _dtype_docstring.format(dtype="int32")
174
175
@register_extension_dtype
class Int64Dtype(IntegerDtype):
    # Nullable dtype for signed 64-bit integers.
    name = "Int64"
    type = np.int64
    __doc__ = _dtype_docstring.format(dtype="int64")
181
182
@register_extension_dtype
class UInt8Dtype(IntegerDtype):
    # Nullable dtype for unsigned 8-bit integers.
    name = "UInt8"
    type = np.uint8
    __doc__ = _dtype_docstring.format(dtype="uint8")
188
189
@register_extension_dtype
class UInt16Dtype(IntegerDtype):
    # Nullable dtype for unsigned 16-bit integers.
    name = "UInt16"
    type = np.uint16
    __doc__ = _dtype_docstring.format(dtype="uint16")
195
196
@register_extension_dtype
class UInt32Dtype(IntegerDtype):
    # Nullable dtype for unsigned 32-bit integers.
    name = "UInt32"
    type = np.uint32
    __doc__ = _dtype_docstring.format(dtype="uint32")
202
203
@register_extension_dtype
class UInt64Dtype(IntegerDtype):
    # Nullable dtype for unsigned 64-bit integers.
    name = "UInt64"
    type = np.uint64
    __doc__ = _dtype_docstring.format(dtype="uint64")
209
210
# Maps each lowercase dtype name ("int8" ... "uint64") to an instance of the
# matching nullable integer dtype; the key is the dtype's own ``name``
# lowercased.
INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
    dtype_cls.name.lower(): dtype_cls()
    for dtype_cls in (
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    )
}