1from __future__ import annotations
2
3from typing import ClassVar
4
5import numpy as np
6
7from pandas.core.dtypes.base import register_extension_dtype
8from pandas.core.dtypes.common import is_integer_dtype
9
10from pandas.core.arrays.numeric import (
11 NumericArray,
12 NumericDtype,
13)
14
15
class IntegerDtype(NumericDtype):
    """
    ExtensionDtype describing one particular size and kind of integer.

    This class is non-public; the concrete implementations derive from it.
    Int8Dtype, for instance, represents signed 8-bit integers.

    Each subclass sets its own ``name`` and ``type`` attributes when it is
    created.
    """

    _checker = is_integer_dtype
    _default_np_dtype = np.dtype(np.int64)

    @classmethod
    def construct_array_type(cls) -> type[IntegerArray]:
        """
        Return the array class that goes with this dtype.

        Returns
        -------
        type
        """
        return IntegerArray

    @classmethod
    def _get_dtype_mapping(cls) -> dict[np.dtype, IntegerDtype]:
        """
        Return the module-level ``NUMPY_INT_TO_DTYPE`` lookup table.
        """
        return NUMPY_INT_TO_DTYPE

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Cast ``values`` to ``dtype`` without losing information.

        A cast counts as "safe" here when it is lossless: for example, if
        'values' has a floating dtype, every element must hold an integer.

        Raises
        ------
        TypeError
            If the converted array does not compare equal to ``values``
            element-wise.
        """
        try:
            # Fast path: numpy itself accepts the conversion as safe.
            return values.astype(dtype, casting="safe", copy=copy)
        except TypeError as err:
            # numpy refused the "safe" cast; convert anyway and accept the
            # result only if every element survived unchanged.
            converted = values.astype(dtype, copy=copy)
            if not (converted == values).all():
                raise TypeError(
                    f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
                ) from err
            return converted
62
63
class IntegerArray(NumericArray):
    """
    Array of integers that may contain missing values.

    Missing entries are represented by :attr:`pandas.NA`.

    .. warning::

       IntegerArray is currently experimental, and its API or internal
       implementation may change without warning.

    An IntegerArray is represented by 2 numpy arrays:

    - data: a numpy integer array of the appropriate dtype
    - mask: a boolean array acting as a mask on the data, True is missing

    To build an IntegerArray from generic array-like input, call
    :func:`pandas.array` with one of the integer dtypes (see examples).

    See :ref:`integer_na` for more.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d integer-dtype array.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values.
    copy : bool, default False
        Whether to copy the `values` and `mask`.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    IntegerArray

    Examples
    --------
    Create an IntegerArray with :func:`pandas.array`.

    >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
    >>> int_array
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: Int32

    String aliases for the dtypes are also available. They are capitalized.

    >>> pd.array([1, None, 3], dtype='Int32')
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: Int32

    >>> pd.array([1, None, 3], dtype='UInt16')
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: UInt16
    """

    # Fill values used for any/all.
    # Incompatible types in assignment (expression has type "int", base class
    # "BaseMaskedArray" defined the type as "<typing special form>")
    _falsey_value = 0  # type: ignore[assignment]
    _truthy_value = 1  # type: ignore[assignment]

    # Value written into '_data' as filler so that no upcasting is needed.
    _internal_fill_value = 1

    # Dtype class paired with this array type.
    _dtype_cls = IntegerDtype
138
139
# Docstring template shared by the concrete dtype classes defined after it.
# The single ``{dtype}`` placeholder is filled in with ``str.format`` using the
# lowercase integer name (e.g. "int8"); the Examples section is the same text
# for every class and covers all eight dtypes.
_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.

Uses :attr:`pandas.NA` as its missing value, rather than :attr:`numpy.nan`.

Attributes
----------
None

Methods
-------
None

Examples
--------
For Int8Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.Int8Dtype())
>>> ser.dtype
Int8Dtype()

For Int16Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.Int16Dtype())
>>> ser.dtype
Int16Dtype()

For Int32Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.Int32Dtype())
>>> ser.dtype
Int32Dtype()

For Int64Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.Int64Dtype())
>>> ser.dtype
Int64Dtype()

For UInt8Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.UInt8Dtype())
>>> ser.dtype
UInt8Dtype()

For UInt16Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.UInt16Dtype())
>>> ser.dtype
UInt16Dtype()

For UInt32Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.UInt32Dtype())
>>> ser.dtype
UInt32Dtype()

For UInt64Dtype:

>>> ser = pd.Series([2, pd.NA], dtype=pd.UInt64Dtype())
>>> ser.dtype
UInt64Dtype()
"""
203
# Create the concrete dtype classes: one per supported integer width and signedness.
205
206
@register_extension_dtype
class Int8Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's signed 8-bit integer.
    __doc__ = _dtype_docstring.format(dtype="int8")
    name: ClassVar[str] = "Int8"
    type = np.int8
212
213
@register_extension_dtype
class Int16Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's signed 16-bit integer.
    __doc__ = _dtype_docstring.format(dtype="int16")
    name: ClassVar[str] = "Int16"
    type = np.int16
219
220
@register_extension_dtype
class Int32Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's signed 32-bit integer.
    __doc__ = _dtype_docstring.format(dtype="int32")
    name: ClassVar[str] = "Int32"
    type = np.int32
226
227
@register_extension_dtype
class Int64Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's signed 64-bit integer.
    __doc__ = _dtype_docstring.format(dtype="int64")
    name: ClassVar[str] = "Int64"
    type = np.int64
233
234
@register_extension_dtype
class UInt8Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's unsigned 8-bit integer.
    __doc__ = _dtype_docstring.format(dtype="uint8")
    name: ClassVar[str] = "UInt8"
    type = np.uint8
240
241
@register_extension_dtype
class UInt16Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's unsigned 16-bit integer.
    __doc__ = _dtype_docstring.format(dtype="uint16")
    name: ClassVar[str] = "UInt16"
    type = np.uint16
247
248
@register_extension_dtype
class UInt32Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's unsigned 32-bit integer.
    __doc__ = _dtype_docstring.format(dtype="uint32")
    name: ClassVar[str] = "UInt32"
    type = np.uint32
254
255
@register_extension_dtype
class UInt64Dtype(IntegerDtype):
    # Registered extension dtype backed by numpy's unsigned 64-bit integer.
    __doc__ = _dtype_docstring.format(dtype="uint64")
    name: ClassVar[str] = "UInt64"
    type = np.uint64
261
262
# Lookup table from each supported NumPy integer dtype to a fresh instance of
# the matching extension dtype. Keys are derived from each class's ``type``
# attribute; order is signed first, then unsigned, narrowest to widest.
NUMPY_INT_TO_DTYPE: dict[np.dtype, IntegerDtype] = {
    np.dtype(dtype_cls.type): dtype_cls()
    for dtype_cls in (
        Int8Dtype,
        Int16Dtype,
        Int32Dtype,
        Int64Dtype,
        UInt8Dtype,
        UInt16Dtype,
        UInt32Dtype,
        UInt64Dtype,
    )
}