1"""
2Constructor functions intended to be shared by pd.array, Series.__init__,
3and Index.__new__.
4
5These should not depend on core.internals.
6"""
7from __future__ import annotations
8
9from typing import (
10 TYPE_CHECKING,
11 Optional,
12 Sequence,
13 Union,
14 cast,
15 overload,
16)
17
18import numpy as np
19from numpy import ma
20
21from pandas._libs import lib
22from pandas._libs.tslibs.period import Period
23from pandas._typing import (
24 AnyArrayLike,
25 ArrayLike,
26 Dtype,
27 DtypeObj,
28 T,
29)
30
31from pandas.core.dtypes.base import (
32 ExtensionDtype,
33 _registry as registry,
34)
35from pandas.core.dtypes.cast import (
36 construct_1d_arraylike_from_scalar,
37 construct_1d_object_array_from_listlike,
38 maybe_cast_to_datetime,
39 maybe_cast_to_integer_array,
40 maybe_convert_platform,
41 maybe_infer_to_datetimelike,
42 maybe_promote,
43)
44from pandas.core.dtypes.common import (
45 is_datetime64_ns_dtype,
46 is_dtype_equal,
47 is_extension_array_dtype,
48 is_integer_dtype,
49 is_list_like,
50 is_object_dtype,
51 is_timedelta64_ns_dtype,
52)
53from pandas.core.dtypes.dtypes import PandasDtype
54from pandas.core.dtypes.generic import (
55 ABCDataFrame,
56 ABCExtensionArray,
57 ABCIndex,
58 ABCPandasArray,
59 ABCRangeIndex,
60 ABCSeries,
61)
62from pandas.core.dtypes.missing import isna
63
64import pandas.core.common as com
65
66if TYPE_CHECKING:
67 from pandas import (
68 Index,
69 Series,
70 )
71 from pandas.core.arrays.base import ExtensionArray
72
73
def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is determined by
        ``pd.options.mode.string_storage`` if the dtype is not explicitly given.

        For all other cases, NumPy's usual inference rules will be used.

        .. versionchanged:: 1.2.0

            Pandas now also infers nullable-floating dtype for float-like
            input data

    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.PandasArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.PandasArray` backed by a NumPy
    array.

    >>> pd.array(['a', 'b'], dtype=str)
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a  NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``PandasArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.PandasArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <PandasArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
    as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <PandasArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    # Imported here rather than at module level, as in the original layout.
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        PandasArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    # Inputs that can never be treated as a 1-dimensional sequence.
    if lib.is_scalar(data):
        raise ValueError(f"Cannot pass scalar '{data}' to 'pandas.array'.")
    if isinstance(data, ABCDataFrame):
        raise TypeError("Cannot pass DataFrame to 'pandas.array'")

    # pandas containers carry a trustworthy dtype; np.ndarray is deliberately
    # left out so that its contents go through type inference below.
    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    if isinstance(dtype, str):
        # registry.find gives None for aliases it does not know; keep the
        # plain string in that case.
        dtype = registry.find(dtype) or dtype

    if isinstance(data, ExtensionArray) and (
        dtype is None or is_dtype_equal(dtype, data.dtype)
    ):
        # e.g. TimedeltaArray[s], avoid casting to PandasArray
        return data.copy() if copy else data

    if is_extension_array_dtype(dtype):
        array_type = cast(ExtensionDtype, dtype).construct_array_type()
        return array_type._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        kind = lib.infer_dtype(data, skipna=True)

        if kind == "period":
            periods = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(periods, copy=copy)

        if kind == "interval":
            return IntervalArray(data, copy=copy)

        if kind.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to PandasArray
                pass

        elif kind.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif kind == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            string_array_type = StringDtype().construct_array_type()
            return string_array_type._from_sequence(data, copy=copy)

        elif kind == "integer":
            return IntegerArray._from_sequence(data, copy=copy)

        elif (
            kind in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            #  we will fall back to PandasArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif kind == "boolean":
            return BooleanArray._from_sequence(data, copy=copy)

    # Pandas overrides NumPy for
    #   1. datetime64[ns]
    #   2. timedelta64[ns]
    # so that a DatetimeArray / TimedeltaArray is returned.
    if is_datetime64_ns_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    if is_timedelta64_ns_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    return PandasArray._from_sequence(data, dtype=dtype, copy=copy)
380
381
382@overload
383def extract_array(
384 obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
385) -> ArrayLike:
386 ...
387
388
389@overload
390def extract_array(
391 obj: T, extract_numpy: bool = ..., extract_range: bool = ...
392) -> T | ArrayLike:
393 ...
394
395
396def extract_array(
397 obj: T, extract_numpy: bool = False, extract_range: bool = False
398) -> T | ArrayLike:
399 """
400 Extract the ndarray or ExtensionArray from a Series or Index.
401
402 For all other types, `obj` is just returned as is.
403
404 Parameters
405 ----------
406 obj : object
407 For Series / Index, the underlying ExtensionArray is unboxed.
408
409 extract_numpy : bool, default False
410 Whether to extract the ndarray from a PandasArray.
411
412 extract_range : bool, default False
413 If we have a RangeIndex, return range._values if True
414 (which is a materialized integer ndarray), otherwise return unchanged.
415
416 Returns
417 -------
418 arr : object
419
420 Examples
421 --------
422 >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
423 ['a', 'b', 'c']
424 Categories (3, object): ['a', 'b', 'c']
425
426 Other objects like lists, arrays, and DataFrames are just passed through.
427
428 >>> extract_array([1, 2, 3])
429 [1, 2, 3]
430
431 For an ndarray-backed Series / Index the ndarray is returned.
432
433 >>> extract_array(pd.Series([1, 2, 3]))
434 array([1, 2, 3])
435
436 To extract all the way down to the ndarray, pass ``extract_numpy=True``.
437
438 >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
439 array([1, 2, 3])
440 """
441 if isinstance(obj, (ABCIndex, ABCSeries)):
442 if isinstance(obj, ABCRangeIndex):
443 if extract_range:
444 return obj._values
445 # https://github.com/python/mypy/issues/1081
446 # error: Incompatible return value type (got "RangeIndex", expected
447 # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]")
448 return obj # type: ignore[return-value]
449
450 return obj._values
451
452 elif extract_numpy and isinstance(obj, ABCPandasArray):
453 return obj.to_numpy()
454
455 return obj
456
457
def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.

    Any other input (non-ndarray objects, or ndarrays of another dtype kind)
    is returned as-is.
    """
    if not isinstance(arr, np.ndarray):
        return arr

    kind = arr.dtype.kind
    if kind == "M":
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray._from_sequence(arr)

    if kind == "m":
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(arr)

    return arr
474
475
def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Return a copy of a numpy MaskedArray with masked entries filled in.

    When at least one entry is masked, the data is cast to the dtype that
    ``maybe_promote`` selects for holding ``np.nan``, the mask is softened,
    and the masked positions are overwritten with the matching fill value.
    When nothing is masked a plain copy is returned.
    """
    masked_positions = ma.getmaskarray(data)
    if not masked_positions.any():
        return data.copy()

    promoted, na_value = maybe_promote(data.dtype, np.nan)
    promoted = cast(np.dtype, promoted)
    # Incompatible types in assignment (expression has type "ndarray[Any,
    # dtype[Any]]", variable has type "MaskedArray[Any, Any]")
    data = data.astype(promoted, copy=True)  # type: ignore[assignment]
    data.soften_mask()  # set hardmask False if it was True
    data[masked_positions] = na_value
    return data
492
493
def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
        Scalar, list-like, ndarray, MaskedArray, range, ExtensionArray,
        Series/Index, or any object exposing ``__array__``.
    index : Index or None, default None
        Required when `data` is not list-like (its length determines how many
        times the scalar is broadcast); also used to repeat length-1 results.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `data` is not list-like and `index` is None.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # zero-dim ndarray: treat as a scalar but remember its dtype
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        # range_to_ndarray just built a fresh array; no further copy needed
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        # Until GH#49309 is fixed this check needs to come before the
        #  ExtensionDtype check
        if dtype is not None:
            subarr = data.astype(dtype, copy=copy)
        elif copy:
            subarr = data.copy()
        else:
            subarr = data

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        _sanitize_non_ordered(data)
        cls = dtype.construct_array_type()
        subarr = cls._from_sequence(data, dtype=dtype, copy=copy)

    # GH#846
    elif isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is None:
            subarr = data
            if data.dtype == object:
                subarr = maybe_infer_to_datetimelike(data)

            # only copy if inference above did not already produce a new object
            if subarr is data and copy:
                subarr = subarr.copy()

        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy)

    elif hasattr(data, "__array__"):
        # e.g. dask array GH#38645
        if copy:
            data = np.array(data, copy=True)
        else:
            # np.array(..., copy=False) means "never copy" on NumPy >= 2.0 and
            # raises when a copy is unavoidable; np.asarray copies only if
            # needed on every NumPy version.
            data = np.asarray(data)
        return sanitize_array(
            data,
            index=index,
            dtype=dtype,
            copy=False,
            allow_2d=allow_2d,
        )

    else:
        _sanitize_non_ordered(data)
        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        data = list(data)

        if len(data) == 0 and dtype is None:
            # We default to float64, matching numpy
            subarr = np.array([], dtype=np.float64)

        elif dtype is not None:
            subarr = _try_cast(data, dtype, copy)

        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr
615
616
def range_to_ndarray(rng: range) -> np.ndarray:
    """
    Cast a range object to ndarray.

    int64 is tried first, then uint64 when the bounds permit it; if both
    overflow, the range is materialized into an object-dtype array.
    """
    start, stop, step = rng.start, rng.stop, rng.step

    # GH#30171 perf avoid realizing range as a list in np.array
    try:
        return np.arange(start, stop, step, dtype="int64")
    except OverflowError:
        # GH#30173 handling for ranges that overflow int64
        pass

    unsigned_candidate = (start >= 0 and step > 0) or (step < 0 <= stop)
    if unsigned_candidate:
        try:
            return np.arange(start, stop, step, dtype="uint64")
        except OverflowError:
            pass

    return construct_1d_object_array_from_listlike(list(rng))
634
635
636def _sanitize_non_ordered(data) -> None:
637 """
638 Raise only for unordered sets, e.g., not for dict_keys
639 """
640 if isinstance(data, (set, frozenset)):
641 raise TypeError(f"'{type(data).__name__}' type is unordered")
642
643
644def _sanitize_ndim(
645 result: ArrayLike,
646 data,
647 dtype: DtypeObj | None,
648 index: Index | None,
649 *,
650 allow_2d: bool = False,
651) -> ArrayLike:
652 """
653 Ensure we have a 1-dimensional result array.
654 """
655 if getattr(result, "ndim", 0) == 0:
656 raise ValueError("result should be arraylike with ndim > 0")
657
658 if result.ndim == 1:
659 # the result that we want
660 result = _maybe_repeat(result, index)
661
662 elif result.ndim > 1:
663 if isinstance(data, np.ndarray):
664 if allow_2d:
665 return result
666 raise ValueError(
667 f"Data must be 1-dimensional, got ndarray of shape {data.shape} instead"
668 )
669 if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
670 # i.e. PandasDtype("O")
671
672 result = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
673 cls = dtype.construct_array_type()
674 result = cls._from_sequence(result, dtype=dtype)
675 else:
676 # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
677 # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
678 # dtype[Any], None]"
679 result = com.asarray_tuplesafe(data, dtype=dtype) # type: ignore[arg-type]
680 return result
681
682
683def _sanitize_str_dtypes(
684 result: np.ndarray, data, dtype: np.dtype | None, copy: bool
685) -> np.ndarray:
686 """
687 Ensure we have a dtype that is supported by pandas.
688 """
689
690 # This is to prevent mixed-type Series getting all casted to
691 # NumPy string type, e.g. NaN --> '-1#IND'.
692 if issubclass(result.dtype.type, str):
693 # GH#16605
694 # If not empty convert the data to dtype
695 # GH#19853: If data is a scalar, result has already the result
696 if not lib.is_scalar(data):
697 if not np.all(isna(data)):
698 data = np.array(data, dtype=dtype, copy=False)
699 result = np.array(data, dtype=object, copy=copy)
700 return result
701
702
703def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
704 """
705 If we have a length-1 array and an index describing how long we expect
706 the result to be, repeat the array.
707 """
708 if index is not None:
709 if 1 == len(arr) != len(index):
710 arr = arr.repeat(len(index))
711 return arr
712
713
714def _try_cast(
715 arr: list | np.ndarray,
716 dtype: np.dtype,
717 copy: bool,
718) -> ArrayLike:
719 """
720 Convert input to numpy ndarray and optionally cast to a given dtype.
721
722 Parameters
723 ----------
724 arr : ndarray or list
725 Excludes: ExtensionArray, Series, Index.
726 dtype : np.dtype
727 copy : bool
728 If False, don't copy the data if not needed.
729
730 Returns
731 -------
732 np.ndarray or ExtensionArray
733 """
734 is_ndarray = isinstance(arr, np.ndarray)
735
736 if is_object_dtype(dtype):
737 if not is_ndarray:
738 subarr = construct_1d_object_array_from_listlike(arr)
739 return subarr
740 return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)
741
742 elif dtype.kind == "U":
743 # TODO: test cases with arr.dtype.kind in ["m", "M"]
744 if is_ndarray:
745 arr = cast(np.ndarray, arr)
746 shape = arr.shape
747 if arr.ndim > 1:
748 arr = arr.ravel()
749 else:
750 shape = (len(arr),)
751 return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
752 shape
753 )
754
755 elif dtype.kind in ["m", "M"]:
756 return maybe_cast_to_datetime(arr, dtype)
757
758 # GH#15832: Check if we are requesting a numeric dtype and
759 # that we can convert the data to the requested dtype.
760 elif is_integer_dtype(dtype):
761 # this will raise if we have e.g. floats
762
763 subarr = maybe_cast_to_integer_array(arr, dtype)
764 else:
765 subarr = np.array(arr, dtype=dtype, copy=copy)
766
767 return subarr