"""
Constructor functions intended to be shared by pd.array, Series.__init__,
and Index.__new__.

These should not depend on core.internals.
"""
from __future__ import annotations

from collections.abc import Sequence
from typing import (
    TYPE_CHECKING,
    Optional,
    Union,
    cast,
    overload,
)
import warnings

import numpy as np
from numpy import ma

from pandas._config import using_pyarrow_string_dtype

from pandas._libs import lib
from pandas._libs.tslibs import (
    Period,
    get_supported_dtype,
    is_supported_dtype,
)
from pandas._typing import (
    AnyArrayLike,
    ArrayLike,
    Dtype,
    DtypeObj,
    T,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
    construct_1d_arraylike_from_scalar,
    construct_1d_object_array_from_listlike,
    maybe_cast_to_datetime,
    maybe_cast_to_integer_array,
    maybe_convert_platform,
    maybe_infer_to_datetimelike,
    maybe_promote,
)
from pandas.core.dtypes.common import (
    is_list_like,
    is_object_dtype,
    is_string_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCExtensionArray,
    ABCIndex,
    ABCSeries,
)
from pandas.core.dtypes.missing import isna

import pandas.core.common as com

if TYPE_CHECKING:
    from pandas import (
        Index,
        Series,
    )
    from pandas.core.arrays.base import ExtensionArray


def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is determined
        by ``pd.options.mode.string_storage`` if the dtype is not explicitly given.

        For all other cases, NumPy's usual inference rules will be used.
    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.NumpyExtensionArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.NumpyExtensionArray` backed by a
    NumPy array.

    >>> pd.array(['a', 'b'], dtype=str)
    <NumpyExtensionArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <NumpyExtensionArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, pandas has arrays that mostly overlap with NumPy

    * :class:`arrays.DatetimeArray`
    * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``NumpyExtensionArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas can infer.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.NumpyExtensionArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <NumpyExtensionArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the
    `dtype` as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <NumpyExtensionArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
    ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        NumpyExtensionArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)
    elif isinstance(data, ABCDataFrame):
        raise TypeError("Cannot pass DataFrame to 'pandas.array'")

    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        # Note: we exclude np.ndarray here, will do type inference on it
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # resolve string aliases and registered extension dtypes to dtype objects
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ExtensionArray) and (dtype is None or data.dtype == dtype):
        # e.g. TimedeltaArray[s], avoid casting to NumpyExtensionArray
        if copy:
            return data.copy()
        return data

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        elif inferred_dtype == "interval":
            return IntervalArray(data, copy=copy)

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to NumpyExtensionArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            dtype = StringDtype()
            cls = dtype.construct_array_type()
            return cls._from_sequence(data, dtype=dtype, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)
        elif inferred_dtype == "empty" and not hasattr(data, "dtype") and not len(data):
            return FloatingArray._from_sequence(data, copy=copy)
        elif (
            inferred_dtype in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            # we will fall back to NumpyExtensionArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, dtype="boolean", copy=copy)

    # Pandas overrides NumPy for
    #  1. datetime64[ns,us,ms,s]
    #  2. timedelta64[ns,us,ms,s]
    # so that a DatetimeArray or TimedeltaArray is returned.
    if lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    if lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    elif lib.is_np_dtype(dtype, "mM"):
        warnings.warn(
            r"datetime64 and timedelta64 dtype resolutions other than "
            r"'s', 'ms', 'us', and 'ns' are deprecated. "
            r"In future releases passing unsupported resolutions will "
            r"raise an exception.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    return NumpyExtensionArray._from_sequence(data, dtype=dtype, copy=copy)


_typs = frozenset(
    {
        "index",
        "rangeindex",
        "multiindex",
        "datetimeindex",
        "timedeltaindex",
        "periodindex",
        "categoricalindex",
        "intervalindex",
        "series",
    }
)


@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Extract the ndarray or ExtensionArray from a Series or Index.

    For all other types, `obj` is just returned as is.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.

    extract_numpy : bool, default False
        Whether to extract the ndarray from a NumpyExtensionArray.

    extract_range : bool, default False
        If we have a RangeIndex, return range._values if True
        (which is a materialized integer ndarray), otherwise return unchanged.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Other objects like lists, arrays, and DataFrames are just passed through.

    >>> extract_array([1, 2, 3])
    [1, 2, 3]

    For an ndarray-backed Series / Index the ndarray is returned.

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])

    To extract all the way down to the ndarray, pass ``extract_numpy=True``.

    >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
    array([1, 2, 3])
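
    A RangeIndex is returned unchanged unless ``extract_range=True`` is
    passed, in which case its materialized integer values are extracted
    (illustrative sketch of both cases):

    >>> extract_array(pd.RangeIndex(3))
    RangeIndex(start=0, stop=3, step=1)

    >>> extract_array(pd.RangeIndex(3), extract_range=True)
    array([0, 1, 2])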
    """
    typ = getattr(obj, "_typ", None)
    if typ in _typs:
        # i.e. isinstance(obj, (ABCIndex, ABCSeries))
        if typ == "rangeindex":
            if extract_range:
                # error: "T" has no attribute "_values"
                return obj._values  # type: ignore[attr-defined]
            return obj

        # error: "T" has no attribute "_values"
        return obj._values  # type: ignore[attr-defined]

    elif extract_numpy and typ == "npy_extension":
        # i.e. isinstance(obj, ABCNumpyExtensionArray)
        # error: "T" has no attribute "to_numpy"
        return obj.to_numpy()  # type: ignore[attr-defined]

    return obj


def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
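
    Examples
    --------
    A minimal illustration; a nanosecond-resolution ndarray is wrapped
    without conversion, while non-datetimelike input passes through:

    >>> ensure_wrapped_if_datetimelike(np.array(["2015", "2016"], dtype="M8[ns]"))
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> ensure_wrapped_if_datetimelike(np.array([1, 2, 3]))
    array([1, 2, 3])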
    """
    if isinstance(arr, np.ndarray):
        if arr.dtype.kind == "M":
            from pandas.core.arrays import DatetimeArray

            dtype = get_supported_dtype(arr.dtype)
            return DatetimeArray._from_sequence(arr, dtype=dtype)

        elif arr.dtype.kind == "m":
            from pandas.core.arrays import TimedeltaArray

            dtype = get_supported_dtype(arr.dtype)
            return TimedeltaArray._from_sequence(arr, dtype=dtype)

    return arr


def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Convert a numpy MaskedArray, softening the mask and filling masked
    entries with an appropriate NA value.
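
    Examples
    --------
    An illustrative sketch: masked integer values are promoted to float
    and replaced with NaN.

    >>> arr = np.ma.masked_array([1, 2, 3], mask=[False, True, False])
    >>> filled = sanitize_masked_array(arr)
    >>> filled.dtype
    dtype('float64')
    >>> float(filled[1])
    nan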
    """
    mask = ma.getmaskarray(data)
    if mask.any():
        dtype, fill_value = maybe_promote(data.dtype, np.nan)
        dtype = cast(np.dtype, dtype)
        data = ma.asarray(data.astype(dtype, copy=True))
        data.soften_mask()  # set hardmask False if it was True
        data[mask] = fill_value
    else:
        data = data.copy()
    return data


def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray
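
    Examples
    --------
    Illustrative sketches only; the exact result dtype depends on the input.

    >>> sanitize_array([1, 2, 3], index=None)
    array([1, 2, 3])

    >>> sanitize_array([1, 2, 3], index=None, dtype=np.dtype("float64"))
    array([1., 2., 3.])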
    """
    original_dtype = dtype
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, NumpyEADtype):
        # Avoid ending up with a NumpyExtensionArray
        dtype = dtype.numpy_dtype

    object_index = False
    if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
        object_index = True

    # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        if (
            isinstance(data, str)
            and using_pyarrow_string_dtype()
            and original_dtype is None
        ):
            from pandas.core.arrays.string_ import StringDtype

            dtype = StringDtype("pyarrow_numpy")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)

        return data

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a NumpyExtensionArray
        # Until GH#49309 is fixed this check needs to come before the
        #  ExtensionDtype check
        if dtype is not None:
            subarr = data.astype(dtype, copy=copy)
        elif copy:
            subarr = data.copy()
        else:
            subarr = data

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        _sanitize_non_ordered(data)
        cls = dtype.construct_array_type()
        subarr = cls._from_sequence(data, dtype=dtype, copy=copy)

    # GH#846
    elif isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is None:
            subarr = data
            if data.dtype == object:
                subarr = maybe_infer_to_datetimelike(data)
                if (
                    object_index
                    and using_pyarrow_string_dtype()
                    and is_string_dtype(subarr)
                ):
                    # Avoid inference when string option is set
                    subarr = data
            elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
                from pandas.core.arrays.string_ import StringDtype

                dtype = StringDtype(storage="pyarrow_numpy")
                subarr = dtype.construct_array_type()._from_sequence(data, dtype=dtype)

            if subarr is data and copy:
                subarr = subarr.copy()

        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy)

    elif hasattr(data, "__array__"):
        # e.g. dask array GH#38645
        if not copy:
            data = np.asarray(data)
        else:
            data = np.array(data, copy=copy)
        return sanitize_array(
            data,
            index=index,
            dtype=dtype,
            copy=False,
            allow_2d=allow_2d,
        )

    else:
        _sanitize_non_ordered(data)
        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        data = list(data)

        if len(data) == 0 and dtype is None:
            # We default to float64, matching numpy
            subarr = np.array([], dtype=np.float64)

        elif dtype is not None:
            subarr = _try_cast(data, dtype, copy)

        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr


def range_to_ndarray(rng: range) -> np.ndarray:
    """
    Cast a range object to ndarray.
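
    Examples
    --------
    A small illustration; int64 is tried first, with uint64 and then object
    fallbacks for ranges that overflow.

    >>> range_to_ndarray(range(3)).dtype
    dtype('int64')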
    """
    # GH#30171 perf avoid realizing range as a list in np.array
    try:
        arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
    except OverflowError:
        # GH#30173 handling for ranges that overflow int64
        if (rng.start >= 0 and rng.step > 0) or (rng.step < 0 <= rng.stop):
            try:
                arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
            except OverflowError:
                arr = construct_1d_object_array_from_listlike(list(rng))
        else:
            arr = construct_1d_object_array_from_listlike(list(rng))
    return arr


def _sanitize_non_ordered(data) -> None:
    """
    Raise only for unordered sets, e.g., not for dict_keys
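
    Examples
    --------
    Illustrative: sets are rejected because their iteration order is not
    meaningful, while ordered views such as dict_keys are accepted.

    >>> _sanitize_non_ordered({1, 2})
    Traceback (most recent call last):
    ...
    TypeError: 'set' type is unordered

    >>> _sanitize_non_ordered({"a": 1}.keys())  # no exception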
    """
    if isinstance(data, (set, frozenset)):
        raise TypeError(f"'{type(data).__name__}' type is unordered")


def _sanitize_ndim(
    result: ArrayLike,
    data,
    dtype: DtypeObj | None,
    index: Index | None,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Ensure we have a 1-dimensional result array.
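
    Examples
    --------
    A sketch of the failure mode this guards against: 2D ndarray input is
    rejected unless ``allow_2d=True``.

    >>> _sanitize_ndim(np.array([[1, 2]]), np.array([[1, 2]]), None, None)
    Traceback (most recent call last):
    ...
    ValueError: Data must be 1-dimensional, got ndarray of shape (1, 2) instead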
    """
    if getattr(result, "ndim", 0) == 0:
        raise ValueError("result should be arraylike with ndim > 0")

    if result.ndim == 1:
        # the result that we want
        result = _maybe_repeat(result, index)

    elif result.ndim > 1:
        if isinstance(data, np.ndarray):
            if allow_2d:
                return result
            raise ValueError(
                f"Data must be 1-dimensional, got ndarray of shape {data.shape} instead"
            )
        if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
            # i.e. NumpyEADtype("O")

            result = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
            cls = dtype.construct_array_type()
            result = cls._from_sequence(result, dtype=dtype)
        else:
            # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
            # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
            # dtype[Any], None]"
            result = com.asarray_tuplesafe(data, dtype=dtype)  # type: ignore[arg-type]
    return result


def _sanitize_str_dtypes(
    result: np.ndarray, data, dtype: np.dtype | None, copy: bool
) -> np.ndarray:
    """
    Ensure we have a dtype that is supported by pandas.
    """

    # This is to prevent a mixed-type Series from being cast entirely to
    # the NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(result.dtype.type, str):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: if data is a scalar, result already holds the final values
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.asarray(data, dtype=dtype)
            if not copy:
                result = np.asarray(data, dtype=object)
            else:
                result = np.array(data, dtype=object, copy=copy)
    return result


def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
    """
    If we have a length-1 array and an index describing how long we expect
    the result to be, repeat the array.
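
    Examples
    --------
    An illustrative sketch: a length-1 array is broadcast to the index
    length, while longer arrays are returned unchanged.

    >>> _maybe_repeat(np.array([1]), pd.RangeIndex(3))
    array([1, 1, 1])

    >>> _maybe_repeat(np.array([1, 2]), pd.RangeIndex(3))
    array([1, 2])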
    """
    if index is not None:
        if 1 == len(arr) != len(index):
            arr = arr.repeat(len(index))
    return arr


def _try_cast(
    arr: list | np.ndarray,
    dtype: np.dtype,
    copy: bool,
) -> ArrayLike:
    """
    Convert input to a numpy ndarray and cast to the given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype
    copy : bool
        If False, don't copy the data if not needed.

    Returns
    -------
    np.ndarray or ExtensionArray
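
    Examples
    --------
    Illustrative casts; datetimelike dtypes are instead routed through
    maybe_cast_to_datetime and come back as an ExtensionArray.

    >>> _try_cast([1, 2], np.dtype("float64"), copy=False)
    array([1., 2.])

    >>> _try_cast(np.array([1.0, 2.0]), np.dtype("int64"), copy=False)
    array([1, 2])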
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if dtype == object:
        if not is_ndarray:
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in "mM"
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            shape = arr.shape
            if arr.ndim > 1:
                arr = arr.ravel()
        else:
            shape = (len(arr),)
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
            shape
        )

    elif dtype.kind in "mM":
        return maybe_cast_to_datetime(arr, dtype)

    # GH#15832: Check if we are requesting a numeric dtype and
    # that we can convert the data to the requested dtype.
    elif dtype.kind in "iu":
        # raises if values cannot be cast losslessly (e.g. non-integral floats)

        subarr = maybe_cast_to_integer_array(arr, dtype)
    elif not copy:
        subarr = np.asarray(arr, dtype=dtype)
    else:
        subarr = np.array(arr, dtype=dtype, copy=copy)

    return subarr