"""
Constructor functions intended to be shared by pd.array, Series.__init__,
and Index.__new__.

These should not depend on core.internals.
"""
from __future__ import annotations

from collections.abc import Sequence
from typing import (
    TYPE_CHECKING,
    Optional,
    Union,
    cast,
    overload,
)
import warnings

import numpy as np
from numpy import ma

from pandas._config import using_pyarrow_string_dtype

from pandas._libs import lib
from pandas._libs.tslibs import (
    Period,
    get_supported_dtype,
    is_supported_dtype,
)
from pandas._typing import (
    AnyArrayLike,
    ArrayLike,
    Dtype,
    DtypeObj,
    T,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.cast import (
    construct_1d_arraylike_from_scalar,
    construct_1d_object_array_from_listlike,
    maybe_cast_to_datetime,
    maybe_cast_to_integer_array,
    maybe_convert_platform,
    maybe_infer_to_datetimelike,
    maybe_promote,
)
from pandas.core.dtypes.common import (
    is_list_like,
    is_object_dtype,
    is_string_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import NumpyEADtype
from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCExtensionArray,
    ABCIndex,
    ABCSeries,
)
from pandas.core.dtypes.missing import isna

import pandas.core.common as com

if TYPE_CHECKING:
    from pandas import (
        Index,
        Series,
    )
    from pandas.core.arrays.base import ExtensionArray


def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is determined
        by ``pd.options.mode.string_storage`` if the dtype is not explicitly given.

        For all other cases, NumPy's usual inference rules will be used.
    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.NumpyExtensionArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.NumpyExtensionArray` backed by a
    NumPy array.

    >>> pd.array(['a', 'b'], dtype=str)
    <NumpyExtensionArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <NumpyExtensionArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, pandas has arrays that mostly overlap with NumPy

    * :class:`arrays.DatetimeArray`
    * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``NumpyExtensionArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas can infer.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.NumpyExtensionArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <NumpyExtensionArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the
    `dtype` as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <NumpyExtensionArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
    ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        NumpyExtensionArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)
    elif isinstance(data, ABCDataFrame):
        raise TypeError("Cannot pass DataFrame to 'pandas.array'")

    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        # Note: we exclude np.ndarray here, will do type inference on it
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # resolve string aliases and registered extension dtypes to dtype objects
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ExtensionArray) and (dtype is None or data.dtype == dtype):
        # e.g. TimedeltaArray[s], avoid casting to NumpyExtensionArray
        if copy:
            return data.copy()
        return data

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        elif inferred_dtype == "interval":
            return IntervalArray(data, copy=copy)

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to NumpyExtensionArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            dtype = StringDtype()
            cls = dtype.construct_array_type()
            return cls._from_sequence(data, dtype=dtype, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)
        elif inferred_dtype == "empty" and not hasattr(data, "dtype") and not len(data):
            return FloatingArray._from_sequence(data, copy=copy)
        elif (
            inferred_dtype in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            # we will fall back to NumpyExtensionArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, dtype="boolean", copy=copy)

    # Pandas overrides NumPy for
    #  1. datetime64[ns,us,ms,s]
    #  2. timedelta64[ns,us,ms,s]
    # so that a DatetimeArray or TimedeltaArray is returned.
    if lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    if lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    elif lib.is_np_dtype(dtype, "mM"):
        warnings.warn(
            r"datetime64 and timedelta64 dtype resolutions other than "
            r"'s', 'ms', 'us', and 'ns' are deprecated. "
            r"In future releases passing unsupported resolutions will "
            r"raise an exception.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    return NumpyExtensionArray._from_sequence(data, dtype=dtype, copy=copy)


_typs = frozenset(
    {
        "index",
        "rangeindex",
        "multiindex",
        "datetimeindex",
        "timedeltaindex",
        "periodindex",
        "categoricalindex",
        "intervalindex",
        "series",
    }
)


@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Extract the ndarray or ExtensionArray from a Series or Index.

    For all other types, `obj` is just returned as is.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.

    extract_numpy : bool, default False
        Whether to extract the ndarray from a NumpyExtensionArray.

    extract_range : bool, default False
        If we have a RangeIndex, return range._values if True
        (which is a materialized integer ndarray), otherwise return unchanged.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Other objects like lists, arrays, and DataFrames are just passed through.

    >>> extract_array([1, 2, 3])
    [1, 2, 3]

    For an ndarray-backed Series / Index the ndarray is returned.

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])

    To extract all the way down to the ndarray, pass ``extract_numpy=True``.

    >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
    array([1, 2, 3])
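
    A RangeIndex is returned unchanged unless ``extract_range=True`` is
    passed, in which case its materialized integer values are extracted
    (illustrative sketch of both cases):

    >>> extract_array(pd.RangeIndex(3))
    RangeIndex(start=0, stop=3, step=1)

    >>> extract_array(pd.RangeIndex(3), extract_range=True)
    array([0, 1, 2])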
    """
    typ = getattr(obj, "_typ", None)
    if typ in _typs:
        # i.e. isinstance(obj, (ABCIndex, ABCSeries))
        if typ == "rangeindex":
            if extract_range:
                # error: "T" has no attribute "_values"
                return obj._values  # type: ignore[attr-defined]
            return obj

        # error: "T" has no attribute "_values"
        return obj._values  # type: ignore[attr-defined]

    elif extract_numpy and typ == "npy_extension":
        # i.e. isinstance(obj, ABCNumpyExtensionArray)
        # error: "T" has no attribute "to_numpy"
        return obj.to_numpy()  # type: ignore[attr-defined]

    return obj


def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
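
    Examples
    --------
    A minimal illustration; a nanosecond-resolution ndarray is wrapped
    without conversion, while non-datetimelike input passes through:

    >>> ensure_wrapped_if_datetimelike(np.array(["2015", "2016"], dtype="M8[ns]"))
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> ensure_wrapped_if_datetimelike(np.array([1, 2, 3]))
    array([1, 2, 3])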
    """
    if isinstance(arr, np.ndarray):
        if arr.dtype.kind == "M":
            from pandas.core.arrays import DatetimeArray

            dtype = get_supported_dtype(arr.dtype)
            return DatetimeArray._from_sequence(arr, dtype=dtype)

        elif arr.dtype.kind == "m":
            from pandas.core.arrays import TimedeltaArray

            dtype = get_supported_dtype(arr.dtype)
            return TimedeltaArray._from_sequence(arr, dtype=dtype)

    return arr


def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Convert a numpy MaskedArray, softening the mask and filling masked
    entries with an appropriate NA value.
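
    Examples
    --------
    An illustrative sketch: masked integer values are promoted to float
    and replaced with NaN.

    >>> arr = np.ma.masked_array([1, 2, 3], mask=[False, True, False])
    >>> filled = sanitize_masked_array(arr)
    >>> filled.dtype
    dtype('float64')
    >>> float(filled[1])
    nan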
    """
    mask = ma.getmaskarray(data)
    if mask.any():
        dtype, fill_value = maybe_promote(data.dtype, np.nan)
        dtype = cast(np.dtype, dtype)
        data = ma.asarray(data.astype(dtype, copy=True))
        data.soften_mask()  # set hardmask False if it was True
        data[mask] = fill_value
    else:
        data = data.copy()
    return data


def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray
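
    Examples
    --------
    Illustrative sketches only; the exact result dtype depends on the input.

    >>> sanitize_array([1, 2, 3], index=None)
    array([1, 2, 3])

    >>> sanitize_array([1, 2, 3], index=None, dtype=np.dtype("float64"))
    array([1., 2., 3.])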
    """
    original_dtype = dtype
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, NumpyEADtype):
        # Avoid ending up with a NumpyExtensionArray
        dtype = dtype.numpy_dtype

    object_index = False
    if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
        object_index = True

    # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        if (
            isinstance(data, str)
            and using_pyarrow_string_dtype()
            and original_dtype is None
        ):
            from pandas.core.arrays.string_ import StringDtype

            dtype = StringDtype("pyarrow_numpy")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)

        return data

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a NumpyExtensionArray
        # Until GH#49309 is fixed this check needs to come before the
        #  ExtensionDtype check
        if dtype is not None:
            subarr = data.astype(dtype, copy=copy)
        elif copy:
            subarr = data.copy()
        else:
            subarr = data

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        _sanitize_non_ordered(data)
        cls = dtype.construct_array_type()
        subarr = cls._from_sequence(data, dtype=dtype, copy=copy)

    # GH#846
    elif isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is None:
            subarr = data
            if data.dtype == object:
                subarr = maybe_infer_to_datetimelike(data)
                if (
                    object_index
                    and using_pyarrow_string_dtype()
                    and is_string_dtype(subarr)
                ):
                    # Avoid inference when string option is set
                    subarr = data
            elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
                from pandas.core.arrays.string_ import StringDtype

                dtype = StringDtype(storage="pyarrow_numpy")
                subarr = dtype.construct_array_type()._from_sequence(data, dtype=dtype)

            if subarr is data and copy:
                subarr = subarr.copy()

        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy)

    elif hasattr(data, "__array__"):
        # e.g. dask array GH#38645
        if not copy:
            data = np.asarray(data)
        else:
            data = np.array(data, copy=copy)
        return sanitize_array(
            data,
            index=index,
            dtype=dtype,
            copy=False,
            allow_2d=allow_2d,
        )

    else:
        _sanitize_non_ordered(data)
        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        data = list(data)

        if len(data) == 0 and dtype is None:
            # We default to float64, matching numpy
            subarr = np.array([], dtype=np.float64)

        elif dtype is not None:
            subarr = _try_cast(data, dtype, copy)

        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr


def range_to_ndarray(rng: range) -> np.ndarray:
    """
    Cast a range object to ndarray.
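
    Examples
    --------
    A small illustration; int64 is tried first, with uint64 and then object
    fallbacks for ranges that overflow.

    >>> range_to_ndarray(range(3)).dtype
    dtype('int64')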
    """
    # GH#30171 perf avoid realizing range as a list in np.array
    try:
        arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
    except OverflowError:
        # GH#30173 handling for ranges that overflow int64
        if (rng.start >= 0 and rng.step > 0) or (rng.step < 0 <= rng.stop):
            try:
                arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
            except OverflowError:
                arr = construct_1d_object_array_from_listlike(list(rng))
        else:
            arr = construct_1d_object_array_from_listlike(list(rng))
    return arr


def _sanitize_non_ordered(data) -> None:
    """
    Raise only for unordered sets, e.g., not for dict_keys
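
    Examples
    --------
    Illustrative: sets are rejected because their iteration order is not
    meaningful, while ordered views such as dict_keys are accepted.

    >>> _sanitize_non_ordered({1, 2})
    Traceback (most recent call last):
    ...
    TypeError: 'set' type is unordered

    >>> _sanitize_non_ordered({"a": 1}.keys())  # no exception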
    """
    if isinstance(data, (set, frozenset)):
        raise TypeError(f"'{type(data).__name__}' type is unordered")


def _sanitize_ndim(
    result: ArrayLike,
    data,
    dtype: DtypeObj | None,
    index: Index | None,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Ensure we have a 1-dimensional result array.
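
    Examples
    --------
    A sketch of the failure mode this guards against: 2D ndarray input is
    rejected unless ``allow_2d=True``.

    >>> _sanitize_ndim(np.array([[1, 2]]), np.array([[1, 2]]), None, None)
    Traceback (most recent call last):
    ...
    ValueError: Data must be 1-dimensional, got ndarray of shape (1, 2) instead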
    """
    if getattr(result, "ndim", 0) == 0:
        raise ValueError("result should be arraylike with ndim > 0")

    if result.ndim == 1:
        # the result that we want
        result = _maybe_repeat(result, index)

    elif result.ndim > 1:
        if isinstance(data, np.ndarray):
            if allow_2d:
                return result
            raise ValueError(
                f"Data must be 1-dimensional, got ndarray of shape {data.shape} instead"
            )
        if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
            # i.e. NumpyEADtype("O")

            result = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
            cls = dtype.construct_array_type()
            result = cls._from_sequence(result, dtype=dtype)
        else:
            # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
            # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
            # dtype[Any], None]"
            result = com.asarray_tuplesafe(data, dtype=dtype)  # type: ignore[arg-type]
    return result


def _sanitize_str_dtypes(
    result: np.ndarray, data, dtype: np.dtype | None, copy: bool
) -> np.ndarray:
    """
    Ensure we have a dtype that is supported by pandas.
    """

    # This is to prevent a mixed-type Series from being cast entirely to
    # the NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(result.dtype.type, str):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: if data is a scalar, result already holds the final values
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.asarray(data, dtype=dtype)
            if not copy:
                result = np.asarray(data, dtype=object)
            else:
                result = np.array(data, dtype=object, copy=copy)
    return result


def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
    """
    If we have a length-1 array and an index describing how long we expect
    the result to be, repeat the array.
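
    Examples
    --------
    An illustrative sketch: a length-1 array is broadcast to the index
    length, while longer arrays are returned unchanged.

    >>> _maybe_repeat(np.array([1]), pd.RangeIndex(3))
    array([1, 1, 1])

    >>> _maybe_repeat(np.array([1, 2]), pd.RangeIndex(3))
    array([1, 2])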
    """
    if index is not None:
        if 1 == len(arr) != len(index):
            arr = arr.repeat(len(index))
    return arr


def _try_cast(
    arr: list | np.ndarray,
    dtype: np.dtype,
    copy: bool,
) -> ArrayLike:
    """
    Convert input to a numpy ndarray and cast to the given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype
    copy : bool
        If False, don't copy the data if not needed.

    Returns
    -------
    np.ndarray or ExtensionArray
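
    Examples
    --------
    Illustrative casts; datetimelike dtypes are instead routed through
    maybe_cast_to_datetime and come back as an ExtensionArray.

    >>> _try_cast([1, 2], np.dtype("float64"), copy=False)
    array([1., 2.])

    >>> _try_cast(np.array([1.0, 2.0]), np.dtype("int64"), copy=False)
    array([1, 2])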
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if dtype == object:
        if not is_ndarray:
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in "mM"
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            shape = arr.shape
            if arr.ndim > 1:
                arr = arr.ravel()
        else:
            shape = (len(arr),)
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
            shape
        )

    elif dtype.kind in "mM":
        return maybe_cast_to_datetime(arr, dtype)

    # GH#15832: Check if we are requesting a numeric dtype and
    # that we can convert the data to the requested dtype.
    elif dtype.kind in "iu":
        # raises if values cannot be cast losslessly (e.g. non-integral floats)

        subarr = maybe_cast_to_integer_array(arr, dtype)
    elif not copy:
        subarr = np.asarray(arr, dtype=dtype)
    else:
        subarr = np.array(arr, dtype=dtype, copy=copy)

    return subarr