Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/construction.py: 14%

1"""

2Constructor functions intended to be shared by pd.array, Series.__init__,

3and Index.__new__.

5These should not depend on core.internals.

6"""

7from __future__ import annotations

9from typing import (

10 TYPE_CHECKING,

11 Optional,

12 Sequence,

13 Union,

14 cast,

15 overload,

16)

18import numpy as np

19from numpy import ma

21from pandas._libs import lib

22from pandas._libs.tslibs.period import Period

23from pandas._typing import (

24 AnyArrayLike,

25 ArrayLike,

26 Dtype,

27 DtypeObj,

28 T,

29)

31from pandas.core.dtypes.base import (

32 ExtensionDtype,

33 _registry as registry,

34)

35from pandas.core.dtypes.cast import (

36 construct_1d_arraylike_from_scalar,

37 construct_1d_object_array_from_listlike,

38 maybe_cast_to_datetime,

39 maybe_cast_to_integer_array,

40 maybe_convert_platform,

41 maybe_infer_to_datetimelike,

42 maybe_promote,

43)

44from pandas.core.dtypes.common import (

45 is_datetime64_ns_dtype,

46 is_dtype_equal,

47 is_extension_array_dtype,

48 is_integer_dtype,

49 is_list_like,

50 is_object_dtype,

51 is_timedelta64_ns_dtype,

52)

53from pandas.core.dtypes.dtypes import PandasDtype

54from pandas.core.dtypes.generic import (

55 ABCDataFrame,

56 ABCExtensionArray,

57 ABCIndex,

58 ABCPandasArray,

59 ABCRangeIndex,

60 ABCSeries,

61)

62from pandas.core.dtypes.missing import isna

64import pandas.core.common as com

66if TYPE_CHECKING:

67 from pandas import (

68 Index,

69 Series,

70 )

71 from pandas.core.arrays.base import ExtensionArray

def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is determined by
        ``pd.options.mode.string_storage`` if the dtype is not explicitly given.

        For all other cases, NumPy's usual inference rules will be used.

        .. versionchanged:: 1.2.0

            Pandas now also infers nullable-floating dtype for float-like
            input data

    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is a scalar or is not 1-dimensional.
    TypeError
        When `data` is a DataFrame.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.PandasArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.PandasArray` backed by a NumPy
    array.

    >>> pd.array(['a', 'b'], dtype=str)
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``PandasArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.PandasArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <PandasArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
    as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <PandasArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import -- confirm before hoisting).
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        PandasArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    # Reject inputs that can never be interpreted as 1-dimensional data.
    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)
    elif isinstance(data, ABCDataFrame):
        raise TypeError("Cannot pass DataFrame to 'pandas.array'")

    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        # Note: we exclude np.ndarray here, will do type inference on it
        dtype = data.dtype

    # Unbox Series/Index and unwrap a PandasArray down to its ndarray.
    data = extract_array(data, extract_numpy=True)

    # this returns None for not-found dtypes.
    if isinstance(dtype, str):
        # Fall back to the original string when the registry has no match.
        dtype = registry.find(dtype) or dtype

    if isinstance(data, ExtensionArray) and (
        dtype is None or is_dtype_equal(dtype, data.dtype)
    ):
        # e.g. TimedeltaArray[s], avoid casting to PandasArray
        if copy:
            return data.copy()
        return data

    if is_extension_array_dtype(dtype):
        # An explicit extension dtype decides the array class outright.
        cls = cast(ExtensionDtype, dtype).construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        # No dtype given: choose the array type from the inferred scalar kind.
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        elif inferred_dtype == "interval":
            return IntervalArray(data, copy=copy)

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to PandasArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            return StringDtype().construct_array_type()._from_sequence(data, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)

        elif (
            inferred_dtype in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            #  we will fall back to PandasArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, copy=copy)

    # Pandas overrides NumPy for
    #   1. datetime64[ns]
    #   2. timedelta64[ns]
    # so that a DatetimeArray is returned.
    if is_datetime64_ns_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    elif is_timedelta64_ns_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    # Everything else (including the inference fall-throughs above) is wrapped
    # in a NumPy-backed PandasArray.
    return PandasArray._from_sequence(data, dtype=dtype, copy=copy)

380

381

@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Unbox the array backing a Series or Index.

    Anything that is not a Series, Index or (optionally) PandasArray is
    handed back unchanged.

    Parameters
    ----------
    obj : object
        A Series / Index has its underlying array (``_values``) returned.

    extract_numpy : bool, default False
        If True, a PandasArray is additionally converted to an ndarray
        via ``to_numpy()``.

    extract_range : bool, default False
        If True, a RangeIndex is replaced by its ``_values``; if False
        the RangeIndex itself is returned.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Other objects like lists, arrays, and DataFrames are just passed through.

    >>> extract_array([1, 2, 3])
    [1, 2, 3]

    For an ndarray-backed Series / Index the ndarray is returned.

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])

    To extract all the way down to the ndarray, pass ``extract_numpy=True``.

    >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
    array([1, 2, 3])
    """
    is_boxed = isinstance(obj, (ABCIndex, ABCSeries))

    if is_boxed:
        keep_range_as_is = isinstance(obj, ABCRangeIndex) and not extract_range
        if keep_range_as_is:
            # https://github.com/python/mypy/issues/1081
            # mypy cannot express "RangeIndex passes through" with this TypeVar
            return obj  # type: ignore[return-value]
        return obj._values

    if extract_numpy and isinstance(obj, ABCPandasArray):
        return obj.to_numpy()

    return obj

456

457

def ensure_wrapped_if_datetimelike(arr):
    """
    Box datetime64 / timedelta64 ndarrays in their pandas array types.

    An ndarray whose dtype kind is "M" becomes a DatetimeArray and one whose
    kind is "m" becomes a TimedeltaArray. Every other input is returned
    unchanged.
    """
    if not isinstance(arr, np.ndarray):
        return arr

    kind = arr.dtype.kind

    if kind == "M":
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray._from_sequence(arr)

    if kind == "m":
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(arr)

    return arr

474

475

def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Return a copy of a numpy MaskedArray with masked entries filled in.

    When nothing is masked a plain copy is returned. Otherwise the data is
    cast to the dtype ``maybe_promote`` selects for holding ``np.nan``, the
    mask is softened, and the masked positions are overwritten with the
    matching fill value.
    """
    masked_positions = ma.getmaskarray(data)

    if not masked_positions.any():
        return data.copy()

    promoted_dtype, fill_value = maybe_promote(data.dtype, np.nan)
    promoted_dtype = cast(np.dtype, promoted_dtype)

    filled = data.astype(promoted_dtype, copy=True)
    # a hard mask would silently ignore the assignment below
    filled.soften_mask()
    filled[masked_positions] = fill_value
    return filled

492

493

def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None
        Required when `data` is not list-like, since its length determines
        how long the constructed array is.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `data` is not list-like and `index` is None.
    TypeError
        If `data` is a set or frozenset (see ``_sanitize_non_ordered``).
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # zero-dim ndarray: keep its dtype, then treat the item as a scalar
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        # the ndarray was just created, so no further copy is needed
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        # Until GH#49309 is fixed this check needs to come before the
        #  ExtensionDtype check
        if dtype is not None:
            subarr = data.astype(dtype, copy=copy)
        elif copy:
            subarr = data.copy()
        else:
            subarr = data

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        _sanitize_non_ordered(data)
        cls = dtype.construct_array_type()
        subarr = cls._from_sequence(data, dtype=dtype, copy=copy)

    # GH#846
    elif isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is None:
            subarr = data
            if data.dtype == object:
                subarr = maybe_infer_to_datetimelike(data)

            # only copy if inference did not already produce a new object
            if subarr is data and copy:
                subarr = subarr.copy()

        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy)

    elif hasattr(data, "__array__"):
        # e.g. dask array GH#38645
        if copy:
            data = np.array(data, copy=copy)
        else:
            # np.array(..., copy=False) raises on NumPy >= 2 when the
            # conversion needs a copy; np.asarray copies only if required.
            data = np.asarray(data)
        return sanitize_array(
            data,
            index=index,
            dtype=dtype,
            copy=False,
            allow_2d=allow_2d,
        )

    else:
        _sanitize_non_ordered(data)
        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        data = list(data)

        if len(data) == 0 and dtype is None:
            # We default to float64, matching numpy
            subarr = np.array([], dtype=np.float64)

        elif dtype is not None:
            subarr = _try_cast(data, dtype, copy)

        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr

615

616

def range_to_ndarray(rng: range) -> np.ndarray:
    """
    Convert a ``range`` into an ndarray.

    int64 is tried first. If that overflows and the range cannot contain
    negative values, uint64 is tried. If that also overflows, or the range
    may contain negatives, an object-dtype array of Python ints is built.
    """
    start, stop, step = rng.start, rng.stop, rng.step

    # GH#30171 perf avoid realizing range as a list in np.array
    try:
        return np.arange(start, stop, step, dtype="int64")
    except OverflowError:
        # GH#30173 handling for ranges that overflow int64
        pass

    non_negative = (start >= 0 and step > 0) or (step < 0 and stop >= 0)
    if non_negative:
        try:
            return np.arange(start, stop, step, dtype="uint64")
        except OverflowError:
            pass

    return construct_1d_object_array_from_listlike(list(rng))

634

635

636def _sanitize_non_ordered(data) -> None:

637 """

638 Raise only for unordered sets, e.g., not for dict_keys

639 """

640 if isinstance(data, (set, frozenset)):

641 raise TypeError(f"'{type(data).__name__}' type is unordered")

642

643

644def _sanitize_ndim(

645 result: ArrayLike,

646 data,

647 dtype: DtypeObj | None,

648 index: Index | None,

649 *,

650 allow_2d: bool = False,

651) -> ArrayLike:

652 """

653 Ensure we have a 1-dimensional result array.

654 """

655 if getattr(result, "ndim", 0) == 0:

656 raise ValueError("result should be arraylike with ndim > 0")

657

658 if result.ndim == 1:

659 # the result that we want

660 result = _maybe_repeat(result, index)

661

662 elif result.ndim > 1:

663 if isinstance(data, np.ndarray):

664 if allow_2d:

665 return result

666 raise ValueError(

667 f"Data must be 1-dimensional, got ndarray of shape {data.shape} instead"

668 )

669 if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):

670 # i.e. PandasDtype("O")

671

672 result = com.asarray_tuplesafe(data, dtype=np.dtype("object"))

673 cls = dtype.construct_array_type()

674 result = cls._from_sequence(result, dtype=dtype)

675 else:

676 # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type

677 # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,

678 # dtype[Any], None]"

679 result = com.asarray_tuplesafe(data, dtype=dtype) # type: ignore[arg-type]

680 return result

681

682

683def _sanitize_str_dtypes(

684 result: np.ndarray, data, dtype: np.dtype | None, copy: bool

685) -> np.ndarray:

686 """

687 Ensure we have a dtype that is supported by pandas.

688 """

689

690 # This is to prevent mixed-type Series getting all casted to

691 # NumPy string type, e.g. NaN --> '-1#IND'.

692 if issubclass(result.dtype.type, str):

693 # GH#16605

694 # If not empty convert the data to dtype

695 # GH#19853: If data is a scalar, result has already the result

696 if not lib.is_scalar(data):

697 if not np.all(isna(data)):

698 data = np.array(data, dtype=dtype, copy=False)

699 result = np.array(data, dtype=object, copy=copy)

700 return result

701

702

703def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:

704 """

705 If we have a length-1 array and an index describing how long we expect

706 the result to be, repeat the array.

707 """

708 if index is not None:

709 if 1 == len(arr) != len(index):

710 arr = arr.repeat(len(index))

711 return arr

712

713

714def _try_cast(

715 arr: list | np.ndarray,

716 dtype: np.dtype,

717 copy: bool,

718) -> ArrayLike:

719 """

720 Convert input to numpy ndarray and optionally cast to a given dtype.

721

722 Parameters

723 ----------

724 arr : ndarray or list

725 Excludes: ExtensionArray, Series, Index.

726 dtype : np.dtype

727 copy : bool

728 If False, don't copy the data if not needed.

729

730 Returns

731 -------

732 np.ndarray or ExtensionArray

733 """

734 is_ndarray = isinstance(arr, np.ndarray)

735

736 if is_object_dtype(dtype):

737 if not is_ndarray:

738 subarr = construct_1d_object_array_from_listlike(arr)

739 return subarr

740 return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

741

742 elif dtype.kind == "U":

743 # TODO: test cases with arr.dtype.kind in ["m", "M"]

744 if is_ndarray:

745 arr = cast(np.ndarray, arr)

746 shape = arr.shape

747 if arr.ndim > 1:

748 arr = arr.ravel()

749 else:

750 shape = (len(arr),)

751 return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(

752 shape

753 )

754

755 elif dtype.kind in ["m", "M"]:

756 return maybe_cast_to_datetime(arr, dtype)

757

758 # GH#15832: Check if we are requesting a numeric dtype and

759 # that we can convert the data to the requested dtype.

760 elif is_integer_dtype(dtype):

761 # this will raise if we have e.g. floats

762

763 subarr = maybe_cast_to_integer_array(arr, dtype)

764 else:

765 subarr = np.array(arr, dtype=dtype, copy=copy)

766

767 return subarr