1"""
2Routines for casting.
3"""
4
5from __future__ import annotations
6
7import datetime as dt
8import functools
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Literal,
13 TypeVar,
14 cast,
15 overload,
16)
17import warnings
18
19import numpy as np
20
21from pandas._config import using_pyarrow_string_dtype
22
23from pandas._libs import (
24 Interval,
25 Period,
26 lib,
27)
28from pandas._libs.missing import (
29 NA,
30 NAType,
31 checknull,
32)
33from pandas._libs.tslibs import (
34 NaT,
35 OutOfBoundsDatetime,
36 OutOfBoundsTimedelta,
37 Timedelta,
38 Timestamp,
39 is_supported_dtype,
40)
41from pandas._libs.tslibs.timedeltas import array_to_timedelta64
42from pandas.errors import (
43 IntCastingNaNError,
44 LossySetitemError,
45)
46
47from pandas.core.dtypes.common import (
48 ensure_int8,
49 ensure_int16,
50 ensure_int32,
51 ensure_int64,
52 ensure_object,
53 ensure_str,
54 is_bool,
55 is_complex,
56 is_float,
57 is_integer,
58 is_object_dtype,
59 is_scalar,
60 is_string_dtype,
61 pandas_dtype as pandas_dtype_func,
62)
63from pandas.core.dtypes.dtypes import (
64 ArrowDtype,
65 BaseMaskedDtype,
66 CategoricalDtype,
67 DatetimeTZDtype,
68 ExtensionDtype,
69 IntervalDtype,
70 PandasExtensionDtype,
71 PeriodDtype,
72)
73from pandas.core.dtypes.generic import (
74 ABCExtensionArray,
75 ABCIndex,
76 ABCSeries,
77)
78from pandas.core.dtypes.inference import is_list_like
79from pandas.core.dtypes.missing import (
80 is_valid_na_for_dtype,
81 isna,
82 na_value_for_dtype,
83 notna,
84)
85
86from pandas.io._util import _arrow_dtype_mapping
87
88if TYPE_CHECKING:
89 from collections.abc import (
90 Sequence,
91 Sized,
92 )
93
94 from pandas._typing import (
95 ArrayLike,
96 Dtype,
97 DtypeObj,
98 NumpyIndexT,
99 Scalar,
100 npt,
101 )
102
103 from pandas import Index
104 from pandas.core.arrays import (
105 Categorical,
106 DatetimeArray,
107 ExtensionArray,
108 IntervalArray,
109 PeriodArray,
110 TimedeltaArray,
111 )
112
113
114_int8_max = np.iinfo(np.int8).max
115_int16_max = np.iinfo(np.int16).max
116_int32_max = np.iinfo(np.int32).max
117
118_dtype_obj = np.dtype(object)
119
120NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray)
121
122
def maybe_convert_platform(
    values: list | tuple | range | np.ndarray | ExtensionArray,
) -> ArrayLike:
    """try to do platform conversion, allow ndarray or list here"""
    arr: ArrayLike

    if not isinstance(values, (list, tuple, range)):
        # The caller is responsible for ensuring that we have np.ndarray
        # or ExtensionArray here.
        arr = values
    else:
        arr = construct_1d_object_array_from_listlike(values)

    if arr.dtype != _dtype_obj:
        return arr

    # Object dtype may hide homogeneous values (e.g. all ints); let the
    # cython inference pick a tighter dtype where possible.
    return lib.maybe_convert_objects(cast(np.ndarray, arr))
141
142
def is_nested_object(obj) -> bool:
    """
    Check whether ``obj`` is an object-dtype Series containing at least one
    Series element (i.e. a "nested" object).

    This may not necessarily be performant (iterates the values).
    """
    if not isinstance(obj, ABCSeries):
        return False
    if not is_object_dtype(obj.dtype):
        # Only object dtype can hold arbitrary Python objects.
        return False
    return any(isinstance(element, ABCSeries) for element in obj._values)
156
157
def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
    """
    Cast scalar to Timestamp or Timedelta if scalar is datetime-like
    and dtype is not object.

    Parameters
    ----------
    value : scalar
    dtype : Dtype, optional

    Returns
    -------
    scalar
    """
    # With object dtype the caller wants the raw value, not a pandas box.
    if dtype != _dtype_obj:
        if isinstance(value, (np.datetime64, dt.datetime)):
            return Timestamp(value)
        if isinstance(value, (np.timedelta64, dt.timedelta)):
            return Timedelta(value)
    return value
180
181
def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
    """
    If passed a scalar cast the scalar to a python native type.

    Parameters
    ----------
    value : scalar or Series

    Returns
    -------
    scalar or Series
    """
    # Order matters: bool is an int subclass, but is_integer/is_bool from
    # pandas distinguish them, matching the original branch order.
    if is_float(value):
        return float(value)
    if is_integer(value):
        return int(value)
    if is_bool(value):
        return bool(value)
    if isinstance(value, (np.datetime64, np.timedelta64)):
        return maybe_box_datetimelike(value)
    if value is NA:
        return None
    return value
205
206
def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
    """
    Convert a Timestamp or Timedelta to timedelta64 or datetime64 for setting
    into a numpy array. Failing to unbox would risk dropping nanoseconds.

    Notes
    -----
    Caller is responsible for checking dtype.kind in "mM"
    """
    if is_valid_na_for_dtype(value, dtype):
        # GH#36541: can't fill array directly with pd.NaT
        # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
        # ValueError: cannot convert float NaN to integer
        value = dtype.type("NaT", "ns")
    elif isinstance(value, Timestamp):
        if value.tz is not None:
            # A tz-aware Timestamp can only be kept boxed for a tz-aware dtype.
            if not isinstance(dtype, DatetimeTZDtype):
                raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
        else:
            value = value.to_datetime64()
    elif isinstance(value, Timedelta):
        value = value.to_timedelta64()

    _disallow_mismatched_datetimelike(value, dtype)
    return value
231
232
def _disallow_mismatched_datetimelike(value, dtype: DtypeObj):
    """
    numpy allows np.array(dt64values, dtype="timedelta64[ns]") and
    vice-versa, but we do not want to allow this, so we need to
    check explicitly
    """
    vdtype = getattr(value, "dtype", None)
    if vdtype is None:
        return
    # Raise exactly when one side is datetime64 and the other timedelta64.
    if {vdtype.kind, dtype.kind} == {"m", "M"}:
        raise TypeError(f"Cannot cast {repr(value)} to {dtype}")
246
247
@overload
def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray:
    ...


@overload
def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike:
    ...


def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike:
    """
    try to cast to the specified dtype (e.g. convert back to bool/int
    or could be an astype of float64->float32

    Parameters
    ----------
    result : ndarray or ExtensionArray
    dtype : str or np.dtype
        The string "infer" means choose a target dtype from the inferred
        type of ``result``'s values.

    Returns
    -------
    ndarray or ExtensionArray
        ``result`` unchanged if no lossless downcast applies.
    """
    if isinstance(result, ABCSeries):
        # defensive: callers are expected to pass arrays, not Series
        result = result._values
    do_round = False

    if isinstance(dtype, str):
        if dtype == "infer":
            # map the inferred type of the values to a concrete target dtype
            inferred_type = lib.infer_dtype(result, skipna=False)
            if inferred_type == "boolean":
                dtype = "bool"
            elif inferred_type == "integer":
                dtype = "int64"
            elif inferred_type == "datetime64":
                dtype = "datetime64[ns]"
            elif inferred_type in ["timedelta", "timedelta64"]:
                dtype = "timedelta64[ns]"

            # try to upcast here
            elif inferred_type == "floating":
                dtype = "int64"
                if issubclass(result.dtype.type, np.number):
                    # floats only downcast to int when rounding is lossless;
                    # do_round makes the equality check below round first
                    do_round = True

            else:
                # TODO: complex? what if result is already non-object?
                dtype = "object"

        dtype = np.dtype(dtype)

    if not isinstance(dtype, np.dtype):
        # enforce our signature annotation
        raise TypeError(dtype)  # pragma: no cover

    converted = maybe_downcast_numeric(result, dtype, do_round)
    if converted is not result:
        # the numeric path succeeded; nothing more to do
        return converted

    # a datetimelike
    # GH12821, iNaT is cast to float
    if dtype.kind in "mM" and result.dtype.kind in "if":
        result = result.astype(dtype)

    elif dtype.kind == "m" and result.dtype == _dtype_obj:
        # test_where_downcast_to_td64
        result = cast(np.ndarray, result)
        result = array_to_timedelta64(result)

    elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj:
        result = cast(np.ndarray, result)
        return np.asarray(maybe_cast_to_datetime(result, dtype=dtype))

    return result
314
315
@overload
def maybe_downcast_numeric(
    result: np.ndarray, dtype: np.dtype, do_round: bool = False
) -> np.ndarray:
    ...


@overload
def maybe_downcast_numeric(
    result: ExtensionArray, dtype: DtypeObj, do_round: bool = False
) -> ArrayLike:
    ...


def maybe_downcast_numeric(
    result: ArrayLike, dtype: DtypeObj, do_round: bool = False
) -> ArrayLike:
    """
    Subset of maybe_downcast_to_dtype restricted to numeric dtypes.

    Parameters
    ----------
    result : ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    do_round : bool
        If True, round floats before checking whether the cast is lossless.

    Returns
    -------
    ndarray or ExtensionArray
        ``result`` unchanged (same object) when no lossless downcast applies.
    """
    if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype):
        # e.g. SparseDtype has no itemsize attr
        return result

    def trans(x):
        # optionally round before the exact-equality comparisons below
        if do_round:
            return x.round()
        return x

    if dtype.kind == result.dtype.kind:
        # don't allow upcasts here (except if empty)
        if result.dtype.itemsize <= dtype.itemsize and result.size:
            return result

    if dtype.kind in "biu":
        if not result.size:
            # if we don't have any elements, just astype it
            return trans(result).astype(dtype)

        # probe the first element; non-numeric objects mean no downcast
        if isinstance(result, np.ndarray):
            element = result.item(0)
        else:
            element = result.iloc[0]
        if not isinstance(element, (np.integer, np.floating, int, float, bool)):
            # a comparable, e.g. a Decimal may slip in here
            return result

        if (
            issubclass(result.dtype.type, (np.object_, np.number))
            and notna(result).all()
        ):
            new_result = trans(result).astype(dtype)
            if new_result.dtype.kind == "O" or result.dtype.kind == "O":
                # np.allclose may raise TypeError on object-dtype
                if (new_result == result).all():
                    return new_result
            else:
                if np.allclose(new_result, result, rtol=0):
                    return new_result

    elif (
        issubclass(dtype.type, np.floating)
        and result.dtype.kind != "b"
        and not is_string_dtype(result.dtype)
    ):
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "overflow encountered in cast", RuntimeWarning
            )
            new_result = result.astype(dtype)

        # Adjust tolerances based on floating point size
        size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}

        atol = size_tols.get(new_result.dtype.itemsize, 0.0)

        # Check downcast float values are still equal within 7 digits when
        # converting from float64 to float32
        if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
            return new_result

    elif dtype.kind == result.dtype.kind == "c":
        new_result = result.astype(dtype)

        if np.array_equal(new_result, result, equal_nan=True):
            # TODO: use tolerance like we do for float?
            return new_result

    return result
415
416
def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT:
    """
    If array is a int/uint/float bit size lower than 64 bit, upcast it to 64 bit.

    Parameters
    ----------
    arr : ndarray or ExtensionArray

    Returns
    -------
    ndarray or ExtensionArray
    """
    # Each numeric kind maps to its 64-bit representative.
    targets = {"i": np.int64, "u": np.uint64, "f": np.float64}
    dtype = arr.dtype
    target = targets.get(dtype.kind)
    if target is not None and dtype != target:
        return arr.astype(target)
    # Already 64-bit, or a non-int/uint/float kind: return unchanged.
    return arr
438
439
440def maybe_cast_pointwise_result(
441 result: ArrayLike,
442 dtype: DtypeObj,
443 numeric_only: bool = False,
444 same_dtype: bool = True,
445) -> ArrayLike:
446 """
447 Try casting result of a pointwise operation back to the original dtype if
448 appropriate.
449
450 Parameters
451 ----------
452 result : array-like
453 Result to cast.
454 dtype : np.dtype or ExtensionDtype
455 Input Series from which result was calculated.
456 numeric_only : bool, default False
457 Whether to cast only numerics or datetimes as well.
458 same_dtype : bool, default True
459 Specify dtype when calling _from_sequence
460
461 Returns
462 -------
463 result : array-like
464 result maybe casted to the dtype.
465 """
466
467 if isinstance(dtype, ExtensionDtype):
468 cls = dtype.construct_array_type()
469 if same_dtype:
470 result = _maybe_cast_to_extension_array(cls, result, dtype=dtype)
471 else:
472 result = _maybe_cast_to_extension_array(cls, result)
473
474 elif (numeric_only and dtype.kind in "iufcb") or not numeric_only:
475 result = maybe_downcast_to_dtype(result, dtype)
476
477 return result
478
479
def _maybe_cast_to_extension_array(
    cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None
) -> ArrayLike:
    """
    Call to `_from_sequence` that returns the object unchanged on Exception.

    Parameters
    ----------
    cls : class, subclass of ExtensionArray
    obj : arraylike
        Values to pass to cls._from_sequence
    dtype : ExtensionDtype, optional

    Returns
    -------
    ExtensionArray or obj
    """
    if dtype is not None:
        # _from_scalars is the strict path: only TypeError/ValueError mean
        # "cannot be represented"; anything else propagates.
        try:
            return cls._from_scalars(obj, dtype=dtype)
        except (TypeError, ValueError):
            return obj

    try:
        return cls._from_sequence(obj, dtype=dtype)
    except Exception:
        # We can't predict what downstream EA constructors may raise
        return obj
512
513
@overload
def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
    ...


@overload
def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype:
    ...


def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
    """
    If we have a dtype that cannot hold NA values, find the best match that can.
    """
    if isinstance(dtype, ExtensionDtype):
        if dtype._can_hold_na:
            return dtype
        if isinstance(dtype, IntervalDtype):
            # TODO(GH#45349): don't special-case IntervalDtype, allow
            # overriding instead of returning object below.
            return IntervalDtype(np.float64, closed=dtype.closed)
        return _dtype_obj
    if dtype.kind == "b":
        # numpy bool cannot hold NA -> fall back to object
        return _dtype_obj
    if dtype.kind in "iu":
        # integers promote to float64 so NaN fits
        return np.dtype(np.float64)
    return dtype
541
542
# Canonical NA sentinels keyed by type: used by maybe_promote to replace
# equivalent-but-distinct NaN/NaT objects with singletons, so the lru-cached
# promotion lookup gets cache hits for all equivalent inputs.
_canonical_nans = {
    np.datetime64: np.datetime64("NaT", "ns"),
    np.timedelta64: np.timedelta64("NaT", "ns"),
    type(np.nan): np.nan,
}
548
549
def maybe_promote(dtype: np.dtype, fill_value=np.nan):
    """
    Find the minimal dtype that can hold both the given dtype and fill_value.

    Parameters
    ----------
    dtype : np.dtype
    fill_value : scalar, default np.nan

    Returns
    -------
    dtype
        Upcasted from dtype argument if necessary.
    fill_value
        Upcasted from fill_value argument if necessary.

    Raises
    ------
    ValueError
        If fill_value is a non-scalar and dtype is not object.
    """
    orig = fill_value
    orig_is_nat = False
    if checknull(fill_value):
        # https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740
        # avoid cache misses with NaN/NaT values that are not singletons
        if fill_value is not NA:
            try:
                orig_is_nat = np.isnat(fill_value)
            except TypeError:
                pass

        # swap in a canonical singleton so the cached lookup below hits
        fill_value = _canonical_nans.get(type(fill_value), fill_value)

    # for performance, we are using a cached version of the actual implementation
    # of the function in _maybe_promote. However, this doesn't always work (in case
    # of non-hashable arguments), so we fallback to the actual implementation if needed
    try:
        # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type
        # "Type[Any]"; expected "Hashable"  [arg-type]
        dtype, fill_value = _maybe_promote_cached(
            dtype, fill_value, type(fill_value)  # type: ignore[arg-type]
        )
    except TypeError:
        # if fill_value is not hashable (required for caching)
        dtype, fill_value = _maybe_promote(dtype, fill_value)

    if (dtype == _dtype_obj and orig is not None) or (
        orig_is_nat and np.datetime_data(orig)[0] != "ns"
    ):
        # GH#51592,53497 restore our potentially non-canonical fill_value
        fill_value = orig
    return dtype, fill_value
603
604
@functools.lru_cache
def _maybe_promote_cached(dtype, fill_value, fill_value_type):
    # The cached version of _maybe_promote below.
    # fill_value_type is an (otherwise unused) extra cache-key component that
    # differentiates values which compare/hash equal, e.g. 1 vs True.
    return _maybe_promote(dtype, fill_value)
611
612
def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
    # The actual implementation of the function, use `maybe_promote` above for
    # a cached version. Returns the (possibly upcast) dtype together with a
    # fill_value coerced to that dtype.
    if not is_scalar(fill_value):
        # with object dtype there is nothing to promote, and the user can
        # pass pretty much any weird fill_value they like
        if dtype != object:
            raise ValueError("fill_value must be a scalar")
        dtype = _dtype_obj
        return dtype, fill_value

    if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in "iufcmM":
        dtype = ensure_dtype_can_hold_na(dtype)
        fv = na_value_for_dtype(dtype)
        return dtype, fv

    elif isinstance(dtype, CategoricalDtype):
        if fill_value in dtype.categories or isna(fill_value):
            # a known category (or NA) fits without promotion
            return dtype, fill_value
        else:
            return object, ensure_object(fill_value)

    elif isna(fill_value):
        # NA-like fill for a dtype not covered above -> fall back to object
        dtype = _dtype_obj
        if fill_value is None:
            # but we retain e.g. pd.NA
            fill_value = np.nan
        return dtype, fill_value

    # returns tuple of (dtype, fill_value)
    if issubclass(dtype.type, np.datetime64):
        inferred, fv = infer_dtype_from_scalar(fill_value)
        if inferred == dtype:
            return dtype, fv

        from pandas.core.arrays import DatetimeArray

        # reuse DatetimeArray's setitem validation to decide whether the
        # fill_value is representable as datetime64[ns]
        dta = DatetimeArray._from_sequence([], dtype="M8[ns]")
        try:
            fv = dta._validate_setitem_value(fill_value)
            return dta.dtype, fv
        except (ValueError, TypeError):
            return _dtype_obj, fill_value

    elif issubclass(dtype.type, np.timedelta64):
        inferred, fv = infer_dtype_from_scalar(fill_value)
        if inferred == dtype:
            return dtype, fv

        elif inferred.kind == "m":
            # different unit, e.g. passed np.timedelta64(24, "h") with dtype=m8[ns]
            # see if we can losslessly cast it to our dtype
            unit = np.datetime_data(dtype)[0]
            try:
                td = Timedelta(fill_value).as_unit(unit, round_ok=False)
            except OutOfBoundsTimedelta:
                return _dtype_obj, fill_value
            else:
                return dtype, td.asm8

        return _dtype_obj, fill_value

    elif is_float(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            dtype = np.dtype(np.float64)

        elif dtype.kind == "f":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.float64 and dtype is np.float32
                dtype = mst

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

    elif is_bool(fill_value):
        if not issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

    elif is_integer(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            if not np_can_cast_scalar(fill_value, dtype):  # type: ignore[arg-type]
                # upcast to prevent overflow
                mst = np.min_scalar_type(fill_value)
                dtype = np.promote_types(dtype, mst)
                if dtype.kind == "f":
                    # Case where we disagree with numpy
                    dtype = np.dtype(np.object_)

    elif is_complex(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, (np.integer, np.floating)):
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.complex128 and dtype is np.complex64
                dtype = mst

    else:
        dtype = np.dtype(np.object_)

    # in case we have a string that looked like a number
    if issubclass(dtype.type, (bytes, str)):
        dtype = np.dtype(np.object_)

    fill_value = _ensure_dtype_type(fill_value, dtype)
    return dtype, fill_value
734
735
def _ensure_dtype_type(value, dtype: np.dtype):
    """
    Ensure that the given value is an instance of the given dtype.

    e.g. if out dtype is np.complex64_, we should have an instance of that
    as opposed to a python complex object.

    Parameters
    ----------
    value : object
    dtype : np.dtype

    Returns
    -------
    object
    """
    # object dtype is the one exception where we do NOT wrap the value
    # in the dtype's scalar type.
    if dtype != _dtype_obj:
        # Note: before we get here we have already excluded isna(value)
        value = dtype.type(value)
    return value
759
760
def infer_dtype_from(val) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar or array.

    Parameters
    ----------
    val : object
    """
    # Dispatch on list-likeness: arrays/lists go through the array path,
    # everything else is treated as a scalar.
    if is_list_like(val):
        return infer_dtype_from_array(val)
    return infer_dtype_from_scalar(val)
772
773
def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar.

    Parameters
    ----------
    val : object

    Returns
    -------
    tuple of (dtype, value)
        The value may be converted to a canonical form for that dtype
        (e.g. datetime -> np.datetime64).
    """
    dtype: DtypeObj = _dtype_obj

    # a 1-element ndarray
    if isinstance(val, np.ndarray):
        if val.ndim != 0:
            msg = "invalid ndarray passed to infer_dtype_from_scalar"
            raise ValueError(msg)

        dtype = val.dtype
        val = lib.item_from_zerodim(val)

    elif isinstance(val, str):
        # If we create an empty array using a string to infer
        # the dtype, NumPy will only allocate one character per entry
        # so this is kind of bad. Alternately we could use np.repeat
        # instead of np.empty (but then you still don't want things
        # coming out as np.str_!

        dtype = _dtype_obj
        if using_pyarrow_string_dtype():
            from pandas.core.arrays.string_ import StringDtype

            dtype = StringDtype(storage="pyarrow_numpy")

    elif isinstance(val, (np.datetime64, dt.datetime)):
        try:
            val = Timestamp(val)
        except OutOfBoundsDatetime:
            # too far out of range for datetime64 -> keep as object
            return _dtype_obj, val

        if val is NaT or val.tz is None:
            val = val.to_datetime64()
            dtype = val.dtype
            # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
        else:
            dtype = DatetimeTZDtype(unit=val.unit, tz=val.tz)

    elif isinstance(val, (np.timedelta64, dt.timedelta)):
        try:
            val = Timedelta(val)
        except (OutOfBoundsTimedelta, OverflowError):
            dtype = _dtype_obj
        else:
            if val is NaT:
                val = np.timedelta64("NaT", "ns")
            else:
                val = val.asm8
            dtype = val.dtype

    elif is_bool(val):
        dtype = np.dtype(np.bool_)

    elif is_integer(val):
        if isinstance(val, np.integer):
            # preserve e.g. int8/int32 width
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.int64)

        try:
            np.array(val, dtype=dtype)
        except OverflowError:
            # e.g. a Python int too large for int64
            dtype = np.array(val).dtype

    elif is_float(val):
        if isinstance(val, np.floating):
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.float64)

    elif is_complex(val):
        dtype = np.dtype(np.complex128)

    # Period/Interval are checked last; they are not covered by the
    # branches above.
    if isinstance(val, Period):
        dtype = PeriodDtype(freq=val.freq)
    elif isinstance(val, Interval):
        subtype = infer_dtype_from_scalar(val.left)[0]
        dtype = IntervalDtype(subtype=subtype, closed=val.closed)

    return dtype, val
861
862
def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
    """
    Convert datetimelike-keyed dicts to a Timestamp-keyed dict.

    Parameters
    ----------
    d: dict-like object

    Returns
    -------
    dict
    """
    out: dict[Scalar, Scalar] = {}
    for key, value in d.items():
        # non-datetimelike keys pass through maybe_box_datetimelike unchanged
        out[maybe_box_datetimelike(key)] = value
    return out
876
877
def infer_dtype_from_array(arr) -> tuple[DtypeObj, ArrayLike]:
    """
    Infer the dtype from an array.

    Parameters
    ----------
    arr : array

    Returns
    -------
    tuple (pandas-compat dtype, array)


    Examples
    --------
    >>> np.asarray([1, '1'])
    array(['1', '1'], dtype='<U21')

    >>> infer_dtype_from_array([1, '1'])
    (dtype('O'), [1, '1'])
    """
    if isinstance(arr, np.ndarray):
        return arr.dtype, arr

    if not is_list_like(arr):
        raise TypeError("'arr' must be list-like")

    if isinstance(getattr(arr, "dtype", None), ExtensionDtype):
        return arr.dtype, arr

    if isinstance(arr, ABCSeries):
        return arr.dtype, np.asarray(arr)

    # don't force numpy coerce with nan's
    inferred = lib.infer_dtype(arr, skipna=False)
    if inferred in ("string", "bytes", "mixed", "mixed-integer"):
        # coercing to ndarray would mangle these; report object and keep arr
        return np.dtype(np.object_), arr

    arr = np.asarray(arr)
    return arr.dtype, arr
919
920
def _maybe_infer_dtype_type(element):
    """
    Try to infer an object's dtype, for use in arithmetic ops.

    Uses `element.dtype` if that's available.
    Objects implementing the iterator protocol are cast to a NumPy array,
    and from there the array's type is used.

    Parameters
    ----------
    element : object
        Possibly has a `.dtype` attribute, and possibly the iterator
        protocol.

    Returns
    -------
    tipo : type

    Examples
    --------
    >>> from collections import namedtuple
    >>> Foo = namedtuple("Foo", "dtype")
    >>> _maybe_infer_dtype_type(Foo(np.dtype("i8")))
    dtype('int64')
    """
    if hasattr(element, "dtype"):
        return element.dtype
    if is_list_like(element):
        # coerce and read the resulting array's dtype
        return np.asarray(element).dtype
    # neither a dtype attribute nor list-like
    return None
953
954
def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
    """
    Change string like dtypes to object for
    ``DataFrame.select_dtypes()``.
    """
    # error: Argument 1 to <set> has incompatible type "Type[generic]"; expected
    # "Union[dtype[Any], ExtensionDtype, None]"
    # error: Argument 2 to <set> has incompatible type "Type[generic]"; expected
    # "Union[dtype[Any], ExtensionDtype, None]"
    string_dtypes: set[DtypeObj] = {
        np.dtype("S").type,  # type: ignore[arg-type]
        np.dtype("<U").type,  # type: ignore[arg-type]
    }
    # any overlap means a string dtype was requested
    if dtype_set & string_dtypes:
        raise TypeError("string dtypes are not allowed, use 'object' instead")
970
971
def coerce_indexer_dtype(indexer, categories) -> np.ndarray:
    """coerce the indexer input array to the smallest dtype possible"""
    length = len(categories)
    # Try the narrowest integer width first, falling through to int64.
    for capacity, ensure in (
        (_int8_max, ensure_int8),
        (_int16_max, ensure_int16),
        (_int32_max, ensure_int32),
    ):
        if length < capacity:
            return ensure(indexer)
    return ensure_int64(indexer)
982
983
def convert_dtypes(
    input_array: ArrayLike,
    convert_string: bool = True,
    convert_integer: bool = True,
    convert_boolean: bool = True,
    convert_floating: bool = True,
    infer_objects: bool = False,
    dtype_backend: Literal["numpy_nullable", "pyarrow"] = "numpy_nullable",
) -> DtypeObj:
    """
    Convert objects to best possible type, and optionally,
    to types supporting ``pd.NA``.

    Parameters
    ----------
    input_array : ExtensionArray or np.ndarray
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, defaults True
        Whether object dtypes should be converted to ``BooleanDtypes()``.
    convert_floating : bool, defaults True
        Whether, if possible, conversion can be done to floating extension types.
        If `convert_integer` is also True, preference will be give to integer
        dtypes if the floats can be faithfully casted to integers.
    infer_objects : bool, defaults False
        Whether to also infer objects to float/int if possible. Is only hit if the
        object array contains pd.NA.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    Returns
    -------
    np.dtype, or ExtensionDtype
        Only the dtype is returned; the caller is responsible for the actual
        cast of the values.
    """
    inferred_dtype: str | DtypeObj

    if (
        convert_string or convert_integer or convert_boolean or convert_floating
    ) and isinstance(input_array, np.ndarray):
        if input_array.dtype == object:
            # a string here is an intermediate result; it is replaced by a
            # concrete dtype (or the input dtype) before returning
            inferred_dtype = lib.infer_dtype(input_array)
        else:
            inferred_dtype = input_array.dtype

        if is_string_dtype(inferred_dtype):
            if not convert_string or inferred_dtype == "bytes":
                inferred_dtype = input_array.dtype
            else:
                inferred_dtype = pandas_dtype_func("string")

        if convert_integer:
            target_int_dtype = pandas_dtype_func("Int64")

            if input_array.dtype.kind in "iu":
                from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE

                # keep the same bit width, e.g. int32 -> Int32
                inferred_dtype = NUMPY_INT_TO_DTYPE.get(
                    input_array.dtype, target_int_dtype
                )
            elif input_array.dtype.kind in "fcb":
                # TODO: de-dup with maybe_cast_to_integer_array?
                arr = input_array[notna(input_array)]
                if (arr.astype(int) == arr).all():
                    # all non-NA values are whole numbers -> nullable Int64
                    inferred_dtype = target_int_dtype
                else:
                    inferred_dtype = input_array.dtype
            elif (
                infer_objects
                and input_array.dtype == object
                and (isinstance(inferred_dtype, str) and inferred_dtype == "integer")
            ):
                inferred_dtype = target_int_dtype

        if convert_floating:
            if input_array.dtype.kind in "fcb":
                # i.e. numeric but not integer
                from pandas.core.arrays.floating import NUMPY_FLOAT_TO_DTYPE

                inferred_float_dtype: DtypeObj = NUMPY_FLOAT_TO_DTYPE.get(
                    input_array.dtype, pandas_dtype_func("Float64")
                )
                # if we could also convert to integer, check if all floats
                # are actually integers
                if convert_integer:
                    # TODO: de-dup with maybe_cast_to_integer_array?
                    arr = input_array[notna(input_array)]
                    if (arr.astype(int) == arr).all():
                        inferred_dtype = pandas_dtype_func("Int64")
                    else:
                        inferred_dtype = inferred_float_dtype
                else:
                    inferred_dtype = inferred_float_dtype
            elif (
                infer_objects
                and input_array.dtype == object
                and (
                    isinstance(inferred_dtype, str)
                    and inferred_dtype == "mixed-integer-float"
                )
            ):
                inferred_dtype = pandas_dtype_func("Float64")

        if convert_boolean:
            if input_array.dtype.kind == "b":
                inferred_dtype = pandas_dtype_func("boolean")
            elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
                inferred_dtype = pandas_dtype_func("boolean")

        if isinstance(inferred_dtype, str):
            # If we couldn't do anything else, then we retain the dtype
            inferred_dtype = input_array.dtype

    else:
        # nothing requested, or the input is already an ExtensionArray
        inferred_dtype = input_array.dtype

    if dtype_backend == "pyarrow":
        from pandas.core.arrays.arrow.array import to_pyarrow_type
        from pandas.core.arrays.string_ import StringDtype

        assert not isinstance(inferred_dtype, str)

        if (
            (convert_integer and inferred_dtype.kind in "iu")
            or (convert_floating and inferred_dtype.kind in "fc")
            or (convert_boolean and inferred_dtype.kind == "b")
            or (convert_string and isinstance(inferred_dtype, StringDtype))
            or (
                inferred_dtype.kind not in "iufcb"
                and not isinstance(inferred_dtype, StringDtype)
            )
        ):
            # find the numpy base dtype from which to derive the pyarrow type
            if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(
                inferred_dtype, DatetimeTZDtype
            ):
                base_dtype = inferred_dtype.base
            elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
                base_dtype = inferred_dtype.numpy_dtype
            elif isinstance(inferred_dtype, StringDtype):
                base_dtype = np.dtype(str)
            else:
                base_dtype = inferred_dtype
            if (
                base_dtype.kind == "O"  # type: ignore[union-attr]
                and input_array.size > 0
                and isna(input_array).all()
            ):
                import pyarrow as pa

                # all-NA object input maps to the pyarrow null type
                pa_type = pa.null()
            else:
                pa_type = to_pyarrow_type(base_dtype)
            if pa_type is not None:
                inferred_dtype = ArrowDtype(pa_type)
    elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
        # GH 53648
        inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]

    # error: Incompatible return value type (got "Union[str, Union[dtype[Any],
    # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
    return inferred_dtype  # type: ignore[return-value]
1154
1155
def maybe_infer_to_datetimelike(
    value: npt.NDArray[np.object_],
) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
    """
    Try to convert an object-dtype ndarray to a datetimelike array.

    No dtype is passed, and the values are not changed unless we find an
    actual datetime/timedelta-like set.  This is strict: a
    datetime/timedelta entry is REQUIRED in addition to possible
    nulls/string-likes.

    Parameters
    ----------
    value : np.ndarray[object]
        Must be 1-dimensional with object dtype.

    Returns
    -------
    np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
    """
    # Callers are responsible for only passing 1-dim ndarray[object].
    if not isinstance(value, np.ndarray) or value.dtype != object:
        raise TypeError(type(value))  # pragma: no cover
    if value.ndim != 1:
        raise ValueError(value.ndim)  # pragma: no cover

    if len(value) == 0:
        return value

    # error: Incompatible return value type (got "Union[ExtensionArray,
    # ndarray[Any, Any]]", expected "Union[ndarray[Any, Any], DatetimeArray,
    # TimedeltaArray, PeriodArray, IntervalArray]")
    return lib.maybe_convert_objects(  # type: ignore[return-value]
        value,
        # Numeric conversion is deliberately skipped: if numeric were
        # appropriate, numpy would already have produced a numeric dtype.
        convert_numeric=False,
        convert_non_numeric=True,
        dtype_if_all_nat=np.dtype("M8[ns]"),
    )
1197
1198
def maybe_cast_to_datetime(
    value: np.ndarray | list, dtype: np.dtype
) -> ExtensionArray | np.ndarray:
    """
    Try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT.

    Caller is responsible for handling ExtensionDtype cases and non dt64/td64
    cases.
    """
    from pandas.core.arrays.datetimes import DatetimeArray
    from pandas.core.arrays.timedeltas import TimedeltaArray

    assert dtype.kind in "mM"
    if not is_list_like(value):
        raise TypeError("value must be listlike")

    # TODO: _from_sequence would raise ValueError in cases where
    # _ensure_nanosecond_dtype raises TypeError
    _ensure_nanosecond_dtype(dtype)

    if lib.is_np_dtype(dtype, "m"):
        # timedelta64 path has no special error translation
        return TimedeltaArray._from_sequence(value, dtype=dtype)

    try:
        dta = DatetimeArray._from_sequence(value, dtype=dtype)
    except ValueError as err:
        # Re-raise with a Series-specific message when tz-aware data is
        # combined with a tz-naive dtype.
        if "cannot supply both a tz and a timezone-naive dtype" in str(err):
            raise ValueError(
                "Cannot convert timezone-aware data to "
                "timezone-naive dtype. Use "
                "pd.Series(values).dt.tz_localize(None) instead."
            ) from err
        raise

    return dta
1237
1238
def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
    """
    Convert dtypes with granularity less than nanosecond to nanosecond

    >>> _ensure_nanosecond_dtype(np.dtype("M8[us]"))

    >>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
    Traceback (most recent call last):
        ...
    TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'

    >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
    Traceback (most recent call last):
        ...
    TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
    """  # noqa: E501
    # Build the unit-less message from the original dtype, before unpacking.
    msg = (
        f"The '{dtype.name}' dtype has no unit. "
        f"Please pass in '{dtype.name}[ns]' instead."
    )

    # unpack e.g. SparseDtype
    dtype = getattr(dtype, "subtype", dtype)

    if not isinstance(dtype, np.dtype):
        # i.e. datetime64tz -- nothing to validate
        return
    if dtype.kind not in "mM":
        return
    if is_supported_dtype(dtype):
        return

    # pre-2.0 we would silently swap in nanos for lower-resolutions,
    # raise for above-nano resolutions
    if dtype.name in ["datetime64", "timedelta64"]:
        # a unit-less dt64/td64 dtype
        raise ValueError(msg)
    # TODO: ValueError or TypeError? existing test
    # test_constructor_generic_timestamp_bad_frequency expects TypeError
    raise TypeError(
        f"dtype={dtype} is not supported. Supported resolutions are 's', "
        "'ms', 'us', and 'ns'"
    )
1279
1280
1281# TODO: other value-dependent functions to standardize here include
1282# Index._find_common_type_compat
def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
    """
    Find the type/dtype for the result of an operation between objects.

    This is similar to find_common_type, but looks at the right object instead
    of just its dtype. This can be useful in particular when the right
    object does not have a `dtype`.

    Parameters
    ----------
    left_dtype : np.dtype or ExtensionDtype
    right : Any

    Returns
    -------
    np.dtype or ExtensionDtype

    See also
    --------
    find_common_type
    numpy.result_type
    """
    new_dtype: DtypeObj

    if (
        isinstance(left_dtype, np.dtype)
        and left_dtype.kind in "iuc"
        and (lib.is_integer(right) or lib.is_float(right))
    ):
        # e.g. with int8 dtype and right=512, we want to end up with
        # np.int16, whereas infer_dtype_from(512) gives np.int64,
        # which will make us upcast too far.
        if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
            # Integer-valued float: treat it as an int so the result can
            # stay integer.  NOTE(review): `left_dtype.kind != "f"` is
            # always True here given the `kind in "iuc"` check above.
            right = int(right)
        # After NEP 50, numpy won't inspect Python scalars
        # TODO: do we need to recreate numpy's inspection logic for floats too
        # (this breaks some tests)
        if isinstance(right, int) and not isinstance(right, np.integer):
            # This gives an unsigned type by default
            # (if our number is positive)

            # If our left dtype is signed, we might not want this since
            # this might give us 1 dtype too big
            # We should check if the corresponding int dtype (e.g. int64 for uint64)
            # can hold the number
            right_dtype = np.min_scalar_type(right)
            # NOTE(review): for ints outside the uint64 range,
            # np.min_scalar_type may return a non-integer dtype, in which
            # case np.iinfo below would raise -- confirm callers never pass
            # such values.
            if right == 0:
                # Special case 0
                right = left_dtype
            elif (
                not np.issubdtype(left_dtype, np.unsignedinteger)
                and 0 < right <= np.iinfo(right_dtype).max
            ):
                # If left dtype isn't unsigned, check if it fits in the signed dtype
                right = np.dtype(f"i{right_dtype.itemsize}")
            else:
                right = right_dtype

        new_dtype = np.result_type(left_dtype, right)

    elif is_valid_na_for_dtype(right, left_dtype):
        # e.g. IntervalDtype[int] and None/np.nan
        new_dtype = ensure_dtype_can_hold_na(left_dtype)

    else:
        # General case: infer a dtype from the value, then find a common one.
        dtype, _ = infer_dtype_from(right)
        new_dtype = find_common_type([left_dtype, dtype])

    return new_dtype
1352
1353
def common_dtype_categorical_compat(
    objs: Sequence[Index | ArrayLike], dtype: DtypeObj
) -> DtypeObj:
    """
    Update the result of find_common_type to account for NAs in a Categorical.

    Parameters
    ----------
    objs : list[np.ndarray | ExtensionArray | Index]
    dtype : np.dtype or ExtensionDtype

    Returns
    -------
    np.dtype or ExtensionDtype
    """
    # GH#38240
    # TODO: more generally, could do `not can_hold_na(dtype)`
    if not lib.is_np_dtype(dtype, "iu"):
        return dtype

    for obj in objs:
        # Check the dtype attribute so e.g. the string "categorical" is not
        # accidentally treated as categorical here.
        obj_dtype = getattr(obj, "dtype", None)
        if not isinstance(obj_dtype, CategoricalDtype):
            continue

        if isinstance(obj, ABCIndex):
            # Index.hasnans may already be cached
            hasnas = obj.hasnans
        else:
            # Categorical
            hasnas = cast("Categorical", obj)._hasna

        if hasnas:
            # see test_union_int_categorical_with_nan
            return np.dtype(np.float64)

    return dtype
1389
1390
def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
    """
    np.find_common_type implementation pre-1.25 deprecation using np.result_type
    https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065

    Parameters
    ----------
    dtypes : np.dtypes

    Returns
    -------
    np.dtype
    """
    try:
        common_dtype = np.result_type(*dtypes)
    except TypeError:
        # result_type refuses some combinations outright; fall back to object
        return np.dtype("O")

    if common_dtype.kind in "mMSU":
        # NumPy promotion currently (1.25) misbehaves for times and strings,
        # so fall back to object (find_common_type did unless there
        # was only one dtype)
        return np.dtype("O")

    return common_dtype
1415
1416
@overload
def find_common_type(types: list[np.dtype]) -> np.dtype:
    ...


@overload
def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
    ...


@overload
def find_common_type(types: list[DtypeObj]) -> DtypeObj:
    ...


def find_common_type(types):
    """
    Find a common data type among the given dtypes.

    Parameters
    ----------
    types : list of dtypes

    Returns
    -------
    pandas extension or numpy dtype

    See Also
    --------
    numpy.find_common_type

    """
    if not types:
        raise ValueError("no types given")

    # Fast path when every dtype is identical; this also works around
    # find_common_type([np.dtype('datetime64[ns]')] * 2) => object
    if lib.dtypes_all_equal(list(types)):
        return types[0]

    # de-duplicate, preserving order (dict.fromkeys acts as an ordered set)
    types = list(dict.fromkeys(types).keys())

    ea_dtypes = [t for t in types if isinstance(t, ExtensionDtype)]
    if ea_dtypes:
        # Give each ExtensionDtype, in order, a chance to propose a common
        # dtype; fall back to object if none does.
        for t in ea_dtypes:
            res = t._get_common_dtype(types)
            if res is not None:
                return res
        return np.dtype("object")

    # all-datetime or all-timedelta: take lowest unit
    for kind in "Mm":
        if all(lib.is_np_dtype(t, kind) for t in types):
            return np.dtype(max(types))

    # don't mix bool / int or float or complex
    # this is different from numpy, which casts bool with float/int as int
    if any(t.kind == "b" for t in types) and any(t.kind in "iufc" for t in types):
        return np.dtype("object")

    return np_find_common_type(*types)
1485
1486
def construct_2d_arraylike_from_scalar(
    value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    Build a (length, width) ndarray of ``dtype`` filled with ``value``.

    Parameters
    ----------
    value : Scalar
    length, width : int
        Shape of the result.
    dtype : np.dtype
    copy : bool
        Whether to copy when coercing ``value`` to an ndarray.

    Returns
    -------
    np.ndarray

    Raises
    ------
    TypeError
        If ``value`` cannot be coerced to ``dtype``.
    ValueError
        If the coerced ``value`` is not 0-dimensional (i.e. not scalar-like).
    """
    shape = (length, width)

    if dtype.kind in "mM":
        value = _maybe_box_and_unbox_datetimelike(value, dtype)
    elif dtype == _dtype_obj and isinstance(value, (np.timedelta64, np.datetime64)):
        # calling np.array below would cast to pytimedelta/pydatetime
        out = np.empty(shape, dtype=object)
        out.fill(value)
        return out

    # Attempt to coerce to a numpy array
    try:
        if copy:
            arr = np.array(value, dtype=dtype, copy=copy)
        else:
            arr = np.asarray(value, dtype=dtype)
    except (ValueError, TypeError) as err:
        raise TypeError(
            f"DataFrame constructor called with incompatible data and dtype: {err}"
        ) from err

    if arr.ndim != 0:
        raise ValueError("DataFrame constructor not properly called!")

    return np.full(shape, arr)
1516
1517
def construct_1d_arraylike_from_scalar(
    value: Scalar, length: int, dtype: DtypeObj | None
) -> ArrayLike:
    """
    create a np.ndarray / pandas type of specified shape and dtype
    filled with values

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype or np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value

    """

    if dtype is None:
        try:
            dtype, value = infer_dtype_from_scalar(value)
        except OutOfBoundsDatetime:
            # out-of-bounds datetimes are retained as objects
            dtype = _dtype_obj

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        seq = [value] if length else []
        return cls._from_sequence(seq, dtype=dtype).repeat(length)

    if length and dtype.kind in "iu" and isna(value):
        # coerce if we have nan for an integer dtype
        dtype = np.dtype("float64")
    elif lib.is_np_dtype(dtype, "US"):
        # coerce to object dtype so numpy treats our string as a
        # scalar value rather than a sequence of characters
        dtype = np.dtype("object")
        if not isna(value):
            value = ensure_str(value)
    elif dtype.kind in "mM":
        value = _maybe_box_and_unbox_datetimelike(value, dtype)

    subarr = np.empty(length, dtype=dtype)
    if length:
        # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes
        subarr.fill(value)

    return subarr
1567
1568
def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
    # Caller is responsible for checking dtype.kind in "mM"

    # Box only stdlib datetimes; np.datetime64 scalars -- in particular
    # datetime64("NaT") -- must stay unboxed.
    boxed = (
        maybe_box_datetimelike(value, dtype)
        if isinstance(value, dt.datetime)
        else value
    )
    return _maybe_unbox_datetimelike(boxed, dtype)
1577
1578
1579def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
1580 """
1581 Transform any list-like object in a 1-dimensional numpy array of object
1582 dtype.
1583
1584 Parameters
1585 ----------
1586 values : any iterable which has a len()
1587
1588 Raises
1589 ------
1590 TypeError
1591 * If `values` does not have a len()
1592
1593 Returns
1594 -------
1595 1-dimensional numpy array of dtype object
1596 """
1597 # numpy will try to interpret nested lists as further dimensions, hence
1598 # making a 1D array that contains list-likes is a bit tricky:
1599 result = np.empty(len(values), dtype="object")
1600 result[:] = values
1601 return result
1602
1603
def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
    """
    Takes any dtype and returns the casted version, raising for when data is
    incompatible with integer/unsigned integer dtypes.

    Parameters
    ----------
    arr : np.ndarray or list
        The array to cast.
    dtype : np.dtype
        The integer dtype to cast the array to.

    Returns
    -------
    ndarray
        Array of integer or unsigned integer dtype.

    Raises
    ------
    OverflowError : the dtype is incompatible with the data
    ValueError : loss of precision has occurred during casting

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> pd.Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """
    assert dtype.kind in "iu"

    # First perform the cast; whether it was lossless is validated below.
    try:
        if not isinstance(arr, np.ndarray):
            with warnings.catch_warnings():
                # We already disallow dtype=uint w/ negative numbers
                # (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
                warnings.filterwarnings(
                    "ignore",
                    "NumPy will stop allowing conversion of out-of-bound Python int",
                    DeprecationWarning,
                )
                casted = np.asarray(arr, dtype=dtype)
        else:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                casted = arr.astype(dtype, copy=False)
    except OverflowError as err:
        raise OverflowError(
            "The elements provided in the data cannot all be "
            f"casted to the dtype {dtype}"
        ) from err

    if isinstance(arr, np.ndarray) and arr.dtype == dtype:
        # avoid expensive array_equal check
        return casted

    # Compare the original values to the casted ones; equality means the
    # cast round-tripped losslessly.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        warnings.filterwarnings(
            "ignore", "elementwise comparison failed", FutureWarning
        )
        if np.array_equal(arr, casted):
            return casted

    # The cast was lossy; everything below diagnoses why, so we can raise
    # the most informative error.

    # We do this casting to allow for proper
    # data and dtype checking.
    #
    # We didn't do this earlier because NumPy
    # doesn't handle `uint64` correctly.
    arr = np.asarray(arr)

    if np.issubdtype(arr.dtype, str):
        # TODO(numpy-2.0 min): This case will raise an OverflowError above
        if (casted.astype(str) == arr).all():
            return casted
        raise ValueError(f"string values cannot be losslessly cast to {dtype}")

    if dtype.kind == "u" and (arr < 0).any():
        # TODO: can this be hit anymore after numpy 2.0?
        raise OverflowError("Trying to coerce negative values to unsigned integers")

    if arr.dtype.kind == "f":
        if not np.isfinite(arr).all():
            raise IntCastingNaNError(
                "Cannot convert non-finite values (NA or inf) to integer"
            )
        raise ValueError("Trying to coerce float values to integers")
    if arr.dtype == object:
        # NOTE(review): object-dtype input reuses the float-specific message.
        raise ValueError("Trying to coerce float values to integers")

    if casted.dtype < arr.dtype:
        # TODO: Can this path be hit anymore with numpy > 2
        # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
        raise ValueError(
            f"Values are too large to be losslessly converted to {dtype}. "
            f"To cast anyway, use pd.Series(values).astype({dtype})"
        )

    if arr.dtype.kind in "mM":
        # test_constructor_maskedarray_nonfloat
        raise TypeError(
            f"Constructing a Series or DataFrame from {arr.dtype} values and "
            f"dtype={dtype} is not supported. Use values.view({dtype}) instead."
        )

    # No known cases that get here, but raising explicitly to cover our bases.
    raise ValueError(f"values cannot be losslessly cast to {dtype}")
1720
1721
def can_hold_element(arr: ArrayLike, element: Any) -> bool:
    """
    Can we do an inplace setitem with this element in an array with this dtype?

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
    element : Any

    Returns
    -------
    bool
    """
    dtype = arr.dtype

    if isinstance(dtype, np.dtype) and dtype.kind not in "mM":
        # Plain (non-datetimelike) numpy case: delegate to the dtype-level
        # lossless-setitem check.
        try:
            np_can_hold_element(dtype, element)
        except (TypeError, LossySetitemError):
            return False
        return True

    if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)):
        # np.dtype here catches datetime64ns and timedelta64ns; we assume
        # in this case that we have DatetimeArray/TimedeltaArray
        arr = cast(
            "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr
        )
        try:
            arr._validate_setitem_value(element)
        except (ValueError, TypeError):
            return False
        return True

    # This is technically incorrect, but maintains the behavior of
    # ExtensionBlock._can_hold_element
    return True
1758
1759
def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
    """
    Raise if we cannot losslessly set this element into an ndarray with this dtype.

    Specifically about places where we disagree with numpy. i.e. there are
    cases where numpy will raise in doing the setitem that we do not check
    for here, e.g. setting str "X" into a numeric ndarray.

    Parameters
    ----------
    dtype : np.dtype
        Dtype of the ndarray we want to set `element` into.
    element : Any
        Scalar or array-like value being set.

    Returns
    -------
    Any
        The element, potentially cast to the dtype.

    Raises
    ------
    LossySetitemError
        If we cannot losslessly store this element with this dtype.
    NotImplementedError
        If `dtype` is of a kind not handled here.
    """
    if dtype == _dtype_obj:
        # object dtype can hold anything as-is
        return element

    # dtype inferred from `element` when it carries one (e.g. ndarray,
    # nullable array, numpy scalar); presumably None for plain Python
    # scalars -- see _maybe_infer_dtype_type. TODO confirm
    tipo = _maybe_infer_dtype_type(element)

    if dtype.kind in "iu":
        if isinstance(element, range):
            # A range is held losslessly iff both endpoints fit the dtype.
            if _dtype_can_hold_range(element, dtype):
                return element
            raise LossySetitemError

        if is_integer(element) or (is_float(element) and element.is_integer()):
            # e.g. test_setitem_series_int8 if we have a python int 1
            # tipo may be np.int32, despite the fact that it will fit
            # in smaller int dtypes.
            info = np.iinfo(dtype)
            if info.min <= element <= info.max:
                return dtype.type(element)
            raise LossySetitemError

        if tipo is not None:
            if tipo.kind not in "iu":
                if isinstance(element, np.ndarray) and element.dtype.kind == "f":
                    # If all can be losslessly cast to integers, then we can hold them
                    with np.errstate(invalid="ignore"):
                        # We check afterwards if cast was losslessly, so no need to show
                        # the warning
                        casted = element.astype(dtype)
                    comp = casted == element
                    if comp.all():
                        # Return the casted values bc they can be passed to
                        # np.putmask, whereas the raw values cannot.
                        # see TestSetitemFloatNDarrayIntoIntegerSeries
                        return casted
                    raise LossySetitemError

                elif isinstance(element, ABCExtensionArray) and isinstance(
                    element.dtype, CategoricalDtype
                ):
                    # GH#52927 setting Categorical value into non-EA frame
                    # TODO: general-case for EAs?
                    try:
                        casted = element.astype(dtype)
                    except (ValueError, TypeError):
                        raise LossySetitemError
                    # Check for cases of either
                    # a) lossy overflow/rounding or
                    # b) semantic changes like dt64->int64
                    comp = casted == element
                    if not comp.all():
                        raise LossySetitemError
                    return casted

                # Anything other than integer we cannot hold
                raise LossySetitemError
            if (
                dtype.kind == "u"
                and isinstance(element, np.ndarray)
                and element.dtype.kind == "i"
            ):
                # see test_where_uint64
                casted = element.astype(dtype)
                if (casted == element).all():
                    # TODO: faster to check (element >=0).all()? potential
                    # itemsize issues there?
                    return casted
                raise LossySetitemError
            if dtype.itemsize < tipo.itemsize:
                # e.g. int32 values are not guaranteed to fit in int16
                raise LossySetitemError
            if not isinstance(tipo, np.dtype):
                # i.e. nullable IntegerDtype; we can put this into an ndarray
                # losslessly iff it has no NAs
                arr = element._values if isinstance(element, ABCSeries) else element
                if arr._hasna:
                    raise LossySetitemError
                return element

            return element

        raise LossySetitemError

    if dtype.kind == "f":
        if lib.is_integer(element) or lib.is_float(element):
            casted = dtype.type(element)
            if np.isnan(casted) or casted == element:
                return casted
            # otherwise e.g. overflow see TestCoercionFloat32
            raise LossySetitemError

        if tipo is not None:
            # TODO: itemsize check?
            if tipo.kind not in "iuf":
                # Anything other than float/integer we cannot hold
                raise LossySetitemError
            if not isinstance(tipo, np.dtype):
                # i.e. nullable IntegerDtype or FloatingDtype;
                # we can put this into an ndarray losslessly iff it has no NAs
                if element._hasna:
                    raise LossySetitemError
                return element
            elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
                if isinstance(element, np.ndarray):
                    # e.g. TestDataFrameIndexingWhere::test_where_alignment
                    casted = element.astype(dtype)
                    # equal_nan so NaN positions compare as equal
                    if np.array_equal(casted, element, equal_nan=True):
                        return casted
                    raise LossySetitemError

            return element

        raise LossySetitemError

    if dtype.kind == "c":
        if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
            if np.isnan(element):
                # see test_where_complex GH#6345
                return dtype.type(element)

            with warnings.catch_warnings():
                # suppress e.g. ComplexWarning on the narrowing cast
                warnings.filterwarnings("ignore")
                casted = dtype.type(element)
            if casted == element:
                return casted
            # otherwise e.g. overflow see test_32878_complex_itemsize
            raise LossySetitemError

        if tipo is not None:
            if tipo.kind in "iufc":
                return element
            raise LossySetitemError
        raise LossySetitemError

    if dtype.kind == "b":
        if tipo is not None:
            if tipo.kind == "b":
                if not isinstance(tipo, np.dtype):
                    # i.e. we have a BooleanArray
                    if element._hasna:
                        # i.e. there are pd.NA elements
                        raise LossySetitemError
                return element
            raise LossySetitemError
        if lib.is_bool(element):
            return element
        raise LossySetitemError

    if dtype.kind == "S":
        # TODO: test tests.frame.methods.test_replace tests get here,
        # need more targeted tests. xref phofl has a PR about this
        if tipo is not None:
            if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
                return element
            raise LossySetitemError
        if isinstance(element, bytes) and len(element) <= dtype.itemsize:
            return element
        raise LossySetitemError

    if dtype.kind == "V":
        # i.e. np.void, which cannot hold _anything_
        raise LossySetitemError

    raise NotImplementedError(dtype)
1939
1940
def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
    """
    _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints),
    but in many cases a range can be held by a smaller integer dtype.
    Check if this is one of those cases.
    """
    # An empty range fits any integer dtype; otherwise both endpoints must
    # be representable.
    return not rng or (
        np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.stop, dtype)
    )
1950
1951
def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool:
    """
    np.can_cast pandas-equivalent for pre 2-0 behavior that allowed scalar
    inference

    Parameters
    ----------
    element : Scalar
    dtype : np.dtype

    Returns
    -------
    bool
    """
    try:
        # np_can_hold_element raises if the element cannot be held losslessly
        np_can_hold_element(dtype, element)
    except (LossySetitemError, NotImplementedError):
        return False
    return True