1"""
2Routines for casting.
3"""
4
5from __future__ import annotations
6
7import datetime as dt
8import functools
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Literal,
13 Sized,
14 TypeVar,
15 cast,
16 overload,
17)
18import warnings
19
20import numpy as np
21
22from pandas._libs import lib
23from pandas._libs.missing import (
24 NA,
25 NAType,
26 checknull,
27)
28from pandas._libs.tslibs import (
29 NaT,
30 OutOfBoundsDatetime,
31 OutOfBoundsTimedelta,
32 Timedelta,
33 Timestamp,
34 get_unit_from_dtype,
35 is_supported_unit,
36)
37from pandas._libs.tslibs.timedeltas import array_to_timedelta64
38from pandas._typing import (
39 ArrayLike,
40 Dtype,
41 DtypeObj,
42 NumpyIndexT,
43 Scalar,
44 npt,
45)
46from pandas.errors import (
47 IntCastingNaNError,
48 LossySetitemError,
49)
50
51from pandas.core.dtypes.common import (
52 ensure_int8,
53 ensure_int16,
54 ensure_int32,
55 ensure_int64,
56 ensure_object,
57 ensure_str,
58 is_bool,
59 is_bool_dtype,
60 is_complex,
61 is_complex_dtype,
62 is_datetime64_dtype,
63 is_extension_array_dtype,
64 is_float,
65 is_float_dtype,
66 is_integer,
67 is_integer_dtype,
68 is_numeric_dtype,
69 is_object_dtype,
70 is_scalar,
71 is_signed_integer_dtype,
72 is_string_dtype,
73 is_timedelta64_dtype,
74 is_unsigned_integer_dtype,
75 pandas_dtype as pandas_dtype_func,
76)
77from pandas.core.dtypes.dtypes import (
78 BaseMaskedDtype,
79 CategoricalDtype,
80 DatetimeTZDtype,
81 ExtensionDtype,
82 IntervalDtype,
83 PandasExtensionDtype,
84 PeriodDtype,
85)
86from pandas.core.dtypes.generic import (
87 ABCExtensionArray,
88 ABCIndex,
89 ABCSeries,
90)
91from pandas.core.dtypes.inference import is_list_like
92from pandas.core.dtypes.missing import (
93 is_valid_na_for_dtype,
94 isna,
95 na_value_for_dtype,
96 notna,
97)
98
99from pandas.io._util import _arrow_dtype_mapping
100
101if TYPE_CHECKING:
102 from pandas import Index
103 from pandas.core.arrays import (
104 Categorical,
105 DatetimeArray,
106 ExtensionArray,
107 IntervalArray,
108 PeriodArray,
109 TimedeltaArray,
110 )
111
112
113_int8_max = np.iinfo(np.int8).max
114_int16_max = np.iinfo(np.int16).max
115_int32_max = np.iinfo(np.int32).max
116_int64_max = np.iinfo(np.int64).max
117
118_dtype_obj = np.dtype(object)
119
120NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray)
121
122
123def maybe_convert_platform(
124 values: list | tuple | range | np.ndarray | ExtensionArray,
125) -> ArrayLike:
126 """try to do platform conversion, allow ndarray or list here"""
127 arr: ArrayLike
128
129 if isinstance(values, (list, tuple, range)):
130 arr = construct_1d_object_array_from_listlike(values)
131 else:
132 # The caller is responsible for ensuring that we have np.ndarray
133 # or ExtensionArray here.
134 arr = values
135
136 if arr.dtype == _dtype_obj:
137 arr = cast(np.ndarray, arr)
138 arr = lib.maybe_convert_objects(arr)
139
140 return arr
141
142
143def is_nested_object(obj) -> bool:
144 """
145 return a boolean if we have a nested object, e.g. a Series with 1 or
146 more Series elements
147
148 This may not be necessarily be performant.
149
150 """
151 return bool(
152 isinstance(obj, ABCSeries)
153 and is_object_dtype(obj.dtype)
154 and any(isinstance(v, ABCSeries) for v in obj._values)
155 )
156
157
158def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
159 """
160 Cast scalar to Timestamp or Timedelta if scalar is datetime-like
161 and dtype is not object.
162
163 Parameters
164 ----------
165 value : scalar
166 dtype : Dtype, optional
167
168 Returns
169 -------
170 scalar
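
    Examples
    --------
    Illustrative (datetime-likes are boxed to pandas scalars):

    >>> maybe_box_datetimelike(np.datetime64("2020-01-01"))
    Timestamp('2020-01-01 00:00:00')
    >>> maybe_box_datetimelike(np.timedelta64(1, "D"))
    Timedelta('1 days 00:00:00')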
171 """
172 if dtype == _dtype_obj:
173 pass
174 elif isinstance(value, (np.datetime64, dt.datetime)):
175 value = Timestamp(value)
176 elif isinstance(value, (np.timedelta64, dt.timedelta)):
177 value = Timedelta(value)
178
179 return value
180
181
182def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
183 """
184 If passed a scalar cast the scalar to a python native type.
185
186 Parameters
187 ----------
188 value : scalar or Series
189
190 Returns
191 -------
192 scalar or Series
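
    Examples
    --------
    Illustrative (numpy scalars become Python natives):

    >>> maybe_box_native(np.int64(1))
    1
    >>> maybe_box_native(np.float64(1.5))
    1.5
    >>> maybe_box_native(np.bool_(True))
    True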
193 """
194 if is_float(value):
195 # error: Argument 1 to "float" has incompatible type
196 # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]";
197 # expected "Union[SupportsFloat, _SupportsIndex, str]"
198 value = float(value) # type: ignore[arg-type]
199 elif is_integer(value):
200 # error: Argument 1 to "int" has incompatible type
201 # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]";
202 # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]"
203 value = int(value) # type: ignore[arg-type]
204 elif is_bool(value):
205 value = bool(value)
206 elif isinstance(value, (np.datetime64, np.timedelta64)):
207 value = maybe_box_datetimelike(value)
208 elif value is NA:
209 value = None
210 return value
211
212
213def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
214 """
215 Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting
216 into a numpy array. Failing to unbox would risk dropping nanoseconds.
217
218 Notes
219 -----
220 Caller is responsible for checking dtype.kind in ["m", "M"]
221 """
222 if is_valid_na_for_dtype(value, dtype):
223 # GH#36541: can't fill array directly with pd.NaT
224 # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
225 # ValueError: cannot convert float NaN to integer
226 value = dtype.type("NaT", "ns")
227 elif isinstance(value, Timestamp):
228 if value.tz is None:
229 value = value.to_datetime64()
230 elif not isinstance(dtype, DatetimeTZDtype):
231 raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
232 elif isinstance(value, Timedelta):
233 value = value.to_timedelta64()
234
235 _disallow_mismatched_datetimelike(value, dtype)
236 return value
237
238
239def _disallow_mismatched_datetimelike(value, dtype: DtypeObj):
240 """
241 numpy allows np.array(dt64values, dtype="timedelta64[ns]") and
242 vice-versa, but we do not want to allow this, so we need to
243 check explicitly
244 """
245 vdtype = getattr(value, "dtype", None)
246 if vdtype is None:
247 return
248 elif (vdtype.kind == "m" and dtype.kind == "M") or (
249 vdtype.kind == "M" and dtype.kind == "m"
250 ):
251 raise TypeError(f"Cannot cast {repr(value)} to {dtype}")
252
253
254@overload
255def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray:
256 ...
257
258
259@overload
260def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike:
261 ...
262
263
264def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike:
265 """
266 try to cast to the specified dtype (e.g. convert back to bool/int
267 or could be an astype of float64->float32
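
    A rough illustration (an all-integer float array downcasts via "infer"):

    >>> maybe_downcast_to_dtype(np.array([1.0, 2.0]), "infer")
    array([1, 2])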
268 """
269 do_round = False
270
271 if isinstance(dtype, str):
272 if dtype == "infer":
273 inferred_type = lib.infer_dtype(result, skipna=False)
274 if inferred_type == "boolean":
275 dtype = "bool"
276 elif inferred_type == "integer":
277 dtype = "int64"
278 elif inferred_type == "datetime64":
279 dtype = "datetime64[ns]"
280 elif inferred_type in ["timedelta", "timedelta64"]:
281 dtype = "timedelta64[ns]"
282
283 # try to upcast here
284 elif inferred_type == "floating":
285 dtype = "int64"
286 if issubclass(result.dtype.type, np.number):
287 do_round = True
288
289 else:
290 # TODO: complex? what if result is already non-object?
291 dtype = "object"
292
293 dtype = np.dtype(dtype)
294
295 if not isinstance(dtype, np.dtype):
296 # enforce our signature annotation
297 raise TypeError(dtype) # pragma: no cover
298
299 converted = maybe_downcast_numeric(result, dtype, do_round)
300 if converted is not result:
301 return converted
302
303 # a datetimelike
304 # GH12821, iNaT is cast to float
305 if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
306 result = result.astype(dtype)
307
308 elif dtype.kind == "m" and result.dtype == _dtype_obj:
309 # test_where_downcast_to_td64
310 result = cast(np.ndarray, result)
311 result = array_to_timedelta64(result)
312
313 elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj:
314 result = cast(np.ndarray, result)
315 return np.asarray(maybe_cast_to_datetime(result, dtype=dtype))
316
317 return result
318
319
320@overload
321def maybe_downcast_numeric(
322 result: np.ndarray, dtype: np.dtype, do_round: bool = False
323) -> np.ndarray:
324 ...
325
326
327@overload
328def maybe_downcast_numeric(
329 result: ExtensionArray, dtype: DtypeObj, do_round: bool = False
330) -> ArrayLike:
331 ...
332
333
334def maybe_downcast_numeric(
335 result: ArrayLike, dtype: DtypeObj, do_round: bool = False
336) -> ArrayLike:
337 """
338 Subset of maybe_downcast_to_dtype restricted to numeric dtypes.
339
340 Parameters
341 ----------
342 result : ndarray or ExtensionArray
343 dtype : np.dtype or ExtensionDtype
344 do_round : bool
345
346 Returns
347 -------
348 ndarray or ExtensionArray
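
    Examples
    --------
    Illustrative (only lossless downcasts are taken):

    >>> maybe_downcast_numeric(np.array([1.0, 2.0]), np.dtype(np.int64))
    array([1, 2])
    >>> maybe_downcast_numeric(np.array([1.5, 2.0]), np.dtype(np.int64))
    array([1.5, 2. ])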
349 """
350 if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype):
351 # e.g. SparseDtype has no itemsize attr
352 return result
353
354 def trans(x):
355 if do_round:
356 return x.round()
357 return x
358
359 if dtype.kind == result.dtype.kind:
360 # don't allow upcasts here (except if empty)
361 if result.dtype.itemsize <= dtype.itemsize and result.size:
362 return result
363
364 if is_bool_dtype(dtype) or is_integer_dtype(dtype):
365 if not result.size:
366 # if we don't have any elements, just astype it
367 return trans(result).astype(dtype)
368
369 # do a test on the first element, if it fails then we are done
370 r = result.ravel()
371 arr = np.array([r[0]])
372
373 if isna(arr).any():
374 # if we have any nulls, then we are done
375 return result
376
377 elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)):
378 # a comparable, e.g. a Decimal may slip in here
379 return result
380
381 if (
382 issubclass(result.dtype.type, (np.object_, np.number))
383 and notna(result).all()
384 ):
385 new_result = trans(result).astype(dtype)
386 if new_result.dtype.kind == "O" or result.dtype.kind == "O":
387 # np.allclose may raise TypeError on object-dtype
388 if (new_result == result).all():
389 return new_result
390 else:
391 if np.allclose(new_result, result, rtol=0):
392 return new_result
393
394 elif (
395 issubclass(dtype.type, np.floating)
396 and not is_bool_dtype(result.dtype)
397 and not is_string_dtype(result.dtype)
398 ):
399 with warnings.catch_warnings():
400 warnings.filterwarnings(
401 "ignore", "overflow encountered in cast", RuntimeWarning
402 )
403 new_result = result.astype(dtype)
404
405 # Adjust tolerances based on floating point size
406 size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}
407
408 atol = size_tols.get(new_result.dtype.itemsize, 0.0)
409
410 # Check downcast float values are still equal within 7 digits when
411 # converting from float64 to float32
412 if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
413 return new_result
414
415 elif dtype.kind == result.dtype.kind == "c":
416 new_result = result.astype(dtype)
417
418 if np.array_equal(new_result, result, equal_nan=True):
419 # TODO: use tolerance like we do for float?
420 return new_result
421
422 return result
423
424
425def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT:
426 """
427 If array is a int/uint/float bit size lower than 64 bit, upcast it to 64 bit.
428
429 Parameters
430 ----------
431 arr : ndarray or ExtensionArray
432
433 Returns
434 -------
435 ndarray or ExtensionArray
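
    Examples
    --------
    Illustrative (smaller numeric dtypes are widened to 64 bits):

    >>> maybe_upcast_numeric_to_64bit(np.array([1, 2], dtype=np.int32)).dtype
    dtype('int64')
    >>> maybe_upcast_numeric_to_64bit(np.array([1.0], dtype=np.float32)).dtype
    dtype('float64')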
436 """
437 dtype = arr.dtype
438 if is_signed_integer_dtype(dtype) and dtype != np.int64:
439 return arr.astype(np.int64)
440 elif is_unsigned_integer_dtype(dtype) and dtype != np.uint64:
441 return arr.astype(np.uint64)
442 elif is_float_dtype(dtype) and dtype != np.float64:
443 return arr.astype(np.float64)
444 else:
445 return arr
446
447
448def maybe_cast_pointwise_result(
449 result: ArrayLike,
450 dtype: DtypeObj,
451 numeric_only: bool = False,
452 same_dtype: bool = True,
453) -> ArrayLike:
454 """
455 Try casting result of a pointwise operation back to the original dtype if
456 appropriate.
457
458 Parameters
459 ----------
460 result : array-like
461 Result to cast.
462 dtype : np.dtype or ExtensionDtype
463 Input Series from which result was calculated.
464 numeric_only : bool, default False
465 Whether to cast only numerics or datetimes as well.
466 same_dtype : bool, default True
467 Specify dtype when calling _from_sequence
468
469 Returns
470 -------
471 result : array-like
472 result maybe casted to the dtype.
473 """
474
475 assert not is_scalar(result)
476
477 if isinstance(dtype, ExtensionDtype):
478 if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)):
479 # TODO: avoid this special-casing
480 # We have to special case categorical so as not to upcast
481 # things like counts back to categorical
482
483 cls = dtype.construct_array_type()
484 if same_dtype:
485 result = maybe_cast_to_extension_array(cls, result, dtype=dtype)
486 else:
487 result = maybe_cast_to_extension_array(cls, result)
488
489 elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only:
490 result = maybe_downcast_to_dtype(result, dtype)
491
492 return result
493
494
495def maybe_cast_to_extension_array(
496 cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None
497) -> ArrayLike:
498 """
499 Call to `_from_sequence` that returns the object unchanged on Exception.
500
501 Parameters
502 ----------
503 cls : class, subclass of ExtensionArray
504 obj : arraylike
505 Values to pass to cls._from_sequence
506 dtype : ExtensionDtype, optional
507
508 Returns
509 -------
510 ExtensionArray or obj
511 """
512 from pandas.core.arrays.string_ import BaseStringArray
513
514 assert isinstance(cls, type), f"must pass a type: {cls}"
515 assertion_msg = f"must pass a subclass of ExtensionArray: {cls}"
516 assert issubclass(cls, ABCExtensionArray), assertion_msg
517
518 # Everything can be converted to StringArrays, but we may not want to convert
519 if issubclass(cls, BaseStringArray) and lib.infer_dtype(obj) != "string":
520 return obj
521
522 try:
523 result = cls._from_sequence(obj, dtype=dtype)
524 except Exception:
525 # We can't predict what downstream EA constructors may raise
526 result = obj
527 return result
528
529
530@overload
531def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
532 ...
533
534
535@overload
536def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype:
537 ...
538
539
540def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
541 """
542 If we have a dtype that cannot hold NA values, find the best match that can.
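
    Examples
    --------
    Illustrative promotions:

    >>> ensure_dtype_can_hold_na(np.dtype(np.int64))
    dtype('float64')
    >>> ensure_dtype_can_hold_na(np.dtype(np.bool_))
    dtype('O')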
543 """
544 if isinstance(dtype, ExtensionDtype):
545 if dtype._can_hold_na:
546 return dtype
547 elif isinstance(dtype, IntervalDtype):
548 # TODO(GH#45349): don't special-case IntervalDtype, allow
549 # overriding instead of returning object below.
550 return IntervalDtype(np.float64, closed=dtype.closed)
551 return _dtype_obj
552 elif dtype.kind == "b":
553 return _dtype_obj
554 elif dtype.kind in ["i", "u"]:
555 return np.dtype(np.float64)
556 return dtype
557
558
559_canonical_nans = {
560 np.datetime64: np.datetime64("NaT", "ns"),
561 np.timedelta64: np.timedelta64("NaT", "ns"),
562 type(np.nan): np.nan,
563}
564
565
566def maybe_promote(dtype: np.dtype, fill_value=np.nan):
567 """
568 Find the minimal dtype that can hold both the given dtype and fill_value.
569
570 Parameters
571 ----------
572 dtype : np.dtype
573 fill_value : scalar, default np.nan
574
575 Returns
576 -------
577 dtype
578 Upcasted from dtype argument if necessary.
579 fill_value
580 Upcasted from fill_value argument if necessary.
581
582 Raises
583 ------
584 ValueError
585 If fill_value is a non-scalar and dtype is not object.
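
    Examples
    --------
    Illustrative (integer dtypes cannot hold NaN, so both are promoted):

    >>> maybe_promote(np.dtype(np.int64), np.nan)
    (dtype('float64'), nan)
    >>> maybe_promote(np.dtype(object), np.nan)
    (dtype('O'), nan)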
586 """
587 orig = fill_value
588 orig_is_nat = False
589 if checknull(fill_value):
590 # https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740
591 # avoid cache misses with NaN/NaT values that are not singletons
592 if fill_value is not NA:
593 try:
594 orig_is_nat = np.isnat(fill_value)
595 except TypeError:
596 pass
597
598 fill_value = _canonical_nans.get(type(fill_value), fill_value)
599
600 # for performance, we are using a cached version of the actual implementation
601 # of the function in _maybe_promote. However, this doesn't always work (in case
602 # of non-hashable arguments), so we fallback to the actual implementation if needed
603 try:
604 # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type
605 # "Type[Any]"; expected "Hashable" [arg-type]
606 dtype, fill_value = _maybe_promote_cached(
607 dtype, fill_value, type(fill_value) # type: ignore[arg-type]
608 )
609 except TypeError:
610 # if fill_value is not hashable (required for caching)
611 dtype, fill_value = _maybe_promote(dtype, fill_value)
612
613 if (dtype == _dtype_obj and orig is not None) or (
614 orig_is_nat and np.datetime_data(orig)[0] != "ns"
615 ):
616 # GH#51592,53497 restore our potentially non-canonical fill_value
617 fill_value = orig
618 return dtype, fill_value
619
620
621@functools.lru_cache(maxsize=128)
622def _maybe_promote_cached(dtype, fill_value, fill_value_type):
623 # The cached version of _maybe_promote below
624 # This also use fill_value_type as (unused) argument to use this in the
625 # cache lookup -> to differentiate 1 and True
626 return _maybe_promote(dtype, fill_value)
627
628
629def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
630 # The actual implementation of the function, use `maybe_promote` above for
631 # a cached version.
632 if not is_scalar(fill_value):
633 # with object dtype there is nothing to promote, and the user can
634 # pass pretty much any weird fill_value they like
635 if not is_object_dtype(dtype):
636 # with object dtype there is nothing to promote, and the user can
637 # pass pretty much any weird fill_value they like
638 raise ValueError("fill_value must be a scalar")
639 dtype = _dtype_obj
640 return dtype, fill_value
641
642 kinds = ["i", "u", "f", "c", "m", "M"]
643 if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds:
644 dtype = ensure_dtype_can_hold_na(dtype)
645 fv = na_value_for_dtype(dtype)
646 return dtype, fv
647
648 elif isinstance(dtype, CategoricalDtype):
649 if fill_value in dtype.categories or isna(fill_value):
650 return dtype, fill_value
651 else:
652 return object, ensure_object(fill_value)
653
654 elif isna(fill_value):
655 dtype = _dtype_obj
656 if fill_value is None:
657 # but we retain e.g. pd.NA
658 fill_value = np.nan
659 return dtype, fill_value
660
661 # returns tuple of (dtype, fill_value)
662 if issubclass(dtype.type, np.datetime64):
663 inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
664 if inferred == dtype:
665 return dtype, fv
666
667 from pandas.core.arrays import DatetimeArray
668
669 dta = DatetimeArray._from_sequence([], dtype="M8[ns]")
670 try:
671 fv = dta._validate_setitem_value(fill_value)
672 return dta.dtype, fv
673 except (ValueError, TypeError):
674 return _dtype_obj, fill_value
675
676 elif issubclass(dtype.type, np.timedelta64):
677 inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
678 if inferred == dtype:
679 return dtype, fv
680
681 return np.dtype("object"), fill_value
682
683 elif is_float(fill_value):
684 if issubclass(dtype.type, np.bool_):
685 dtype = np.dtype(np.object_)
686
687 elif issubclass(dtype.type, np.integer):
688 dtype = np.dtype(np.float64)
689
690 elif dtype.kind == "f":
691 mst = np.min_scalar_type(fill_value)
692 if mst > dtype:
693 # e.g. mst is np.float64 and dtype is np.float32
694 dtype = mst
695
696 elif dtype.kind == "c":
697 mst = np.min_scalar_type(fill_value)
698 dtype = np.promote_types(dtype, mst)
699
700 elif is_bool(fill_value):
701 if not issubclass(dtype.type, np.bool_):
702 dtype = np.dtype(np.object_)
703
704 elif is_integer(fill_value):
705 if issubclass(dtype.type, np.bool_):
706 dtype = np.dtype(np.object_)
707
708 elif issubclass(dtype.type, np.integer):
709 if not np.can_cast(fill_value, dtype):
710 # upcast to prevent overflow
711 mst = np.min_scalar_type(fill_value)
712 dtype = np.promote_types(dtype, mst)
713 if dtype.kind == "f":
714 # Case where we disagree with numpy
715 dtype = np.dtype(np.object_)
716
717 elif is_complex(fill_value):
718 if issubclass(dtype.type, np.bool_):
719 dtype = np.dtype(np.object_)
720
721 elif issubclass(dtype.type, (np.integer, np.floating)):
722 mst = np.min_scalar_type(fill_value)
723 dtype = np.promote_types(dtype, mst)
724
725 elif dtype.kind == "c":
726 mst = np.min_scalar_type(fill_value)
727 if mst > dtype:
728 # e.g. mst is np.complex128 and dtype is np.complex64
729 dtype = mst
730
731 else:
732 dtype = np.dtype(np.object_)
733
734 # in case we have a string that looked like a number
735 if issubclass(dtype.type, (bytes, str)):
736 dtype = np.dtype(np.object_)
737
738 fill_value = _ensure_dtype_type(fill_value, dtype)
739 return dtype, fill_value
740
741
742def _ensure_dtype_type(value, dtype: np.dtype):
743 """
744 Ensure that the given value is an instance of the given dtype.
745
746 e.g. if out dtype is np.complex64_, we should have an instance of that
747 as opposed to a python complex object.
748
749 Parameters
750 ----------
751 value : object
752 dtype : np.dtype
753
754 Returns
755 -------
756 object
757 """
758 # Start with exceptions in which we do _not_ cast to numpy types
759
760 if dtype == _dtype_obj:
761 return value
762
763 # Note: before we get here we have already excluded isna(value)
764 return dtype.type(value)
765
766
767def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
768 """
769 Interpret the dtype from a scalar or array.
770
771 Parameters
772 ----------
773 val : object
774 pandas_dtype : bool, default False
775 whether to infer dtype including pandas extension types.
776 If False, scalar/array belongs to pandas extension types is inferred as
777 object
778 """
779 if not is_list_like(val):
780 return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype)
781 return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
782
783
784def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
785 """
786 Interpret the dtype from a scalar.
787
788 Parameters
789 ----------
790 pandas_dtype : bool, default False
791 whether to infer dtype including pandas extension types.
792 If False, scalar belongs to pandas extension types is inferred as
793 object
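
    Examples
    --------
    Illustrative inferences:

    >>> infer_dtype_from_scalar(1)
    (dtype('int64'), 1)
    >>> infer_dtype_from_scalar("foo")
    (dtype('O'), 'foo')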
794 """
795 dtype: DtypeObj = _dtype_obj
796
797 # a 1-element ndarray
798 if isinstance(val, np.ndarray):
799 if val.ndim != 0:
800 msg = "invalid ndarray passed to infer_dtype_from_scalar"
801 raise ValueError(msg)
802
803 dtype = val.dtype
804 val = lib.item_from_zerodim(val)
805
806 elif isinstance(val, str):
807 # If we create an empty array using a string to infer
808 # the dtype, NumPy will only allocate one character per entry
809 # so this is kind of bad. Alternately we could use np.repeat
810 # instead of np.empty (but then you still don't want things
811 # coming out as np.str_!
812
813 dtype = _dtype_obj
814
815 elif isinstance(val, (np.datetime64, dt.datetime)):
816 try:
817 val = Timestamp(val)
818 if val is not NaT:
819 val = val.as_unit("ns")
820 except OutOfBoundsDatetime:
821 return _dtype_obj, val
822
823 if val is NaT or val.tz is None:
824 val = val.to_datetime64()
825 dtype = val.dtype
826 # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
827 else:
828 if pandas_dtype:
829 dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
830 else:
831 # return datetimetz as object
832 return _dtype_obj, val
833
834 elif isinstance(val, (np.timedelta64, dt.timedelta)):
835 try:
836 val = Timedelta(val)
837 except (OutOfBoundsTimedelta, OverflowError):
838 dtype = _dtype_obj
839 else:
840 dtype = np.dtype("m8[ns]")
841 val = np.timedelta64(val.value, "ns")
842
843 elif is_bool(val):
844 dtype = np.dtype(np.bool_)
845
846 elif is_integer(val):
847 if isinstance(val, np.integer):
848 dtype = np.dtype(type(val))
849 else:
850 dtype = np.dtype(np.int64)
851
852 try:
853 np.array(val, dtype=dtype)
854 except OverflowError:
855 dtype = np.array(val).dtype
856
857 elif is_float(val):
858 if isinstance(val, np.floating):
859 dtype = np.dtype(type(val))
860 else:
861 dtype = np.dtype(np.float64)
862
863 elif is_complex(val):
864 dtype = np.dtype(np.complex_)
865
866 elif pandas_dtype:
867 if lib.is_period(val):
868 dtype = PeriodDtype(freq=val.freq)
869 elif lib.is_interval(val):
870 subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
871 dtype = IntervalDtype(subtype=subtype, closed=val.closed)
872
873 return dtype, val
874
875
876def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
877 """
878 Convert datetimelike-keyed dicts to a Timestamp-keyed dict.
879
880 Parameters
881 ----------
882 d: dict-like object
883
884 Returns
885 -------
886 dict
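
    Examples
    --------
    Illustrative (datetime-like keys are boxed to Timestamp):

    >>> dict_compat({np.datetime64("2011-01-01"): 1})
    {Timestamp('2011-01-01 00:00:00'): 1}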
887 """
888 return {maybe_box_datetimelike(key): value for key, value in d.items()}
889
890
891def infer_dtype_from_array(
892 arr, pandas_dtype: bool = False
893) -> tuple[DtypeObj, ArrayLike]:
894 """
895 Infer the dtype from an array.
896
897 Parameters
898 ----------
899 arr : array
900 pandas_dtype : bool, default False
901 whether to infer dtype including pandas extension types.
902 If False, array belongs to pandas extension types
903 is inferred as object
904
905 Returns
906 -------
907 tuple (numpy-compat/pandas-compat dtype, array)
908
909 Notes
910 -----
911 if pandas_dtype=False. these infer to numpy dtypes
912 exactly with the exception that mixed / object dtypes
913 are not coerced by stringifying or conversion
914
915 if pandas_dtype=True. datetime64tz-aware/categorical
916 types will retain there character.
917
918 Examples
919 --------
920 >>> np.asarray([1, '1'])
921 array(['1', '1'], dtype='<U21')
922
923 >>> infer_dtype_from_array([1, '1'])
924 (dtype('O'), [1, '1'])
925 """
926 if isinstance(arr, np.ndarray):
927 return arr.dtype, arr
928
929 if not is_list_like(arr):
930 raise TypeError("'arr' must be list-like")
931
932 if pandas_dtype and is_extension_array_dtype(arr):
933 return arr.dtype, arr
934
935 elif isinstance(arr, ABCSeries):
936 return arr.dtype, np.asarray(arr)
937
938 # don't force numpy coerce with nan's
939 inferred = lib.infer_dtype(arr, skipna=False)
940 if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
941 return (np.dtype(np.object_), arr)
942
943 arr = np.asarray(arr)
944 return arr.dtype, arr
945
946
947def _maybe_infer_dtype_type(element):
948 """
949 Try to infer an object's dtype, for use in arithmetic ops.
950
951 Uses `element.dtype` if that's available.
952 Objects implementing the iterator protocol are cast to a NumPy array,
953 and from there the array's type is used.
954
955 Parameters
956 ----------
957 element : object
958 Possibly has a `.dtype` attribute, and possibly the iterator
959 protocol.
960
961 Returns
962 -------
963 tipo : type
964
965 Examples
966 --------
967 >>> from collections import namedtuple
968 >>> Foo = namedtuple("Foo", "dtype")
969 >>> _maybe_infer_dtype_type(Foo(np.dtype("i8")))
970 dtype('int64')
971 """
972 tipo = None
973 if hasattr(element, "dtype"):
974 tipo = element.dtype
975 elif is_list_like(element):
976 element = np.asarray(element)
977 tipo = element.dtype
978 return tipo
979
980
981def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
982 """
983 Change string like dtypes to object for
984 ``DataFrame.select_dtypes()``.
985 """
986 # error: Argument 1 to <set> has incompatible type "Type[generic]"; expected
987 # "Union[dtype[Any], ExtensionDtype, None]"
988 # error: Argument 2 to <set> has incompatible type "Type[generic]"; expected
989 # "Union[dtype[Any], ExtensionDtype, None]"
990 non_string_dtypes = dtype_set - {
991 np.dtype("S").type, # type: ignore[arg-type]
992 np.dtype("<U").type, # type: ignore[arg-type]
993 }
994 if non_string_dtypes != dtype_set:
995 raise TypeError("string dtypes are not allowed, use 'object' instead")
996
997
998def coerce_indexer_dtype(indexer, categories) -> np.ndarray:
999 """coerce the indexer input array to the smallest dtype possible"""
1000 length = len(categories)
1001 if length < _int8_max:
1002 return ensure_int8(indexer)
1003 elif length < _int16_max:
1004 return ensure_int16(indexer)
1005 elif length < _int32_max:
1006 return ensure_int32(indexer)
1007 return ensure_int64(indexer)
1008
1009
1010def convert_dtypes(
1011 input_array: ArrayLike,
1012 convert_string: bool = True,
1013 convert_integer: bool = True,
1014 convert_boolean: bool = True,
1015 convert_floating: bool = True,
1016 infer_objects: bool = False,
1017 dtype_backend: Literal["numpy_nullable", "pyarrow"] = "numpy_nullable",
1018) -> DtypeObj:
1019 """
1020 Convert objects to best possible type, and optionally,
1021 to types supporting ``pd.NA``.
1022
1023 Parameters
1024 ----------
1025 input_array : ExtensionArray or np.ndarray
1026 convert_string : bool, default True
1027 Whether object dtypes should be converted to ``StringDtype()``.
1028 convert_integer : bool, default True
1029 Whether, if possible, conversion can be done to integer extension types.
1030 convert_boolean : bool, defaults True
1031 Whether object dtypes should be converted to ``BooleanDtypes()``.
1032 convert_floating : bool, defaults True
1033 Whether, if possible, conversion can be done to floating extension types.
1034 If `convert_integer` is also True, preference will be give to integer
1035 dtypes if the floats can be faithfully casted to integers.
1036 infer_objects : bool, defaults False
1037 Whether to also infer objects to float/int if possible. Is only hit if the
1038 object array contains pd.NA.
1039 dtype_backend : str, default "numpy_nullable"
1040 Nullable dtype implementation to use.
1041
1042 * "numpy_nullable" returns numpy-backed nullable types
1043 * "pyarrow" returns pyarrow-backed nullable types using ``ArrowDtype``
1044
1045 Returns
1046 -------
1047 np.dtype, or ExtensionDtype
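
    Examples
    --------
    Illustrative (a plain int64 ndarray maps to the nullable Int64 dtype):

    >>> convert_dtypes(np.array([1, 2, 3]))
    Int64Dtype()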
1048 """
1049 inferred_dtype: str | DtypeObj
1050
1051 from pandas.core.arrays.arrow.dtype import ArrowDtype
1052
1053 if (
1054 convert_string or convert_integer or convert_boolean or convert_floating
1055 ) and isinstance(input_array, np.ndarray):
1056 if is_object_dtype(input_array.dtype):
1057 inferred_dtype = lib.infer_dtype(input_array)
1058 else:
1059 inferred_dtype = input_array.dtype
1060
1061 if is_string_dtype(inferred_dtype):
1062 if not convert_string or inferred_dtype == "bytes":
1063 inferred_dtype = input_array.dtype
1064 else:
1065 inferred_dtype = pandas_dtype_func("string")
1066
1067 if convert_integer:
1068 target_int_dtype = pandas_dtype_func("Int64")
1069
1070 if is_integer_dtype(input_array.dtype):
1071 from pandas.core.arrays.integer import INT_STR_TO_DTYPE
1072
1073 inferred_dtype = INT_STR_TO_DTYPE.get(
1074 input_array.dtype.name, target_int_dtype
1075 )
1076 elif is_numeric_dtype(input_array.dtype):
1077 # TODO: de-dup with maybe_cast_to_integer_array?
1078 arr = input_array[notna(input_array)]
1079 if (arr.astype(int) == arr).all():
1080 inferred_dtype = target_int_dtype
1081 else:
1082 inferred_dtype = input_array.dtype
1083 elif (
1084 infer_objects
1085 and is_object_dtype(input_array.dtype)
1086 and (isinstance(inferred_dtype, str) and inferred_dtype == "integer")
1087 ):
1088 inferred_dtype = target_int_dtype
1089
1090 if convert_floating:
1091 if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
1092 input_array.dtype
1093 ):
1094 from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE
1095
1096 inferred_float_dtype: DtypeObj = FLOAT_STR_TO_DTYPE.get(
1097 input_array.dtype.name, pandas_dtype_func("Float64")
1098 )
1099 # if we could also convert to integer, check if all floats
1100 # are actually integers
1101 if convert_integer:
1102 # TODO: de-dup with maybe_cast_to_integer_array?
1103 arr = input_array[notna(input_array)]
1104 if (arr.astype(int) == arr).all():
1105 inferred_dtype = pandas_dtype_func("Int64")
1106 else:
1107 inferred_dtype = inferred_float_dtype
1108 else:
1109 inferred_dtype = inferred_float_dtype
1110 elif (
1111 infer_objects
1112 and is_object_dtype(input_array.dtype)
1113 and (
1114 isinstance(inferred_dtype, str)
1115 and inferred_dtype == "mixed-integer-float"
1116 )
1117 ):
1118 inferred_dtype = pandas_dtype_func("Float64")
1119
1120 if convert_boolean:
1121 if is_bool_dtype(input_array.dtype):
1122 inferred_dtype = pandas_dtype_func("boolean")
1123 elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
1124 inferred_dtype = pandas_dtype_func("boolean")
1125
1126 if isinstance(inferred_dtype, str):
1127 # If we couldn't do anything else, then we retain the dtype
1128 inferred_dtype = input_array.dtype
1129
1130 else:
1131 inferred_dtype = input_array.dtype
1132
1133 if dtype_backend == "pyarrow":
1134 from pandas.core.arrays.arrow.array import to_pyarrow_type
1135 from pandas.core.arrays.string_ import StringDtype
1136
1137 assert not isinstance(inferred_dtype, str)
1138
1139 if (
1140 (convert_integer and inferred_dtype.kind in "iu")
1141 or (convert_floating and inferred_dtype.kind in "fc")
1142 or (convert_boolean and inferred_dtype.kind == "b")
1143 or (convert_string and isinstance(inferred_dtype, StringDtype))
1144 or (
1145 inferred_dtype.kind not in "iufcb"
1146 and not isinstance(inferred_dtype, StringDtype)
1147 )
1148 ):
1149 if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(
1150 inferred_dtype, DatetimeTZDtype
1151 ):
1152 base_dtype = inferred_dtype.base
1153 elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
1154 base_dtype = inferred_dtype.numpy_dtype
1155 elif isinstance(inferred_dtype, StringDtype):
1156 base_dtype = np.dtype(str)
1157 else:
1158 base_dtype = inferred_dtype
1159 pa_type = to_pyarrow_type(base_dtype)
1160 if pa_type is not None:
1161 inferred_dtype = ArrowDtype(pa_type)
1162 elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
1163 # GH 53648
1164 inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]
1165
1166 # error: Incompatible return value type (got "Union[str, Union[dtype[Any],
1167 # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
1168 return inferred_dtype # type: ignore[return-value]
1169
1170
1171def maybe_infer_to_datetimelike(
1172 value: npt.NDArray[np.object_],
1173) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
1174 """
1175 we might have a array (or single object) that is datetime like,
1176 and no dtype is passed don't change the value unless we find a
1177 datetime/timedelta set
1178
1179 this is pretty strict in that a datetime/timedelta is REQUIRED
1180 in addition to possible nulls/string likes
1181
1182 Parameters
1183 ----------
1184 value : np.ndarray[object]
1185
1186 Returns
1187 -------
1188 np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
1189
1190 """
1191 if not isinstance(value, np.ndarray) or value.dtype != object:
1192 # Caller is responsible for passing only ndarray[object]
1193 raise TypeError(type(value)) # pragma: no cover
1194 if value.ndim != 1:
1195 # Caller is responsible
1196 raise ValueError(value.ndim) # pragma: no cover
1197
1198 if not len(value):
1199 return value
1200
1201 # error: Incompatible return value type (got "Union[ExtensionArray,
1202 # ndarray[Any, Any]]", expected "Union[ndarray[Any, Any], DatetimeArray,
1203 # TimedeltaArray, PeriodArray, IntervalArray]")
1204 return lib.maybe_convert_objects( # type: ignore[return-value]
1205 value,
1206 # Here we do not convert numeric dtypes, as if we wanted that,
1207 # numpy would have done it for us.
1208 convert_numeric=False,
1209 convert_period=True,
1210 convert_interval=True,
1211 convert_timedelta=True,
1212 convert_datetime=True,
1213 dtype_if_all_nat=np.dtype("M8[ns]"),
1214 )
1215
1216
1217def maybe_cast_to_datetime(
1218 value: np.ndarray | list, dtype: np.dtype
1219) -> ExtensionArray | np.ndarray:
1220 """
1221 try to cast the array/value to a datetimelike dtype, converting float
1222 nan to iNaT
1223
1224 Caller is responsible for handling ExtensionDtype cases and non dt64/td64
1225 cases.
1226 """
1227 from pandas.core.arrays.datetimes import DatetimeArray
1228 from pandas.core.arrays.timedeltas import TimedeltaArray
1229
1230 assert dtype.kind in ["m", "M"]
1231 if not is_list_like(value):
1232 raise TypeError("value must be listlike")
1233
1234 # TODO: _from_sequence would raise ValueError in cases where
1235 # _ensure_nanosecond_dtype raises TypeError
1236 _ensure_nanosecond_dtype(dtype)
1237
1238 if is_timedelta64_dtype(dtype):
1239 res = TimedeltaArray._from_sequence(value, dtype=dtype)
1240 return res
1241 else:
1242 try:
1243 dta = DatetimeArray._from_sequence(value, dtype=dtype)
1244 except ValueError as err:
1245 # We can give a Series-specific exception message.
1246 if "cannot supply both a tz and a timezone-naive dtype" in str(err):
1247 raise ValueError(
1248 "Cannot convert timezone-aware data to "
1249 "timezone-naive dtype. Use "
1250 "pd.Series(values).dt.tz_localize(None) instead."
1251 ) from err
1252 raise
1253
1254 return dta
1255
1256
1257def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
1258 """
1259 Convert dtypes with granularity less than nanosecond to nanosecond
1260
1261 >>> _ensure_nanosecond_dtype(np.dtype("M8[us]"))
1262
1263 >>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
1264 Traceback (most recent call last):
1265 ...
1266 TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
1267
1268 >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
1269 Traceback (most recent call last):
1270 ...
1271 TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
1272 """ # noqa:E501
1273 msg = (
1274 f"The '{dtype.name}' dtype has no unit. "
1275 f"Please pass in '{dtype.name}[ns]' instead."
1276 )
1277
1278 # unpack e.g. SparseDtype
1279 dtype = getattr(dtype, "subtype", dtype)
1280
1281 if not isinstance(dtype, np.dtype):
1282 # i.e. datetime64tz
1283 pass
1284
1285 elif dtype.kind in ["m", "M"]:
1286 reso = get_unit_from_dtype(dtype)
1287 if not is_supported_unit(reso):
1288 # pre-2.0 we would silently swap in nanos for lower-resolutions,
1289 # raise for above-nano resolutions
1290 if dtype.name in ["datetime64", "timedelta64"]:
1291 raise ValueError(msg)
1292 # TODO: ValueError or TypeError? existing test
1293 # test_constructor_generic_timestamp_bad_frequency expects TypeError
1294 raise TypeError(
1295 f"dtype={dtype} is not supported. Supported resolutions are 's', "
1296 "'ms', 'us', and 'ns'"
1297 )
1298
1299
1300# TODO: other value-dependent functions to standardize here include
1301# Index._find_common_type_compat
1302def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
1303 """
1304 Find the type/dtype for a the result of an operation between these objects.
1305
1306 This is similar to find_common_type, but looks at the objects instead
1307 of just their dtypes. This can be useful in particular when one of the
1308 objects does not have a `dtype`.
1309
1310 Parameters
1311 ----------
1312 left : np.ndarray or ExtensionArray
1313 right : Any
1314
1315 Returns
1316 -------
1317 np.dtype or ExtensionDtype
1318
1319 See also
1320 --------
1321 find_common_type
1322 numpy.result_type
1323 """
1324 new_dtype: DtypeObj
1325
1326 if (
1327 isinstance(left, np.ndarray)
1328 and left.dtype.kind in ["i", "u", "c"]
1329 and (lib.is_integer(right) or lib.is_float(right))
1330 ):
1331 # e.g. with int8 dtype and right=512, we want to end up with
1332 # np.int16, whereas infer_dtype_from(512) gives np.int64,
1333 # which will make us upcast too far.
1334 if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
1335 right = int(right)
1336
1337 new_dtype = np.result_type(left, right)
1338
1339 elif is_valid_na_for_dtype(right, left.dtype):
1340 # e.g. IntervalDtype[int] and None/np.nan
1341 new_dtype = ensure_dtype_can_hold_na(left.dtype)
1342
1343 else:
1344 dtype, _ = infer_dtype_from(right, pandas_dtype=True)
1345
1346 new_dtype = find_common_type([left.dtype, dtype])
1347
1348 return new_dtype
1349
1350
1351def common_dtype_categorical_compat(
1352 objs: list[Index | ArrayLike], dtype: DtypeObj
1353) -> DtypeObj:
1354 """
1355 Update the result of find_common_type to account for NAs in a Categorical.
1356
1357 Parameters
1358 ----------
1359 objs : list[np.ndarray | ExtensionArray | Index]
1360 dtype : np.dtype or ExtensionDtype
1361
1362 Returns
1363 -------
1364 np.dtype or ExtensionDtype
1365 """
1366 # GH#38240
1367
1368 # TODO: more generally, could do `not can_hold_na(dtype)`
1369 if isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]:
1370 for obj in objs:
1371 # We don't want to accientally allow e.g. "categorical" str here
1372 obj_dtype = getattr(obj, "dtype", None)
1373 if isinstance(obj_dtype, CategoricalDtype):
1374 if isinstance(obj, ABCIndex):
1375 # This check may already be cached
1376 hasnas = obj.hasnans
1377 else:
1378 # Categorical
1379 hasnas = cast("Categorical", obj)._hasna
1380
1381 if hasnas:
1382 # see test_union_int_categorical_with_nan
1383 dtype = np.dtype(np.float64)
1384 break
1385 return dtype
1386
1387
1388def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
1389 """
1390 np.find_common_type implementation pre-1.25 deprecation using np.result_type
1391 https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065
1392
1393 Parameters
1394 ----------
1395 dtypes : np.dtypes
1396
1397 Returns
1398 -------
1399 np.dtype
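
    Examples
    --------
    Illustrative (datetime/string promotion falls back to object):

    >>> np_find_common_type(np.dtype(np.int64), np.dtype(np.float32))
    dtype('float64')
    >>> np_find_common_type(np.dtype("M8[ns]"), np.dtype(np.int64))
    dtype('O')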
1400 """
1401 try:
1402 common_dtype = np.result_type(*dtypes)
1403 if common_dtype.kind in "mMSU":
1404 # NumPy promotion currently (1.25) misbehaves for for times and strings,
1405 # so fall back to object (find_common_dtype did unless there
1406 # was only one dtype)
1407 common_dtype = np.dtype("O")
1408
1409 except TypeError:
1410 common_dtype = np.dtype("O")
1411 return common_dtype
1412
1413
1414@overload
1415def find_common_type(types: list[np.dtype]) -> np.dtype:
1416 ...
1417
1418
1419@overload
1420def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
1421 ...
1422
1423
1424@overload
1425def find_common_type(types: list[DtypeObj]) -> DtypeObj:
1426 ...
1427
1428
1429def find_common_type(types):
1430 """
1431 Find a common data type among the given dtypes.
1432
1433 Parameters
1434 ----------
1435 types : list of dtypes
1436
1437 Returns
1438 -------
1439 pandas extension or numpy dtype
1440
1441 See Also
1442 --------
1443 numpy.find_common_type
1444
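    Examples
    --------
    Illustrative (bools do not silently combine with numeric dtypes):

    >>> find_common_type([np.dtype(np.int64), np.dtype(np.float32)])
    dtype('float64')
    >>> find_common_type([np.dtype(np.int64), np.dtype(np.bool_)])
    dtype('O')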
1445 """
1446 if not types:
1447 raise ValueError("no types given")
1448
1449 first = types[0]
1450
1451 # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
1452 # => object
1453 if lib.dtypes_all_equal(list(types)):
1454 return first
1455
1456 # get unique types (dict.fromkeys is used as order-preserving set())
1457 types = list(dict.fromkeys(types).keys())
1458
1459 if any(isinstance(t, ExtensionDtype) for t in types):
1460 for t in types:
1461 if isinstance(t, ExtensionDtype):
1462 res = t._get_common_dtype(types)
1463 if res is not None:
1464 return res
1465 return np.dtype("object")
1466
1467 # take lowest unit
1468 if all(is_datetime64_dtype(t) for t in types):
1469 return np.dtype(max(types))
1470 if all(is_timedelta64_dtype(t) for t in types):
1471 return np.dtype(max(types))
1472
1473 # don't mix bool / int or float or complex
1474 # this is different from numpy, which casts bool with float/int as int
1475 has_bools = any(is_bool_dtype(t) for t in types)
1476 if has_bools:
1477 for t in types:
1478 if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
1479 return np.dtype("object")
1480
1481 return np_find_common_type(*types)
1482
1483
1484def construct_2d_arraylike_from_scalar(
1485 value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
1486) -> np.ndarray:
1487 shape = (length, width)
1488
1489 if dtype.kind in ["m", "M"]:
1490 value = _maybe_box_and_unbox_datetimelike(value, dtype)
1491 elif dtype == _dtype_obj:
1492 if isinstance(value, (np.timedelta64, np.datetime64)):
1493 # calling np.array below would cast to pytimedelta/pydatetime
1494 out = np.empty(shape, dtype=object)
1495 out.fill(value)
1496 return out
1497
1498 # Attempt to coerce to a numpy array
1499 try:
1500 arr = np.array(value, dtype=dtype, copy=copy)
1501 except (ValueError, TypeError) as err:
1502 raise TypeError(
1503 f"DataFrame constructor called with incompatible data and dtype: {err}"
1504 ) from err
1505
1506 if arr.ndim != 0:
1507 raise ValueError("DataFrame constructor not properly called!")
1508
1509 return np.full(shape, arr)
1510
1511
1512def construct_1d_arraylike_from_scalar(
1513 value: Scalar, length: int, dtype: DtypeObj | None
1514) -> ArrayLike:
1515 """
1516 create a np.ndarray / pandas type of specified shape and dtype
1517 filled with values
1518
1519 Parameters
1520 ----------
1521 value : scalar value
1522 length : int
1523 dtype : pandas_dtype or np.dtype
1524
1525 Returns
1526 -------
1527 np.ndarray / pandas type of length, filled with value
1528
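    Examples
    --------
    Illustrative (scalar broadcast to the requested length and dtype):

    >>> construct_1d_arraylike_from_scalar(1.0, 3, np.dtype(np.float64))
    array([1., 1., 1.])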
1529 """
1530
1531 if dtype is None:
1532 try:
1533 dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True)
1534 except OutOfBoundsDatetime:
1535 dtype = _dtype_obj
1536
1537 if isinstance(dtype, ExtensionDtype):
1538 cls = dtype.construct_array_type()
1539 seq = [] if length == 0 else [value]
1540 subarr = cls._from_sequence(seq, dtype=dtype).repeat(length)
1541
1542 else:
1543 if length and is_integer_dtype(dtype) and isna(value):
1544 # coerce if we have nan for an integer dtype
1545 dtype = np.dtype("float64")
1546 elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
1547 # we need to coerce to object dtype to avoid
1548 # to allow numpy to take our string as a scalar value
1549 dtype = np.dtype("object")
1550 if not isna(value):
1551 value = ensure_str(value)
1552 elif dtype.kind in ["M", "m"]:
1553 value = _maybe_box_and_unbox_datetimelike(value, dtype)
1554
1555 subarr = np.empty(length, dtype=dtype)
1556 if length:
1557 # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes
1558 subarr.fill(value)
1559
1560 return subarr
1561
1562
1563def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
1564 # Caller is responsible for checking dtype.kind in ["m", "M"]
1565
1566 if isinstance(value, dt.datetime):
1567 # we dont want to box dt64, in particular datetime64("NaT")
1568 value = maybe_box_datetimelike(value, dtype)
1569
1570 return _maybe_unbox_datetimelike(value, dtype)
1571
1572
1573def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
1574 """
1575 Transform any list-like object in a 1-dimensional numpy array of object
1576 dtype.
1577
1578 Parameters
1579 ----------
1580 values : any iterable which has a len()
1581
1582 Raises
1583 ------
1584 TypeError
1585 * If `values` does not have a len()
1586
1587 Returns
1588 -------
1589 1-dimensional numpy array of dtype object
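
    Examples
    --------
    Illustrative (nested list-likes are kept as objects, not extra dimensions):

    >>> construct_1d_object_array_from_listlike([[1, 2], [3, 4]])
    array([list([1, 2]), list([3, 4])], dtype=object)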
1590 """
1591 # numpy will try to interpret nested lists as further dimensions, hence
1592 # making a 1D array that contains list-likes is a bit tricky:
1593 result = np.empty(len(values), dtype="object")
1594 result[:] = values
1595 return result
1596
1597
1598def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
1599 """
1600 Takes any dtype and returns the casted version, raising for when data is
1601 incompatible with integer/unsigned integer dtypes.
1602
1603 Parameters
1604 ----------
1605 arr : np.ndarray or list
1606 The array to cast.
1607 dtype : np.dtype
1608 The integer dtype to cast the array to.
1609
1610 Returns
1611 -------
1612 ndarray
1613 Array of integer or unsigned integer dtype.
1614
1615 Raises
1616 ------
1617 OverflowError : the dtype is incompatible with the data
1618 ValueError : loss of precision has occurred during casting
1619
1620 Examples
1621 --------
1622 If you try to coerce negative values to unsigned integers, it raises:
1623
1624 >>> pd.Series([-1], dtype="uint64")
1625 Traceback (most recent call last):
1626 ...
1627 OverflowError: Trying to coerce negative values to unsigned integers
1628
1629 Also, if you try to coerce float values to integers, it raises:
1630
1631 >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
1632 Traceback (most recent call last):
1633 ...
1634 ValueError: Trying to coerce float values to integers
1635 """
1636 assert is_integer_dtype(dtype)
1637
1638 try:
1639 if not isinstance(arr, np.ndarray):
1640 with warnings.catch_warnings():
1641 # We already disallow dtype=uint w/ negative numbers
1642 # (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
1643 warnings.filterwarnings(
1644 "ignore",
1645 "NumPy will stop allowing conversion of out-of-bound Python int",
1646 DeprecationWarning,
1647 )
1648 casted = np.array(arr, dtype=dtype, copy=False)
1649 else:
1650 with warnings.catch_warnings():
1651 warnings.filterwarnings("ignore", category=RuntimeWarning)
1652 casted = arr.astype(dtype, copy=False)
1653 except OverflowError as err:
1654 raise OverflowError(
1655 "The elements provided in the data cannot all be "
1656 f"casted to the dtype {dtype}"
1657 ) from err
1658
1659 if isinstance(arr, np.ndarray) and arr.dtype == dtype:
1660 # avoid expensive array_equal check
1661 return casted
1662
1663 with warnings.catch_warnings():
1664 warnings.filterwarnings("ignore", category=RuntimeWarning)
1665 if np.array_equal(arr, casted):
1666 return casted
1667
1668 # We do this casting to allow for proper
1669 # data and dtype checking.
1670 #
1671 # We didn't do this earlier because NumPy
1672 # doesn't handle `uint64` correctly.
1673 arr = np.asarray(arr)
1674
1675 if np.issubdtype(arr.dtype, str):
1676 if (casted.astype(str) == arr).all():
1677 return casted
1678 raise ValueError(f"string values cannot be losslessly cast to {dtype}")
1679
1680 if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
1681 raise OverflowError("Trying to coerce negative values to unsigned integers")
1682
1683 if is_float_dtype(arr.dtype):
1684 if not np.isfinite(arr).all():
1685 raise IntCastingNaNError(
1686 "Cannot convert non-finite values (NA or inf) to integer"
1687 )
1688 raise ValueError("Trying to coerce float values to integers")
1689 if is_object_dtype(arr.dtype):
1690 raise ValueError("Trying to coerce float values to integers")
1691
1692 if casted.dtype < arr.dtype:
1693 # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
1694 raise ValueError(
1695 f"Values are too large to be losslessly converted to {dtype}. "
1696 f"To cast anyway, use pd.Series(values).astype({dtype})"
1697 )
1698
1699 if arr.dtype.kind in ["m", "M"]:
1700 # test_constructor_maskedarray_nonfloat
1701 raise TypeError(
1702 f"Constructing a Series or DataFrame from {arr.dtype} values and "
1703 f"dtype={dtype} is not supported. Use values.view({dtype}) instead."
1704 )
1705
1706 # No known cases that get here, but raising explicitly to cover our bases.
1707 raise ValueError(f"values cannot be losslessly cast to {dtype}")
1708
1709
1710def can_hold_element(arr: ArrayLike, element: Any) -> bool:
1711 """
1712 Can we do an inplace setitem with this element in an array with this dtype?
1713
1714 Parameters
1715 ----------
1716 arr : np.ndarray or ExtensionArray
1717 element : Any
1718
1719 Returns
1720 -------
1721 bool
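
    Examples
    --------
    Illustrative (512 overflows int8 but fits int64):

    >>> can_hold_element(np.array([1, 2], dtype=np.int64), 512)
    True
    >>> can_hold_element(np.array([1, 2], dtype=np.int8), 512)
    False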
1722 """
1723 dtype = arr.dtype
1724 if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]:
1725 if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)):
1726 # np.dtype here catches datetime64ns and timedelta64ns; we assume
1727 # in this case that we have DatetimeArray/TimedeltaArray
1728 arr = cast(
1729 "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr
1730 )
1731 try:
1732 arr._validate_setitem_value(element)
1733 return True
1734 except (ValueError, TypeError):
1735 # TODO: re-use _catch_deprecated_value_error to ensure we are
1736 # strict about what exceptions we allow through here.
1737 return False
1738
1739 # This is technically incorrect, but maintains the behavior of
1740 # ExtensionBlock._can_hold_element
1741 return True
1742
1743 try:
1744 np_can_hold_element(dtype, element)
1745 return True
1746 except (TypeError, LossySetitemError):
1747 return False
1748
1749
1750def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
1751 """
1752 Raise if we cannot losslessly set this element into an ndarray with this dtype.
1753
1754 Specifically about places where we disagree with numpy. i.e. there are
1755 cases where numpy will raise in doing the setitem that we do not check
1756 for here, e.g. setting str "X" into a numeric ndarray.
1757
1758 Returns
1759 -------
1760 Any
1761 The element, potentially cast to the dtype.
1762
1763 Raises
1764 ------
1765 ValueError : If we cannot losslessly store this element with this dtype.
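
    Examples
    --------
    Illustrative (512 cannot be stored losslessly in an int8 ndarray):

    >>> np_can_hold_element(np.dtype(np.int8), 512)
    Traceback (most recent call last):
        ...
    pandas.errors.LossySetitemError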
1766 """
1767 if dtype == _dtype_obj:
1768 return element
1769
1770 tipo = _maybe_infer_dtype_type(element)
1771
1772 if dtype.kind in ["i", "u"]:
1773 if isinstance(element, range):
1774 if _dtype_can_hold_range(element, dtype):
1775 return element
1776 raise LossySetitemError
1777
1778 if is_integer(element) or (is_float(element) and element.is_integer()):
1779 # e.g. test_setitem_series_int8 if we have a python int 1
1780 # tipo may be np.int32, despite the fact that it will fit
1781 # in smaller int dtypes.
1782 info = np.iinfo(dtype)
1783 if info.min <= element <= info.max:
1784 return dtype.type(element)
1785 raise LossySetitemError
1786
1787 if tipo is not None:
1788 if tipo.kind not in ["i", "u"]:
1789 if isinstance(element, np.ndarray) and element.dtype.kind == "f":
1790 # If all can be losslessly cast to integers, then we can hold them
1791 with np.errstate(invalid="ignore"):
1792 # We check afterwards if cast was losslessly, so no need to show
1793 # the warning
1794 casted = element.astype(dtype)
1795 comp = casted == element
1796 if comp.all():
1797 # Return the casted values bc they can be passed to
1798 # np.putmask, whereas the raw values cannot.
1799 # see TestSetitemFloatNDarrayIntoIntegerSeries
1800 return casted
1801 raise LossySetitemError
1802
1803 # Anything other than integer we cannot hold
1804 raise LossySetitemError
1805 if (
1806 dtype.kind == "u"
1807 and isinstance(element, np.ndarray)
1808 and element.dtype.kind == "i"
1809 ):
1810 # see test_where_uint64
1811 casted = element.astype(dtype)
1812 if (casted == element).all():
1813 # TODO: faster to check (element >=0).all()? potential
1814 # itemsize issues there?
1815 return casted
1816 raise LossySetitemError
1817 if dtype.itemsize < tipo.itemsize:
1818 raise LossySetitemError
1819 if not isinstance(tipo, np.dtype):
1820 # i.e. nullable IntegerDtype; we can put this into an ndarray
1821 # losslessly iff it has no NAs
1822 if element._hasna:
1823 raise LossySetitemError
1824 return element
1825
1826 return element
1827
1828 raise LossySetitemError
1829
1830 if dtype.kind == "f":
1831 if lib.is_integer(element) or lib.is_float(element):
1832 casted = dtype.type(element)
1833 if np.isnan(casted) or casted == element:
1834 return casted
1835 # otherwise e.g. overflow see TestCoercionFloat32
1836 raise LossySetitemError
1837
1838 if tipo is not None:
1839 # TODO: itemsize check?
1840 if tipo.kind not in ["f", "i", "u"]:
1841 # Anything other than float/integer we cannot hold
1842 raise LossySetitemError
1843 if not isinstance(tipo, np.dtype):
1844 # i.e. nullable IntegerDtype or FloatingDtype;
1845 # we can put this into an ndarray losslessly iff it has no NAs
1846 if element._hasna:
1847 raise LossySetitemError
1848 return element
1849 elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
1850 if isinstance(element, np.ndarray):
1851 # e.g. TestDataFrameIndexingWhere::test_where_alignment
1852 casted = element.astype(dtype)
1853 if np.array_equal(casted, element, equal_nan=True):
1854 return casted
1855 raise LossySetitemError
1856
1857 return element
1858
1859 raise LossySetitemError
1860
1861 if dtype.kind == "c":
1862 if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
1863 if np.isnan(element):
1864 # see test_where_complex GH#6345
1865 return dtype.type(element)
1866
1867 with warnings.catch_warnings():
1868 warnings.filterwarnings("ignore")
1869 casted = dtype.type(element)
1870 if casted == element:
1871 return casted
1872 # otherwise e.g. overflow see test_32878_complex_itemsize
1873 raise LossySetitemError
1874
1875 if tipo is not None:
1876 if tipo.kind in ["c", "f", "i", "u"]:
1877 return element
1878 raise LossySetitemError
1879 raise LossySetitemError
1880
1881 if dtype.kind == "b":
1882 if tipo is not None:
1883 if tipo.kind == "b":
1884 if not isinstance(tipo, np.dtype):
1885 # i.e. we have a BooleanArray
1886 if element._hasna:
1887 # i.e. there are pd.NA elements
1888 raise LossySetitemError
1889 return element
1890 raise LossySetitemError
1891 if lib.is_bool(element):
1892 return element
1893 raise LossySetitemError
1894
1895 if dtype.kind == "S":
1896 # TODO: test tests.frame.methods.test_replace tests get here,
1897 # need more targeted tests. xref phofl has a PR about this
1898 if tipo is not None:
1899 if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
1900 return element
1901 raise LossySetitemError
1902 if isinstance(element, bytes) and len(element) <= dtype.itemsize:
1903 return element
1904 raise LossySetitemError
1905
1906 if dtype.kind == "V":
1907 # i.e. np.void, which cannot hold _anything_
1908 raise LossySetitemError
1909
1910 raise NotImplementedError(dtype)
1911
1912
1913def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
1914 """
1915 _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints),
1916 but in many cases a range can be held by a smaller integer dtype.
1917 Check if this is one of those cases.
1918 """
1919 if not len(rng):
1920 return True
1921 return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype)