
1""" 

2Routines for casting. 

3""" 

4 

5from __future__ import annotations 

6 

7import datetime as dt 

8import functools 

9from typing import ( 

10 TYPE_CHECKING, 

11 Any, 

12 Literal, 

13 TypeVar, 

14 cast, 

15 overload, 

16) 

17import warnings 

18 

19import numpy as np 

20 

21from pandas._config import using_pyarrow_string_dtype 

22 

23from pandas._libs import ( 

24 Interval, 

25 Period, 

26 lib, 

27) 

28from pandas._libs.missing import ( 

29 NA, 

30 NAType, 

31 checknull, 

32) 

33from pandas._libs.tslibs import ( 

34 NaT, 

35 OutOfBoundsDatetime, 

36 OutOfBoundsTimedelta, 

37 Timedelta, 

38 Timestamp, 

39 is_supported_dtype, 

40) 

41from pandas._libs.tslibs.timedeltas import array_to_timedelta64 

42from pandas.errors import ( 

43 IntCastingNaNError, 

44 LossySetitemError, 

45) 

46 

47from pandas.core.dtypes.common import ( 

48 ensure_int8, 

49 ensure_int16, 

50 ensure_int32, 

51 ensure_int64, 

52 ensure_object, 

53 ensure_str, 

54 is_bool, 

55 is_complex, 

56 is_float, 

57 is_integer, 

58 is_object_dtype, 

59 is_scalar, 

60 is_string_dtype, 

61 pandas_dtype as pandas_dtype_func, 

62) 

63from pandas.core.dtypes.dtypes import ( 

64 ArrowDtype, 

65 BaseMaskedDtype, 

66 CategoricalDtype, 

67 DatetimeTZDtype, 

68 ExtensionDtype, 

69 IntervalDtype, 

70 PandasExtensionDtype, 

71 PeriodDtype, 

72) 

73from pandas.core.dtypes.generic import ( 

74 ABCExtensionArray, 

75 ABCIndex, 

76 ABCSeries, 

77) 

78from pandas.core.dtypes.inference import is_list_like 

79from pandas.core.dtypes.missing import ( 

80 is_valid_na_for_dtype, 

81 isna, 

82 na_value_for_dtype, 

83 notna, 

84) 

85 

86from pandas.io._util import _arrow_dtype_mapping 

87 

88if TYPE_CHECKING: 

89 from collections.abc import ( 

90 Sequence, 

91 Sized, 

92 ) 

93 

94 from pandas._typing import ( 

95 ArrayLike, 

96 Dtype, 

97 DtypeObj, 

98 NumpyIndexT, 

99 Scalar, 

100 npt, 

101 ) 

102 

103 from pandas import Index 

104 from pandas.core.arrays import ( 

105 Categorical, 

106 DatetimeArray, 

107 ExtensionArray, 

108 IntervalArray, 

109 PeriodArray, 

110 TimedeltaArray, 

111 ) 

112 

113 

114_int8_max = np.iinfo(np.int8).max 

115_int16_max = np.iinfo(np.int16).max 

116_int32_max = np.iinfo(np.int32).max 

117 

118_dtype_obj = np.dtype(object) 

119 

120NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray) 

121 

122 

123def maybe_convert_platform( 

124 values: list | tuple | range | np.ndarray | ExtensionArray, 

125) -> ArrayLike: 

126 """try to do platform conversion, allow ndarray or list here""" 

127 arr: ArrayLike 

128 

129 if isinstance(values, (list, tuple, range)): 

130 arr = construct_1d_object_array_from_listlike(values) 

131 else: 

132 # The caller is responsible for ensuring that we have np.ndarray 

133 # or ExtensionArray here. 

134 arr = values 

135 

136 if arr.dtype == _dtype_obj: 

137 arr = cast(np.ndarray, arr) 

138 arr = lib.maybe_convert_objects(arr) 

139 

140 return arr 

141 

142 

143def is_nested_object(obj) -> bool: 

144 """ 

145 return a boolean if we have a nested object, e.g. a Series with 1 or 

146 more Series elements 

147 

148 This may not be necessarily be performant. 

149 

150 """ 

151 return bool( 

152 isinstance(obj, ABCSeries) 

153 and is_object_dtype(obj.dtype) 

154 and any(isinstance(v, ABCSeries) for v in obj._values) 

155 ) 

156 

157 

158def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar: 

159 """ 

160 Cast scalar to Timestamp or Timedelta if scalar is datetime-like 

161 and dtype is not object. 

162 

163 Parameters 

164 ---------- 

165 value : scalar 

166 dtype : Dtype, optional 

167 

168 Returns 

169 ------- 

170 scalar 

171 """ 

172 if dtype == _dtype_obj: 

173 pass 

174 elif isinstance(value, (np.datetime64, dt.datetime)): 

175 value = Timestamp(value) 

176 elif isinstance(value, (np.timedelta64, dt.timedelta)): 

177 value = Timedelta(value) 

178 

179 return value 

180 

181 
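# Illustrative usage (editor's sketch, not part of the original module):
# raw numpy datetime-like scalars are boxed into pandas scalars unless the
# target dtype is object, in which case the value passes through unchanged.
#
#   >>> maybe_box_datetimelike(np.datetime64("2021-01-01"))
#   Timestamp('2021-01-01 00:00:00')
#   >>> maybe_box_datetimelike(np.datetime64("2021-01-01"), dtype=np.dtype(object))
#   numpy.datetime64('2021-01-01')
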

def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
    """
    If passed a scalar, cast it to a Python native type.

    Parameters
    ----------
    value : scalar

    Returns
    -------
    scalar
    """
    if is_float(value):
        value = float(value)
    elif is_integer(value):
        value = int(value)
    elif is_bool(value):
        value = bool(value)
    elif isinstance(value, (np.datetime64, np.timedelta64)):
        value = maybe_box_datetimelike(value)
    elif value is NA:
        value = None
    return value

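# Illustrative usage (editor's sketch, not part of the original module):
# numpy scalars come back as Python natives, and pd.NA becomes None.
#
#   >>> maybe_box_native(np.int64(3)), maybe_box_native(np.float64(1.5))
#   (3, 1.5)
#   >>> maybe_box_native(NA) is None
#   True
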

def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
    """
    Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting
    into a numpy array. Failing to unbox would risk dropping nanoseconds.

    Notes
    -----
    Caller is responsible for checking dtype.kind in "mM"
    """
    if is_valid_na_for_dtype(value, dtype):
        # GH#36541: can't fill array directly with pd.NaT
        # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
        # ValueError: cannot convert float NaN to integer
        value = dtype.type("NaT", "ns")
    elif isinstance(value, Timestamp):
        if value.tz is None:
            value = value.to_datetime64()
        elif not isinstance(dtype, DatetimeTZDtype):
            raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
    elif isinstance(value, Timedelta):
        value = value.to_timedelta64()

    _disallow_mismatched_datetimelike(value, dtype)
    return value


def _disallow_mismatched_datetimelike(value, dtype: DtypeObj):
    """
    numpy allows np.array(dt64values, dtype="timedelta64[ns]") and
    vice-versa, but we do not want to allow this, so we need to
    check explicitly
    """
    vdtype = getattr(value, "dtype", None)
    if vdtype is None:
        return
    elif (vdtype.kind == "m" and dtype.kind == "M") or (
        vdtype.kind == "M" and dtype.kind == "m"
    ):
        raise TypeError(f"Cannot cast {repr(value)} to {dtype}")


@overload
def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray:
    ...


@overload
def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike:
    ...


def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike:
    """
    Try to cast to the specified dtype (e.g. convert back to bool/int,
    or an astype of float64->float32).
    """
    if isinstance(result, ABCSeries):
        result = result._values
    do_round = False

    if isinstance(dtype, str):
        if dtype == "infer":
            inferred_type = lib.infer_dtype(result, skipna=False)
            if inferred_type == "boolean":
                dtype = "bool"
            elif inferred_type == "integer":
                dtype = "int64"
            elif inferred_type == "datetime64":
                dtype = "datetime64[ns]"
            elif inferred_type in ["timedelta", "timedelta64"]:
                dtype = "timedelta64[ns]"

            # try to upcast here
            elif inferred_type == "floating":
                dtype = "int64"
                if issubclass(result.dtype.type, np.number):
                    do_round = True

            else:
                # TODO: complex? what if result is already non-object?
                dtype = "object"

        dtype = np.dtype(dtype)

    if not isinstance(dtype, np.dtype):
        # enforce our signature annotation
        raise TypeError(dtype)  # pragma: no cover

    converted = maybe_downcast_numeric(result, dtype, do_round)
    if converted is not result:
        return converted

    # a datetimelike
    # GH12821, iNaT is cast to float
    if dtype.kind in "mM" and result.dtype.kind in "if":
        result = result.astype(dtype)

    elif dtype.kind == "m" and result.dtype == _dtype_obj:
        # test_where_downcast_to_td64
        result = cast(np.ndarray, result)
        result = array_to_timedelta64(result)

    elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj:
        result = cast(np.ndarray, result)
        return np.asarray(maybe_cast_to_datetime(result, dtype=dtype))

    return result


@overload
def maybe_downcast_numeric(
    result: np.ndarray, dtype: np.dtype, do_round: bool = False
) -> np.ndarray:
    ...


@overload
def maybe_downcast_numeric(
    result: ExtensionArray, dtype: DtypeObj, do_round: bool = False
) -> ArrayLike:
    ...


def maybe_downcast_numeric(
    result: ArrayLike, dtype: DtypeObj, do_round: bool = False
) -> ArrayLike:
    """
    Subset of maybe_downcast_to_dtype restricted to numeric dtypes.

    Parameters
    ----------
    result : ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    do_round : bool

    Returns
    -------
    ndarray or ExtensionArray
    """
    if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype):
        # e.g. SparseDtype has no itemsize attr
        return result

    def trans(x):
        if do_round:
            return x.round()
        return x

    if dtype.kind == result.dtype.kind:
        # don't allow upcasts here (except if empty)
        if result.dtype.itemsize <= dtype.itemsize and result.size:
            return result

    if dtype.kind in "biu":
        if not result.size:
            # if we don't have any elements, just astype it
            return trans(result).astype(dtype)

        if isinstance(result, np.ndarray):
            element = result.item(0)
        else:
            element = result.iloc[0]
        if not isinstance(element, (np.integer, np.floating, int, float, bool)):
            # a comparable, e.g. a Decimal may slip in here
            return result

        if (
            issubclass(result.dtype.type, (np.object_, np.number))
            and notna(result).all()
        ):
            new_result = trans(result).astype(dtype)
            if new_result.dtype.kind == "O" or result.dtype.kind == "O":
                # np.allclose may raise TypeError on object-dtype
                if (new_result == result).all():
                    return new_result
            else:
                if np.allclose(new_result, result, rtol=0):
                    return new_result

    elif (
        issubclass(dtype.type, np.floating)
        and result.dtype.kind != "b"
        and not is_string_dtype(result.dtype)
    ):
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "overflow encountered in cast", RuntimeWarning
            )
            new_result = result.astype(dtype)

        # Adjust tolerances based on floating point size
        size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}

        atol = size_tols.get(new_result.dtype.itemsize, 0.0)

        # Check that the downcast float values are still equal within 7 digits
        # when converting from float64 to float32
        if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
            return new_result

    elif dtype.kind == result.dtype.kind == "c":
        new_result = result.astype(dtype)

        if np.array_equal(new_result, result, equal_nan=True):
            # TODO: use tolerance like we do for float?
            return new_result

    return result

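# Illustrative usage (editor's sketch, not part of the original module):
# downcasting succeeds only when it is lossless; otherwise the original
# array is returned unchanged.
#
#   >>> maybe_downcast_numeric(np.array([1.0, 2.0]), np.dtype("int64"))
#   array([1, 2])
#   >>> maybe_downcast_numeric(np.array([1.5, 2.0]), np.dtype("int64"))
#   array([1.5, 2. ])
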

def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT:
    """
    If the array is an int/uint/float dtype with a bit size lower than 64 bits,
    upcast it to 64 bits.

    Parameters
    ----------
    arr : ndarray or ExtensionArray

    Returns
    -------
    ndarray or ExtensionArray
    """
    dtype = arr.dtype
    if dtype.kind == "i" and dtype != np.int64:
        return arr.astype(np.int64)
    elif dtype.kind == "u" and dtype != np.uint64:
        return arr.astype(np.uint64)
    elif dtype.kind == "f" and dtype != np.float64:
        return arr.astype(np.float64)
    else:
        return arr


def maybe_cast_pointwise_result(
    result: ArrayLike,
    dtype: DtypeObj,
    numeric_only: bool = False,
    same_dtype: bool = True,
) -> ArrayLike:
    """
    Try casting result of a pointwise operation back to the original dtype if
    appropriate.

    Parameters
    ----------
    result : array-like
        Result to cast.
    dtype : np.dtype or ExtensionDtype
        Dtype of the input from which the result was calculated.
    numeric_only : bool, default False
        Whether to cast only numerics or datetimes as well.
    same_dtype : bool, default True
        Specify dtype when calling _from_sequence.

    Returns
    -------
    result : array-like
        result maybe cast to the dtype.
    """

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        if same_dtype:
            result = _maybe_cast_to_extension_array(cls, result, dtype=dtype)
        else:
            result = _maybe_cast_to_extension_array(cls, result)

    elif (numeric_only and dtype.kind in "iufcb") or not numeric_only:
        result = maybe_downcast_to_dtype(result, dtype)

    return result


def _maybe_cast_to_extension_array(
    cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None
) -> ArrayLike:
    """
    Call to `_from_sequence` that returns the object unchanged on Exception.

    Parameters
    ----------
    cls : class, subclass of ExtensionArray
    obj : arraylike
        Values to pass to cls._from_sequence
    dtype : ExtensionDtype, optional

    Returns
    -------
    ExtensionArray or obj
    """
    result: ArrayLike

    if dtype is not None:
        try:
            result = cls._from_scalars(obj, dtype=dtype)
        except (TypeError, ValueError):
            return obj
        return result

    try:
        result = cls._from_sequence(obj, dtype=dtype)
    except Exception:
        # We can't predict what downstream EA constructors may raise
        result = obj
    return result


@overload
def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
    ...


@overload
def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype:
    ...


def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
    """
    If we have a dtype that cannot hold NA values, find the best match that can.
    """
    if isinstance(dtype, ExtensionDtype):
        if dtype._can_hold_na:
            return dtype
        elif isinstance(dtype, IntervalDtype):
            # TODO(GH#45349): don't special-case IntervalDtype, allow
            #  overriding instead of returning object below.
            return IntervalDtype(np.float64, closed=dtype.closed)
        return _dtype_obj
    elif dtype.kind == "b":
        return _dtype_obj
    elif dtype.kind in "iu":
        return np.dtype(np.float64)
    return dtype

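# Illustrative usage (editor's sketch, not part of the original module):
# integer dtypes are widened to float64 (which can hold NaN) and bool falls
# back to object.
#
#   >>> ensure_dtype_can_hold_na(np.dtype("int64"))
#   dtype('float64')
#   >>> ensure_dtype_can_hold_na(np.dtype("bool"))
#   dtype('O')
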

_canonical_nans = {
    np.datetime64: np.datetime64("NaT", "ns"),
    np.timedelta64: np.timedelta64("NaT", "ns"),
    type(np.nan): np.nan,
}


def maybe_promote(dtype: np.dtype, fill_value=np.nan):
    """
    Find the minimal dtype that can hold both the given dtype and fill_value.

    Parameters
    ----------
    dtype : np.dtype
    fill_value : scalar, default np.nan

    Returns
    -------
    dtype
        Upcasted from dtype argument if necessary.
    fill_value
        Upcasted from fill_value argument if necessary.

    Raises
    ------
    ValueError
        If fill_value is a non-scalar and dtype is not object.
    """
    orig = fill_value
    orig_is_nat = False
    if checknull(fill_value):
        # https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740
        #  avoid cache misses with NaN/NaT values that are not singletons
        if fill_value is not NA:
            try:
                orig_is_nat = np.isnat(fill_value)
            except TypeError:
                pass

        fill_value = _canonical_nans.get(type(fill_value), fill_value)

    # for performance, we are using a cached version of the actual implementation
    # of the function in _maybe_promote. However, this doesn't always work (in case
    # of non-hashable arguments), so we fall back to the actual implementation
    # if needed.
    try:
        # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type
        # "Type[Any]"; expected "Hashable"  [arg-type]
        dtype, fill_value = _maybe_promote_cached(
            dtype, fill_value, type(fill_value)  # type: ignore[arg-type]
        )
    except TypeError:
        # if fill_value is not hashable (required for caching)
        dtype, fill_value = _maybe_promote(dtype, fill_value)

    if (dtype == _dtype_obj and orig is not None) or (
        orig_is_nat and np.datetime_data(orig)[0] != "ns"
    ):
        # GH#51592,53497 restore our potentially non-canonical fill_value
        fill_value = orig
    return dtype, fill_value

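# Illustrative usage (editor's sketch, not part of the original module):
# filling an integer dtype with NaN promotes to float64, while a bool fill
# into a float dtype escalates to object.
#
#   >>> maybe_promote(np.dtype("int64"), np.nan)
#   (dtype('float64'), nan)
#   >>> maybe_promote(np.dtype("float64"), True)
#   (dtype('O'), True)
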

@functools.lru_cache
def _maybe_promote_cached(dtype, fill_value, fill_value_type):
    # The cached version of _maybe_promote below.
    # This also uses fill_value_type as an (otherwise unused) argument for the
    # cache lookup, to differentiate e.g. 1 and True.
    return _maybe_promote(dtype, fill_value)


def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
    # The actual implementation of the function; use `maybe_promote` above for
    # a cached version.
    if not is_scalar(fill_value):
        # with object dtype there is nothing to promote, and the user can
        #  pass pretty much any weird fill_value they like
        if dtype != object:
            raise ValueError("fill_value must be a scalar")
        dtype = _dtype_obj
        return dtype, fill_value

    if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in "iufcmM":
        dtype = ensure_dtype_can_hold_na(dtype)
        fv = na_value_for_dtype(dtype)
        return dtype, fv

    elif isinstance(dtype, CategoricalDtype):
        if fill_value in dtype.categories or isna(fill_value):
            return dtype, fill_value
        else:
            return object, ensure_object(fill_value)

    elif isna(fill_value):
        dtype = _dtype_obj
        if fill_value is None:
            # but we retain e.g. pd.NA
            fill_value = np.nan
        return dtype, fill_value

    # returns tuple of (dtype, fill_value)
    if issubclass(dtype.type, np.datetime64):
        inferred, fv = infer_dtype_from_scalar(fill_value)
        if inferred == dtype:
            return dtype, fv

        from pandas.core.arrays import DatetimeArray

        dta = DatetimeArray._from_sequence([], dtype="M8[ns]")
        try:
            fv = dta._validate_setitem_value(fill_value)
            return dta.dtype, fv
        except (ValueError, TypeError):
            return _dtype_obj, fill_value

    elif issubclass(dtype.type, np.timedelta64):
        inferred, fv = infer_dtype_from_scalar(fill_value)
        if inferred == dtype:
            return dtype, fv

        elif inferred.kind == "m":
            # different unit, e.g. passed np.timedelta64(24, "h") with dtype=m8[ns]
            # see if we can losslessly cast it to our dtype
            unit = np.datetime_data(dtype)[0]
            try:
                td = Timedelta(fill_value).as_unit(unit, round_ok=False)
            except OutOfBoundsTimedelta:
                return _dtype_obj, fill_value
            else:
                return dtype, td.asm8

        return _dtype_obj, fill_value

    elif is_float(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            dtype = np.dtype(np.float64)

        elif dtype.kind == "f":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.float64 and dtype is np.float32
                dtype = mst

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

    elif is_bool(fill_value):
        if not issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

    elif is_integer(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            if not np_can_cast_scalar(fill_value, dtype):  # type: ignore[arg-type]
                # upcast to prevent overflow
                mst = np.min_scalar_type(fill_value)
                dtype = np.promote_types(dtype, mst)
                if dtype.kind == "f":
                    # Case where we disagree with numpy
                    dtype = np.dtype(np.object_)

    elif is_complex(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, (np.integer, np.floating)):
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.complex128 and dtype is np.complex64
                dtype = mst

    else:
        dtype = np.dtype(np.object_)

    # in case we have a string that looked like a number
    if issubclass(dtype.type, (bytes, str)):
        dtype = np.dtype(np.object_)

    fill_value = _ensure_dtype_type(fill_value, dtype)
    return dtype, fill_value


def _ensure_dtype_type(value, dtype: np.dtype):
    """
    Ensure that the given value is an instance of the given dtype.

    e.g. if our dtype is np.complex64, we should have an instance of that
    as opposed to a python complex object.

    Parameters
    ----------
    value : object
    dtype : np.dtype

    Returns
    -------
    object
    """
    # Start with exceptions in which we do _not_ cast to numpy types

    if dtype == _dtype_obj:
        return value

    # Note: before we get here we have already excluded isna(value)
    return dtype.type(value)


def infer_dtype_from(val) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar or array.

    Parameters
    ----------
    val : object
    """
    if not is_list_like(val):
        return infer_dtype_from_scalar(val)
    return infer_dtype_from_array(val)


def infer_dtype_from_scalar(val) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar.

    Parameters
    ----------
    val : object
    """
    dtype: DtypeObj = _dtype_obj

    # a 1-element ndarray
    if isinstance(val, np.ndarray):
        if val.ndim != 0:
            msg = "invalid ndarray passed to infer_dtype_from_scalar"
            raise ValueError(msg)

        dtype = val.dtype
        val = lib.item_from_zerodim(val)

    elif isinstance(val, str):
        # If we create an empty array using a string to infer
        # the dtype, NumPy will only allocate one character per entry
        # so this is kind of bad. Alternatively we could use np.repeat
        # instead of np.empty (but then you still don't want things
        # coming out as np.str_!)

        dtype = _dtype_obj
        if using_pyarrow_string_dtype():
            from pandas.core.arrays.string_ import StringDtype

            dtype = StringDtype(storage="pyarrow_numpy")

    elif isinstance(val, (np.datetime64, dt.datetime)):
        try:
            val = Timestamp(val)
        except OutOfBoundsDatetime:
            return _dtype_obj, val

        if val is NaT or val.tz is None:
            val = val.to_datetime64()
            dtype = val.dtype
            # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
        else:
            dtype = DatetimeTZDtype(unit=val.unit, tz=val.tz)

    elif isinstance(val, (np.timedelta64, dt.timedelta)):
        try:
            val = Timedelta(val)
        except (OutOfBoundsTimedelta, OverflowError):
            dtype = _dtype_obj
        else:
            if val is NaT:
                val = np.timedelta64("NaT", "ns")
            else:
                val = val.asm8
            dtype = val.dtype

    elif is_bool(val):
        dtype = np.dtype(np.bool_)

    elif is_integer(val):
        if isinstance(val, np.integer):
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.int64)

        try:
            np.array(val, dtype=dtype)
        except OverflowError:
            dtype = np.array(val).dtype

    elif is_float(val):
        if isinstance(val, np.floating):
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.float64)

    elif is_complex(val):
        dtype = np.dtype(np.complex128)

    if isinstance(val, Period):
        dtype = PeriodDtype(freq=val.freq)
    elif isinstance(val, Interval):
        subtype = infer_dtype_from_scalar(val.left)[0]
        dtype = IntervalDtype(subtype=subtype, closed=val.closed)

    return dtype, val

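# Illustrative usage (editor's sketch, not part of the original module):
# Python scalars map to 64-bit numpy dtypes, while a tz-aware Timestamp
# yields a DatetimeTZDtype carrying its unit and timezone.
#
#   >>> infer_dtype_from_scalar(1)
#   (dtype('int64'), 1)
#   >>> infer_dtype_from_scalar(1.5)
#   (dtype('float64'), 1.5)
#   >>> dtype, _ = infer_dtype_from_scalar(Timestamp("2021-01-01", tz="UTC"))
#   >>> isinstance(dtype, DatetimeTZDtype)
#   True
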

def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
    """
    Convert datetimelike-keyed dicts to a Timestamp-keyed dict.

    Parameters
    ----------
    d : dict-like object

    Returns
    -------
    dict
    """
    return {maybe_box_datetimelike(key): value for key, value in d.items()}


def infer_dtype_from_array(arr) -> tuple[DtypeObj, ArrayLike]:
    """
    Infer the dtype from an array.

    Parameters
    ----------
    arr : array

    Returns
    -------
    tuple (pandas-compat dtype, array)

    Examples
    --------
    >>> np.asarray([1, '1'])
    array(['1', '1'], dtype='<U21')

    >>> infer_dtype_from_array([1, '1'])
    (dtype('O'), [1, '1'])
    """
    if isinstance(arr, np.ndarray):
        return arr.dtype, arr

    if not is_list_like(arr):
        raise TypeError("'arr' must be list-like")

    arr_dtype = getattr(arr, "dtype", None)
    if isinstance(arr_dtype, ExtensionDtype):
        return arr.dtype, arr

    elif isinstance(arr, ABCSeries):
        return arr.dtype, np.asarray(arr)

    # don't force numpy coerce with nan's
    inferred = lib.infer_dtype(arr, skipna=False)
    if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
        return (np.dtype(np.object_), arr)

    arr = np.asarray(arr)
    return arr.dtype, arr


def _maybe_infer_dtype_type(element):
    """
    Try to infer an object's dtype, for use in arithmetic ops.

    Uses `element.dtype` if that's available.
    Objects implementing the iterator protocol are cast to a NumPy array,
    and from there the array's type is used.

    Parameters
    ----------
    element : object
        Possibly has a `.dtype` attribute, and possibly the iterator
        protocol.

    Returns
    -------
    tipo : type

    Examples
    --------
    >>> from collections import namedtuple
    >>> Foo = namedtuple("Foo", "dtype")
    >>> _maybe_infer_dtype_type(Foo(np.dtype("i8")))
    dtype('int64')
    """
    tipo = None
    if hasattr(element, "dtype"):
        tipo = element.dtype
    elif is_list_like(element):
        element = np.asarray(element)
        tipo = element.dtype
    return tipo


def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
    """
    Change string like dtypes to object for
    ``DataFrame.select_dtypes()``.
    """
    # error: Argument 1 to <set> has incompatible type "Type[generic]"; expected
    # "Union[dtype[Any], ExtensionDtype, None]"
    # error: Argument 2 to <set> has incompatible type "Type[generic]"; expected
    # "Union[dtype[Any], ExtensionDtype, None]"
    non_string_dtypes = dtype_set - {
        np.dtype("S").type,  # type: ignore[arg-type]
        np.dtype("<U").type,  # type: ignore[arg-type]
    }
    if non_string_dtypes != dtype_set:
        raise TypeError("string dtypes are not allowed, use 'object' instead")


def coerce_indexer_dtype(indexer, categories) -> np.ndarray:
    """coerce the indexer input array to the smallest dtype possible"""
    length = len(categories)
    if length < _int8_max:
        return ensure_int8(indexer)
    elif length < _int16_max:
        return ensure_int16(indexer)
    elif length < _int32_max:
        return ensure_int32(indexer)
    return ensure_int64(indexer)

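# Illustrative usage (editor's sketch, not part of the original module):
# with only three categories, the codes fit comfortably in int8.
#
#   >>> coerce_indexer_dtype(np.array([0, 1, 2, 0]), ["a", "b", "c"]).dtype
#   dtype('int8')
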

def convert_dtypes(
    input_array: ArrayLike,
    convert_string: bool = True,
    convert_integer: bool = True,
    convert_boolean: bool = True,
    convert_floating: bool = True,
    infer_objects: bool = False,
    dtype_backend: Literal["numpy_nullable", "pyarrow"] = "numpy_nullable",
) -> DtypeObj:
    """
    Convert objects to best possible type, and optionally,
    to types supporting ``pd.NA``.

    Parameters
    ----------
    input_array : ExtensionArray or np.ndarray
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, default True
        Whether object dtypes should be converted to ``BooleanDtypes()``.
    convert_floating : bool, default True
        Whether, if possible, conversion can be done to floating extension types.
        If `convert_integer` is also True, preference will be given to integer
        dtypes if the floats can be faithfully cast to integers.
    infer_objects : bool, default False
        Whether to also infer objects to float/int if possible. Is only hit if the
        object array contains pd.NA.
    dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    Returns
    -------
    np.dtype, or ExtensionDtype
    """
    inferred_dtype: str | DtypeObj

    if (
        convert_string or convert_integer or convert_boolean or convert_floating
    ) and isinstance(input_array, np.ndarray):
        if input_array.dtype == object:
            inferred_dtype = lib.infer_dtype(input_array)
        else:
            inferred_dtype = input_array.dtype

        if is_string_dtype(inferred_dtype):
            if not convert_string or inferred_dtype == "bytes":
                inferred_dtype = input_array.dtype
            else:
                inferred_dtype = pandas_dtype_func("string")

        if convert_integer:
            target_int_dtype = pandas_dtype_func("Int64")

            if input_array.dtype.kind in "iu":
                from pandas.core.arrays.integer import NUMPY_INT_TO_DTYPE

                inferred_dtype = NUMPY_INT_TO_DTYPE.get(
                    input_array.dtype, target_int_dtype
                )
            elif input_array.dtype.kind in "fcb":
                # TODO: de-dup with maybe_cast_to_integer_array?
                arr = input_array[notna(input_array)]
                if (arr.astype(int) == arr).all():
                    inferred_dtype = target_int_dtype
                else:
                    inferred_dtype = input_array.dtype
            elif (
                infer_objects
                and input_array.dtype == object
                and (isinstance(inferred_dtype, str) and inferred_dtype == "integer")
            ):
                inferred_dtype = target_int_dtype

        if convert_floating:
            if input_array.dtype.kind in "fcb":
                # i.e. numeric but not integer
                from pandas.core.arrays.floating import NUMPY_FLOAT_TO_DTYPE

                inferred_float_dtype: DtypeObj = NUMPY_FLOAT_TO_DTYPE.get(
                    input_array.dtype, pandas_dtype_func("Float64")
                )
                # if we could also convert to integer, check if all floats
                # are actually integers
                if convert_integer:
                    # TODO: de-dup with maybe_cast_to_integer_array?
                    arr = input_array[notna(input_array)]
                    if (arr.astype(int) == arr).all():
                        inferred_dtype = pandas_dtype_func("Int64")
                    else:
                        inferred_dtype = inferred_float_dtype
                else:
                    inferred_dtype = inferred_float_dtype
            elif (
                infer_objects
                and input_array.dtype == object
                and (
                    isinstance(inferred_dtype, str)
                    and inferred_dtype == "mixed-integer-float"
                )
            ):
                inferred_dtype = pandas_dtype_func("Float64")

        if convert_boolean:
            if input_array.dtype.kind == "b":
                inferred_dtype = pandas_dtype_func("boolean")
            elif isinstance(inferred_dtype, str) and inferred_dtype == "boolean":
                inferred_dtype = pandas_dtype_func("boolean")

        if isinstance(inferred_dtype, str):
            # If we couldn't do anything else, then we retain the dtype
            inferred_dtype = input_array.dtype

    else:
        inferred_dtype = input_array.dtype

    if dtype_backend == "pyarrow":
        from pandas.core.arrays.arrow.array import to_pyarrow_type
        from pandas.core.arrays.string_ import StringDtype

        assert not isinstance(inferred_dtype, str)

        if (
            (convert_integer and inferred_dtype.kind in "iu")
            or (convert_floating and inferred_dtype.kind in "fc")
            or (convert_boolean and inferred_dtype.kind == "b")
            or (convert_string and isinstance(inferred_dtype, StringDtype))
            or (
                inferred_dtype.kind not in "iufcb"
                and not isinstance(inferred_dtype, StringDtype)
            )
        ):
            if isinstance(inferred_dtype, PandasExtensionDtype) and not isinstance(
                inferred_dtype, DatetimeTZDtype
            ):
                base_dtype = inferred_dtype.base
            elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
                base_dtype = inferred_dtype.numpy_dtype
            elif isinstance(inferred_dtype, StringDtype):
                base_dtype = np.dtype(str)
            else:
                base_dtype = inferred_dtype
            if (
                base_dtype.kind == "O"  # type: ignore[union-attr]
                and input_array.size > 0
                and isna(input_array).all()
            ):
                import pyarrow as pa

                pa_type = pa.null()
            else:
                pa_type = to_pyarrow_type(base_dtype)
            if pa_type is not None:
                inferred_dtype = ArrowDtype(pa_type)
    elif dtype_backend == "numpy_nullable" and isinstance(inferred_dtype, ArrowDtype):
        # GH 53648
        inferred_dtype = _arrow_dtype_mapping()[inferred_dtype.pyarrow_dtype]

    # error: Incompatible return value type (got "Union[str, Union[dtype[Any],
    # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
    return inferred_dtype  # type: ignore[return-value]

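# Illustrative usage (editor's sketch, assuming the default numpy_nullable
# backend): plain numpy integers map to the nullable Int64, and floats that
# are all integral are preferred as Int64 too when convert_integer is enabled.
#
#   >>> convert_dtypes(np.array([1, 2, 3]))
#   Int64Dtype()
#   >>> convert_dtypes(np.array([1.0, 2.0]))
#   Int64Dtype()
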

def maybe_infer_to_datetimelike(
    value: npt.NDArray[np.object_],
) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
    """
    We might have an array (or a single object) that is datetime-like, with no
    dtype passed; don't change the values unless we find a datetime/timedelta
    set.

    This is pretty strict in that a datetime/timedelta is REQUIRED
    in addition to possible nulls/string likes.

    Parameters
    ----------
    value : np.ndarray[object]

    Returns
    -------
    np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
    """
    if not isinstance(value, np.ndarray) or value.dtype != object:
        # Caller is responsible for passing only ndarray[object]
        raise TypeError(type(value))  # pragma: no cover
    if value.ndim != 1:
        # Caller is responsible
        raise ValueError(value.ndim)  # pragma: no cover

    if not len(value):
        return value

    # error: Incompatible return value type (got "Union[ExtensionArray,
    # ndarray[Any, Any]]", expected "Union[ndarray[Any, Any], DatetimeArray,
    # TimedeltaArray, PeriodArray, IntervalArray]")
    return lib.maybe_convert_objects(  # type: ignore[return-value]
        value,
        # Here we do not convert numeric dtypes, as if we wanted that,
        #  numpy would have done it for us.
        convert_numeric=False,
        convert_non_numeric=True,
        dtype_if_all_nat=np.dtype("M8[ns]"),
    )


def maybe_cast_to_datetime(
    value: np.ndarray | list, dtype: np.dtype
) -> ExtensionArray | np.ndarray:
    """
    Try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT.

    Caller is responsible for handling ExtensionDtype cases and non dt64/td64
    cases.
    """
    from pandas.core.arrays.datetimes import DatetimeArray
    from pandas.core.arrays.timedeltas import TimedeltaArray

    assert dtype.kind in "mM"
    if not is_list_like(value):
        raise TypeError("value must be listlike")

    # TODO: _from_sequence would raise ValueError in cases where
    #  _ensure_nanosecond_dtype raises TypeError
    _ensure_nanosecond_dtype(dtype)

    if lib.is_np_dtype(dtype, "m"):
        res = TimedeltaArray._from_sequence(value, dtype=dtype)
        return res
    else:
        try:
            dta = DatetimeArray._from_sequence(value, dtype=dtype)
        except ValueError as err:
            # We can give a Series-specific exception message.
            if "cannot supply both a tz and a timezone-naive dtype" in str(err):
                raise ValueError(
                    "Cannot convert timezone-aware data to "
                    "timezone-naive dtype. Use "
                    "pd.Series(values).dt.tz_localize(None) instead."
                ) from err
            raise

        return dta


def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
    """
    Convert dtypes with granularity less than nanosecond to nanosecond

    >>> _ensure_nanosecond_dtype(np.dtype("M8[us]"))

    >>> _ensure_nanosecond_dtype(np.dtype("M8[D]"))
    Traceback (most recent call last):
        ...
    TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'

    >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]"))
    Traceback (most recent call last):
        ...
    TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns'
    """  # noqa: E501
    msg = (
        f"The '{dtype.name}' dtype has no unit. "
        f"Please pass in '{dtype.name}[ns]' instead."
    )

    # unpack e.g. SparseDtype
    dtype = getattr(dtype, "subtype", dtype)

    if not isinstance(dtype, np.dtype):
        # i.e. datetime64tz
        pass

    elif dtype.kind in "mM":
        if not is_supported_dtype(dtype):
            # pre-2.0 we would silently swap in nanos for lower-resolutions,
            #  raise for above-nano resolutions
            if dtype.name in ["datetime64", "timedelta64"]:
                raise ValueError(msg)
            # TODO: ValueError or TypeError? existing test
            #  test_constructor_generic_timestamp_bad_frequency expects TypeError
            raise TypeError(
                f"dtype={dtype} is not supported. Supported resolutions are 's', "
                "'ms', 'us', and 'ns'"
            )


# TODO: other value-dependent functions to standardize here include
#  Index._find_common_type_compat
def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
    """
    Find the type/dtype for the result of an operation between objects.

    This is similar to find_common_type, but looks at the right object instead
    of just its dtype. This can be useful in particular when the right
    object does not have a `dtype`.

    Parameters
    ----------
    left_dtype : np.dtype or ExtensionDtype
    right : Any

    Returns
    -------
    np.dtype or ExtensionDtype

    See Also
    --------
    find_common_type
    numpy.result_type
    """
    new_dtype: DtypeObj

    if (
        isinstance(left_dtype, np.dtype)
        and left_dtype.kind in "iuc"
        and (lib.is_integer(right) or lib.is_float(right))
    ):
        # e.g. with int8 dtype and right=512, we want to end up with
        # np.int16, whereas infer_dtype_from(512) gives np.int64,
        # which will make us upcast too far.
        if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
            right = int(right)
        # After NEP 50, numpy won't inspect Python scalars
        # TODO: do we need to recreate numpy's inspection logic for floats too
        # (this breaks some tests)
        if isinstance(right, int) and not isinstance(right, np.integer):
            # This gives an unsigned type by default
            # (if our number is positive)

            # If our left dtype is signed, we might not want this since
            # this might give us 1 dtype too big
            # We should check if the corresponding int dtype (e.g. int64 for uint64)
            # can hold the number
            right_dtype = np.min_scalar_type(right)
            if right == 0:
                # Special case 0
                right = left_dtype
            elif (
                not np.issubdtype(left_dtype, np.unsignedinteger)
                and 0 < right <= np.iinfo(right_dtype).max
            ):
                # If left dtype isn't unsigned, check if it fits in the signed dtype
                right = np.dtype(f"i{right_dtype.itemsize}")
            else:
                right = right_dtype

        new_dtype = np.result_type(left_dtype, right)

    elif is_valid_na_for_dtype(right, left_dtype):
        # e.g. IntervalDtype[int] and None/np.nan
        new_dtype = ensure_dtype_can_hold_na(left_dtype)

    else:
        dtype, _ = infer_dtype_from(right)
        new_dtype = find_common_type([left_dtype, dtype])

    return new_dtype

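# Illustrative usage (editor's sketch, not part of the original module):
# the Python scalar 512 does not fit in int8, so the result widens just
# enough, while a true float forces float64.
#
#   >>> find_result_type(np.dtype("int8"), 512)
#   dtype('int16')
#   >>> find_result_type(np.dtype("int64"), 3.5)
#   dtype('float64')
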

def common_dtype_categorical_compat(
    objs: Sequence[Index | ArrayLike], dtype: DtypeObj
) -> DtypeObj:
    """
    Update the result of find_common_type to account for NAs in a Categorical.

    Parameters
    ----------
    objs : list[np.ndarray | ExtensionArray | Index]
    dtype : np.dtype or ExtensionDtype

    Returns
    -------
    np.dtype or ExtensionDtype
    """
    # GH#38240

    # TODO: more generally, could do `not can_hold_na(dtype)`
    if lib.is_np_dtype(dtype, "iu"):
        for obj in objs:
            # We don't want to accidentally allow e.g. "categorical" str here
            obj_dtype = getattr(obj, "dtype", None)
            if isinstance(obj_dtype, CategoricalDtype):
                if isinstance(obj, ABCIndex):
                    # This check may already be cached
                    hasnas = obj.hasnans
                else:
                    # Categorical
                    hasnas = cast("Categorical", obj)._hasna

                if hasnas:
                    # see test_union_int_categorical_with_nan
                    dtype = np.dtype(np.float64)
                    break
    return dtype


def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
    """
    np.find_common_type implementation pre-1.25 deprecation, using np.result_type
    https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065

    Parameters
    ----------
    dtypes : np.dtypes

    Returns
    -------
    np.dtype
    """
    try:
        common_dtype = np.result_type(*dtypes)
        if common_dtype.kind in "mMSU":
            # NumPy promotion currently (1.25) misbehaves for times and strings,
            # so fall back to object (np.find_common_type did, unless there
            # was only one dtype)
            common_dtype = np.dtype("O")

    except TypeError:
        common_dtype = np.dtype("O")
    return common_dtype


@overload
def find_common_type(types: list[np.dtype]) -> np.dtype:
    ...


@overload
def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
    ...


@overload
def find_common_type(types: list[DtypeObj]) -> DtypeObj:
    ...


def find_common_type(types):
    """
    Find a common data type among the given dtypes.

    Parameters
    ----------
    types : list of dtypes

    Returns
    -------
    pandas extension or numpy dtype

    See Also
    --------
    numpy.find_common_type

    """
    if not types:
        raise ValueError("no types given")

    first = types[0]

    # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
    # => object
    if lib.dtypes_all_equal(list(types)):
        return first

    # get unique types (dict.fromkeys is used as order-preserving set())
    types = list(dict.fromkeys(types).keys())

    if any(isinstance(t, ExtensionDtype) for t in types):
        for t in types:
            if isinstance(t, ExtensionDtype):
                res = t._get_common_dtype(types)
                if res is not None:
                    return res
        return np.dtype("object")

    # take lowest unit
    if all(lib.is_np_dtype(t, "M") for t in types):
        return np.dtype(max(types))
    if all(lib.is_np_dtype(t, "m") for t in types):
        return np.dtype(max(types))

    # don't mix bool / int or float or complex
    # this is different from numpy, which casts bool with float/int as int
    has_bools = any(t.kind == "b" for t in types)
    if has_bools:
        for t in types:
            if t.kind in "iufc":
                return np.dtype("object")

    return np_find_common_type(*types)

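# Illustrative usage (editor's sketch, not part of the original module):
# numeric dtypes promote as numpy would, but mixing bool with any numeric
# kind falls back to object.
#
#   >>> find_common_type([np.dtype("int64"), np.dtype("float32")])
#   dtype('float64')
#   >>> find_common_type([np.dtype("bool"), np.dtype("int64")])
#   dtype('O')
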

def construct_2d_arraylike_from_scalar(
    value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
) -> np.ndarray:
    shape = (length, width)

    if dtype.kind in "mM":
        value = _maybe_box_and_unbox_datetimelike(value, dtype)
    elif dtype == _dtype_obj:
        if isinstance(value, (np.timedelta64, np.datetime64)):
            # calling np.array below would cast to pytimedelta/pydatetime
            out = np.empty(shape, dtype=object)
            out.fill(value)
            return out

    # Attempt to coerce to a numpy array
    try:
        if not copy:
            arr = np.asarray(value, dtype=dtype)
        else:
            arr = np.array(value, dtype=dtype, copy=copy)
    except (ValueError, TypeError) as err:
        raise TypeError(
            f"DataFrame constructor called with incompatible data and dtype: {err}"
        ) from err

    if arr.ndim != 0:
        raise ValueError("DataFrame constructor not properly called!")

    return np.full(shape, arr)


def construct_1d_arraylike_from_scalar(
    value: Scalar, length: int, dtype: DtypeObj | None
) -> ArrayLike:
    """
    Create an np.ndarray / pandas type of the specified length and dtype,
    filled with value.

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype or np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value

    """

    if dtype is None:
        try:
            dtype, value = infer_dtype_from_scalar(value)
        except OutOfBoundsDatetime:
            dtype = _dtype_obj

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        seq = [] if length == 0 else [value]
        subarr = cls._from_sequence(seq, dtype=dtype).repeat(length)

    else:
        if length and dtype.kind in "iu" and isna(value):
            # coerce if we have nan for an integer dtype
            dtype = np.dtype("float64")
        elif lib.is_np_dtype(dtype, "US"):
            # we need to coerce to object dtype so that numpy takes our
            # string as a scalar value (instead of truncating it to the
            # fixed-width string dtype)
            dtype = np.dtype("object")
            if not isna(value):
                value = ensure_str(value)
        elif dtype.kind in "mM":
            value = _maybe_box_and_unbox_datetimelike(value, dtype)

        subarr = np.empty(length, dtype=dtype)
        if length:
            # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes
            subarr.fill(value)

    return subarr

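# Illustrative usage (editor's sketch, not part of the original module):
# a NaN fill coerces an integer dtype to float64 so the missing value can
# actually be represented.
#
#   >>> construct_1d_arraylike_from_scalar(np.nan, 3, np.dtype("int64"))
#   array([nan, nan, nan])
#   >>> construct_1d_arraylike_from_scalar(5, 2, np.dtype("int64"))
#   array([5, 5])
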

def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
    # Caller is responsible for checking dtype.kind in "mM"

    if isinstance(value, dt.datetime):
        # we don't want to box dt64, in particular datetime64("NaT")
        value = maybe_box_datetimelike(value, dtype)

    return _maybe_unbox_datetimelike(value, dtype)


def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
    """
    Transform any list-like object into a 1-dimensional numpy array of object
    dtype.

    Parameters
    ----------
    values : any iterable which has a len()

    Raises
    ------
    TypeError
        * If `values` does not have a len()

    Returns
    -------
    1-dimensional numpy array of dtype object
    """
    # numpy will try to interpret nested lists as further dimensions, hence
    # making a 1D array that contains list-likes is a bit tricky:
    result = np.empty(len(values), dtype="object")
    result[:] = values
    return result

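# Illustrative usage (editor's sketch, not part of the original module):
# np.asarray would try to build a 2-D array from nested lists; this helper
# keeps them as elements of a 1-D object array.
#
#   >>> construct_1d_object_array_from_listlike([[1, 2], [3]])
#   array([list([1, 2]), list([3])], dtype=object)
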

def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
    """
    Cast an array to the given integer dtype, raising when the data is
    incompatible with integer/unsigned integer dtypes.

    Parameters
    ----------
    arr : np.ndarray or list
        The array to cast.
    dtype : np.dtype
        The integer dtype to cast the array to.

    Returns
    -------
    ndarray
        Array of integer or unsigned integer dtype.

    Raises
    ------
    OverflowError : the dtype is incompatible with the data
    ValueError : loss of precision has occurred during casting

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> pd.Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """
    assert dtype.kind in "iu"

    try:
        if not isinstance(arr, np.ndarray):
            with warnings.catch_warnings():
                # We already disallow dtype=uint w/ negative numbers
                # (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
                warnings.filterwarnings(
                    "ignore",
                    "NumPy will stop allowing conversion of out-of-bound Python int",
                    DeprecationWarning,
                )
                casted = np.asarray(arr, dtype=dtype)
        else:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                casted = arr.astype(dtype, copy=False)
    except OverflowError as err:
        raise OverflowError(
            "The elements provided in the data cannot all be "
            f"casted to the dtype {dtype}"
        ) from err

    if isinstance(arr, np.ndarray) and arr.dtype == dtype:
        # avoid expensive array_equal check
        return casted

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        warnings.filterwarnings(
            "ignore", "elementwise comparison failed", FutureWarning
        )
        if np.array_equal(arr, casted):
            return casted

    # We do this casting to allow for proper
    # data and dtype checking.
    #
    # We didn't do this earlier because NumPy
    # doesn't handle `uint64` correctly.
    arr = np.asarray(arr)

    if np.issubdtype(arr.dtype, str):
        # TODO(numpy-2.0 min): This case will raise an OverflowError above
        if (casted.astype(str) == arr).all():
            return casted
        raise ValueError(f"string values cannot be losslessly cast to {dtype}")

    if dtype.kind == "u" and (arr < 0).any():
        # TODO: can this be hit anymore after numpy 2.0?
        raise OverflowError("Trying to coerce negative values to unsigned integers")

    if arr.dtype.kind == "f":
        if not np.isfinite(arr).all():
            raise IntCastingNaNError(
                "Cannot convert non-finite values (NA or inf) to integer"
            )
        raise ValueError("Trying to coerce float values to integers")
    if arr.dtype == object:
        raise ValueError("Trying to coerce float values to integers")

    if casted.dtype < arr.dtype:
        # TODO: Can this path be hit anymore with numpy > 2
        # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
        raise ValueError(
            f"Values are too large to be losslessly converted to {dtype}. "
            f"To cast anyway, use pd.Series(values).astype({dtype})"
        )

    if arr.dtype.kind in "mM":
        # test_constructor_maskedarray_nonfloat
        raise TypeError(
            f"Constructing a Series or DataFrame from {arr.dtype} values and "
            f"dtype={dtype} is not supported. Use values.view({dtype}) instead."
        )

    # No known cases that get here, but raising explicitly to cover our bases.
    raise ValueError(f"values cannot be losslessly cast to {dtype}")


def can_hold_element(arr: ArrayLike, element: Any) -> bool:
    """
    Can we do an inplace setitem with this element in an array with this dtype?

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
    element : Any

    Returns
    -------
    bool
    """
    dtype = arr.dtype
    if not isinstance(dtype, np.dtype) or dtype.kind in "mM":
        if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)):
            # np.dtype here catches datetime64ns and timedelta64ns; we assume
            #  in this case that we have DatetimeArray/TimedeltaArray
            arr = cast(
                "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr
            )
            try:
                arr._validate_setitem_value(element)
                return True
            except (ValueError, TypeError):
                return False

        # This is technically incorrect, but maintains the behavior of
        # ExtensionBlock._can_hold_element
        return True

    try:
        np_can_hold_element(dtype, element)
        return True
    except (TypeError, LossySetitemError):
        return False

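# Illustrative usage (editor's sketch, not part of the original module):
# an int8 array can hold 100 but not 1000, which exceeds
# np.iinfo("int8").max.
#
#   >>> can_hold_element(np.array([1, 2], dtype=np.int8), 100)
#   True
#   >>> can_hold_element(np.array([1, 2], dtype=np.int8), 1000)
#   False
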

def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
    """
    Raise if we cannot losslessly set this element into an ndarray with this dtype.

    This is specifically about places where we disagree with numpy, i.e. there
    are cases where numpy will raise in doing the setitem that we do not check
    for here, e.g. setting the str "X" into a numeric ndarray.

    Returns
    -------
    Any
        The element, potentially cast to the dtype.

    Raises
    ------
    LossySetitemError : If we cannot losslessly store this element with this dtype.
    """
    if dtype == _dtype_obj:
        return element

    tipo = _maybe_infer_dtype_type(element)

    if dtype.kind in "iu":
        if isinstance(element, range):
            if _dtype_can_hold_range(element, dtype):
                return element
            raise LossySetitemError

        if is_integer(element) or (is_float(element) and element.is_integer()):
            # e.g. test_setitem_series_int8 if we have a python int 1
            # tipo may be np.int32, despite the fact that it will fit
            # in smaller int dtypes.
            info = np.iinfo(dtype)
            if info.min <= element <= info.max:
                return dtype.type(element)
            raise LossySetitemError

        if tipo is not None:
            if tipo.kind not in "iu":
                if isinstance(element, np.ndarray) and element.dtype.kind == "f":
                    # If all can be losslessly cast to integers, then we can hold them
                    with np.errstate(invalid="ignore"):
                        # We check afterwards if the cast was lossless, so no need
                        # to show the warning
                        casted = element.astype(dtype)
                    comp = casted == element
                    if comp.all():
                        # Return the casted values bc they can be passed to
                        # np.putmask, whereas the raw values cannot.
                        # see TestSetitemFloatNDarrayIntoIntegerSeries
                        return casted
                    raise LossySetitemError

                elif isinstance(element, ABCExtensionArray) and isinstance(
                    element.dtype, CategoricalDtype
                ):
                    # GH#52927 setting Categorical value into non-EA frame
                    # TODO: general-case for EAs?
                    try:
                        casted = element.astype(dtype)
                    except (ValueError, TypeError):
                        raise LossySetitemError
                    # Check for cases of either
                    #  a) lossy overflow/rounding or
                    #  b) semantic changes like dt64->int64
                    comp = casted == element
                    if not comp.all():
                        raise LossySetitemError
                    return casted

                # Anything other than integer we cannot hold
                raise LossySetitemError
            if (
                dtype.kind == "u"
                and isinstance(element, np.ndarray)
                and element.dtype.kind == "i"
            ):
                # see test_where_uint64
                casted = element.astype(dtype)
                if (casted == element).all():
                    # TODO: faster to check (element >= 0).all()? potential
                    #  itemsize issues there?
                    return casted
                raise LossySetitemError
            if dtype.itemsize < tipo.itemsize:
                raise LossySetitemError
            if not isinstance(tipo, np.dtype):
                # i.e. nullable IntegerDtype; we can put this into an ndarray
                #  losslessly iff it has no NAs
                arr = element._values if isinstance(element, ABCSeries) else element
                if arr._hasna:
                    raise LossySetitemError
                return element

            return element

        raise LossySetitemError

    if dtype.kind == "f":
        if lib.is_integer(element) or lib.is_float(element):
            casted = dtype.type(element)
            if np.isnan(casted) or casted == element:
                return casted
            # otherwise e.g. overflow see TestCoercionFloat32
            raise LossySetitemError

        if tipo is not None:
            # TODO: itemsize check?
            if tipo.kind not in "iuf":
                # Anything other than float/integer we cannot hold
                raise LossySetitemError
            if not isinstance(tipo, np.dtype):
                # i.e. nullable IntegerDtype or FloatingDtype;
                #  we can put this into an ndarray losslessly iff it has no NAs
                if element._hasna:
                    raise LossySetitemError
                return element
            elif tipo.itemsize > dtype.itemsize or tipo.kind != dtype.kind:
                if isinstance(element, np.ndarray):
                    # e.g. TestDataFrameIndexingWhere::test_where_alignment
                    casted = element.astype(dtype)
                    if np.array_equal(casted, element, equal_nan=True):
                        return casted
                    raise LossySetitemError

            return element

        raise LossySetitemError

    if dtype.kind == "c":
        if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
            if np.isnan(element):
                # see test_where_complex GH#6345
                return dtype.type(element)

            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                casted = dtype.type(element)
            if casted == element:
                return casted
            # otherwise e.g. overflow see test_32878_complex_itemsize
            raise LossySetitemError

        if tipo is not None:
            if tipo.kind in "iufc":
                return element
            raise LossySetitemError
        raise LossySetitemError

    if dtype.kind == "b":
        if tipo is not None:
            if tipo.kind == "b":
                if not isinstance(tipo, np.dtype):
                    # i.e. we have a BooleanArray
                    if element._hasna:
                        # i.e. there are pd.NA elements
                        raise LossySetitemError
                return element
            raise LossySetitemError
        if lib.is_bool(element):
            return element
        raise LossySetitemError

    if dtype.kind == "S":
        # TODO: test tests.frame.methods.test_replace tests get here,
        #  need more targeted tests. xref phofl has a PR about this
        if tipo is not None:
            if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
                return element
            raise LossySetitemError
        if isinstance(element, bytes) and len(element) <= dtype.itemsize:
            return element
        raise LossySetitemError

    if dtype.kind == "V":
        # i.e. np.void, which cannot hold _anything_
        raise LossySetitemError

    raise NotImplementedError(dtype)


def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
    """
    _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints),
    but in many cases a range can be held by a smaller integer dtype.
    Check if this is one of those cases.
    """
    if not len(rng):
        return True
    return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.stop, dtype)


def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool:
    """
    pandas equivalent of np.can_cast, with the pre-2.0 behavior that allowed
    scalar inference.

    Parameters
    ----------
    element : Scalar
    dtype : np.dtype

    Returns
    -------
    bool
    """
    try:
        np_can_hold_element(dtype, element)
        return True
    except (LossySetitemError, NotImplementedError):
        return False
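

# Illustrative usage (editor's sketch, not part of the original module):
# unlike np.can_cast on dtypes, this inspects the scalar's value, so 3 fits
# in int8 while 300 does not.
#
#   >>> np_can_cast_scalar(3, np.dtype("int8"))
#   True
#   >>> np_can_cast_scalar(300, np.dtype("int8"))
#   False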