Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/dtypes/cast.py: 11%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

756 statements  

1""" 

2Routines for casting. 

3""" 

4 

5from __future__ import annotations 

6 

7import datetime as dt 

8import functools 

9from typing import ( 

10 TYPE_CHECKING, 

11 Any, 

12 Literal, 

13 Sized, 

14 TypeVar, 

15 cast, 

16 overload, 

17) 

18import warnings 

19 

20import numpy as np 

21 

22from pandas._libs import lib 

23from pandas._libs.missing import ( 

24 NA, 

25 NAType, 

26 checknull, 

27) 

28from pandas._libs.tslibs import ( 

29 NaT, 

30 OutOfBoundsDatetime, 

31 OutOfBoundsTimedelta, 

32 Timedelta, 

33 Timestamp, 

34 get_unit_from_dtype, 

35 is_supported_unit, 

36) 

37from pandas._libs.tslibs.timedeltas import array_to_timedelta64 

38from pandas._typing import ( 

39 ArrayLike, 

40 Dtype, 

41 DtypeObj, 

42 NumpyIndexT, 

43 Scalar, 

44 npt, 

45) 

46from pandas.errors import ( 

47 IntCastingNaNError, 

48 LossySetitemError, 

49) 

50 

51from pandas.core.dtypes.common import ( 

52 ensure_int8, 

53 ensure_int16, 

54 ensure_int32, 

55 ensure_int64, 

56 ensure_object, 

57 ensure_str, 

58 is_bool, 

59 is_bool_dtype, 

60 is_complex, 

61 is_complex_dtype, 

62 is_datetime64_dtype, 

63 is_extension_array_dtype, 

64 is_float, 

65 is_float_dtype, 

66 is_integer, 

67 is_integer_dtype, 

68 is_numeric_dtype, 

69 is_object_dtype, 

70 is_scalar, 

71 is_signed_integer_dtype, 

72 is_string_dtype, 

73 is_timedelta64_dtype, 

74 is_unsigned_integer_dtype, 

75 pandas_dtype as pandas_dtype_func, 

76) 

77from pandas.core.dtypes.dtypes import ( 

78 BaseMaskedDtype, 

79 CategoricalDtype, 

80 DatetimeTZDtype, 

81 ExtensionDtype, 

82 IntervalDtype, 

83 PandasExtensionDtype, 

84 PeriodDtype, 

85) 

86from pandas.core.dtypes.generic import ( 

87 ABCExtensionArray, 

88 ABCIndex, 

89 ABCSeries, 

90) 

91from pandas.core.dtypes.inference import is_list_like 

92from pandas.core.dtypes.missing import ( 

93 is_valid_na_for_dtype, 

94 isna, 

95 na_value_for_dtype, 

96 notna, 

97) 

98 

99from pandas.io._util import _arrow_dtype_mapping 

100 

101if TYPE_CHECKING: 

102 from pandas import Index 

103 from pandas.core.arrays import ( 

104 Categorical, 

105 DatetimeArray, 

106 ExtensionArray, 

107 IntervalArray, 

108 PeriodArray, 

109 TimedeltaArray, 

110 ) 

111 

112 

# Upper bounds of each signed-integer width; used by coerce_indexer_dtype
# below to choose the narrowest indexer dtype that can hold all categories.
_int8_max = np.iinfo(np.int8).max
_int16_max = np.iinfo(np.int16).max
_int32_max = np.iinfo(np.int32).max
_int64_max = np.iinfo(np.int64).max

# Cached object-dtype singleton; compared against throughout this module.
_dtype_obj = np.dtype(object)

# TypeVar bound to np.ndarray so helpers can declare "returns the same
# array type it was given".
NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray)

121 

122 

def maybe_convert_platform(
    values: list | tuple | range | np.ndarray | ExtensionArray,
) -> ArrayLike:
    """try to do platform conversion, allow ndarray or list here"""
    # Wrap plain list-likes in a 1d object ndarray; ndarray/ExtensionArray
    # inputs are taken as-is (the caller guarantees the type).
    if isinstance(values, (list, tuple, range)):
        arr: ArrayLike = construct_1d_object_array_from_listlike(values)
    else:
        arr = values

    # Soft-convert object dtype to a more specific dtype where possible.
    if arr.dtype == _dtype_obj:
        return lib.maybe_convert_objects(cast(np.ndarray, arr))

    return arr

141 

142 

def is_nested_object(obj) -> bool:
    """
    Check whether obj is a Series with one or more Series elements.

    This may not necessarily be performant.
    """
    # Only object-dtype Series can nest other Series.
    if not isinstance(obj, ABCSeries) or not is_object_dtype(obj.dtype):
        return False
    return any(isinstance(element, ABCSeries) for element in obj._values)

156 

157 

def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
    """
    Cast scalar to Timestamp or Timedelta if scalar is datetime-like
    and dtype is not object.

    Parameters
    ----------
    value : scalar
    dtype : Dtype, optional

    Returns
    -------
    scalar
    """
    # An explicit object dtype means "leave the value alone".
    if dtype == _dtype_obj:
        return value
    if isinstance(value, (np.datetime64, dt.datetime)):
        return Timestamp(value)
    if isinstance(value, (np.timedelta64, dt.timedelta)):
        return Timedelta(value)
    return value

180 

181 

def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
    """
    If passed a scalar, cast it to the equivalent python-native type.

    Parameters
    ----------
    value : scalar

    Returns
    -------
    scalar or None
        float/int/bool/Timestamp/Timedelta/None as appropriate; anything
        unrecognized is returned unchanged.
    """
    if is_float(value):
        # error: Argument 1 to "float" has incompatible type
        # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]";
        # expected "Union[SupportsFloat, _SupportsIndex, str]"
        return float(value)  # type: ignore[arg-type]
    if is_integer(value):
        # error: Argument 1 to "int" has incompatible type
        # "Union[Union[str, int, float, bool], Union[Any, Timestamp, Timedelta, Any]]";
        # expected "Union[str, SupportsInt, _SupportsIndex, _SupportsTrunc]"
        return int(value)  # type: ignore[arg-type]
    if is_bool(value):
        return bool(value)
    if isinstance(value, (np.datetime64, np.timedelta64)):
        return maybe_box_datetimelike(value)
    if value is NA:
        return None
    return value

211 

212 

def _maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
    """
    Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting
    into a numpy array. Failing to unbox would risk dropping nanoseconds.

    Parameters
    ----------
    value : scalar
        Typically a Timestamp, Timedelta, or NA-like value.
    dtype : np.dtype or DatetimeTZDtype
        Target dtype the value will be stored under.

    Returns
    -------
    scalar
        An np.datetime64/np.timedelta64, or the original value (e.g. a
        tz-aware Timestamp paired with a DatetimeTZDtype).

    Raises
    ------
    TypeError
        If a tz-aware Timestamp is unboxed into a tz-naive dtype, or the
        value's datetimelike kind mismatches the dtype's kind.

    Notes
    -----
    Caller is responsible for checking dtype.kind in ["m", "M"]
    """
    if is_valid_na_for_dtype(value, dtype):
        # GH#36541: can't fill array directly with pd.NaT
        # > np.empty(10, dtype="datetime64[ns]").fill(pd.NaT)
        # ValueError: cannot convert float NaN to integer
        value = dtype.type("NaT", "ns")
    elif isinstance(value, Timestamp):
        if value.tz is None:
            value = value.to_datetime64()
        elif not isinstance(dtype, DatetimeTZDtype):
            raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
    elif isinstance(value, Timedelta):
        value = value.to_timedelta64()

    # Reject dt64 values aimed at td64 dtypes and vice-versa.
    _disallow_mismatched_datetimelike(value, dtype)
    return value

237 

238 

def _disallow_mismatched_datetimelike(value, dtype: DtypeObj):
    """
    Guard against numpy's willingness to cast dt64 values to td64 and
    vice-versa (e.g. np.array(dt64values, dtype="timedelta64[ns]"));
    pandas treats such a mix as an error.
    """
    vdtype = getattr(value, "dtype", None)
    if vdtype is None:
        # scalars without a dtype are handled elsewhere
        return
    # Exactly one of the two kinds is "m" and the other "M".
    if {vdtype.kind, dtype.kind} == {"m", "M"}:
        raise TypeError(f"Cannot cast {repr(value)} to {dtype}")

252 

253 

@overload
def maybe_downcast_to_dtype(result: np.ndarray, dtype: str | np.dtype) -> np.ndarray:
    ...


@overload
def maybe_downcast_to_dtype(result: ExtensionArray, dtype: str | np.dtype) -> ArrayLike:
    ...


def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLike:
    """
    try to cast to the specified dtype (e.g. convert back to bool/int
    or could be an astype of float64->float32

    Parameters
    ----------
    result : np.ndarray or ExtensionArray
    dtype : str or np.dtype
        May be the string "infer", in which case a concrete target dtype is
        chosen from the values via lib.infer_dtype.

    Returns
    -------
    np.ndarray or ExtensionArray
        ``result`` unchanged when no lossless cast applies.
    """
    do_round = False

    if isinstance(dtype, str):
        if dtype == "infer":
            # Map the inferred kind of the values to a concrete dtype string.
            inferred_type = lib.infer_dtype(result, skipna=False)
            if inferred_type == "boolean":
                dtype = "bool"
            elif inferred_type == "integer":
                dtype = "int64"
            elif inferred_type == "datetime64":
                dtype = "datetime64[ns]"
            elif inferred_type in ["timedelta", "timedelta64"]:
                dtype = "timedelta64[ns]"

            # try to upcast here
            elif inferred_type == "floating":
                dtype = "int64"
                if issubclass(result.dtype.type, np.number):
                    # round before the int cast so maybe_downcast_numeric can
                    # verify the conversion is lossless
                    do_round = True

            else:
                # TODO: complex? what if result is already non-object?
                dtype = "object"

        dtype = np.dtype(dtype)

    if not isinstance(dtype, np.dtype):
        # enforce our signature annotation
        raise TypeError(dtype)  # pragma: no cover

    converted = maybe_downcast_numeric(result, dtype, do_round)
    if converted is not result:
        # numeric downcast succeeded (failure returns the input unchanged)
        return converted

    # a datetimelike
    # GH12821, iNaT is cast to float
    if dtype.kind in ["M", "m"] and result.dtype.kind in ["i", "f"]:
        result = result.astype(dtype)

    elif dtype.kind == "m" and result.dtype == _dtype_obj:
        # test_where_downcast_to_td64
        result = cast(np.ndarray, result)
        result = array_to_timedelta64(result)

    elif dtype == np.dtype("M8[ns]") and result.dtype == _dtype_obj:
        result = cast(np.ndarray, result)
        return np.asarray(maybe_cast_to_datetime(result, dtype=dtype))

    return result

318 

319 

@overload
def maybe_downcast_numeric(
    result: np.ndarray, dtype: np.dtype, do_round: bool = False
) -> np.ndarray:
    ...


@overload
def maybe_downcast_numeric(
    result: ExtensionArray, dtype: DtypeObj, do_round: bool = False
) -> ArrayLike:
    ...


def maybe_downcast_numeric(
    result: ArrayLike, dtype: DtypeObj, do_round: bool = False
) -> ArrayLike:
    """
    Subset of maybe_downcast_to_dtype restricted to numeric dtypes.

    Returns ``result`` unchanged whenever the cast would be lossy or does not
    apply, so callers can detect success via ``converted is not result``.

    Parameters
    ----------
    result : ndarray or ExtensionArray
    dtype : np.dtype or ExtensionDtype
    do_round : bool
        Whether to round values before checking the integer/bool cast.

    Returns
    -------
    ndarray or ExtensionArray
    """
    if not isinstance(dtype, np.dtype) or not isinstance(result.dtype, np.dtype):
        # e.g. SparseDtype has no itemsize attr
        return result

    def trans(x):
        # optionally round, so the lossless-cast comparison below tolerates
        # float results of integer ops
        if do_round:
            return x.round()
        return x

    if dtype.kind == result.dtype.kind:
        # don't allow upcasts here (except if empty)
        if result.dtype.itemsize <= dtype.itemsize and result.size:
            return result

    if is_bool_dtype(dtype) or is_integer_dtype(dtype):
        if not result.size:
            # if we don't have any elements, just astype it
            return trans(result).astype(dtype)

        # do a test on the first element, if it fails then we are done
        r = result.ravel()
        arr = np.array([r[0]])

        if isna(arr).any():
            # if we have any nulls, then we are done
            return result

        elif not isinstance(r[0], (np.integer, np.floating, int, float, bool)):
            # a comparable, e.g. a Decimal may slip in here
            return result

        if (
            issubclass(result.dtype.type, (np.object_, np.number))
            and notna(result).all()
        ):
            new_result = trans(result).astype(dtype)
            if new_result.dtype.kind == "O" or result.dtype.kind == "O":
                # np.allclose may raise TypeError on object-dtype
                if (new_result == result).all():
                    return new_result
            else:
                # rtol=0: require exact equality, not closeness
                if np.allclose(new_result, result, rtol=0):
                    return new_result

    elif (
        issubclass(dtype.type, np.floating)
        and not is_bool_dtype(result.dtype)
        and not is_string_dtype(result.dtype)
    ):
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "overflow encountered in cast", RuntimeWarning
            )
            new_result = result.astype(dtype)

        # Adjust tolerances based on floating point size
        size_tols = {4: 5e-4, 8: 5e-8, 16: 5e-16}

        atol = size_tols.get(new_result.dtype.itemsize, 0.0)

        # Check downcast float values are still equal within 7 digits when
        # converting from float64 to float32
        if np.allclose(new_result, result, equal_nan=True, rtol=0.0, atol=atol):
            return new_result

    elif dtype.kind == result.dtype.kind == "c":
        new_result = result.astype(dtype)

        if np.array_equal(new_result, result, equal_nan=True):
            # TODO: use tolerance like we do for float?
            return new_result

    return result

423 

424 

def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT:
    """
    Upcast sub-64-bit signed/unsigned/float arrays to their 64-bit equivalent.

    Parameters
    ----------
    arr : ndarray or ExtensionArray

    Returns
    -------
    ndarray or ExtensionArray
        The input unchanged when it is already 64-bit (or non-numeric).
    """
    dtype = arr.dtype
    # Map each numeric family onto its 64-bit representative.
    if is_signed_integer_dtype(dtype):
        if dtype != np.int64:
            return arr.astype(np.int64)
    elif is_unsigned_integer_dtype(dtype):
        if dtype != np.uint64:
            return arr.astype(np.uint64)
    elif is_float_dtype(dtype):
        if dtype != np.float64:
            return arr.astype(np.float64)
    return arr

446 

447 

def maybe_cast_pointwise_result(
    result: ArrayLike,
    dtype: DtypeObj,
    numeric_only: bool = False,
    same_dtype: bool = True,
) -> ArrayLike:
    """
    Try casting result of a pointwise operation back to the original dtype if
    appropriate.

    Parameters
    ----------
    result : array-like
        Result to cast.
    dtype : np.dtype or ExtensionDtype
        Input Series from which result was calculated.
    numeric_only : bool, default False
        Whether to cast only numerics or datetimes as well.
    same_dtype : bool, default True
        Specify dtype when calling _from_sequence

    Returns
    -------
    result : array-like
        result maybe casted to the dtype.
    """

    assert not is_scalar(result)

    if isinstance(dtype, ExtensionDtype):
        if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)):
            # TODO: avoid this special-casing
            # We have to special case categorical so as not to upcast
            # things like counts back to categorical

            cls = dtype.construct_array_type()
            if same_dtype:
                # pin the original extension dtype on reconstruction
                result = maybe_cast_to_extension_array(cls, result, dtype=dtype)
            else:
                # let _from_sequence infer the dtype from the values
                result = maybe_cast_to_extension_array(cls, result)

    elif (numeric_only and is_numeric_dtype(dtype)) or not numeric_only:
        result = maybe_downcast_to_dtype(result, dtype)

    return result

493 

494 

def maybe_cast_to_extension_array(
    cls: type[ExtensionArray], obj: ArrayLike, dtype: ExtensionDtype | None = None
) -> ArrayLike:
    """
    Call ``cls._from_sequence``, returning ``obj`` unchanged on any Exception.

    Parameters
    ----------
    cls : class, subclass of ExtensionArray
    obj : arraylike
        Values to pass to cls._from_sequence
    dtype : ExtensionDtype, optional

    Returns
    -------
    ExtensionArray or obj
    """
    from pandas.core.arrays.string_ import BaseStringArray

    assert isinstance(cls, type), f"must pass a type: {cls}"
    assert issubclass(
        cls, ABCExtensionArray
    ), f"must pass a subclass of ExtensionArray: {cls}"

    # Anything can be coerced into a StringArray; only do so when the values
    # actually look like strings.
    if issubclass(cls, BaseStringArray) and lib.infer_dtype(obj) != "string":
        return obj

    try:
        return cls._from_sequence(obj, dtype=dtype)
    except Exception:
        # Downstream EA constructors may raise anything; treat any failure
        # as "leave the input alone".
        return obj

528 

529 

@overload
def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype:
    ...


@overload
def ensure_dtype_can_hold_na(dtype: ExtensionDtype) -> ExtensionDtype:
    ...


def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
    """
    Return the closest dtype to ``dtype`` that is able to hold NA values.
    """
    if isinstance(dtype, ExtensionDtype):
        if dtype._can_hold_na:
            return dtype
        if isinstance(dtype, IntervalDtype):
            # TODO(GH#45349): don't special-case IntervalDtype, allow
            # overriding instead of returning object below.
            return IntervalDtype(np.float64, closed=dtype.closed)
        return _dtype_obj

    kind = dtype.kind
    if kind == "b":
        # bool has no NA representation -> fall back to object
        return _dtype_obj
    if kind in ["i", "u"]:
        # integers promote to float64, which can hold NaN
        return np.dtype(np.float64)
    return dtype

557 

558 

# Canonical NA singletons keyed by type. maybe_promote uses this to replace
# non-singleton NaN/NaT inputs with a canonical instance so the lru_cache in
# _maybe_promote_cached gets cache hits (see the GH#39692 link there).
_canonical_nans = {
    np.datetime64: np.datetime64("NaT", "ns"),
    np.timedelta64: np.timedelta64("NaT", "ns"),
    type(np.nan): np.nan,
}

564 

565 

def maybe_promote(dtype: np.dtype, fill_value=np.nan):
    """
    Find the minimal dtype that can hold both the given dtype and fill_value.

    Parameters
    ----------
    dtype : np.dtype
    fill_value : scalar, default np.nan

    Returns
    -------
    dtype
        Upcasted from dtype argument if necessary.
    fill_value
        Upcasted from fill_value argument if necessary.

    Raises
    ------
    ValueError
        If fill_value is a non-scalar and dtype is not object.
    """
    orig = fill_value
    orig_is_nat = False
    if checknull(fill_value):
        # https://github.com/pandas-dev/pandas/pull/39692#issuecomment-1441051740
        # avoid cache misses with NaN/NaT values that are not singletons
        if fill_value is not NA:
            try:
                orig_is_nat = np.isnat(fill_value)
            except TypeError:
                # not a np.datetime64/np.timedelta64
                pass

        fill_value = _canonical_nans.get(type(fill_value), fill_value)

    # for performance, we are using a cached version of the actual implementation
    # of the function in _maybe_promote. However, this doesn't always work (in case
    # of non-hashable arguments), so we fallback to the actual implementation if needed
    try:
        # error: Argument 3 to "__call__" of "_lru_cache_wrapper" has incompatible type
        # "Type[Any]"; expected "Hashable"  [arg-type]
        dtype, fill_value = _maybe_promote_cached(
            dtype, fill_value, type(fill_value)  # type: ignore[arg-type]
        )
    except TypeError:
        # if fill_value is not hashable (required for caching)
        dtype, fill_value = _maybe_promote(dtype, fill_value)

    if (dtype == _dtype_obj and orig is not None) or (
        orig_is_nat and np.datetime_data(orig)[0] != "ns"
    ):
        # GH#51592,53497 restore our potentially non-canonical fill_value
        fill_value = orig
    return dtype, fill_value

619 

620 

@functools.lru_cache(maxsize=128)
def _maybe_promote_cached(dtype, fill_value, fill_value_type):
    """Cached wrapper around _maybe_promote; use maybe_promote as entry point."""
    # The cached version of _maybe_promote below
    # This also use fill_value_type as (unused) argument to use this in the
    # cache lookup -> to differentiate 1 and True
    return _maybe_promote(dtype, fill_value)

627 

628 

def _maybe_promote(dtype: np.dtype, fill_value=np.nan):
    """
    Uncached implementation of maybe_promote: find the minimal dtype able to
    hold both ``dtype`` and ``fill_value``, returning (dtype, fill_value).
    """
    # The actual implementation of the function, use `maybe_promote` above for
    # a cached version.
    if not is_scalar(fill_value):
        # with object dtype there is nothing to promote, and the user can
        # pass pretty much any weird fill_value they like
        if not is_object_dtype(dtype):
            # non-object dtypes cannot hold an arbitrary non-scalar fill
            raise ValueError("fill_value must be a scalar")
        dtype = _dtype_obj
        return dtype, fill_value

    kinds = ["i", "u", "f", "c", "m", "M"]
    if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds:
        # the NA already fits (possibly after int->float promotion)
        dtype = ensure_dtype_can_hold_na(dtype)
        fv = na_value_for_dtype(dtype)
        return dtype, fv

    elif isinstance(dtype, CategoricalDtype):
        if fill_value in dtype.categories or isna(fill_value):
            return dtype, fill_value
        else:
            # a fill_value outside the categories forces object
            return object, ensure_object(fill_value)

    elif isna(fill_value):
        dtype = _dtype_obj
        if fill_value is None:
            # but we retain e.g. pd.NA
            fill_value = np.nan
        return dtype, fill_value

    # returns tuple of (dtype, fill_value)
    if issubclass(dtype.type, np.datetime64):
        inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
        if inferred == dtype:
            return dtype, fv

        from pandas.core.arrays import DatetimeArray

        # reuse DatetimeArray's setitem validation to decide whether the
        # fill_value is representable as dt64
        dta = DatetimeArray._from_sequence([], dtype="M8[ns]")
        try:
            fv = dta._validate_setitem_value(fill_value)
            return dta.dtype, fv
        except (ValueError, TypeError):
            return _dtype_obj, fill_value

    elif issubclass(dtype.type, np.timedelta64):
        inferred, fv = infer_dtype_from_scalar(fill_value, pandas_dtype=True)
        if inferred == dtype:
            return dtype, fv

        return np.dtype("object"), fill_value

    elif is_float(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            dtype = np.dtype(np.float64)

        elif dtype.kind == "f":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.float64 and dtype is np.float32
                dtype = mst

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

    elif is_bool(fill_value):
        if not issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

    elif is_integer(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            if not np.can_cast(fill_value, dtype):
                # upcast to prevent overflow
                mst = np.min_scalar_type(fill_value)
                dtype = np.promote_types(dtype, mst)
                if dtype.kind == "f":
                    # Case where we disagree with numpy
                    dtype = np.dtype(np.object_)

    elif is_complex(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, (np.integer, np.floating)):
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.complex128 and dtype is np.complex64
                dtype = mst

    else:
        # unrecognized fill_value type -> object
        dtype = np.dtype(np.object_)

    # in case we have a string that looked like a number
    if issubclass(dtype.type, (bytes, str)):
        dtype = np.dtype(np.object_)

    fill_value = _ensure_dtype_type(fill_value, dtype)
    return dtype, fill_value

740 

741 

def _ensure_dtype_type(value, dtype: np.dtype):
    """
    Ensure that the given value is an instance of the given dtype.

    e.g. if out dtype is np.complex64_, we should have an instance of that
    as opposed to a python complex object.

    Parameters
    ----------
    value : object
    dtype : np.dtype

    Returns
    -------
    object
    """
    # Object dtype is the one case where we deliberately do NOT coerce.
    if dtype != _dtype_obj:
        # Note: callers have already excluded isna(value).
        value = dtype.type(value)
    return value

765 

766 

def infer_dtype_from(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar or array.

    Parameters
    ----------
    val : object
    pandas_dtype : bool, default False
        whether to infer dtype including pandas extension types.
        If False, scalar/array belongs to pandas extension types is inferred as
        object
    """
    # Dispatch on list-likeness: arrays go one way, scalars the other.
    if is_list_like(val):
        return infer_dtype_from_array(val, pandas_dtype=pandas_dtype)
    return infer_dtype_from_scalar(val, pandas_dtype=pandas_dtype)

782 

783 

def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> tuple[DtypeObj, Any]:
    """
    Interpret the dtype from a scalar.

    Parameters
    ----------
    val : object
        Scalar (or 0-dim ndarray) whose dtype is to be inferred.
    pandas_dtype : bool, default False
        whether to infer dtype including pandas extension types.
        If False, scalar belongs to pandas extension types is inferred as
        object

    Returns
    -------
    tuple of (dtype, value)
        The value may be converted (e.g. datetime -> np.datetime64) to match
        the returned dtype.
    """
    dtype: DtypeObj = _dtype_obj

    # a 1-element ndarray
    if isinstance(val, np.ndarray):
        if val.ndim != 0:
            msg = "invalid ndarray passed to infer_dtype_from_scalar"
            raise ValueError(msg)

        dtype = val.dtype
        val = lib.item_from_zerodim(val)

    elif isinstance(val, str):
        # If we create an empty array using a string to infer
        # the dtype, NumPy will only allocate one character per entry
        # so this is kind of bad. Alternately we could use np.repeat
        # instead of np.empty (but then you still don't want things
        # coming out as np.str_!

        dtype = _dtype_obj

    elif isinstance(val, (np.datetime64, dt.datetime)):
        try:
            val = Timestamp(val)
            if val is not NaT:
                val = val.as_unit("ns")
        except OutOfBoundsDatetime:
            # too far from epoch for datetime64[ns] -> keep as object
            return _dtype_obj, val

        if val is NaT or val.tz is None:
            val = val.to_datetime64()
            dtype = val.dtype
            # TODO: test with datetime(2920, 10, 1) based on test_replace_dtypes
        else:
            if pandas_dtype:
                dtype = DatetimeTZDtype(unit="ns", tz=val.tz)
            else:
                # return datetimetz as object
                return _dtype_obj, val

    elif isinstance(val, (np.timedelta64, dt.timedelta)):
        try:
            val = Timedelta(val)
        except (OutOfBoundsTimedelta, OverflowError):
            # out of td64[ns] range -> keep as object
            dtype = _dtype_obj
        else:
            dtype = np.dtype("m8[ns]")
            val = np.timedelta64(val.value, "ns")

    elif is_bool(val):
        dtype = np.dtype(np.bool_)

    elif is_integer(val):
        if isinstance(val, np.integer):
            # preserve the specific numpy integer width
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.int64)

        try:
            np.array(val, dtype=dtype)
        except OverflowError:
            # value doesn't fit in int64; let numpy pick (e.g. uint64/object)
            dtype = np.array(val).dtype

    elif is_float(val):
        if isinstance(val, np.floating):
            # preserve the specific numpy float width
            dtype = np.dtype(type(val))
        else:
            dtype = np.dtype(np.float64)

    elif is_complex(val):
        dtype = np.dtype(np.complex_)

    elif pandas_dtype:
        if lib.is_period(val):
            dtype = PeriodDtype(freq=val.freq)
        elif lib.is_interval(val):
            subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
            dtype = IntervalDtype(subtype=subtype, closed=val.closed)

    return dtype, val

874 

875 

def dict_compat(d: dict[Scalar, Scalar]) -> dict[Scalar, Scalar]:
    """
    Convert datetimelike-keyed dicts to a Timestamp-keyed dict.

    Parameters
    ----------
    d: dict-like object

    Returns
    -------
    dict
    """
    # Re-key through maybe_box_datetimelike; non-datetimelike keys pass
    # through unchanged.
    converted: dict[Scalar, Scalar] = {}
    for key, value in d.items():
        converted[maybe_box_datetimelike(key)] = value
    return converted

889 

890 

def infer_dtype_from_array(
    arr, pandas_dtype: bool = False
) -> tuple[DtypeObj, ArrayLike]:
    """
    Infer the dtype from an array.

    Parameters
    ----------
    arr : array
    pandas_dtype : bool, default False
        whether to infer dtype including pandas extension types.
        If False, array belongs to pandas extension types
        is inferred as object

    Returns
    -------
    tuple (numpy-compat/pandas-compat dtype, array)

    Notes
    -----
    if pandas_dtype=False. these infer to numpy dtypes
    exactly with the exception that mixed / object dtypes
    are not coerced by stringifying or conversion

    if pandas_dtype=True. datetime64tz-aware/categorical
    types will retain there character.

    Examples
    --------
    >>> np.asarray([1, '1'])
    array(['1', '1'], dtype='<U21')

    >>> infer_dtype_from_array([1, '1'])
    (dtype('O'), [1, '1'])
    """
    # ndarrays already carry their dtype.
    if isinstance(arr, np.ndarray):
        return arr.dtype, arr

    if not is_list_like(arr):
        raise TypeError("'arr' must be list-like")

    if pandas_dtype and is_extension_array_dtype(arr):
        return arr.dtype, arr

    if isinstance(arr, ABCSeries):
        return arr.dtype, np.asarray(arr)

    # don't force numpy coerce with nan's
    inferred = lib.infer_dtype(arr, skipna=False)
    if inferred in ["string", "bytes", "mixed", "mixed-integer"]:
        # keep the original list-like rather than stringifying via np.asarray
        return (np.dtype(np.object_), arr)

    coerced = np.asarray(arr)
    return coerced.dtype, coerced

945 

946 

def _maybe_infer_dtype_type(element):
    """
    Try to infer an object's dtype, for use in arithmetic ops.

    Uses `element.dtype` if that's available.
    Objects implementing the iterator protocol are cast to a NumPy array,
    and from there the array's type is used.

    Parameters
    ----------
    element : object
        Possibly has a `.dtype` attribute, and possibly the iterator
        protocol.

    Returns
    -------
    tipo : type

    Examples
    --------
    >>> from collections import namedtuple
    >>> Foo = namedtuple("Foo", "dtype")
    >>> _maybe_infer_dtype_type(Foo(np.dtype("i8")))
    dtype('int64')
    """
    if hasattr(element, "dtype"):
        return element.dtype
    if is_list_like(element):
        # fall back to numpy's inference for generic list-likes
        return np.asarray(element).dtype
    return None

979 

980 

def invalidate_string_dtypes(dtype_set: set[DtypeObj]) -> None:
    """
    Raise if ``dtype_set`` contains string-like numpy dtypes;
    ``DataFrame.select_dtypes()`` requires 'object' instead.
    """
    # error: Argument 1 to <set> has incompatible type "Type[generic]"; expected
    # "Union[dtype[Any], ExtensionDtype, None]"
    # error: Argument 2 to <set> has incompatible type "Type[generic]"; expected
    # "Union[dtype[Any], ExtensionDtype, None]"
    string_types = {
        np.dtype("S").type,  # type: ignore[arg-type]
        np.dtype("<U").type,  # type: ignore[arg-type]
    }
    if dtype_set & string_types:
        raise TypeError("string dtypes are not allowed, use 'object' instead")

996 

997 

def coerce_indexer_dtype(indexer, categories) -> np.ndarray:
    """coerce the indexer input array to the smallest dtype possible"""
    length = len(categories)
    # Walk the widths narrowest-first and take the first one that can
    # represent every category code.
    for bound, coercer in (
        (_int8_max, ensure_int8),
        (_int16_max, ensure_int16),
        (_int32_max, ensure_int32),
    ):
        if length < bound:
            return coercer(indexer)
    return ensure_int64(indexer)

1008 

1009 

def convert_dtypes(
    input_array: ArrayLike,
    convert_string: bool = True,
    convert_integer: bool = True,
    convert_boolean: bool = True,
    convert_floating: bool = True,
    infer_objects: bool = False,
    dtype_backend: Literal["numpy_nullable", "pyarrow"] = "numpy_nullable",
) -> DtypeObj:
    """
    Convert objects to best possible type, and optionally,
    to types supporting ``pd.NA``.

    Parameters
    ----------
    input_array : ExtensionArray or np.ndarray
    convert_string : bool, default True
        Whether object dtypes should be converted to ``StringDtype()``.
    convert_integer : bool, default True
        Whether, if possible, conversion can be done to integer extension types.
    convert_boolean : bool, defaults True
        Whether object dtypes should be converted to ``BooleanDtypes()``.
    convert_floating : bool, defaults True
        Whether, if possible, conversion can be done to floating extension types.
        If `convert_integer` is also True, preference will be give to integer
        dtypes if the floats can be faithfully casted to integers.
    infer_objects : bool, defaults False
        Whether to also infer objects to float/int if possible. Is only hit if the
        object array contains pd.NA.
    dtype_backend : str, default "numpy_nullable"
        Nullable dtype implementation to use.

        * "numpy_nullable" returns numpy-backed nullable types
        * "pyarrow" returns pyarrow-backed nullable types using ``ArrowDtype``

    Returns
    -------
    np.dtype, or ExtensionDtype
    """
    result_dtype: str | DtypeObj

    from pandas.core.arrays.arrow.dtype import ArrowDtype

    wants_conversion = (
        convert_string or convert_integer or convert_boolean or convert_floating
    )
    if wants_conversion and isinstance(input_array, np.ndarray):
        if is_object_dtype(input_array.dtype):
            # a string here means "could not pick a nullable dtype (yet)"
            result_dtype = lib.infer_dtype(input_array)
        else:
            result_dtype = input_array.dtype

        if is_string_dtype(result_dtype):
            if not convert_string or result_dtype == "bytes":
                result_dtype = input_array.dtype
            else:
                result_dtype = pandas_dtype_func("string")

        if convert_integer:
            default_int_dtype = pandas_dtype_func("Int64")

            if is_integer_dtype(input_array.dtype):
                from pandas.core.arrays.integer import INT_STR_TO_DTYPE

                result_dtype = INT_STR_TO_DTYPE.get(
                    input_array.dtype.name, default_int_dtype
                )
            elif is_numeric_dtype(input_array.dtype):
                # TODO: de-dup with maybe_cast_to_integer_array?
                valid = input_array[notna(input_array)]
                if (valid.astype(int) == valid).all():
                    result_dtype = default_int_dtype
                else:
                    result_dtype = input_array.dtype
            elif (
                infer_objects
                and is_object_dtype(input_array.dtype)
                and (isinstance(result_dtype, str) and result_dtype == "integer")
            ):
                result_dtype = default_int_dtype

        if convert_floating:
            if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
                input_array.dtype
            ):
                from pandas.core.arrays.floating import FLOAT_STR_TO_DTYPE

                float_candidate: DtypeObj = FLOAT_STR_TO_DTYPE.get(
                    input_array.dtype.name, pandas_dtype_func("Float64")
                )
                # prefer an integer dtype when every float is integral
                if convert_integer:
                    # TODO: de-dup with maybe_cast_to_integer_array?
                    valid = input_array[notna(input_array)]
                    if (valid.astype(int) == valid).all():
                        result_dtype = pandas_dtype_func("Int64")
                    else:
                        result_dtype = float_candidate
                else:
                    result_dtype = float_candidate
            elif (
                infer_objects
                and is_object_dtype(input_array.dtype)
                and (
                    isinstance(result_dtype, str)
                    and result_dtype == "mixed-integer-float"
                )
            ):
                result_dtype = pandas_dtype_func("Float64")

        if convert_boolean:
            if is_bool_dtype(input_array.dtype) or (
                isinstance(result_dtype, str) and result_dtype == "boolean"
            ):
                result_dtype = pandas_dtype_func("boolean")

        if isinstance(result_dtype, str):
            # If we couldn't do anything else, then we retain the dtype
            result_dtype = input_array.dtype

    else:
        result_dtype = input_array.dtype

    if dtype_backend == "pyarrow":
        from pandas.core.arrays.arrow.array import to_pyarrow_type
        from pandas.core.arrays.string_ import StringDtype

        assert not isinstance(result_dtype, str)

        if (
            (convert_integer and result_dtype.kind in "iu")
            or (convert_floating and result_dtype.kind in "fc")
            or (convert_boolean and result_dtype.kind == "b")
            or (convert_string and isinstance(result_dtype, StringDtype))
            or (
                result_dtype.kind not in "iufcb"
                and not isinstance(result_dtype, StringDtype)
            )
        ):
            # map down to the numpy-level dtype that pyarrow understands
            if isinstance(result_dtype, PandasExtensionDtype) and not isinstance(
                result_dtype, DatetimeTZDtype
            ):
                numpy_base = result_dtype.base
            elif isinstance(result_dtype, (BaseMaskedDtype, ArrowDtype)):
                numpy_base = result_dtype.numpy_dtype
            elif isinstance(result_dtype, StringDtype):
                numpy_base = np.dtype(str)
            else:
                numpy_base = result_dtype
            arrow_type = to_pyarrow_type(numpy_base)
            if arrow_type is not None:
                result_dtype = ArrowDtype(arrow_type)
    elif dtype_backend == "numpy_nullable" and isinstance(result_dtype, ArrowDtype):
        # GH 53648
        result_dtype = _arrow_dtype_mapping()[result_dtype.pyarrow_dtype]

    # error: Incompatible return value type (got "Union[str, Union[dtype[Any],
    # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")
    return result_dtype  # type: ignore[return-value]

1169 

1170 

def maybe_infer_to_datetimelike(
    value: npt.NDArray[np.object_],
) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray | IntervalArray:
    """
    Try to convert an object-dtype ndarray to a datetimelike array.

    The value is returned unchanged unless a datetime/timedelta/period/interval
    pattern is found; possible nulls/string-likes alone do not trigger
    conversion.

    Parameters
    ----------
    value : np.ndarray[object]

    Returns
    -------
    np.ndarray, DatetimeArray, TimedeltaArray, PeriodArray, or IntervalArray
    """
    # Caller is responsible for passing only 1-dim ndarray[object]
    if not isinstance(value, np.ndarray) or value.dtype != object:
        raise TypeError(type(value))  # pragma: no cover
    if value.ndim != 1:
        raise ValueError(value.ndim)  # pragma: no cover

    if not len(value):
        return value

    # error: Incompatible return value type (got "Union[ExtensionArray,
    # ndarray[Any, Any]]", expected "Union[ndarray[Any, Any], DatetimeArray,
    # TimedeltaArray, PeriodArray, IntervalArray]")
    return lib.maybe_convert_objects(  # type: ignore[return-value]
        value,
        # Here we do not convert numeric dtypes, as if we wanted that,
        # numpy would have done it for us.
        convert_numeric=False,
        convert_period=True,
        convert_interval=True,
        convert_timedelta=True,
        convert_datetime=True,
        dtype_if_all_nat=np.dtype("M8[ns]"),
    )

1215 

1216 

def maybe_cast_to_datetime(
    value: np.ndarray | list, dtype: np.dtype
) -> ExtensionArray | np.ndarray:
    """
    Try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT.

    Caller is responsible for handling ExtensionDtype cases and non dt64/td64
    cases.
    """
    from pandas.core.arrays.datetimes import DatetimeArray
    from pandas.core.arrays.timedeltas import TimedeltaArray

    assert dtype.kind in ["m", "M"]
    if not is_list_like(value):
        raise TypeError("value must be listlike")

    # TODO: _from_sequence would raise ValueError in cases where
    # _ensure_nanosecond_dtype raises TypeError
    _ensure_nanosecond_dtype(dtype)

    if is_timedelta64_dtype(dtype):
        return TimedeltaArray._from_sequence(value, dtype=dtype)

    try:
        return DatetimeArray._from_sequence(value, dtype=dtype)
    except ValueError as err:
        # Translate the tz-mismatch error into a Series-specific message.
        if "cannot supply both a tz and a timezone-naive dtype" in str(err):
            raise ValueError(
                "Cannot convert timezone-aware data to "
                "timezone-naive dtype. Use "
                "pd.Series(values).dt.tz_localize(None) instead."
            ) from err
        raise

1255 

1256 

1257def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None: 

1258 """ 

1259 Convert dtypes with granularity less than nanosecond to nanosecond 

1260 

1261 >>> _ensure_nanosecond_dtype(np.dtype("M8[us]")) 

1262 

1263 >>> _ensure_nanosecond_dtype(np.dtype("M8[D]")) 

1264 Traceback (most recent call last): 

1265 ... 

1266 TypeError: dtype=datetime64[D] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' 

1267 

1268 >>> _ensure_nanosecond_dtype(np.dtype("m8[ps]")) 

1269 Traceback (most recent call last): 

1270 ... 

1271 TypeError: dtype=timedelta64[ps] is not supported. Supported resolutions are 's', 'ms', 'us', and 'ns' 

1272 """ # noqa:E501 

1273 msg = ( 

1274 f"The '{dtype.name}' dtype has no unit. " 

1275 f"Please pass in '{dtype.name}[ns]' instead." 

1276 ) 

1277 

1278 # unpack e.g. SparseDtype 

1279 dtype = getattr(dtype, "subtype", dtype) 

1280 

1281 if not isinstance(dtype, np.dtype): 

1282 # i.e. datetime64tz 

1283 pass 

1284 

1285 elif dtype.kind in ["m", "M"]: 

1286 reso = get_unit_from_dtype(dtype) 

1287 if not is_supported_unit(reso): 

1288 # pre-2.0 we would silently swap in nanos for lower-resolutions, 

1289 # raise for above-nano resolutions 

1290 if dtype.name in ["datetime64", "timedelta64"]: 

1291 raise ValueError(msg) 

1292 # TODO: ValueError or TypeError? existing test 

1293 # test_constructor_generic_timestamp_bad_frequency expects TypeError 

1294 raise TypeError( 

1295 f"dtype={dtype} is not supported. Supported resolutions are 's', " 

1296 "'ms', 'us', and 'ns'" 

1297 ) 

1298 

1299 

1300# TODO: other value-dependent functions to standardize here include 

1301# Index._find_common_type_compat 

def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
    """
    Find the type/dtype for the result of an operation between these objects.

    This is similar to find_common_type, but looks at the objects instead
    of just their dtypes. This can be useful in particular when one of the
    objects does not have a `dtype`.

    Parameters
    ----------
    left : np.ndarray or ExtensionArray
    right : Any

    Returns
    -------
    np.dtype or ExtensionDtype

    See also
    --------
    find_common_type
    numpy.result_type
    """
    result: DtypeObj

    left_is_numeric_ndarray = isinstance(left, np.ndarray) and left.dtype.kind in [
        "i",
        "u",
        "c",
    ]

    if left_is_numeric_ndarray and (lib.is_integer(right) or lib.is_float(right)):
        # e.g. with int8 dtype and right=512, we want to end up with
        # np.int16, whereas infer_dtype_from(512) gives np.int64,
        # which will make us upcast too far.
        if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
            right = int(right)
        result = np.result_type(left, right)

    elif is_valid_na_for_dtype(right, left.dtype):
        # e.g. IntervalDtype[int] and None/np.nan
        result = ensure_dtype_can_hold_na(left.dtype)

    else:
        right_dtype, _ = infer_dtype_from(right, pandas_dtype=True)
        result = find_common_type([left.dtype, right_dtype])

    return result

1349 

1350 

def common_dtype_categorical_compat(
    objs: list[Index | ArrayLike], dtype: DtypeObj
) -> DtypeObj:
    """
    Update the result of find_common_type to account for NAs in a Categorical.

    Parameters
    ----------
    objs : list[np.ndarray | ExtensionArray | Index]
    dtype : np.dtype or ExtensionDtype

    Returns
    -------
    np.dtype or ExtensionDtype
    """
    # GH#38240
    # TODO: more generally, could do `not can_hold_na(dtype)`
    if not (isinstance(dtype, np.dtype) and dtype.kind in ["i", "u"]):
        return dtype

    for obj in objs:
        # Look at the dtype attribute so we don't accidentally accept
        # e.g. the literal string "categorical" here.
        obj_dtype = getattr(obj, "dtype", None)
        if not isinstance(obj_dtype, CategoricalDtype):
            continue
        if isinstance(obj, ABCIndex):
            # This check may already be cached
            has_na = obj.hasnans
        else:
            # Categorical
            has_na = cast("Categorical", obj)._hasna
        if has_na:
            # see test_union_int_categorical_with_nan
            return np.dtype(np.float64)

    return dtype

1386 

1387 

def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
    """
    np.find_common_type implementation pre-1.25 deprecation using np.result_type
    https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065

    Parameters
    ----------
    dtypes : np.dtypes

    Returns
    -------
    np.dtype
    """
    try:
        result = np.result_type(*dtypes)
    except TypeError:
        # promotion failed entirely -> fall back to object
        return np.dtype("O")

    if result.kind in "mMSU":
        # NumPy promotion currently (1.25) misbehaves for times and strings,
        # so fall back to object (find_common_type did unless there
        # was only one dtype)
        return np.dtype("O")

    return result

1412 

1413 

@overload
def find_common_type(types: list[np.dtype]) -> np.dtype:
    ...


@overload
def find_common_type(types: list[ExtensionDtype]) -> DtypeObj:
    ...


@overload
def find_common_type(types: list[DtypeObj]) -> DtypeObj:
    ...


def find_common_type(types):
    """
    Find a common data type among the given dtypes.

    Parameters
    ----------
    types : list of dtypes

    Returns
    -------
    pandas extension or numpy dtype

    See Also
    --------
    numpy.find_common_type

    """
    if not types:
        raise ValueError("no types given")

    first = types[0]

    # fast path for identical dtypes; also works around
    # find_common_type([np.dtype('datetime64[ns]')] * 2) => object
    if lib.dtypes_all_equal(list(types)):
        return first

    # get unique types (dict.fromkeys is used as order-preserving set())
    types = list(dict.fromkeys(types).keys())

    if any(isinstance(t, ExtensionDtype) for t in types):
        # let the first ExtensionDtype that knows a common dtype decide
        for t in types:
            if isinstance(t, ExtensionDtype):
                res = t._get_common_dtype(types)
                if res is not None:
                    return res
        return np.dtype("object")

    # all-datetimelike: take lowest unit
    if all(is_datetime64_dtype(t) for t in types):
        return np.dtype(max(types))
    if all(is_timedelta64_dtype(t) for t in types):
        return np.dtype(max(types))

    # don't mix bool / int or float or complex
    # this is different from numpy, which casts bool with float/int as int
    if any(is_bool_dtype(t) for t in types) and any(
        is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t) for t in types
    ):
        return np.dtype("object")

    return np_find_common_type(*types)

1482 

1483 

def construct_2d_arraylike_from_scalar(
    value: Scalar, length: int, width: int, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    Build a ``(length, width)`` ndarray of the given dtype, filled with
    ``value``; raises TypeError if the scalar is incompatible with the dtype.
    """
    shape = (length, width)

    if dtype.kind in ["m", "M"]:
        value = _maybe_box_and_unbox_datetimelike(value, dtype)
    elif dtype == _dtype_obj and isinstance(value, (np.timedelta64, np.datetime64)):
        # calling np.array below would cast to pytimedelta/pydatetime
        out = np.empty(shape, dtype=object)
        out.fill(value)
        return out

    # Attempt to coerce to a numpy array
    try:
        arr = np.array(value, dtype=dtype, copy=copy)
    except (ValueError, TypeError) as err:
        raise TypeError(
            f"DataFrame constructor called with incompatible data and dtype: {err}"
        ) from err

    if arr.ndim != 0:
        # a non-scalar slipped through
        raise ValueError("DataFrame constructor not properly called!")

    return np.full(shape, arr)

1510 

1511 

def construct_1d_arraylike_from_scalar(
    value: Scalar, length: int, dtype: DtypeObj | None
) -> ArrayLike:
    """
    create a np.ndarray / pandas type of specified shape and dtype
    filled with values

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype or np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value

    """
    if dtype is None:
        try:
            dtype, value = infer_dtype_from_scalar(value, pandas_dtype=True)
        except OutOfBoundsDatetime:
            dtype = _dtype_obj

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        seq = [value] if length else []
        return cls._from_sequence(seq, dtype=dtype).repeat(length)

    if length and is_integer_dtype(dtype) and isna(value):
        # coerce if we have nan for an integer dtype
        dtype = np.dtype("float64")
    elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
        # coerce to object dtype so numpy treats our string as a scalar
        # value rather than a character array
        dtype = np.dtype("object")
        if not isna(value):
            value = ensure_str(value)
    elif dtype.kind in ["M", "m"]:
        value = _maybe_box_and_unbox_datetimelike(value, dtype)

    subarr = np.empty(length, dtype=dtype)
    if length:
        # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes
        subarr.fill(value)

    return subarr

1561 

1562 

def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
    # Caller is responsible for checking dtype.kind in ["m", "M"]
    if isinstance(value, dt.datetime):
        # only box stdlib datetimes; we don't want to box np.datetime64,
        # in particular datetime64("NaT")
        value = maybe_box_datetimelike(value, dtype)
    return _maybe_unbox_datetimelike(value, dtype)

1571 

1572 

def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
    """
    Transform any list-like object in a 1-dimensional numpy array of object
    dtype.

    Parameters
    ----------
    values : any iterable which has a len()

    Raises
    ------
    TypeError
        * If `values` does not have a len()

    Returns
    -------
    1-dimensional numpy array of dtype object
    """
    # numpy would interpret nested list-likes as extra dimensions if we
    # handed `values` straight to np.array, so allocate the object array
    # first and fill it with a slice assignment instead
    out = np.empty(len(values), dtype="object")
    out[:] = values
    return out

1596 

1597 

def maybe_cast_to_integer_array(arr: list | np.ndarray, dtype: np.dtype) -> np.ndarray:
    """
    Takes any dtype and returns the casted version, raising for when data is
    incompatible with integer/unsigned integer dtypes.

    Parameters
    ----------
    arr : np.ndarray or list
        The array to cast.
    dtype : np.dtype
        The integer dtype to cast the array to.

    Returns
    -------
    ndarray
        Array of integer or unsigned integer dtype.

    Raises
    ------
    OverflowError : the dtype is incompatible with the data
    ValueError : loss of precision has occurred during casting

    Examples
    --------
    If you try to coerce negative values to unsigned integers, it raises:

    >>> pd.Series([-1], dtype="uint64")
    Traceback (most recent call last):
        ...
    OverflowError: Trying to coerce negative values to unsigned integers

    Also, if you try to coerce float values to integers, it raises:

    >>> maybe_cast_to_integer_array([1, 2, 3.5], dtype=np.dtype("int64"))
    Traceback (most recent call last):
        ...
    ValueError: Trying to coerce float values to integers
    """
    assert is_integer_dtype(dtype)

    try:
        if isinstance(arr, np.ndarray):
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                casted = arr.astype(dtype, copy=False)
        else:
            with warnings.catch_warnings():
                # We already disallow dtype=uint w/ negative numbers
                # (test_constructor_coercion_signed_to_unsigned) so safe to ignore.
                warnings.filterwarnings(
                    "ignore",
                    "NumPy will stop allowing conversion of out-of-bound Python int",
                    DeprecationWarning,
                )
                casted = np.array(arr, dtype=dtype, copy=False)
    except OverflowError as err:
        raise OverflowError(
            "The elements provided in the data cannot all be "
            f"casted to the dtype {dtype}"
        ) from err

    if isinstance(arr, np.ndarray) and arr.dtype == dtype:
        # avoid expensive array_equal check
        return casted

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        if np.array_equal(arr, casted):
            return casted

    # The cast was lossy: convert the input to an ndarray so we can
    # diagnose why and raise the appropriate error.  We didn't do this
    # earlier because NumPy doesn't handle `uint64` correctly.
    arr = np.asarray(arr)

    if np.issubdtype(arr.dtype, str):
        # round-trippable strings are acceptable
        if (casted.astype(str) == arr).all():
            return casted
        raise ValueError(f"string values cannot be losslessly cast to {dtype}")

    if is_unsigned_integer_dtype(dtype) and (arr < 0).any():
        raise OverflowError("Trying to coerce negative values to unsigned integers")

    if is_float_dtype(arr.dtype):
        if not np.isfinite(arr).all():
            raise IntCastingNaNError(
                "Cannot convert non-finite values (NA or inf) to integer"
            )
        raise ValueError("Trying to coerce float values to integers")
    if is_object_dtype(arr.dtype):
        raise ValueError("Trying to coerce float values to integers")

    if casted.dtype < arr.dtype:
        # GH#41734 e.g. [1, 200, 923442] and dtype="int8" -> overflows
        raise ValueError(
            f"Values are too large to be losslessly converted to {dtype}. "
            f"To cast anyway, use pd.Series(values).astype({dtype})"
        )

    if arr.dtype.kind in ["m", "M"]:
        # test_constructor_maskedarray_nonfloat
        raise TypeError(
            f"Constructing a Series or DataFrame from {arr.dtype} values and "
            f"dtype={dtype} is not supported. Use values.view({dtype}) instead."
        )

    # No known cases that get here, but raising explicitly to cover our bases.
    raise ValueError(f"values cannot be losslessly cast to {dtype}")

1708 

1709 

def can_hold_element(arr: ArrayLike, element: Any) -> bool:
    """
    Can we do an inplace setitem with this element in an array with this dtype?

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
    element : Any

    Returns
    -------
    bool
    """
    dtype = arr.dtype

    if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
        # plain numpy array: delegate to the lossless-setitem check
        try:
            np_can_hold_element(dtype, element)
            return True
        except (TypeError, LossySetitemError):
            return False

    if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)):
        # np.dtype here catches datetime64ns and timedelta64ns; we assume
        # in this case that we have DatetimeArray/TimedeltaArray
        arr = cast(
            "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr
        )
        try:
            arr._validate_setitem_value(element)
            return True
        except (ValueError, TypeError):
            # TODO: re-use _catch_deprecated_value_error to ensure we are
            # strict about what exceptions we allow through here.
            return False

    # This is technically incorrect, but maintains the behavior of
    # ExtensionBlock._can_hold_element
    return True

1748 

1749 

def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
    """
    Raise if we cannot losslessly set this element into an ndarray with this dtype.

    Specifically about places where we disagree with numpy. i.e. there are
    cases where numpy will raise in doing the setitem that we do not check
    for here, e.g. setting str "X" into a numeric ndarray.

    Returns
    -------
    Any
        The element, potentially cast to the dtype.

    Raises
    ------
    ValueError : If we cannot losslessly store this element with this dtype.
    """
    if dtype == _dtype_obj:
        # object dtype can hold anything as-is
        return element

    inferred = _maybe_infer_dtype_type(element)

    if dtype.kind in ["i", "u"]:
        if isinstance(element, range):
            if _dtype_can_hold_range(element, dtype):
                return element
            raise LossySetitemError

        if is_integer(element) or (is_float(element) and element.is_integer()):
            # e.g. test_setitem_series_int8 if we have a python int 1
            # inferred may be np.int32, despite the fact that it will fit
            # in smaller int dtypes.
            bounds = np.iinfo(dtype)
            if bounds.min <= element <= bounds.max:
                return dtype.type(element)
            raise LossySetitemError

        if inferred is not None:
            if inferred.kind not in ["i", "u"]:
                if isinstance(element, np.ndarray) and element.dtype.kind == "f":
                    # If all can be losslessly cast to integers, then we can hold them
                    with np.errstate(invalid="ignore"):
                        # We check afterwards if cast was losslessly, so no need to show
                        # the warning
                        converted = element.astype(dtype)
                        if (converted == element).all():
                            # Return the casted values bc they can be passed to
                            # np.putmask, whereas the raw values cannot.
                            # see TestSetitemFloatNDarrayIntoIntegerSeries
                            return converted
                    raise LossySetitemError

                # Anything other than integer we cannot hold
                raise LossySetitemError

            if (
                dtype.kind == "u"
                and isinstance(element, np.ndarray)
                and element.dtype.kind == "i"
            ):
                # see test_where_uint64
                converted = element.astype(dtype)
                if (converted == element).all():
                    # TODO: faster to check (element >=0).all()? potential
                    # itemsize issues there?
                    return converted
                raise LossySetitemError

            if dtype.itemsize < inferred.itemsize:
                raise LossySetitemError

            if not isinstance(inferred, np.dtype):
                # i.e. nullable IntegerDtype; we can put this into an ndarray
                # losslessly iff it has no NAs
                if element._hasna:
                    raise LossySetitemError
                return element

            return element

        raise LossySetitemError

    if dtype.kind == "f":
        if lib.is_integer(element) or lib.is_float(element):
            converted = dtype.type(element)
            if np.isnan(converted) or converted == element:
                return converted
            # otherwise e.g. overflow see TestCoercionFloat32
            raise LossySetitemError

        if inferred is not None:
            # TODO: itemsize check?
            if inferred.kind not in ["f", "i", "u"]:
                # Anything other than float/integer we cannot hold
                raise LossySetitemError
            if not isinstance(inferred, np.dtype):
                # i.e. nullable IntegerDtype or FloatingDtype;
                # we can put this into an ndarray losslessly iff it has no NAs
                if element._hasna:
                    raise LossySetitemError
                return element
            if inferred.itemsize > dtype.itemsize or inferred.kind != dtype.kind:
                if isinstance(element, np.ndarray):
                    # e.g. TestDataFrameIndexingWhere::test_where_alignment
                    converted = element.astype(dtype)
                    if np.array_equal(converted, element, equal_nan=True):
                        return converted
                    raise LossySetitemError

            return element

        raise LossySetitemError

    if dtype.kind == "c":
        if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
            if np.isnan(element):
                # see test_where_complex GH#6345
                return dtype.type(element)

            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")
                converted = dtype.type(element)
                if converted == element:
                    return converted
            # otherwise e.g. overflow see test_32878_complex_itemsize
            raise LossySetitemError

        if inferred is not None:
            if inferred.kind in ["c", "f", "i", "u"]:
                return element
            raise LossySetitemError
        raise LossySetitemError

    if dtype.kind == "b":
        if inferred is not None:
            if inferred.kind == "b":
                if not isinstance(inferred, np.dtype):
                    # i.e. we have a BooleanArray
                    if element._hasna:
                        # i.e. there are pd.NA elements
                        raise LossySetitemError
                return element
            raise LossySetitemError
        if lib.is_bool(element):
            return element
        raise LossySetitemError

    if dtype.kind == "S":
        # TODO: test tests.frame.methods.test_replace tests get here,
        # need more targeted tests. xref phofl has a PR about this
        if inferred is not None:
            if inferred.kind == "S" and inferred.itemsize <= dtype.itemsize:
                return element
            raise LossySetitemError
        if isinstance(element, bytes) and len(element) <= dtype.itemsize:
            return element
        raise LossySetitemError

    if dtype.kind == "V":
        # i.e. np.void, which cannot hold _anything_
        raise LossySetitemError

    raise NotImplementedError(dtype)

1911 

1912 

1913def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: 

1914 """ 

1915 _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), 

1916 but in many cases a range can be held by a smaller integer dtype. 

1917 Check if this is one of those cases. 

1918 """ 

1919 if not len(rng): 

1920 return True 

1921 return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype)