Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/construction.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

218 statements  

1""" 

2Constructor functions intended to be shared by pd.array, Series.__init__, 

3and Index.__new__. 

4 

5These should not depend on core.internals. 

6""" 

7from __future__ import annotations 

8 

9from typing import ( 

10 TYPE_CHECKING, 

11 Optional, 

12 Sequence, 

13 Union, 

14 cast, 

15 overload, 

16) 

17 

18import numpy as np 

19from numpy import ma 

20 

21from pandas._libs import lib 

22from pandas._libs.tslibs.period import Period 

23from pandas._typing import ( 

24 AnyArrayLike, 

25 ArrayLike, 

26 Dtype, 

27 DtypeObj, 

28 T, 

29) 

30 

31from pandas.core.dtypes.base import ( 

32 ExtensionDtype, 

33 _registry as registry, 

34) 

35from pandas.core.dtypes.cast import ( 

36 construct_1d_arraylike_from_scalar, 

37 construct_1d_object_array_from_listlike, 

38 maybe_cast_to_datetime, 

39 maybe_cast_to_integer_array, 

40 maybe_convert_platform, 

41 maybe_infer_to_datetimelike, 

42 maybe_promote, 

43) 

44from pandas.core.dtypes.common import ( 

45 is_datetime64_ns_dtype, 

46 is_dtype_equal, 

47 is_extension_array_dtype, 

48 is_integer_dtype, 

49 is_list_like, 

50 is_object_dtype, 

51 is_timedelta64_ns_dtype, 

52) 

53from pandas.core.dtypes.dtypes import PandasDtype 

54from pandas.core.dtypes.generic import ( 

55 ABCDataFrame, 

56 ABCExtensionArray, 

57 ABCIndex, 

58 ABCPandasArray, 

59 ABCRangeIndex, 

60 ABCSeries, 

61) 

62from pandas.core.dtypes.missing import isna 

63 

64import pandas.core.common as com 

65 

66if TYPE_CHECKING: 

67 from pandas import ( 

68 Index, 

69 Series, 

70 ) 

71 from pandas.core.arrays.base import ExtensionArray 

72 

73 

def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is
        determined by ``pd.options.mode.string_storage`` if the dtype is not
        explicitly given.

        For all other cases, NumPy's usual inference rules will be used.

        .. versionchanged:: 1.2.0

            Pandas now also infers nullable-floating dtype for float-like
            input data

    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.PandasArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.PandasArray` backed by a NumPy
    array.

    >>> pd.array(['a', 'b'], dtype=str)
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <PandasArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``PandasArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1H", "2H"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.PandasArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <PandasArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.PandasArray`. Specify the `dtype`
    as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <PandasArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    # Imported lazily, inside the function, exactly as the original does.
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        PandasArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    # Reject inputs that can never describe a 1-dimensional array.
    if lib.is_scalar(data):
        raise ValueError(f"Cannot pass scalar '{data}' to 'pandas.array'.")
    if isinstance(data, ABCDataFrame):
        raise TypeError("Cannot pass DataFrame to 'pandas.array'")

    # pandas containers carry a trustworthy dtype; np.ndarray is deliberately
    # left out so that its contents go through type inference below.
    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # registry.find returns None for not-found dtypes; keep the raw string then.
    if isinstance(dtype, str):
        dtype = registry.find(dtype) or dtype

    already_right_type = isinstance(data, ExtensionArray) and (
        dtype is None or is_dtype_equal(dtype, data.dtype)
    )
    if already_right_type:
        # e.g. TimedeltaArray[s], avoid casting to PandasArray
        return data.copy() if copy else data

    if is_extension_array_dtype(dtype):
        array_cls = cast(ExtensionDtype, dtype).construct_array_type()
        return array_cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        kind = lib.infer_dtype(data, skipna=True)

        if kind == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        if kind == "interval":
            return IntervalArray(data, copy=copy)

        if kind.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to PandasArray
                pass

        if kind.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        if kind == "string":
            # StringArray/ArrowStringArray depending on
            # pd.options.mode.string_storage
            string_cls = StringDtype().construct_array_type()
            return string_cls._from_sequence(data, copy=copy)

        if kind == "integer":
            return IntegerArray._from_sequence(data, copy=copy)

        # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
        # we will fall back to PandasArray.
        float_like = kind in ("floating", "mixed-integer-float")
        if float_like and getattr(data, "dtype", None) != np.float16:
            return FloatingArray._from_sequence(data, copy=copy)

        if kind == "boolean":
            return BooleanArray._from_sequence(data, copy=copy)

    # Pandas overrides NumPy for
    #   1. datetime64[ns]
    #   2. timedelta64[ns]
    # so that a DatetimeArray / TimedeltaArray is returned.
    if is_datetime64_ns_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    if is_timedelta64_ns_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    return PandasArray._from_sequence(data, dtype=dtype, copy=copy)

380 

381 

@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Unbox the ndarray or ExtensionArray held by a Series or Index.

    Anything that is not a Series / Index (and not a PandasArray when
    ``extract_numpy`` is set) is handed back untouched.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.

    extract_numpy : bool, default False
        Whether to extract the ndarray from a PandasArray.

    extract_range : bool, default False
        If we have a RangeIndex, return range._values if True
        (which is a materialized integer ndarray), otherwise return unchanged.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Other objects like lists, arrays, and DataFrames are just passed through.

    >>> extract_array([1, 2, 3])
    [1, 2, 3]

    For an ndarray-backed Series / Index the ndarray is returned.

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])

    To extract all the way down to the ndarray, pass ``extract_numpy=True``.

    >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
    array([1, 2, 3])
    """
    if isinstance(obj, (ABCIndex, ABCSeries)):
        # A RangeIndex stays lazy unless the caller explicitly asks for the
        # materialized values.
        keep_range = isinstance(obj, ABCRangeIndex) and not extract_range
        # https://github.com/python/mypy/issues/1081
        # error: Incompatible return value type (got "RangeIndex", expected
        # "Union[T, Union[ExtensionArray, ndarray[Any, Any]]]")
        return obj if keep_range else obj._values  # type: ignore[return-value]

    if extract_numpy and isinstance(obj, ABCPandasArray):
        return obj.to_numpy()

    return obj

456 

457 

def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 / timedelta64 ndarrays in DatetimeArray / TimedeltaArray.

    Any other input (non-ndarray objects, or ndarrays of another dtype kind)
    is returned unchanged.
    """
    if not isinstance(arr, np.ndarray):
        return arr

    dtype_kind = arr.dtype.kind

    if dtype_kind == "M":
        # Imported at call time, as in the original implementation.
        from pandas.core.arrays import DatetimeArray

        return DatetimeArray._from_sequence(arr)

    if dtype_kind == "m":
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray._from_sequence(arr)

    return arr

474 

475 

def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Convert numpy MaskedArray to ensure mask is softened.

    When nothing is masked a plain copy is returned. Otherwise the data is
    cast to a dtype able to hold the fill value chosen by ``maybe_promote``
    for ``np.nan``, the mask is softened and the masked slots are overwritten
    with that fill value.
    """
    mask = ma.getmaskarray(data)

    if not mask.any():
        return data.copy()

    promoted, fill_value = maybe_promote(data.dtype, np.nan)
    promoted = cast(np.dtype, promoted)
    # Incompatible types in assignment (expression has type "ndarray[Any,
    # dtype[Any]]", variable has type "MaskedArray[Any, Any]")
    data = data.astype(promoted, copy=True)  # type: ignore[assignment]
    data.soften_mask()  # set hardmask False if it was True
    data[mask] = fill_value
    return data

492 

493 

def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
        Only consulted for its length: to broadcast non-list-like `data`
        and (via ``_sanitize_ndim``) to repeat a length-1 result.
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If `data` is not list-like and `index` is None.
    """
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, PandasDtype):
        # Avoid ending up with a PandasArray
        dtype = dtype.numpy_dtype

    # extract ndarray or ExtensionArray, ensure we have no PandasArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        # Unwrap a zero-dim array into its scalar, remembering its dtype.
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        # range_to_ndarray just built a fresh array; no second copy needed.
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
        return data

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a PandasArray
        # Until GH#49309 is fixed this check needs to come before the
        # ExtensionDtype check
        if dtype is not None:
            subarr = data.astype(dtype, copy=copy)
        elif copy:
            subarr = data.copy()
        else:
            subarr = data

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        _sanitize_non_ordered(data)
        cls = dtype.construct_array_type()
        subarr = cls._from_sequence(data, dtype=dtype, copy=copy)

    # GH#846
    elif isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is None:
            subarr = data
            if data.dtype == object:
                subarr = maybe_infer_to_datetimelike(data)

            # Only copy when inference handed back the very same object.
            if subarr is data and copy:
                subarr = subarr.copy()

        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy)

    elif hasattr(data, "__array__"):
        # e.g. dask array GH#38645
        if copy:
            data = np.array(data, copy=True)
        else:
            # Under NumPy >= 2, ``np.array(..., copy=False)`` raises when a
            # copy cannot be avoided. ``np.asarray`` copies only if needed,
            # which is what ``copy=False`` meant under NumPy 1.x.
            data = np.asarray(data)
        return sanitize_array(
            data,
            index=index,
            dtype=dtype,
            copy=False,
            allow_2d=allow_2d,
        )

    else:
        _sanitize_non_ordered(data)
        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        data = list(data)

        if len(data) == 0 and dtype is None:
            # We default to float64, matching numpy
            subarr = np.array([], dtype=np.float64)

        elif dtype is not None:
            subarr = _try_cast(data, dtype, copy)

        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr

615 

616 

def range_to_ndarray(rng: range) -> np.ndarray:
    """
    Cast a range object to ndarray.

    Tries int64 first, then uint64 when the range's sign pattern allows it,
    and finally falls back to an object-dtype array.
    """
    start, stop, step = rng.start, rng.stop, rng.step

    # GH#30171 perf avoid realizing range as a list in np.array
    try:
        return np.arange(start, stop, step, dtype="int64")
    except OverflowError:
        # GH#30173 handling for ranges that overflow int64
        pass

    unsigned_candidate = (start >= 0 and step > 0) or (step < 0 <= stop)
    if unsigned_candidate:
        try:
            return np.arange(start, stop, step, dtype="uint64")
        except OverflowError:
            pass

    return construct_1d_object_array_from_listlike(list(rng))

634 

635 

def _sanitize_non_ordered(data) -> None:
    """
    Reject unordered sets (``set`` / ``frozenset``) with a TypeError.

    Other containers, e.g. dict_keys, pass through silently.
    """
    if not isinstance(data, (set, frozenset)):
        return
    raise TypeError(f"'{type(data).__name__}' type is unordered")

642 

643 

def _sanitize_ndim(
    result: ArrayLike,
    data,
    dtype: DtypeObj | None,
    index: Index | None,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Ensure we have a 1-dimensional result array.

    A 1-D `result` is passed through ``_maybe_repeat``. A higher-dimensional
    `result` is returned as-is only when `data` is an ndarray and `allow_2d`
    is set; for ndarray `data` without `allow_2d` a ValueError is raised, and
    for any other `data` the result is rebuilt with ``com.asarray_tuplesafe``.
    """
    ndim = getattr(result, "ndim", 0)
    if ndim == 0:
        raise ValueError("result should be arraylike with ndim > 0")

    if ndim == 1:
        # the result that we want
        return _maybe_repeat(result, index)

    if ndim > 1:
        if isinstance(data, np.ndarray):
            if not allow_2d:
                raise ValueError(
                    f"Data must be 1-dimensional, got ndarray of shape {data.shape} "
                    "instead"
                )
            return result

        wraps_object = is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype)
        if wraps_object:
            # i.e. PandasDtype("O")
            as_objects = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
            array_cls = dtype.construct_array_type()
            result = array_cls._from_sequence(as_objects, dtype=dtype)
        else:
            # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
            # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
            # dtype[Any], None]"
            result = com.asarray_tuplesafe(data, dtype=dtype)  # type: ignore[arg-type]

    return result

681 

682 

def _sanitize_str_dtypes(
    result: np.ndarray, data, dtype: np.dtype | None, copy: bool
) -> np.ndarray:
    """
    Ensure we have a dtype that is supported by pandas.

    A `result` with a NumPy string dtype is rebuilt from `data` as an
    object-dtype array; any other `result` is returned untouched.

    Parameters
    ----------
    result : np.ndarray
        Candidate array whose dtype is inspected.
    data : Any
        The original input from which `result` was built.
    dtype : np.dtype or None
        Requested dtype, used when re-reading non-all-NA `data`.
    copy : bool
        Whether the object-dtype array must be a fresh copy.

    Returns
    -------
    np.ndarray
    """
    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(result.dtype.type, str):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, result has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                # ``np.asarray`` copies only if needed. Under NumPy >= 2,
                # ``np.array(..., copy=False)`` instead raises whenever a
                # copy is unavoidable (list input, dtype change).
                data = np.asarray(data, dtype=dtype)
            if copy:
                result = np.array(data, dtype=object, copy=True)
            else:
                # str -> object always needs a new buffer, so a strict
                # ``copy=False`` would fail under NumPy >= 2.
                result = np.asarray(data, dtype=object)
    return result

701 

702 

def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
    """
    Stretch a length-1 array to the length of `index`.

    When `index` is None, or `arr` is not of length 1, or the lengths
    already agree, `arr` is returned unchanged.
    """
    if index is None:
        return arr

    if len(arr) == 1 and len(index) != 1:
        return arr.repeat(len(index))

    return arr

712 

713 

def _try_cast(
    arr: list | np.ndarray,
    dtype: np.dtype,
    copy: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype
    copy : bool
        If False, don't copy the data if not needed.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if is_object_dtype(dtype):
        if not is_ndarray:
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in ["m", "M"]
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            shape = arr.shape
            # Flatten before conversion; the shape is restored below.
            if arr.ndim > 1:
                arr = arr.ravel()
        else:
            shape = (len(arr),)
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
            shape
        )

    elif dtype.kind in ["m", "M"]:
        return maybe_cast_to_datetime(arr, dtype)

    # GH#15832: Check if we are requesting a numeric dtype and
    # that we can convert the data to the requested dtype.
    elif is_integer_dtype(dtype):
        # this will raise if we have e.g. floats

        subarr = maybe_cast_to_integer_array(arr, dtype)
    elif copy:
        subarr = np.array(arr, dtype=dtype, copy=True)
    else:
        # Under NumPy >= 2, ``np.array(..., copy=False)`` raises when a copy
        # cannot be avoided (list input, dtype change). ``np.asarray`` copies
        # only if needed, which is what ``copy=False`` meant under NumPy 1.x.
        subarr = np.asarray(arr, dtype=dtype)

    return subarr

767 return subarr