Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/construction.py: 47%


251 statements  

1""" 

2Constructor functions intended to be shared by pd.array, Series.__init__, 

3and Index.__new__. 

4 

5These should not depend on core.internals. 

6""" 

7from __future__ import annotations 

8 

9from collections.abc import Sequence 

10from typing import ( 

11 TYPE_CHECKING, 

12 Optional, 

13 Union, 

14 cast, 

15 overload, 

16) 

17import warnings 

18 

19import numpy as np 

20from numpy import ma 

21 

22from pandas._config import using_pyarrow_string_dtype 

23 

24from pandas._libs import lib 

25from pandas._libs.tslibs import ( 

26 Period, 

27 get_supported_dtype, 

28 is_supported_dtype, 

29) 

30from pandas._typing import ( 

31 AnyArrayLike, 

32 ArrayLike, 

33 Dtype, 

34 DtypeObj, 

35 T, 

36) 

37from pandas.util._exceptions import find_stack_level 

38 

39from pandas.core.dtypes.base import ExtensionDtype 

40from pandas.core.dtypes.cast import ( 

41 construct_1d_arraylike_from_scalar, 

42 construct_1d_object_array_from_listlike, 

43 maybe_cast_to_datetime, 

44 maybe_cast_to_integer_array, 

45 maybe_convert_platform, 

46 maybe_infer_to_datetimelike, 

47 maybe_promote, 

48) 

49from pandas.core.dtypes.common import ( 

50 is_list_like, 

51 is_object_dtype, 

52 is_string_dtype, 

53 pandas_dtype, 

54) 

55from pandas.core.dtypes.dtypes import NumpyEADtype 

56from pandas.core.dtypes.generic import ( 

57 ABCDataFrame, 

58 ABCExtensionArray, 

59 ABCIndex, 

60 ABCSeries, 

61) 

62from pandas.core.dtypes.missing import isna 

63 

64import pandas.core.common as com 

65 

66if TYPE_CHECKING: 

67 from pandas import ( 

68 Index, 

69 Series, 

70 ) 

71 from pandas.core.arrays.base import ExtensionArray 

72 

73 

def array(
    data: Sequence[object] | AnyArrayLike,
    dtype: Dtype | None = None,
    copy: bool = True,
) -> ExtensionArray:
    """
    Create an array.

    Parameters
    ----------
    data : Sequence of objects
        The scalars inside `data` should be instances of the
        scalar type for `dtype`. It's expected that `data`
        represents a 1-dimensional array of data.

        When `data` is an Index or Series, the underlying array
        will be extracted from `data`.

    dtype : str, np.dtype, or ExtensionDtype, optional
        The dtype to use for the array. This may be a NumPy
        dtype or an extension type registered with pandas using
        :meth:`pandas.api.extensions.register_extension_dtype`.

        If not specified, there are two possibilities:

        1. When `data` is a :class:`Series`, :class:`Index`, or
           :class:`ExtensionArray`, the `dtype` will be taken
           from the data.
        2. Otherwise, pandas will attempt to infer the `dtype`
           from the data.

        Note that when `data` is a NumPy array, ``data.dtype`` is
        *not* used for inferring the array type. This is because
        NumPy cannot represent all the types of data that can be
        held in extension arrays.

        Currently, pandas will infer an extension dtype for sequences of

        ============================== =======================================
        Scalar Type                    Array Type
        ============================== =======================================
        :class:`pandas.Interval`       :class:`pandas.arrays.IntervalArray`
        :class:`pandas.Period`         :class:`pandas.arrays.PeriodArray`
        :class:`datetime.datetime`     :class:`pandas.arrays.DatetimeArray`
        :class:`datetime.timedelta`    :class:`pandas.arrays.TimedeltaArray`
        :class:`int`                   :class:`pandas.arrays.IntegerArray`
        :class:`float`                 :class:`pandas.arrays.FloatingArray`
        :class:`str`                   :class:`pandas.arrays.StringArray` or
                                       :class:`pandas.arrays.ArrowStringArray`
        :class:`bool`                  :class:`pandas.arrays.BooleanArray`
        ============================== =======================================

        The ExtensionArray created when the scalar type is :class:`str` is determined by
        ``pd.options.mode.string_storage`` if the dtype is not explicitly given.

        For all other cases, NumPy's usual inference rules will be used.
    copy : bool, default True
        Whether to copy the data, even if not necessary. Depending
        on the type of `data`, creating the new array may require
        copying data, even if ``copy=False``.

    Returns
    -------
    ExtensionArray
        The newly created array.

    Raises
    ------
    ValueError
        When `data` is not 1-dimensional.

    See Also
    --------
    numpy.array : Construct a NumPy array.
    Series : Construct a pandas Series.
    Index : Construct a pandas Index.
    arrays.NumpyExtensionArray : ExtensionArray wrapping a NumPy array.
    Series.array : Extract the array stored within a Series.

    Notes
    -----
    Omitting the `dtype` argument means pandas will attempt to infer the
    best array type from the values in the data. As new array types are
    added by pandas and 3rd party libraries, the "best" array type may
    change. We recommend specifying `dtype` to ensure that

    1. the correct array type for the data is returned
    2. the returned array type doesn't change as new extension types
       are added by pandas and third-party libraries

    Additionally, if the underlying memory representation of the returned
    array matters, we recommend specifying the `dtype` as a concrete object
    rather than a string alias or allowing it to be inferred. For example,
    a future version of pandas or a 3rd-party library may include a
    dedicated ExtensionArray for string data. In this event, the following
    would no longer return a :class:`arrays.NumpyExtensionArray` backed by a
    NumPy array.

    >>> pd.array(['a', 'b'], dtype=str)
    <NumpyExtensionArray>
    ['a', 'b']
    Length: 2, dtype: str32

    This would instead return the new ExtensionArray dedicated for string
    data. If you really need the new array to be backed by a NumPy array,
    specify that in the dtype.

    >>> pd.array(['a', 'b'], dtype=np.dtype("<U1"))
    <NumpyExtensionArray>
    ['a', 'b']
    Length: 2, dtype: str32

    Finally, Pandas has arrays that mostly overlap with NumPy

      * :class:`arrays.DatetimeArray`
      * :class:`arrays.TimedeltaArray`

    When data with a ``datetime64[ns]`` or ``timedelta64[ns]`` dtype is
    passed, pandas will always return a ``DatetimeArray`` or ``TimedeltaArray``
    rather than a ``NumpyExtensionArray``. This is for symmetry with the case of
    timezone-aware data, which NumPy does not natively support.

    >>> pd.array(['2015', '2016'], dtype='datetime64[ns]')
    <DatetimeArray>
    ['2015-01-01 00:00:00', '2016-01-01 00:00:00']
    Length: 2, dtype: datetime64[ns]

    >>> pd.array(["1h", "2h"], dtype='timedelta64[ns]')
    <TimedeltaArray>
    ['0 days 01:00:00', '0 days 02:00:00']
    Length: 2, dtype: timedelta64[ns]

    Examples
    --------
    If a dtype is not specified, pandas will infer the best dtype from the values.
    See the description of `dtype` for the types pandas infers for.

    >>> pd.array([1, 2])
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64

    >>> pd.array([1, 2, np.nan])
    <IntegerArray>
    [1, 2, <NA>]
    Length: 3, dtype: Int64

    >>> pd.array([1.1, 2.2])
    <FloatingArray>
    [1.1, 2.2]
    Length: 2, dtype: Float64

    >>> pd.array(["a", None, "c"])
    <StringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> with pd.option_context("string_storage", "pyarrow"):
    ...     arr = pd.array(["a", None, "c"])
    ...
    >>> arr
    <ArrowStringArray>
    ['a', <NA>, 'c']
    Length: 3, dtype: string

    >>> pd.array([pd.Period('2000', freq="D"), pd.Period("2000", freq="D")])
    <PeriodArray>
    ['2000-01-01', '2000-01-01']
    Length: 2, dtype: period[D]

    You can use the string alias for `dtype`

    >>> pd.array(['a', 'b', 'a'], dtype='category')
    ['a', 'b', 'a']
    Categories (2, object): ['a', 'b']

    Or specify the actual dtype

    >>> pd.array(['a', 'b', 'a'],
    ...          dtype=pd.CategoricalDtype(['a', 'b', 'c'], ordered=True))
    ['a', 'b', 'a']
    Categories (3, object): ['a' < 'b' < 'c']

    If pandas does not infer a dedicated extension type a
    :class:`arrays.NumpyExtensionArray` is returned.

    >>> pd.array([1 + 1j, 3 + 2j])
    <NumpyExtensionArray>
    [(1+1j), (3+2j)]
    Length: 2, dtype: complex128

    As mentioned in the "Notes" section, new extension types may be added
    in the future (by pandas or 3rd party libraries), causing the return
    value to no longer be a :class:`arrays.NumpyExtensionArray`. Specify the
    `dtype` as a NumPy dtype if you need to ensure there's no future change in
    behavior.

    >>> pd.array([1, 2], dtype=np.dtype("int32"))
    <NumpyExtensionArray>
    [1, 2]
    Length: 2, dtype: int32

    `data` must be 1-dimensional. A ValueError is raised when the input
    has the wrong dimensionality.

    >>> pd.array(1)
    Traceback (most recent call last):
      ...
    ValueError: Cannot pass scalar '1' to 'pandas.array'.
    """
    from pandas.core.arrays import (
        BooleanArray,
        DatetimeArray,
        ExtensionArray,
        FloatingArray,
        IntegerArray,
        IntervalArray,
        NumpyExtensionArray,
        PeriodArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.string_ import StringDtype

    if lib.is_scalar(data):
        msg = f"Cannot pass scalar '{data}' to 'pandas.array'."
        raise ValueError(msg)
    elif isinstance(data, ABCDataFrame):
        raise TypeError("Cannot pass DataFrame to 'pandas.array'")

    if dtype is None and isinstance(data, (ABCSeries, ABCIndex, ExtensionArray)):
        # Note: we exclude np.ndarray here, will do type inference on it
        dtype = data.dtype

    data = extract_array(data, extract_numpy=True)

    # this returns None for not-found dtypes.
    if dtype is not None:
        dtype = pandas_dtype(dtype)

    if isinstance(data, ExtensionArray) and (dtype is None or data.dtype == dtype):
        # e.g. TimedeltaArray[s], avoid casting to NumpyExtensionArray
        if copy:
            return data.copy()
        return data

    if isinstance(dtype, ExtensionDtype):
        cls = dtype.construct_array_type()
        return cls._from_sequence(data, dtype=dtype, copy=copy)

    if dtype is None:
        inferred_dtype = lib.infer_dtype(data, skipna=True)
        if inferred_dtype == "period":
            period_data = cast(Union[Sequence[Optional[Period]], AnyArrayLike], data)
            return PeriodArray._from_sequence(period_data, copy=copy)

        elif inferred_dtype == "interval":
            return IntervalArray(data, copy=copy)

        elif inferred_dtype.startswith("datetime"):
            # datetime, datetime64
            try:
                return DatetimeArray._from_sequence(data, copy=copy)
            except ValueError:
                # Mixture of timezones, fall back to NumpyExtensionArray
                pass

        elif inferred_dtype.startswith("timedelta"):
            # timedelta, timedelta64
            return TimedeltaArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "string":
            # StringArray/ArrowStringArray depending on pd.options.mode.string_storage
            dtype = StringDtype()
            cls = dtype.construct_array_type()
            return cls._from_sequence(data, dtype=dtype, copy=copy)

        elif inferred_dtype == "integer":
            return IntegerArray._from_sequence(data, copy=copy)
        elif inferred_dtype == "empty" and not hasattr(data, "dtype") and not len(data):
            return FloatingArray._from_sequence(data, copy=copy)
        elif (
            inferred_dtype in ("floating", "mixed-integer-float")
            and getattr(data, "dtype", None) != np.float16
        ):
            # GH#44715 Exclude np.float16 bc FloatingArray does not support it;
            # we will fall back to NumpyExtensionArray.
            return FloatingArray._from_sequence(data, copy=copy)

        elif inferred_dtype == "boolean":
            return BooleanArray._from_sequence(data, dtype="boolean", copy=copy)

    # Pandas overrides NumPy for
    # 1. datetime64[ns,us,ms,s]
    # 2. timedelta64[ns,us,ms,s]
    # so that a DatetimeArray is returned.
    if lib.is_np_dtype(dtype, "M") and is_supported_dtype(dtype):
        return DatetimeArray._from_sequence(data, dtype=dtype, copy=copy)
    if lib.is_np_dtype(dtype, "m") and is_supported_dtype(dtype):
        return TimedeltaArray._from_sequence(data, dtype=dtype, copy=copy)

    elif lib.is_np_dtype(dtype, "mM"):
        warnings.warn(
            r"datetime64 and timedelta64 dtype resolutions other than "
            r"'s', 'ms', 'us', and 'ns' are deprecated. "
            r"In future releases passing unsupported resolutions will "
            r"raise an exception.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    return NumpyExtensionArray._from_sequence(data, dtype=dtype, copy=copy)
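
# Usage sketch (illustrative, not part of the module): how the fast path above
# behaves when an ExtensionArray with a matching dtype is passed back in.
# Assumes a pandas 2.x install with the behavior shown in this file.
#
# >>> import pandas as pd
# >>> arr = pd.array([1, 2], dtype="Int64")
# >>> pd.array(arr, copy=False) is arr   # matching dtype, no copy requested
# True
# >>> pd.array(arr) is arr               # copy defaults to True, so a new object
# False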

_typs = frozenset(
    {
        "index",
        "rangeindex",
        "multiindex",
        "datetimeindex",
        "timedeltaindex",
        "periodindex",
        "categoricalindex",
        "intervalindex",
        "series",
    }
)

@overload
def extract_array(
    obj: Series | Index, extract_numpy: bool = ..., extract_range: bool = ...
) -> ArrayLike:
    ...


@overload
def extract_array(
    obj: T, extract_numpy: bool = ..., extract_range: bool = ...
) -> T | ArrayLike:
    ...


def extract_array(
    obj: T, extract_numpy: bool = False, extract_range: bool = False
) -> T | ArrayLike:
    """
    Extract the ndarray or ExtensionArray from a Series or Index.

    For all other types, `obj` is just returned as is.

    Parameters
    ----------
    obj : object
        For Series / Index, the underlying ExtensionArray is unboxed.

    extract_numpy : bool, default False
        Whether to extract the ndarray from a NumpyExtensionArray.

    extract_range : bool, default False
        If we have a RangeIndex, return range._values if True
        (which is a materialized integer ndarray), otherwise return unchanged.

    Returns
    -------
    arr : object

    Examples
    --------
    >>> extract_array(pd.Series(['a', 'b', 'c'], dtype='category'))
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']

    Other objects like lists, arrays, and DataFrames are just passed through.

    >>> extract_array([1, 2, 3])
    [1, 2, 3]

    For an ndarray-backed Series / Index the ndarray is returned.

    >>> extract_array(pd.Series([1, 2, 3]))
    array([1, 2, 3])

    To extract all the way down to the ndarray, pass ``extract_numpy=True``.

    >>> extract_array(pd.Series([1, 2, 3]), extract_numpy=True)
    array([1, 2, 3])
    """
    typ = getattr(obj, "_typ", None)
    if typ in _typs:
        # i.e. isinstance(obj, (ABCIndex, ABCSeries))
        if typ == "rangeindex":
            if extract_range:
                # error: "T" has no attribute "_values"
                return obj._values  # type: ignore[attr-defined]
            return obj

        # error: "T" has no attribute "_values"
        return obj._values  # type: ignore[attr-defined]

    elif extract_numpy and typ == "npy_extension":
        # i.e. isinstance(obj, ABCNumpyExtensionArray)
        # error: "T" has no attribute "to_numpy"
        return obj.to_numpy()  # type: ignore[attr-defined]

    return obj
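
# Usage sketch (illustrative, not part of the module): the ``extract_range``
# branch above, which the docstring does not demonstrate. Assumes the internal
# import path pandas.core.construction, which is not public API.
#
# >>> import pandas as pd
# >>> from pandas.core.construction import extract_array
# >>> rng = pd.RangeIndex(3)
# >>> extract_array(rng)                       # RangeIndex is returned unchanged
# RangeIndex(start=0, stop=3, step=1)
# >>> extract_array(rng, extract_range=True)   # -> array([0, 1, 2]), materialized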

def ensure_wrapped_if_datetimelike(arr):
    """
    Wrap datetime64 and timedelta64 ndarrays in DatetimeArray/TimedeltaArray.
    """
    if isinstance(arr, np.ndarray):
        if arr.dtype.kind == "M":
            from pandas.core.arrays import DatetimeArray

            dtype = get_supported_dtype(arr.dtype)
            return DatetimeArray._from_sequence(arr, dtype=dtype)

        elif arr.dtype.kind == "m":
            from pandas.core.arrays import TimedeltaArray

            dtype = get_supported_dtype(arr.dtype)
            return TimedeltaArray._from_sequence(arr, dtype=dtype)

    return arr
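
# Usage sketch (illustrative, not part of the module): datetime64/timedelta64
# ndarrays are wrapped, everything else passes through. Assumes the internal
# import path pandas.core.construction.
#
# >>> import numpy as np
# >>> from pandas.core.construction import ensure_wrapped_if_datetimelike
# >>> m8 = np.array(["2021-01-01", "2021-01-02"], dtype="datetime64[ns]")
# >>> type(ensure_wrapped_if_datetimelike(m8)).__name__
# 'DatetimeArray'
# >>> ensure_wrapped_if_datetimelike(np.array([1, 2]))   # returned unchanged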

def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
    """
    Convert numpy MaskedArray to ensure mask is softened.
    """
    mask = ma.getmaskarray(data)
    if mask.any():
        dtype, fill_value = maybe_promote(data.dtype, np.nan)
        dtype = cast(np.dtype, dtype)
        data = ma.asarray(data.astype(dtype, copy=True))
        data.soften_mask()  # set hardmask False if it was True
        data[mask] = fill_value
    else:
        data = data.copy()
    return data
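
# Usage sketch (illustrative, not part of the module): a masked integer array is
# promoted so the masked slot can hold the promoted fill value (NaN for numeric
# dtypes). Assumes the internal import path pandas.core.construction.
#
# >>> import numpy as np
# >>> from pandas.core.construction import sanitize_masked_array
# >>> masked = np.ma.MaskedArray([1, 2, 3], mask=[False, True, False])
# >>> out = sanitize_masked_array(masked)   # softened copy, nan in slot 1
# >>> out.dtype
# dtype('float64')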

def sanitize_array(
    data,
    index: Index | None,
    dtype: DtypeObj | None = None,
    copy: bool = False,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Sanitize input data to an ndarray or ExtensionArray, copy if specified,
    coerce to the dtype if specified.

    Parameters
    ----------
    data : Any
    index : Index or None, default None
    dtype : np.dtype, ExtensionDtype, or None, default None
    copy : bool, default False
    allow_2d : bool, default False
        If False, raise if we have a 2D Arraylike.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    original_dtype = dtype
    if isinstance(data, ma.MaskedArray):
        data = sanitize_masked_array(data)

    if isinstance(dtype, NumpyEADtype):
        # Avoid ending up with a NumpyExtensionArray
        dtype = dtype.numpy_dtype

    object_index = False
    if isinstance(data, ABCIndex) and data.dtype == object and dtype is None:
        object_index = True

    # extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
    data = extract_array(data, extract_numpy=True, extract_range=True)

    if isinstance(data, np.ndarray) and data.ndim == 0:
        if dtype is None:
            dtype = data.dtype
        data = lib.item_from_zerodim(data)
    elif isinstance(data, range):
        # GH#16804
        data = range_to_ndarray(data)
        copy = False

    if not is_list_like(data):
        if index is None:
            raise ValueError("index must be specified when data is not list-like")
        if (
            isinstance(data, str)
            and using_pyarrow_string_dtype()
            and original_dtype is None
        ):
            from pandas.core.arrays.string_ import StringDtype

            dtype = StringDtype("pyarrow_numpy")
        data = construct_1d_arraylike_from_scalar(data, len(index), dtype)

        return data

    elif isinstance(data, ABCExtensionArray):
        # it is already ensured above this is not a NumpyExtensionArray
        # Until GH#49309 is fixed this check needs to come before the
        # ExtensionDtype check
        if dtype is not None:
            subarr = data.astype(dtype, copy=copy)
        elif copy:
            subarr = data.copy()
        else:
            subarr = data

    elif isinstance(dtype, ExtensionDtype):
        # create an extension array from its dtype
        _sanitize_non_ordered(data)
        cls = dtype.construct_array_type()
        subarr = cls._from_sequence(data, dtype=dtype, copy=copy)

    # GH#846
    elif isinstance(data, np.ndarray):
        if isinstance(data, np.matrix):
            data = data.A

        if dtype is None:
            subarr = data
            if data.dtype == object:
                subarr = maybe_infer_to_datetimelike(data)
                if (
                    object_index
                    and using_pyarrow_string_dtype()
                    and is_string_dtype(subarr)
                ):
                    # Avoid inference when string option is set
                    subarr = data
            elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
                from pandas.core.arrays.string_ import StringDtype

                dtype = StringDtype(storage="pyarrow_numpy")
                subarr = dtype.construct_array_type()._from_sequence(data, dtype=dtype)

            if subarr is data and copy:
                subarr = subarr.copy()

        else:
            # we will try to copy by-definition here
            subarr = _try_cast(data, dtype, copy)

    elif hasattr(data, "__array__"):
        # e.g. dask array GH#38645
        if not copy:
            data = np.asarray(data)
        else:
            data = np.array(data, copy=copy)
        return sanitize_array(
            data,
            index=index,
            dtype=dtype,
            copy=False,
            allow_2d=allow_2d,
        )

    else:
        _sanitize_non_ordered(data)
        # materialize e.g. generators, convert e.g. tuples, abc.ValueView
        data = list(data)

        if len(data) == 0 and dtype is None:
            # We default to float64, matching numpy
            subarr = np.array([], dtype=np.float64)

        elif dtype is not None:
            subarr = _try_cast(data, dtype, copy)

        else:
            subarr = maybe_convert_platform(data)
            if subarr.dtype == object:
                subarr = cast(np.ndarray, subarr)
                subarr = maybe_infer_to_datetimelike(subarr)

    subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d)

    if isinstance(subarr, np.ndarray):
        # at this point we should have dtype be None or subarr.dtype == dtype
        dtype = cast(np.dtype, dtype)
        subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)

    return subarr
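
# Usage sketch (illustrative, not part of the module): list input is converted
# to a platform-appropriate ndarray, and a scalar is broadcast against the
# index length. Assumes the internal import path pandas.core.construction.
#
# >>> import pandas as pd
# >>> from pandas.core.construction import sanitize_array
# >>> sanitize_array([1, 2, 3], index=None)        # -> integer ndarray [1, 2, 3]
# >>> sanitize_array(5, index=pd.RangeIndex(3))    # -> ndarray [5, 5, 5]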

def range_to_ndarray(rng: range) -> np.ndarray:
    """
    Cast a range object to ndarray.
    """
    # GH#30171 perf avoid realizing range as a list in np.array
    try:
        arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
    except OverflowError:
        # GH#30173 handling for ranges that overflow int64
        if (rng.start >= 0 and rng.step > 0) or (rng.step < 0 <= rng.stop):
            try:
                arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
            except OverflowError:
                arr = construct_1d_object_array_from_listlike(list(rng))
        else:
            arr = construct_1d_object_array_from_listlike(list(rng))
    return arr
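
# Usage sketch (illustrative, not part of the module): int64 is tried first,
# then uint64, then an object array, per the fallbacks above. Assumes the
# internal import path pandas.core.construction.
#
# >>> from pandas.core.construction import range_to_ndarray
# >>> range_to_ndarray(range(3)).dtype
# dtype('int64')
# >>> range_to_ndarray(range(2**63, 2**63 + 2)).dtype   # overflows int64
# dtype('uint64')
# >>> range_to_ndarray(range(2**64, 2**64 + 2)).dtype   # overflows uint64 too
# dtype('O')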

def _sanitize_non_ordered(data) -> None:
    """
    Raise only for unordered sets, e.g., not for dict_keys
    """
    if isinstance(data, (set, frozenset)):
        raise TypeError(f"'{type(data).__name__}' type is unordered")
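
# Usage sketch (illustrative, not part of the module): sets are rejected, while
# dict views pass through silently. Assumes the internal import path
# pandas.core.construction.
#
# >>> from pandas.core.construction import _sanitize_non_ordered
# >>> _sanitize_non_ordered({"a": 1}.keys())   # no error, returns None
# >>> _sanitize_non_ordered({1, 2, 3})
# Traceback (most recent call last):
#   ...
# TypeError: 'set' type is unordered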

def _sanitize_ndim(
    result: ArrayLike,
    data,
    dtype: DtypeObj | None,
    index: Index | None,
    *,
    allow_2d: bool = False,
) -> ArrayLike:
    """
    Ensure we have a 1-dimensional result array.
    """
    if getattr(result, "ndim", 0) == 0:
        raise ValueError("result should be arraylike with ndim > 0")

    if result.ndim == 1:
        # the result that we want
        result = _maybe_repeat(result, index)

    elif result.ndim > 1:
        if isinstance(data, np.ndarray):
            if allow_2d:
                return result
            raise ValueError(
                f"Data must be 1-dimensional, got ndarray of shape {data.shape} instead"
            )
        if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype):
            # i.e. NumpyEADtype("O")

            result = com.asarray_tuplesafe(data, dtype=np.dtype("object"))
            cls = dtype.construct_array_type()
            result = cls._from_sequence(result, dtype=dtype)
        else:
            # error: Argument "dtype" to "asarray_tuplesafe" has incompatible type
            # "Union[dtype[Any], ExtensionDtype, None]"; expected "Union[str,
            # dtype[Any], None]"
            result = com.asarray_tuplesafe(data, dtype=dtype)  # type: ignore[arg-type]
    return result
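
# Usage sketch (illustrative, not part of the module): 2D ndarray input raises
# unless ``allow_2d=True``. Assumes the internal import path
# pandas.core.construction.
#
# >>> import numpy as np
# >>> from pandas.core.construction import _sanitize_ndim
# >>> arr2d = np.zeros((2, 2))
# >>> _sanitize_ndim(arr2d, arr2d, dtype=None, index=None)
# Traceback (most recent call last):
#   ...
# ValueError: Data must be 1-dimensional, got ndarray of shape (2, 2) instead
# >>> _sanitize_ndim(arr2d, arr2d, dtype=None, index=None, allow_2d=True).shape
# (2, 2)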

def _sanitize_str_dtypes(
    result: np.ndarray, data, dtype: np.dtype | None, copy: bool
) -> np.ndarray:
    """
    Ensure we have a dtype that is supported by pandas.
    """

    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if issubclass(result.dtype.type, str):
        # GH#16605
        # If not empty convert the data to dtype
        # GH#19853: If data is a scalar, result has already the result
        if not lib.is_scalar(data):
            if not np.all(isna(data)):
                data = np.asarray(data, dtype=dtype)
            if not copy:
                result = np.asarray(data, dtype=object)
            else:
                result = np.array(data, dtype=object, copy=copy)
    return result

def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
    """
    If we have a length-1 array and an index describing how long we expect
    the result to be, repeat the array.
    """
    if index is not None:
        if 1 == len(arr) != len(index):
            arr = arr.repeat(len(index))
    return arr
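
# Usage sketch (illustrative, not part of the module): a length-1 array is
# broadcast to the index length; anything else is returned unchanged. Assumes
# the internal import path pandas.core.construction.
#
# >>> import numpy as np
# >>> import pandas as pd
# >>> from pandas.core.construction import _maybe_repeat
# >>> _maybe_repeat(np.array([7]), pd.RangeIndex(3))       # repeated to length 3
# array([7, 7, 7])
# >>> _maybe_repeat(np.array([7, 8]), pd.RangeIndex(3))    # length != 1, unchanged
# array([7, 8])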

def _try_cast(
    arr: list | np.ndarray,
    dtype: np.dtype,
    copy: bool,
) -> ArrayLike:
    """
    Convert input to numpy ndarray and optionally cast to a given dtype.

    Parameters
    ----------
    arr : ndarray or list
        Excludes: ExtensionArray, Series, Index.
    dtype : np.dtype
    copy : bool
        If False, don't copy the data if not needed.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    is_ndarray = isinstance(arr, np.ndarray)

    if dtype == object:
        if not is_ndarray:
            subarr = construct_1d_object_array_from_listlike(arr)
            return subarr
        return ensure_wrapped_if_datetimelike(arr).astype(dtype, copy=copy)

    elif dtype.kind == "U":
        # TODO: test cases with arr.dtype.kind in "mM"
        if is_ndarray:
            arr = cast(np.ndarray, arr)
            shape = arr.shape
            if arr.ndim > 1:
                arr = arr.ravel()
        else:
            shape = (len(arr),)
        return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
            shape
        )

    elif dtype.kind in "mM":
        return maybe_cast_to_datetime(arr, dtype)

    # GH#15832: Check if we are requesting a numeric dtype and
    # that we can convert the data to the requested dtype.
    elif dtype.kind in "iu":
        # this will raise if we have e.g. floats

        subarr = maybe_cast_to_integer_array(arr, dtype)
    elif not copy:
        subarr = np.asarray(arr, dtype=dtype)
    else:
        subarr = np.array(arr, dtype=dtype, copy=copy)

    return subarr
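
# Usage sketch (illustrative, not part of the module): the dtype kind decides
# which branch above is taken. Assumes the internal import path
# pandas.core.construction.
#
# >>> import numpy as np
# >>> from pandas.core.construction import _try_cast
# >>> _try_cast([1, 2], np.dtype("int64"), False)      # integer path
# array([1, 2])
# >>> _try_cast([1, None], np.dtype(object), False)    # object path keeps None
# array([1, None], dtype=object)
# >>> _try_cast(["2021-01-01"], np.dtype("M8[ns]"), False)   # -> DatetimeArray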