# pandas/io/json/_json.py

from __future__ import annotations

from abc import (
    ABC,
    abstractmethod,
)
from collections import abc
from io import StringIO
from itertools import islice
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Generic,
    Literal,
    TypeVar,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.json import (
    ujson_dumps,
    ujson_loads,
)
from pandas._libs.tslibs import iNaT
from pandas.compat._optional import import_optional_dependency
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import check_dtype_backend

from pandas.core.dtypes.common import (
    ensure_str,
    is_string_dtype,
)
from pandas.core.dtypes.dtypes import PeriodDtype

from pandas import (
    ArrowDtype,
    DataFrame,
    Index,
    MultiIndex,
    Series,
    isna,
    notna,
    to_datetime,
)
from pandas.core.reshape.concat import concat
from pandas.core.shared_docs import _shared_docs

from pandas.io.common import (
    IOHandles,
    dedup_names,
    extension_to_compression,
    file_exists,
    get_handle,
    is_fsspec_url,
    is_potential_multi_index,
    is_url,
    stringify_path,
)
from pandas.io.json._normalize import convert_to_line_delimits
from pandas.io.json._table_schema import (
    build_table_schema,
    parse_table_schema,
)
from pandas.io.parsers.readers import validate_integer

if TYPE_CHECKING:
    from collections.abc import (
        Hashable,
        Mapping,
    )
    from types import TracebackType

    from pandas._typing import (
        CompressionOptions,
        DtypeArg,
        DtypeBackend,
        FilePath,
        IndexLabel,
        JSONEngine,
        JSONSerializable,
        ReadBuffer,
        Self,
        StorageOptions,
        WriteBuffer,
    )

    from pandas.core.generic import NDFrame

FrameSeriesStrT = TypeVar("FrameSeriesStrT", bound=Literal["frame", "series"])


# interface to/from
@overload
def to_json(
    path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes],
    obj: NDFrame,
    orient: str | None = ...,
    date_format: str = ...,
    double_precision: int = ...,
    force_ascii: bool = ...,
    date_unit: str = ...,
    default_handler: Callable[[Any], JSONSerializable] | None = ...,
    lines: bool = ...,
    compression: CompressionOptions = ...,
    index: bool | None = ...,
    indent: int = ...,
    storage_options: StorageOptions = ...,
    mode: Literal["a", "w"] = ...,
) -> None:
    ...


@overload
def to_json(
    path_or_buf: None,
    obj: NDFrame,
    orient: str | None = ...,
    date_format: str = ...,
    double_precision: int = ...,
    force_ascii: bool = ...,
    date_unit: str = ...,
    default_handler: Callable[[Any], JSONSerializable] | None = ...,
    lines: bool = ...,
    compression: CompressionOptions = ...,
    index: bool | None = ...,
    indent: int = ...,
    storage_options: StorageOptions = ...,
    mode: Literal["a", "w"] = ...,
) -> str:
    ...


def to_json(
    path_or_buf: FilePath | WriteBuffer[str] | WriteBuffer[bytes] | None,
    obj: NDFrame,
    orient: str | None = None,
    date_format: str = "epoch",
    double_precision: int = 10,
    force_ascii: bool = True,
    date_unit: str = "ms",
    default_handler: Callable[[Any], JSONSerializable] | None = None,
    lines: bool = False,
    compression: CompressionOptions = "infer",
    index: bool | None = None,
    indent: int = 0,
    storage_options: StorageOptions | None = None,
    mode: Literal["a", "w"] = "w",
) -> str | None:
    if orient in ["records", "values"] and index is True:
        raise ValueError(
            "'index=True' is only valid when 'orient' is 'split', 'table', "
            "'index', or 'columns'."
        )
    elif orient in ["index", "columns"] and index is False:
        raise ValueError(
            "'index=False' is only valid when 'orient' is 'split', 'table', "
            "'records', or 'values'."
        )
    elif index is None:
        # will be ignored for orient='records' and 'values'
        index = True

    if lines and orient != "records":
        raise ValueError("'lines' keyword only valid when 'orient' is records")

    if mode not in ["a", "w"]:
        msg = (
            f"mode={mode} is not a valid option. "
            "Only 'w' and 'a' are currently supported."
        )
        raise ValueError(msg)

    if mode == "a" and (not lines or orient != "records"):
        msg = (
            "mode='a' (append) is only supported when "
            "lines is True and orient is 'records'"
        )
        raise ValueError(msg)

    if orient == "table" and isinstance(obj, Series):
        obj = obj.to_frame(name=obj.name or "values")

    writer: type[Writer]
    if orient == "table" and isinstance(obj, DataFrame):
        writer = JSONTableWriter
    elif isinstance(obj, Series):
        writer = SeriesWriter
    elif isinstance(obj, DataFrame):
        writer = FrameWriter
    else:
        raise NotImplementedError("'obj' should be a Series or a DataFrame")

    s = writer(
        obj,
        orient=orient,
        date_format=date_format,
        double_precision=double_precision,
        ensure_ascii=force_ascii,
        date_unit=date_unit,
        default_handler=default_handler,
        index=index,
        indent=indent,
    ).write()

    if lines:
        s = convert_to_line_delimits(s)

    if path_or_buf is not None:
        # apply compression and byte/text conversion
        with get_handle(
            path_or_buf, mode, compression=compression, storage_options=storage_options
        ) as handles:
            handles.handle.write(s)
    else:
        return s
    return None
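
# A minimal usage sketch of the writer path above (illustrative only; the
# frame and the file name here are hypothetical):
#
#   import pandas as pd
#   df = pd.DataFrame({"a": [1, 2]})
#   df.to_json("out.jsonl", orient="records", lines=True)  # one object per line
#   s = df.to_json(orient="split")  # path_or_buf=None returns the JSON string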


class Writer(ABC):
    _default_orient: str

    def __init__(
        self,
        obj: NDFrame,
        orient: str | None,
        date_format: str,
        double_precision: int,
        ensure_ascii: bool,
        date_unit: str,
        index: bool,
        default_handler: Callable[[Any], JSONSerializable] | None = None,
        indent: int = 0,
    ) -> None:
        self.obj = obj

        if orient is None:
            orient = self._default_orient

        self.orient = orient
        self.date_format = date_format
        self.double_precision = double_precision
        self.ensure_ascii = ensure_ascii
        self.date_unit = date_unit
        self.default_handler = default_handler
        self.index = index
        self.indent = indent

        self.is_copy = None
        self._format_axes()

    def _format_axes(self) -> None:
        raise AbstractMethodError(self)

    def write(self) -> str:
        iso_dates = self.date_format == "iso"
        return ujson_dumps(
            self.obj_to_write,
            orient=self.orient,
            double_precision=self.double_precision,
            ensure_ascii=self.ensure_ascii,
            date_unit=self.date_unit,
            iso_dates=iso_dates,
            default_handler=self.default_handler,
            indent=self.indent,
        )

    @property
    @abstractmethod
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        """Object to write in JSON format."""


class SeriesWriter(Writer):
    _default_orient = "index"

    @property
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        if not self.index and self.orient == "split":
            return {"name": self.obj.name, "data": self.obj.values}
        else:
            return self.obj

    def _format_axes(self) -> None:
        if not self.obj.index.is_unique and self.orient == "index":
            raise ValueError(f"Series index must be unique for orient='{self.orient}'")


class FrameWriter(Writer):
    _default_orient = "columns"

    @property
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        if not self.index and self.orient == "split":
            obj_to_write = self.obj.to_dict(orient="split")
            del obj_to_write["index"]
        else:
            obj_to_write = self.obj
        return obj_to_write

    def _format_axes(self) -> None:
        """
        Try to format axes if they are datelike.
        """
        if not self.obj.index.is_unique and self.orient in ("index", "columns"):
            raise ValueError(
                f"DataFrame index must be unique for orient='{self.orient}'."
            )
        if not self.obj.columns.is_unique and self.orient in (
            "index",
            "columns",
            "records",
        ):
            raise ValueError(
                f"DataFrame columns must be unique for orient='{self.orient}'."
            )
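
# Illustrative only (hypothetical frame): the uniqueness checks above mean
#
#   pd.DataFrame({"a": [1, 2]}, index=[0, 0]).to_json(orient="index")
#
# raises ValueError, while orient="split" or orient="records" accepts the
# duplicated index labels.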


class JSONTableWriter(FrameWriter):
    _default_orient = "records"

    def __init__(
        self,
        obj,
        orient: str | None,
        date_format: str,
        double_precision: int,
        ensure_ascii: bool,
        date_unit: str,
        index: bool,
        default_handler: Callable[[Any], JSONSerializable] | None = None,
        indent: int = 0,
    ) -> None:
        """
        Adds a `schema` attribute with the Table Schema, resets
        the index (can't do in caller, because the schema inference needs
        to know what the index is), forces orient to 'records', and forces
        date_format to 'iso'.
        """
        super().__init__(
            obj,
            orient,
            date_format,
            double_precision,
            ensure_ascii,
            date_unit,
            index,
            default_handler=default_handler,
            indent=indent,
        )

        if date_format != "iso":
            msg = (
                "Trying to write with `orient='table'` and "
                f"`date_format='{date_format}'`. Table Schema requires dates "
                "to be formatted with `date_format='iso'`"
            )
            raise ValueError(msg)

        self.schema = build_table_schema(obj, index=self.index)

        # NotImplemented on a column MultiIndex
        if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
            raise NotImplementedError(
                "orient='table' is not supported for MultiIndex columns"
            )

        # TODO: Do this timedelta properly in objToJSON.c See GH #15137
        if (
            (obj.ndim == 1)
            and (obj.name in set(obj.index.names))
            or len(obj.columns.intersection(obj.index.names))
        ):
            msg = "Overlapping names between the index and columns"
            raise ValueError(msg)

        obj = obj.copy()
        timedeltas = obj.select_dtypes(include=["timedelta"]).columns
        if len(timedeltas):
            obj[timedeltas] = obj[timedeltas].map(lambda x: x.isoformat())
        # Convert PeriodIndex to datetimes before serializing
        if isinstance(obj.index.dtype, PeriodDtype):
            obj.index = obj.index.to_timestamp()

        # exclude index from obj if index=False
        if not self.index:
            self.obj = obj.reset_index(drop=True)
        else:
            self.obj = obj.reset_index(drop=False)
        self.date_format = "iso"
        self.orient = "records"
        self.index = index

    @property
    def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]:
        return {"schema": self.schema, "data": self.obj}
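
# Illustrative only (hypothetical frame): with orient="table" the payload
# pairs a Table Schema with the records, roughly
#
#   pd.DataFrame({"a": [1]}).to_json(orient="table")
#   # -> '{"schema": {"fields": [...], "primaryKey": ["index"], ...},
#   #      "data": [{"index": 0, "a": 1}]}'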


@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["frame"] = ...,
    dtype: DtypeArg | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    engine: JSONEngine = ...,
) -> JsonReader[Literal["frame"]]:
    ...


@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["series"],
    dtype: DtypeArg | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: int,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    engine: JSONEngine = ...,
) -> JsonReader[Literal["series"]]:
    ...


@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["series"],
    dtype: DtypeArg | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    engine: JSONEngine = ...,
) -> Series:
    ...


@overload
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = ...,
    typ: Literal["frame"] = ...,
    dtype: DtypeArg | None = ...,
    convert_axes: bool | None = ...,
    convert_dates: bool | list[str] = ...,
    keep_default_dates: bool = ...,
    precise_float: bool = ...,
    date_unit: str | None = ...,
    encoding: str | None = ...,
    encoding_errors: str | None = ...,
    lines: bool = ...,
    chunksize: None = ...,
    compression: CompressionOptions = ...,
    nrows: int | None = ...,
    storage_options: StorageOptions = ...,
    dtype_backend: DtypeBackend | lib.NoDefault = ...,
    engine: JSONEngine = ...,
) -> DataFrame:
    ...


@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "path_or_buf",
)
def read_json(
    path_or_buf: FilePath | ReadBuffer[str] | ReadBuffer[bytes],
    *,
    orient: str | None = None,
    typ: Literal["frame", "series"] = "frame",
    dtype: DtypeArg | None = None,
    convert_axes: bool | None = None,
    convert_dates: bool | list[str] = True,
    keep_default_dates: bool = True,
    precise_float: bool = False,
    date_unit: str | None = None,
    encoding: str | None = None,
    encoding_errors: str | None = "strict",
    lines: bool = False,
    chunksize: int | None = None,
    compression: CompressionOptions = "infer",
    nrows: int | None = None,
    storage_options: StorageOptions | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    engine: JSONEngine = "ujson",
) -> DataFrame | Series | JsonReader:
    """
    Convert a JSON string to a pandas object.

    Parameters
    ----------
    path_or_buf : a valid JSON str, path object or file-like object
        Any valid string path is acceptable. The string could be a URL. Valid
        URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.json``.

        If you want to pass in a path object, pandas accepts any
        ``os.PathLike``.

        By file-like object, we refer to objects with a ``read()`` method,
        such as a file handle (e.g. via builtin ``open`` function)
        or ``StringIO``.

        .. deprecated:: 2.1.0
            Passing json literal strings is deprecated.

    orient : str, optional
        Indication of expected JSON string format.
        Compatible JSON strings can be produced by ``to_json()`` with a
        corresponding orient value.
        The set of possible orients is:

        - ``'split'`` : dict like
          ``{{index -> [index], columns -> [columns], data -> [values]}}``
        - ``'records'`` : list like
          ``[{{column -> value}}, ... , {{column -> value}}]``
        - ``'index'`` : dict like ``{{index -> {{column -> value}}}}``
        - ``'columns'`` : dict like ``{{column -> {{index -> value}}}}``
        - ``'values'`` : just the values array
        - ``'table'`` : dict like ``{{'schema': {{schema}}, 'data': {{data}}}}``

        The allowed and default values depend on the value
        of the `typ` parameter.

        * when ``typ == 'series'``,

          - allowed orients are ``{{'split','records','index'}}``
          - default is ``'index'``
          - The Series index must be unique for orient ``'index'``.

        * when ``typ == 'frame'``,

          - allowed orients are ``{{'split','records','index',
            'columns','values', 'table'}}``
          - default is ``'columns'``
          - The DataFrame index must be unique for orients ``'index'`` and
            ``'columns'``.
          - The DataFrame columns must be unique for orients ``'index'``,
            ``'columns'``, and ``'records'``.

    typ : {{'frame', 'series'}}, default 'frame'
        The type of object to recover.

    dtype : bool or dict, default None
        If True, infer dtypes; if a dict of column to dtype, then use those;
        if False, then don't infer dtypes at all, applies only to the data.

        For all ``orient`` values except ``'table'``, default is True.

    convert_axes : bool, default None
        Try to convert the axes to the proper dtypes.

        For all ``orient`` values except ``'table'``, default is True.

    convert_dates : bool or list of str, default True
        If True then default datelike columns may be converted (depending on
        keep_default_dates).
        If False, no dates will be converted.
        If a list of column names, then those columns will be converted and
        default datelike columns may also be converted (depending on
        keep_default_dates).

    keep_default_dates : bool, default True
        If parsing dates (convert_dates is not False), then try to parse the
        default datelike columns.
        A column label is datelike if

        * it ends with ``'_at'``,

        * it ends with ``'_time'``,

        * it begins with ``'timestamp'``,

        * it is ``'modified'``,

        * it is ``'date'``, or

        * it is ``'datetime'``.

    precise_float : bool, default False
        Set to enable usage of higher precision (strtod) function when
        decoding string to double values. Default (False) is to use fast but
        less precise builtin functionality.

    date_unit : str, default None
        The timestamp unit to detect if converting dates. The default behaviour
        is to try and detect the correct precision, but if this is not desired
        then pass one of 's', 'ms', 'us' or 'ns' to force parsing only seconds,
        milliseconds, microseconds or nanoseconds respectively.

    encoding : str, default is 'utf-8'
        The encoding to use to decode py3 bytes.

    encoding_errors : str, optional, default "strict"
        How encoding errors are treated. `List of possible values
        <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .

        .. versionadded:: 1.3.0

    lines : bool, default False
        Read the file as a json object per line.

    chunksize : int, optional
        Return JsonReader object for iteration.
        See the `line-delimited json docs
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html#line-delimited-json>`_
        for more information on ``chunksize``.
        This can only be passed if `lines=True`.
        If this is None, the file will be read into memory all at once.
    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    nrows : int, optional
        The number of lines to read from a line-delimited JSON file.
        This can only be passed if `lines=True`.
        If this is None, all the rows will be returned.

    {storage_options}

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable'
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). Behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`
          (default).
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    engine : {{"ujson", "pyarrow"}}, default "ujson"
        Parser engine to use. The ``"pyarrow"`` engine is only available when
        ``lines=True``.

        .. versionadded:: 2.0

    Returns
    -------
    Series, DataFrame, or pandas.api.typing.JsonReader
        A JsonReader is returned when ``chunksize`` is not ``0`` or ``None``.
        Otherwise, the type returned depends on the value of ``typ``.

    See Also
    --------
    DataFrame.to_json : Convert a DataFrame to a JSON string.
    Series.to_json : Convert a Series to a JSON string.
    json_normalize : Normalize semi-structured JSON data into a flat table.

    Notes
    -----
    Specific to ``orient='table'``, if a :class:`DataFrame` with a literal
    :class:`Index` name of `index` gets written with :func:`to_json`, the
    subsequent read operation will incorrectly set the :class:`Index` name to
    ``None``. This is because `index` is also used by :func:`DataFrame.to_json`
    to denote a missing :class:`Index` name, and the subsequent
    :func:`read_json` operation cannot distinguish between the two. The same
    limitation is encountered with a :class:`MultiIndex` and any names
    beginning with ``'level_'``.

    Examples
    --------
    >>> from io import StringIO
    >>> df = pd.DataFrame([['a', 'b'], ['c', 'd']],
    ...                   index=['row 1', 'row 2'],
    ...                   columns=['col 1', 'col 2'])

    Encoding/decoding a DataFrame using ``'split'`` formatted JSON:

    >>> df.to_json(orient='split')
        '\
{{\
"columns":["col 1","col 2"],\
"index":["row 1","row 2"],\
"data":[["a","b"],["c","d"]]\
}}\
'
    >>> pd.read_json(StringIO(_), orient='split')
          col 1 col 2
    row 1     a     b
    row 2     c     d

    Encoding/decoding a DataFrame using ``'index'`` formatted JSON:

    >>> df.to_json(orient='index')
    '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}'

    >>> pd.read_json(StringIO(_), orient='index')
          col 1 col 2
    row 1     a     b
    row 2     c     d

    Encoding/decoding a DataFrame using ``'records'`` formatted JSON.
    Note that index labels are not preserved with this encoding.

    >>> df.to_json(orient='records')
    '[{{"col 1":"a","col 2":"b"}},{{"col 1":"c","col 2":"d"}}]'
    >>> pd.read_json(StringIO(_), orient='records')
      col 1 col 2
    0     a     b
    1     c     d

    Encoding with Table Schema

    >>> df.to_json(orient='table')
        '\
{{"schema":{{"fields":[\
{{"name":"index","type":"string"}},\
{{"name":"col 1","type":"string"}},\
{{"name":"col 2","type":"string"}}],\
"primaryKey":["index"],\
"pandas_version":"1.4.0"}},\
"data":[\
{{"index":"row 1","col 1":"a","col 2":"b"}},\
{{"index":"row 2","col 1":"c","col 2":"d"}}]\
}}\
'

    The following example uses ``dtype_backend="numpy_nullable"``

    >>> data = '''{{"index": {{"0": 0, "1": 1}},
    ...        "a": {{"0": 1, "1": null}},
    ...        "b": {{"0": 2.5, "1": 4.5}},
    ...        "c": {{"0": true, "1": false}},
    ...        "d": {{"0": "a", "1": "b"}},
    ...        "e": {{"0": 1577.2, "1": 1577.1}}}}'''
    >>> pd.read_json(StringIO(data), dtype_backend="numpy_nullable")
       index     a    b      c  d       e
    0      0     1  2.5   True  a  1577.2
    1      1  <NA>  4.5  False  b  1577.1
    """

    if orient == "table" and dtype:
        raise ValueError("cannot pass both dtype and orient='table'")
    if orient == "table" and convert_axes:
        raise ValueError("cannot pass both convert_axes and orient='table'")

    check_dtype_backend(dtype_backend)

    if dtype is None and orient != "table":
        # error: Incompatible types in assignment (expression has type "bool", variable
        # has type "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float],
        # Type[int], Type[complex], Type[bool], Type[object], Dict[Hashable,
        # Union[ExtensionDtype, Union[str, dtype[Any]], Type[str], Type[float],
        # Type[int], Type[complex], Type[bool], Type[object]]], None]")
        dtype = True  # type: ignore[assignment]
    if convert_axes is None and orient != "table":
        convert_axes = True

    json_reader = JsonReader(
        path_or_buf,
        orient=orient,
        typ=typ,
        dtype=dtype,
        convert_axes=convert_axes,
        convert_dates=convert_dates,
        keep_default_dates=keep_default_dates,
        precise_float=precise_float,
        date_unit=date_unit,
        encoding=encoding,
        lines=lines,
        chunksize=chunksize,
        compression=compression,
        nrows=nrows,
        storage_options=storage_options,
        encoding_errors=encoding_errors,
        dtype_backend=dtype_backend,
        engine=engine,
    )

    if chunksize:
        return json_reader
    else:
        return json_reader.read()
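
# A minimal sketch of chunked, line-delimited reading (the file name is
# hypothetical):
#
#   with pd.read_json("big.jsonl", lines=True, chunksize=1000) as reader:
#       for chunk in reader:  # each chunk is a DataFrame of up to 1000 rows
#           ...               # process the chunk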


class JsonReader(abc.Iterator, Generic[FrameSeriesStrT]):
    """
    JsonReader provides an interface for reading in a JSON file.

    If initialized with ``lines=True`` and ``chunksize``, can be iterated over
    ``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the
    whole document.
    """

    def __init__(
        self,
        filepath_or_buffer,
        orient,
        typ: FrameSeriesStrT,
        dtype,
        convert_axes: bool | None,
        convert_dates,
        keep_default_dates: bool,
        precise_float: bool,
        date_unit,
        encoding,
        lines: bool,
        chunksize: int | None,
        compression: CompressionOptions,
        nrows: int | None,
        storage_options: StorageOptions | None = None,
        encoding_errors: str | None = "strict",
        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
        engine: JSONEngine = "ujson",
    ) -> None:
        self.orient = orient
        self.typ = typ
        self.dtype = dtype
        self.convert_axes = convert_axes
        self.convert_dates = convert_dates
        self.keep_default_dates = keep_default_dates
        self.precise_float = precise_float
        self.date_unit = date_unit
        self.encoding = encoding
        self.engine = engine
        self.compression = compression
        self.storage_options = storage_options
        self.lines = lines
        self.chunksize = chunksize
        self.nrows_seen = 0
        self.nrows = nrows
        self.encoding_errors = encoding_errors
        self.handles: IOHandles[str] | None = None
        self.dtype_backend = dtype_backend

        if self.engine not in {"pyarrow", "ujson"}:
            raise ValueError(
                f"The engine type {self.engine} is currently not supported."
            )
        if self.chunksize is not None:
            self.chunksize = validate_integer("chunksize", self.chunksize, 1)
            if not self.lines:
                raise ValueError("chunksize can only be passed if lines=True")
            if self.engine == "pyarrow":
                raise ValueError(
                    "currently pyarrow engine doesn't support chunksize parameter"
                )
        if self.nrows is not None:
            self.nrows = validate_integer("nrows", self.nrows, 0)
            if not self.lines:
                raise ValueError("nrows can only be passed if lines=True")
        if (
            isinstance(filepath_or_buffer, str)
            and not self.lines
            and "\n" in filepath_or_buffer
        ):
            warnings.warn(
                "Passing literal json to 'read_json' is deprecated and "
                "will be removed in a future version. To read from a "
                "literal string, wrap it in a 'StringIO' object.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        if self.engine == "pyarrow":
            if not self.lines:
                raise ValueError(
                    "currently pyarrow engine only supports "
                    "the line-delimited JSON format"
                )
            self.data = filepath_or_buffer
        elif self.engine == "ujson":
            data = self._get_data_from_filepath(filepath_or_buffer)
            self.data = self._preprocess_data(data)

    def _preprocess_data(self, data):
        """
        At this point, the data either has a `read` attribute (e.g. a file
        object or a StringIO) or is a string that is a JSON document.

        If self.chunksize, we prepare the data for the `__next__` method.
        Otherwise, we read it into memory for the `read` method.
        """
        if hasattr(data, "read") and not (self.chunksize or self.nrows):
            with self:
                data = data.read()
        if not hasattr(data, "read") and (self.chunksize or self.nrows):
            data = StringIO(data)

        return data

    def _get_data_from_filepath(self, filepath_or_buffer):
        """
        The function read_json accepts three input types:
            1. filepath (string-like)
            2. file-like object (e.g. open file object, StringIO)
            3. JSON string

        This method turns (1) into (2) to simplify the rest of the processing.
        It returns input types (2) and (3) unchanged.

        It raises FileNotFoundError if the input is a string ending in
        one of .json, .json.gz, .json.bz2, etc. but no such file exists.
        """
        # if it is a string but the file does not exist, it might be a JSON string
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if (
            not isinstance(filepath_or_buffer, str)
            or is_url(filepath_or_buffer)
            or is_fsspec_url(filepath_or_buffer)
            or file_exists(filepath_or_buffer)
        ):
            self.handles = get_handle(
                filepath_or_buffer,
                "r",
                encoding=self.encoding,
                compression=self.compression,
                storage_options=self.storage_options,
                errors=self.encoding_errors,
            )
            filepath_or_buffer = self.handles.handle
        elif (
            isinstance(filepath_or_buffer, str)
            and filepath_or_buffer.lower().endswith(
                (".json",) + tuple(f".json{c}" for c in extension_to_compression)
            )
            and not file_exists(filepath_or_buffer)
        ):
            raise FileNotFoundError(f"File {filepath_or_buffer} does not exist")
        else:
            warnings.warn(
                "Passing literal json to 'read_json' is deprecated and "
                "will be removed in a future version. To read from a "
                "literal string, wrap it in a 'StringIO' object.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return filepath_or_buffer

    def _combine_lines(self, lines) -> str:
        """
        Combines a list of JSON objects into one JSON object.
        """
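        # For example (hypothetical input): given lines ['{"a": 1}', '', '{"a": 2}'],
        # this returns '[{"a": 1},{"a": 2}]' -- blank entries are dropped and the
        # remaining objects are joined into a single top-level JSON array.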

        return (
            f'[{",".join([line for line in (line.strip() for line in lines) if line])}]'
        )

    @overload
    def read(self: JsonReader[Literal["frame"]]) -> DataFrame:
        ...

    @overload
    def read(self: JsonReader[Literal["series"]]) -> Series:
        ...

    @overload
    def read(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series:
        ...

    def read(self) -> DataFrame | Series:
        """
        Read the whole JSON input into a pandas object.
        """
        obj: DataFrame | Series
        with self:
            if self.engine == "pyarrow":
                pyarrow_json = import_optional_dependency("pyarrow.json")
                pa_table = pyarrow_json.read_json(self.data)

                mapping: type[ArrowDtype] | None | Callable
                if self.dtype_backend == "pyarrow":
                    mapping = ArrowDtype
                elif self.dtype_backend == "numpy_nullable":
                    from pandas.io._util import _arrow_dtype_mapping

                    mapping = _arrow_dtype_mapping().get
                else:
                    mapping = None

                return pa_table.to_pandas(types_mapper=mapping)
            elif self.engine == "ujson":
                if self.lines:
                    if self.chunksize:
                        obj = concat(self)
                    elif self.nrows:
                        lines = list(islice(self.data, self.nrows))
                        lines_json = self._combine_lines(lines)
                        obj = self._get_object_parser(lines_json)
                    else:
                        data = ensure_str(self.data)
                        data_lines = data.split("\n")
                        obj = self._get_object_parser(self._combine_lines(data_lines))
                else:
                    obj = self._get_object_parser(self.data)
                if self.dtype_backend is not lib.no_default:
                    return obj.convert_dtypes(
                        infer_objects=False, dtype_backend=self.dtype_backend
                    )
                else:
                    return obj

    def _get_object_parser(self, json) -> DataFrame | Series:
        """
        Parses a json document into a pandas object.
        """
        typ = self.typ
        dtype = self.dtype
        kwargs = {
            "orient": self.orient,
            "dtype": self.dtype,
            "convert_axes": self.convert_axes,
            "convert_dates": self.convert_dates,
            "keep_default_dates": self.keep_default_dates,
            "precise_float": self.precise_float,
            "date_unit": self.date_unit,
            "dtype_backend": self.dtype_backend,
        }
        obj = None
        if typ == "frame":
            obj = FrameParser(json, **kwargs).parse()

        if typ == "series" or obj is None:
            if not isinstance(dtype, bool):
                kwargs["dtype"] = dtype
            obj = SeriesParser(json, **kwargs).parse()

        return obj

    def close(self) -> None:
        """
        If we opened a stream earlier, in _get_data_from_filepath, we should
        close it.

        If an open stream or file was passed, we leave it open.
        """
        if self.handles is not None:
            self.handles.close()

    def __iter__(self) -> Self:
        return self

    @overload
    def __next__(self: JsonReader[Literal["frame"]]) -> DataFrame:
        ...

    @overload
    def __next__(self: JsonReader[Literal["series"]]) -> Series:
        ...

    @overload
    def __next__(self: JsonReader[Literal["frame", "series"]]) -> DataFrame | Series:
        ...

    def __next__(self) -> DataFrame | Series:
        if self.nrows and self.nrows_seen >= self.nrows:
            self.close()
            raise StopIteration

        lines = list(islice(self.data, self.chunksize))
        if not lines:
            self.close()
            raise StopIteration

        try:
            lines_json = self._combine_lines(lines)
            obj = self._get_object_parser(lines_json)

            # Make sure that the returned objects have the right index.
            obj.index = range(self.nrows_seen, self.nrows_seen + len(obj))
            self.nrows_seen += len(obj)
        except Exception as ex:
            self.close()
            raise ex

        if self.dtype_backend is not lib.no_default:
            return obj.convert_dtypes(
                infer_objects=False, dtype_backend=self.dtype_backend
            )
        else:
            return obj

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        self.close()
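
# Note on the reader above: JsonReader closes any handle it opened itself --
# on a full read(), on iterator exhaustion, and on context-manager exit --
# but leaves caller-supplied open streams untouched (see close()).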


class Parser:
    _split_keys: tuple[str, ...]
    _default_orient: str

    _STAMP_UNITS = ("s", "ms", "us", "ns")
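    # The thresholds below equal 365 days expressed in each unit
    # (365 * 24 * 3600 = 31_536_000 seconds): epoch numbers at or below the
    # threshold for the active unit are treated as out of range for date
    # detection in _try_convert_to_date.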

    _MIN_STAMPS = {
        "s": 31536000,
        "ms": 31536000000,
        "us": 31536000000000,
        "ns": 31536000000000000,
    }
    json: str

    def __init__(
        self,
        json: str,
        orient,
        dtype: DtypeArg | None = None,
        convert_axes: bool = True,
        convert_dates: bool | list[str] = True,
        keep_default_dates: bool = False,
        precise_float: bool = False,
        date_unit=None,
        dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
    ) -> None:
        self.json = json

        if orient is None:
            orient = self._default_orient

        self.orient = orient

        self.dtype = dtype

        if date_unit is not None:
            date_unit = date_unit.lower()
            if date_unit not in self._STAMP_UNITS:
                raise ValueError(f"date_unit must be one of {self._STAMP_UNITS}")
            self.min_stamp = self._MIN_STAMPS[date_unit]
        else:
            self.min_stamp = self._MIN_STAMPS["s"]

        self.precise_float = precise_float
        self.convert_axes = convert_axes
        self.convert_dates = convert_dates
        self.date_unit = date_unit
        self.keep_default_dates = keep_default_dates
        self.obj: DataFrame | Series | None = None
        self.dtype_backend = dtype_backend

    @final
    def check_keys_split(self, decoded: dict) -> None:
        """
        Checks that dict has only the appropriate keys for orient='split'.
        """
        bad_keys = set(decoded.keys()).difference(set(self._split_keys))
        if bad_keys:
            bad_keys_joined = ", ".join(bad_keys)
            raise ValueError(f"JSON data had unexpected key(s): {bad_keys_joined}")

    @final
    def parse(self):
        self._parse()

        if self.obj is None:
            return None
        if self.convert_axes:
            self._convert_axes()
        self._try_convert_types()
        return self.obj

    def _parse(self) -> None:
        raise AbstractMethodError(self)

    @final
    def _convert_axes(self) -> None:
        """
        Try to convert axes.
        """
        obj = self.obj
        assert obj is not None  # for mypy
        for axis_name in obj._AXIS_ORDERS:
            ax = obj._get_axis(axis_name)
            ser = Series(ax, dtype=ax.dtype, copy=False)
            new_ser, result = self._try_convert_data(
                name=axis_name,
                data=ser,
                use_dtypes=False,
                convert_dates=True,
                is_axis=True,
            )
            if result:
                new_axis = Index(new_ser, dtype=new_ser.dtype, copy=False)
                setattr(self.obj, axis_name, new_axis)

    def _try_convert_types(self) -> None:
        raise AbstractMethodError(self)

    @final
    def _try_convert_data(
        self,
        name: Hashable,
        data: Series,
        use_dtypes: bool = True,
        convert_dates: bool | list[str] = True,
        is_axis: bool = False,
    ) -> tuple[Series, bool]:
        """
        Try to parse a Series into a column by inferring dtype.
        """
        # don't try to coerce unless a conversion is forced via `dtype`
        if use_dtypes:
            if not self.dtype:
                if all(notna(data)):
                    return data, False

                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore",
                        "Downcasting object dtype arrays",
                        category=FutureWarning,
                    )
                    filled = data.fillna(np.nan)

                return filled, True

            elif self.dtype is True:
                pass
            else:
                # dtype to force
                dtype = (
                    self.dtype.get(name) if isinstance(self.dtype, dict) else self.dtype
                )
                if dtype is not None:
                    try:
                        return data.astype(dtype), True
                    except (TypeError, ValueError):
                        return data, False

        if convert_dates:
            new_data, result = self._try_convert_to_date(data)
            if result:
                return new_data, True

        converted = False
        if self.dtype_backend is not lib.no_default and not is_axis:
            # Fall through for conversion later on
            return data, True
        elif is_string_dtype(data.dtype):
            # try float
            try:
                data = data.astype("float64")
                converted = True
            except (TypeError, ValueError):
                pass

        if data.dtype.kind == "f" and data.dtype != "float64":
            # coerce floats to 64
            try:
                data = data.astype("float64")
                converted = True
            except (TypeError, ValueError):
                pass

        # don't coerce 0-len data
        if len(data) and data.dtype in ("float", "object"):
            # coerce ints if we can
            try:
                new_data = data.astype("int64")
                if (new_data == data).all():
                    data = new_data
                    converted = True
            except (TypeError, ValueError, OverflowError):
                pass

        if data.dtype == "int" and data.dtype != "int64":
            # coerce ints to 64
            try:
                data = data.astype("int64")
                converted = True
            except (TypeError, ValueError):
                pass

        # if we have an index, we want to preserve dtypes
        if name == "index" and len(data):
            if self.orient == "split":
                return data, False

        return data, converted

1316 def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]: 

1317 """ 

1318 Try to parse a ndarray like into a date column. 

1319 

1320 Try to coerce object in epoch/iso formats and integer/float in epoch 

1321 formats. Return a boolean if parsing was successful. 

1322 """ 

1323 # no conversion on empty 

1324 if not len(data): 

1325 return data, False 

1326 

1327 new_data = data 

1328 

1329 if new_data.dtype == "string": 

1330 new_data = new_data.astype(object) 

1331 

1332 if new_data.dtype == "object": 

1333 try: 

1334 new_data = data.astype("int64") 

1335 except OverflowError: 

1336 return data, False 

1337 except (TypeError, ValueError): 

1338 pass 

1339 

1340 # ignore numbers that are out of range 

1341 if issubclass(new_data.dtype.type, np.number): 

1342 in_range = ( 

1343 isna(new_data._values) 

1344 | (new_data > self.min_stamp) 

1345 | (new_data._values == iNaT) 

1346 ) 

1347 if not in_range.all(): 

1348 return data, False 

1349 

1350 date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS 

1351 for date_unit in date_units: 

1352 try: 

1353 with warnings.catch_warnings(): 

1354 warnings.filterwarnings( 

1355 "ignore", 

1356 ".*parsing datetimes with mixed time " 

1357 "zones will raise an error", 

1358 category=FutureWarning, 

1359 ) 

1360 new_data = to_datetime(new_data, errors="raise", unit=date_unit) 

1361 except (ValueError, OverflowError, TypeError): 

1362 continue 

1363 return new_data, True 

1364 return data, False 

1365 

1366 


class SeriesParser(Parser):
    _default_orient = "index"
    _split_keys = ("name", "index", "data")
    obj: Series | None

    def _parse(self) -> None:
        data = ujson_loads(self.json, precise_float=self.precise_float)

        if self.orient == "split":
            decoded = {str(k): v for k, v in data.items()}
            self.check_keys_split(decoded)
            self.obj = Series(**decoded)
        else:
            self.obj = Series(data)

    def _try_convert_types(self) -> None:
        if self.obj is None:
            return
        obj, result = self._try_convert_data(
            "data", self.obj, convert_dates=self.convert_dates
        )
        if result:
            self.obj = obj
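
# Illustrative only: for orient="split", a Series payload such as
# '{"name": "s", "index": [0, 1], "data": [1, 2]}' is decoded, its keys are
# validated against _split_keys, and the dict is passed straight to the
# Series constructor as keyword arguments.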


class FrameParser(Parser):
    _default_orient = "columns"
    _split_keys = ("columns", "index", "data")
    obj: DataFrame | None

    def _parse(self) -> None:
        json = self.json
        orient = self.orient

        if orient == "columns":
            self.obj = DataFrame(
                ujson_loads(json, precise_float=self.precise_float), dtype=None
            )
        elif orient == "split":
            decoded = {
                str(k): v
                for k, v in ujson_loads(json, precise_float=self.precise_float).items()
            }
            self.check_keys_split(decoded)
            orig_names = [
                (tuple(col) if isinstance(col, list) else col)
                for col in decoded["columns"]
            ]
            decoded["columns"] = dedup_names(
                orig_names,
                is_potential_multi_index(orig_names, None),
            )
            self.obj = DataFrame(dtype=None, **decoded)
        elif orient == "index":
            self.obj = DataFrame.from_dict(
                ujson_loads(json, precise_float=self.precise_float),
                dtype=None,
                orient="index",
            )
        elif orient == "table":
            self.obj = parse_table_schema(json, precise_float=self.precise_float)
        else:
            self.obj = DataFrame(
                ujson_loads(json, precise_float=self.precise_float), dtype=None
            )

    def _process_converter(
        self,
        f: Callable[[Hashable, Series], tuple[Series, bool]],
        filt: Callable[[Hashable], bool] | None = None,
    ) -> None:
        """
        Take a conversion function and possibly recreate the frame.
        """
        if filt is None:
            filt = lambda col: True

        obj = self.obj
        assert obj is not None  # for mypy

        needs_new_obj = False
        new_obj = {}
        for i, (col, c) in enumerate(obj.items()):
            if filt(col):
                new_data, result = f(col, c)
                if result:
                    c = new_data
                    needs_new_obj = True
            new_obj[i] = c

        if needs_new_obj:
            # possibly handle dup columns
            new_frame = DataFrame(new_obj, index=obj.index)
            new_frame.columns = obj.columns
            self.obj = new_frame

    def _try_convert_types(self) -> None:
        if self.obj is None:
            return
        if self.convert_dates:
            self._try_convert_dates()

        self._process_converter(
            lambda col, c: self._try_convert_data(col, c, convert_dates=False)
        )

    def _try_convert_dates(self) -> None:
        if self.obj is None:
            return

        # our columns to parse
        convert_dates_list_bool = self.convert_dates
        if isinstance(convert_dates_list_bool, bool):
            convert_dates_list_bool = []
        convert_dates = set(convert_dates_list_bool)

        def is_ok(col) -> bool:
            """
            Return True if this column is a candidate for date parsing.
            """
            if col in convert_dates:
                return True
            if not self.keep_default_dates:
                return False
            if not isinstance(col, str):
                return False

            col_lower = col.lower()
            if (
                col_lower.endswith(("_at", "_time"))
                or col_lower == "modified"
                or col_lower == "date"
                or col_lower == "datetime"
                or col_lower.startswith("timestamp")
            ):
                return True
            return False

        self._process_converter(lambda col, c: self._try_convert_to_date(c), filt=is_ok)
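
# Illustrative only: with the default keep_default_dates=True, columns named
# "created_at" or "timestamp_ms" qualify for date parsing under is_ok above,
# while a column named "created" matches none of the rules and is left alone.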