1""" feather-format compat """
2from __future__ import annotations
3
4from typing import (
5 TYPE_CHECKING,
6 Any,
7)
8
9from pandas._config import using_pyarrow_string_dtype
10
11from pandas._libs import lib
12from pandas.compat._optional import import_optional_dependency
13from pandas.util._decorators import doc
14from pandas.util._validators import check_dtype_backend
15
16import pandas as pd
17from pandas.core.api import DataFrame
18from pandas.core.shared_docs import _shared_docs
19
20from pandas.io._util import arrow_string_types_mapper
21from pandas.io.common import get_handle
22
23if TYPE_CHECKING:
24 from collections.abc import (
25 Hashable,
26 Sequence,
27 )
28
29 from pandas._typing import (
30 DtypeBackend,
31 FilePath,
32 ReadBuffer,
33 StorageOptions,
34 WriteBuffer,
35 )
36
37
@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes],
    storage_options: StorageOptions | None = None,
    **kwargs: Any,
) -> None:
    """
    Serialize a DataFrame to a file or buffer in the binary Feather format.

    Parameters
    ----------
    df : DataFrame
        The frame to write. Objects of any other type are rejected.
    path : str, path object, or file-like object
        Destination of the Feather data; it is opened for binary writing.
    {storage_options}
    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

    Raises
    ------
    ImportError
        If pyarrow cannot be imported.
    ValueError
        If ``df`` is not a DataFrame.
    """
    # pyarrow is an optional dependency, so it is resolved at call time
    # rather than at module import time.
    import_optional_dependency("pyarrow")
    from pyarrow import feather as pa_feather

    # Validate the input before any file handle is opened so that a bad call
    # never creates or truncates the target.
    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    with get_handle(
        path,
        "wb",
        storage_options=storage_options,
        is_text=False,
    ) as sink:
        pa_feather.write_feather(df, sink.handle, **kwargs)
67
68
@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions | None = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame:
    """
    Read a Feather-format file or buffer into a DataFrame.

    Parameters
    ----------
    path : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.feather``.
    columns : sequence, default None
        Columns to load. If not provided, all columns are read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads. The value is
        coerced with ``bool()`` before being handed to pyarrow.
    {storage_options}

    dtype_backend : {{'numpy_nullable', 'pyarrow'}}
        Back-end data type applied to the resultant :class:`DataFrame`
        (still experimental). If not specified, no nullable-dtype mapping is
        applied; only the pyarrow string dtype mapping is used, and only when
        that option is enabled. If specified, the behaviour is as follows:

        * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame`.
        * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype`
          DataFrame.

        .. versionadded:: 2.0

    Returns
    -------
    DataFrame
        The contents of the Feather file.

    Examples
    --------
    >>> df = pd.read_feather("path/to/file.feather")  # doctest: +SKIP
    """
    # pyarrow is an optional dependency, so it is resolved at call time.
    import_optional_dependency("pyarrow")
    from pyarrow import feather as pa_feather

    # Imported purely for its side effect: registering the pyarrow extension types.
    import pandas.core.arrays.arrow.extension_types  # pyright: ignore[reportUnusedImport] # noqa: F401

    # Validate the requested backend before any file handle is opened.
    check_dtype_backend(dtype_backend)

    with get_handle(
        path,
        "rb",
        storage_options=storage_options,
        is_text=False,
    ) as source:
        reader_kwargs = {"columns": columns, "use_threads": bool(use_threads)}

        # Fast path: no backend requested and no string-dtype override, so let
        # pyarrow build the DataFrame directly.
        backend_unset = dtype_backend is lib.no_default
        if backend_unset and not using_pyarrow_string_dtype():
            return pa_feather.read_feather(source.handle, **reader_kwargs)

        arrow_table = pa_feather.read_table(source.handle, **reader_kwargs)

        # Choose how arrow types are mapped onto pandas dtypes, then convert once.
        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            mapper = _arrow_dtype_mapping().get
        elif dtype_backend == "pyarrow":
            mapper = pd.ArrowDtype
        elif using_pyarrow_string_dtype():
            mapper = arrow_string_types_mapper()
        else:
            # Defensive: every combination that passes validation is handled above.
            raise NotImplementedError

        return arrow_table.to_pandas(types_mapper=mapper)