1""" feather-format compat """
2from __future__ import annotations
3
4from typing import (
5 Hashable,
6 Sequence,
7)
8
9from pandas._libs import lib
10from pandas._typing import (
11 DtypeBackend,
12 FilePath,
13 ReadBuffer,
14 StorageOptions,
15 WriteBuffer,
16)
17from pandas.compat._optional import import_optional_dependency
18from pandas.util._decorators import doc
19from pandas.util._validators import check_dtype_backend
20
21import pandas as pd
22from pandas.core.api import (
23 DataFrame,
24 RangeIndex,
25)
26from pandas.core.shared_docs import _shared_docs
27
28from pandas.io.common import get_handle
29
30
@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes],
    storage_options: StorageOptions = None,
    **kwargs,
) -> None:
    """
    Write a DataFrame out in the binary Feather format.

    The frame is validated first: the index is not serialized, so only an
    unnamed default index is accepted, and the column labels must be strings.

    Parameters
    ----------
    df : DataFrame
        Frame to write.
    path : str, path object, or file-like object
        Destination, opened for binary writing.
    {storage_options}

        .. versionadded:: 1.2.0

    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

        .. versionadded:: 1.1.0

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, if its index is not an unnamed default
        index, or if its column labels are not inferred as strings.
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    # Index validation: the index is never written, so anything other than
    # an unnamed 0..n-1 index is rejected up front.
    index = df.index

    if index.dtype != "int64":
        raise ValueError(
            f"feather does not support serializing {type(index)} "
            "for the index; you can .reset_index() to make the index into column(s)"
        )

    default_index = RangeIndex.from_range(range(len(df)))
    if not index.equals(default_index):
        raise ValueError(
            "feather does not support serializing a non-default index for the index; "
            "you can .reset_index() to make the index into column(s)"
        )

    if index.name is not None:
        raise ValueError(
            "feather does not serialize index meta-data on a default index"
        )

    # Column validation: only string labels are allowed.
    if df.columns.inferred_type not in ("string", "unicode"):
        raise ValueError("feather must have string column names")

    with get_handle(
        path,
        "wb",
        storage_options=storage_options,
        is_text=False,
    ) as handles:
        feather.write_feather(df, handles.handle, **kwargs)
97
98
@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
):
    """
    Load a feather-format object from the file path.

    Parameters
    ----------
    path : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.feather``.
    columns : sequence, default None
        Subset of columns to read. If not provided, all columns are read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.
    {storage_options}

        .. versionadded:: 1.2.0

    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
        Which dtype_backend to use for the resulting DataFrame. With
        "numpy_nullable", nullable dtypes are used for all dtypes that have a
        nullable implementation; with "pyarrow", pyarrow-backed dtypes are
        used for all dtypes.

        The dtype_backends are still experimental.

        .. versionadded:: 2.0

    Returns
    -------
    DataFrame
        The contents of the file, as converted to pandas by pyarrow.
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    # Reject unknown backends before any handle is opened.
    check_dtype_backend(dtype_backend)

    with get_handle(
        path,
        "rb",
        storage_options=storage_options,
        is_text=False,
    ) as handles:
        reader_kwargs = {"columns": columns, "use_threads": bool(use_threads)}

        # No backend requested: let pyarrow build the DataFrame directly.
        if dtype_backend is lib.no_default:
            return feather.read_feather(handles.handle, **reader_kwargs)

        # Otherwise read an Arrow table and convert it with the mapper that
        # matches the requested backend.
        pa_table = feather.read_table(handles.handle, **reader_kwargs)

        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            nullable_mapper = _arrow_dtype_mapping().get
            return pa_table.to_pandas(types_mapper=nullable_mapper)

        if dtype_backend == "pyarrow":
            return pa_table.to_pandas(types_mapper=pd.ArrowDtype)