Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/feather_format.py: 35%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

43 statements  

1""" feather-format compat """ 

2from __future__ import annotations 

3 

4from typing import ( 

5 Hashable, 

6 Sequence, 

7) 

8 

9from pandas._libs import lib 

10from pandas._typing import ( 

11 DtypeBackend, 

12 FilePath, 

13 ReadBuffer, 

14 StorageOptions, 

15 WriteBuffer, 

16) 

17from pandas.compat._optional import import_optional_dependency 

18from pandas.util._decorators import doc 

19from pandas.util._validators import check_dtype_backend 

20 

21import pandas as pd 

22from pandas.core.api import ( 

23 DataFrame, 

24 RangeIndex, 

25) 

26from pandas.core.shared_docs import _shared_docs 

27 

28from pandas.io.common import get_handle 

29 

30 

@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes],
    storage_options: StorageOptions = None,
    **kwargs,
) -> None:
    """
    Write a DataFrame to the binary Feather format.

    The index is not serialized, so only frames carrying a default, unnamed
    integer index and string column names are accepted.

    Parameters
    ----------
    df : DataFrame
        The frame to write.
    path : str, path object, or file-like object
        Destination, opened for binary writing.
    {storage_options}

        .. versionadded:: 1.2.0

    **kwargs :
        Additional keywords passed to `pyarrow.feather.write_feather`.

        .. versionadded:: 1.1.0

    Raises
    ------
    ValueError
        If `df` is not a DataFrame, if its index is not the default unnamed
        integer range starting at zero, or if its column names are not
        strings.
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    valid_types = {"string", "unicode"}

    # validate index
    # --------------

    # Only a default index is allowed: the index is never written out, so
    # anything else would be silently lost.
    if df.index.dtype != "int64":
        index_type = type(df.index)
        raise ValueError(
            f"feather does not support serializing {index_type} "
            "for the index; you can .reset_index() to make the index into column(s)"
        )

    # An int64 index must also be exactly 0..len(df)-1.
    if not df.index.equals(RangeIndex(len(df))):
        raise ValueError(
            "feather does not support serializing a non-default index for the index; "
            "you can .reset_index() to make the index into column(s)"
        )

    # Index equality ignores the name, so it is checked separately.
    if df.index.name is not None:
        raise ValueError(
            "feather does not serialize index meta-data on a default index"
        )

    # validate columns
    # ----------------

    # must have value column names (strings only)
    if df.columns.inferred_type not in valid_types:
        raise ValueError("feather must have string column names")

    with get_handle(
        path, "wb", storage_options=storage_options, is_text=False
    ) as handles:
        feather.write_feather(df, handles.handle, **kwargs)

97 

98 

@doc(storage_options=_shared_docs["storage_options"])
def read_feather(
    path: FilePath | ReadBuffer[bytes],
    columns: Sequence[Hashable] | None = None,
    use_threads: bool = True,
    storage_options: StorageOptions = None,
    dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
) -> DataFrame:
    """
    Load a feather-format object from the file path.

    Parameters
    ----------
    path : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be: ``file://localhost/path/to/table.feather``.
    columns : sequence, default None
        If not provided, all columns are read.
    use_threads : bool, default True
        Whether to parallelize reading using multiple threads.
    {storage_options}

        .. versionadded:: 1.2.0

    dtype_backend : {{"numpy_nullable", "pyarrow"}}, defaults to NumPy backed DataFrames
        Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
        arrays, nullable dtypes are used for all dtypes that have a nullable
        implementation when "numpy_nullable" is set, pyarrow is used for all
        dtypes if "pyarrow" is set.

        The dtype_backends are still experimental.

        .. versionadded:: 2.0

    Returns
    -------
    DataFrame
        The table stored in the file.
    """
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    check_dtype_backend(dtype_backend)

    with get_handle(
        path, "rb", storage_options=storage_options, is_text=False
    ) as handles:
        # No backend requested: let pyarrow build the frame directly.
        if dtype_backend is lib.no_default:
            return feather.read_feather(
                handles.handle, columns=columns, use_threads=bool(use_threads)
            )

        # A backend was requested: read an Arrow table first so the
        # conversion to pandas can be steered with a types mapper.
        pa_table = feather.read_table(
            handles.handle, columns=columns, use_threads=bool(use_threads)
        )

        if dtype_backend == "numpy_nullable":
            from pandas.io._util import _arrow_dtype_mapping

            return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get)

        elif dtype_backend == "pyarrow":
            return pa_table.to_pandas(types_mapper=pd.ArrowDtype)

        # NOTE(review): any other value falls through and returns None;
        # presumably check_dtype_backend above rejects it first -- confirm.

160 

161 elif dtype_backend == "pyarrow": 

162 return pa_table.to_pandas(types_mapper=pd.ArrowDtype)