Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/feather_format.py: 41%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

41 statements  

1""" feather-format compat """ 

2from __future__ import annotations 

3 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7) 

8 

9from pandas._config import using_pyarrow_string_dtype 

10 

11from pandas._libs import lib 

12from pandas.compat._optional import import_optional_dependency 

13from pandas.util._decorators import doc 

14from pandas.util._validators import check_dtype_backend 

15 

16import pandas as pd 

17from pandas.core.api import DataFrame 

18from pandas.core.shared_docs import _shared_docs 

19 

20from pandas.io._util import arrow_string_types_mapper 

21from pandas.io.common import get_handle 

22 

23if TYPE_CHECKING: 

24 from collections.abc import ( 

25 Hashable, 

26 Sequence, 

27 ) 

28 

29 from pandas._typing import ( 

30 DtypeBackend, 

31 FilePath, 

32 ReadBuffer, 

33 StorageOptions, 

34 WriteBuffer, 

35 ) 

36 

37 

@doc(storage_options=_shared_docs["storage_options"])
def to_feather(
    df: DataFrame,
    path: FilePath | WriteBuffer[bytes],
    storage_options: StorageOptions | None = None,
    **kwargs: Any,
) -> None:
    """
    Serialize a DataFrame to a Feather file or binary buffer.

    Parameters
    ----------
    df : DataFrame
        Frame to write; any other type is rejected.
    path : str, path object, or file-like object
        Destination, opened for binary writing.
    {storage_options}
    **kwargs :
        Extra keyword arguments forwarded unchanged to
        `pyarrow.feather.write_feather`.

    Raises
    ------
    ValueError
        If `df` is not a DataFrame.
    """
    # pyarrow is an optional dependency: resolve it before importing the
    # feather submodule that performs the actual write.
    import_optional_dependency("pyarrow")
    from pyarrow import feather

    if not isinstance(df, DataFrame):
        raise ValueError("feather only support IO with DataFrames")

    # The destination is opened in binary mode and released by the context
    # manager once pyarrow has finished writing.
    with get_handle(
        path,
        "wb",
        storage_options=storage_options,
        is_text=False,
    ) as io_handles:
        feather.write_feather(df, io_handles.handle, **kwargs)

67 

68 

69@doc(storage_options=_shared_docs["storage_options"]) 

70def read_feather( 

71 path: FilePath | ReadBuffer[bytes], 

72 columns: Sequence[Hashable] | None = None, 

73 use_threads: bool = True, 

74 storage_options: StorageOptions | None = None, 

75 dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default, 

76) -> DataFrame: 

77 """ 

78 Load a feather-format object from the file path. 

79 

80 Parameters 

81 ---------- 

82 path : str, path object, or file-like object 

83 String, path object (implementing ``os.PathLike[str]``), or file-like 

84 object implementing a binary ``read()`` function. The string could be a URL. 

85 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is 

86 expected. A local file could be: ``file://localhost/path/to/table.feather``. 

87 columns : sequence, default None 

88 If not provided, all columns are read. 

89 use_threads : bool, default True 

90 Whether to parallelize reading using multiple threads. 

91 {storage_options} 

92 

93 dtype_backend : {{'numpy_nullable', 'pyarrow'}}, default 'numpy_nullable' 

94 Back-end data type applied to the resultant :class:`DataFrame` 

95 (still experimental). Behaviour is as follows: 

96 

97 * ``"numpy_nullable"``: returns nullable-dtype-backed :class:`DataFrame` 

98 (default). 

99 * ``"pyarrow"``: returns pyarrow-backed nullable :class:`ArrowDtype` 

100 DataFrame. 

101 

102 .. versionadded:: 2.0 

103 

104 Returns 

105 ------- 

106 type of object stored in file 

107 

108 Examples 

109 -------- 

110 >>> df = pd.read_feather("path/to/file.feather") # doctest: +SKIP 

111 """ 

112 import_optional_dependency("pyarrow") 

113 from pyarrow import feather 

114 

115 # import utils to register the pyarrow extension types 

116 import pandas.core.arrays.arrow.extension_types # pyright: ignore[reportUnusedImport] # noqa: F401 

117 

118 check_dtype_backend(dtype_backend) 

119 

120 with get_handle( 

121 path, "rb", storage_options=storage_options, is_text=False 

122 ) as handles: 

123 if dtype_backend is lib.no_default and not using_pyarrow_string_dtype(): 

124 return feather.read_feather( 

125 handles.handle, columns=columns, use_threads=bool(use_threads) 

126 ) 

127 

128 pa_table = feather.read_table( 

129 handles.handle, columns=columns, use_threads=bool(use_threads) 

130 ) 

131 

132 if dtype_backend == "numpy_nullable": 

133 from pandas.io._util import _arrow_dtype_mapping 

134 

135 return pa_table.to_pandas(types_mapper=_arrow_dtype_mapping().get) 

136 

137 elif dtype_backend == "pyarrow": 

138 return pa_table.to_pandas(types_mapper=pd.ArrowDtype) 

139 

140 elif using_pyarrow_string_dtype(): 

141 return pa_table.to_pandas(types_mapper=arrow_string_types_mapper()) 

142 else: 

143 raise NotImplementedError