Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/pickle.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

30 statements  

1""" pickle compat """ 

2from __future__ import annotations 

3 

4import pickle 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8) 

9import warnings 

10 

11from pandas.compat import pickle_compat as pc 

12from pandas.util._decorators import doc 

13 

14from pandas.core.shared_docs import _shared_docs 

15 

16from pandas.io.common import get_handle 

17 

18if TYPE_CHECKING: 

19 from pandas._typing import ( 

20 CompressionOptions, 

21 FilePath, 

22 ReadPickleBuffer, 

23 StorageOptions, 

24 WriteBuffer, 

25 ) 

26 

27 from pandas import ( 

28 DataFrame, 

29 Series, 

30 ) 

31 

32 

@doc(
    storage_options=_shared_docs["storage_options"],
    compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def to_pickle(
    obj: Any,
    filepath_or_buffer: FilePath | WriteBuffer[bytes],
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions | None = None,
) -> None:
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function.
        Also accepts URL. URL has to be of S3 or GCS.
    {compression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    protocol : int
        Pickle protocol handed to the pickler, default HIGHEST_PROTOCOL
        (see [1], paragraph 12.1.2). Which values are accepted depends on the
        version of Python: for Python 2.x, possible values are 0, 1, 2; for
        Python>=3.0, 3 is a valid value; for Python >= 3.4, 4 is a valid
        value. A negative value is treated as HIGHEST_PROTOCOL.

    {storage_options}

        .. [1] https://docs.python.org/3/library/pickle.html

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ... )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    # Any negative protocol is shorthand for the newest one pickle offers.
    effective_protocol = pickle.HIGHEST_PROTOCOL if protocol < 0 else protocol

    with get_handle(
        filepath_or_buffer,
        "wb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as io_handles:
        # Dump straight into the handle: more memory-efficient than building
        # the whole payload first and writing it afterwards.
        pickle.dump(obj, io_handles.handle, protocol=effective_protocol)

112 

113 

@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
    filepath_or_buffer: FilePath | ReadPickleBuffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions | None = None,
) -> DataFrame | Series:
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``readlines()`` function.
        Also accepts URL. URL is not limited to S3 and GCS.

    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    {storage_options}

    Returns
    -------
    same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
    provided the object was serialized with to_pickle.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ... )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    # Errors from the plain unpickler that trigger a retry through
    # pickle_compat. TypeError is included for Cython complaints about
    # object.__new__ vs Tick.__new__.
    recoverable_errors = (
        AttributeError,
        ImportError,
        ModuleNotFoundError,
        TypeError,
    )

    with get_handle(
        filepath_or_buffer,
        "rb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as io_handles:
        stream = io_handles.handle

        # Three attempts, in order:
        #   1) the standard library unpickler
        #   2) pickle_compat (older pandas version) to handle subclass changes
        #   3) pickle_compat with latin-1 encoding upon a UnicodeDecodeError
        try:
            try:
                with warnings.catch_warnings(record=True):
                    # Silence any warnings raised while loading, e.g. about
                    # moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(stream)
            except recoverable_errors:
                # Typical triggers:
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                return pc.load(stream, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            return pc.load(stream, encoding="latin-1")