Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/pickle.py: 46%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

28 statements  

1""" pickle compat """ 

2from __future__ import annotations 

3 

4import pickle 

5from typing import Any 

6import warnings 

7 

8from pandas._typing import ( 

9 CompressionOptions, 

10 FilePath, 

11 ReadPickleBuffer, 

12 StorageOptions, 

13 WriteBuffer, 

14) 

15from pandas.compat import pickle_compat as pc 

16from pandas.util._decorators import doc 

17 

18from pandas.core.shared_docs import _shared_docs 

19 

20from pandas.io.common import get_handle 

21 

22 

@doc(
    storage_options=_shared_docs["storage_options"],
    compression_options=_shared_docs["compression_options"] % "filepath_or_buffer",
)
def to_pickle(
    obj: Any,
    filepath_or_buffer: FilePath | WriteBuffer[bytes],
    compression: CompressionOptions = "infer",
    protocol: int = pickle.HIGHEST_PROTOCOL,
    storage_options: StorageOptions = None,
) -> None:
    """
    Pickle (serialize) object to file.

    Parameters
    ----------
    obj : any object
        Any python object.
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``write()`` function.
        Also accepts URL. URL has to be of S3 or GCS.
    {compression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    protocol : int
        Int which indicates which protocol should be used by the pickler,
        default HIGHEST_PROTOCOL (see [1], paragraph 12.1.2). The possible
        values for this parameter depend on the version of Python. For Python
        2.x, possible values are 0, 1, 2. For Python>=3.0, 3 is a valid value.
        For Python >= 3.4, 4 is a valid value. A negative value for the
        protocol parameter is equivalent to setting its value to
        HIGHEST_PROTOCOL.

    {storage_options}

        .. versionadded:: 1.2.0

    .. [1] https://docs.python.org/3/library/pickle.html

    See Also
    --------
    read_pickle : Load pickled pandas object (or any object) from file.
    DataFrame.to_hdf : Write DataFrame to an HDF5 file.
    DataFrame.to_sql : Write DataFrame to a SQL database.
    DataFrame.to_parquet : Write a DataFrame to the binary parquet format.

    Examples
    --------
    >>> original_df = pd.DataFrame({{"foo": range(5), "bar": range(5, 10)}})  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """  # noqa: E501
    # Normalise the documented "negative means highest" convention up front so
    # the value handed to pickle.dump is always an explicit protocol number.
    if protocol < 0:
        protocol = pickle.HIGHEST_PROTOCOL

    with get_handle(
        filepath_or_buffer,
        "wb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        # letting pickle write directly to the buffer is more memory-efficient
        pickle.dump(obj, handles.handle, protocol=protocol)

104 

105 

@doc(
    storage_options=_shared_docs["storage_options"],
    decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer",
)
def read_pickle(
    filepath_or_buffer: FilePath | ReadPickleBuffer,
    compression: CompressionOptions = "infer",
    storage_options: StorageOptions = None,
):
    """
    Load pickled pandas object (or any object) from file.

    .. warning::

       Loading pickled data received from untrusted sources can be
       unsafe. See `here <https://docs.python.org/3/library/pickle.html>`__.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``readlines()`` function.
        Also accepts URL. URL is not limited to S3 and GCS.

    {decompression_options}

        .. versionchanged:: 1.4.0 Zstandard support.

    {storage_options}

        .. versionadded:: 1.2.0

    Returns
    -------
    same type as object stored in file

    See Also
    --------
    DataFrame.to_pickle : Pickle (serialize) DataFrame object to file.
    Series.to_pickle : Pickle (serialize) Series object to file.
    read_hdf : Read HDF5 file into a DataFrame.
    read_sql : Read SQL query or database table into a DataFrame.
    read_parquet : Load a parquet object, returning a DataFrame.

    Notes
    -----
    read_pickle is only guaranteed to be backwards compatible to pandas 0.20.3
    provided the object was serialized with to_pickle.

    Examples
    --------
    >>> original_df = pd.DataFrame(
    ...     {{"foo": range(5), "bar": range(5, 10)}}
    ...    )  # doctest: +SKIP
    >>> original_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    >>> pd.to_pickle(original_df, "./dummy.pkl")  # doctest: +SKIP

    >>> unpickled_df = pd.read_pickle("./dummy.pkl")  # doctest: +SKIP
    >>> unpickled_df  # doctest: +SKIP
       foo  bar
    0    0    5
    1    1    6
    2    2    7
    3    3    8
    4    4    9
    """
    # SECURITY: unpickling executes arbitrary code embedded in the payload;
    # callers must only pass data from trusted sources (see warning above).
    excs_to_catch = (AttributeError, ImportError, ModuleNotFoundError, TypeError)
    with get_handle(
        filepath_or_buffer,
        "rb",
        compression=compression,
        is_text=False,
        storage_options=storage_options,
    ) as handles:
        # 1) try standard library Pickle
        # 2) try pickle_compat (older pandas version) to handle subclass changes
        # 3) try pickle_compat with latin-1 encoding upon a UnicodeDecodeError

        try:
            # TypeError for Cython complaints about object.__new__ vs Tick.__new__
            try:
                with warnings.catch_warnings(record=True):
                    # We want to silence any warnings about, e.g. moved modules.
                    warnings.simplefilter("ignore", Warning)
                    return pickle.load(handles.handle)
            except excs_to_catch:
                # e.g.
                #  "No module named 'pandas.core.sparse.series'"
                #  "Can't get attribute '__nat_unpickle' on <module 'pandas._libs.tslib"
                # NOTE(review): the handle has already been (partly) consumed by
                # the failed pickle.load above; this presumably relies on
                # pc.load rewinding it - confirm against pickle_compat.
                return pc.load(handles.handle, encoding=None)
        except UnicodeDecodeError:
            # e.g. can occur for files written in py27; see GH#28645 and GH#31988
            # NOTE(review): same rewind assumption as the fallback above.
            return pc.load(handles.handle, encoding="latin-1")