Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/sas/sasreader.py: 38%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

52 statements  

1""" 

2Read SAS sas7bdat or xport files. 

3""" 

4from __future__ import annotations 

5 

6from abc import ( 

7 ABC, 

8 abstractmethod, 

9) 

10from typing import ( 

11 TYPE_CHECKING, 

12 overload, 

13) 

14 

15from pandas.util._decorators import doc 

16 

17from pandas.core.shared_docs import _shared_docs 

18 

19from pandas.io.common import stringify_path 

20 

21if TYPE_CHECKING: 

22 from collections.abc import Hashable 

23 from types import TracebackType 

24 

25 from pandas._typing import ( 

26 CompressionOptions, 

27 FilePath, 

28 ReadBuffer, 

29 Self, 

30 ) 

31 

32 from pandas import DataFrame 

33 

34 

class ReaderBase(ABC):
    """
    Abstract interface shared by the XportReader and SAS7BDATReader classes.

    Subclasses must implement ``read`` and ``close``. Instances can be used
    as context managers: entering the ``with`` block yields the reader
    itself, and leaving it calls ``close``.
    """

    @abstractmethod
    def read(self, nrows: int | None = None) -> DataFrame:
        """Return data as a DataFrame; must be implemented by subclasses."""

    @abstractmethod
    def close(self) -> None:
        """Close the reader; must be implemented by subclasses."""

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Close unconditionally. Returning None means an exception raised
        # inside the ``with`` block is not suppressed.
        self.close()

58 

59 

# Typing overload: an explicit integer ``chunksize`` always yields a reader.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    # Deliberately no default: if ``chunksize`` were optional here, a call
    # that omits it would match this overload first and be typed as
    # ``ReaderBase`` even though the implementation returns a DataFrame in
    # that case. Such calls must fall through to the ``chunksize: None``
    # overload instead.
    chunksize: int,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> ReaderBase:
    ...

72 

73 

# Typing overload: with ``chunksize`` left as None the result is either a
# DataFrame or a reader, depending on ``iterator``.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> DataFrame | ReaderBase: ...

86 

87 

@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
) -> DataFrame | ReaderBase:
    """
    Read a SAS file stored in either XPORT or SAS7BDAT format.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas7bdat``.
    format : str {{'xport', 'sas7bdat'}} or None
        Format of the file, matched case-insensitively. If None, the format
        is inferred from the file name, which must then be a string or path
        rather than a buffer.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data. If None, text data are stored as raw bytes.
    chunksize : int
        Read file `chunksize` lines at a time, returns iterator.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.
    {decompression_options}

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If ``format`` is None and ``filepath_or_buffer`` is not a string or
        path, if the format cannot be inferred from the file name, or if
        ``format`` names an unknown SAS format.

    Examples
    --------
    >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP
    """
    if format is None:
        # Inference needs a file name, so buffers are rejected here.
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(
                "If this is a buffer object rather "
                "than a string name, you must specify a format string"
            )

        fname = filepath_or_buffer.lower()
        # Markers are tried in this order; the first one found anywhere in
        # the lower-cased name decides the format.
        for marker, inferred in ((".xpt", "xport"), (".sas7bdat", "sas7bdat")):
            if marker in fname:
                format = inferred
                break
        else:
            raise ValueError(
                f"unable to infer format of SAS file from filename: {repr(fname)}"
            )

    # The concrete reader modules are imported only once the format is known.
    normalized = format.lower()
    if normalized == "xport":
        from pandas.io.sas.sas_xport import XportReader as reader_cls
    elif normalized == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader as reader_cls
    else:
        raise ValueError("unknown SAS format")

    reader: ReaderBase = reader_cls(
        filepath_or_buffer,
        index=index,
        encoding=encoding,
        chunksize=chunksize,
        compression=compression,
    )

    if not (iterator or chunksize):
        # One-shot read: the context manager closes the reader afterwards.
        with reader:
            return reader.read()

    # Incremental reading was requested; closing the reader is left to the caller.
    return reader