Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/sas/sasreader.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

51 statements  

1""" 

2Read SAS sas7bdat or xport files. 

3""" 

4from __future__ import annotations 

5 

6from abc import ( 

7 ABCMeta, 

8 abstractmethod, 

9) 

10from types import TracebackType 

11from typing import ( 

12 TYPE_CHECKING, 

13 Hashable, 

14 overload, 

15) 

16 

17from pandas._typing import ( 

18 CompressionOptions, 

19 FilePath, 

20 ReadBuffer, 

21) 

22from pandas.util._decorators import doc 

23 

24from pandas.core.shared_docs import _shared_docs 

25 

26from pandas.io.common import stringify_path 

27 

28if TYPE_CHECKING: 

29 from pandas import DataFrame 

30 

31 

32# TODO(PY38): replace with Protocol in Python 3.8 

class ReaderBase(metaclass=ABCMeta):
    """
    Protocol for XportReader and SAS7BDATReader classes.

    Concrete readers must implement :meth:`read` and :meth:`close`. The
    context-manager hooks are provided here, so any reader can be used in a
    ``with`` statement and is closed when the block exits.
    """

    @abstractmethod
    def read(self, nrows: int | None = None) -> DataFrame:
        """
        Read rows from the underlying file and return them as a DataFrame.

        Parameters
        ----------
        nrows : int, optional
            Number of rows to read. ``read_sas`` calls this method without
            arguments to obtain its DataFrame result; the exact meaning of
            ``None`` is defined by each concrete reader.

        Returns
        -------
        DataFrame
        """

    @abstractmethod
    def close(self) -> None:
        """
        Release whatever resources the concrete reader holds.
        """

    def __enter__(self) -> ReaderBase:
        # The reader is its own context object; nothing is acquired here.
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_value: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        # Always close; returning None means exceptions are not suppressed.
        self.close()

56 

57 

@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: int = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> ReaderBase:
    ...


@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> DataFrame | ReaderBase:
    ...


@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = None,
    index: Hashable | None = None,
    encoding: str | None = None,
    chunksize: int | None = None,
    iterator: bool = False,
    compression: CompressionOptions = "infer",
) -> DataFrame | ReaderBase:
    """
    Read SAS files stored as either XPORT or SAS7BDAT format files.

    Parameters
    ----------
    filepath_or_buffer : str, path object, or file-like object
        String, path object (implementing ``os.PathLike[str]``), or file-like
        object implementing a binary ``read()`` function. The string could be a URL.
        Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
        expected. A local file could be:
        ``file://localhost/path/to/table.sas7bdat``.
    format : str {{'xport', 'sas7bdat'}} or None
        If None, file format is inferred from the file name, which must then
        be a string or path containing ``.xpt`` or ``.sas7bdat``. If 'xport' or
        'sas7bdat' (case-insensitive), uses the corresponding format.
    index : identifier of index column, defaults to None
        Identifier of column that should be used as index of the DataFrame.
    encoding : str, default is None
        Encoding for text data. If None, text data are stored as raw bytes.
    chunksize : int, optional
        Read file `chunksize` lines at a time, returns iterator.

        .. versionchanged:: 1.2

            The returned reader is a context manager.
    iterator : bool, defaults to False
        If True, returns an iterator for reading the file incrementally.

        .. versionchanged:: 1.2

            The returned reader is a context manager.
    {decompression_options}

    Returns
    -------
    DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
    or XportReader

    Raises
    ------
    ValueError
        If `format` is None and `filepath_or_buffer` is not a string or path,
        if the format cannot be inferred from the file name, or if `format`
        is neither 'xport' nor 'sas7bdat'.

    Examples
    --------
    >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP

    >>> with pd.read_sas("sas_data.xpt", chunksize=1000) as reader:  # doctest: +SKIP
    ...     for chunk in reader:
    ...         print(chunk.shape)
    """
    if format is None:
        # Inference needs a file name, so a raw buffer cannot be handled here.
        filepath_or_buffer = stringify_path(filepath_or_buffer)
        if not isinstance(filepath_or_buffer, str):
            raise ValueError(
                "If this is a buffer object rather "
                "than a string name, you must specify a format string"
            )
        fname = filepath_or_buffer.lower()
        # Substring match: the marker may appear anywhere in the name, so it
        # is still found when followed by another suffix (e.g. "data.xpt.gz").
        if ".xpt" in fname:
            format = "xport"
        elif ".sas7bdat" in fname:
            format = "sas7bdat"
        else:
            raise ValueError(
                f"unable to infer format of SAS file from filename: {repr(fname)}"
            )

    # Normalise once so user-supplied values such as "XPORT" are accepted.
    normalized_format = format.lower()

    reader: ReaderBase
    if normalized_format == "xport":
        # Imported lazily so only the reader that is needed gets loaded.
        from pandas.io.sas.sas_xport import XportReader

        reader = XportReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    elif normalized_format == "sas7bdat":
        from pandas.io.sas.sas7bdat import SAS7BDATReader

        reader = SAS7BDATReader(
            filepath_or_buffer,
            index=index,
            encoding=encoding,
            chunksize=chunksize,
            compression=compression,
        )
    else:
        raise ValueError("unknown SAS format")

    # Incremental use: hand the open reader to the caller, who must close it
    # (directly or via ``with``).
    if iterator or chunksize:
        return reader

    # One-shot use: read everything and close the reader before returning.
    with reader:
        return reader.read()