1"""
2Read SAS sas7bdat or xport files.
3"""
4from __future__ import annotations
5
6from abc import (
7 ABCMeta,
8 abstractmethod,
9)
10from types import TracebackType
11from typing import (
12 TYPE_CHECKING,
13 Hashable,
14 overload,
15)
16
17from pandas._typing import (
18 CompressionOptions,
19 FilePath,
20 ReadBuffer,
21)
22from pandas.util._decorators import doc
23
24from pandas.core.shared_docs import _shared_docs
25
26from pandas.io.common import stringify_path
27
28if TYPE_CHECKING:
29 from pandas import DataFrame
30
31
32# TODO(PY38): replace with Protocol in Python 3.8
33class ReaderBase(metaclass=ABCMeta):
34 """
35 Protocol for XportReader and SAS7BDATReader classes.
36 """
37
38 @abstractmethod
39 def read(self, nrows: int | None = None) -> DataFrame:
40 pass
41
42 @abstractmethod
43 def close(self) -> None:
44 pass
45
46 def __enter__(self) -> ReaderBase:
47 return self
48
49 def __exit__(
50 self,
51 exc_type: type[BaseException] | None,
52 exc_value: BaseException | None,
53 traceback: TracebackType | None,
54 ) -> None:
55 self.close()
56
57
# Typing-only overload: when ``chunksize`` is passed as an int, the call is
# annotated as returning a reader object rather than a DataFrame.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: int = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> ReaderBase:
    ...
70
71
# Typing-only overload: with ``chunksize`` as None the result depends on
# ``iterator`` (the implementation returns the reader when it is truthy), so
# the return is annotated as either a DataFrame or a reader.
@overload
def read_sas(
    filepath_or_buffer: FilePath | ReadBuffer[bytes],
    *,
    format: str | None = ...,
    index: Hashable | None = ...,
    encoding: str | None = ...,
    chunksize: None = ...,
    iterator: bool = ...,
    compression: CompressionOptions = ...,
) -> DataFrame | ReaderBase:
    ...
84
85
86@doc(decompression_options=_shared_docs["decompression_options"] % "filepath_or_buffer")
87def read_sas(
88 filepath_or_buffer: FilePath | ReadBuffer[bytes],
89 *,
90 format: str | None = None,
91 index: Hashable | None = None,
92 encoding: str | None = None,
93 chunksize: int | None = None,
94 iterator: bool = False,
95 compression: CompressionOptions = "infer",
96) -> DataFrame | ReaderBase:
97 """
98 Read SAS files stored as either XPORT or SAS7BDAT format files.
99
100 Parameters
101 ----------
102 filepath_or_buffer : str, path object, or file-like object
103 String, path object (implementing ``os.PathLike[str]``), or file-like
104 object implementing a binary ``read()`` function. The string could be a URL.
105 Valid URL schemes include http, ftp, s3, and file. For file URLs, a host is
106 expected. A local file could be:
107 ``file://localhost/path/to/table.sas7bdat``.
108 format : str {{'xport', 'sas7bdat'}} or None
109 If None, file format is inferred from file extension. If 'xport' or
110 'sas7bdat', uses the corresponding format.
111 index : identifier of index column, defaults to None
112 Identifier of column that should be used as index of the DataFrame.
113 encoding : str, default is None
114 Encoding for text data. If None, text data are stored as raw bytes.
115 chunksize : int
116 Read file `chunksize` lines at a time, returns iterator.
117
118 .. versionchanged:: 1.2
119
120 ``TextFileReader`` is a context manager.
121 iterator : bool, defaults to False
122 If True, returns an iterator for reading the file incrementally.
123
124 .. versionchanged:: 1.2
125
126 ``TextFileReader`` is a context manager.
127 {decompression_options}
128
129 Returns
130 -------
131 DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
132 or XportReader
133 """
134 if format is None:
135 buffer_error_msg = (
136 "If this is a buffer object rather "
137 "than a string name, you must specify a format string"
138 )
139 filepath_or_buffer = stringify_path(filepath_or_buffer)
140 if not isinstance(filepath_or_buffer, str):
141 raise ValueError(buffer_error_msg)
142 fname = filepath_or_buffer.lower()
143 if ".xpt" in fname:
144 format = "xport"
145 elif ".sas7bdat" in fname:
146 format = "sas7bdat"
147 else:
148 raise ValueError(
149 f"unable to infer format of SAS file from filename: {repr(fname)}"
150 )
151
152 reader: ReaderBase
153 if format.lower() == "xport":
154 from pandas.io.sas.sas_xport import XportReader
155
156 reader = XportReader(
157 filepath_or_buffer,
158 index=index,
159 encoding=encoding,
160 chunksize=chunksize,
161 compression=compression,
162 )
163 elif format.lower() == "sas7bdat":
164 from pandas.io.sas.sas7bdat import SAS7BDATReader
165
166 reader = SAS7BDATReader(
167 filepath_or_buffer,
168 index=index,
169 encoding=encoding,
170 chunksize=chunksize,
171 compression=compression,
172 )
173 else:
174 raise ValueError("unknown SAS format")
175
176 if iterator or chunksize:
177 return reader
178
179 with reader:
180 return reader.read()