1from __future__ import annotations
2
3from typing import (
4 TYPE_CHECKING,
5 Any,
6 Callable,
7 Hashable,
8 Iterable,
9 Literal,
10 MutableMapping,
11 Sequence,
12 TypeVar,
13 overload,
14)
15
16from pandas.compat._optional import import_optional_dependency
17
18from pandas.core.dtypes.common import (
19 is_integer,
20 is_list_like,
21)
22
23if TYPE_CHECKING:
24 from pandas.io.excel._base import ExcelWriter
25
26 ExcelWriter_t = type[ExcelWriter]
27 usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])
28
29_writers: MutableMapping[str, ExcelWriter_t] = {}
30
31
32def register_writer(klass: ExcelWriter_t) -> None:
33 """
34 Add engine to the excel writer registry.io.excel.
35
36 You must use this method to integrate with ``to_excel``.
37
38 Parameters
39 ----------
40 klass : ExcelWriter
41 """
42 if not callable(klass):
43 raise ValueError("Can only register callables as engines")
44 engine_name = klass._engine
45 _writers[engine_name] = klass
46
47
48def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
49 """
50 Return the default reader/writer for the given extension.
51
52 Parameters
53 ----------
54 ext : str
55 The excel file extension for which to get the default engine.
56 mode : str {'reader', 'writer'}
57 Whether to get the default engine for reading or writing.
58 Either 'reader' or 'writer'
59
60 Returns
61 -------
62 str
63 The default engine for the extension.
64 """
65 _default_readers = {
66 "xlsx": "openpyxl",
67 "xlsm": "openpyxl",
68 "xlsb": "pyxlsb",
69 "xls": "xlrd",
70 "ods": "odf",
71 }
72 _default_writers = {
73 "xlsx": "openpyxl",
74 "xlsm": "openpyxl",
75 "xlsb": "pyxlsb",
76 "ods": "odf",
77 }
78 assert mode in ["reader", "writer"]
79 if mode == "writer":
80 # Prefer xlsxwriter over openpyxl if installed
81 xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn")
82 if xlsxwriter:
83 _default_writers["xlsx"] = "xlsxwriter"
84 return _default_writers[ext]
85 else:
86 return _default_readers[ext]
87
88
89def get_writer(engine_name: str) -> ExcelWriter_t:
90 try:
91 return _writers[engine_name]
92 except KeyError as err:
93 raise ValueError(f"No Excel writer '{engine_name}'") from err
94
95
96def _excel2num(x: str) -> int:
97 """
98 Convert Excel column name like 'AB' to 0-based column index.
99
100 Parameters
101 ----------
102 x : str
103 The Excel column name to convert to a 0-based column index.
104
105 Returns
106 -------
107 num : int
108 The column index corresponding to the name.
109
110 Raises
111 ------
112 ValueError
113 Part of the Excel column name was invalid.
114 """
115 index = 0
116
117 for c in x.upper().strip():
118 cp = ord(c)
119
120 if cp < ord("A") or cp > ord("Z"):
121 raise ValueError(f"Invalid column name: {x}")
122
123 index = index * 26 + cp - ord("A") + 1
124
125 return index - 1
126
127
128def _range2cols(areas: str) -> list[int]:
129 """
130 Convert comma separated list of column names and ranges to indices.
131
132 Parameters
133 ----------
134 areas : str
135 A string containing a sequence of column ranges (or areas).
136
137 Returns
138 -------
139 cols : list
140 A list of 0-based column indices.
141
142 Examples
143 --------
144 >>> _range2cols('A:E')
145 [0, 1, 2, 3, 4]
146 >>> _range2cols('A,C,Z:AB')
147 [0, 2, 25, 26, 27]
148 """
149 cols: list[int] = []
150
151 for rng in areas.split(","):
152 if ":" in rng:
153 rngs = rng.split(":")
154 cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1))
155 else:
156 cols.append(_excel2num(rng))
157
158 return cols
159
160
161@overload
162def maybe_convert_usecols(usecols: str | list[int]) -> list[int]:
163 ...
164
165
166@overload
167def maybe_convert_usecols(usecols: list[str]) -> list[str]:
168 ...
169
170
171@overload
172def maybe_convert_usecols(usecols: usecols_func) -> usecols_func:
173 ...
174
175
176@overload
177def maybe_convert_usecols(usecols: None) -> None:
178 ...
179
180
181def maybe_convert_usecols(
182 usecols: str | list[int] | list[str] | usecols_func | None,
183) -> None | list[int] | list[str] | usecols_func:
184 """
185 Convert `usecols` into a compatible format for parsing in `parsers.py`.
186
187 Parameters
188 ----------
189 usecols : object
190 The use-columns object to potentially convert.
191
192 Returns
193 -------
194 converted : object
195 The compatible format of `usecols`.
196 """
197 if usecols is None:
198 return usecols
199
200 if is_integer(usecols):
201 raise ValueError(
202 "Passing an integer for `usecols` is no longer supported. "
203 "Please pass in a list of int from 0 to `usecols` inclusive instead."
204 )
205
206 if isinstance(usecols, str):
207 return _range2cols(usecols)
208
209 return usecols
210
211
212@overload
213def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]:
214 ...
215
216
217@overload
218def validate_freeze_panes(freeze_panes: None) -> Literal[False]:
219 ...
220
221
222def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
223 if freeze_panes is not None:
224 if len(freeze_panes) == 2 and all(
225 isinstance(item, int) for item in freeze_panes
226 ):
227 return True
228
229 raise ValueError(
230 "freeze_panes must be of form (row, column) "
231 "where row and column are integers"
232 )
233
234 # freeze_panes wasn't specified, return False so it won't be applied
235 # to output sheet
236 return False
237
238
239def fill_mi_header(
240 row: list[Hashable], control_row: list[bool]
241) -> tuple[list[Hashable], list[bool]]:
242 """
243 Forward fill blank entries in row but only inside the same parent index.
244
245 Used for creating headers in Multiindex.
246
247 Parameters
248 ----------
249 row : list
250 List of items in a single row.
251 control_row : list of bool
252 Helps to determine if particular column is in same parent index as the
253 previous value. Used to stop propagation of empty cells between
254 different indexes.
255
256 Returns
257 -------
258 Returns changed row and control_row
259 """
260 last = row[0]
261 for i in range(1, len(row)):
262 if not control_row[i]:
263 last = row[i]
264
265 if row[i] == "" or row[i] is None:
266 row[i] = last
267 else:
268 control_row[i] = False
269 last = row[i]
270
271 return row, control_row
272
273
274def pop_header_name(
275 row: list[Hashable], index_col: int | Sequence[int]
276) -> tuple[Hashable | None, list[Hashable]]:
277 """
278 Pop the header name for MultiIndex parsing.
279
280 Parameters
281 ----------
282 row : list
283 The data row to parse for the header name.
284 index_col : int, list
285 The index columns for our data. Assumed to be non-null.
286
287 Returns
288 -------
289 header_name : str
290 The extracted header name.
291 trimmed_row : list
292 The original data row with the header name removed.
293 """
294 # Pop out header name and fill w/blank.
295 if is_list_like(index_col):
296 assert isinstance(index_col, Iterable)
297 i = max(index_col)
298 else:
299 assert not isinstance(index_col, Iterable)
300 i = index_col
301
302 header_name = row[i]
303 header_name = None if header_name == "" else header_name
304
305 return header_name, row[:i] + [""] + row[i + 1 :]
306
307
308def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
309 """
310 Used to combine two sources of kwargs for the backend engine.
311
312 Use of kwargs is deprecated, this function is solely for use in 1.3 and should
313 be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
314 or kwargs must be None or empty respectively.
315
316 Parameters
317 ----------
318 engine_kwargs: dict
319 kwargs to be passed through to the engine.
320 kwargs: dict
321 kwargs to be psased through to the engine (deprecated)
322
323 Returns
324 -------
325 engine_kwargs combined with kwargs
326 """
327 if engine_kwargs is None:
328 result = {}
329 else:
330 result = engine_kwargs.copy()
331 result.update(kwargs)
332 return result