1from __future__ import annotations
2
3from collections.abc import (
4 Hashable,
5 Iterable,
6 MutableMapping,
7 Sequence,
8)
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Callable,
13 Literal,
14 TypeVar,
15 overload,
16)
17
18from pandas.compat._optional import import_optional_dependency
19
20from pandas.core.dtypes.common import (
21 is_integer,
22 is_list_like,
23)
24
25if TYPE_CHECKING:
26 from pandas.io.excel._base import ExcelWriter
27
28 ExcelWriter_t = type[ExcelWriter]
29 usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])
30
31_writers: MutableMapping[str, ExcelWriter_t] = {}
32
33
34def register_writer(klass: ExcelWriter_t) -> None:
35 """
36 Add engine to the excel writer registry.io.excel.
37
38 You must use this method to integrate with ``to_excel``.
39
40 Parameters
41 ----------
42 klass : ExcelWriter
43 """
44 if not callable(klass):
45 raise ValueError("Can only register callables as engines")
46 engine_name = klass._engine
47 _writers[engine_name] = klass
48
49
50def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
51 """
52 Return the default reader/writer for the given extension.
53
54 Parameters
55 ----------
56 ext : str
57 The excel file extension for which to get the default engine.
58 mode : str {'reader', 'writer'}
59 Whether to get the default engine for reading or writing.
60 Either 'reader' or 'writer'
61
62 Returns
63 -------
64 str
65 The default engine for the extension.
66 """
67 _default_readers = {
68 "xlsx": "openpyxl",
69 "xlsm": "openpyxl",
70 "xlsb": "pyxlsb",
71 "xls": "xlrd",
72 "ods": "odf",
73 }
74 _default_writers = {
75 "xlsx": "openpyxl",
76 "xlsm": "openpyxl",
77 "xlsb": "pyxlsb",
78 "ods": "odf",
79 }
80 assert mode in ["reader", "writer"]
81 if mode == "writer":
82 # Prefer xlsxwriter over openpyxl if installed
83 xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn")
84 if xlsxwriter:
85 _default_writers["xlsx"] = "xlsxwriter"
86 return _default_writers[ext]
87 else:
88 return _default_readers[ext]
89
90
91def get_writer(engine_name: str) -> ExcelWriter_t:
92 try:
93 return _writers[engine_name]
94 except KeyError as err:
95 raise ValueError(f"No Excel writer '{engine_name}'") from err
96
97
98def _excel2num(x: str) -> int:
99 """
100 Convert Excel column name like 'AB' to 0-based column index.
101
102 Parameters
103 ----------
104 x : str
105 The Excel column name to convert to a 0-based column index.
106
107 Returns
108 -------
109 num : int
110 The column index corresponding to the name.
111
112 Raises
113 ------
114 ValueError
115 Part of the Excel column name was invalid.
116 """
117 index = 0
118
119 for c in x.upper().strip():
120 cp = ord(c)
121
122 if cp < ord("A") or cp > ord("Z"):
123 raise ValueError(f"Invalid column name: {x}")
124
125 index = index * 26 + cp - ord("A") + 1
126
127 return index - 1
128
129
130def _range2cols(areas: str) -> list[int]:
131 """
132 Convert comma separated list of column names and ranges to indices.
133
134 Parameters
135 ----------
136 areas : str
137 A string containing a sequence of column ranges (or areas).
138
139 Returns
140 -------
141 cols : list
142 A list of 0-based column indices.
143
144 Examples
145 --------
146 >>> _range2cols('A:E')
147 [0, 1, 2, 3, 4]
148 >>> _range2cols('A,C,Z:AB')
149 [0, 2, 25, 26, 27]
150 """
151 cols: list[int] = []
152
153 for rng in areas.split(","):
154 if ":" in rng:
155 rngs = rng.split(":")
156 cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1))
157 else:
158 cols.append(_excel2num(rng))
159
160 return cols
161
162
163@overload
164def maybe_convert_usecols(usecols: str | list[int]) -> list[int]:
165 ...
166
167
168@overload
169def maybe_convert_usecols(usecols: list[str]) -> list[str]:
170 ...
171
172
173@overload
174def maybe_convert_usecols(usecols: usecols_func) -> usecols_func:
175 ...
176
177
178@overload
179def maybe_convert_usecols(usecols: None) -> None:
180 ...
181
182
183def maybe_convert_usecols(
184 usecols: str | list[int] | list[str] | usecols_func | None,
185) -> None | list[int] | list[str] | usecols_func:
186 """
187 Convert `usecols` into a compatible format for parsing in `parsers.py`.
188
189 Parameters
190 ----------
191 usecols : object
192 The use-columns object to potentially convert.
193
194 Returns
195 -------
196 converted : object
197 The compatible format of `usecols`.
198 """
199 if usecols is None:
200 return usecols
201
202 if is_integer(usecols):
203 raise ValueError(
204 "Passing an integer for `usecols` is no longer supported. "
205 "Please pass in a list of int from 0 to `usecols` inclusive instead."
206 )
207
208 if isinstance(usecols, str):
209 return _range2cols(usecols)
210
211 return usecols
212
213
214@overload
215def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]:
216 ...
217
218
219@overload
220def validate_freeze_panes(freeze_panes: None) -> Literal[False]:
221 ...
222
223
224def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
225 if freeze_panes is not None:
226 if len(freeze_panes) == 2 and all(
227 isinstance(item, int) for item in freeze_panes
228 ):
229 return True
230
231 raise ValueError(
232 "freeze_panes must be of form (row, column) "
233 "where row and column are integers"
234 )
235
236 # freeze_panes wasn't specified, return False so it won't be applied
237 # to output sheet
238 return False
239
240
241def fill_mi_header(
242 row: list[Hashable], control_row: list[bool]
243) -> tuple[list[Hashable], list[bool]]:
244 """
245 Forward fill blank entries in row but only inside the same parent index.
246
247 Used for creating headers in Multiindex.
248
249 Parameters
250 ----------
251 row : list
252 List of items in a single row.
253 control_row : list of bool
254 Helps to determine if particular column is in same parent index as the
255 previous value. Used to stop propagation of empty cells between
256 different indexes.
257
258 Returns
259 -------
260 Returns changed row and control_row
261 """
262 last = row[0]
263 for i in range(1, len(row)):
264 if not control_row[i]:
265 last = row[i]
266
267 if row[i] == "" or row[i] is None:
268 row[i] = last
269 else:
270 control_row[i] = False
271 last = row[i]
272
273 return row, control_row
274
275
276def pop_header_name(
277 row: list[Hashable], index_col: int | Sequence[int]
278) -> tuple[Hashable | None, list[Hashable]]:
279 """
280 Pop the header name for MultiIndex parsing.
281
282 Parameters
283 ----------
284 row : list
285 The data row to parse for the header name.
286 index_col : int, list
287 The index columns for our data. Assumed to be non-null.
288
289 Returns
290 -------
291 header_name : str
292 The extracted header name.
293 trimmed_row : list
294 The original data row with the header name removed.
295 """
296 # Pop out header name and fill w/blank.
297 if is_list_like(index_col):
298 assert isinstance(index_col, Iterable)
299 i = max(index_col)
300 else:
301 assert not isinstance(index_col, Iterable)
302 i = index_col
303
304 header_name = row[i]
305 header_name = None if header_name == "" else header_name
306
307 return header_name, row[:i] + [""] + row[i + 1 :]
308
309
310def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
311 """
312 Used to combine two sources of kwargs for the backend engine.
313
314 Use of kwargs is deprecated, this function is solely for use in 1.3 and should
315 be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
316 or kwargs must be None or empty respectively.
317
318 Parameters
319 ----------
320 engine_kwargs: dict
321 kwargs to be passed through to the engine.
322 kwargs: dict
323 kwargs to be psased through to the engine (deprecated)
324
325 Returns
326 -------
327 engine_kwargs combined with kwargs
328 """
329 if engine_kwargs is None:
330 result = {}
331 else:
332 result = engine_kwargs.copy()
333 result.update(kwargs)
334 return result