1from __future__ import annotations
2
3import mmap
4from typing import (
5 TYPE_CHECKING,
6 Any,
7 cast,
8)
9
10import numpy as np
11
12from pandas.compat._optional import import_optional_dependency
13from pandas.util._decorators import doc
14
15from pandas.core.shared_docs import _shared_docs
16
17from pandas.io.excel._base import (
18 BaseExcelReader,
19 ExcelWriter,
20)
21from pandas.io.excel._util import (
22 combine_kwargs,
23 validate_freeze_panes,
24)
25
26if TYPE_CHECKING:
27 from openpyxl import Workbook
28 from openpyxl.descriptors.serialisable import Serialisable
29
30 from pandas._typing import (
31 ExcelWriterIfSheetExists,
32 FilePath,
33 ReadBuffer,
34 Scalar,
35 StorageOptions,
36 WriteExcelBuffer,
37 )
38
39
40class OpenpyxlWriter(ExcelWriter):
41 _engine = "openpyxl"
42 _supported_extensions = (".xlsx", ".xlsm")
43
44 def __init__(
45 self,
46 path: FilePath | WriteExcelBuffer | ExcelWriter,
47 engine: str | None = None,
48 date_format: str | None = None,
49 datetime_format: str | None = None,
50 mode: str = "w",
51 storage_options: StorageOptions | None = None,
52 if_sheet_exists: ExcelWriterIfSheetExists | None = None,
53 engine_kwargs: dict[str, Any] | None = None,
54 **kwargs,
55 ) -> None:
56 # Use the openpyxl module as the Excel writer.
57 from openpyxl.workbook import Workbook
58
59 engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
60
61 super().__init__(
62 path,
63 mode=mode,
64 storage_options=storage_options,
65 if_sheet_exists=if_sheet_exists,
66 engine_kwargs=engine_kwargs,
67 )
68
69 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
70 # the file and later write to it
71 if "r+" in self._mode: # Load from existing workbook
72 from openpyxl import load_workbook
73
74 try:
75 self._book = load_workbook(self._handles.handle, **engine_kwargs)
76 except TypeError:
77 self._handles.handle.close()
78 raise
79 self._handles.handle.seek(0)
80 else:
81 # Create workbook object with default optimized_write=True.
82 try:
83 self._book = Workbook(**engine_kwargs)
84 except TypeError:
85 self._handles.handle.close()
86 raise
87
88 if self.book.worksheets:
89 self.book.remove(self.book.worksheets[0])
90
91 @property
92 def book(self) -> Workbook:
93 """
94 Book instance of class openpyxl.workbook.Workbook.
95
96 This attribute can be used to access engine-specific features.
97 """
98 return self._book
99
100 @property
101 def sheets(self) -> dict[str, Any]:
102 """Mapping of sheet names to sheet objects."""
103 result = {name: self.book[name] for name in self.book.sheetnames}
104 return result
105
106 def _save(self) -> None:
107 """
108 Save workbook to disk.
109 """
110 self.book.save(self._handles.handle)
111 if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap):
112 # truncate file to the written content
113 self._handles.handle.truncate()
114
115 @classmethod
116 def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]:
117 """
118 Convert a style_dict to a set of kwargs suitable for initializing
119 or updating-on-copy an openpyxl v2 style object.
120
121 Parameters
122 ----------
123 style_dict : dict
124 A dict with zero or more of the following keys (or their synonyms).
125 'font'
126 'fill'
127 'border' ('borders')
128 'alignment'
129 'number_format'
130 'protection'
131
132 Returns
133 -------
134 style_kwargs : dict
135 A dict with the same, normalized keys as ``style_dict`` but each
136 value has been replaced with a native openpyxl style object of the
137 appropriate class.
138 """
139 _style_key_map = {"borders": "border"}
140
141 style_kwargs: dict[str, Serialisable] = {}
142 for k, v in style_dict.items():
143 k = _style_key_map.get(k, k)
144 _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
145 new_v = _conv_to_x(v)
146 if new_v:
147 style_kwargs[k] = new_v
148
149 return style_kwargs
150
151 @classmethod
152 def _convert_to_color(cls, color_spec):
153 """
154 Convert ``color_spec`` to an openpyxl v2 Color object.
155
156 Parameters
157 ----------
158 color_spec : str, dict
159 A 32-bit ARGB hex string, or a dict with zero or more of the
160 following keys.
161 'rgb'
162 'indexed'
163 'auto'
164 'theme'
165 'tint'
166 'index'
167 'type'
168
169 Returns
170 -------
171 color : openpyxl.styles.Color
172 """
173 from openpyxl.styles import Color
174
175 if isinstance(color_spec, str):
176 return Color(color_spec)
177 else:
178 return Color(**color_spec)
179
180 @classmethod
181 def _convert_to_font(cls, font_dict):
182 """
183 Convert ``font_dict`` to an openpyxl v2 Font object.
184
185 Parameters
186 ----------
187 font_dict : dict
188 A dict with zero or more of the following keys (or their synonyms).
189 'name'
190 'size' ('sz')
191 'bold' ('b')
192 'italic' ('i')
193 'underline' ('u')
194 'strikethrough' ('strike')
195 'color'
196 'vertAlign' ('vertalign')
197 'charset'
198 'scheme'
199 'family'
200 'outline'
201 'shadow'
202 'condense'
203
204 Returns
205 -------
206 font : openpyxl.styles.Font
207 """
208 from openpyxl.styles import Font
209
210 _font_key_map = {
211 "sz": "size",
212 "b": "bold",
213 "i": "italic",
214 "u": "underline",
215 "strike": "strikethrough",
216 "vertalign": "vertAlign",
217 }
218
219 font_kwargs = {}
220 for k, v in font_dict.items():
221 k = _font_key_map.get(k, k)
222 if k == "color":
223 v = cls._convert_to_color(v)
224 font_kwargs[k] = v
225
226 return Font(**font_kwargs)
227
228 @classmethod
229 def _convert_to_stop(cls, stop_seq):
230 """
231 Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
232 suitable for initializing the ``GradientFill`` ``stop`` parameter.
233
234 Parameters
235 ----------
236 stop_seq : iterable
237 An iterable that yields objects suitable for consumption by
238 ``_convert_to_color``.
239
240 Returns
241 -------
242 stop : list of openpyxl.styles.Color
243 """
244 return map(cls._convert_to_color, stop_seq)
245
246 @classmethod
247 def _convert_to_fill(cls, fill_dict: dict[str, Any]):
248 """
249 Convert ``fill_dict`` to an openpyxl v2 Fill object.
250
251 Parameters
252 ----------
253 fill_dict : dict
254 A dict with one or more of the following keys (or their synonyms),
255 'fill_type' ('patternType', 'patterntype')
256 'start_color' ('fgColor', 'fgcolor')
257 'end_color' ('bgColor', 'bgcolor')
258 or one or more of the following keys (or their synonyms).
259 'type' ('fill_type')
260 'degree'
261 'left'
262 'right'
263 'top'
264 'bottom'
265 'stop'
266
267 Returns
268 -------
269 fill : openpyxl.styles.Fill
270 """
271 from openpyxl.styles import (
272 GradientFill,
273 PatternFill,
274 )
275
276 _pattern_fill_key_map = {
277 "patternType": "fill_type",
278 "patterntype": "fill_type",
279 "fgColor": "start_color",
280 "fgcolor": "start_color",
281 "bgColor": "end_color",
282 "bgcolor": "end_color",
283 }
284
285 _gradient_fill_key_map = {"fill_type": "type"}
286
287 pfill_kwargs = {}
288 gfill_kwargs = {}
289 for k, v in fill_dict.items():
290 pk = _pattern_fill_key_map.get(k)
291 gk = _gradient_fill_key_map.get(k)
292 if pk in ["start_color", "end_color"]:
293 v = cls._convert_to_color(v)
294 if gk == "stop":
295 v = cls._convert_to_stop(v)
296 if pk:
297 pfill_kwargs[pk] = v
298 elif gk:
299 gfill_kwargs[gk] = v
300 else:
301 pfill_kwargs[k] = v
302 gfill_kwargs[k] = v
303
304 try:
305 return PatternFill(**pfill_kwargs)
306 except TypeError:
307 return GradientFill(**gfill_kwargs)
308
309 @classmethod
310 def _convert_to_side(cls, side_spec):
311 """
312 Convert ``side_spec`` to an openpyxl v2 Side object.
313
314 Parameters
315 ----------
316 side_spec : str, dict
317 A string specifying the border style, or a dict with zero or more
318 of the following keys (or their synonyms).
319 'style' ('border_style')
320 'color'
321
322 Returns
323 -------
324 side : openpyxl.styles.Side
325 """
326 from openpyxl.styles import Side
327
328 _side_key_map = {"border_style": "style"}
329
330 if isinstance(side_spec, str):
331 return Side(style=side_spec)
332
333 side_kwargs = {}
334 for k, v in side_spec.items():
335 k = _side_key_map.get(k, k)
336 if k == "color":
337 v = cls._convert_to_color(v)
338 side_kwargs[k] = v
339
340 return Side(**side_kwargs)
341
342 @classmethod
343 def _convert_to_border(cls, border_dict):
344 """
345 Convert ``border_dict`` to an openpyxl v2 Border object.
346
347 Parameters
348 ----------
349 border_dict : dict
350 A dict with zero or more of the following keys (or their synonyms).
351 'left'
352 'right'
353 'top'
354 'bottom'
355 'diagonal'
356 'diagonal_direction'
357 'vertical'
358 'horizontal'
359 'diagonalUp' ('diagonalup')
360 'diagonalDown' ('diagonaldown')
361 'outline'
362
363 Returns
364 -------
365 border : openpyxl.styles.Border
366 """
367 from openpyxl.styles import Border
368
369 _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
370
371 border_kwargs = {}
372 for k, v in border_dict.items():
373 k = _border_key_map.get(k, k)
374 if k == "color":
375 v = cls._convert_to_color(v)
376 if k in ["left", "right", "top", "bottom", "diagonal"]:
377 v = cls._convert_to_side(v)
378 border_kwargs[k] = v
379
380 return Border(**border_kwargs)
381
382 @classmethod
383 def _convert_to_alignment(cls, alignment_dict):
384 """
385 Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
386
387 Parameters
388 ----------
389 alignment_dict : dict
390 A dict with zero or more of the following keys (or their synonyms).
391 'horizontal'
392 'vertical'
393 'text_rotation'
394 'wrap_text'
395 'shrink_to_fit'
396 'indent'
397 Returns
398 -------
399 alignment : openpyxl.styles.Alignment
400 """
401 from openpyxl.styles import Alignment
402
403 return Alignment(**alignment_dict)
404
405 @classmethod
406 def _convert_to_number_format(cls, number_format_dict):
407 """
408 Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
409 initializer.
410
411 Parameters
412 ----------
413 number_format_dict : dict
414 A dict with zero or more of the following keys.
415 'format_code' : str
416
417 Returns
418 -------
419 number_format : str
420 """
421 return number_format_dict["format_code"]
422
423 @classmethod
424 def _convert_to_protection(cls, protection_dict):
425 """
426 Convert ``protection_dict`` to an openpyxl v2 Protection object.
427
428 Parameters
429 ----------
430 protection_dict : dict
431 A dict with zero or more of the following keys.
432 'locked'
433 'hidden'
434
435 Returns
436 -------
437 """
438 from openpyxl.styles import Protection
439
440 return Protection(**protection_dict)
441
442 def _write_cells(
443 self,
444 cells,
445 sheet_name: str | None = None,
446 startrow: int = 0,
447 startcol: int = 0,
448 freeze_panes: tuple[int, int] | None = None,
449 ) -> None:
450 # Write the frame cells using openpyxl.
451 sheet_name = self._get_sheet_name(sheet_name)
452
453 _style_cache: dict[str, dict[str, Serialisable]] = {}
454
455 if sheet_name in self.sheets and self._if_sheet_exists != "new":
456 if "r+" in self._mode:
457 if self._if_sheet_exists == "replace":
458 old_wks = self.sheets[sheet_name]
459 target_index = self.book.index(old_wks)
460 del self.book[sheet_name]
461 wks = self.book.create_sheet(sheet_name, target_index)
462 elif self._if_sheet_exists == "error":
463 raise ValueError(
464 f"Sheet '{sheet_name}' already exists and "
465 f"if_sheet_exists is set to 'error'."
466 )
467 elif self._if_sheet_exists == "overlay":
468 wks = self.sheets[sheet_name]
469 else:
470 raise ValueError(
471 f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. "
472 "Valid options are 'error', 'new', 'replace' and 'overlay'."
473 )
474 else:
475 wks = self.sheets[sheet_name]
476 else:
477 wks = self.book.create_sheet()
478 wks.title = sheet_name
479
480 if validate_freeze_panes(freeze_panes):
481 freeze_panes = cast(tuple[int, int], freeze_panes)
482 wks.freeze_panes = wks.cell(
483 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
484 )
485
486 for cell in cells:
487 xcell = wks.cell(
488 row=startrow + cell.row + 1, column=startcol + cell.col + 1
489 )
490 xcell.value, fmt = self._value_with_fmt(cell.val)
491 if fmt:
492 xcell.number_format = fmt
493
494 style_kwargs: dict[str, Serialisable] | None = {}
495 if cell.style:
496 key = str(cell.style)
497 style_kwargs = _style_cache.get(key)
498 if style_kwargs is None:
499 style_kwargs = self._convert_to_style_kwargs(cell.style)
500 _style_cache[key] = style_kwargs
501
502 if style_kwargs:
503 for k, v in style_kwargs.items():
504 setattr(xcell, k, v)
505
506 if cell.mergestart is not None and cell.mergeend is not None:
507 wks.merge_cells(
508 start_row=startrow + cell.row + 1,
509 start_column=startcol + cell.col + 1,
510 end_column=startcol + cell.mergeend + 1,
511 end_row=startrow + cell.mergestart + 1,
512 )
513
514 # When cells are merged only the top-left cell is preserved
515 # The behaviour of the other cells in a merged range is
516 # undefined
517 if style_kwargs:
518 first_row = startrow + cell.row + 1
519 last_row = startrow + cell.mergestart + 1
520 first_col = startcol + cell.col + 1
521 last_col = startcol + cell.mergeend + 1
522
523 for row in range(first_row, last_row + 1):
524 for col in range(first_col, last_col + 1):
525 if row == first_row and col == first_col:
526 # Ignore first cell. It is already handled.
527 continue
528 xcell = wks.cell(column=col, row=row)
529 for k, v in style_kwargs.items():
530 setattr(xcell, k, v)
531
532
533class OpenpyxlReader(BaseExcelReader["Workbook"]):
534 @doc(storage_options=_shared_docs["storage_options"])
535 def __init__(
536 self,
537 filepath_or_buffer: FilePath | ReadBuffer[bytes],
538 storage_options: StorageOptions | None = None,
539 engine_kwargs: dict | None = None,
540 ) -> None:
541 """
542 Reader using openpyxl engine.
543
544 Parameters
545 ----------
546 filepath_or_buffer : str, path object or Workbook
547 Object to be parsed.
548 {storage_options}
549 engine_kwargs : dict, optional
550 Arbitrary keyword arguments passed to excel engine.
551 """
552 import_optional_dependency("openpyxl")
553 super().__init__(
554 filepath_or_buffer,
555 storage_options=storage_options,
556 engine_kwargs=engine_kwargs,
557 )
558
559 @property
560 def _workbook_class(self) -> type[Workbook]:
561 from openpyxl import Workbook
562
563 return Workbook
564
565 def load_workbook(
566 self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs
567 ) -> Workbook:
568 from openpyxl import load_workbook
569
570 default_kwargs = {"read_only": True, "data_only": True, "keep_links": False}
571
572 return load_workbook(
573 filepath_or_buffer,
574 **(default_kwargs | engine_kwargs),
575 )
576
577 @property
578 def sheet_names(self) -> list[str]:
579 return [sheet.title for sheet in self.book.worksheets]
580
581 def get_sheet_by_name(self, name: str):
582 self.raise_if_bad_sheet_by_name(name)
583 return self.book[name]
584
585 def get_sheet_by_index(self, index: int):
586 self.raise_if_bad_sheet_by_index(index)
587 return self.book.worksheets[index]
588
589 def _convert_cell(self, cell) -> Scalar:
590 from openpyxl.cell.cell import (
591 TYPE_ERROR,
592 TYPE_NUMERIC,
593 )
594
595 if cell.value is None:
596 return "" # compat with xlrd
597 elif cell.data_type == TYPE_ERROR:
598 return np.nan
599 elif cell.data_type == TYPE_NUMERIC:
600 val = int(cell.value)
601 if val == cell.value:
602 return val
603 return float(cell.value)
604
605 return cell.value
606
607 def get_sheet_data(
608 self, sheet, file_rows_needed: int | None = None
609 ) -> list[list[Scalar]]:
610 if self.book.read_only:
611 sheet.reset_dimensions()
612
613 data: list[list[Scalar]] = []
614 last_row_with_data = -1
615 for row_number, row in enumerate(sheet.rows):
616 converted_row = [self._convert_cell(cell) for cell in row]
617 while converted_row and converted_row[-1] == "":
618 # trim trailing empty elements
619 converted_row.pop()
620 if converted_row:
621 last_row_with_data = row_number
622 data.append(converted_row)
623 if file_rows_needed is not None and len(data) >= file_rows_needed:
624 break
625
626 # Trim trailing empty rows
627 data = data[: last_row_with_data + 1]
628
629 if len(data) > 0:
630 # extend rows to max width
631 max_width = max(len(data_row) for data_row in data)
632 if min(len(data_row) for data_row in data) < max_width:
633 empty_cell: list[Scalar] = [""]
634 data = [
635 data_row + (max_width - len(data_row)) * empty_cell
636 for data_row in data
637 ]
638
639 return data