1from __future__ import annotations
2
3import mmap
4from typing import (
5 TYPE_CHECKING,
6 Any,
7 Tuple,
8 cast,
9)
10
11import numpy as np
12
13from pandas._typing import (
14 FilePath,
15 ReadBuffer,
16 Scalar,
17 StorageOptions,
18 WriteExcelBuffer,
19)
20from pandas.compat._optional import import_optional_dependency
21from pandas.util._decorators import doc
22
23from pandas.core.shared_docs import _shared_docs
24
25from pandas.io.excel._base import (
26 BaseExcelReader,
27 ExcelWriter,
28)
29from pandas.io.excel._util import (
30 combine_kwargs,
31 validate_freeze_panes,
32)
33
34if TYPE_CHECKING:
35 from openpyxl.descriptors.serialisable import Serialisable
36 from openpyxl.workbook import Workbook
37
38
39class OpenpyxlWriter(ExcelWriter):
40 _engine = "openpyxl"
41 _supported_extensions = (".xlsx", ".xlsm")
42
43 def __init__(
44 self,
45 path: FilePath | WriteExcelBuffer | ExcelWriter,
46 engine: str | None = None,
47 date_format: str | None = None,
48 datetime_format: str | None = None,
49 mode: str = "w",
50 storage_options: StorageOptions = None,
51 if_sheet_exists: str | None = None,
52 engine_kwargs: dict[str, Any] | None = None,
53 **kwargs,
54 ) -> None:
55 # Use the openpyxl module as the Excel writer.
56 from openpyxl.workbook import Workbook
57
58 engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
59
60 super().__init__(
61 path,
62 mode=mode,
63 storage_options=storage_options,
64 if_sheet_exists=if_sheet_exists,
65 engine_kwargs=engine_kwargs,
66 )
67
68 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
69 # the file and later write to it
70 if "r+" in self._mode: # Load from existing workbook
71 from openpyxl import load_workbook
72
73 try:
74 self._book = load_workbook(self._handles.handle, **engine_kwargs)
75 except TypeError:
76 self._handles.handle.close()
77 raise
78 self._handles.handle.seek(0)
79 else:
80 # Create workbook object with default optimized_write=True.
81 try:
82 self._book = Workbook(**engine_kwargs)
83 except TypeError:
84 self._handles.handle.close()
85 raise
86
87 if self.book.worksheets:
88 self.book.remove(self.book.worksheets[0])
89
90 @property
91 def book(self) -> Workbook:
92 """
93 Book instance of class openpyxl.workbook.Workbook.
94
95 This attribute can be used to access engine-specific features.
96 """
97 return self._book
98
99 @property
100 def sheets(self) -> dict[str, Any]:
101 """Mapping of sheet names to sheet objects."""
102 result = {name: self.book[name] for name in self.book.sheetnames}
103 return result
104
105 def _save(self) -> None:
106 """
107 Save workbook to disk.
108 """
109 self.book.save(self._handles.handle)
110 if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap):
111 # truncate file to the written content
112 self._handles.handle.truncate()
113
114 @classmethod
115 def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]:
116 """
117 Convert a style_dict to a set of kwargs suitable for initializing
118 or updating-on-copy an openpyxl v2 style object.
119
120 Parameters
121 ----------
122 style_dict : dict
123 A dict with zero or more of the following keys (or their synonyms).
124 'font'
125 'fill'
126 'border' ('borders')
127 'alignment'
128 'number_format'
129 'protection'
130
131 Returns
132 -------
133 style_kwargs : dict
134 A dict with the same, normalized keys as ``style_dict`` but each
135 value has been replaced with a native openpyxl style object of the
136 appropriate class.
137 """
138 _style_key_map = {"borders": "border"}
139
140 style_kwargs: dict[str, Serialisable] = {}
141 for k, v in style_dict.items():
142 k = _style_key_map.get(k, k)
143 _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
144 new_v = _conv_to_x(v)
145 if new_v:
146 style_kwargs[k] = new_v
147
148 return style_kwargs
149
150 @classmethod
151 def _convert_to_color(cls, color_spec):
152 """
153 Convert ``color_spec`` to an openpyxl v2 Color object.
154
155 Parameters
156 ----------
157 color_spec : str, dict
158 A 32-bit ARGB hex string, or a dict with zero or more of the
159 following keys.
160 'rgb'
161 'indexed'
162 'auto'
163 'theme'
164 'tint'
165 'index'
166 'type'
167
168 Returns
169 -------
170 color : openpyxl.styles.Color
171 """
172 from openpyxl.styles import Color
173
174 if isinstance(color_spec, str):
175 return Color(color_spec)
176 else:
177 return Color(**color_spec)
178
179 @classmethod
180 def _convert_to_font(cls, font_dict):
181 """
182 Convert ``font_dict`` to an openpyxl v2 Font object.
183
184 Parameters
185 ----------
186 font_dict : dict
187 A dict with zero or more of the following keys (or their synonyms).
188 'name'
189 'size' ('sz')
190 'bold' ('b')
191 'italic' ('i')
192 'underline' ('u')
193 'strikethrough' ('strike')
194 'color'
195 'vertAlign' ('vertalign')
196 'charset'
197 'scheme'
198 'family'
199 'outline'
200 'shadow'
201 'condense'
202
203 Returns
204 -------
205 font : openpyxl.styles.Font
206 """
207 from openpyxl.styles import Font
208
209 _font_key_map = {
210 "sz": "size",
211 "b": "bold",
212 "i": "italic",
213 "u": "underline",
214 "strike": "strikethrough",
215 "vertalign": "vertAlign",
216 }
217
218 font_kwargs = {}
219 for k, v in font_dict.items():
220 k = _font_key_map.get(k, k)
221 if k == "color":
222 v = cls._convert_to_color(v)
223 font_kwargs[k] = v
224
225 return Font(**font_kwargs)
226
227 @classmethod
228 def _convert_to_stop(cls, stop_seq):
229 """
230 Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
231 suitable for initializing the ``GradientFill`` ``stop`` parameter.
232
233 Parameters
234 ----------
235 stop_seq : iterable
236 An iterable that yields objects suitable for consumption by
237 ``_convert_to_color``.
238
239 Returns
240 -------
241 stop : list of openpyxl.styles.Color
242 """
243 return map(cls._convert_to_color, stop_seq)
244
245 @classmethod
246 def _convert_to_fill(cls, fill_dict: dict[str, Any]):
247 """
248 Convert ``fill_dict`` to an openpyxl v2 Fill object.
249
250 Parameters
251 ----------
252 fill_dict : dict
253 A dict with one or more of the following keys (or their synonyms),
254 'fill_type' ('patternType', 'patterntype')
255 'start_color' ('fgColor', 'fgcolor')
256 'end_color' ('bgColor', 'bgcolor')
257 or one or more of the following keys (or their synonyms).
258 'type' ('fill_type')
259 'degree'
260 'left'
261 'right'
262 'top'
263 'bottom'
264 'stop'
265
266 Returns
267 -------
268 fill : openpyxl.styles.Fill
269 """
270 from openpyxl.styles import (
271 GradientFill,
272 PatternFill,
273 )
274
275 _pattern_fill_key_map = {
276 "patternType": "fill_type",
277 "patterntype": "fill_type",
278 "fgColor": "start_color",
279 "fgcolor": "start_color",
280 "bgColor": "end_color",
281 "bgcolor": "end_color",
282 }
283
284 _gradient_fill_key_map = {"fill_type": "type"}
285
286 pfill_kwargs = {}
287 gfill_kwargs = {}
288 for k, v in fill_dict.items():
289 pk = _pattern_fill_key_map.get(k)
290 gk = _gradient_fill_key_map.get(k)
291 if pk in ["start_color", "end_color"]:
292 v = cls._convert_to_color(v)
293 if gk == "stop":
294 v = cls._convert_to_stop(v)
295 if pk:
296 pfill_kwargs[pk] = v
297 elif gk:
298 gfill_kwargs[gk] = v
299 else:
300 pfill_kwargs[k] = v
301 gfill_kwargs[k] = v
302
303 try:
304 return PatternFill(**pfill_kwargs)
305 except TypeError:
306 return GradientFill(**gfill_kwargs)
307
308 @classmethod
309 def _convert_to_side(cls, side_spec):
310 """
311 Convert ``side_spec`` to an openpyxl v2 Side object.
312
313 Parameters
314 ----------
315 side_spec : str, dict
316 A string specifying the border style, or a dict with zero or more
317 of the following keys (or their synonyms).
318 'style' ('border_style')
319 'color'
320
321 Returns
322 -------
323 side : openpyxl.styles.Side
324 """
325 from openpyxl.styles import Side
326
327 _side_key_map = {"border_style": "style"}
328
329 if isinstance(side_spec, str):
330 return Side(style=side_spec)
331
332 side_kwargs = {}
333 for k, v in side_spec.items():
334 k = _side_key_map.get(k, k)
335 if k == "color":
336 v = cls._convert_to_color(v)
337 side_kwargs[k] = v
338
339 return Side(**side_kwargs)
340
341 @classmethod
342 def _convert_to_border(cls, border_dict):
343 """
344 Convert ``border_dict`` to an openpyxl v2 Border object.
345
346 Parameters
347 ----------
348 border_dict : dict
349 A dict with zero or more of the following keys (or their synonyms).
350 'left'
351 'right'
352 'top'
353 'bottom'
354 'diagonal'
355 'diagonal_direction'
356 'vertical'
357 'horizontal'
358 'diagonalUp' ('diagonalup')
359 'diagonalDown' ('diagonaldown')
360 'outline'
361
362 Returns
363 -------
364 border : openpyxl.styles.Border
365 """
366 from openpyxl.styles import Border
367
368 _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
369
370 border_kwargs = {}
371 for k, v in border_dict.items():
372 k = _border_key_map.get(k, k)
373 if k == "color":
374 v = cls._convert_to_color(v)
375 if k in ["left", "right", "top", "bottom", "diagonal"]:
376 v = cls._convert_to_side(v)
377 border_kwargs[k] = v
378
379 return Border(**border_kwargs)
380
381 @classmethod
382 def _convert_to_alignment(cls, alignment_dict):
383 """
384 Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
385
386 Parameters
387 ----------
388 alignment_dict : dict
389 A dict with zero or more of the following keys (or their synonyms).
390 'horizontal'
391 'vertical'
392 'text_rotation'
393 'wrap_text'
394 'shrink_to_fit'
395 'indent'
396 Returns
397 -------
398 alignment : openpyxl.styles.Alignment
399 """
400 from openpyxl.styles import Alignment
401
402 return Alignment(**alignment_dict)
403
404 @classmethod
405 def _convert_to_number_format(cls, number_format_dict):
406 """
407 Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
408 initializer.
409
410 Parameters
411 ----------
412 number_format_dict : dict
413 A dict with zero or more of the following keys.
414 'format_code' : str
415
416 Returns
417 -------
418 number_format : str
419 """
420 return number_format_dict["format_code"]
421
422 @classmethod
423 def _convert_to_protection(cls, protection_dict):
424 """
425 Convert ``protection_dict`` to an openpyxl v2 Protection object.
426
427 Parameters
428 ----------
429 protection_dict : dict
430 A dict with zero or more of the following keys.
431 'locked'
432 'hidden'
433
434 Returns
435 -------
436 """
437 from openpyxl.styles import Protection
438
439 return Protection(**protection_dict)
440
441 def _write_cells(
442 self,
443 cells,
444 sheet_name: str | None = None,
445 startrow: int = 0,
446 startcol: int = 0,
447 freeze_panes: tuple[int, int] | None = None,
448 ) -> None:
449 # Write the frame cells using openpyxl.
450 sheet_name = self._get_sheet_name(sheet_name)
451
452 _style_cache: dict[str, dict[str, Serialisable]] = {}
453
454 if sheet_name in self.sheets and self._if_sheet_exists != "new":
455 if "r+" in self._mode:
456 if self._if_sheet_exists == "replace":
457 old_wks = self.sheets[sheet_name]
458 target_index = self.book.index(old_wks)
459 del self.book[sheet_name]
460 wks = self.book.create_sheet(sheet_name, target_index)
461 elif self._if_sheet_exists == "error":
462 raise ValueError(
463 f"Sheet '{sheet_name}' already exists and "
464 f"if_sheet_exists is set to 'error'."
465 )
466 elif self._if_sheet_exists == "overlay":
467 wks = self.sheets[sheet_name]
468 else:
469 raise ValueError(
470 f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. "
471 "Valid options are 'error', 'new', 'replace' and 'overlay'."
472 )
473 else:
474 wks = self.sheets[sheet_name]
475 else:
476 wks = self.book.create_sheet()
477 wks.title = sheet_name
478
479 if validate_freeze_panes(freeze_panes):
480 freeze_panes = cast(Tuple[int, int], freeze_panes)
481 wks.freeze_panes = wks.cell(
482 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
483 )
484
485 for cell in cells:
486 xcell = wks.cell(
487 row=startrow + cell.row + 1, column=startcol + cell.col + 1
488 )
489 xcell.value, fmt = self._value_with_fmt(cell.val)
490 if fmt:
491 xcell.number_format = fmt
492
493 style_kwargs: dict[str, Serialisable] | None = {}
494 if cell.style:
495 key = str(cell.style)
496 style_kwargs = _style_cache.get(key)
497 if style_kwargs is None:
498 style_kwargs = self._convert_to_style_kwargs(cell.style)
499 _style_cache[key] = style_kwargs
500
501 if style_kwargs:
502 for k, v in style_kwargs.items():
503 setattr(xcell, k, v)
504
505 if cell.mergestart is not None and cell.mergeend is not None:
506 wks.merge_cells(
507 start_row=startrow + cell.row + 1,
508 start_column=startcol + cell.col + 1,
509 end_column=startcol + cell.mergeend + 1,
510 end_row=startrow + cell.mergestart + 1,
511 )
512
513 # When cells are merged only the top-left cell is preserved
514 # The behaviour of the other cells in a merged range is
515 # undefined
516 if style_kwargs:
517 first_row = startrow + cell.row + 1
518 last_row = startrow + cell.mergestart + 1
519 first_col = startcol + cell.col + 1
520 last_col = startcol + cell.mergeend + 1
521
522 for row in range(first_row, last_row + 1):
523 for col in range(first_col, last_col + 1):
524 if row == first_row and col == first_col:
525 # Ignore first cell. It is already handled.
526 continue
527 xcell = wks.cell(column=col, row=row)
528 for k, v in style_kwargs.items():
529 setattr(xcell, k, v)
530
531
532class OpenpyxlReader(BaseExcelReader):
533 @doc(storage_options=_shared_docs["storage_options"])
534 def __init__(
535 self,
536 filepath_or_buffer: FilePath | ReadBuffer[bytes],
537 storage_options: StorageOptions = None,
538 ) -> None:
539 """
540 Reader using openpyxl engine.
541
542 Parameters
543 ----------
544 filepath_or_buffer : str, path object or Workbook
545 Object to be parsed.
546 {storage_options}
547 """
548 import_optional_dependency("openpyxl")
549 super().__init__(filepath_or_buffer, storage_options=storage_options)
550
551 @property
552 def _workbook_class(self):
553 from openpyxl import Workbook
554
555 return Workbook
556
557 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
558 from openpyxl import load_workbook
559
560 return load_workbook(
561 filepath_or_buffer, read_only=True, data_only=True, keep_links=False
562 )
563
564 @property
565 def sheet_names(self) -> list[str]:
566 return [sheet.title for sheet in self.book.worksheets]
567
568 def get_sheet_by_name(self, name: str):
569 self.raise_if_bad_sheet_by_name(name)
570 return self.book[name]
571
572 def get_sheet_by_index(self, index: int):
573 self.raise_if_bad_sheet_by_index(index)
574 return self.book.worksheets[index]
575
576 def _convert_cell(self, cell) -> Scalar:
577 from openpyxl.cell.cell import (
578 TYPE_ERROR,
579 TYPE_NUMERIC,
580 )
581
582 if cell.value is None:
583 return "" # compat with xlrd
584 elif cell.data_type == TYPE_ERROR:
585 return np.nan
586 elif cell.data_type == TYPE_NUMERIC:
587 val = int(cell.value)
588 if val == cell.value:
589 return val
590 return float(cell.value)
591
592 return cell.value
593
594 def get_sheet_data(
595 self, sheet, file_rows_needed: int | None = None
596 ) -> list[list[Scalar]]:
597 if self.book.read_only:
598 sheet.reset_dimensions()
599
600 data: list[list[Scalar]] = []
601 last_row_with_data = -1
602 for row_number, row in enumerate(sheet.rows):
603 converted_row = [self._convert_cell(cell) for cell in row]
604 while converted_row and converted_row[-1] == "":
605 # trim trailing empty elements
606 converted_row.pop()
607 if converted_row:
608 last_row_with_data = row_number
609 data.append(converted_row)
610 if file_rows_needed is not None and len(data) >= file_rows_needed:
611 break
612
613 # Trim trailing empty rows
614 data = data[: last_row_with_data + 1]
615
616 if len(data) > 0:
617 # extend rows to max width
618 max_width = max(len(data_row) for data_row in data)
619 if min(len(data_row) for data_row in data) < max_width:
620 empty_cell: list[Scalar] = [""]
621 data = [
622 data_row + (max_width - len(data_row)) * empty_cell
623 for data_row in data
624 ]
625
626 return data