Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/excel/_openpyxl.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

243 statements  

1from __future__ import annotations 

2 

3import mmap 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 cast, 

8) 

9 

10import numpy as np 

11 

12from pandas.compat._optional import import_optional_dependency 

13from pandas.util._decorators import doc 

14 

15from pandas.core.shared_docs import _shared_docs 

16 

17from pandas.io.excel._base import ( 

18 BaseExcelReader, 

19 ExcelWriter, 

20) 

21from pandas.io.excel._util import ( 

22 combine_kwargs, 

23 validate_freeze_panes, 

24) 

25 

26if TYPE_CHECKING: 

27 from openpyxl import Workbook 

28 from openpyxl.descriptors.serialisable import Serialisable 

29 

30 from pandas._typing import ( 

31 ExcelWriterIfSheetExists, 

32 FilePath, 

33 ReadBuffer, 

34 Scalar, 

35 StorageOptions, 

36 WriteExcelBuffer, 

37 ) 

38 

39 

40class OpenpyxlWriter(ExcelWriter): 

41 _engine = "openpyxl" 

42 _supported_extensions = (".xlsx", ".xlsm") 

43 

44 def __init__( 

45 self, 

46 path: FilePath | WriteExcelBuffer | ExcelWriter, 

47 engine: str | None = None, 

48 date_format: str | None = None, 

49 datetime_format: str | None = None, 

50 mode: str = "w", 

51 storage_options: StorageOptions | None = None, 

52 if_sheet_exists: ExcelWriterIfSheetExists | None = None, 

53 engine_kwargs: dict[str, Any] | None = None, 

54 **kwargs, 

55 ) -> None: 

56 # Use the openpyxl module as the Excel writer. 

57 from openpyxl.workbook import Workbook 

58 

59 engine_kwargs = combine_kwargs(engine_kwargs, kwargs) 

60 

61 super().__init__( 

62 path, 

63 mode=mode, 

64 storage_options=storage_options, 

65 if_sheet_exists=if_sheet_exists, 

66 engine_kwargs=engine_kwargs, 

67 ) 

68 

69 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from 

70 # the file and later write to it 

71 if "r+" in self._mode: # Load from existing workbook 

72 from openpyxl import load_workbook 

73 

74 try: 

75 self._book = load_workbook(self._handles.handle, **engine_kwargs) 

76 except TypeError: 

77 self._handles.handle.close() 

78 raise 

79 self._handles.handle.seek(0) 

80 else: 

81 # Create workbook object with default optimized_write=True. 

82 try: 

83 self._book = Workbook(**engine_kwargs) 

84 except TypeError: 

85 self._handles.handle.close() 

86 raise 

87 

88 if self.book.worksheets: 

89 self.book.remove(self.book.worksheets[0]) 

90 

91 @property 

92 def book(self) -> Workbook: 

93 """ 

94 Book instance of class openpyxl.workbook.Workbook. 

95 

96 This attribute can be used to access engine-specific features. 

97 """ 

98 return self._book 

99 

100 @property 

101 def sheets(self) -> dict[str, Any]: 

102 """Mapping of sheet names to sheet objects.""" 

103 result = {name: self.book[name] for name in self.book.sheetnames} 

104 return result 

105 

106 def _save(self) -> None: 

107 """ 

108 Save workbook to disk. 

109 """ 

110 self.book.save(self._handles.handle) 

111 if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap): 

112 # truncate file to the written content 

113 self._handles.handle.truncate() 

114 

115 @classmethod 

116 def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: 

117 """ 

118 Convert a style_dict to a set of kwargs suitable for initializing 

119 or updating-on-copy an openpyxl v2 style object. 

120 

121 Parameters 

122 ---------- 

123 style_dict : dict 

124 A dict with zero or more of the following keys (or their synonyms). 

125 'font' 

126 'fill' 

127 'border' ('borders') 

128 'alignment' 

129 'number_format' 

130 'protection' 

131 

132 Returns 

133 ------- 

134 style_kwargs : dict 

135 A dict with the same, normalized keys as ``style_dict`` but each 

136 value has been replaced with a native openpyxl style object of the 

137 appropriate class. 

138 """ 

139 _style_key_map = {"borders": "border"} 

140 

141 style_kwargs: dict[str, Serialisable] = {} 

142 for k, v in style_dict.items(): 

143 k = _style_key_map.get(k, k) 

144 _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) 

145 new_v = _conv_to_x(v) 

146 if new_v: 

147 style_kwargs[k] = new_v 

148 

149 return style_kwargs 

150 

151 @classmethod 

152 def _convert_to_color(cls, color_spec): 

153 """ 

154 Convert ``color_spec`` to an openpyxl v2 Color object. 

155 

156 Parameters 

157 ---------- 

158 color_spec : str, dict 

159 A 32-bit ARGB hex string, or a dict with zero or more of the 

160 following keys. 

161 'rgb' 

162 'indexed' 

163 'auto' 

164 'theme' 

165 'tint' 

166 'index' 

167 'type' 

168 

169 Returns 

170 ------- 

171 color : openpyxl.styles.Color 

172 """ 

173 from openpyxl.styles import Color 

174 

175 if isinstance(color_spec, str): 

176 return Color(color_spec) 

177 else: 

178 return Color(**color_spec) 

179 

180 @classmethod 

181 def _convert_to_font(cls, font_dict): 

182 """ 

183 Convert ``font_dict`` to an openpyxl v2 Font object. 

184 

185 Parameters 

186 ---------- 

187 font_dict : dict 

188 A dict with zero or more of the following keys (or their synonyms). 

189 'name' 

190 'size' ('sz') 

191 'bold' ('b') 

192 'italic' ('i') 

193 'underline' ('u') 

194 'strikethrough' ('strike') 

195 'color' 

196 'vertAlign' ('vertalign') 

197 'charset' 

198 'scheme' 

199 'family' 

200 'outline' 

201 'shadow' 

202 'condense' 

203 

204 Returns 

205 ------- 

206 font : openpyxl.styles.Font 

207 """ 

208 from openpyxl.styles import Font 

209 

210 _font_key_map = { 

211 "sz": "size", 

212 "b": "bold", 

213 "i": "italic", 

214 "u": "underline", 

215 "strike": "strikethrough", 

216 "vertalign": "vertAlign", 

217 } 

218 

219 font_kwargs = {} 

220 for k, v in font_dict.items(): 

221 k = _font_key_map.get(k, k) 

222 if k == "color": 

223 v = cls._convert_to_color(v) 

224 font_kwargs[k] = v 

225 

226 return Font(**font_kwargs) 

227 

228 @classmethod 

229 def _convert_to_stop(cls, stop_seq): 

230 """ 

231 Convert ``stop_seq`` to a list of openpyxl v2 Color objects, 

232 suitable for initializing the ``GradientFill`` ``stop`` parameter. 

233 

234 Parameters 

235 ---------- 

236 stop_seq : iterable 

237 An iterable that yields objects suitable for consumption by 

238 ``_convert_to_color``. 

239 

240 Returns 

241 ------- 

242 stop : list of openpyxl.styles.Color 

243 """ 

244 return map(cls._convert_to_color, stop_seq) 

245 

246 @classmethod 

247 def _convert_to_fill(cls, fill_dict: dict[str, Any]): 

248 """ 

249 Convert ``fill_dict`` to an openpyxl v2 Fill object. 

250 

251 Parameters 

252 ---------- 

253 fill_dict : dict 

254 A dict with one or more of the following keys (or their synonyms), 

255 'fill_type' ('patternType', 'patterntype') 

256 'start_color' ('fgColor', 'fgcolor') 

257 'end_color' ('bgColor', 'bgcolor') 

258 or one or more of the following keys (or their synonyms). 

259 'type' ('fill_type') 

260 'degree' 

261 'left' 

262 'right' 

263 'top' 

264 'bottom' 

265 'stop' 

266 

267 Returns 

268 ------- 

269 fill : openpyxl.styles.Fill 

270 """ 

271 from openpyxl.styles import ( 

272 GradientFill, 

273 PatternFill, 

274 ) 

275 

276 _pattern_fill_key_map = { 

277 "patternType": "fill_type", 

278 "patterntype": "fill_type", 

279 "fgColor": "start_color", 

280 "fgcolor": "start_color", 

281 "bgColor": "end_color", 

282 "bgcolor": "end_color", 

283 } 

284 

285 _gradient_fill_key_map = {"fill_type": "type"} 

286 

287 pfill_kwargs = {} 

288 gfill_kwargs = {} 

289 for k, v in fill_dict.items(): 

290 pk = _pattern_fill_key_map.get(k) 

291 gk = _gradient_fill_key_map.get(k) 

292 if pk in ["start_color", "end_color"]: 

293 v = cls._convert_to_color(v) 

294 if gk == "stop": 

295 v = cls._convert_to_stop(v) 

296 if pk: 

297 pfill_kwargs[pk] = v 

298 elif gk: 

299 gfill_kwargs[gk] = v 

300 else: 

301 pfill_kwargs[k] = v 

302 gfill_kwargs[k] = v 

303 

304 try: 

305 return PatternFill(**pfill_kwargs) 

306 except TypeError: 

307 return GradientFill(**gfill_kwargs) 

308 

309 @classmethod 

310 def _convert_to_side(cls, side_spec): 

311 """ 

312 Convert ``side_spec`` to an openpyxl v2 Side object. 

313 

314 Parameters 

315 ---------- 

316 side_spec : str, dict 

317 A string specifying the border style, or a dict with zero or more 

318 of the following keys (or their synonyms). 

319 'style' ('border_style') 

320 'color' 

321 

322 Returns 

323 ------- 

324 side : openpyxl.styles.Side 

325 """ 

326 from openpyxl.styles import Side 

327 

328 _side_key_map = {"border_style": "style"} 

329 

330 if isinstance(side_spec, str): 

331 return Side(style=side_spec) 

332 

333 side_kwargs = {} 

334 for k, v in side_spec.items(): 

335 k = _side_key_map.get(k, k) 

336 if k == "color": 

337 v = cls._convert_to_color(v) 

338 side_kwargs[k] = v 

339 

340 return Side(**side_kwargs) 

341 

342 @classmethod 

343 def _convert_to_border(cls, border_dict): 

344 """ 

345 Convert ``border_dict`` to an openpyxl v2 Border object. 

346 

347 Parameters 

348 ---------- 

349 border_dict : dict 

350 A dict with zero or more of the following keys (or their synonyms). 

351 'left' 

352 'right' 

353 'top' 

354 'bottom' 

355 'diagonal' 

356 'diagonal_direction' 

357 'vertical' 

358 'horizontal' 

359 'diagonalUp' ('diagonalup') 

360 'diagonalDown' ('diagonaldown') 

361 'outline' 

362 

363 Returns 

364 ------- 

365 border : openpyxl.styles.Border 

366 """ 

367 from openpyxl.styles import Border 

368 

369 _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} 

370 

371 border_kwargs = {} 

372 for k, v in border_dict.items(): 

373 k = _border_key_map.get(k, k) 

374 if k == "color": 

375 v = cls._convert_to_color(v) 

376 if k in ["left", "right", "top", "bottom", "diagonal"]: 

377 v = cls._convert_to_side(v) 

378 border_kwargs[k] = v 

379 

380 return Border(**border_kwargs) 

381 

382 @classmethod 

383 def _convert_to_alignment(cls, alignment_dict): 

384 """ 

385 Convert ``alignment_dict`` to an openpyxl v2 Alignment object. 

386 

387 Parameters 

388 ---------- 

389 alignment_dict : dict 

390 A dict with zero or more of the following keys (or their synonyms). 

391 'horizontal' 

392 'vertical' 

393 'text_rotation' 

394 'wrap_text' 

395 'shrink_to_fit' 

396 'indent' 

397 Returns 

398 ------- 

399 alignment : openpyxl.styles.Alignment 

400 """ 

401 from openpyxl.styles import Alignment 

402 

403 return Alignment(**alignment_dict) 

404 

405 @classmethod 

406 def _convert_to_number_format(cls, number_format_dict): 

407 """ 

408 Convert ``number_format_dict`` to an openpyxl v2.1.0 number format 

409 initializer. 

410 

411 Parameters 

412 ---------- 

413 number_format_dict : dict 

414 A dict with zero or more of the following keys. 

415 'format_code' : str 

416 

417 Returns 

418 ------- 

419 number_format : str 

420 """ 

421 return number_format_dict["format_code"] 

422 

423 @classmethod 

424 def _convert_to_protection(cls, protection_dict): 

425 """ 

426 Convert ``protection_dict`` to an openpyxl v2 Protection object. 

427 

428 Parameters 

429 ---------- 

430 protection_dict : dict 

431 A dict with zero or more of the following keys. 

432 'locked' 

433 'hidden' 

434 

435 Returns 

436 ------- 

437 """ 

438 from openpyxl.styles import Protection 

439 

440 return Protection(**protection_dict) 

441 

442 def _write_cells( 

443 self, 

444 cells, 

445 sheet_name: str | None = None, 

446 startrow: int = 0, 

447 startcol: int = 0, 

448 freeze_panes: tuple[int, int] | None = None, 

449 ) -> None: 

450 # Write the frame cells using openpyxl. 

451 sheet_name = self._get_sheet_name(sheet_name) 

452 

453 _style_cache: dict[str, dict[str, Serialisable]] = {} 

454 

455 if sheet_name in self.sheets and self._if_sheet_exists != "new": 

456 if "r+" in self._mode: 

457 if self._if_sheet_exists == "replace": 

458 old_wks = self.sheets[sheet_name] 

459 target_index = self.book.index(old_wks) 

460 del self.book[sheet_name] 

461 wks = self.book.create_sheet(sheet_name, target_index) 

462 elif self._if_sheet_exists == "error": 

463 raise ValueError( 

464 f"Sheet '{sheet_name}' already exists and " 

465 f"if_sheet_exists is set to 'error'." 

466 ) 

467 elif self._if_sheet_exists == "overlay": 

468 wks = self.sheets[sheet_name] 

469 else: 

470 raise ValueError( 

471 f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. " 

472 "Valid options are 'error', 'new', 'replace' and 'overlay'." 

473 ) 

474 else: 

475 wks = self.sheets[sheet_name] 

476 else: 

477 wks = self.book.create_sheet() 

478 wks.title = sheet_name 

479 

480 if validate_freeze_panes(freeze_panes): 

481 freeze_panes = cast(tuple[int, int], freeze_panes) 

482 wks.freeze_panes = wks.cell( 

483 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 

484 ) 

485 

486 for cell in cells: 

487 xcell = wks.cell( 

488 row=startrow + cell.row + 1, column=startcol + cell.col + 1 

489 ) 

490 xcell.value, fmt = self._value_with_fmt(cell.val) 

491 if fmt: 

492 xcell.number_format = fmt 

493 

494 style_kwargs: dict[str, Serialisable] | None = {} 

495 if cell.style: 

496 key = str(cell.style) 

497 style_kwargs = _style_cache.get(key) 

498 if style_kwargs is None: 

499 style_kwargs = self._convert_to_style_kwargs(cell.style) 

500 _style_cache[key] = style_kwargs 

501 

502 if style_kwargs: 

503 for k, v in style_kwargs.items(): 

504 setattr(xcell, k, v) 

505 

506 if cell.mergestart is not None and cell.mergeend is not None: 

507 wks.merge_cells( 

508 start_row=startrow + cell.row + 1, 

509 start_column=startcol + cell.col + 1, 

510 end_column=startcol + cell.mergeend + 1, 

511 end_row=startrow + cell.mergestart + 1, 

512 ) 

513 

514 # When cells are merged only the top-left cell is preserved 

515 # The behaviour of the other cells in a merged range is 

516 # undefined 

517 if style_kwargs: 

518 first_row = startrow + cell.row + 1 

519 last_row = startrow + cell.mergestart + 1 

520 first_col = startcol + cell.col + 1 

521 last_col = startcol + cell.mergeend + 1 

522 

523 for row in range(first_row, last_row + 1): 

524 for col in range(first_col, last_col + 1): 

525 if row == first_row and col == first_col: 

526 # Ignore first cell. It is already handled. 

527 continue 

528 xcell = wks.cell(column=col, row=row) 

529 for k, v in style_kwargs.items(): 

530 setattr(xcell, k, v) 

531 

532 

533class OpenpyxlReader(BaseExcelReader["Workbook"]): 

534 @doc(storage_options=_shared_docs["storage_options"]) 

535 def __init__( 

536 self, 

537 filepath_or_buffer: FilePath | ReadBuffer[bytes], 

538 storage_options: StorageOptions | None = None, 

539 engine_kwargs: dict | None = None, 

540 ) -> None: 

541 """ 

542 Reader using openpyxl engine. 

543 

544 Parameters 

545 ---------- 

546 filepath_or_buffer : str, path object or Workbook 

547 Object to be parsed. 

548 {storage_options} 

549 engine_kwargs : dict, optional 

550 Arbitrary keyword arguments passed to excel engine. 

551 """ 

552 import_optional_dependency("openpyxl") 

553 super().__init__( 

554 filepath_or_buffer, 

555 storage_options=storage_options, 

556 engine_kwargs=engine_kwargs, 

557 ) 

558 

559 @property 

560 def _workbook_class(self) -> type[Workbook]: 

561 from openpyxl import Workbook 

562 

563 return Workbook 

564 

565 def load_workbook( 

566 self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs 

567 ) -> Workbook: 

568 from openpyxl import load_workbook 

569 

570 default_kwargs = {"read_only": True, "data_only": True, "keep_links": False} 

571 

572 return load_workbook( 

573 filepath_or_buffer, 

574 **(default_kwargs | engine_kwargs), 

575 ) 

576 

577 @property 

578 def sheet_names(self) -> list[str]: 

579 return [sheet.title for sheet in self.book.worksheets] 

580 

581 def get_sheet_by_name(self, name: str): 

582 self.raise_if_bad_sheet_by_name(name) 

583 return self.book[name] 

584 

585 def get_sheet_by_index(self, index: int): 

586 self.raise_if_bad_sheet_by_index(index) 

587 return self.book.worksheets[index] 

588 

589 def _convert_cell(self, cell) -> Scalar: 

590 from openpyxl.cell.cell import ( 

591 TYPE_ERROR, 

592 TYPE_NUMERIC, 

593 ) 

594 

595 if cell.value is None: 

596 return "" # compat with xlrd 

597 elif cell.data_type == TYPE_ERROR: 

598 return np.nan 

599 elif cell.data_type == TYPE_NUMERIC: 

600 val = int(cell.value) 

601 if val == cell.value: 

602 return val 

603 return float(cell.value) 

604 

605 return cell.value 

606 

607 def get_sheet_data( 

608 self, sheet, file_rows_needed: int | None = None 

609 ) -> list[list[Scalar]]: 

610 if self.book.read_only: 

611 sheet.reset_dimensions() 

612 

613 data: list[list[Scalar]] = [] 

614 last_row_with_data = -1 

615 for row_number, row in enumerate(sheet.rows): 

616 converted_row = [self._convert_cell(cell) for cell in row] 

617 while converted_row and converted_row[-1] == "": 

618 # trim trailing empty elements 

619 converted_row.pop() 

620 if converted_row: 

621 last_row_with_data = row_number 

622 data.append(converted_row) 

623 if file_rows_needed is not None and len(data) >= file_rows_needed: 

624 break 

625 

626 # Trim trailing empty rows 

627 data = data[: last_row_with_data + 1] 

628 

629 if len(data) > 0: 

630 # extend rows to max width 

631 max_width = max(len(data_row) for data_row in data) 

632 if min(len(data_row) for data_row in data) < max_width: 

633 empty_cell: list[Scalar] = [""] 

634 data = [ 

635 data_row + (max_width - len(data_row)) * empty_cell 

636 for data_row in data 

637 ] 

638 

639 return data