Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/excel/_openpyxl.py: 22%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

242 statements  

1from __future__ import annotations 

2 

3import mmap 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Tuple, 

8 cast, 

9) 

10 

11import numpy as np 

12 

13from pandas._typing import ( 

14 FilePath, 

15 ReadBuffer, 

16 Scalar, 

17 StorageOptions, 

18 WriteExcelBuffer, 

19) 

20from pandas.compat._optional import import_optional_dependency 

21from pandas.util._decorators import doc 

22 

23from pandas.core.shared_docs import _shared_docs 

24 

25from pandas.io.excel._base import ( 

26 BaseExcelReader, 

27 ExcelWriter, 

28) 

29from pandas.io.excel._util import ( 

30 combine_kwargs, 

31 validate_freeze_panes, 

32) 

33 

34if TYPE_CHECKING: 

35 from openpyxl.descriptors.serialisable import Serialisable 

36 from openpyxl.workbook import Workbook 

37 

38 

39class OpenpyxlWriter(ExcelWriter): 

40 _engine = "openpyxl" 

41 _supported_extensions = (".xlsx", ".xlsm") 

42 

43 def __init__( 

44 self, 

45 path: FilePath | WriteExcelBuffer | ExcelWriter, 

46 engine: str | None = None, 

47 date_format: str | None = None, 

48 datetime_format: str | None = None, 

49 mode: str = "w", 

50 storage_options: StorageOptions = None, 

51 if_sheet_exists: str | None = None, 

52 engine_kwargs: dict[str, Any] | None = None, 

53 **kwargs, 

54 ) -> None: 

55 # Use the openpyxl module as the Excel writer. 

56 from openpyxl.workbook import Workbook 

57 

58 engine_kwargs = combine_kwargs(engine_kwargs, kwargs) 

59 

60 super().__init__( 

61 path, 

62 mode=mode, 

63 storage_options=storage_options, 

64 if_sheet_exists=if_sheet_exists, 

65 engine_kwargs=engine_kwargs, 

66 ) 

67 

68 # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from 

69 # the file and later write to it 

70 if "r+" in self._mode: # Load from existing workbook 

71 from openpyxl import load_workbook 

72 

73 try: 

74 self._book = load_workbook(self._handles.handle, **engine_kwargs) 

75 except TypeError: 

76 self._handles.handle.close() 

77 raise 

78 self._handles.handle.seek(0) 

79 else: 

80 # Create workbook object with default optimized_write=True. 

81 try: 

82 self._book = Workbook(**engine_kwargs) 

83 except TypeError: 

84 self._handles.handle.close() 

85 raise 

86 

87 if self.book.worksheets: 

88 self.book.remove(self.book.worksheets[0]) 

89 

90 @property 

91 def book(self) -> Workbook: 

92 """ 

93 Book instance of class openpyxl.workbook.Workbook. 

94 

95 This attribute can be used to access engine-specific features. 

96 """ 

97 return self._book 

98 

99 @property 

100 def sheets(self) -> dict[str, Any]: 

101 """Mapping of sheet names to sheet objects.""" 

102 result = {name: self.book[name] for name in self.book.sheetnames} 

103 return result 

104 

105 def _save(self) -> None: 

106 """ 

107 Save workbook to disk. 

108 """ 

109 self.book.save(self._handles.handle) 

110 if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap): 

111 # truncate file to the written content 

112 self._handles.handle.truncate() 

113 

114 @classmethod 

115 def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]: 

116 """ 

117 Convert a style_dict to a set of kwargs suitable for initializing 

118 or updating-on-copy an openpyxl v2 style object. 

119 

120 Parameters 

121 ---------- 

122 style_dict : dict 

123 A dict with zero or more of the following keys (or their synonyms). 

124 'font' 

125 'fill' 

126 'border' ('borders') 

127 'alignment' 

128 'number_format' 

129 'protection' 

130 

131 Returns 

132 ------- 

133 style_kwargs : dict 

134 A dict with the same, normalized keys as ``style_dict`` but each 

135 value has been replaced with a native openpyxl style object of the 

136 appropriate class. 

137 """ 

138 _style_key_map = {"borders": "border"} 

139 

140 style_kwargs: dict[str, Serialisable] = {} 

141 for k, v in style_dict.items(): 

142 k = _style_key_map.get(k, k) 

143 _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None) 

144 new_v = _conv_to_x(v) 

145 if new_v: 

146 style_kwargs[k] = new_v 

147 

148 return style_kwargs 

149 

150 @classmethod 

151 def _convert_to_color(cls, color_spec): 

152 """ 

153 Convert ``color_spec`` to an openpyxl v2 Color object. 

154 

155 Parameters 

156 ---------- 

157 color_spec : str, dict 

158 A 32-bit ARGB hex string, or a dict with zero or more of the 

159 following keys. 

160 'rgb' 

161 'indexed' 

162 'auto' 

163 'theme' 

164 'tint' 

165 'index' 

166 'type' 

167 

168 Returns 

169 ------- 

170 color : openpyxl.styles.Color 

171 """ 

172 from openpyxl.styles import Color 

173 

174 if isinstance(color_spec, str): 

175 return Color(color_spec) 

176 else: 

177 return Color(**color_spec) 

178 

179 @classmethod 

180 def _convert_to_font(cls, font_dict): 

181 """ 

182 Convert ``font_dict`` to an openpyxl v2 Font object. 

183 

184 Parameters 

185 ---------- 

186 font_dict : dict 

187 A dict with zero or more of the following keys (or their synonyms). 

188 'name' 

189 'size' ('sz') 

190 'bold' ('b') 

191 'italic' ('i') 

192 'underline' ('u') 

193 'strikethrough' ('strike') 

194 'color' 

195 'vertAlign' ('vertalign') 

196 'charset' 

197 'scheme' 

198 'family' 

199 'outline' 

200 'shadow' 

201 'condense' 

202 

203 Returns 

204 ------- 

205 font : openpyxl.styles.Font 

206 """ 

207 from openpyxl.styles import Font 

208 

209 _font_key_map = { 

210 "sz": "size", 

211 "b": "bold", 

212 "i": "italic", 

213 "u": "underline", 

214 "strike": "strikethrough", 

215 "vertalign": "vertAlign", 

216 } 

217 

218 font_kwargs = {} 

219 for k, v in font_dict.items(): 

220 k = _font_key_map.get(k, k) 

221 if k == "color": 

222 v = cls._convert_to_color(v) 

223 font_kwargs[k] = v 

224 

225 return Font(**font_kwargs) 

226 

227 @classmethod 

228 def _convert_to_stop(cls, stop_seq): 

229 """ 

230 Convert ``stop_seq`` to a list of openpyxl v2 Color objects, 

231 suitable for initializing the ``GradientFill`` ``stop`` parameter. 

232 

233 Parameters 

234 ---------- 

235 stop_seq : iterable 

236 An iterable that yields objects suitable for consumption by 

237 ``_convert_to_color``. 

238 

239 Returns 

240 ------- 

241 stop : list of openpyxl.styles.Color 

242 """ 

243 return map(cls._convert_to_color, stop_seq) 

244 

245 @classmethod 

246 def _convert_to_fill(cls, fill_dict: dict[str, Any]): 

247 """ 

248 Convert ``fill_dict`` to an openpyxl v2 Fill object. 

249 

250 Parameters 

251 ---------- 

252 fill_dict : dict 

253 A dict with one or more of the following keys (or their synonyms), 

254 'fill_type' ('patternType', 'patterntype') 

255 'start_color' ('fgColor', 'fgcolor') 

256 'end_color' ('bgColor', 'bgcolor') 

257 or one or more of the following keys (or their synonyms). 

258 'type' ('fill_type') 

259 'degree' 

260 'left' 

261 'right' 

262 'top' 

263 'bottom' 

264 'stop' 

265 

266 Returns 

267 ------- 

268 fill : openpyxl.styles.Fill 

269 """ 

270 from openpyxl.styles import ( 

271 GradientFill, 

272 PatternFill, 

273 ) 

274 

275 _pattern_fill_key_map = { 

276 "patternType": "fill_type", 

277 "patterntype": "fill_type", 

278 "fgColor": "start_color", 

279 "fgcolor": "start_color", 

280 "bgColor": "end_color", 

281 "bgcolor": "end_color", 

282 } 

283 

284 _gradient_fill_key_map = {"fill_type": "type"} 

285 

286 pfill_kwargs = {} 

287 gfill_kwargs = {} 

288 for k, v in fill_dict.items(): 

289 pk = _pattern_fill_key_map.get(k) 

290 gk = _gradient_fill_key_map.get(k) 

291 if pk in ["start_color", "end_color"]: 

292 v = cls._convert_to_color(v) 

293 if gk == "stop": 

294 v = cls._convert_to_stop(v) 

295 if pk: 

296 pfill_kwargs[pk] = v 

297 elif gk: 

298 gfill_kwargs[gk] = v 

299 else: 

300 pfill_kwargs[k] = v 

301 gfill_kwargs[k] = v 

302 

303 try: 

304 return PatternFill(**pfill_kwargs) 

305 except TypeError: 

306 return GradientFill(**gfill_kwargs) 

307 

308 @classmethod 

309 def _convert_to_side(cls, side_spec): 

310 """ 

311 Convert ``side_spec`` to an openpyxl v2 Side object. 

312 

313 Parameters 

314 ---------- 

315 side_spec : str, dict 

316 A string specifying the border style, or a dict with zero or more 

317 of the following keys (or their synonyms). 

318 'style' ('border_style') 

319 'color' 

320 

321 Returns 

322 ------- 

323 side : openpyxl.styles.Side 

324 """ 

325 from openpyxl.styles import Side 

326 

327 _side_key_map = {"border_style": "style"} 

328 

329 if isinstance(side_spec, str): 

330 return Side(style=side_spec) 

331 

332 side_kwargs = {} 

333 for k, v in side_spec.items(): 

334 k = _side_key_map.get(k, k) 

335 if k == "color": 

336 v = cls._convert_to_color(v) 

337 side_kwargs[k] = v 

338 

339 return Side(**side_kwargs) 

340 

341 @classmethod 

342 def _convert_to_border(cls, border_dict): 

343 """ 

344 Convert ``border_dict`` to an openpyxl v2 Border object. 

345 

346 Parameters 

347 ---------- 

348 border_dict : dict 

349 A dict with zero or more of the following keys (or their synonyms). 

350 'left' 

351 'right' 

352 'top' 

353 'bottom' 

354 'diagonal' 

355 'diagonal_direction' 

356 'vertical' 

357 'horizontal' 

358 'diagonalUp' ('diagonalup') 

359 'diagonalDown' ('diagonaldown') 

360 'outline' 

361 

362 Returns 

363 ------- 

364 border : openpyxl.styles.Border 

365 """ 

366 from openpyxl.styles import Border 

367 

368 _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"} 

369 

370 border_kwargs = {} 

371 for k, v in border_dict.items(): 

372 k = _border_key_map.get(k, k) 

373 if k == "color": 

374 v = cls._convert_to_color(v) 

375 if k in ["left", "right", "top", "bottom", "diagonal"]: 

376 v = cls._convert_to_side(v) 

377 border_kwargs[k] = v 

378 

379 return Border(**border_kwargs) 

380 

381 @classmethod 

382 def _convert_to_alignment(cls, alignment_dict): 

383 """ 

384 Convert ``alignment_dict`` to an openpyxl v2 Alignment object. 

385 

386 Parameters 

387 ---------- 

388 alignment_dict : dict 

389 A dict with zero or more of the following keys (or their synonyms). 

390 'horizontal' 

391 'vertical' 

392 'text_rotation' 

393 'wrap_text' 

394 'shrink_to_fit' 

395 'indent' 

396 Returns 

397 ------- 

398 alignment : openpyxl.styles.Alignment 

399 """ 

400 from openpyxl.styles import Alignment 

401 

402 return Alignment(**alignment_dict) 

403 

404 @classmethod 

405 def _convert_to_number_format(cls, number_format_dict): 

406 """ 

407 Convert ``number_format_dict`` to an openpyxl v2.1.0 number format 

408 initializer. 

409 

410 Parameters 

411 ---------- 

412 number_format_dict : dict 

413 A dict with zero or more of the following keys. 

414 'format_code' : str 

415 

416 Returns 

417 ------- 

418 number_format : str 

419 """ 

420 return number_format_dict["format_code"] 

421 

422 @classmethod 

423 def _convert_to_protection(cls, protection_dict): 

424 """ 

425 Convert ``protection_dict`` to an openpyxl v2 Protection object. 

426 

427 Parameters 

428 ---------- 

429 protection_dict : dict 

430 A dict with zero or more of the following keys. 

431 'locked' 

432 'hidden' 

433 

434 Returns 

435 ------- 

436 """ 

437 from openpyxl.styles import Protection 

438 

439 return Protection(**protection_dict) 

440 

441 def _write_cells( 

442 self, 

443 cells, 

444 sheet_name: str | None = None, 

445 startrow: int = 0, 

446 startcol: int = 0, 

447 freeze_panes: tuple[int, int] | None = None, 

448 ) -> None: 

449 # Write the frame cells using openpyxl. 

450 sheet_name = self._get_sheet_name(sheet_name) 

451 

452 _style_cache: dict[str, dict[str, Serialisable]] = {} 

453 

454 if sheet_name in self.sheets and self._if_sheet_exists != "new": 

455 if "r+" in self._mode: 

456 if self._if_sheet_exists == "replace": 

457 old_wks = self.sheets[sheet_name] 

458 target_index = self.book.index(old_wks) 

459 del self.book[sheet_name] 

460 wks = self.book.create_sheet(sheet_name, target_index) 

461 elif self._if_sheet_exists == "error": 

462 raise ValueError( 

463 f"Sheet '{sheet_name}' already exists and " 

464 f"if_sheet_exists is set to 'error'." 

465 ) 

466 elif self._if_sheet_exists == "overlay": 

467 wks = self.sheets[sheet_name] 

468 else: 

469 raise ValueError( 

470 f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. " 

471 "Valid options are 'error', 'new', 'replace' and 'overlay'." 

472 ) 

473 else: 

474 wks = self.sheets[sheet_name] 

475 else: 

476 wks = self.book.create_sheet() 

477 wks.title = sheet_name 

478 

479 if validate_freeze_panes(freeze_panes): 

480 freeze_panes = cast(Tuple[int, int], freeze_panes) 

481 wks.freeze_panes = wks.cell( 

482 row=freeze_panes[0] + 1, column=freeze_panes[1] + 1 

483 ) 

484 

485 for cell in cells: 

486 xcell = wks.cell( 

487 row=startrow + cell.row + 1, column=startcol + cell.col + 1 

488 ) 

489 xcell.value, fmt = self._value_with_fmt(cell.val) 

490 if fmt: 

491 xcell.number_format = fmt 

492 

493 style_kwargs: dict[str, Serialisable] | None = {} 

494 if cell.style: 

495 key = str(cell.style) 

496 style_kwargs = _style_cache.get(key) 

497 if style_kwargs is None: 

498 style_kwargs = self._convert_to_style_kwargs(cell.style) 

499 _style_cache[key] = style_kwargs 

500 

501 if style_kwargs: 

502 for k, v in style_kwargs.items(): 

503 setattr(xcell, k, v) 

504 

505 if cell.mergestart is not None and cell.mergeend is not None: 

506 wks.merge_cells( 

507 start_row=startrow + cell.row + 1, 

508 start_column=startcol + cell.col + 1, 

509 end_column=startcol + cell.mergeend + 1, 

510 end_row=startrow + cell.mergestart + 1, 

511 ) 

512 

513 # When cells are merged only the top-left cell is preserved 

514 # The behaviour of the other cells in a merged range is 

515 # undefined 

516 if style_kwargs: 

517 first_row = startrow + cell.row + 1 

518 last_row = startrow + cell.mergestart + 1 

519 first_col = startcol + cell.col + 1 

520 last_col = startcol + cell.mergeend + 1 

521 

522 for row in range(first_row, last_row + 1): 

523 for col in range(first_col, last_col + 1): 

524 if row == first_row and col == first_col: 

525 # Ignore first cell. It is already handled. 

526 continue 

527 xcell = wks.cell(column=col, row=row) 

528 for k, v in style_kwargs.items(): 

529 setattr(xcell, k, v) 

530 

531 

532class OpenpyxlReader(BaseExcelReader): 

533 @doc(storage_options=_shared_docs["storage_options"]) 

534 def __init__( 

535 self, 

536 filepath_or_buffer: FilePath | ReadBuffer[bytes], 

537 storage_options: StorageOptions = None, 

538 ) -> None: 

539 """ 

540 Reader using openpyxl engine. 

541 

542 Parameters 

543 ---------- 

544 filepath_or_buffer : str, path object or Workbook 

545 Object to be parsed. 

546 {storage_options} 

547 """ 

548 import_optional_dependency("openpyxl") 

549 super().__init__(filepath_or_buffer, storage_options=storage_options) 

550 

551 @property 

552 def _workbook_class(self): 

553 from openpyxl import Workbook 

554 

555 return Workbook 

556 

557 def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]): 

558 from openpyxl import load_workbook 

559 

560 return load_workbook( 

561 filepath_or_buffer, read_only=True, data_only=True, keep_links=False 

562 ) 

563 

564 @property 

565 def sheet_names(self) -> list[str]: 

566 return [sheet.title for sheet in self.book.worksheets] 

567 

568 def get_sheet_by_name(self, name: str): 

569 self.raise_if_bad_sheet_by_name(name) 

570 return self.book[name] 

571 

572 def get_sheet_by_index(self, index: int): 

573 self.raise_if_bad_sheet_by_index(index) 

574 return self.book.worksheets[index] 

575 

576 def _convert_cell(self, cell) -> Scalar: 

577 from openpyxl.cell.cell import ( 

578 TYPE_ERROR, 

579 TYPE_NUMERIC, 

580 ) 

581 

582 if cell.value is None: 

583 return "" # compat with xlrd 

584 elif cell.data_type == TYPE_ERROR: 

585 return np.nan 

586 elif cell.data_type == TYPE_NUMERIC: 

587 val = int(cell.value) 

588 if val == cell.value: 

589 return val 

590 return float(cell.value) 

591 

592 return cell.value 

593 

594 def get_sheet_data( 

595 self, sheet, file_rows_needed: int | None = None 

596 ) -> list[list[Scalar]]: 

597 if self.book.read_only: 

598 sheet.reset_dimensions() 

599 

600 data: list[list[Scalar]] = [] 

601 last_row_with_data = -1 

602 for row_number, row in enumerate(sheet.rows): 

603 converted_row = [self._convert_cell(cell) for cell in row] 

604 while converted_row and converted_row[-1] == "": 

605 # trim trailing empty elements 

606 converted_row.pop() 

607 if converted_row: 

608 last_row_with_data = row_number 

609 data.append(converted_row) 

610 if file_rows_needed is not None and len(data) >= file_rows_needed: 

611 break 

612 

613 # Trim trailing empty rows 

614 data = data[: last_row_with_data + 1] 

615 

616 if len(data) > 0: 

617 # extend rows to max width 

618 max_width = max(len(data_row) for data_row in data) 

619 if min(len(data_row) for data_row in data) < max_width: 

620 empty_cell: list[Scalar] = [""] 

621 data = [ 

622 data_row + (max_width - len(data_row)) * empty_cell 

623 for data_row in data 

624 ] 

625 

626 return data