1from __future__ import annotations
2
3from datetime import (
4 date,
5 datetime,
6 time,
7 timedelta,
8)
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Union,
13)
14
15from pandas.compat._optional import import_optional_dependency
16from pandas.util._decorators import doc
17
18import pandas as pd
19from pandas.core.shared_docs import _shared_docs
20
21from pandas.io.excel._base import BaseExcelReader
22
23if TYPE_CHECKING:
24 from python_calamine import (
25 CalamineSheet,
26 CalamineWorkbook,
27 )
28
29 from pandas._typing import (
30 FilePath,
31 NaTType,
32 ReadBuffer,
33 Scalar,
34 StorageOptions,
35 )
36
37_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta]
38
39
40class CalamineReader(BaseExcelReader["CalamineWorkbook"]):
41 @doc(storage_options=_shared_docs["storage_options"])
42 def __init__(
43 self,
44 filepath_or_buffer: FilePath | ReadBuffer[bytes],
45 storage_options: StorageOptions | None = None,
46 engine_kwargs: dict | None = None,
47 ) -> None:
48 """
49 Reader using calamine engine (xlsx/xls/xlsb/ods).
50
51 Parameters
52 ----------
53 filepath_or_buffer : str, path to be parsed or
54 an open readable stream.
55 {storage_options}
56 engine_kwargs : dict, optional
57 Arbitrary keyword arguments passed to excel engine.
58 """
59 import_optional_dependency("python_calamine")
60 super().__init__(
61 filepath_or_buffer,
62 storage_options=storage_options,
63 engine_kwargs=engine_kwargs,
64 )
65
66 @property
67 def _workbook_class(self) -> type[CalamineWorkbook]:
68 from python_calamine import CalamineWorkbook
69
70 return CalamineWorkbook
71
72 def load_workbook(
73 self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any
74 ) -> CalamineWorkbook:
75 from python_calamine import load_workbook
76
77 return load_workbook(filepath_or_buffer, **engine_kwargs)
78
79 @property
80 def sheet_names(self) -> list[str]:
81 from python_calamine import SheetTypeEnum
82
83 return [
84 sheet.name
85 for sheet in self.book.sheets_metadata
86 if sheet.typ == SheetTypeEnum.WorkSheet
87 ]
88
89 def get_sheet_by_name(self, name: str) -> CalamineSheet:
90 self.raise_if_bad_sheet_by_name(name)
91 return self.book.get_sheet_by_name(name)
92
93 def get_sheet_by_index(self, index: int) -> CalamineSheet:
94 self.raise_if_bad_sheet_by_index(index)
95 return self.book.get_sheet_by_index(index)
96
97 def get_sheet_data(
98 self, sheet: CalamineSheet, file_rows_needed: int | None = None
99 ) -> list[list[Scalar | NaTType | time]]:
100 def _convert_cell(value: _CellValue) -> Scalar | NaTType | time:
101 if isinstance(value, float):
102 val = int(value)
103 if val == value:
104 return val
105 else:
106 return value
107 elif isinstance(value, date):
108 return pd.Timestamp(value)
109 elif isinstance(value, timedelta):
110 return pd.Timedelta(value)
111 elif isinstance(value, time):
112 return value
113
114 return value
115
116 rows: list[list[_CellValue]] = sheet.to_python(
117 skip_empty_area=False, nrows=file_rows_needed
118 )
119 data = [[_convert_cell(cell) for cell in row] for row in rows]
120
121 return data