Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/excel/_calamine.py: 41%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

51 statements  

1from __future__ import annotations 

2 

3from datetime import ( 

4 date, 

5 datetime, 

6 time, 

7 timedelta, 

8) 

9from typing import ( 

10 TYPE_CHECKING, 

11 Any, 

12 Union, 

13) 

14 

15from pandas.compat._optional import import_optional_dependency 

16from pandas.util._decorators import doc 

17 

18import pandas as pd 

19from pandas.core.shared_docs import _shared_docs 

20 

21from pandas.io.excel._base import BaseExcelReader 

22 

23if TYPE_CHECKING: 

24 from python_calamine import ( 

25 CalamineSheet, 

26 CalamineWorkbook, 

27 ) 

28 

29 from pandas._typing import ( 

30 FilePath, 

31 NaTType, 

32 ReadBuffer, 

33 Scalar, 

34 StorageOptions, 

35 ) 

36 

37_CellValue = Union[int, float, str, bool, time, date, datetime, timedelta] 

38 

39 

40class CalamineReader(BaseExcelReader["CalamineWorkbook"]): 

41 @doc(storage_options=_shared_docs["storage_options"]) 

42 def __init__( 

43 self, 

44 filepath_or_buffer: FilePath | ReadBuffer[bytes], 

45 storage_options: StorageOptions | None = None, 

46 engine_kwargs: dict | None = None, 

47 ) -> None: 

48 """ 

49 Reader using calamine engine (xlsx/xls/xlsb/ods). 

50 

51 Parameters 

52 ---------- 

53 filepath_or_buffer : str, path to be parsed or 

54 an open readable stream. 

55 {storage_options} 

56 engine_kwargs : dict, optional 

57 Arbitrary keyword arguments passed to excel engine. 

58 """ 

59 import_optional_dependency("python_calamine") 

60 super().__init__( 

61 filepath_or_buffer, 

62 storage_options=storage_options, 

63 engine_kwargs=engine_kwargs, 

64 ) 

65 

66 @property 

67 def _workbook_class(self) -> type[CalamineWorkbook]: 

68 from python_calamine import CalamineWorkbook 

69 

70 return CalamineWorkbook 

71 

72 def load_workbook( 

73 self, filepath_or_buffer: FilePath | ReadBuffer[bytes], engine_kwargs: Any 

74 ) -> CalamineWorkbook: 

75 from python_calamine import load_workbook 

76 

77 return load_workbook(filepath_or_buffer, **engine_kwargs) 

78 

79 @property 

80 def sheet_names(self) -> list[str]: 

81 from python_calamine import SheetTypeEnum 

82 

83 return [ 

84 sheet.name 

85 for sheet in self.book.sheets_metadata 

86 if sheet.typ == SheetTypeEnum.WorkSheet 

87 ] 

88 

89 def get_sheet_by_name(self, name: str) -> CalamineSheet: 

90 self.raise_if_bad_sheet_by_name(name) 

91 return self.book.get_sheet_by_name(name) 

92 

93 def get_sheet_by_index(self, index: int) -> CalamineSheet: 

94 self.raise_if_bad_sheet_by_index(index) 

95 return self.book.get_sheet_by_index(index) 

96 

97 def get_sheet_data( 

98 self, sheet: CalamineSheet, file_rows_needed: int | None = None 

99 ) -> list[list[Scalar | NaTType | time]]: 

100 def _convert_cell(value: _CellValue) -> Scalar | NaTType | time: 

101 if isinstance(value, float): 

102 val = int(value) 

103 if val == value: 

104 return val 

105 else: 

106 return value 

107 elif isinstance(value, date): 

108 return pd.Timestamp(value) 

109 elif isinstance(value, timedelta): 

110 return pd.Timedelta(value) 

111 elif isinstance(value, time): 

112 return value 

113 

114 return value 

115 

116 rows: list[list[_CellValue]] = sheet.to_python( 

117 skip_empty_area=False, nrows=file_rows_needed 

118 ) 

119 data = [[_convert_cell(cell) for cell in row] for row in rows] 

120 

121 return data