Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/excel/

1from __future__ import annotations

3from datetime import time

4import math

5from typing import TYPE_CHECKING

7import numpy as np

9from pandas.compat._optional import import_optional_dependency

10from pandas.util._decorators import doc

12from pandas.core.shared_docs import _shared_docs

14from pandas.io.excel._base import BaseExcelReader

16if TYPE_CHECKING:

17 from xlrd import Book

19 from pandas._typing import (

20 Scalar,

21 StorageOptions,

22 )

25class XlrdReader(BaseExcelReader["Book"]):

26 @doc(storage_options=_shared_docs["storage_options"])

27 def __init__(

28 self,

29 filepath_or_buffer,

30 storage_options: StorageOptions | None = None,

31 engine_kwargs: dict | None = None,

32 ) -> None:

33 """

34 Reader using xlrd engine.

36 Parameters

37 ----------

38 filepath_or_buffer : str, path object or Workbook

39 Object to be parsed.

40 {storage_options}

41 engine_kwargs : dict, optional

42 Arbitrary keyword arguments passed to excel engine.

43 """

44 err_msg = "Install xlrd >= 2.0.1 for xls Excel support"

45 import_optional_dependency("xlrd", extra=err_msg)

46 super().__init__(

47 filepath_or_buffer,

48 storage_options=storage_options,

49 engine_kwargs=engine_kwargs,

50 )

52 @property

53 def _workbook_class(self) -> type[Book]:

54 from xlrd import Book

56 return Book

58 def load_workbook(self, filepath_or_buffer, engine_kwargs) -> Book:

59 from xlrd import open_workbook

61 if hasattr(filepath_or_buffer, "read"):

62 data = filepath_or_buffer.read()

63 return open_workbook(file_contents=data, **engine_kwargs)

64 else:

65 return open_workbook(filepath_or_buffer, **engine_kwargs)

67 @property

68 def sheet_names(self):

69 return self.book.sheet_names()

71 def get_sheet_by_name(self, name):

72 self.raise_if_bad_sheet_by_name(name)

73 return self.book.sheet_by_name(name)

75 def get_sheet_by_index(self, index):

76 self.raise_if_bad_sheet_by_index(index)

77 return self.book.sheet_by_index(index)

79 def get_sheet_data(

80 self, sheet, file_rows_needed: int | None = None

81 ) -> list[list[Scalar]]:

82 from xlrd import (

83 XL_CELL_BOOLEAN,

84 XL_CELL_DATE,

85 XL_CELL_ERROR,

86 XL_CELL_NUMBER,

87 xldate,

88 )

90 epoch1904 = self.book.datemode

92 def _parse_cell(cell_contents, cell_typ):

93 """

94 converts the contents of the cell into a pandas appropriate object

95 """

96 if cell_typ == XL_CELL_DATE:

97 # Use the newer xlrd datetime handling.

98 try:

99 cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)

100 except OverflowError:

101 return cell_contents

102

103 # Excel doesn't distinguish between dates and time,

104 # so we treat dates on the epoch as times only.

105 # Also, Excel supports 1900 and 1904 epochs.

106 year = (cell_contents.timetuple())[0:3]

107 if (not epoch1904 and year == (1899, 12, 31)) or (

108 epoch1904 and year == (1904, 1, 1)

109 ):

110 cell_contents = time(

111 cell_contents.hour,

112 cell_contents.minute,

113 cell_contents.second,

114 cell_contents.microsecond,

115 )

116

117 elif cell_typ == XL_CELL_ERROR:

118 cell_contents = np.nan

119 elif cell_typ == XL_CELL_BOOLEAN:

120 cell_contents = bool(cell_contents)

121 elif cell_typ == XL_CELL_NUMBER:

122 # GH5394 - Excel 'numbers' are always floats

123 # it's a minimal perf hit and less surprising

124 if math.isfinite(cell_contents):

125 # GH54564 - don't attempt to convert NaN/Inf

126 val = int(cell_contents)

127 if val == cell_contents:

128 cell_contents = val

129 return cell_contents

130

131 data = []

132

133 nrows = sheet.nrows

134 if file_rows_needed is not None:

135 nrows = min(nrows, file_rows_needed)

136 for i in range(nrows):

137 row = [

138 _parse_cell(value, typ)

139 for value, typ in zip(sheet.row_values(i), sheet.row_types(i))

140 ]

141 data.append(row)

142

143 return data

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/io/excel/_xlrd.py: 31%

67 statements