Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/excel/_xlrd.py: 31%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

62 statements  

1from __future__ import annotations 

2 

3from datetime import time 

4 

5import numpy as np 

6 

7from pandas._typing import ( 

8 Scalar, 

9 StorageOptions, 

10) 

11from pandas.compat._optional import import_optional_dependency 

12from pandas.util._decorators import doc 

13 

14from pandas.core.shared_docs import _shared_docs 

15 

16from pandas.io.excel._base import BaseExcelReader 

17 

18 

class XlrdReader(BaseExcelReader):
    """
    Excel reader that implements the ``BaseExcelReader`` hooks with xlrd.

    xlrd is imported lazily inside each method, so this module can be
    imported even when the optional dependency is not installed.
    """

    @doc(storage_options=_shared_docs["storage_options"])
    def __init__(
        self, filepath_or_buffer, storage_options: StorageOptions = None
    ) -> None:
        """
        Reader using xlrd engine.

        Parameters
        ----------
        filepath_or_buffer : str, path object or Workbook
            Object to be parsed.
        {storage_options}
        """
        err_msg = "Install xlrd >= 2.0.1 for xls Excel support"
        import_optional_dependency("xlrd", extra=err_msg)
        super().__init__(filepath_or_buffer, storage_options=storage_options)

    @property
    def _workbook_class(self):
        """Return the xlrd ``Book`` class."""
        from xlrd import Book

        return Book

    def load_workbook(self, filepath_or_buffer):
        """
        Open ``filepath_or_buffer`` with ``xlrd.open_workbook``.

        Objects exposing a ``read`` method are read in full and handed to
        xlrd as ``file_contents``; anything else is passed to
        ``open_workbook`` unchanged.
        """
        from xlrd import open_workbook

        if hasattr(filepath_or_buffer, "read"):
            data = filepath_or_buffer.read()
            return open_workbook(file_contents=data)
        else:
            return open_workbook(filepath_or_buffer)

    @property
    def sheet_names(self):
        """Return the sheet names reported by the loaded workbook."""
        return self.book.sheet_names()

    def get_sheet_by_name(self, name):
        """Return the sheet called ``name`` after the inherited name check."""
        self.raise_if_bad_sheet_by_name(name)
        return self.book.sheet_by_name(name)

    def get_sheet_by_index(self, index):
        """Return the sheet at ``index`` after the inherited index check."""
        self.raise_if_bad_sheet_by_index(index)
        return self.book.sheet_by_index(index)

    def get_sheet_data(
        self, sheet, file_rows_needed: int | None = None
    ) -> list[list[Scalar]]:
        """
        Convert the cells of ``sheet`` into a list of rows of Python scalars.

        Parameters
        ----------
        sheet
            Sheet object providing ``nrows``, ``row_values(i)`` and
            ``row_types(i)``.
        file_rows_needed : int, optional
            When given, at most this many leading rows are read.

        Returns
        -------
        list of list of Scalar
            One inner list per row, one converted value per cell.
        """
        # Local import, in keeping with the function-scope xlrd imports below.
        import math

        from xlrd import (
            XL_CELL_BOOLEAN,
            XL_CELL_DATE,
            XL_CELL_ERROR,
            XL_CELL_NUMBER,
            xldate,
        )

        epoch1904 = self.book.datemode

        def _parse_cell(cell_contents, cell_typ):
            """
            converts the contents of the cell into a pandas appropriate object
            """
            if cell_typ == XL_CELL_DATE:
                # Use the newer xlrd datetime handling.
                try:
                    cell_contents = xldate.xldate_as_datetime(cell_contents, epoch1904)
                except OverflowError:
                    # Conversion overflowed: hand back the raw value untouched.
                    return cell_contents

                # Excel doesn't distinguish between dates and time,
                # so we treat dates on the epoch as times only.
                # Also, Excel supports 1900 and 1904 epochs.
                year = (cell_contents.timetuple())[0:3]
                if (not epoch1904 and year == (1899, 12, 31)) or (
                    epoch1904 and year == (1904, 1, 1)
                ):
                    cell_contents = time(
                        cell_contents.hour,
                        cell_contents.minute,
                        cell_contents.second,
                        cell_contents.microsecond,
                    )

            elif cell_typ == XL_CELL_ERROR:
                cell_contents = np.nan
            elif cell_typ == XL_CELL_BOOLEAN:
                cell_contents = bool(cell_contents)
            elif cell_typ == XL_CELL_NUMBER:
                # GH5394 - Excel 'numbers' are always floats
                # it's a minimal perf hit and less surprising
                # int() raises ValueError on NaN and OverflowError on +/-Inf,
                # so non-finite values are passed through unchanged.
                if math.isfinite(cell_contents):
                    val = int(cell_contents)
                    if val == cell_contents:
                        cell_contents = val
            return cell_contents

        nrows = sheet.nrows
        if file_rows_needed is not None:
            # Never read past the rows the caller actually asked for.
            nrows = min(nrows, file_rows_needed)

        data = []
        for i in range(nrows):
            row = [
                _parse_cell(value, typ)
                for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
            ]
            data.append(row)

        return data