1# Copyright (c) 2010-2024 openpyxl
2
3""" Read worksheets on-demand
4"""
5
6from .worksheet import Worksheet
7from openpyxl.cell.read_only import ReadOnlyCell, EMPTY_CELL
8from openpyxl.utils import get_column_letter
9
10from ._reader import WorkSheetParser
11from openpyxl.workbook.defined_name import DefinedNameDict
12
13
14def read_dimension(source):
15 parser = WorkSheetParser(source, [])
16 return parser.parse_dimensions()
17
18
19class ReadOnlyWorksheet:
20
21 _min_column = 1
22 _min_row = 1
23 _max_column = _max_row = None
24
25 # from Standard Worksheet
26 # Methods from Worksheet
27 cell = Worksheet.cell
28 iter_rows = Worksheet.iter_rows
29 values = Worksheet.values
30 rows = Worksheet.rows
31 __getitem__ = Worksheet.__getitem__
32 __iter__ = Worksheet.__iter__
33
34
35 def __init__(self, parent_workbook, title, worksheet_path, shared_strings):
36 self.parent = parent_workbook
37 self.title = title
38 self.sheet_state = 'visible'
39 self._current_row = None
40 self._worksheet_path = worksheet_path
41 self._shared_strings = shared_strings
42 self._get_size()
43 self.defined_names = DefinedNameDict()
44
45
46 def _get_size(self):
47 src = self._get_source()
48 parser = WorkSheetParser(src, [])
49 dimensions = parser.parse_dimensions()
50 src.close()
51 if dimensions is not None:
52 self._min_column, self._min_row, self._max_column, self._max_row = dimensions
53
54
55 def _get_source(self):
56 """Parse xml source on demand, must close after use"""
57 return self.parent._archive.open(self._worksheet_path)
58
59
60 def _cells_by_row(self, min_col, min_row, max_col, max_row, values_only=False):
61 """
62 The source worksheet file may have columns or rows missing.
63 Missing cells will be created.
64 """
65 filler = EMPTY_CELL
66 if values_only:
67 filler = None
68
69 max_col = max_col or self.max_column
70 max_row = max_row or self.max_row
71 empty_row = []
72 if max_col is not None:
73 empty_row = (filler,) * (max_col + 1 - min_col)
74
75 counter = min_row
76 idx = 1
77 with self._get_source() as src:
78 parser = WorkSheetParser(src,
79 self._shared_strings,
80 data_only=self.parent.data_only,
81 epoch=self.parent.epoch,
82 date_formats=self.parent._date_formats,
83 timedelta_formats=self.parent._timedelta_formats)
84
85 for idx, row in parser.parse():
86 if max_row is not None and idx > max_row:
87 break
88
89 # some rows are missing
90 for _ in range(counter, idx):
91 counter += 1
92 yield empty_row
93
94 # return cells from a row
95 if counter <= idx:
96 row = self._get_row(row, min_col, max_col, values_only)
97 counter += 1
98 yield row
99
100 if max_row is not None and max_row < idx:
101 for _ in range(counter, max_row+1):
102 yield empty_row
103
104
105 def _get_row(self, row, min_col=1, max_col=None, values_only=False):
106 """
107 Make sure a row contains always the same number of cells or values
108 """
109 if not row and not max_col: # in case someone wants to force rows where there aren't any
110 return ()
111
112 max_col = max_col or row[-1]['column']
113 row_width = max_col + 1 - min_col
114
115 new_row = [EMPTY_CELL] * row_width
116 if values_only:
117 new_row = [None] * row_width
118
119 for cell in row:
120 counter = cell['column']
121 if min_col <= counter <= max_col:
122 idx = counter - min_col # position in list of cells returned
123 new_row[idx] = cell['value']
124 if not values_only:
125 new_row[idx] = ReadOnlyCell(self, **cell)
126
127 return tuple(new_row)
128
129
130 def _get_cell(self, row, column):
131 """Cells are returned by a generator which can be empty"""
132 for row in self._cells_by_row(column, row, column, row):
133 if row:
134 return row[0]
135 return EMPTY_CELL
136
137
138 def calculate_dimension(self, force=False):
139 if not all([self.max_column, self.max_row]):
140 if force:
141 self._calculate_dimension()
142 else:
143 raise ValueError("Worksheet is unsized, use calculate_dimension(force=True)")
144 return f"{get_column_letter(self.min_column)}{self.min_row}:{get_column_letter(self.max_column)}{self.max_row}"
145
146
147 def _calculate_dimension(self):
148 """
149 Loop through all the cells to get the size of a worksheet.
150 Do this only if it is explicitly requested.
151 """
152
153 max_col = 0
154 for r in self.rows:
155 if not r:
156 continue
157 cell = r[-1]
158 max_col = max(max_col, cell.column)
159
160 self._max_row = cell.row
161 self._max_column = max_col
162
163
164 def reset_dimensions(self):
165 """
166 Remove worksheet dimensions if these are incorrect in the worksheet source.
167 NB. This probably indicates a bug in the library or application that created
168 the workbook.
169 """
170 self._max_row = self._max_column = None
171
172
173 @property
174 def min_row(self):
175 return self._min_row
176
177
178 @property
179 def max_row(self):
180 return self._max_row
181
182
183 @property
184 def min_column(self):
185 return self._min_column
186
187
188 @property
189 def max_column(self):
190 return self._max_column