1# Copyright (c) 2010-2024 openpyxl
2
3"""
4Collection of utilities used within the package and also available for client code
5"""
6from functools import lru_cache
7from itertools import chain, product
8from string import ascii_uppercase, digits
9import re
10
11from .exceptions import CellCoordinatesException
12
13# constants
14COORD_RE = re.compile(r'^[$]?([A-Za-z]{1,3})[$]?(\d+)$')
15COL_RANGE = """[A-Z]{1,3}:[A-Z]{1,3}:"""
16ROW_RANGE = r"""\d+:\d+:"""
17RANGE_EXPR = r"""
18[$]?(?P<min_col>[A-Za-z]{1,3})?
19[$]?(?P<min_row>\d+)?
20(:[$]?(?P<max_col>[A-Za-z]{1,3})?
21[$]?(?P<max_row>\d+)?)?
22"""
23ABSOLUTE_RE = re.compile('^' + RANGE_EXPR +'$', re.VERBOSE)
24SHEET_TITLE = r"""
25(('(?P<quoted>([^']|'')*)')|(?P<notquoted>[^'^ ^!]*))!"""
26SHEETRANGE_RE = re.compile("""{0}(?P<cells>{1})(?=,?)""".format(
27 SHEET_TITLE, RANGE_EXPR), re.VERBOSE)
28
29
30def get_column_interval(start, end):
31 """
32 Given the start and end columns, return all the columns in the series.
33
34 The start and end columns can be either column letters or 1-based
35 indexes.
36 """
37 if isinstance(start, str):
38 start = column_index_from_string(start)
39 if isinstance(end, str):
40 end = column_index_from_string(end)
41 return [get_column_letter(x) for x in range(start, end + 1)]
42
43
44def coordinate_from_string(coord_string):
45 """Convert a coordinate string like 'B12' to a tuple ('B', 12)"""
46 match = COORD_RE.match(coord_string)
47 if not match:
48 msg = f"Invalid cell coordinates ({coord_string})"
49 raise CellCoordinatesException(msg)
50 column, row = match.groups()
51 row = int(row)
52 if not row:
53 msg = f"There is no row 0 ({coord_string})"
54 raise CellCoordinatesException(msg)
55 return column, row
56
57
58def absolute_coordinate(coord_string):
59 """Convert a coordinate to an absolute coordinate string (B12 -> $B$12)"""
60 m = ABSOLUTE_RE.match(coord_string)
61 if not m:
62 raise ValueError(f"{coord_string} is not a valid coordinate range")
63
64 d = m.groupdict('')
65 for k, v in d.items():
66 if v:
67 d[k] = f"${v}"
68
69 if d['max_col'] or d['max_row']:
70 fmt = "{min_col}{min_row}:{max_col}{max_row}"
71 else:
72 fmt = "{min_col}{min_row}"
73 return fmt.format(**d)
74
75
76__decimal_to_alpha = [""] + list(ascii_uppercase)
77
78@lru_cache(maxsize=None)
79def get_column_letter(col_idx):
80 """
81 Convert decimal column position to its ASCII (base 26) form.
82
83 Because column indices are 1-based, strides are actually pow(26, n) + 26
84 Hence, a correction is applied between pow(26, n) and pow(26, 2) + 26 to
85 prevent and additional column letter being prepended
86
87 "A" == 1 == pow(26, 0)
88 "Z" == 26 == pow(26, 0) + 26 // decimal equivalent 10
89 "AA" == 27 == pow(26, 1) + 1
90 "ZZ" == 702 == pow(26, 2) + 26 // decimal equivalent 100
91 """
92
93 if not 1 <= col_idx <= 18278:
94 raise ValueError("Invalid column index {0}".format(col_idx))
95
96 result = []
97
98 if col_idx < 26:
99 return __decimal_to_alpha[col_idx]
100
101 while col_idx:
102 col_idx, remainder = divmod(col_idx, 26)
103 result.insert(0, __decimal_to_alpha[remainder])
104 if not remainder:
105 col_idx -= 1
106 result.insert(0, "Z")
107
108 return "".join(result)
109
110
111__alpha_to_decimal = {letter:pos for pos, letter in enumerate(ascii_uppercase, 1)}
112__powers = (1, 26, 676)
113
114@lru_cache(maxsize=None)
115def column_index_from_string(col):
116 """
117 Convert ASCII column name (base 26) to decimal with 1-based index
118
119 Characters represent descending multiples of powers of 26
120
121 "AFZ" == 26 * pow(26, 0) + 6 * pow(26, 1) + 1 * pow(26, 2)
122 """
123 error_msg = f"'{col}' is not a valid column name. Column names are from A to ZZZ"
124 if len(col) > 3:
125 raise ValueError(error_msg)
126 idx = 0
127 col = reversed(col.upper())
128 for letter, power in zip(col, __powers):
129 try:
130 pos = __alpha_to_decimal[letter]
131 except KeyError:
132 raise ValueError(error_msg)
133 idx += pos * power
134 if not 0 < idx < 18279:
135 raise ValueError(error_msg)
136 return idx
137
138
139def range_boundaries(range_string):
140 """
141 Convert a range string into a tuple of boundaries:
142 (min_col, min_row, max_col, max_row)
143 Cell coordinates will be converted into a range with the cell at both end
144 """
145 msg = "{0} is not a valid coordinate or range".format(range_string)
146 m = ABSOLUTE_RE.match(range_string)
147 if not m:
148 raise ValueError(msg)
149
150 min_col, min_row, sep, max_col, max_row = m.groups()
151
152 if sep:
153 cols = min_col, max_col
154 rows = min_row, max_row
155
156 if not (
157 all(cols + rows) or
158 all(cols) and not any(rows) or
159 all(rows) and not any(cols)
160 ):
161 raise ValueError(msg)
162
163 if min_col is not None:
164 min_col = column_index_from_string(min_col)
165
166 if min_row is not None:
167 min_row = int(min_row)
168
169 if max_col is not None:
170 max_col = column_index_from_string(max_col)
171 else:
172 max_col = min_col
173
174 if max_row is not None:
175 max_row = int(max_row)
176 else:
177 max_row = min_row
178
179 return min_col, min_row, max_col, max_row
180
181
182def rows_from_range(range_string):
183 """
184 Get individual addresses for every cell in a range.
185 Yields one row at a time.
186 """
187 min_col, min_row, max_col, max_row = range_boundaries(range_string)
188 rows = range(min_row, max_row + 1)
189 cols = [get_column_letter(col) for col in range(min_col, max_col + 1)]
190 for row in rows:
191 yield tuple('{0}{1}'.format(col, row) for col in cols)
192
193
194def cols_from_range(range_string):
195 """
196 Get individual addresses for every cell in a range.
197 Yields one row at a time.
198 """
199 min_col, min_row, max_col, max_row = range_boundaries(range_string)
200 rows = range(min_row, max_row+1)
201 cols = (get_column_letter(col) for col in range(min_col, max_col+1))
202 for col in cols:
203 yield tuple('{0}{1}'.format(col, row) for row in rows)
204
205
206def coordinate_to_tuple(coordinate):
207 """
208 Convert an Excel style coordinate to (row, column) tuple
209 """
210 for idx, c in enumerate(coordinate):
211 if c in digits:
212 break
213 col = coordinate[:idx]
214 row = coordinate[idx:]
215 return int(row), column_index_from_string(col)
216
217
218def range_to_tuple(range_string):
219 """
220 Convert a worksheet range to the sheetname and maximum and minimum
221 coordinate indices
222 """
223 m = SHEETRANGE_RE.match(range_string)
224 if m is None:
225 raise ValueError("Value must be of the form sheetname!A1:E4")
226 sheetname = m.group("quoted") or m.group("notquoted")
227 cells = m.group("cells")
228 boundaries = range_boundaries(cells)
229 return sheetname, boundaries
230
231
232def quote_sheetname(sheetname):
233 """
234 Add quotes around sheetnames if they contain spaces.
235 """
236 if "'" in sheetname:
237 sheetname = sheetname.replace("'", "''")
238
239 sheetname = u"'{0}'".format(sheetname)
240 return sheetname