Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/io/excel/_util.py: 30%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

102 statements  

1from __future__ import annotations 

2 

3from typing import ( 

4 TYPE_CHECKING, 

5 Any, 

6 Callable, 

7 Hashable, 

8 Iterable, 

9 Literal, 

10 MutableMapping, 

11 Sequence, 

12 TypeVar, 

13 overload, 

14) 

15 

16from pandas.compat._optional import import_optional_dependency 

17 

18from pandas.core.dtypes.common import ( 

19 is_integer, 

20 is_list_like, 

21) 

22 

23if TYPE_CHECKING: 

24 from pandas.io.excel._base import ExcelWriter 

25 

26 ExcelWriter_t = type[ExcelWriter] 

27 usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object]) 

28 

29_writers: MutableMapping[str, ExcelWriter_t] = {} 

30 

31 

32def register_writer(klass: ExcelWriter_t) -> None: 

33 """ 

34 Add engine to the excel writer registry.io.excel. 

35 

36 You must use this method to integrate with ``to_excel``. 

37 

38 Parameters 

39 ---------- 

40 klass : ExcelWriter 

41 """ 

42 if not callable(klass): 

43 raise ValueError("Can only register callables as engines") 

44 engine_name = klass._engine 

45 _writers[engine_name] = klass 

46 

47 

48def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str: 

49 """ 

50 Return the default reader/writer for the given extension. 

51 

52 Parameters 

53 ---------- 

54 ext : str 

55 The excel file extension for which to get the default engine. 

56 mode : str {'reader', 'writer'} 

57 Whether to get the default engine for reading or writing. 

58 Either 'reader' or 'writer' 

59 

60 Returns 

61 ------- 

62 str 

63 The default engine for the extension. 

64 """ 

65 _default_readers = { 

66 "xlsx": "openpyxl", 

67 "xlsm": "openpyxl", 

68 "xlsb": "pyxlsb", 

69 "xls": "xlrd", 

70 "ods": "odf", 

71 } 

72 _default_writers = { 

73 "xlsx": "openpyxl", 

74 "xlsm": "openpyxl", 

75 "xlsb": "pyxlsb", 

76 "ods": "odf", 

77 } 

78 assert mode in ["reader", "writer"] 

79 if mode == "writer": 

80 # Prefer xlsxwriter over openpyxl if installed 

81 xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn") 

82 if xlsxwriter: 

83 _default_writers["xlsx"] = "xlsxwriter" 

84 return _default_writers[ext] 

85 else: 

86 return _default_readers[ext] 

87 

88 

89def get_writer(engine_name: str) -> ExcelWriter_t: 

90 try: 

91 return _writers[engine_name] 

92 except KeyError as err: 

93 raise ValueError(f"No Excel writer '{engine_name}'") from err 

94 

95 

96def _excel2num(x: str) -> int: 

97 """ 

98 Convert Excel column name like 'AB' to 0-based column index. 

99 

100 Parameters 

101 ---------- 

102 x : str 

103 The Excel column name to convert to a 0-based column index. 

104 

105 Returns 

106 ------- 

107 num : int 

108 The column index corresponding to the name. 

109 

110 Raises 

111 ------ 

112 ValueError 

113 Part of the Excel column name was invalid. 

114 """ 

115 index = 0 

116 

117 for c in x.upper().strip(): 

118 cp = ord(c) 

119 

120 if cp < ord("A") or cp > ord("Z"): 

121 raise ValueError(f"Invalid column name: {x}") 

122 

123 index = index * 26 + cp - ord("A") + 1 

124 

125 return index - 1 

126 

127 

128def _range2cols(areas: str) -> list[int]: 

129 """ 

130 Convert comma separated list of column names and ranges to indices. 

131 

132 Parameters 

133 ---------- 

134 areas : str 

135 A string containing a sequence of column ranges (or areas). 

136 

137 Returns 

138 ------- 

139 cols : list 

140 A list of 0-based column indices. 

141 

142 Examples 

143 -------- 

144 >>> _range2cols('A:E') 

145 [0, 1, 2, 3, 4] 

146 >>> _range2cols('A,C,Z:AB') 

147 [0, 2, 25, 26, 27] 

148 """ 

149 cols: list[int] = [] 

150 

151 for rng in areas.split(","): 

152 if ":" in rng: 

153 rngs = rng.split(":") 

154 cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1)) 

155 else: 

156 cols.append(_excel2num(rng)) 

157 

158 return cols 

159 

160 

161@overload 

162def maybe_convert_usecols(usecols: str | list[int]) -> list[int]: 

163 ... 

164 

165 

166@overload 

167def maybe_convert_usecols(usecols: list[str]) -> list[str]: 

168 ... 

169 

170 

171@overload 

172def maybe_convert_usecols(usecols: usecols_func) -> usecols_func: 

173 ... 

174 

175 

176@overload 

177def maybe_convert_usecols(usecols: None) -> None: 

178 ... 

179 

180 

181def maybe_convert_usecols( 

182 usecols: str | list[int] | list[str] | usecols_func | None, 

183) -> None | list[int] | list[str] | usecols_func: 

184 """ 

185 Convert `usecols` into a compatible format for parsing in `parsers.py`. 

186 

187 Parameters 

188 ---------- 

189 usecols : object 

190 The use-columns object to potentially convert. 

191 

192 Returns 

193 ------- 

194 converted : object 

195 The compatible format of `usecols`. 

196 """ 

197 if usecols is None: 

198 return usecols 

199 

200 if is_integer(usecols): 

201 raise ValueError( 

202 "Passing an integer for `usecols` is no longer supported. " 

203 "Please pass in a list of int from 0 to `usecols` inclusive instead." 

204 ) 

205 

206 if isinstance(usecols, str): 

207 return _range2cols(usecols) 

208 

209 return usecols 

210 

211 

212@overload 

213def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]: 

214 ... 

215 

216 

217@overload 

218def validate_freeze_panes(freeze_panes: None) -> Literal[False]: 

219 ... 

220 

221 

222def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool: 

223 if freeze_panes is not None: 

224 if len(freeze_panes) == 2 and all( 

225 isinstance(item, int) for item in freeze_panes 

226 ): 

227 return True 

228 

229 raise ValueError( 

230 "freeze_panes must be of form (row, column) " 

231 "where row and column are integers" 

232 ) 

233 

234 # freeze_panes wasn't specified, return False so it won't be applied 

235 # to output sheet 

236 return False 

237 

238 

239def fill_mi_header( 

240 row: list[Hashable], control_row: list[bool] 

241) -> tuple[list[Hashable], list[bool]]: 

242 """ 

243 Forward fill blank entries in row but only inside the same parent index. 

244 

245 Used for creating headers in Multiindex. 

246 

247 Parameters 

248 ---------- 

249 row : list 

250 List of items in a single row. 

251 control_row : list of bool 

252 Helps to determine if particular column is in same parent index as the 

253 previous value. Used to stop propagation of empty cells between 

254 different indexes. 

255 

256 Returns 

257 ------- 

258 Returns changed row and control_row 

259 """ 

260 last = row[0] 

261 for i in range(1, len(row)): 

262 if not control_row[i]: 

263 last = row[i] 

264 

265 if row[i] == "" or row[i] is None: 

266 row[i] = last 

267 else: 

268 control_row[i] = False 

269 last = row[i] 

270 

271 return row, control_row 

272 

273 

274def pop_header_name( 

275 row: list[Hashable], index_col: int | Sequence[int] 

276) -> tuple[Hashable | None, list[Hashable]]: 

277 """ 

278 Pop the header name for MultiIndex parsing. 

279 

280 Parameters 

281 ---------- 

282 row : list 

283 The data row to parse for the header name. 

284 index_col : int, list 

285 The index columns for our data. Assumed to be non-null. 

286 

287 Returns 

288 ------- 

289 header_name : str 

290 The extracted header name. 

291 trimmed_row : list 

292 The original data row with the header name removed. 

293 """ 

294 # Pop out header name and fill w/blank. 

295 if is_list_like(index_col): 

296 assert isinstance(index_col, Iterable) 

297 i = max(index_col) 

298 else: 

299 assert not isinstance(index_col, Iterable) 

300 i = index_col 

301 

302 header_name = row[i] 

303 header_name = None if header_name == "" else header_name 

304 

305 return header_name, row[:i] + [""] + row[i + 1 :] 

306 

307 

308def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict: 

309 """ 

310 Used to combine two sources of kwargs for the backend engine. 

311 

312 Use of kwargs is deprecated, this function is solely for use in 1.3 and should 

313 be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs 

314 or kwargs must be None or empty respectively. 

315 

316 Parameters 

317 ---------- 

318 engine_kwargs: dict 

319 kwargs to be passed through to the engine. 

320 kwargs: dict 

321 kwargs to be psased through to the engine (deprecated) 

322 

323 Returns 

324 ------- 

325 engine_kwargs combined with kwargs 

326 """ 

327 if engine_kwargs is None: 

328 result = {} 

329 else: 

330 result = engine_kwargs.copy() 

331 result.update(kwargs) 

332 return result