Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/__init__.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

49 statements  

1# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd 

2# This module is part of the xlrd package, which is released under a 

3# BSD-style licence. 

4import os 

5import pprint 

6import sys 

7import zipfile 

8 

9from . import timemachine 

10from .biffh import ( 

11 XL_CELL_BLANK, XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_EMPTY, XL_CELL_ERROR, 

12 XL_CELL_NUMBER, XL_CELL_TEXT, XLRDError, biff_text_from_num, 

13 error_text_from_code, 

14) 

15from .book import Book, colname, open_workbook_xls 

16from .compdoc import SIGNATURE as XLS_SIGNATURE 

17from .formula import * # is constrained by __all__ 

18from .info import __VERSION__, __version__ 

19from .sheet import empty_cell 

20from .xldate import XLDateError, xldate_as_datetime, xldate_as_tuple 

21 

22 

#: Human-readable description for every value that :func:`inspect_format`
#: can return, keyed by that return value (``None`` included).
FILE_FORMAT_DESCRIPTIONS = {
    "xls": "Excel xls",
    "xlsb": "Excel 2007 xlsb file",
    "xlsx": "Excel xlsx file",
    "ods": "Openoffice.org ODS file",
    # A ZIP container in which none of the recognised member names was found.
    "zip": "Unknown ZIP file",
    # Neither of the signatures checked by inspect_format matched.
    None: "Unknown file type",
}

32 

# Byte prefix that inspect_format treats as marking a ZIP container.
ZIP_SIGNATURE = b'PK\x03\x04'

# Number of leading bytes to read so that either signature can be matched:
# the length of the longer of the two.
PEEK_SIZE = max(len(signature) for signature in (XLS_SIGNATURE, ZIP_SIGNATURE))

36 

37 

def inspect_format(path=None, content=None):
    """
    Inspect the content at the supplied path or the :class:`bytes` content provided
    and return the file's type as a :class:`str`, or ``None`` if it cannot
    be determined.

    The first ``PEEK_SIZE`` bytes are compared against the xls and ZIP
    signatures. A ZIP container is then opened and classified by the member
    names it holds.

    :param path:
      A :class:`string <str>` path containing the content to inspect.
      ``~`` will be expanded. Only used when ``content`` is empty or ``None``.

    :param content:
      The :class:`bytes` content to inspect.

    :returns:
       A :class:`str`, or ``None`` if the format cannot be determined.
       The return value can always be looked up in :data:`FILE_FORMAT_DESCRIPTIONS`
       to return a human-readable description of the format found.

    Errors raised while opening the file or while :class:`zipfile.ZipFile`
    reads the container are not caught here and propagate to the caller.
    """
    if content:
        peek = content[:PEEK_SIZE]
    else:
        path = os.path.expanduser(path)
        with open(path, "rb") as f:
            peek = f.read(PEEK_SIZE)

    if peek.startswith(XLS_SIGNATURE):
        return 'xls'

    if not peek.startswith(ZIP_SIGNATURE):
        # Neither signature matched: the format is unknown.
        return None

    zip_source = timemachine.BYTES_IO(content) if content else path

    # Use the archive as a context manager so that the underlying file
    # handle is released as soon as the member names have been read.
    with zipfile.ZipFile(zip_source) as zf:
        # Workaround for some third party files that use backslashes as
        # separators or differently-cased member names: normalise every
        # name to forward slashes and lower case before comparing.
        component_names = {
            name.replace('\\', '/').lower() for name in zf.namelist()
        }

    if 'xl/workbook.xml' in component_names:
        return 'xlsx'
    if 'xl/workbook.bin' in component_names:
        return 'xlsb'
    if 'content.xml' in component_names:
        return 'ods'
    return 'zip'

82 

83 

def open_workbook(filename=None,
                  logfile=sys.stdout,
                  verbosity=0,
                  use_mmap=True,
                  file_contents=None,
                  encoding_override=None,
                  formatting_info=False,
                  on_demand=False,
                  ragged_rows=False,
                  ignore_workbook_corruption=False
                  ):
    """
    Open a spreadsheet file so that its data can be extracted.

    The format is first detected with :func:`inspect_format`. Content that is
    recognised as something other than xls is rejected; content whose format
    cannot be determined is still handed to the xls reader.

    :param filename: Path of the spreadsheet file to open.

    :param logfile: An open file that receives messages and diagnostics.

    :param verbosity: Higher values write more trace material to the logfile.

    :param use_mmap:

      Whether the mmap module is used is decided heuristically; this
      argument overrides the outcome.

      Current heuristic: mmap is used if it exists.

    :param file_contents:

      A string, an :class:`mmap.mmap` object, or another object that behaves
      alike. When supplied, ``filename`` is not used, except (possibly) in
      messages.

    :param encoding_override:

      Works around missing or bad codepage information in older-version
      files. See :doc:`unicode`.

    :param formatting_info:

      Defaults to ``False``, which saves memory. "Blank" cells, i.e. cells
      that carry their own formatting information but no data, are then
      treated as empty by ignoring the file's ``BLANK`` and ``MULBLANK``
      records. This cuts off any bottom or right "margin" of rows of empty
      or blank cells, and only :meth:`~xlrd.sheet.Sheet.cell_value` and
      :meth:`~xlrd.sheet.Sheet.cell_type` are available.

      With ``True``, formatting information is read from the spreadsheet
      file. All cells are then provided, including empty and blank ones,
      and formatting information is available for each cell.

    :param on_demand:

      Controls whether every sheet is loaded up front or only when the
      caller asks for it. See :doc:`on_demand`.

    :param ragged_rows:

      With the default of ``False`` every row is padded with empty cells so
      that all rows have the size given by
      :attr:`~xlrd.sheet.Sheet.ncols`.

      With ``True`` rows carry no empty cells at their ends, which can save
      substantial memory when row sizes vary widely. See also the
      :meth:`~xlrd.sheet.Sheet.row_len` method.

    :param ignore_workbook_corruption:

      Allows corrupted workbooks to be read. With ``False`` you may face
      CompDocError: Workbook corruption. With ``True`` that exception is
      ignored.

    :returns: An instance of the :class:`~xlrd.book.Book` class.

    :raises XLRDError: If the detected format is known but is not xls.
    """
    detected_format = inspect_format(filename, file_contents)

    # Unknown formats must be allowed through: some ancient files that xlrd
    # can parse do not begin with the expected signature.
    is_recognised = bool(detected_format)
    if is_recognised and detected_format != 'xls':
        description = FILE_FORMAT_DESCRIPTIONS[detected_format]
        raise XLRDError(description + '; not supported')

    xls_options = dict(
        filename=filename,
        logfile=logfile,
        verbosity=verbosity,
        use_mmap=use_mmap,
        file_contents=file_contents,
        encoding_override=encoding_override,
        formatting_info=formatting_info,
        on_demand=on_demand,
        ragged_rows=ragged_rows,
        ignore_workbook_corruption=ignore_workbook_corruption,
    )
    return open_workbook_xls(**xls_options)

186 

187 

def dump(filename, outfile=sys.stdout, unnumbered=False):
    """
    Debugging aid: write an XLS file's BIFF records in char & hex form.

    :param filename: Path of the file to dump.
    :param outfile: An open file that receives the dump; it is also passed
                    as the log file while the workbook is loaded.
    :param unnumbered: If true, offsets are omitted (for meaningful diffs).
    """
    from .biffh import biff_dump as write_dump

    workbook = Book()
    workbook.biff2_8_load(filename=filename, logfile=outfile)

    dump_args = (
        workbook.mem,
        workbook.base,
        workbook.stream_len,
        0,
        outfile,
        unnumbered,
    )
    write_dump(*dump_args)

200 

201 

def count_records(filename, outfile=sys.stdout):
    """
    Debugging and analysis aid: summarise the BIFF records of a file,
    ie: produce a sorted file of ``(record_name, count)``.

    :param filename: Path of the file to summarise.
    :param outfile: An open file that receives the summary; it is also
                    passed as the log file while the workbook is loaded.
    """
    from .biffh import biff_count_records as write_summary

    workbook = Book()
    workbook.biff2_8_load(filename=filename, logfile=outfile)

    summary_args = (
        workbook.mem,
        workbook.base,
        workbook.stream_len,
        outfile,
    )
    write_summary(*summary_args)