Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/__init__.py

2# This module is part of the xlrd package, which is released under a

3# BSD-style licence.

4import os

5import pprint

6import sys

7import zipfile

9from . import timemachine

10from .biffh import (

11 XL_CELL_BLANK, XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_EMPTY, XL_CELL_ERROR,

12 XL_CELL_NUMBER, XL_CELL_TEXT, XLRDError, biff_text_from_num,

13 error_text_from_code,

14)

15from .book import Book, colname, open_workbook_xls

16from .compdoc import SIGNATURE as XLS_SIGNATURE

17from .formula import * # is constrained by __all__

18from .info import __VERSION__, __version__

19from .sheet import empty_cell

20from .xldate import XLDateError, xldate_as_datetime, xldate_as_tuple

#: Human-readable descriptions of the file types that :mod:`xlrd` can
#: :func:`inspect <inspect_format>`, keyed by the short format name.
#: The ``None`` key describes content whose type could not be determined.
FILE_FORMAT_DESCRIPTIONS = {
    'xls': 'Excel xls',
    'xlsb': 'Excel 2007 xlsb file',
    'xlsx': 'Excel xlsx file',
    'ods': 'Openoffice.org ODS file',
    'zip': 'Unknown ZIP file',
    None: 'Unknown file type',
}

# Leading bytes that the format sniffer checks for to recognise a ZIP container.
ZIP_SIGNATURE = b"PK\x03\x04"

# Number of leading bytes to read so that the longer of the two known
# signatures can be compared in full.
PEEK_SIZE = max(map(len, (XLS_SIGNATURE, ZIP_SIGNATURE)))

def inspect_format(path=None, content=None):
    """
    Inspect the content at the supplied path or the :class:`bytes` content provided
    and return the file's type as a :class:`str`, or ``None`` if it cannot
    be determined.

    :param path:
      A :class:`string <str>` path containing the content to inspect.
      ``~`` will be expanded. Only used when ``content`` is not supplied
      (or is empty).

    :param content:
      The :class:`bytes` content to inspect.

    :returns:
       A :class:`str`, or ``None`` if the format cannot be determined.
       The return value can always be looked up in :data:`FILE_FORMAT_DESCRIPTIONS`
       to return a human-readable description of the format found.
    """
    # Only the first few bytes are needed to recognise the container type.
    # Falsy content (``None`` or empty) means the data is read from ``path``.
    if content:
        peek = content[:PEEK_SIZE]
    else:
        path = os.path.expanduser(path)
        with open(path, "rb") as f:
            peek = f.read(PEEK_SIZE)

    if peek.startswith(XLS_SIGNATURE):
        return 'xls'

    if peek.startswith(ZIP_SIGNATURE):
        zip_source = timemachine.BYTES_IO(content) if content else path

        # Open the archive in a ``with`` block so that the underlying file
        # handle is closed as soon as the member names have been read.
        with zipfile.ZipFile(zip_source) as zf:
            # Workaround for some third party files that use backslashes
            # and/or unexpected letter case in member names: normalise every
            # name to forward slashes and lower case before comparing.
            component_names = {
                name.replace('\\', '/').lower() for name in zf.namelist()
            }

        if 'xl/workbook.xml' in component_names:
            return 'xlsx'
        if 'xl/workbook.bin' in component_names:
            return 'xlsb'
        if 'content.xml' in component_names:
            return 'ods'
        return 'zip'

    # Neither signature matched: the format is unknown.
    return None

def open_workbook(filename=None,
                  logfile=sys.stdout,
                  verbosity=0,
                  use_mmap=True,
                  file_contents=None,
                  encoding_override=None,
                  formatting_info=False,
                  on_demand=False,
                  ragged_rows=False,
                  ignore_workbook_corruption=False
                  ):
    """
    Open a spreadsheet file so that its data can be extracted.

    :param filename: The path to the spreadsheet file to be opened.

    :param logfile: An open file to which messages and diagnostics are written.

    :param verbosity: Increases the volume of trace material written to the
                      logfile.

    :param use_mmap:

      Whether to use the mmap module is determined heuristically.
      Use this arg to override the result.

      Current heuristic: mmap is used if it exists.

    :param file_contents:

      A string or an :class:`mmap.mmap` object or some other behave-alike
      object. If ``file_contents`` is supplied, ``filename`` will not be used,
      except (possibly) in messages.

    :param encoding_override:

      Used to overcome missing or bad codepage information
      in older-version files. See :doc:`unicode`.

    :param formatting_info:

      The default is ``False``, which saves memory.
      In this case, "Blank" cells, which are those with their own formatting
      information but no data, are treated as empty by ignoring the file's
      ``BLANK`` and ``MULBLANK`` records.
      This cuts off any bottom or right "margin" of rows of empty or blank
      cells.
      Only :meth:`~xlrd.sheet.Sheet.cell_value` and
      :meth:`~xlrd.sheet.Sheet.cell_type` are available.

      When ``True``, formatting information will be read from the spreadsheet
      file. This provides all cells, including empty and blank cells.
      Formatting information is available for each cell.

      Note that this will raise a NotImplementedError when used with an
      xlsx file.

    :param on_demand:

      Governs whether sheets are all loaded initially or when demanded
      by the caller. See :doc:`on_demand`.

    :param ragged_rows:

      The default of ``False`` means all rows are padded out with empty cells so
      that all rows have the same size as found in
      :attr:`~xlrd.sheet.Sheet.ncols`.

      ``True`` means that there are no empty cells at the ends of rows.
      This can result in substantial memory savings if rows are of widely
      varying sizes. See also the :meth:`~xlrd.sheet.Sheet.row_len` method.

    :param ignore_workbook_corruption:

      This option allows to read corrupted workbooks.
      When ``False`` you may face CompDocError: Workbook corruption.
      When ``True`` that exception will be ignored.

    :raises XLRDError:
      If the content is recognised as a format other than ``xls``.

    :returns: An instance of the :class:`~xlrd.book.Book` class.
    """
    detected = inspect_format(filename, file_contents)

    # Unrecognised content is deliberately let through: some ancient files
    # that xlrd can parse don't start with the expected signature. Only a
    # positively identified non-xls format is rejected.
    xls_or_unknown = not detected or detected == 'xls'
    if not xls_or_unknown:
        raise XLRDError(FILE_FORMAT_DESCRIPTIONS[detected]+'; not supported')

    return open_workbook_xls(
        filename=filename,
        logfile=logfile,
        verbosity=verbosity,
        use_mmap=use_mmap,
        file_contents=file_contents,
        encoding_override=encoding_override,
        formatting_info=formatting_info,
        on_demand=on_demand,
        ragged_rows=ragged_rows,
        ignore_workbook_corruption=ignore_workbook_corruption,
    )

186

187

def dump(filename, outfile=sys.stdout, unnumbered=False):
    """
    Debugging aid: write an XLS file's BIFF records out in char & hex form.

    :param filename: The path to the file to be dumped.
    :param outfile: An open file, to which the dump is written. It is also
                    passed to the loader as its logfile.
    :param unnumbered: If true, omit offsets (for meaningful diffs).
    """
    from .biffh import biff_dump

    book = Book()
    book.biff2_8_load(filename=filename, logfile=outfile)

    # Hand the loaded stream (buffer, start offset and length) to the dumper.
    mem, base, stream_len = book.mem, book.base, book.stream_len
    biff_dump(mem, base, stream_len, 0, outfile, unnumbered)

200

201

def count_records(filename, outfile=sys.stdout):
    """
    Debugging and analysis aid: summarise the BIFF records found in a file,
    ie: produce a sorted file of ``(record_name, count)``.

    :param filename: The path to the file to be summarised.
    :param outfile: An open file, to which the summary is written. It is also
                    passed to the loader as its logfile.
    """
    from .biffh import biff_count_records

    book = Book()
    book.biff2_8_load(filename=filename, logfile=outfile)

    # Hand the loaded stream (buffer, start offset and length) to the counter.
    mem, base, stream_len = book.mem, book.base, book.stream_len
    biff_count_records(mem, base, stream_len, outfile)

Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/__init__.py: 82%

49 statements