Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/__init__.py: 82%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
2# This module is part of the xlrd package, which is released under a
3# BSD-style licence.
4import os
5import pprint
6import sys
7import zipfile
9from . import timemachine
10from .biffh import (
11 XL_CELL_BLANK, XL_CELL_BOOLEAN, XL_CELL_DATE, XL_CELL_EMPTY, XL_CELL_ERROR,
12 XL_CELL_NUMBER, XL_CELL_TEXT, XLRDError, biff_text_from_num,
13 error_text_from_code,
14)
15from .book import Book, colname, open_workbook_xls
16from .compdoc import SIGNATURE as XLS_SIGNATURE
17from .formula import * # is constrained by __all__
18from .info import __VERSION__, __version__
19from .sheet import empty_cell
20from .xldate import XLDateError, xldate_as_datetime, xldate_as_tuple
#: Human-readable descriptions of the file types that :mod:`xlrd` can
#: :func:`inspect <inspect_format>`, keyed by the value that function returns.
FILE_FORMAT_DESCRIPTIONS = {
    'xls': 'Excel xls',
    'xlsb': 'Excel 2007 xlsb file',
    'xlsx': 'Excel xlsx file',
    'ods': 'Openoffice.org ODS file',
    'zip': 'Unknown ZIP file',
    None: 'Unknown file type',
}

# Leading bytes used to recognise a ZIP archive.
ZIP_SIGNATURE = b"PK\x03\x04"

# Number of leading bytes to read so that the longer of the two signatures
# can be matched in full.
PEEK_SIZE = max(map(len, (XLS_SIGNATURE, ZIP_SIGNATURE)))
def inspect_format(path=None, content=None):
    """
    Inspect the content at the supplied path or the :class:`bytes` content provided
    and return the file's type as a :class:`str`, or ``None`` if it cannot
    be determined.

    :param path:
      A :class:`string <str>` path containing the content to inspect.
      ``~`` will be expanded. The path is only read when ``content`` is
      empty or ``None``.

    :param content:
      The :class:`bytes` content to inspect.

    :returns:
       A :class:`str`, or ``None`` if the format cannot be determined.
       The return value can always be looked up in :data:`FILE_FORMAT_DESCRIPTIONS`
       to return a human-readable description of the format found.
    """
    # Only the first few bytes are needed to tell the container types apart.
    if content:
        peek = content[:PEEK_SIZE]
    else:
        path = os.path.expanduser(path)
        with open(path, "rb") as f:
            peek = f.read(PEEK_SIZE)

    if peek.startswith(XLS_SIGNATURE):
        return 'xls'

    if peek.startswith(ZIP_SIGNATURE):
        source = timemachine.BYTES_IO(content) if content else path
        # Open the archive as a context manager so the underlying file
        # handle is released as soon as the member list has been read.
        with zipfile.ZipFile(source) as zf:
            # Workaround for some third party files that use backslashes
            # and/or different letter case in member names: normalise every
            # name to forward slashes and lower case before comparing.
            component_names = {
                name.replace('\\', '/').lower() for name in zf.namelist()
            }

        if 'xl/workbook.xml' in component_names:
            return 'xlsx'
        if 'xl/workbook.bin' in component_names:
            return 'xlsb'
        if 'content.xml' in component_names:
            return 'ods'
        return 'zip'

    # Neither signature matched: the format is unknown.
    return None
def open_workbook(filename=None,
                  logfile=sys.stdout,
                  verbosity=0,
                  use_mmap=True,
                  file_contents=None,
                  encoding_override=None,
                  formatting_info=False,
                  on_demand=False,
                  ragged_rows=False,
                  ignore_workbook_corruption=False
                  ):
    """
    Open a spreadsheet file so that data can be extracted from it.

    :param filename: Path of the spreadsheet file to open.

    :param logfile: An open file that receives messages and diagnostics.

    :param verbosity: Higher values write more trace material to the logfile.

    :param use_mmap:

      Whether the mmap module is used is decided heuristically; pass this
      argument to override that decision.

      Current heuristic: mmap is used if it exists.

    :param file_contents:

      A string, an :class:`mmap.mmap` object, or another object that behaves
      alike. When ``file_contents`` is supplied, ``filename`` is not used,
      except (possibly) in messages.

    :param encoding_override:

      Used to work around missing or bad codepage information in
      older-version files. See :doc:`unicode`.

    :param formatting_info:

      The default is ``False``, which saves memory.
      In that case "Blank" cells (cells that carry their own formatting
      information but no data) are treated as empty, by ignoring the file's
      ``BLANK`` and ``MULBLANK`` records.
      This cuts off any bottom or right "margin" of rows of empty or blank
      cells.
      Only :meth:`~xlrd.sheet.Sheet.cell_value` and
      :meth:`~xlrd.sheet.Sheet.cell_type` are available.

      When ``True``, formatting information is read from the spreadsheet
      file. This provides all cells, including empty and blank cells, and
      formatting information is available for each cell.

    :param on_demand:

      Governs whether all sheets are loaded up front or only when the caller
      asks for them. See :doc:`on_demand`.

    :param ragged_rows:

      The default of ``False`` pads every row with empty cells so that all
      rows have the size given by :attr:`~xlrd.sheet.Sheet.ncols`.

      ``True`` means rows have no empty cells at their ends. This can save a
      substantial amount of memory when row sizes vary widely. See also the
      :meth:`~xlrd.sheet.Sheet.row_len` method.

    :param ignore_workbook_corruption:

      Allows corrupted workbooks to be read.
      When ``False`` you may face CompDocError: Workbook corruption.
      When ``True`` that exception will be ignored.

    :raises XLRDError:
      If the input is recognised as a format other than ``xls``.

    :returns: An instance of the :class:`~xlrd.book.Book` class.
    """
    detected = inspect_format(filename, file_contents)

    # An undetermined format (``None``) is deliberately let through: some
    # ancient files that xlrd can parse don't start with the expected
    # signature. Only positively identified non-xls formats are rejected.
    if detected and detected != 'xls':
        description = FILE_FORMAT_DESCRIPTIONS[detected]
        raise XLRDError(description+'; not supported')

    options = dict(
        filename=filename,
        logfile=logfile,
        verbosity=verbosity,
        use_mmap=use_mmap,
        file_contents=file_contents,
        encoding_override=encoding_override,
        formatting_info=formatting_info,
        on_demand=on_demand,
        ragged_rows=ragged_rows,
        ignore_workbook_corruption=ignore_workbook_corruption,
    )
    return open_workbook_xls(**options)
def dump(filename, outfile=sys.stdout, unnumbered=False):
    """
    Debugging aid: write an XLS file's BIFF records, in char & hex, to
    ``outfile``.

    :param filename: The path to the file to be dumped.
    :param outfile: An open file, to which the dump is written. It is also
                    passed to the loader as its logfile.
    :param unnumbered: If true, omit offsets (for meaningful diffs).
    """
    from .biffh import biff_dump

    book = Book()
    book.biff2_8_load(filename=filename, logfile=outfile)
    memory, start, length = book.mem, book.base, book.stream_len
    biff_dump(memory, start, length, 0, outfile, unnumbered)
def count_records(filename, outfile=sys.stdout):
    """
    Debugging and analysis aid: summarise the file's BIFF records,
    ie: produce a sorted file of ``(record_name, count)``.

    :param filename: The path to the file to be summarised.
    :param outfile: An open file, to which the summary is written. It is also
                    passed to the loader as its logfile.
    """
    from .biffh import biff_count_records

    book = Book()
    book.biff2_8_load(filename=filename, logfile=outfile)
    memory, start, length = book.mem, book.base, book.stream_len
    biff_count_records(memory, start, length, outfile)