Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/book.py: 58%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd
2# This module is part of the xlrd package, which is released under a
3# BSD-style licence.
5from __future__ import print_function
7import struct
9from . import compdoc, formatting, sheet
10from .biffh import *
11from .formula import *
12from .timemachine import *
14try:
15 from time import perf_counter
16except ImportError:
17 # Python 2.7
18 from time import clock as perf_counter
20from struct import unpack
22empty_cell = sheet.empty_cell # for exposure to the world ...
24DEBUG = 0
26import mmap
28MY_EOF = 0xF00BAAA # not a 16-bit number
30SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5)
32SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20)
34_code_from_builtin_name = {
35 "Consolidate_Area": "\x00",
36 "Auto_Open": "\x01",
37 "Auto_Close": "\x02",
38 "Extract": "\x03",
39 "Database": "\x04",
40 "Criteria": "\x05",
41 "Print_Area": "\x06",
42 "Print_Titles": "\x07",
43 "Recorder": "\x08",
44 "Data_Form": "\x09",
45 "Auto_Activate": "\x0A",
46 "Auto_Deactivate": "\x0B",
47 "Sheet_Title": "\x0C",
48 "_FilterDatabase": "\x0D",
49}
50builtin_name_from_code = {}
51code_from_builtin_name = {}
52for _bin, _bic in _code_from_builtin_name.items():
53 _bin = UNICODE_LITERAL(_bin)
54 _bic = UNICODE_LITERAL(_bic)
55 code_from_builtin_name[_bin] = _bic
56 builtin_name_from_code[_bic] = _bin
57del _bin, _bic, _code_from_builtin_name
59def open_workbook_xls(filename=None,
60 logfile=sys.stdout, verbosity=0, use_mmap=True,
61 file_contents=None,
62 encoding_override=None,
63 formatting_info=False, on_demand=False, ragged_rows=False,
64 ignore_workbook_corruption=False):
65 t0 = perf_counter()
66 bk = Book()
67 try:
68 bk.biff2_8_load(
69 filename=filename, file_contents=file_contents,
70 logfile=logfile, verbosity=verbosity, use_mmap=use_mmap,
71 encoding_override=encoding_override,
72 formatting_info=formatting_info,
73 on_demand=on_demand,
74 ragged_rows=ragged_rows,
75 ignore_workbook_corruption=ignore_workbook_corruption
76 )
77 t1 = perf_counter()
78 bk.load_time_stage_1 = t1 - t0
79 biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
80 if not biff_version:
81 raise XLRDError("Can't determine file's BIFF version")
82 if biff_version not in SUPPORTED_VERSIONS:
83 raise XLRDError(
84 "BIFF version %s is not supported"
85 % biff_text_from_num[biff_version]
86 )
87 bk.biff_version = biff_version
88 if biff_version <= 40:
89 # no workbook globals, only 1 worksheet
90 if on_demand:
91 fprintf(bk.logfile,
92 "*** WARNING: on_demand is not supported for this Excel version.\n"
93 "*** Setting on_demand to False.\n")
94 bk.on_demand = on_demand = False
95 bk.fake_globals_get_sheet()
96 elif biff_version == 45:
97 # worksheet(s) embedded in global stream
98 bk.parse_globals()
99 if on_demand:
100 fprintf(bk.logfile, "*** WARNING: on_demand is not supported for this Excel version.\n"
101 "*** Setting on_demand to False.\n")
102 bk.on_demand = on_demand = False
103 else:
104 bk.parse_globals()
105 bk._sheet_list = [None for sh in bk._sheet_names]
106 if not on_demand:
107 bk.get_sheets()
108 bk.nsheets = len(bk._sheet_list)
109 if biff_version == 45 and bk.nsheets > 1:
110 fprintf(
111 bk.logfile,
112 "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n"
113 "*** Book-level data will be that of the last worksheet.\n",
114 bk.nsheets
115 )
116 t2 = perf_counter()
117 bk.load_time_stage_2 = t2 - t1
118 except:
119 bk.release_resources()
120 raise
121 # normal exit
122 if not on_demand:
123 bk.release_resources()
124 return bk
127class Name(BaseObject):
128 """
129 Information relating to a named reference, formula, macro, etc.
131 .. note::
133 Name information is **not** extracted from files older than
134 Excel 5.0 (``Book.biff_version < 50``)
135 """
136 _repr_these = ['stack']
137 book = None # parent
139 #: 0 = Visible; 1 = Hidden
140 hidden = 0
142 #: 0 = Command macro; 1 = Function macro. Relevant only if macro == 1
143 func = 0
145 #: 0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1
146 vbasic = 0
148 #: 0 = Standard name; 1 = Macro name
149 macro = 0
151 #: 0 = Simple formula; 1 = Complex formula (array formula or user defined).
152 #:
153 #: .. note:: No examples have been sighted.
154 complex = 0
156 #: 0 = User-defined name; 1 = Built-in name
157 #:
158 #: Common examples: ``Print_Area``, ``Print_Titles``; see OOo docs for
159 #: full list
160 builtin = 0
162 #: Function group. Relevant only if macro == 1; see OOo docs for values.
163 funcgroup = 0
165 #: 0 = Formula definition; 1 = Binary data
166 #:
167 #: .. note:: No examples have been sighted.
168 binary = 0
170 #: The index of this object in book.name_obj_list
171 name_index = 0
173 # A Unicode string. If builtin, decoded as per OOo docs.
174 name = UNICODE_LITERAL("")
176 #: An 8-bit string.
177 raw_formula = b''
179 #: ``-1``:
180 #: The name is global (visible in all calculation sheets).
181 #: ``-2``:
182 #: The name belongs to a macro sheet or VBA sheet.
183 #: ``-3``:
184 #: The name is invalid.
185 #: ``0 <= scope < book.nsheets``:
186 #: The name is local to the sheet whose index is scope.
187 scope = -1
189 #: The result of evaluating the formula, if any.
190 #: If no formula, or evaluation of the formula encountered problems,
191 #: the result is ``None``. Otherwise the result is a single instance of the
192 #: :class:`~xlrd.formula.Operand` class.
193 #
194 result = None
196 def cell(self):
197 """
198 This is a convenience method for the frequent use case where the name
199 refers to a single cell.
201 :returns: An instance of the :class:`~xlrd.sheet.Cell` class.
203 :raises xlrd.biffh.XLRDError:
204 The name is not a constant absolute reference
205 to a single cell.
206 """
207 res = self.result
208 if res:
209 # result should be an instance of the Operand class
210 kind = res.kind
211 value = res.value
212 if kind == oREF and len(value) == 1:
213 ref3d = value[0]
214 if (0 <= ref3d.shtxlo == ref3d.shtxhi - 1 and
215 ref3d.rowxlo == ref3d.rowxhi - 1 and
216 ref3d.colxlo == ref3d.colxhi - 1):
217 sh = self.book.sheet_by_index(ref3d.shtxlo)
218 return sh.cell(ref3d.rowxlo, ref3d.colxlo)
219 self.dump(
220 self.book.logfile,
221 header="=== Dump of Name object ===",
222 footer="======= End of dump =======",
223 )
224 raise XLRDError("Not a constant absolute reference to a single cell")
226 def area2d(self, clipped=True):
227 """
228 This is a convenience method for the use case where the name
229 refers to one rectangular area in one worksheet.
231 :param clipped:
232 If ``True``, the default, the returned rectangle is clipped
233 to fit in ``(0, sheet.nrows, 0, sheet.ncols)``.
234 it is guaranteed that ``0 <= rowxlo <= rowxhi <= sheet.nrows`` and
235 that the number of usable rows in the area (which may be zero) is
236 ``rowxhi - rowxlo``; likewise for columns.
238 :returns: a tuple ``(sheet_object, rowxlo, rowxhi, colxlo, colxhi)``.
240 :raises xlrd.biffh.XLRDError:
241 The name is not a constant absolute reference
242 to a single area in a single sheet.
243 """
244 res = self.result
245 if res:
246 # result should be an instance of the Operand class
247 kind = res.kind
248 value = res.value
249 if kind == oREF and len(value) == 1: # only 1 reference
250 ref3d = value[0]
251 if 0 <= ref3d.shtxlo == ref3d.shtxhi - 1: # only 1 usable sheet
252 sh = self.book.sheet_by_index(ref3d.shtxlo)
253 if not clipped:
254 return sh, ref3d.rowxlo, ref3d.rowxhi, ref3d.colxlo, ref3d.colxhi
255 rowxlo = min(ref3d.rowxlo, sh.nrows)
256 rowxhi = max(rowxlo, min(ref3d.rowxhi, sh.nrows))
257 colxlo = min(ref3d.colxlo, sh.ncols)
258 colxhi = max(colxlo, min(ref3d.colxhi, sh.ncols))
259 assert 0 <= rowxlo <= rowxhi <= sh.nrows
260 assert 0 <= colxlo <= colxhi <= sh.ncols
261 return sh, rowxlo, rowxhi, colxlo, colxhi
262 self.dump(
263 self.book.logfile,
264 header="=== Dump of Name object ===",
265 footer="======= End of dump =======",
266 )
267 raise XLRDError("Not a constant absolute reference to a single area in a single sheet")
270class Book(BaseObject):
271 """
272 Contents of a "workbook".
274 .. warning::
276 You should not instantiate this class yourself. You use the :class:`Book`
277 object that was returned when you called :func:`~xlrd.open_workbook`.
278 """
280 #: The number of worksheets present in the workbook file.
281 #: This information is available even when no sheets have yet been loaded.
282 nsheets = 0
284 #: Which date system was in force when this file was last saved.
285 #:
286 #: 0:
287 #: 1900 system (the Excel for Windows default).
288 #:
289 #: 1:
290 #: 1904 system (the Excel for Macintosh default).
291 #:
292 #: Defaults to 0 in case it's not specified in the file.
293 datemode = 0
295 #: Version of BIFF (Binary Interchange File Format) used to create the file.
296 #: Latest is 8.0 (represented here as 80), introduced with Excel 97.
297 #: Earliest supported by this module: 2.0 (represented as 20).
298 biff_version = 0
300 #: List containing a :class:`Name` object for each ``NAME`` record in the
301 #: workbook.
302 #:
303 #: .. versionadded:: 0.6.0
304 name_obj_list = []
306 #: An integer denoting the character set used for strings in this file.
307 #: For BIFF 8 and later, this will be 1200, meaning Unicode;
308 #: more precisely, UTF_16_LE.
309 #: For earlier versions, this is used to derive the appropriate Python
310 #: encoding to be used to convert to Unicode.
311 #: Examples: ``1252 -> 'cp1252'``, ``10000 -> 'mac_roman'``
312 codepage = None
314 #: The encoding that was derived from the codepage.
315 encoding = None
317 #: A tuple containing the telephone country code for:
318 #:
319 #: ``[0]``:
320 #: the user-interface setting when the file was created.
321 #:
322 #: ``[1]``:
323 #: the regional settings.
324 #:
325 #: Example: ``(1, 61)`` meaning ``(USA, Australia)``.
326 #:
327 #: This information may give a clue to the correct encoding for an
328 #: unknown codepage. For a long list of observed values, refer to the
329 #: OpenOffice.org documentation for the ``COUNTRY`` record.
330 countries = (0, 0)
332 #: What (if anything) is recorded as the name of the last user to
333 #: save the file.
334 user_name = UNICODE_LITERAL('')
336 #: A list of :class:`~xlrd.formatting.Font` class instances,
337 #: each corresponding to a FONT record.
338 #:
339 #: .. versionadded:: 0.6.1
340 font_list = []
342 #: A list of :class:`~xlrd.formatting.XF` class instances,
343 #: each corresponding to an ``XF`` record.
344 #:
345 #: .. versionadded:: 0.6.1
346 xf_list = []
348 #: A list of :class:`~xlrd.formatting.Format` objects, each corresponding to
349 #: a ``FORMAT`` record, in the order that they appear in the input file.
350 #: It does *not* contain builtin formats.
351 #:
352 #: If you are creating an output file using (for example) :mod:`xlwt`,
353 #: use this list.
354 #:
355 #: The collection to be used for all visual rendering purposes is
356 #: :attr:`format_map`.
357 #:
358 #: .. versionadded:: 0.6.1
359 format_list = []
361 ##
362 #: The mapping from :attr:`~xlrd.formatting.XF.format_key` to
363 #: :class:`~xlrd.formatting.Format` object.
364 #:
365 #: .. versionadded:: 0.6.1
366 format_map = {}
368 #: This provides access via name to the extended format information for
369 #: both built-in styles and user-defined styles.
370 #:
371 #: It maps ``name`` to ``(built_in, xf_index)``, where
372 #: ``name`` is either the name of a user-defined style,
373 #: or the name of one of the built-in styles. Known built-in names are
374 #: Normal, RowLevel_1 to RowLevel_7,
375 #: ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",
376 #: "Currency [0]", Hyperlink, and "Followed Hyperlink".
377 #:
378 #: ``built_in`` has the following meanings
379 #:
380 #: 1:
381 #: built-in style
382 #:
383 #: 0:
384 #: user-defined
385 #:
386 #: ``xf_index`` is an index into :attr:`Book.xf_list`.
387 #:
388 #: References: OOo docs s6.99 (``STYLE`` record); Excel UI Format/Style
389 #:
390 #: .. versionadded:: 0.6.1
391 #:
392 #: Extracted only if ``open_workbook(..., formatting_info=True)``
393 #:
394 #: .. versionadded:: 0.7.4
395 style_name_map = {}
397 #: This provides definitions for colour indexes. Please refer to
398 #: :ref:`palette` for an explanation
399 #: of how colours are represented in Excel.
400 #:
401 #: Colour indexes into the palette map into ``(red, green, blue)`` tuples.
402 #: "Magic" indexes e.g. ``0x7FFF`` map to ``None``.
403 #:
404 #: :attr:`colour_map` is what you need if you want to render cells on screen
405 #: or in a PDF file. If you are writing an output XLS file, use
406 #: :attr:`palette_record`.
407 #:
408 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)``
409 #:
410 #: .. versionadded:: 0.6.1
411 colour_map = {}
413 #: If the user has changed any of the colours in the standard palette, the
414 #: XLS file will contain a ``PALETTE`` record with 56 (16 for Excel 4.0 and
415 #: earlier) RGB values in it, and this list will be e.g.
416 #: ``[(r0, b0, g0), ..., (r55, b55, g55)]``.
417 #: Otherwise this list will be empty. This is what you need if you are
418 #: writing an output XLS file. If you want to render cells on screen or in a
419 #: PDF file, use :attr:`colour_map`.
420 #:
421 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)``
422 #:
423 #: .. versionadded:: 0.6.1
424 palette_record = []
426 #: Time in seconds to extract the XLS image as a contiguous string
427 #: (or mmap equivalent).
428 load_time_stage_1 = -1.0
430 #: Time in seconds to parse the data from the contiguous string
431 #: (or mmap equivalent).
432 load_time_stage_2 = -1.0
434 def sheets(self):
435 """
436 :returns: A list of all sheets in the book.
438 All sheets not already loaded will be loaded.
439 """
440 for sheetx in xrange(self.nsheets):
441 if not self._sheet_list[sheetx]:
442 self.get_sheet(sheetx)
443 return self._sheet_list[:]
445 def sheet_by_index(self, sheetx):
446 """
447 :param sheetx: Sheet index in ``range(nsheets)``
448 :returns: A :class:`~xlrd.sheet.Sheet`.
449 """
450 return self._sheet_list[sheetx] or self.get_sheet(sheetx)
452 def __iter__(self):
453 """
454 Makes iteration through sheets of a book a little more straightforward.
455 Don't free resources after use since it can be called like `list(book)`
456 """
457 for i in range(self.nsheets):
458 yield self.sheet_by_index(i)
460 def sheet_by_name(self, sheet_name):
461 """
462 :param sheet_name: Name of the sheet required.
463 :returns: A :class:`~xlrd.sheet.Sheet`.
464 """
465 try:
466 sheetx = self._sheet_names.index(sheet_name)
467 except ValueError:
468 raise XLRDError('No sheet named <%r>' % sheet_name)
469 return self.sheet_by_index(sheetx)
471 def __getitem__(self, item):
472 """
473 Allow indexing with sheet name or index.
474 :param item: Name or index of sheet enquired upon
475 :return: :class:`~xlrd.sheet.Sheet`.
476 """
477 if isinstance(item, int):
478 return self.sheet_by_index(item)
479 else:
480 return self.sheet_by_name(item)
482 def sheet_names(self):
483 """
484 :returns:
485 A list of the names of all the worksheets in the workbook file.
486 This information is available even when no sheets have yet been
487 loaded.
488 """
489 return self._sheet_names[:]
491 def sheet_loaded(self, sheet_name_or_index):
492 """
493 :param sheet_name_or_index: Name or index of sheet enquired upon
494 :returns: ``True`` if sheet is loaded, ``False`` otherwise.
496 .. versionadded:: 0.7.1
497 """
498 if isinstance(sheet_name_or_index, int):
499 sheetx = sheet_name_or_index
500 else:
501 try:
502 sheetx = self._sheet_names.index(sheet_name_or_index)
503 except ValueError:
504 raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
505 return bool(self._sheet_list[sheetx])
507 def unload_sheet(self, sheet_name_or_index):
508 """
509 :param sheet_name_or_index: Name or index of sheet to be unloaded.
511 .. versionadded:: 0.7.1
512 """
513 if isinstance(sheet_name_or_index, int):
514 sheetx = sheet_name_or_index
515 else:
516 try:
517 sheetx = self._sheet_names.index(sheet_name_or_index)
518 except ValueError:
519 raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
520 self._sheet_list[sheetx] = None
522 def release_resources(self):
523 """
524 This method has a dual purpose. You can call it to release
525 memory-consuming objects and (possibly) a memory-mapped file
526 (:class:`mmap.mmap` object) when you have finished loading sheets in
527 ``on_demand`` mode, but still require the :class:`Book` object to
528 examine the loaded sheets. It is also called automatically (a) when
529 :func:`~xlrd.open_workbook`
530 raises an exception and (b) if you are using a ``with`` statement, when
531 the ``with`` block is exited. Calling this method multiple times on the
532 same object has no ill effect.
533 """
534 self._resources_released = 1
535 if hasattr(self.mem, "close"):
536 # must be a mmap.mmap object
537 self.mem.close()
538 self.mem = None
539 if hasattr(self.filestr, "close"):
540 self.filestr.close()
541 self.filestr = None
542 self._sharedstrings = None
543 self._rich_text_runlist_map = None
545 def __enter__(self):
546 return self
548 def __exit__(self, exc_type, exc_value, exc_tb):
549 self.release_resources()
550 # return false
552 #: A mapping from ``(lower_case_name, scope)`` to a single :class:`Name`
553 #: object.
554 #:
555 #: .. versionadded:: 0.6.0
556 name_and_scope_map = {}
558 #: A mapping from `lower_case_name` to a list of :class:`Name` objects.
559 #: The list is sorted in scope order. Typically there will be one item
560 #: (of global scope) in the list.
561 #:
562 #: .. versionadded:: 0.6.0
563 name_map = {}
565 def __init__(self):
566 self._sheet_list = []
567 self._sheet_names = []
568 self._sheet_visibility = [] # from BOUNDSHEET record
569 self.nsheets = 0
570 self._sh_abs_posn = [] # sheet's absolute position in the stream
571 self._sharedstrings = []
572 self._rich_text_runlist_map = {}
573 self.raw_user_name = False
574 self._sheethdr_count = 0 # BIFF 4W only
575 self.builtinfmtcount = -1 # unknown as yet. BIFF 3, 4S, 4W
576 self.initialise_format_info()
577 self._all_sheets_count = 0 # includes macro & VBA sheets
578 self._supbook_count = 0
579 self._supbook_locals_inx = None
580 self._supbook_addins_inx = None
581 self._all_sheets_map = [] # maps an all_sheets index to a calc-sheets index (or -1)
582 self._externsheet_info = []
583 self._externsheet_type_b57 = []
584 self._extnsht_name_from_num = {}
585 self._sheet_num_from_name = {}
586 self._extnsht_count = 0
587 self._supbook_types = []
588 self._resources_released = 0
589 self.addin_func_names = []
590 self.name_obj_list = []
591 self.colour_map = {}
592 self.palette_record = []
593 self.xf_list = []
594 self.style_name_map = {}
595 self.mem = b''
596 self.filestr = b''
598 def biff2_8_load(self, filename=None, file_contents=None,
599 logfile=sys.stdout, verbosity=0, use_mmap=True,
600 encoding_override=None,
601 formatting_info=False,
602 on_demand=False,
603 ragged_rows=False,
604 ignore_workbook_corruption=False
605 ):
606 # DEBUG = 0
607 self.logfile = logfile
608 self.verbosity = verbosity
609 self.use_mmap = use_mmap
610 self.encoding_override = encoding_override
611 self.formatting_info = formatting_info
612 self.on_demand = on_demand
613 self.ragged_rows = ragged_rows
615 if not file_contents:
616 with open(filename, "rb") as f:
617 f.seek(0, 2) # EOF
618 size = f.tell()
619 f.seek(0, 0) # BOF
620 if size == 0:
621 raise XLRDError("File size is 0 bytes")
622 if self.use_mmap:
623 self.filestr = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
624 self.stream_len = size
625 else:
626 self.filestr = f.read()
627 self.stream_len = len(self.filestr)
628 else:
629 self.filestr = file_contents
630 self.stream_len = len(file_contents)
632 self.base = 0
633 if self.filestr[:8] != compdoc.SIGNATURE:
634 # got this one at the antique store
635 self.mem = self.filestr
636 else:
637 cd = compdoc.CompDoc(self.filestr, logfile=self.logfile,
638 ignore_workbook_corruption=ignore_workbook_corruption)
639 for qname in ['Workbook', 'Book']:
640 self.mem, self.base, self.stream_len = \
641 cd.locate_named_stream(UNICODE_LITERAL(qname))
642 if self.mem:
643 break
644 else:
645 raise XLRDError("Can't find workbook in OLE2 compound document")
646 del cd
647 if self.mem is not self.filestr:
648 if hasattr(self.filestr, "close"):
649 self.filestr.close()
650 self.filestr = b''
651 self._position = self.base
652 if DEBUG:
653 print("mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len), file=self.logfile)
655 def initialise_format_info(self):
656 # needs to be done once per sheet for BIFF 4W :-(
657 self.format_map = {}
658 self.format_list = []
659 self.xfcount = 0
660 self.actualfmtcount = 0 # number of FORMAT records seen so far
661 self._xf_index_to_xl_type_map = {0: XL_CELL_NUMBER}
662 self._xf_epilogue_done = 0
663 self.xf_list = []
664 self.font_list = []
666 def get2bytes(self):
667 pos = self._position
668 buff_two = self.mem[pos:pos+2]
669 lenbuff = len(buff_two)
670 self._position += lenbuff
671 if lenbuff < 2:
672 return MY_EOF
673 lo, hi = buff_two
674 return (BYTES_ORD(hi) << 8) | BYTES_ORD(lo)
676 def get_record_parts(self):
677 pos = self._position
678 mem = self.mem
679 code, length = unpack('<HH', mem[pos:pos+4])
680 pos += 4
681 data = mem[pos:pos+length]
682 self._position = pos + length
683 return (code, length, data)
685 def get_record_parts_conditional(self, reqd_record):
686 pos = self._position
687 mem = self.mem
688 code, length = unpack('<HH', mem[pos:pos+4])
689 if code != reqd_record:
690 return (None, 0, b'')
691 pos += 4
692 data = mem[pos:pos+length]
693 self._position = pos + length
694 return (code, length, data)
696 def get_sheet(self, sh_number, update_pos=True):
697 if self._resources_released:
698 raise XLRDError("Can't load sheets after releasing resources.")
699 if update_pos:
700 self._position = self._sh_abs_posn[sh_number]
701 self.getbof(XL_WORKSHEET)
702 # assert biff_version == self.biff_version ### FAILS
703 # Have an example where book is v7 but sheet reports v8!!!
704 # It appears to work OK if the sheet version is ignored.
705 # Confirmed by Daniel Rentz: happens when Excel does "save as"
706 # creating an old version file; ignore version details on sheet BOF.
707 sh = sheet.Sheet(
708 self,
709 self._position,
710 self._sheet_names[sh_number],
711 sh_number,
712 )
713 sh.read(self)
714 self._sheet_list[sh_number] = sh
715 return sh
717 def get_sheets(self):
718 # DEBUG = 0
719 if DEBUG: print("GET_SHEETS:", self._sheet_names, self._sh_abs_posn, file=self.logfile)
720 for sheetno in xrange(len(self._sheet_names)):
721 if DEBUG: print("GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn, file=self.logfile)
722 self.get_sheet(sheetno)
724 def fake_globals_get_sheet(self): # for BIFF 4.0 and earlier
725 formatting.initialise_book(self)
726 fake_sheet_name = UNICODE_LITERAL('Sheet 1')
727 self._sheet_names = [fake_sheet_name]
728 self._sh_abs_posn = [0]
729 self._sheet_visibility = [0] # one sheet, visible
730 self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand
731 self.get_sheets()
733 def handle_boundsheet(self, data):
734 # DEBUG = 1
735 bv = self.biff_version
736 self.derive_encoding()
737 if DEBUG:
738 fprintf(self.logfile, "BOUNDSHEET: bv=%d data %r\n", bv, data)
739 if bv == 45: # BIFF4W
740 #### Not documented in OOo docs ...
741 # In fact, the *only* data is the name of the sheet.
742 sheet_name = unpack_string(data, 0, self.encoding, lenlen=1)
743 visibility = 0
744 sheet_type = XL_BOUNDSHEET_WORKSHEET # guess, patch later
745 if len(self._sh_abs_posn) == 0:
746 abs_posn = self._sheetsoffset + self.base
747 # Note (a) this won't be used
748 # (b) it's the position of the SHEETHDR record
749 # (c) add 11 to get to the worksheet BOF record
750 else:
751 abs_posn = -1 # unknown
752 else:
753 offset, visibility, sheet_type = unpack('<iBB', data[0:6])
754 abs_posn = offset + self.base # because global BOF is always at posn 0 in the stream
755 if bv < BIFF_FIRST_UNICODE:
756 sheet_name = unpack_string(data, 6, self.encoding, lenlen=1)
757 else:
758 sheet_name = unpack_unicode(data, 6, lenlen=1)
760 if DEBUG or self.verbosity >= 2:
761 fprintf(self.logfile,
762 "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n",
763 self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type)
764 self._all_sheets_count += 1
765 if sheet_type != XL_BOUNDSHEET_WORKSHEET:
766 self._all_sheets_map.append(-1)
767 descr = {
768 1: 'Macro sheet',
769 2: 'Chart',
770 6: 'Visual Basic module',
771 }.get(sheet_type, 'UNKNOWN')
773 if DEBUG or self.verbosity >= 1:
774 fprintf(self.logfile,
775 "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n",
776 sheet_name, sheet_type, descr)
777 else:
778 snum = len(self._sheet_names)
779 self._all_sheets_map.append(snum)
780 self._sheet_names.append(sheet_name)
781 self._sh_abs_posn.append(abs_posn)
782 self._sheet_visibility.append(visibility)
783 self._sheet_num_from_name[sheet_name] = snum
785 def handle_builtinfmtcount(self, data):
786 ### N.B. This count appears to be utterly useless.
787 # DEBUG = 1
788 builtinfmtcount = unpack('<H', data[0:2])[0]
789 if DEBUG: fprintf(self.logfile, "BUILTINFMTCOUNT: %r\n", builtinfmtcount)
790 self.builtinfmtcount = builtinfmtcount
792 def derive_encoding(self):
793 if self.encoding_override:
794 self.encoding = self.encoding_override
795 elif self.codepage is None:
796 if self.biff_version < 80:
797 fprintf(self.logfile,
798 "*** No CODEPAGE record, no encoding_override: will use 'iso-8859-1'\n")
799 self.encoding = 'iso-8859-1'
800 else:
801 self.codepage = 1200 # utf16le
802 if self.verbosity >= 2:
803 fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n")
804 else:
805 codepage = self.codepage
806 if codepage in encoding_from_codepage:
807 encoding = encoding_from_codepage[codepage]
808 elif 300 <= codepage <= 1999:
809 encoding = 'cp' + str(codepage)
810 elif self.biff_version >= 80:
811 self.codepage = 1200
812 encoding = 'utf_16_le'
813 else:
814 encoding = 'unknown_codepage_' + str(codepage)
815 if DEBUG or (self.verbosity and encoding != self.encoding) :
816 fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", codepage, encoding)
817 self.encoding = encoding
818 if self.codepage != 1200: # utf_16_le
819 # If we don't have a codec that can decode ASCII into Unicode,
820 # we're well & truly stuffed -- let the punter know ASAP.
821 try:
822 unicode(b'trial', self.encoding)
823 except BaseException as e:
824 fprintf(self.logfile,
825 "ERROR *** codepage %r -> encoding %r -> %s: %s\n",
826 self.codepage, self.encoding, type(e).__name__.split(".")[-1], e)
827 raise
828 if self.raw_user_name:
829 strg = unpack_string(self.user_name, 0, self.encoding, lenlen=1)
830 strg = strg.rstrip()
831 # if DEBUG:
832 # print "CODEPAGE: user name decoded from %r to %r" % (self.user_name, strg)
833 self.user_name = strg
834 self.raw_user_name = False
835 return self.encoding
837 def handle_codepage(self, data):
838 # DEBUG = 0
839 codepage = unpack('<H', data[0:2])[0]
840 self.codepage = codepage
841 self.derive_encoding()
843 def handle_country(self, data):
844 countries = unpack('<HH', data[0:4])
845 if self.verbosity: print("Countries:", countries, file=self.logfile)
846 # Note: in BIFF7 and earlier, country record was put (redundantly?) in each worksheet.
847 assert self.countries == (0, 0) or self.countries == countries
848 self.countries = countries
850 def handle_datemode(self, data):
851 datemode = unpack('<H', data[0:2])[0]
852 if DEBUG or self.verbosity:
853 fprintf(self.logfile, "DATEMODE: datemode %r\n", datemode)
854 assert datemode in (0, 1)
855 self.datemode = datemode
857 def handle_externname(self, data):
858 blah = DEBUG or self.verbosity >= 2
859 if self.biff_version >= 80:
860 option_flags, other_info =unpack("<HI", data[:6])
861 pos = 6
862 name, pos = unpack_unicode_update_pos(data, pos, lenlen=1)
863 extra = data[pos:]
864 if self._supbook_types[-1] == SUPBOOK_ADDIN:
865 self.addin_func_names.append(name)
866 if blah:
867 fprintf(self.logfile,
868 "EXTERNNAME: sbktype=%d oflags=0x%04x oinfo=0x%08x name=%r extra=%r\n",
869 self._supbook_types[-1], option_flags, other_info, name, extra)
871 def handle_externsheet(self, data):
872 self.derive_encoding() # in case CODEPAGE record missing/out of order/wrong
873 self._extnsht_count += 1 # for use as a 1-based index
874 blah1 = DEBUG or self.verbosity >= 1
875 blah2 = DEBUG or self.verbosity >= 2
876 if self.biff_version >= 80:
877 num_refs = unpack("<H", data[0:2])[0]
878 bytes_reqd = num_refs * 6 + 2
879 while len(data) < bytes_reqd:
880 if blah1:
881 fprintf(
882 self.logfile,
883 "INFO: EXTERNSHEET needs %d bytes, have %d\n",
884 bytes_reqd, len(data),
885 )
886 code2, length2, data2 = self.get_record_parts()
887 if code2 != XL_CONTINUE:
888 raise XLRDError("Missing CONTINUE after EXTERNSHEET record")
889 data += data2
890 pos = 2
891 for k in xrange(num_refs):
892 info = unpack("<HHH", data[pos:pos+6])
893 ref_recordx, ref_first_sheetx, ref_last_sheetx = info
894 self._externsheet_info.append(info)
895 pos += 6
896 if blah2:
897 fprintf(
898 self.logfile,
899 "EXTERNSHEET(b8): k = %2d, record = %2d, first_sheet = %5d, last sheet = %5d\n",
900 k, ref_recordx, ref_first_sheetx, ref_last_sheetx,
901 )
902 else:
903 nc, ty = unpack("<BB", data[:2])
904 if blah2:
905 print("EXTERNSHEET(b7-):", file=self.logfile)
906 hex_char_dump(data, 0, len(data), fout=self.logfile)
907 msg = {
908 1: "Encoded URL",
909 2: "Current sheet!!",
910 3: "Specific sheet in own doc't",
911 4: "Nonspecific sheet in own doc't!!",
912 }.get(ty, "Not encoded")
913 print(" %3d chars, type is %d (%s)" % (nc, ty, msg), file=self.logfile)
914 if ty == 3:
915 sheet_name = unicode(data[2:nc+2], self.encoding)
916 self._extnsht_name_from_num[self._extnsht_count] = sheet_name
917 if blah2: print(self._extnsht_name_from_num, file=self.logfile)
918 if not (1 <= ty <= 4):
919 ty = 0
920 self._externsheet_type_b57.append(ty)
922 def handle_filepass(self, data):
923 if self.verbosity >= 2:
924 logf = self.logfile
925 fprintf(logf, "FILEPASS:\n")
926 hex_char_dump(data, 0, len(data), base=0, fout=logf)
927 if self.biff_version >= 80:
928 kind1, = unpack('<H', data[:2])
929 if kind1 == 0: # weak XOR encryption
930 key, hash_value = unpack('<HH', data[2:])
931 fprintf(logf,
932 'weak XOR: key=0x%04x hash=0x%04x\n',
933 key, hash_value)
934 elif kind1 == 1:
935 kind2, = unpack('<H', data[4:6])
936 if kind2 == 1: # BIFF8 standard encryption
937 caption = "BIFF8 std"
938 elif kind2 == 2:
939 caption = "BIFF8 strong"
940 else:
941 caption = "** UNKNOWN ENCRYPTION METHOD **"
942 fprintf(logf, "%s\n", caption)
943 raise XLRDError("Workbook is encrypted")
945 def handle_name(self, data):
946 blah = DEBUG or self.verbosity >= 2
947 bv = self.biff_version
948 if bv < 50:
949 return
950 self.derive_encoding()
951 # print
952 # hex_char_dump(data, 0, len(data), fout=self.logfile)
953 (
954 option_flags, kb_shortcut, name_len, fmla_len, extsht_index, sheet_index,
955 menu_text_len, description_text_len, help_topic_text_len, status_bar_text_len,
956 ) = unpack("<HBBHHH4B", data[0:14])
957 nobj = Name()
958 nobj.book = self ### CIRCULAR ###
959 name_index = len(self.name_obj_list)
960 nobj.name_index = name_index
961 self.name_obj_list.append(nobj)
962 nobj.option_flags = option_flags
963 attrs = [
964 ('hidden', 1, 0),
965 ('func', 2, 1),
966 ('vbasic', 4, 2),
967 ('macro', 8, 3),
968 ('complex', 0x10, 4),
969 ('builtin', 0x20, 5),
970 ('funcgroup', 0xFC0, 6),
971 ('binary', 0x1000, 12),
972 ]
973 for attr, mask, nshift in attrs:
974 setattr(nobj, attr, (option_flags & mask) >> nshift)
976 macro_flag = " M"[nobj.macro]
977 if bv < 80:
978 internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len)
979 else:
980 internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len)
981 nobj.extn_sheet_num = extsht_index
982 nobj.excel_sheet_index = sheet_index
983 nobj.scope = None # patched up in the names_epilogue() method
984 if blah:
985 fprintf(
986 self.logfile,
987 "NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r\n",
988 name_index, macro_flag, option_flags, name_len,
989 fmla_len, extsht_index, sheet_index, internal_name)
990 name = internal_name
991 if nobj.builtin:
992 name = builtin_name_from_code.get(name, "??Unknown??")
993 if blah: print(" builtin: %s" % name, file=self.logfile)
994 nobj.name = name
995 nobj.raw_formula = data[pos:]
996 nobj.basic_formula_len = fmla_len
997 nobj.evaluated = 0
998 if blah:
999 nobj.dump(
1000 self.logfile,
1001 header="--- handle_name: name[%d] ---" % name_index,
1002 footer="-------------------",
1003 )
1005 def names_epilogue(self):
1006 blah = self.verbosity >= 2
1007 f = self.logfile
1008 if blah:
1009 print("+++++ names_epilogue +++++", file=f)
1010 print("_all_sheets_map", REPR(self._all_sheets_map), file=f)
1011 print("_extnsht_name_from_num", REPR(self._extnsht_name_from_num), file=f)
1012 print("_sheet_num_from_name", REPR(self._sheet_num_from_name), file=f)
1013 num_names = len(self.name_obj_list)
1014 for namex in range(num_names):
1015 nobj = self.name_obj_list[namex]
1016 # Convert from excel_sheet_index to scope.
1017 # This is done here because in BIFF7 and earlier, the
1018 # BOUNDSHEET records (from which _all_sheets_map is derived)
1019 # come after the NAME records.
1020 if self.biff_version >= 80:
1021 sheet_index = nobj.excel_sheet_index
1022 if sheet_index == 0:
1023 intl_sheet_index = -1 # global
1024 elif 1 <= sheet_index <= len(self._all_sheets_map):
1025 intl_sheet_index = self._all_sheets_map[sheet_index-1]
1026 if intl_sheet_index == -1: # maps to a macro or VBA sheet
1027 intl_sheet_index = -2 # valid sheet reference but not useful
1028 else:
1029 # huh?
1030 intl_sheet_index = -3 # invalid
1031 elif 50 <= self.biff_version <= 70:
1032 sheet_index = nobj.extn_sheet_num
1033 if sheet_index == 0:
1034 intl_sheet_index = -1 # global
1035 else:
1036 sheet_name = self._extnsht_name_from_num[sheet_index]
1037 intl_sheet_index = self._sheet_num_from_name.get(sheet_name, -2)
1038 nobj.scope = intl_sheet_index
1040 for namex in range(num_names):
1041 nobj = self.name_obj_list[namex]
1042 # Parse the formula ...
1043 if nobj.macro or nobj.binary: continue
1044 if nobj.evaluated: continue
1045 evaluate_name_formula(self, nobj, namex, blah=blah)
1047 if self.verbosity >= 2:
1048 print("---------- name object dump ----------", file=f)
1049 for namex in range(num_names):
1050 nobj = self.name_obj_list[namex]
1051 nobj.dump(f, header="--- name[%d] ---" % namex)
1052 print("--------------------------------------", file=f)
1053 #
1054 # Build some dicts for access to the name objects
1055 #
1056 name_and_scope_map = {} # (name.lower(), scope): Name_object
1057 name_map = {} # name.lower() : list of Name_objects (sorted in scope order)
1058 for namex in range(num_names):
1059 nobj = self.name_obj_list[namex]
1060 name_lcase = nobj.name.lower()
1061 key = (name_lcase, nobj.scope)
1062 if key in name_and_scope_map and self.verbosity:
1063 fprintf(f, 'Duplicate entry %r in name_and_scope_map\n', key)
1064 name_and_scope_map[key] = nobj
1065 sort_data = (nobj.scope, namex, nobj)
1066 # namex (a temp unique ID) ensures the Name objects will not
1067 # be compared (fatal in py3)
1068 if name_lcase in name_map:
1069 name_map[name_lcase].append(sort_data)
1070 else:
1071 name_map[name_lcase] = [sort_data]
1072 for key in name_map.keys():
1073 alist = name_map[key]
1074 alist.sort()
1075 name_map[key] = [x[2] for x in alist]
1076 self.name_and_scope_map = name_and_scope_map
1077 self.name_map = name_map
1079 def handle_obj(self, data):
1080 # Not doing much handling at all.
1081 # Worrying about embedded (BOF ... EOF) substreams is done elsewhere.
1082 # DEBUG = 1
1083 obj_type, obj_id = unpack('<HI', data[4:10])
1084 # if DEBUG: print "---> handle_obj type=%d id=0x%08x" % (obj_type, obj_id)
1086 def handle_supbook(self, data):
1087 # aka EXTERNALBOOK in OOo docs
1088 self._supbook_types.append(None)
1089 blah = DEBUG or self.verbosity >= 2
1090 if blah:
1091 print("SUPBOOK:", file=self.logfile)
1092 hex_char_dump(data, 0, len(data), fout=self.logfile)
1093 num_sheets = unpack("<H", data[0:2])[0]
1094 if blah: print("num_sheets = %d" % num_sheets, file=self.logfile)
1095 sbn = self._supbook_count
1096 self._supbook_count += 1
1097 if data[2:4] == b"\x01\x04":
1098 self._supbook_types[-1] = SUPBOOK_INTERNAL
1099 self._supbook_locals_inx = self._supbook_count - 1
1100 if blah:
1101 print("SUPBOOK[%d]: internal 3D refs; %d sheets" % (sbn, num_sheets), file=self.logfile)
1102 print(" _all_sheets_map", self._all_sheets_map, file=self.logfile)
1103 return
1104 if data[0:4] == b"\x01\x00\x01\x3A":
1105 self._supbook_types[-1] = SUPBOOK_ADDIN
1106 self._supbook_addins_inx = self._supbook_count - 1
1107 if blah: print("SUPBOOK[%d]: add-in functions" % sbn, file=self.logfile)
1108 return
1109 url, pos = unpack_unicode_update_pos(data, 2, lenlen=2)
1110 if num_sheets == 0:
1111 self._supbook_types[-1] = SUPBOOK_DDEOLE
1112 if blah: fprintf(self.logfile, "SUPBOOK[%d]: DDE/OLE document = %r\n", sbn, url)
1113 return
1114 self._supbook_types[-1] = SUPBOOK_EXTERNAL
1115 if blah: fprintf(self.logfile, "SUPBOOK[%d]: url = %r\n", sbn, url)
1116 sheet_names = []
1117 for x in range(num_sheets):
1118 try:
1119 shname, pos = unpack_unicode_update_pos(data, pos, lenlen=2)
1120 except struct.error:
1121 # #### FIX ME ####
1122 # Should implement handling of CONTINUE record(s) ...
1123 if self.verbosity:
1124 print(
1125 "*** WARNING: unpack failure in sheet %d of %d in SUPBOOK record for file %r"
1126 % (x, num_sheets, url),
1127 file=self.logfile,
1128 )
1129 break
1130 sheet_names.append(shname)
1131 if blah: fprintf(self.logfile, " sheetx=%d namelen=%d name=%r (next pos=%d)\n", x, len(shname), shname, pos)
1133 def handle_sheethdr(self, data):
1134 # This a BIFF 4W special.
1135 # The SHEETHDR record is followed by a (BOF ... EOF) substream containing
1136 # a worksheet.
1137 # DEBUG = 1
1138 self.derive_encoding()
1139 sheet_len = unpack('<i', data[:4])[0]
1140 sheet_name = unpack_string(data, 4, self.encoding, lenlen=1)
1141 sheetno = self._sheethdr_count
1142 assert sheet_name == self._sheet_names[sheetno]
1143 self._sheethdr_count += 1
1144 BOF_posn = self._position
1145 posn = BOF_posn - 4 - len(data)
1146 if DEBUG: fprintf(self.logfile, 'SHEETHDR %d at posn %d: len=%d name=%r\n', sheetno, posn, sheet_len, sheet_name)
1147 self.initialise_format_info()
1148 if DEBUG: print('SHEETHDR: xf epilogue flag is %d' % self._xf_epilogue_done, file=self.logfile)
1149 self._sheet_list.append(None) # get_sheet updates _sheet_list but needs a None beforehand
1150 self.get_sheet(sheetno, update_pos=False)
1151 if DEBUG: print('SHEETHDR: posn after get_sheet() =', self._position, file=self.logfile)
1152 self._position = BOF_posn + sheet_len
1154 def handle_sheetsoffset(self, data):
1155 # DEBUG = 0
1156 posn = unpack('<i', data)[0]
1157 if DEBUG: print('SHEETSOFFSET:', posn, file=self.logfile)
1158 self._sheetsoffset = posn
1160 def handle_sst(self, data):
1161 # DEBUG = 1
1162 if DEBUG:
1163 print("SST Processing", file=self.logfile)
1164 t0 = perf_counter()
1165 nbt = len(data)
1166 strlist = [data]
1167 uniquestrings = unpack('<i', data[4:8])[0]
1168 if DEBUG or self.verbosity >= 2:
1169 fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings)
1170 while 1:
1171 code, nb, data = self.get_record_parts_conditional(XL_CONTINUE)
1172 if code is None:
1173 break
1174 nbt += nb
1175 if DEBUG >= 2:
1176 fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, nbt)
1177 strlist.append(data)
1178 self._sharedstrings, rt_runlist = unpack_SST_table(strlist, uniquestrings)
1179 if self.formatting_info:
1180 self._rich_text_runlist_map = rt_runlist
1181 if DEBUG:
1182 t1 = perf_counter()
1183 print("SST processing took %.2f seconds" % (t1 - t0, ), file=self.logfile)
1185 def handle_writeaccess(self, data):
1186 DEBUG = 0
1187 if self.biff_version < 80:
1188 if not self.encoding:
1189 self.raw_user_name = True
1190 self.user_name = data
1191 return
1192 strg = unpack_string(data, 0, self.encoding, lenlen=1)
1193 else:
1194 try:
1195 strg = unpack_unicode(data, 0, lenlen=2)
1196 except UnicodeDecodeError:
1197 # may have invalid trailing characters
1198 strg = unpack_unicode(data.strip(), 0, lenlen=2)
1199 if DEBUG: fprintf(self.logfile, "WRITEACCESS: %d bytes; raw=%s %r\n", len(data), self.raw_user_name, strg)
1200 strg = strg.rstrip()
1201 self.user_name = strg
1203 def parse_globals(self):
1204 # DEBUG = 0
1205 # no need to position, just start reading (after the BOF)
1206 formatting.initialise_book(self)
1207 while 1:
1208 rc, length, data = self.get_record_parts()
1209 if DEBUG: print("parse_globals: record code is 0x%04x" % rc, file=self.logfile)
1210 if rc == XL_SST:
1211 self.handle_sst(data)
1212 elif rc == XL_FONT or rc == XL_FONT_B3B4:
1213 self.handle_font(data)
1214 elif rc == XL_FORMAT: # XL_FORMAT2 is BIFF <= 3.0, can't appear in globals
1215 self.handle_format(data)
1216 elif rc == XL_XF:
1217 self.handle_xf(data)
1218 elif rc == XL_BOUNDSHEET:
1219 self.handle_boundsheet(data)
1220 elif rc == XL_DATEMODE:
1221 self.handle_datemode(data)
1222 elif rc == XL_CODEPAGE:
1223 self.handle_codepage(data)
1224 elif rc == XL_COUNTRY:
1225 self.handle_country(data)
1226 elif rc == XL_EXTERNNAME:
1227 self.handle_externname(data)
1228 elif rc == XL_EXTERNSHEET:
1229 self.handle_externsheet(data)
1230 elif rc == XL_FILEPASS:
1231 self.handle_filepass(data)
1232 elif rc == XL_WRITEACCESS:
1233 self.handle_writeaccess(data)
1234 elif rc == XL_SHEETSOFFSET:
1235 self.handle_sheetsoffset(data)
1236 elif rc == XL_SHEETHDR:
1237 self.handle_sheethdr(data)
1238 elif rc == XL_SUPBOOK:
1239 self.handle_supbook(data)
1240 elif rc == XL_NAME:
1241 self.handle_name(data)
1242 elif rc == XL_PALETTE:
1243 self.handle_palette(data)
1244 elif rc == XL_STYLE:
1245 self.handle_style(data)
1246 elif rc & 0xff == 9 and self.verbosity:
1247 fprintf(self.logfile, "*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r\n",
1248 self._position - length - 4, rc, length, data)
1249 elif rc == XL_EOF:
1250 self.xf_epilogue()
1251 self.names_epilogue()
1252 self.palette_epilogue()
1253 if not self.encoding:
1254 self.derive_encoding()
1255 if self.biff_version == 45:
1256 # DEBUG = 0
1257 if DEBUG: print("global EOF: position", self._position, file=self.logfile)
1258 # if DEBUG:
1259 # pos = self._position - 4
1260 # print repr(self.mem[pos:pos+40])
1261 return
1262 else:
1263 # if DEBUG:
1264 # print >> self.logfile, "parse_globals: ignoring record code 0x%04x" % rc
1265 pass
1267 def read(self, pos, length):
1268 data = self.mem[pos:pos+length]
1269 self._position = pos + len(data)
1270 return data
1272 def getbof(self, rqd_stream):
1273 # DEBUG = 1
1274 # if DEBUG: print >> self.logfile, "getbof(): position", self._position
1275 if DEBUG: print("reqd: 0x%04x" % rqd_stream, file=self.logfile)
1277 def bof_error(msg):
1278 raise XLRDError('Unsupported format, or corrupt file: ' + msg)
1279 savpos = self._position
1280 opcode = self.get2bytes()
1281 if opcode == MY_EOF:
1282 bof_error('Expected BOF record; met end of file')
1283 if opcode not in bofcodes:
1284 bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
1285 length = self.get2bytes()
1286 if length == MY_EOF:
1287 bof_error('Incomplete BOF record[1]; met end of file')
1288 if not (4 <= length <= 20):
1289 bof_error(
1290 'Invalid length (%d) for BOF record type 0x%04x'
1291 % (length, opcode))
1292 padding = b'\0' * max(0, boflen[opcode] - length)
1293 data = self.read(self._position, length)
1294 if DEBUG: fprintf(self.logfile, "\ngetbof(): data=%r\n", data)
1295 if len(data) < length:
1296 bof_error('Incomplete BOF record[2]; met end of file')
1297 data += padding
1298 version1 = opcode >> 8
1299 version2, streamtype = unpack('<HH', data[0:4])
1300 if DEBUG:
1301 print("getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x"
1302 % (opcode, version2, streamtype), file=self.logfile)
1303 bof_offset = self._position - 4 - length
1304 if DEBUG:
1305 print("getbof(): BOF found at offset %d; savpos=%d"
1306 % (bof_offset, savpos), file=self.logfile)
1307 version = build = year = 0
1308 if version1 == 0x08:
1309 build, year = unpack('<HH', data[4:8])
1310 if version2 == 0x0600:
1311 version = 80
1312 elif version2 == 0x0500:
1313 if year < 1994 or build in (2412, 3218, 3321):
1314 version = 50
1315 else:
1316 version = 70
1317 else:
1318 # dodgy one, created by a 3rd-party tool
1319 version = {
1320 0x0000: 21,
1321 0x0007: 21,
1322 0x0200: 21,
1323 0x0300: 30,
1324 0x0400: 40,
1325 }.get(version2, 0)
1326 elif version1 in (0x04, 0x02, 0x00):
1327 version = {0x04: 40, 0x02: 30, 0x00: 21}[version1]
1329 if version == 40 and streamtype == XL_WORKBOOK_GLOBALS_4W:
1330 version = 45 # i.e. 4W
1332 if DEBUG or self.verbosity >= 2:
1333 print("BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d"
1334 % (opcode, version2, streamtype, build, year, version), file=self.logfile)
1335 got_globals = streamtype == XL_WORKBOOK_GLOBALS or (
1336 version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W)
1337 if (rqd_stream == XL_WORKBOOK_GLOBALS and got_globals) or streamtype == rqd_stream:
1338 return version
1339 if version < 50 and streamtype == XL_WORKSHEET:
1340 return version
1341 if version >= 50 and streamtype == 0x0100:
1342 bof_error("Workspace file -- no spreadsheet data")
1343 bof_error(
1344 'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d'
1345 % (opcode, version2, streamtype, build, year, version)
1346 )
1348# === helper functions
1350def expand_cell_address(inrow, incol):
1351 # Ref : OOo docs, "4.3.4 Cell Addresses in BIFF8"
1352 outrow = inrow
1353 if incol & 0x8000:
1354 if outrow >= 32768:
1355 outrow -= 65536
1356 relrow = 1
1357 else:
1358 relrow = 0
1359 outcol = incol & 0xFF
1360 if incol & 0x4000:
1361 if outcol >= 128:
1362 outcol -= 256
1363 relcol = 1
1364 else:
1365 relcol = 0
1366 return outrow, outcol, relrow, relcol
1368def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
1369 assert colx >= 0
1370 name = UNICODE_LITERAL('')
1371 while 1:
1372 quot, rem = divmod(colx, 26)
1373 name = _A2Z[rem] + name
1374 if not quot:
1375 return name
1376 colx = quot - 1
1378def display_cell_address(rowx, colx, relrow, relcol):
1379 if relrow:
1380 rowpart = "(*%s%d)" % ("+-"[rowx < 0], abs(rowx))
1381 else:
1382 rowpart = "$%d" % (rowx+1,)
1383 if relcol:
1384 colpart = "(*%s%d)" % ("+-"[colx < 0], abs(colx))
1385 else:
1386 colpart = "$" + colname(colx)
1387 return colpart + rowpart
1389def unpack_SST_table(datatab, nstrings):
1390 "Return list of strings"
1391 datainx = 0
1392 ndatas = len(datatab)
1393 data = datatab[0]
1394 datalen = len(data)
1395 pos = 8
1396 strings = []
1397 strappend = strings.append
1398 richtext_runs = {}
1399 local_unpack = unpack
1400 local_min = min
1401 local_BYTES_ORD = BYTES_ORD
1402 latin_1 = "latin_1"
1403 for _unused_i in xrange(nstrings):
1404 nchars = local_unpack('<H', data[pos:pos+2])[0]
1405 pos += 2
1406 options = local_BYTES_ORD(data[pos])
1407 pos += 1
1408 rtcount = 0
1409 phosz = 0
1410 if options & 0x08: # richtext
1411 rtcount = local_unpack('<H', data[pos:pos+2])[0]
1412 pos += 2
1413 if options & 0x04: # phonetic
1414 phosz = local_unpack('<i', data[pos:pos+4])[0]
1415 pos += 4
1416 accstrg = UNICODE_LITERAL('')
1417 charsgot = 0
1418 while 1:
1419 charsneed = nchars - charsgot
1420 if options & 0x01:
1421 # Uncompressed UTF-16
1422 charsavail = local_min((datalen - pos) >> 1, charsneed)
1423 rawstrg = data[pos:pos+2*charsavail]
1424 # if DEBUG: print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
1425 try:
1426 accstrg += unicode(rawstrg, "utf_16_le")
1427 except:
1428 # print "SST U16: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
1429 # Probable cause: dodgy data e.g. unfinished surrogate pair.
1430 # E.g. file unicode2.xls in pyExcelerator's examples has cells containing
1431 # unichr(i) for i in range(0x100000)
1432 # so this will include 0xD800 etc
1433 raise
1434 pos += 2*charsavail
1435 else:
1436 # Note: this is COMPRESSED (not ASCII!) encoding!!!
1437 charsavail = local_min(datalen - pos, charsneed)
1438 rawstrg = data[pos:pos+charsavail]
1439 # if DEBUG: print "SST CMPRSD: nchars=%d pos=%d rawstrg=%r" % (nchars, pos, rawstrg)
1440 accstrg += unicode(rawstrg, latin_1)
1441 pos += charsavail
1442 charsgot += charsavail
1443 if charsgot == nchars:
1444 break
1445 datainx += 1
1446 data = datatab[datainx]
1447 datalen = len(data)
1448 options = local_BYTES_ORD(data[0])
1449 pos = 1
1451 if rtcount:
1452 runs = []
1453 for runindex in xrange(rtcount):
1454 if pos == datalen:
1455 pos = 0
1456 datainx += 1
1457 data = datatab[datainx]
1458 datalen = len(data)
1459 runs.append(local_unpack("<HH", data[pos:pos+4]))
1460 pos += 4
1461 richtext_runs[len(strings)] = runs
1463 pos += phosz # size of the phonetic stuff to skip
1464 if pos >= datalen:
1465 # adjust to correct position in next record
1466 pos = pos - datalen
1467 datainx += 1
1468 if datainx < ndatas:
1469 data = datatab[datainx]
1470 datalen = len(data)
1471 else:
1472 assert _unused_i == nstrings - 1
1473 strappend(accstrg)
1474 return strings, richtext_runs