Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/book.py: 59%

2# This module is part of the xlrd package, which is released under a

3# BSD-style licence.

5from __future__ import print_function

7import struct

9from . import compdoc, formatting, sheet

10from .biffh import *

11from .formula import *

12from .timemachine import *

14try:

15 from time import perf_counter

16except ImportError:

17 # Python 2.7

18 from time import clock as perf_counter

20from struct import unpack

22empty_cell = sheet.empty_cell # for exposure to the world ...

24DEBUG = 0

26import mmap

28MY_EOF = 0xF00BAAA # not a 16-bit number

30SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5)

32SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20)

# Built-in defined names, listed in code order: the position of each entry
# is the ordinal of the one-character code that stands for it.
_builtin_names_in_code_order = (
    "Consolidate_Area",    # "\x00"
    "Auto_Open",           # "\x01"
    "Auto_Close",          # "\x02"
    "Extract",             # "\x03"
    "Database",            # "\x04"
    "Criteria",            # "\x05"
    "Print_Area",          # "\x06"
    "Print_Titles",        # "\x07"
    "Recorder",            # "\x08"
    "Data_Form",           # "\x09"
    "Auto_Activate",       # "\x0A"
    "Auto_Deactivate",     # "\x0B"
    "Sheet_Title",         # "\x0C"
    "_FilterDatabase",     # "\x0D"
)

# Two-way lookup between a built-in name and its one-character code;
# both keys and values are Unicode strings.
builtin_name_from_code = {}
code_from_builtin_name = {}
for _code_num, _name in enumerate(_builtin_names_in_code_order):
    _name = UNICODE_LITERAL(_name)
    _code = UNICODE_LITERAL(chr(_code_num))
    code_from_builtin_name[_name] = _code
    builtin_name_from_code[_code] = _name
# Only the two lookup tables are meant to survive at module level.
del _code_num, _name, _code, _builtin_names_in_code_order

def open_workbook_xls(filename=None,
                      logfile=sys.stdout, verbosity=0, use_mmap=True,
                      file_contents=None,
                      encoding_override=None,
                      formatting_info=False, on_demand=False, ragged_rows=False,
                      ignore_workbook_corruption=False):
    """
    Create a :class:`Book`, load the workbook image into it and parse it.

    Every argument is handed on unchanged to :meth:`Book.biff2_8_load`.
    ``on_demand`` also decides whether the sheets are loaded here and
    whether the book's resources are released before returning. It is
    forced to ``False``, with a warning written to the book's log file,
    for BIFF versions 4.5 and earlier.

    :returns: The populated :class:`Book`.

    :raises XLRDError:
      If the BIFF version cannot be determined, or is not listed in
      ``SUPPORTED_VERSIONS``.
    """
    on_demand_warning = (
        "*** WARNING: on_demand is not supported for this Excel version.\n"
        "*** Setting on_demand to False.\n"
    )
    started = perf_counter()
    book = Book()
    try:
        book.biff2_8_load(
            filename=filename,
            file_contents=file_contents,
            logfile=logfile,
            verbosity=verbosity,
            use_mmap=use_mmap,
            encoding_override=encoding_override,
            formatting_info=formatting_info,
            on_demand=on_demand,
            ragged_rows=ragged_rows,
            ignore_workbook_corruption=ignore_workbook_corruption,
        )
        image_ready = perf_counter()
        book.load_time_stage_1 = image_ready - started

        version = book.getbof(XL_WORKBOOK_GLOBALS)
        if not version:
            raise XLRDError("Can't determine file's BIFF version")
        if version not in SUPPORTED_VERSIONS:
            raise XLRDError(
                "BIFF version %s is not supported" % biff_text_from_num[version]
            )
        book.biff_version = version

        if version <= 40:
            # No workbook globals and only one worksheet.
            if on_demand:
                fprintf(book.logfile, on_demand_warning)
                book.on_demand = on_demand = False
            book.fake_globals_get_sheet()
        elif version == 45:
            # The worksheet(s) are embedded in the globals stream.
            book.parse_globals()
            if on_demand:
                fprintf(book.logfile, on_demand_warning)
                book.on_demand = on_demand = False
        else:
            book.parse_globals()
            # One empty slot per sheet name; filled as sheets are loaded.
            book._sheet_list = [None] * len(book._sheet_names)
            if not on_demand:
                book.get_sheets()

        book.nsheets = len(book._sheet_list)
        if version == 45 and book.nsheets > 1:
            fprintf(
                book.logfile,
                "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n"
                "*** Book-level data will be that of the last worksheet.\n",
                book.nsheets,
            )
        parsed = perf_counter()
        book.load_time_stage_2 = parsed - image_ready
    except:
        # Deliberately catches everything: release whatever was acquired,
        # then let the original exception carry on.
        book.release_resources()
        raise
    # Normal exit: in on_demand mode the resources stay open so that
    # sheets can still be loaded later.
    if not on_demand:
        book.release_resources()
    return book

125

126

class Name(BaseObject):
    """
    Details of one named reference, formula, macro, etc.

    .. note::

      Name information is **not** extracted from files older than
      Excel 5.0 (``Book.biff_version < 50``)
    """
    _repr_these = ['stack']

    # The parent book.
    book = None

    #: 0 = Visible; 1 = Hidden
    hidden = 0

    #: 0 = Command macro; 1 = Function macro. Relevant only if macro == 1
    func = 0

    #: 0 = Sheet macro; 1 = VisualBasic macro. Relevant only if macro == 1
    vbasic = 0

    #: 0 = Standard name; 1 = Macro name
    macro = 0

    #: 0 = Simple formula; 1 = Complex formula (array formula or user defined).
    #:
    #: .. note:: No examples have been sighted.
    complex = 0

    #: 0 = User-defined name; 1 = Built-in name
    #:
    #: Common examples: ``Print_Area``, ``Print_Titles``; see OOo docs for
    #: full list
    builtin = 0

    #: Function group. Relevant only if macro == 1; see OOo docs for values.
    funcgroup = 0

    #: 0 = Formula definition; 1 = Binary data
    #:
    #: .. note:: No examples have been sighted.
    binary = 0

    #: The index of this object in book.name_obj_list
    name_index = 0

    # A Unicode string. If builtin, decoded as per OOo docs.
    name = UNICODE_LITERAL("")

    #: An 8-bit string.
    raw_formula = b''

    #: ``-1``:
    #:   The name is global (visible in all calculation sheets).
    #: ``-2``:
    #:   The name belongs to a macro sheet or VBA sheet.
    #: ``-3``:
    #:   The name is invalid.
    #: ``0 <= scope < book.nsheets``:
    #:   The name is local to the sheet whose index is scope.
    scope = -1

    #: The result of evaluating the formula, if any.
    #: If no formula, or evaluation of the formula encountered problems,
    #: the result is ``None``. Otherwise the result is a single instance of the
    #: :class:`~xlrd.formula.Operand` class.
    #
    result = None

    def _dump_and_raise(self, message):
        # Shared failure path of cell() and area2d(): write a dump of this
        # object to the book's log file, then raise XLRDError(message).
        self.dump(
            self.book.logfile,
            header="=== Dump of Name object ===",
            footer="======= End of dump =======",
        )
        raise XLRDError(message)

    def cell(self):
        """
        Convenience accessor for the common case of a name that refers to
        exactly one cell.

        :returns: An instance of the :class:`~xlrd.sheet.Cell` class.

        :raises xlrd.biffh.XLRDError:
          The name is not a constant absolute reference
          to a single cell. A dump of this object is written to the
          book's log file before the exception is raised.
        """
        operand = self.result
        if operand:
            # The result should be an instance of the Operand class.
            kind, refs = operand.kind, operand.value
            if kind == oREF and len(refs) == 1:
                ref = refs[0]
                # Exactly one sheet, one row and one column.
                if (0 <= ref.shtxlo == ref.shtxhi - 1 and
                        ref.rowxlo == ref.rowxhi - 1 and
                        ref.colxlo == ref.colxhi - 1):
                    target_sheet = self.book.sheet_by_index(ref.shtxlo)
                    return target_sheet.cell(ref.rowxlo, ref.colxlo)
        self._dump_and_raise("Not a constant absolute reference to a single cell")

    def area2d(self, clipped=True):
        """
        Convenience accessor for a name that refers to one rectangular
        area in one worksheet.

        :param clipped:
          If ``True``, the default, the returned rectangle is clipped
          to fit in ``(0, sheet.nrows, 0, sheet.ncols)``.
          It is guaranteed that ``0 <= rowxlo <= rowxhi <= sheet.nrows`` and
          that the number of usable rows in the area (which may be zero) is
          ``rowxhi - rowxlo``; likewise for columns.

        :returns: a tuple ``(sheet_object, rowxlo, rowxhi, colxlo, colxhi)``.

        :raises xlrd.biffh.XLRDError:
           The name is not a constant absolute reference
           to a single area in a single sheet. A dump of this object is
           written to the book's log file before the exception is raised.
        """

        def clip(lo, hi, limit):
            # Pull [lo, hi) inside [0, limit] without letting hi drop below lo.
            lo = min(lo, limit)
            hi = max(lo, min(hi, limit))
            return lo, hi

        operand = self.result
        if operand:
            # The result should be an instance of the Operand class.
            kind, refs = operand.kind, operand.value
            if kind == oREF and len(refs) == 1:  # only 1 reference
                ref = refs[0]
                if 0 <= ref.shtxlo == ref.shtxhi - 1:  # only 1 usable sheet
                    sh = self.book.sheet_by_index(ref.shtxlo)
                    if not clipped:
                        return sh, ref.rowxlo, ref.rowxhi, ref.colxlo, ref.colxhi
                    rowxlo, rowxhi = clip(ref.rowxlo, ref.rowxhi, sh.nrows)
                    colxlo, colxhi = clip(ref.colxlo, ref.colxhi, sh.ncols)
                    assert 0 <= rowxlo <= rowxhi <= sh.nrows
                    assert 0 <= colxlo <= colxhi <= sh.ncols
                    return sh, rowxlo, rowxhi, colxlo, colxhi
        self._dump_and_raise(
            "Not a constant absolute reference to a single area in a single sheet"
        )

268

269

270class Book(BaseObject):

271 """

272 Contents of a "workbook".

273

274 .. warning::

275

276 You should not instantiate this class yourself. You use the :class:`Book`

277 object that was returned when you called :func:`~xlrd.open_workbook`.

278 """

279

280 #: The number of worksheets present in the workbook file.

281 #: This information is available even when no sheets have yet been loaded.

282 nsheets = 0

283

284 #: Which date system was in force when this file was last saved.

285 #:

286 #: 0:

287 #: 1900 system (the Excel for Windows default).

288 #:

289 #: 1:

290 #: 1904 system (the Excel for Macintosh default).

291 #:

292 #: Defaults to 0 in case it's not specified in the file.

293 datemode = 0

294

295 #: Version of BIFF (Binary Interchange File Format) used to create the file.

296 #: Latest is 8.0 (represented here as 80), introduced with Excel 97.

297 #: Earliest supported by this module: 2.0 (represented as 20).

298 biff_version = 0

299

300 #: List containing a :class:`Name` object for each ``NAME`` record in the

301 #: workbook.

302 #:

303 #: .. versionadded:: 0.6.0

304 name_obj_list = []

305

306 #: An integer denoting the character set used for strings in this file.

307 #: For BIFF 8 and later, this will be 1200, meaning Unicode;

308 #: more precisely, UTF_16_LE.

309 #: For earlier versions, this is used to derive the appropriate Python

310 #: encoding to be used to convert to Unicode.

311 #: Examples: ``1252 -> 'cp1252'``, ``10000 -> 'mac_roman'``

312 codepage = None

313

314 #: The encoding that was derived from the codepage.

315 encoding = None

316

317 #: A tuple containing the telephone country code for:

318 #:

319 #: ``[0]``:

320 #: the user-interface setting when the file was created.

321 #:

322 #: ``[1]``:

323 #: the regional settings.

324 #:

325 #: Example: ``(1, 61)`` meaning ``(USA, Australia)``.

326 #:

327 #: This information may give a clue to the correct encoding for an

328 #: unknown codepage. For a long list of observed values, refer to the

329 #: OpenOffice.org documentation for the ``COUNTRY`` record.

330 countries = (0, 0)

331

332 #: What (if anything) is recorded as the name of the last user to

333 #: save the file.

334 user_name = UNICODE_LITERAL('')

335

336 #: A list of :class:`~xlrd.formatting.Font` class instances,

337 #: each corresponding to a FONT record.

338 #:

339 #: .. versionadded:: 0.6.1

340 font_list = []

341

342 #: A list of :class:`~xlrd.formatting.XF` class instances,

343 #: each corresponding to an ``XF`` record.

344 #:

345 #: .. versionadded:: 0.6.1

346 xf_list = []

347

348 #: A list of :class:`~xlrd.formatting.Format` objects, each corresponding to

349 #: a ``FORMAT`` record, in the order that they appear in the input file.

350 #: It does *not* contain builtin formats.

351 #:

352 #: If you are creating an output file using (for example) :mod:`xlwt`,

353 #: use this list.

354 #:

355 #: The collection to be used for all visual rendering purposes is

356 #: :attr:`format_map`.

357 #:

358 #: .. versionadded:: 0.6.1

359 format_list = []

360

361 ##

362 #: The mapping from :attr:`~xlrd.formatting.XF.format_key` to

363 #: :class:`~xlrd.formatting.Format` object.

364 #:

365 #: .. versionadded:: 0.6.1

366 format_map = {}

367

368 #: This provides access via name to the extended format information for

369 #: both built-in styles and user-defined styles.

370 #:

371 #: It maps ``name`` to ``(built_in, xf_index)``, where

372 #: ``name`` is either the name of a user-defined style,

373 #: or the name of one of the built-in styles. Known built-in names are

374 #: Normal, RowLevel_1 to RowLevel_7,

375 #: ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]",

376 #: "Currency [0]", Hyperlink, and "Followed Hyperlink".

377 #:

378 #: ``built_in`` has the following meanings

379 #:

380 #: 1:

381 #: built-in style

382 #:

383 #: 0:

384 #: user-defined

385 #:

386 #: ``xf_index`` is an index into :attr:`Book.xf_list`.

387 #:

388 #: References: OOo docs s6.99 (``STYLE`` record); Excel UI Format/Style

389 #:

390 #: .. versionadded:: 0.6.1

391 #:

392 #: Extracted only if ``open_workbook(..., formatting_info=True)``

393 #:

394 #: .. versionadded:: 0.7.4

395 style_name_map = {}

396

397 #: This provides definitions for colour indexes. Please refer to

398 #: :ref:`palette` for an explanation

399 #: of how colours are represented in Excel.

400 #:

401 #: Colour indexes into the palette map into ``(red, green, blue)`` tuples.

402 #: "Magic" indexes e.g. ``0x7FFF`` map to ``None``.

403 #:

404 #: :attr:`colour_map` is what you need if you want to render cells on screen

405 #: or in a PDF file. If you are writing an output XLS file, use

406 #: :attr:`palette_record`.

407 #:

408 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)``

409 #:

410 #: .. versionadded:: 0.6.1

411 colour_map = {}

412

413 #: If the user has changed any of the colours in the standard palette, the

414 #: XLS file will contain a ``PALETTE`` record with 56 (16 for Excel 4.0 and

415 #: earlier) RGB values in it, and this list will be e.g.

416 #: ``[(r0, g0, b0), ..., (r55, g55, b55)]``.

417 #: Otherwise this list will be empty. This is what you need if you are

418 #: writing an output XLS file. If you want to render cells on screen or in a

419 #: PDF file, use :attr:`colour_map`.

420 #:

421 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)``

422 #:

423 #: .. versionadded:: 0.6.1

424 palette_record = []

425

426 #: Time in seconds to extract the XLS image as a contiguous string

427 #: (or mmap equivalent).

428 load_time_stage_1 = -1.0

429

430 #: Time in seconds to parse the data from the contiguous string

431 #: (or mmap equivalent).

432 load_time_stage_2 = -1.0

433

def sheets(self):
    """
    Load any sheet that is not loaded yet, then hand back all of them.

    :returns: A new list of all sheets in the book.
    """
    for index in range(self.nsheets):
        if self._sheet_list[index]:
            continue  # already loaded
        self.get_sheet(index)
    return list(self._sheet_list)

444

def sheet_by_index(self, sheetx):
    """
    Return the sheet at a given position, loading it first if needed.

    :param sheetx: Sheet index in ``range(nsheets)``
    :returns: A :class:`~xlrd.sheet.Sheet`.
    """
    already_loaded = self._sheet_list[sheetx]
    if already_loaded:
        return already_loaded
    return self.get_sheet(sheetx)

451

def __iter__(self):
    """
    Yield the sheets of the book one at a time, in index order.

    Resources are not freed afterwards, since this can be invoked as
    `list(book)`.
    """
    for sheetx in range(self.nsheets):
        current = self.sheet_by_index(sheetx)
        yield current

459

def sheet_by_name(self, sheet_name):
    """
    Look a sheet up by its name.

    :param sheet_name: Name of the sheet required.
    :returns: A :class:`~xlrd.sheet.Sheet`.
    :raises XLRDError: If no sheet has that name.
    """
    known_names = self._sheet_names
    try:
        position = known_names.index(sheet_name)
    except ValueError:
        raise XLRDError('No sheet named <%r>' % sheet_name)
    else:
        return self.sheet_by_index(position)

470

def __getitem__(self, item):
    """
    Support ``book[...]`` with either a sheet index or a sheet name.

    :param item: Name or index of sheet enquired upon
    :return: :class:`~xlrd.sheet.Sheet`.
    """
    # Integers are treated as positions, anything else as a name.
    lookup = self.sheet_by_index if isinstance(item, int) else self.sheet_by_name
    return lookup(item)

481

def sheet_names(self):
    """
    :returns:
      A new list with the names of all the worksheets in the workbook
      file. This information is available even when no sheets have yet
      been loaded.
    """
    return list(self._sheet_names)

490

def sheet_loaded(self, sheet_name_or_index):
    """
    Report whether a sheet is currently loaded.

    :param sheet_name_or_index: Name or index of sheet enquired upon
    :returns: ``True`` if sheet is loaded, ``False`` otherwise.
    :raises XLRDError: If a name is given and no sheet has that name.

    .. versionadded:: 0.7.1
    """
    position = sheet_name_or_index
    if not isinstance(position, int):
        # Not an index: resolve the name to its position.
        try:
            position = self._sheet_names.index(position)
        except ValueError:
            raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
    return bool(self._sheet_list[position])

506

def unload_sheet(self, sheet_name_or_index):
    """
    Drop the book's reference to a loaded sheet.

    :param sheet_name_or_index: Name or index of sheet to be unloaded.
    :raises XLRDError: If a name is given and no sheet has that name.

    .. versionadded:: 0.7.1
    """
    position = sheet_name_or_index
    if not isinstance(position, int):
        # Not an index: resolve the name to its position.
        try:
            position = self._sheet_names.index(position)
        except ValueError:
            raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
    self._sheet_list[position] = None

521

def release_resources(self):
    """
    Release memory-consuming objects and (possibly) a memory-mapped file
    (:class:`mmap.mmap` object).

    This method has a dual purpose. You can call it yourself when you
    have finished loading sheets in ``on_demand`` mode, but still require
    the :class:`Book` object to examine the loaded sheets. It is also
    called automatically (a) when :func:`~xlrd.open_workbook`
    raises an exception and (b) if you are using a ``with`` statement, when
    the ``with`` block is exited. Calling this method multiple times on the
    same object has no ill effect.
    """
    self._resources_released = 1
    # Close whichever of the two buffers offers close() (an mmap.mmap
    # object does), then drop the reference either way.
    for attr_name in ("mem", "filestr"):
        holder = getattr(self, attr_name)
        if hasattr(holder, "close"):
            holder.close()
        setattr(self, attr_name, None)
    self._sharedstrings = None
    self._rich_text_runlist_map = None

544

def __enter__(self):
    """Enter a ``with`` block; the book itself is the managed object."""
    return self

547

def __exit__(self, exc_type, exc_value, exc_tb):
    """Leave a ``with`` block: release resources, never suppress an exception."""
    self.release_resources()
    # A falsy result lets any in-flight exception propagate.
    return None

551

552 #: A mapping from ``(lower_case_name, scope)`` to a single :class:`Name`

553 #: object.

554 #:

555 #: .. versionadded:: 0.6.0

556 name_and_scope_map = {}

557

558 #: A mapping from `lower_case_name` to a list of :class:`Name` objects.

559 #: The list is sorted in scope order. Typically there will be one item

560 #: (of global scope) in the list.

561 #:

562 #: .. versionadded:: 0.6.0

563 name_map = {}

564

def __init__(self):
    """Give a new book its empty per-instance state."""
    # --- sheet bookkeeping ---
    self.nsheets = 0
    self._sheet_list = []
    self._sheet_names = []
    self._sheet_visibility = []  # from BOUNDSHEET record
    self._sh_abs_posn = []  # sheet's absolute position in the stream
    self._sheethdr_count = 0  # BIFF 4W only

    # --- strings ---
    self._sharedstrings = []
    self._rich_text_runlist_map = {}
    self.raw_user_name = False

    # --- formatting ---
    self.builtinfmtcount = -1  # unknown as yet. BIFF 3, 4S, 4W
    self.initialise_format_info()

    # --- external references and sheet-index mapping ---
    self._all_sheets_count = 0  # includes macro & VBA sheets
    self._all_sheets_map = []  # maps an all_sheets index to a calc-sheets index (or -1)
    self._supbook_count = 0
    self._supbook_locals_inx = None
    self._supbook_addins_inx = None
    self._supbook_types = []
    self._externsheet_info = []
    self._externsheet_type_b57 = []
    self._extnsht_name_from_num = {}
    self._extnsht_count = 0
    self._sheet_num_from_name = {}

    # --- per-instance replacements for the mutable class-level defaults ---
    self.addin_func_names = []
    self.name_obj_list = []
    self.colour_map = {}
    self.palette_record = []
    self.xf_list = []
    self.style_name_map = {}

    # --- raw data buffers and their release flag ---
    self._resources_released = 0
    self.mem = b''
    self.filestr = b''

597

def biff2_8_load(self, filename=None, file_contents=None,
                 logfile=sys.stdout, verbosity=0, use_mmap=True,
                 encoding_override=None,
                 formatting_info=False,
                 on_demand=False,
                 ragged_rows=False,
                 ignore_workbook_corruption=False,
                 ):
    """
    Record the load options on the book and locate the workbook stream.

    The raw bytes come from ``file_contents`` when that is truthy,
    otherwise from the file called ``filename`` (memory-mapped when
    ``use_mmap`` is true). If the data starts with the compound-document
    signature, the ``Workbook`` or ``Book`` stream inside it is used;
    otherwise the data itself is used. On return ``self.mem``,
    ``self.base``, ``self.stream_len`` and ``self._position`` describe
    that stream.

    :raises XLRDError:
      If the file is empty, or a compound document holds neither a
      ``Workbook`` nor a ``Book`` stream.
    """
    self.logfile = logfile
    self.verbosity = verbosity
    self.use_mmap = use_mmap
    self.encoding_override = encoding_override
    self.formatting_info = formatting_info
    self.on_demand = on_demand
    self.ragged_rows = ragged_rows

    if file_contents:
        self.filestr = file_contents
        self.stream_len = len(file_contents)
    else:
        with open(filename, "rb") as handle:
            handle.seek(0, 2)  # EOF
            nbytes = handle.tell()
            handle.seek(0, 0)  # BOF
            if nbytes == 0:
                raise XLRDError("File size is 0 bytes")
            if self.use_mmap:
                self.filestr = mmap.mmap(handle.fileno(), nbytes, access=mmap.ACCESS_READ)
                self.stream_len = nbytes
            else:
                self.filestr = handle.read()
                self.stream_len = len(self.filestr)

    self.base = 0
    if self.filestr[:8] == compdoc.SIGNATURE:
        container = compdoc.CompDoc(
            self.filestr,
            logfile=self.logfile,
            ignore_workbook_corruption=ignore_workbook_corruption,
        )
        # Try the two stream names in turn; the first one found wins.
        for stream_name in ('Workbook', 'Book'):
            located = container.locate_named_stream(UNICODE_LITERAL(stream_name))
            self.mem, self.base, self.stream_len = located
            if self.mem:
                break
        else:
            raise XLRDError("Can't find workbook in OLE2 compound document")
        del container
    else:
        # got this one at the antique store
        self.mem = self.filestr

    if self.mem is not self.filestr:
        # The stream is a separate object, so the original image is no
        # longer needed.
        if hasattr(self.filestr, "close"):
            self.filestr.close()
        self.filestr = b''
    self._position = self.base
    if DEBUG:
        print("mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len), file=self.logfile)

654

def initialise_format_info(self):
    """
    Reset all format-related state on the book.

    Called from ``__init__``; needs to be done once per sheet for
    BIFF 4W :-(
    """
    # Collections.
    self.font_list = []
    self.xf_list = []
    self.format_list = []
    self.format_map = {}
    self._xf_index_to_xl_type_map = {0: XL_CELL_NUMBER}
    # Counters and flags.
    self.xfcount = 0
    self.actualfmtcount = 0  # number of FORMAT records seen so far
    self._xf_epilogue_done = 0

665

def get2bytes(self):
    """
    Read the next two bytes of the stream as a little-endian unsigned
    integer and advance the current position past whatever was read.

    :returns:
      The 16-bit value, or ``MY_EOF`` if fewer than two bytes remain.
    """
    start = self._position
    pair = self.mem[start:start + 2]
    self._position = start + len(pair)
    if len(pair) != 2:
        return MY_EOF
    return unpack('<H', pair)[0]

675

def get_record_parts(self):
    """
    Read the record at the current position and step past it.

    A record is a 4-byte header (two little-endian unsigned 16-bit
    values: code, then data length) followed by the data bytes.

    :returns: a tuple ``(code, length, data)``.
    """
    buf = self.mem
    header_start = self._position
    data_start = header_start + 4
    code, length = unpack('<HH', buf[header_start:data_start])
    data_end = data_start + length
    payload = buf[data_start:data_end]
    self._position = data_end
    return (code, length, payload)

684

def get_record_parts_conditional(self, reqd_record):
    """
    Read the record at the current position only if it has the wanted code.

    :param reqd_record: The record code that is required.
    :returns:
      ``(code, length, data)``, with the position moved past the record,
      when the codes match; otherwise ``(None, 0, b'')`` with the
      position left unchanged.
    """
    buf = self.mem
    header_start = self._position
    data_start = header_start + 4
    code, length = unpack('<HH', buf[header_start:data_start])
    if code == reqd_record:
        data_end = data_start + length
        self._position = data_end
        return (code, length, buf[data_start:data_end])
    return (None, 0, b'')

695

def get_sheet(self, sh_number, update_pos=True):
    """
    Load one sheet, store it in the book's sheet list and return it.

    :param sh_number: Index of the sheet to load.
    :param update_pos:
      If true, first move the current position to the sheet's recorded
      absolute position in the stream.
    :returns: The newly read sheet object.
    :raises XLRDError: If the book's resources have been released.
    """
    if self._resources_released:
        raise XLRDError("Can't load sheets after releasing resources.")
    if update_pos:
        self._position = self._sh_abs_posn[sh_number]
    # The version reported by the sheet's BOF is deliberately ignored.
    # There is an example where the book is v7 but the sheet reports v8;
    # it appears to work OK if the sheet version is ignored.
    # Confirmed by Daniel Rentz: happens when Excel does "save as"
    # creating an old version file.
    self.getbof(XL_WORKSHEET)
    start_position = self._position
    sheet_name = self._sheet_names[sh_number]
    loaded = sheet.Sheet(self, start_position, sheet_name, sh_number)
    loaded.read(self)
    self._sheet_list[sh_number] = loaded
    return loaded

716

def get_sheets(self):
    """Load every sheet named in the book, in index order."""
    if DEBUG:
        print("GET_SHEETS:", self._sheet_names, self._sh_abs_posn, file=self.logfile)
    total = len(self._sheet_names)
    for sheetno in range(total):
        if DEBUG:
            print("GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn, file=self.logfile)
        self.get_sheet(sheetno)

723

def fake_globals_get_sheet(self):
    """
    For BIFF 4.0 and earlier: set the book up as if it had one visible
    worksheet called 'Sheet 1' at stream position 0, then load it.
    """
    formatting.initialise_book(self)
    self._sheet_visibility = [0]  # one sheet, visible
    self._sh_abs_posn = [0]
    self._sheet_names = [UNICODE_LITERAL('Sheet 1')]
    # get_sheet updates _sheet_list but needs a None beforehand
    self._sheet_list.append(None)
    self.get_sheets()

732

def handle_boundsheet(self, data):
    """
    Process one BOUNDSHEET record.

    Every record is counted in ``_all_sheets_count`` and gets an entry in
    ``_all_sheets_map``. Worksheets are also appended to the name,
    position and visibility lists; any other sheet type is mapped to -1
    and otherwise ignored.

    :param data: The data bytes of the record.
    """
    version = self.biff_version
    self.derive_encoding()
    if DEBUG:
        fprintf(self.logfile, "BOUNDSHEET: bv=%d data %r\n", version, data)

    if version != 45:
        offset, visibility, sheet_type = unpack('<iBB', data[0:6])
        # The offset is relative to the global BOF, which is always at
        # posn 0 in the stream.
        abs_posn = offset + self.base
        if version < BIFF_FIRST_UNICODE:
            sheet_name = unpack_string(data, 6, self.encoding, lenlen=1)
        else:
            sheet_name = unpack_unicode(data, 6, lenlen=1)
    else:
        # BIFF4W. Not documented in OOo docs ...
        # In fact, the *only* data is the name of the sheet.
        sheet_name = unpack_string(data, 0, self.encoding, lenlen=1)
        visibility = 0
        sheet_type = XL_BOUNDSHEET_WORKSHEET  # guess, patch later
        if self._sh_abs_posn:
            abs_posn = -1  # unknown
        else:
            # Note (a) this won't be used
            # (b) it's the position of the SHEETHDR record
            # (c) add 11 to get to the worksheet BOF record
            abs_posn = self._sheetsoffset + self.base

    if DEBUG or self.verbosity >= 2:
        fprintf(
            self.logfile,
            "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n",
            self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type,
        )
    self._all_sheets_count += 1

    if sheet_type == XL_BOUNDSHEET_WORKSHEET:
        calc_sheet_index = len(self._sheet_names)
        self._all_sheets_map.append(calc_sheet_index)
        self._sheet_names.append(sheet_name)
        self._sh_abs_posn.append(abs_posn)
        self._sheet_visibility.append(visibility)
        self._sheet_num_from_name[sheet_name] = calc_sheet_index
        return

    # Not a worksheet: record a placeholder and, if asked, say so.
    self._all_sheets_map.append(-1)
    if DEBUG or self.verbosity >= 1:
        type_descriptions = {
            1: 'Macro sheet',
            2: 'Chart',
            6: 'Visual Basic module',
        }
        descr = type_descriptions.get(sheet_type, 'UNKNOWN')
        fprintf(
            self.logfile,
            "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n",
            sheet_name, sheet_type, descr,
        )

784

def handle_builtinfmtcount(self, data):
    """
    Store the count carried by a BUILTINFMTCOUNT record.

    N.B. This count appears to be utterly useless.

    :param data: The data bytes of the record.
    """
    count, = unpack('<H', data[0:2])
    if DEBUG:
        fprintf(self.logfile, "BUILTINFMTCOUNT: %r\n", count)
    self.builtinfmtcount = count

791

def derive_encoding(self):
    """
    Work out ``self.encoding`` from the override or the codepage.

    An ``encoding_override`` always wins. Without one, a missing codepage
    means ``'iso-8859-1'`` for BIFF versions before 8.0, and codepage 1200
    otherwise; a known codepage is mapped to an encoding name. Unless the
    codepage is 1200, the encoding is then tried out on an ASCII string
    so that an unusable codec is reported immediately. A pending raw user
    name, if any, is decoded as the last step.

    :returns: The value of ``self.encoding``.
    """
    if self.encoding_override:
        self.encoding = self.encoding_override
    elif self.codepage is not None:
        codepage = self.codepage
        if codepage in encoding_from_codepage:
            derived = encoding_from_codepage[codepage]
        elif 300 <= codepage <= 1999:
            derived = 'cp' + str(codepage)
        elif self.biff_version >= 80:
            self.codepage = 1200
            derived = 'utf_16_le'
        else:
            derived = 'unknown_codepage_' + str(codepage)
        if DEBUG or (self.verbosity and derived != self.encoding):
            fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", codepage, derived)
        self.encoding = derived
    elif self.biff_version < 80:
        fprintf(
            self.logfile,
            "*** No CODEPAGE record, no encoding_override: will use 'iso-8859-1'\n",
        )
        self.encoding = 'iso-8859-1'
    else:
        # Only the codepage is set on this path; self.encoding is left
        # as it was.
        self.codepage = 1200  # utf16le
        if self.verbosity >= 2:
            fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n")

    if self.codepage != 1200:  # utf_16_le
        # If we don't have a codec that can decode ASCII into Unicode,
        # we're well & truly stuffed -- let the punter know ASAP.
        try:
            unicode(b'trial', self.encoding)
        except BaseException as exc:
            fprintf(
                self.logfile,
                "ERROR *** codepage %r -> encoding %r -> %s: %s\n",
                self.codepage, self.encoding, type(exc).__name__.split(".")[-1], exc,
            )
            raise

    if self.raw_user_name:
        # The user name was stored undecoded; decode it now that the
        # encoding is known.
        decoded = unpack_string(self.user_name, 0, self.encoding, lenlen=1)
        self.user_name = decoded.rstrip()
        self.raw_user_name = False
    return self.encoding

836

def handle_codepage(self, data):
    """
    Store the codepage number from a CODEPAGE record, then re-derive the
    encoding.

    :param data: The data bytes of the record.
    """
    self.codepage, = unpack('<H', data[0:2])
    self.derive_encoding()

842

def handle_country(self, data):
    """
    Store the pair of country codes from a COUNTRY record.

    :param data: The data bytes of the record.
    """
    pair = unpack('<HH', data[0:4])
    if self.verbosity:
        print("Countries:", pair, file=self.logfile)
    # Note: in BIFF7 and earlier, country record was put (redundantly?) in
    # each worksheet, so a repeat must agree with what is already stored.
    current = self.countries
    assert current == (0, 0) or current == pair
    self.countries = pair

849

def handle_datemode(self, data):
    """
    Store the date system flag from a DATEMODE record.

    :param data: The data bytes of the record.
    """
    flag, = unpack('<H', data[0:2])
    if DEBUG or self.verbosity:
        fprintf(self.logfile, "DATEMODE: datemode %r\n", flag)
    # Only the two values 0 and 1 are accepted.
    assert flag in (0, 1)
    self.datemode = flag

856

def handle_externname(self, data):
    """
    Process an EXTERNNAME record; nothing is done before BIFF 8.0.

    The name is appended to ``addin_func_names`` when the most recent
    SUPBOOK type is ``SUPBOOK_ADDIN``.

    :param data: The data bytes of the record.
    """
    verbose = DEBUG or self.verbosity >= 2
    if self.biff_version < 80:
        return
    option_flags, other_info = unpack("<HI", data[:6])
    name, after_name = unpack_unicode_update_pos(data, 6, lenlen=1)
    extra = data[after_name:]
    latest_supbook_type = self._supbook_types[-1]
    if latest_supbook_type == SUPBOOK_ADDIN:
        self.addin_func_names.append(name)
    if verbose:
        fprintf(
            self.logfile,
            "EXTERNNAME: sbktype=%d oflags=0x%04x oinfo=0x%08x name=%r extra=%r\n",
            self._supbook_types[-1], option_flags, other_info, name, extra,
        )

870

def handle_externsheet(self, data):
    """
    Process an EXTERNSHEET record.

    For BIFF 8.0 and later the record carries a count followed by that
    many 6-byte references, possibly spilling into CONTINUE records; each
    reference is appended to ``_externsheet_info``. For earlier versions
    the record carries a length byte and a type byte; the type (or 0 if
    it is not in 1..4) is appended to ``_externsheet_type_b57``, and for
    type 3 the sheet name is stored under the current 1-based record count.

    :param data: The data bytes of the record.
    :raises XLRDError:
      If more data is needed and the next record is not a CONTINUE record.
    """
    self.derive_encoding()  # in case CODEPAGE record missing/out of order/wrong
    self._extnsht_count += 1  # for use as a 1-based index
    log_info = DEBUG or self.verbosity >= 1
    log_detail = DEBUG or self.verbosity >= 2

    if self.biff_version < 80:
        name_len, ref_type = unpack("<BB", data[:2])
        if log_detail:
            print("EXTERNSHEET(b7-):", file=self.logfile)
            hex_char_dump(data, 0, len(data), fout=self.logfile)
            type_texts = {
                1: "Encoded URL",
                2: "Current sheet!!",
                3: "Specific sheet in own doc't",
                4: "Nonspecific sheet in own doc't!!",
            }
            msg = type_texts.get(ref_type, "Not encoded")
            print(" %3d chars, type is %d (%s)" % (name_len, ref_type, msg), file=self.logfile)
        if ref_type == 3:
            sheet_name = unicode(data[2:name_len+2], self.encoding)
            self._extnsht_name_from_num[self._extnsht_count] = sheet_name
            if log_detail:
                print(self._extnsht_name_from_num, file=self.logfile)
        if not (1 <= ref_type <= 4):
            ref_type = 0
        self._externsheet_type_b57.append(ref_type)
        return

    num_refs, = unpack("<H", data[0:2])
    bytes_reqd = num_refs * 6 + 2
    # Keep appending CONTINUE records until all references are present.
    while len(data) < bytes_reqd:
        if log_info:
            fprintf(
                self.logfile,
                "INFO: EXTERNSHEET needs %d bytes, have %d\n",
                bytes_reqd, len(data),
            )
        next_code, _next_length, next_data = self.get_record_parts()
        if next_code != XL_CONTINUE:
            raise XLRDError("Missing CONTINUE after EXTERNSHEET record")
        data += next_data

    for k in range(num_refs):
        start = 2 + 6 * k
        info = unpack("<HHH", data[start:start+6])
        self._externsheet_info.append(info)
        if log_detail:
            ref_recordx, ref_first_sheetx, ref_last_sheetx = info
            fprintf(
                self.logfile,
                "EXTERNSHEET(b8): k = %2d, record = %2d, first_sheet = %5d, last sheet = %5d\n",
                k, ref_recordx, ref_first_sheetx, ref_last_sheetx,
            )

921

def handle_filepass(self, data):
    """
    Reject an encrypted workbook, after optional diagnostics.

    When ``verbosity >= 2`` the record is hex-dumped to the log file and,
    for BIFF 8.0 and later, the kind of encryption is reported. The
    diagnostics only decode fields that are actually present, so a
    FILEPASS record of unexpected length cannot make ``unpack`` fail and
    hide the real error.

    :param data: The data bytes of the FILEPASS record.
    :raises XLRDError: Always -- encrypted workbooks are not supported.
    """
    if self.verbosity >= 2:
        logf = self.logfile
        fprintf(logf, "FILEPASS:\n")
        hex_char_dump(data, 0, len(data), base=0, fout=logf)
        if self.biff_version >= 80 and len(data) >= 2:
            kind1, = unpack('<H', data[:2])
            # Both kinds need the 4 bytes that follow the kind field.
            has_details = len(data) >= 6
            if kind1 == 0 and has_details:  # weak XOR encryption
                # Slice exactly 4 bytes: unpack() insists that the buffer
                # size matches the format.
                key, hash_value = unpack('<HH', data[2:6])
                fprintf(logf,
                    'weak XOR: key=0x%04x hash=0x%04x\n',
                    key, hash_value)
            elif kind1 == 1 and has_details:
                kind2, = unpack('<H', data[4:6])
                if kind2 == 1:  # BIFF8 standard encryption
                    caption = "BIFF8 std"
                elif kind2 == 2:
                    caption = "BIFF8 strong"
                else:
                    caption = "** UNKNOWN ENCRYPTION METHOD **"
                fprintf(logf, "%s\n", caption)
    raise XLRDError("Workbook is encrypted")

944

def handle_name(self, data):
    """Decode a NAME record and append the resulting Name to ``name_obj_list``.

    Records are ignored when the BIFF version is below 50.  The new
    object's ``scope`` is left as None; names_epilogue() fills it in.

    :param data: body of the NAME record (bytes).
    """
    blah = DEBUG or self.verbosity >= 2
    bv = self.biff_version
    if bv < 50:
        return
    self.derive_encoding()
    # Fixed 14-byte header.  The keyboard shortcut and the four trailing
    # one-byte lengths (menu, description, help topic, status bar text)
    # are unpacked but not used here.
    header = unpack("<HBBHHH4B", data[0:14])
    option_flags, _kb_shortcut, name_len, fmla_len, extsht_index, sheet_index = header[:6]

    nobj = Name()
    nobj.book = self  ### CIRCULAR ###
    name_index = len(self.name_obj_list)
    nobj.name_index = name_index
    self.name_obj_list.append(nobj)
    nobj.option_flags = option_flags
    # (attribute name, bit mask, right shift) for each field of option_flags
    flag_fields = (
        ('hidden', 1, 0),
        ('func', 2, 1),
        ('vbasic', 4, 2),
        ('macro', 8, 3),
        ('complex', 0x10, 4),
        ('builtin', 0x20, 5),
        ('funcgroup', 0xFC0, 6),
        ('binary', 0x1000, 12),
    )
    for attr, mask, nshift in flag_fields:
        setattr(nobj, attr, (option_flags & mask) >> nshift)

    # The name is a byte string before BIFF8 and a unicode string from BIFF8 on.
    if bv >= 80:
        internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len)
    else:
        internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len)
    nobj.extn_sheet_num = extsht_index
    nobj.excel_sheet_index = sheet_index
    nobj.scope = None  # patched up in the names_epilogue() method
    if blah:
        macro_flag = " M"[nobj.macro]
        fprintf(
            self.logfile,
            "NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r\n",
            name_index, macro_flag, option_flags, name_len,
            fmla_len, extsht_index, sheet_index, internal_name)
    if nobj.builtin:
        # Built-in names are stored as a code; translate it to the real name.
        name = builtin_name_from_code.get(internal_name, "??Unknown??")
        if blah:
            print(" builtin: %s" % name, file=self.logfile)
    else:
        name = internal_name
    nobj.name = name
    # Everything after the name is kept unparsed for later evaluation.
    nobj.raw_formula = data[pos:]
    nobj.basic_formula_len = fmla_len
    nobj.evaluated = 0
    if blah:
        nobj.dump(
            self.logfile,
            header="--- handle_name: name[%d] ---" % name_index,
            footer="-------------------",
        )

1004

def names_epilogue(self):
    """Finish processing of the NAME records once all globals are read.

    Three steps are performed over ``self.name_obj_list``:

    1. Each Name's ``scope`` is derived from its sheet reference:
       -1 = global, >= 0 = internal sheet index, -2 = refers to a sheet
       that is valid but not useful (macro/VBA sheet, or a name that is
       not one of this book's sheets), -3 = invalid reference.
    2. The formula of every Name that is not a macro, not binary and not
       already evaluated is evaluated.
    3. ``self.name_and_scope_map`` ((lower-cased name, scope) -> Name) and
       ``self.name_map`` (lower-cased name -> list of Names sorted by
       scope) are built.

    A BIFF5-7 name that refers to an EXTERNSHEET number that was never
    registered is given scope -3 instead of raising KeyError.
    """
    blah = self.verbosity >= 2
    f = self.logfile
    if blah:
        print("+++++ names_epilogue +++++", file=f)
        print("_all_sheets_map", REPR(self._all_sheets_map), file=f)
        print("_extnsht_name_from_num", REPR(self._extnsht_name_from_num), file=f)
        print("_sheet_num_from_name", REPR(self._sheet_num_from_name), file=f)

    # Convert from excel_sheet_index to scope.
    # This is done here because in BIFF7 and earlier, the
    # BOUNDSHEET records (from which _all_sheets_map is derived)
    # come after the NAME records.
    for nobj in self.name_obj_list:
        if self.biff_version >= 80:
            sheet_index = nobj.excel_sheet_index
            if sheet_index == 0:
                intl_sheet_index = -1  # global
            elif 1 <= sheet_index <= len(self._all_sheets_map):
                intl_sheet_index = self._all_sheets_map[sheet_index - 1]
                if intl_sheet_index == -1:  # maps to a macro or VBA sheet
                    intl_sheet_index = -2  # valid sheet reference but not useful
            else:
                intl_sheet_index = -3  # invalid
        elif 50 <= self.biff_version <= 70:
            sheet_index = nobj.extn_sheet_num
            if sheet_index == 0:
                intl_sheet_index = -1  # global
            elif sheet_index in self._extnsht_name_from_num:
                sheet_name = self._extnsht_name_from_num[sheet_index]
                intl_sheet_index = self._sheet_num_from_name.get(sheet_name, -2)
            else:
                # Corrupt file: no EXTERNSHEET record with that number.
                intl_sheet_index = -3  # invalid
        else:
            # Defensive: no sheet-reference convention is defined for this
            # version, so never leave the scope unbound.
            intl_sheet_index = -3
        nobj.scope = intl_sheet_index

    # Parse the formulas ...
    for namex, nobj in enumerate(self.name_obj_list):
        if nobj.macro or nobj.binary or nobj.evaluated:
            continue
        evaluate_name_formula(self, nobj, namex, blah=blah)

    if blah:
        print("---------- name object dump ----------", file=f)
        for namex, nobj in enumerate(self.name_obj_list):
            nobj.dump(f, header="--- name[%d] ---" % namex)
        print("--------------------------------------", file=f)

    #
    # Build some dicts for access to the name objects
    #
    name_and_scope_map = {}  # (name.lower(), scope): Name_object
    grouped = {}  # name.lower(): list of (scope, namex, Name_object)
    for namex, nobj in enumerate(self.name_obj_list):
        name_lcase = nobj.name.lower()
        key = (name_lcase, nobj.scope)
        if key in name_and_scope_map and self.verbosity:
            fprintf(f, 'Duplicate entry %r in name_and_scope_map\n', key)
        name_and_scope_map[key] = nobj
        # namex (a temp unique ID) ensures the Name objects will not
        # be compared (fatal in py3)
        grouped.setdefault(name_lcase, []).append((nobj.scope, namex, nobj))
    # name.lower(): list of Name_objects (sorted in scope order)
    name_map = {}
    for name_lcase, entries in grouped.items():
        name_map[name_lcase] = [entry[2] for entry in sorted(entries)]
    self.name_and_scope_map = name_and_scope_map
    self.name_map = name_map

1078

def handle_obj(self, data):
    """Handle an OBJ record; the record is decoded but nothing is stored.

    Embedded (BOF ... EOF) substreams are dealt with elsewhere.

    :param data: body of the OBJ record (bytes); bytes 4..9 must be present.
    """
    # Object type (2 bytes) and object id (4 bytes); currently unused,
    # but unpacking still fails on a record that is too short.
    _obj_type, _obj_id = unpack('<HI', data[4:10])

1085

def handle_supbook(self, data):
    """Classify a SUPBOOK record (aka EXTERNALBOOK in OOo docs).

    Appends one entry to ``self._supbook_types`` (SUPBOOK_INTERNAL,
    SUPBOOK_ADDIN, SUPBOOK_DDEOLE or SUPBOOK_EXTERNAL), bumps
    ``self._supbook_count`` and, for the internal and add-in cases,
    remembers the index of this record.

    :param data: body of the SUPBOOK record (bytes).
    """
    # Placeholder; replaced below once the kind of SUPBOOK is known.
    self._supbook_types.append(None)
    blah = DEBUG or self.verbosity >= 2
    if blah:
        print("SUPBOOK:", file=self.logfile)
        hex_char_dump(data, 0, len(data), fout=self.logfile)
    (num_sheets,) = unpack("<H", data[0:2])
    if blah:
        print("num_sheets = %d" % num_sheets, file=self.logfile)
    sbn = self._supbook_count  # index of this SUPBOOK record
    self._supbook_count = sbn + 1

    if data[2:4] == b"\x01\x04":
        self._supbook_types[-1] = SUPBOOK_INTERNAL
        self._supbook_locals_inx = sbn
        if blah:
            print("SUPBOOK[%d]: internal 3D refs; %d sheets" % (sbn, num_sheets), file=self.logfile)
            print(" _all_sheets_map", self._all_sheets_map, file=self.logfile)
        return

    if data[0:4] == b"\x01\x00\x01\x3A":
        self._supbook_types[-1] = SUPBOOK_ADDIN
        self._supbook_addins_inx = sbn
        if blah:
            print("SUPBOOK[%d]: add-in functions" % sbn, file=self.logfile)
        return

    url, pos = unpack_unicode_update_pos(data, 2, lenlen=2)
    if num_sheets == 0:
        self._supbook_types[-1] = SUPBOOK_DDEOLE
        if blah:
            fprintf(self.logfile, "SUPBOOK[%d]: DDE/OLE document = %r\n", sbn, url)
        return

    self._supbook_types[-1] = SUPBOOK_EXTERNAL
    if blah:
        fprintf(self.logfile, "SUPBOOK[%d]: url = %r\n", sbn, url)
    # NOTE: the names are collected (and logged) but not stored anywhere.
    sheet_names = []
    for x in range(num_sheets):
        try:
            shname, pos = unpack_unicode_update_pos(data, pos, lenlen=2)
        except struct.error:
            # #### FIX ME ####
            # Should implement handling of CONTINUE record(s) ...
            if self.verbosity:
                print(
                    "*** WARNING: unpack failure in sheet %d of %d in SUPBOOK record for file %r"
                    % (x, num_sheets, url),
                    file=self.logfile,
                )
            break
        sheet_names.append(shname)
        if blah:
            fprintf(self.logfile, " sheetx=%d namelen=%d name=%r (next pos=%d)\n", x, len(shname), shname, pos)

1132

def handle_sheethdr(self, data):
    """Handle a SHEETHDR record and load the worksheet that follows it.

    This a BIFF 4W special.  The SHEETHDR record is followed by a
    (BOF ... EOF) substream containing a worksheet.  After the sheet has
    been read, ``self._position`` is moved to just past that substream.

    :param data: body of the SHEETHDR record: a 4-byte signed sheet
        length followed by the sheet name.
    """
    self.derive_encoding()
    (sheet_len,) = unpack('<i', data[:4])
    sheet_name = unpack_string(data, 4, self.encoding, lenlen=1)
    sheetno = self._sheethdr_count
    # The headers must arrive in the same order as the known sheet names.
    assert sheet_name == self._sheet_names[sheetno]
    self._sheethdr_count = sheetno + 1
    BOF_posn = self._position
    if DEBUG:
        posn = BOF_posn - 4 - len(data)
        fprintf(self.logfile, 'SHEETHDR %d at posn %d: len=%d name=%r\n', sheetno, posn, sheet_len, sheet_name)
    self.initialise_format_info()
    if DEBUG:
        print('SHEETHDR: xf epilogue flag is %d' % self._xf_epilogue_done, file=self.logfile)
    # get_sheet updates _sheet_list but needs a None beforehand
    self._sheet_list.append(None)
    self.get_sheet(sheetno, update_pos=False)
    if DEBUG:
        print('SHEETHDR: posn after get_sheet() =', self._position, file=self.logfile)
    self._position = BOF_posn + sheet_len

1153

def handle_sheetsoffset(self, data):
    """Store the offset carried by a SHEETSOFFSET record.

    :param data: record body; must be exactly one 4-byte little-endian
        signed integer, which is saved as ``self._sheetsoffset``.
    """
    (offset,) = unpack('<i', data)
    if DEBUG:
        print('SHEETSOFFSET:', offset, file=self.logfile)
    self._sheetsoffset = offset

1159

def handle_sst(self, data):
    """Read the shared string table (SST record plus its CONTINUE records).

    The decoded strings are stored in ``self._sharedstrings``; when
    ``self.formatting_info`` is set, the rich-text run lists are stored
    in ``self._rich_text_runlist_map``.

    :param data: body of the SST record (bytes).
    """
    if DEBUG:
        print("SST Processing", file=self.logfile)
        t0 = perf_counter()
    chunks = [data]
    total_bytes = len(data)
    # Number of unique strings: signed 32-bit integer at offset 4.
    (uniquestrings,) = unpack('<i', data[4:8])
    if DEBUG or self.verbosity >= 2:
        fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings)
    # Gather every CONTINUE record that immediately follows.
    while True:
        code, nb, chunk = self.get_record_parts_conditional(XL_CONTINUE)
        if code is None:
            break
        total_bytes += nb
        if DEBUG >= 2:
            fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, total_bytes)
        chunks.append(chunk)
    self._sharedstrings, rt_runlist = unpack_SST_table(chunks, uniquestrings)
    if self.formatting_info:
        self._rich_text_runlist_map = rt_runlist
    if DEBUG:
        t1 = perf_counter()
        print("SST processing took %.2f seconds" % (t1 - t0, ), file=self.logfile)

1184

def handle_writeaccess(self, data):
    """Extract the user name from a WRITEACCESS record.

    Before BIFF8, if no encoding is known yet, the raw bytes are kept in
    ``self.user_name`` and ``self.raw_user_name`` is set to True.
    Otherwise the decoded name, with trailing whitespace removed, is
    stored in ``self.user_name``.

    :param data: body of the WRITEACCESS record (bytes).
    """
    DEBUG = 0
    if self.biff_version >= 80:
        try:
            strg = unpack_unicode(data, 0, lenlen=2)
        except UnicodeDecodeError:
            # may have invalid trailing characters
            strg = unpack_unicode(data.strip(), 0, lenlen=2)
    elif not self.encoding:
        # Cannot decode yet: keep the bytes and flag them as raw.
        self.raw_user_name = True
        self.user_name = data
        return
    else:
        strg = unpack_string(data, 0, self.encoding, lenlen=1)
    if DEBUG:
        fprintf(self.logfile, "WRITEACCESS: %d bytes; raw=%s %r\n", len(data), self.raw_user_name, strg)
    self.user_name = strg.rstrip()

1202

def parse_globals(self):
    """Read the workbook-globals records, dispatching each to its handler.

    Reading starts at the current position (just after the BOF) and stops
    at the EOF record, where the XF, NAME and palette epilogues are run
    and the encoding is derived if it is still unknown.  Records without
    a handler are ignored.
    """
    # no need to position, just start reading (after the BOF)
    formatting.initialise_book(self)
    # (record code, handler method name), scanned in order: first match
    # wins.  Handlers are looked up by name only when actually needed.
    dispatch = (
        (XL_SST, "handle_sst"),
        (XL_FONT, "handle_font"),
        (XL_FONT_B3B4, "handle_font"),
        # XL_FORMAT2 is BIFF <= 3.0, can't appear in globals
        (XL_FORMAT, "handle_format"),
        (XL_XF, "handle_xf"),
        (XL_BOUNDSHEET, "handle_boundsheet"),
        (XL_DATEMODE, "handle_datemode"),
        (XL_CODEPAGE, "handle_codepage"),
        (XL_COUNTRY, "handle_country"),
        (XL_EXTERNNAME, "handle_externname"),
        (XL_EXTERNSHEET, "handle_externsheet"),
        (XL_FILEPASS, "handle_filepass"),
        (XL_WRITEACCESS, "handle_writeaccess"),
        (XL_SHEETSOFFSET, "handle_sheetsoffset"),
        (XL_SHEETHDR, "handle_sheethdr"),
        (XL_SUPBOOK, "handle_supbook"),
        (XL_NAME, "handle_name"),
        (XL_PALETTE, "handle_palette"),
        (XL_STYLE, "handle_style"),
    )
    while True:
        rc, length, data = self.get_record_parts()
        if DEBUG:
            print("parse_globals: record code is 0x%04x" % rc, file=self.logfile)
        for code, handler_name in dispatch:
            if rc == code:
                getattr(self, handler_name)(data)
                break
        else:
            # No ordinary handler matched.
            if rc & 0xff == 9 and self.verbosity:
                # Low byte 9 is treated as a BOF, which should not occur here.
                fprintf(self.logfile, "*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r\n",
                        self._position - length - 4, rc, length, data)
            elif rc == XL_EOF:
                self.xf_epilogue()
                self.names_epilogue()
                self.palette_epilogue()
                if not self.encoding:
                    self.derive_encoding()
                if self.biff_version == 45:
                    if DEBUG:
                        print("global EOF: position", self._position, file=self.logfile)
                return
            # anything else: record is ignored

1266

def read(self, pos, length):
    """Return up to ``length`` bytes of ``self.mem`` starting at ``pos``.

    ``self._position`` is left just past the bytes actually returned,
    which may be fewer than requested near the end of the data.
    """
    end = pos + length
    chunk = self.mem[pos:end]
    self._position = pos + len(chunk)
    return chunk

1271

def getbof(self, rqd_stream):
    """Read the BOF record at the current position and return its BIFF version.

    :param rqd_stream: the stream type the caller expects to find.
    :returns: the BIFF version as an integer (80, 70, 50, 45, 40, 30 or
        21; 0 when the version fields are not recognised).
    :raises XLRDError: if there is no well-formed BOF record here, or its
        stream type is neither the requested one nor an acceptable
        substitute.
    """
    if DEBUG:
        print("reqd: 0x%04x" % rqd_stream, file=self.logfile)

    def bof_error(msg):
        raise XLRDError('Unsupported format, or corrupt file: ' + msg)

    savpos = self._position
    opcode = self.get2bytes()
    if opcode == MY_EOF:
        bof_error('Expected BOF record; met end of file')
    if opcode not in bofcodes:
        bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
    length = self.get2bytes()
    if length == MY_EOF:
        bof_error('Incomplete BOF record[1]; met end of file')
    if length < 4 or length > 20:
        bof_error(
            'Invalid length (%d) for BOF record type 0x%04x'
            % (length, opcode))
    # Short records are zero-padded up to the length expected for this opcode.
    padding = b'\0' * max(0, boflen[opcode] - length)
    data = self.read(self._position, length)
    if DEBUG:
        fprintf(self.logfile, "\ngetbof(): data=%r\n", data)
    if len(data) < length:
        bof_error('Incomplete BOF record[2]; met end of file')
    data += padding

    version1 = opcode >> 8
    version2, streamtype = unpack('<HH', data[0:4])
    if DEBUG:
        print("getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x"
              % (opcode, version2, streamtype), file=self.logfile)
        bof_offset = self._position - 4 - length
        print("getbof(): BOF found at offset %d; savpos=%d"
              % (bof_offset, savpos), file=self.logfile)

    build = year = 0
    if version1 == 0x08:
        build, year = unpack('<HH', data[4:8])
        if version2 == 0x0600:
            version = 80
        elif version2 == 0x0500:
            # Build year and build id tell BIFF5 apart from BIFF7.
            is_biff5 = year < 1994 or build in (2412, 3218, 3321)
            version = 50 if is_biff5 else 70
        else:
            # dodgy one, created by a 3rd-party tool
            version = {
                0x0000: 21,
                0x0007: 21,
                0x0200: 21,
                0x0300: 30,
                0x0400: 40,
            }.get(version2, 0)
    else:
        # Older formats: the high byte of the opcode gives the version.
        version = {0x04: 40, 0x02: 30, 0x00: 21}.get(version1, 0)

    if version == 40 and streamtype == XL_WORKBOOK_GLOBALS_4W:
        version = 45  # i.e. 4W

    if DEBUG or self.verbosity >= 2:
        print("BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d"
              % (opcode, version2, streamtype, build, year, version), file=self.logfile)

    got_globals = streamtype == XL_WORKBOOK_GLOBALS or (
        version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W)
    wanted_globals = rqd_stream == XL_WORKBOOK_GLOBALS
    if (wanted_globals and got_globals) or streamtype == rqd_stream:
        return version
    # Before BIFF5 a worksheet stream is accepted whatever was requested.
    if version < 50 and streamtype == XL_WORKSHEET:
        return version
    if version >= 50 and streamtype == 0x0100:
        bof_error("Workspace file -- no spreadsheet data")
    bof_error(
        'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d'
        % (opcode, version2, streamtype, build, year, version)
    )

1347

1348# === helper functions

1349

def expand_cell_address(inrow, incol):
    """Split a packed cell address into row, column and relative flags.

    Ref : OOo docs, "4.3.4 Cell Addresses in BIFF8"

    Bit 0x8000 of ``incol`` marks the row as relative and bit 0x4000
    marks the column as relative; the column itself is the low 8 bits.
    Relative components are reinterpreted as signed 16-bit (row) or
    signed 8-bit (column) offsets.

    :returns: tuple (outrow, outcol, relrow, relcol); the flags are 0 or 1.
    """
    relrow = 1 if incol & 0x8000 else 0
    relcol = 1 if incol & 0x4000 else 0
    outrow = inrow
    if relrow and outrow >= 32768:
        outrow -= 65536
    outcol = incol & 0xFF
    if relcol and outcol >= 128:
        outcol -= 256
    return outrow, outcol, relrow, relcol

1367

def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """Return the spreadsheet-style letters for a zero-based column index.

    0 -> "A", 25 -> "Z", 26 -> "AA", and so on.

    :param colx: column index; must be >= 0.
    :param _A2Z: the alphabet to draw letters from (internal).
    """
    assert colx >= 0
    letters = []  # least significant letter first
    quot, rem = divmod(colx, 26)
    letters.append(_A2Z[rem])
    while quot:
        # There is no "zero" letter, hence the -1 before each further step.
        quot, rem = divmod(quot - 1, 26)
        letters.append(_A2Z[rem])
    return UNICODE_LITERAL('').join(reversed(letters))

1377

def display_cell_address(rowx, colx, relrow, relcol):
    """Format a cell address for display, column part first.

    An absolute component is shown as ``$`` followed by the 1-based row
    number or the column letters; a relative component is shown as a
    signed offset in the form ``(*+n)`` or ``(*-n)``.
    """
    if relrow:
        row_sign = "-" if rowx < 0 else "+"
        rowpart = "(*%s%d)" % (row_sign, abs(rowx))
    else:
        rowpart = "$%d" % (rowx+1,)
    if relcol:
        col_sign = "-" if colx < 0 else "+"
        colpart = "(*%s%d)" % (col_sign, abs(colx))
    else:
        colpart = "$" + colname(colx)
    return colpart + rowpart

1388

def unpack_SST_table(datatab, nstrings):
    """Decode the shared string table.

    :param datatab: list of record bodies: the SST record first, then each
        CONTINUE record in order.  Strings start at offset 8 of the first.
    :param nstrings: number of strings to decode.
    :returns: tuple (strings, richtext_runs) where ``strings`` is the list
        of decoded strings and ``richtext_runs`` maps the index of each
        rich-text string to its list of 2-tuples of unsigned 16-bit
        values, one per formatting run.
    """
    # Local aliases: this loop runs once per string in the workbook.
    unpack_ = unpack
    min_ = min
    byte_ord = BYTES_ORD
    latin_1 = "latin_1"
    empty = UNICODE_LITERAL('')

    ndatas = len(datatab)
    datainx = 0
    data = datatab[0]
    datalen = len(data)
    pos = 8
    strings = []
    add_string = strings.append
    richtext_runs = {}
    for strx in xrange(nstrings):
        # Per-string header: character count, option flags, then optional
        # rich-text run count and phonetic block size.
        nchars = unpack_('<H', data[pos:pos+2])[0]
        options = byte_ord(data[pos+2])
        pos += 3
        rtcount = 0
        phosz = 0
        if options & 0x08:  # richtext
            rtcount = unpack_('<H', data[pos:pos+2])[0]
            pos += 2
        if options & 0x04:  # phonetic
            phosz = unpack_('<i', data[pos:pos+4])[0]
            pos += 4

        pieces = []
        charsgot = 0
        while True:
            charsneed = nchars - charsgot
            if options & 0x01:
                # Uncompressed UTF-16
                # A decoding failure here probably means dodgy data, e.g.
                # an unfinished surrogate pair; it is left to propagate.
                charsavail = min_((datalen - pos) >> 1, charsneed)
                nbytes = 2 * charsavail
                pieces.append(unicode(data[pos:pos+nbytes], "utf_16_le"))
            else:
                # Note: this is COMPRESSED (not ASCII!) encoding!!!
                charsavail = min_(datalen - pos, charsneed)
                nbytes = charsavail
                pieces.append(unicode(data[pos:pos+nbytes], latin_1))
            pos += nbytes
            charsgot += charsavail
            if charsgot == nchars:
                break
            # The string continues in the next record, whose first byte
            # is a fresh option-flags byte for the remainder.
            datainx += 1
            data = datatab[datainx]
            datalen = len(data)
            options = byte_ord(data[0])
            pos = 1
        accstrg = empty.join(pieces)

        if rtcount:
            runs = []
            for _runindex in xrange(rtcount):
                if pos == datalen:
                    # Runs resume at the start of the next record.
                    pos = 0
                    datainx += 1
                    data = datatab[datainx]
                    datalen = len(data)
                runs.append(unpack_("<HH", data[pos:pos+4]))
                pos += 4
            richtext_runs[len(strings)] = runs

        pos += phosz  # size of the phonetic stuff to skip
        if pos >= datalen:
            # adjust to correct position in next record
            pos -= datalen
            datainx += 1
            if datainx < ndatas:
                data = datatab[datainx]
                datalen = len(data)
            else:
                # Running out of records is only legitimate after the last string.
                assert strx == nstrings - 1
        add_string(accstrg)
    return strings, richtext_runs