Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/book.py: 59%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

862 statements  

1# Copyright (c) 2005-2012 Stephen John Machin, Lingfo Pty Ltd 

2# This module is part of the xlrd package, which is released under a 

3# BSD-style licence. 

4 

5from __future__ import print_function 

6 

7import struct 

8 

9from . import compdoc, formatting, sheet 

10from .biffh import * 

11from .formula import * 

12from .timemachine import * 

13 

14try: 

15 from time import perf_counter 

16except ImportError: 

17 # Python 2.7 

18 from time import clock as perf_counter 

19 

20from struct import unpack 

21 

22empty_cell = sheet.empty_cell # for exposure to the world ... 

23 

24DEBUG = 0 

25 

26import mmap 

27 

28MY_EOF = 0xF00BAAA # not a 16-bit number 

29 

30SUPBOOK_UNK, SUPBOOK_INTERNAL, SUPBOOK_EXTERNAL, SUPBOOK_ADDIN, SUPBOOK_DDEOLE = range(5) 

31 

32SUPPORTED_VERSIONS = (80, 70, 50, 45, 40, 30, 21, 20) 

33 

# Names of the built-in defined names, listed in code order: the position of
# each name in this tuple is the ordinal of its one-character code.
_builtin_names_by_code = (
    "Consolidate_Area",   # 0x00
    "Auto_Open",          # 0x01
    "Auto_Close",         # 0x02
    "Extract",            # 0x03
    "Database",           # 0x04
    "Criteria",           # 0x05
    "Print_Area",         # 0x06
    "Print_Titles",       # 0x07
    "Recorder",           # 0x08
    "Data_Form",          # 0x09
    "Auto_Activate",      # 0x0A
    "Auto_Deactivate",    # 0x0B
    "Sheet_Title",        # 0x0C
    "_FilterDatabase",    # 0x0D
)

# Built-in name -> one-character code.
code_from_builtin_name = {
    UNICODE_LITERAL(_name): UNICODE_LITERAL(chr(_code))
    for _code, _name in enumerate(_builtin_names_by_code)
}

# One-character code -> built-in name (the inverse of the mapping above).
builtin_name_from_code = {
    _code: _name for _name, _code in code_from_builtin_name.items()
}

# The scratch tuple is not part of the module's namespace.
del _builtin_names_by_code

58 

def open_workbook_xls(filename=None,
                      logfile=sys.stdout, verbosity=0, use_mmap=True,
                      file_contents=None,
                      encoding_override=None,
                      formatting_info=False, on_demand=False, ragged_rows=False,
                      ignore_workbook_corruption=False):
    """
    Build a :class:`Book` from an XLS file or from in-memory file contents.

    All keyword arguments are forwarded unchanged to
    :meth:`Book.biff2_8_load`. ``on_demand`` is additionally honoured here:
    it is forced to ``False`` (with a warning written to the book's logfile)
    for BIFF versions 4.5 and earlier.

    :returns: The loaded :class:`Book`. Its resources have already been
              released unless ``on_demand`` is still in effect.

    :raises XLRDError:
        The BIFF version cannot be determined or is not one of
        ``SUPPORTED_VERSIONS``.
    """
    on_demand_unsupported = (
        "*** WARNING: on_demand is not supported for this Excel version.\n"
        "*** Setting on_demand to False.\n"
    )
    started = perf_counter()
    bk = Book()
    try:
        bk.biff2_8_load(
            filename=filename, file_contents=file_contents,
            logfile=logfile, verbosity=verbosity, use_mmap=use_mmap,
            encoding_override=encoding_override,
            formatting_info=formatting_info,
            on_demand=on_demand,
            ragged_rows=ragged_rows,
            ignore_workbook_corruption=ignore_workbook_corruption
        )
        stage_1_done = perf_counter()
        bk.load_time_stage_1 = stage_1_done - started

        biff_version = bk.getbof(XL_WORKBOOK_GLOBALS)
        if not biff_version:
            raise XLRDError("Can't determine file's BIFF version")
        if biff_version not in SUPPORTED_VERSIONS:
            raise XLRDError(
                "BIFF version %s is not supported"
                % biff_text_from_num[biff_version]
            )
        bk.biff_version = biff_version

        if biff_version <= 40:
            # no workbook globals, only 1 worksheet
            if on_demand:
                fprintf(bk.logfile, on_demand_unsupported)
                bk.on_demand = on_demand = False
            bk.fake_globals_get_sheet()
        elif biff_version == 45:
            # worksheet(s) embedded in global stream
            bk.parse_globals()
            if on_demand:
                fprintf(bk.logfile, on_demand_unsupported)
                bk.on_demand = on_demand = False
        else:
            bk.parse_globals()
            # One empty slot per sheet name; filled now or on demand.
            bk._sheet_list = [None] * len(bk._sheet_names)
            if not on_demand:
                bk.get_sheets()

        bk.nsheets = len(bk._sheet_list)
        if biff_version == 45 and bk.nsheets > 1:
            fprintf(
                bk.logfile,
                "*** WARNING: Excel 4.0 workbook (.XLW) file contains %d worksheets.\n"
                "*** Book-level data will be that of the last worksheet.\n",
                bk.nsheets
            )
        stage_2_done = perf_counter()
        bk.load_time_stage_2 = stage_2_done - stage_1_done
    except:
        # Whatever went wrong, drop the file/mmap handles before propagating.
        bk.release_resources()
        raise

    # Normal exit: only an on_demand book keeps its resources open.
    if not on_demand:
        bk.release_resources()
    return bk

125 

126 

class Name(BaseObject):
    """
    Describes one named reference, formula, macro, etc. in a workbook.

    .. note::

      Files older than Excel 5.0 (``Book.biff_version < 50``) do **not**
      have their name information extracted.
    """
    _repr_these = ['stack']

    # The owning book (parent object).
    book = None

    #: Visibility flag: 0 = Visible; 1 = Hidden
    hidden = 0

    #: Macro kind: 0 = Command macro; 1 = Function macro.
    #: Only meaningful when macro == 1
    func = 0

    #: Macro language: 0 = Sheet macro; 1 = VisualBasic macro.
    #: Only meaningful when macro == 1
    vbasic = 0

    #: Name kind: 0 = Standard name; 1 = Macro name
    macro = 0

    #: Formula kind: 0 = Simple formula; 1 = Complex formula (array formula
    #: or user defined).
    #:
    #: .. note:: No examples have been sighted.
    complex = 0

    #: Origin: 0 = User-defined name; 1 = Built-in name
    #:
    #: Common examples: ``Print_Area``, ``Print_Titles``; the OOo docs have
    #: the full list
    builtin = 0

    #: Function group. Only meaningful when macro == 1; the OOo docs list
    #: the values.
    funcgroup = 0

    #: Content kind: 0 = Formula definition; 1 = Binary data
    #:
    #: .. note:: No examples have been sighted.
    binary = 0

    #: Position of this object within book.name_obj_list
    name_index = 0

    # A Unicode string. For a built-in name, decoded as per the OOo docs.
    name = UNICODE_LITERAL("")

    #: The formula as an 8-bit string.
    raw_formula = b''

    #: ``-1``:
    #:   Global name (visible in all calculation sheets).
    #: ``-2``:
    #:   Name belonging to a macro sheet or VBA sheet.
    #: ``-3``:
    #:   Invalid name.
    #: ``0 <= scope < book.nsheets``:
    #:   Name local to the sheet whose index is scope.
    scope = -1

    #: Outcome of evaluating the formula, if there is one.
    #: ``None`` when there is no formula or when evaluating it ran into
    #: problems; otherwise a single instance of the
    #: :class:`~xlrd.formula.Operand` class.
    #
    result = None

    def cell(self):
        """
        Convenience accessor for the common case of a name that refers to
        exactly one cell.

        :returns: An instance of the :class:`~xlrd.sheet.Cell` class.

        :raises xlrd.biffh.XLRDError:
          The name is not a constant absolute reference
          to a single cell.
        """
        operand = self.result
        if operand:
            # result should be an instance of the Operand class
            kind, value = operand.kind, operand.value
            if kind == oREF and len(value) == 1:
                ref = value[0]
                # Each "hi" bound is exclusive, so a span of exactly one
                # means hi - 1 == lo.
                on_one_sheet = 0 <= ref.shtxlo == ref.shtxhi - 1
                if (on_one_sheet
                        and ref.rowxlo == ref.rowxhi - 1
                        and ref.colxlo == ref.colxhi - 1):
                    target = self.book.sheet_by_index(ref.shtxlo)
                    return target.cell(ref.rowxlo, ref.colxlo)
        # Anything else: dump the object for diagnosis, then refuse.
        self.dump(
            self.book.logfile,
            header="=== Dump of Name object ===",
            footer="======= End of dump =======",
        )
        raise XLRDError("Not a constant absolute reference to a single cell")

    def area2d(self, clipped=True):
        """
        Convenience accessor for a name that refers to one rectangular
        area in one worksheet.

        :param clipped:
          If ``True``, the default, the returned rectangle is clipped
          to fit in ``(0, sheet.nrows, 0, sheet.ncols)``.
          It is guaranteed that ``0 <= rowxlo <= rowxhi <= sheet.nrows`` and
          that the number of usable rows in the area (which may be zero) is
          ``rowxhi - rowxlo``; likewise for columns.

        :returns: a tuple ``(sheet_object, rowxlo, rowxhi, colxlo, colxhi)``.

        :raises xlrd.biffh.XLRDError:
           The name is not a constant absolute reference
           to a single area in a single sheet.
        """
        operand = self.result
        if operand:
            # result should be an instance of the Operand class
            kind, value = operand.kind, operand.value
            # Exactly one reference ...
            if kind == oREF and len(value) == 1:
                ref = value[0]
                # ... covering exactly one usable sheet.
                if 0 <= ref.shtxlo == ref.shtxhi - 1:
                    target = self.book.sheet_by_index(ref.shtxlo)
                    if not clipped:
                        return (target,
                                ref.rowxlo, ref.rowxhi,
                                ref.colxlo, ref.colxhi)
                    # Clip each bound to the sheet's dimensions, keeping
                    # lo <= hi even when the area lies wholly outside.
                    row_lo = min(ref.rowxlo, target.nrows)
                    row_hi = max(row_lo, min(ref.rowxhi, target.nrows))
                    col_lo = min(ref.colxlo, target.ncols)
                    col_hi = max(col_lo, min(ref.colxhi, target.ncols))
                    assert 0 <= row_lo <= row_hi <= target.nrows
                    assert 0 <= col_lo <= col_hi <= target.ncols
                    return target, row_lo, row_hi, col_lo, col_hi
        # Anything else: dump the object for diagnosis, then refuse.
        self.dump(
            self.book.logfile,
            header="=== Dump of Name object ===",
            footer="======= End of dump =======",
        )
        raise XLRDError("Not a constant absolute reference to a single area in a single sheet")

268 

269 

270class Book(BaseObject): 

271 """ 

272 Contents of a "workbook". 

273 

274 .. warning:: 

275 

276 You should not instantiate this class yourself. You use the :class:`Book` 

277 object that was returned when you called :func:`~xlrd.open_workbook`. 

278 """ 

279 

280 #: The number of worksheets present in the workbook file. 

281 #: This information is available even when no sheets have yet been loaded. 

282 nsheets = 0 

283 

284 #: Which date system was in force when this file was last saved. 

285 #: 

286 #: 0: 

287 #: 1900 system (the Excel for Windows default). 

288 #: 

289 #: 1: 

290 #: 1904 system (the Excel for Macintosh default). 

291 #: 

292 #: Defaults to 0 in case it's not specified in the file. 

293 datemode = 0 

294 

295 #: Version of BIFF (Binary Interchange File Format) used to create the file. 

296 #: Latest is 8.0 (represented here as 80), introduced with Excel 97. 

297 #: Earliest supported by this module: 2.0 (represented as 20). 

298 biff_version = 0 

299 

300 #: List containing a :class:`Name` object for each ``NAME`` record in the 

301 #: workbook. 

302 #: 

303 #: .. versionadded:: 0.6.0 

304 name_obj_list = [] 

305 

306 #: An integer denoting the character set used for strings in this file. 

307 #: For BIFF 8 and later, this will be 1200, meaning Unicode; 

308 #: more precisely, UTF_16_LE. 

309 #: For earlier versions, this is used to derive the appropriate Python 

310 #: encoding to be used to convert to Unicode. 

311 #: Examples: ``1252 -> 'cp1252'``, ``10000 -> 'mac_roman'`` 

312 codepage = None 

313 

314 #: The encoding that was derived from the codepage. 

315 encoding = None 

316 

317 #: A tuple containing the telephone country code for: 

318 #: 

319 #: ``[0]``: 

320 #: the user-interface setting when the file was created. 

321 #: 

322 #: ``[1]``: 

323 #: the regional settings. 

324 #: 

325 #: Example: ``(1, 61)`` meaning ``(USA, Australia)``. 

326 #: 

327 #: This information may give a clue to the correct encoding for an 

328 #: unknown codepage. For a long list of observed values, refer to the 

329 #: OpenOffice.org documentation for the ``COUNTRY`` record. 

330 countries = (0, 0) 

331 

332 #: What (if anything) is recorded as the name of the last user to 

333 #: save the file. 

334 user_name = UNICODE_LITERAL('') 

335 

336 #: A list of :class:`~xlrd.formatting.Font` class instances, 

337 #: each corresponding to a FONT record. 

338 #: 

339 #: .. versionadded:: 0.6.1 

340 font_list = [] 

341 

342 #: A list of :class:`~xlrd.formatting.XF` class instances, 

343 #: each corresponding to an ``XF`` record. 

344 #: 

345 #: .. versionadded:: 0.6.1 

346 xf_list = [] 

347 

348 #: A list of :class:`~xlrd.formatting.Format` objects, each corresponding to 

349 #: a ``FORMAT`` record, in the order that they appear in the input file. 

350 #: It does *not* contain builtin formats. 

351 #: 

352 #: If you are creating an output file using (for example) :mod:`xlwt`, 

353 #: use this list. 

354 #: 

355 #: The collection to be used for all visual rendering purposes is 

356 #: :attr:`format_map`. 

357 #: 

358 #: .. versionadded:: 0.6.1 

359 format_list = [] 

360 

361 ## 

362 #: The mapping from :attr:`~xlrd.formatting.XF.format_key` to 

363 #: :class:`~xlrd.formatting.Format` object. 

364 #: 

365 #: .. versionadded:: 0.6.1 

366 format_map = {} 

367 

368 #: This provides access via name to the extended format information for 

369 #: both built-in styles and user-defined styles. 

370 #: 

371 #: It maps ``name`` to ``(built_in, xf_index)``, where 

372 #: ``name`` is either the name of a user-defined style, 

373 #: or the name of one of the built-in styles. Known built-in names are 

374 #: Normal, RowLevel_1 to RowLevel_7, 

375 #: ColLevel_1 to ColLevel_7, Comma, Currency, Percent, "Comma [0]", 

376 #: "Currency [0]", Hyperlink, and "Followed Hyperlink". 

377 #: 

378 #: ``built_in`` has the following meanings 

379 #: 

380 #: 1: 

381 #: built-in style 

382 #: 

383 #: 0: 

384 #: user-defined 

385 #: 

386 #: ``xf_index`` is an index into :attr:`Book.xf_list`. 

387 #: 

388 #: References: OOo docs s6.99 (``STYLE`` record); Excel UI Format/Style 

389 #: 

390 #: .. versionadded:: 0.6.1 

391 #: 

392 #: Extracted only if ``open_workbook(..., formatting_info=True)`` 

393 #: 

394 #: .. versionadded:: 0.7.4 

395 style_name_map = {} 

396 

397 #: This provides definitions for colour indexes. Please refer to 

398 #: :ref:`palette` for an explanation 

399 #: of how colours are represented in Excel. 

400 #: 

401 #: Colour indexes into the palette map into ``(red, green, blue)`` tuples. 

402 #: "Magic" indexes e.g. ``0x7FFF`` map to ``None``. 

403 #: 

404 #: :attr:`colour_map` is what you need if you want to render cells on screen 

405 #: or in a PDF file. If you are writing an output XLS file, use 

406 #: :attr:`palette_record`. 

407 #: 

408 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)`` 

409 #: 

410 #: .. versionadded:: 0.6.1 

411 colour_map = {} 

412 

413 #: If the user has changed any of the colours in the standard palette, the 

414 #: XLS file will contain a ``PALETTE`` record with 56 (16 for Excel 4.0 and 

415 #: earlier) RGB values in it, and this list will be e.g. 

416 #: ``[(r0, g0, b0), ..., (r55, g55, b55)]``. 

417 #: Otherwise this list will be empty. This is what you need if you are 

418 #: writing an output XLS file. If you want to render cells on screen or in a 

419 #: PDF file, use :attr:`colour_map`. 

420 #: 

421 #: .. note:: Extracted only if ``open_workbook(..., formatting_info=True)`` 

422 #: 

423 #: .. versionadded:: 0.6.1 

424 palette_record = [] 

425 

426 #: Time in seconds to extract the XLS image as a contiguous string 

427 #: (or mmap equivalent). 

428 load_time_stage_1 = -1.0 

429 

430 #: Time in seconds to parse the data from the contiguous string 

431 #: (or mmap equivalent). 

432 load_time_stage_2 = -1.0 

433 

def sheets(self):
    """
    Load every sheet that is not loaded yet and return them all.

    :returns: A new list holding all sheets in the book.
    """
    for sheetx in range(self.nsheets):
        if self._sheet_list[sheetx]:
            continue  # already loaded
        self.get_sheet(sheetx)
    # Hand back a copy so callers cannot disturb the internal list.
    return list(self._sheet_list)

444 

def sheet_by_index(self, sheetx):
    """
    Return the sheet at the given index, loading it first if necessary.

    :param sheetx: Sheet index in ``range(nsheets)``
    :returns: A :class:`~xlrd.sheet.Sheet`.
    """
    already_loaded = self._sheet_list[sheetx]
    if already_loaded:
        return already_loaded
    return self.get_sheet(sheetx)

451 

def __iter__(self):
    """
    Yield each sheet of the book in index order, so that a book can be
    iterated over directly.

    Resources are not released afterwards, because the iteration may come
    from a call such as `list(book)`.
    """
    for sheetx in range(self.nsheets):
        sh = self.sheet_by_index(sheetx)
        yield sh

459 

def sheet_by_name(self, sheet_name):
    """
    Return the sheet with the given name, loading it first if necessary.

    :param sheet_name: Name of the sheet required.
    :returns: A :class:`~xlrd.sheet.Sheet`.
    :raises XLRDError: No sheet has that name.
    """
    try:
        position = self._sheet_names.index(sheet_name)
    except ValueError:
        raise XLRDError('No sheet named <%r>' % sheet_name)
    else:
        return self.sheet_by_index(position)

470 

def __getitem__(self, item):
    """
    Support ``book[...]`` with either a sheet index or a sheet name.

    :param item: Name or index of sheet enquired upon
    :return: :class:`~xlrd.sheet.Sheet`.
    """
    # Integers are treated as indexes; anything else as a name.
    finder = self.sheet_by_index if isinstance(item, int) else self.sheet_by_name
    return finder(item)

481 

def sheet_names(self):
    """
    :returns:
      A new list with the names of all the worksheets in the workbook
      file. The names are available even before any sheet has been
      loaded.
    """
    return list(self._sheet_names)

490 

def sheet_loaded(self, sheet_name_or_index):
    """
    Report whether a sheet has been loaded.

    :param sheet_name_or_index: Name or index of sheet enquired upon
    :returns: ``True`` if sheet is loaded, ``False`` otherwise.
    :raises XLRDError: A name was given and no sheet has that name.

    .. versionadded:: 0.7.1
    """
    sheetx = sheet_name_or_index
    if not isinstance(sheetx, int):
        # Not an index, so resolve it as a sheet name.
        try:
            sheetx = self._sheet_names.index(sheet_name_or_index)
        except ValueError:
            raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
    return True if self._sheet_list[sheetx] else False

506 

def unload_sheet(self, sheet_name_or_index):
    """
    Forget a loaded sheet by clearing its slot in the sheet list.

    :param sheet_name_or_index: Name or index of sheet to be unloaded.
    :raises XLRDError: A name was given and no sheet has that name.

    .. versionadded:: 0.7.1
    """
    sheetx = sheet_name_or_index
    if not isinstance(sheetx, int):
        # Not an index, so resolve it as a sheet name.
        try:
            sheetx = self._sheet_names.index(sheet_name_or_index)
        except ValueError:
            raise XLRDError('No sheet named <%r>' % sheet_name_or_index)
    self._sheet_list[sheetx] = None

521 

def release_resources(self):
    """
    Release memory-consuming objects and (possibly) a memory-mapped file
    (:class:`mmap.mmap` object).

    Call this yourself once you have finished loading sheets in
    ``on_demand`` mode but still need the :class:`Book` object to examine
    the sheets already loaded. It is also called automatically (a) when
    :func:`~xlrd.open_workbook` raises an exception and (b) when a
    ``with`` block using the book is exited. Calling it more than once on
    the same object has no ill effect.
    """
    self._resources_released = 1
    # ``mem`` then ``filestr``: close whichever supports it (an mmap.mmap
    # object does), then drop the reference either way.
    for attr in ("mem", "filestr"):
        holder = getattr(self, attr)
        if hasattr(holder, "close"):
            holder.close()
        setattr(self, attr, None)
    self._sharedstrings = None
    self._rich_text_runlist_map = None

544 

def __enter__(self):
    """Enter a ``with`` block; the book itself is the context value."""
    return self

547 

def __exit__(self, exc_type, exc_value, exc_tb):
    """
    Leave a ``with`` block: release the book's resources.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    self.release_resources()

551 

552 #: A mapping from ``(lower_case_name, scope)`` to a single :class:`Name` 

553 #: object. 

554 #: 

555 #: .. versionadded:: 0.6.0 

556 name_and_scope_map = {} 

557 

558 #: A mapping from `lower_case_name` to a list of :class:`Name` objects. 

559 #: The list is sorted in scope order. Typically there will be one item 

560 #: (of global scope) in the list. 

561 #: 

562 #: .. versionadded:: 0.6.0 

563 name_map = {} 

564 

def __init__(self):
    """Set every per-book attribute to its empty starting state."""
    # --- sheets ---------------------------------------------------------
    self.nsheets = 0
    self._sheet_list = []
    self._sheet_names = []
    self._sheet_visibility = []  # from BOUNDSHEET record
    self._sh_abs_posn = []  # sheet's absolute position in the stream
    self._sheethdr_count = 0  # BIFF 4W only
    self._all_sheets_count = 0  # includes macro & VBA sheets
    self._all_sheets_map = []  # maps an all_sheets index to a calc-sheets index (or -1)
    self._sheet_num_from_name = {}

    # --- strings --------------------------------------------------------
    self._sharedstrings = []
    self._rich_text_runlist_map = {}
    self.raw_user_name = False

    # --- formatting -----------------------------------------------------
    self.builtinfmtcount = -1  # unknown as yet. BIFF 3, 4S, 4W
    self.initialise_format_info()
    self.colour_map = {}
    self.palette_record = []
    self.xf_list = []
    self.style_name_map = {}

    # --- external references and names ----------------------------------
    self._supbook_count = 0
    self._supbook_locals_inx = None
    self._supbook_addins_inx = None
    self._supbook_types = []
    self._externsheet_info = []
    self._externsheet_type_b57 = []
    self._extnsht_name_from_num = {}
    self._extnsht_count = 0
    self.addin_func_names = []
    self.name_obj_list = []

    # --- underlying data --------------------------------------------------
    self._resources_released = 0
    self.mem = b''
    self.filestr = b''

597 

def biff2_8_load(self, filename=None, file_contents=None,
                 logfile=sys.stdout, verbosity=0, use_mmap=True,
                 encoding_override=None,
                 formatting_info=False,
                 on_demand=False,
                 ragged_rows=False,
                 ignore_workbook_corruption=False):
    """
    Record the loading options and locate the workbook byte stream.

    The data comes from ``file_contents`` when that is non-empty, otherwise
    from the file named ``filename`` (memory-mapped when ``use_mmap`` is
    true). If the data starts with the compound-document signature, the
    'Workbook' or 'Book' stream is located inside it; otherwise the data
    itself is taken to be the stream.

    On return ``self.mem``, ``self.base``, ``self.stream_len`` and
    ``self._position`` describe the stream.

    :raises XLRDError:
        The file is empty, or a compound document holds no workbook
        stream.
    """
    # DEBUG = 0
    self.logfile = logfile
    self.verbosity = verbosity
    self.use_mmap = use_mmap
    self.encoding_override = encoding_override
    self.formatting_info = formatting_info
    self.on_demand = on_demand
    self.ragged_rows = ragged_rows

    if file_contents:
        self.filestr = file_contents
        self.stream_len = len(file_contents)
    else:
        with open(filename, "rb") as f:
            # Measure the file by seeking to its end, then rewind.
            f.seek(0, 2)
            size = f.tell()
            f.seek(0, 0)
            if size == 0:
                raise XLRDError("File size is 0 bytes")
            if self.use_mmap:
                self.filestr = mmap.mmap(f.fileno(), size, access=mmap.ACCESS_READ)
                self.stream_len = size
            else:
                self.filestr = f.read()
                self.stream_len = len(self.filestr)

    self.base = 0
    if self.filestr[:8] == compdoc.SIGNATURE:
        cd = compdoc.CompDoc(self.filestr, logfile=self.logfile,
                             ignore_workbook_corruption=ignore_workbook_corruption)
        # Try each candidate stream name in turn; the first hit wins.
        for qname in ['Workbook', 'Book']:
            located = cd.locate_named_stream(UNICODE_LITERAL(qname))
            self.mem, self.base, self.stream_len = located
            if self.mem:
                break
        else:
            raise XLRDError("Can't find workbook in OLE2 compound document")
        del cd
        # The container is no longer needed once the stream is a separate
        # object.
        if self.mem is not self.filestr:
            if hasattr(self.filestr, "close"):
                self.filestr.close()
            self.filestr = b''
    else:
        # got this one at the antique store
        self.mem = self.filestr

    self._position = self.base
    if DEBUG:
        print("mem: %s, base: %d, len: %d" % (type(self.mem), self.base, self.stream_len), file=self.logfile)

654 

def initialise_format_info(self):
    """
    Reset all formatting bookkeeping to its empty state.

    Called from ``__init__``; per the original note it also has to be
    done once per sheet for BIFF 4W.
    """
    # Collections
    self.font_list = []
    self.xf_list = []
    self.format_list = []
    self.format_map = {}
    self._xf_index_to_xl_type_map = {0: XL_CELL_NUMBER}
    # Counters and flags
    self.xfcount = 0
    self.actualfmtcount = 0  # number of FORMAT records seen so far
    self._xf_epilogue_done = 0

665 

def get2bytes(self):
    """
    Read a little-endian unsigned 16-bit integer at the current position.

    The position is advanced by the number of bytes actually available
    (at most 2).

    :returns: The integer, or ``MY_EOF`` when fewer than 2 bytes remain.
    """
    start = self._position
    chunk = self.mem[start:start + 2]
    self._position = start + len(chunk)
    if len(chunk) != 2:
        return MY_EOF
    return unpack('<H', chunk)[0]

675 

def get_record_parts(self):
    """
    Read the record at the current position and step past it.

    A record is a 4-byte header (two little-endian unsigned 16-bit
    integers: code, then data length) followed by the data bytes.

    :returns: a tuple ``(code, length, data)``.
    """
    mem = self.mem
    header_start = self._position
    data_start = header_start + 4
    code, length = unpack('<HH', mem[header_start:data_start])
    data_end = data_start + length
    data = mem[data_start:data_end]
    self._position = data_end
    return code, length, data

684 

def get_record_parts_conditional(self, reqd_record):
    """
    Read the record at the current position only if it has the wanted code.

    :param reqd_record: The record code that is required.
    :returns:
        ``(code, length, data)`` with the position stepped past the record
        when the code matches; otherwise ``(None, 0, b'')`` with the
        position left untouched.
    """
    mem = self.mem
    header_start = self._position
    data_start = header_start + 4
    code, length = unpack('<HH', mem[header_start:data_start])
    if code == reqd_record:
        data_end = data_start + length
        data = mem[data_start:data_end]
        self._position = data_end
        return (code, length, data)
    return (None, 0, b'')

695 

def get_sheet(self, sh_number, update_pos=True):
    """
    Load one sheet from the stream and store it in the sheet list.

    :param sh_number: Index of the sheet to load.
    :param update_pos:
        If true (the default), first move to the sheet's recorded
        absolute position in the stream.
    :returns: The newly read sheet object.
    :raises XLRDError: Resources have already been released.
    """
    if self._resources_released:
        raise XLRDError("Can't load sheets after releasing resources.")
    if update_pos:
        self._position = self._sh_abs_posn[sh_number]
    # The version reported by the sheet's BOF is deliberately ignored:
    # asserting that it equals self.biff_version FAILS. There is an example
    # where the book is v7 but a sheet reports v8, and it appears to work
    # OK if the sheet version is ignored. Confirmed by Daniel Rentz: this
    # happens when Excel does "save as" creating an old version file.
    self.getbof(XL_WORKSHEET)
    name = self._sheet_names[sh_number]
    sh = sheet.Sheet(self, self._position, name, sh_number)
    sh.read(self)
    self._sheet_list[sh_number] = sh
    return sh

716 

def get_sheets(self):
    """Load every sheet named in ``_sheet_names``, in index order."""
    # DEBUG = 0
    if DEBUG:
        print("GET_SHEETS:", self._sheet_names, self._sh_abs_posn, file=self.logfile)
    for sheetno in range(len(self._sheet_names)):
        if DEBUG:
            print("GET_SHEETS: sheetno =", sheetno, self._sheet_names, self._sh_abs_posn, file=self.logfile)
        self.get_sheet(sheetno)

723 

def fake_globals_get_sheet(self):  # for BIFF 4.0 and earlier
    """
    Set up the book as holding one visible sheet at stream position 0,
    then load that sheet.

    Used for files that have no workbook globals and only one worksheet.
    """
    formatting.initialise_book(self)
    self._sheet_names = [UNICODE_LITERAL('Sheet 1')]
    self._sh_abs_posn = [0]
    self._sheet_visibility = [0]  # one sheet, visible
    # get_sheet updates _sheet_list but needs a None beforehand
    self._sheet_list.append(None)
    self.get_sheets()

732 

def handle_boundsheet(self, data):
    """
    Process one BOUNDSHEET record.

    Every record bumps ``_all_sheets_count`` and appends to
    ``_all_sheets_map``. A worksheet additionally has its name, absolute
    stream position and visibility recorded; any other sheet type is
    mapped to -1 and otherwise ignored.

    :param data: The record's data bytes.
    """
    # DEBUG = 1
    bv = self.biff_version
    self.derive_encoding()
    if DEBUG:
        fprintf(self.logfile, "BOUNDSHEET: bv=%d data %r\n", bv, data)

    if bv == 45:  # BIFF4W
        #### Not documented in OOo docs ...
        # In fact, the *only* data is the name of the sheet.
        sheet_name = unpack_string(data, 0, self.encoding, lenlen=1)
        visibility = 0
        sheet_type = XL_BOUNDSHEET_WORKSHEET  # guess, patch later
        if not len(self._sh_abs_posn):
            abs_posn = self._sheetsoffset + self.base
            # Note (a) this won't be used
            # (b) it's the position of the SHEETHDR record
            # (c) add 11 to get to the worksheet BOF record
        else:
            abs_posn = -1  # unknown
    else:
        offset, visibility, sheet_type = unpack('<iBB', data[0:6])
        # because global BOF is always at posn 0 in the stream
        abs_posn = offset + self.base
        if bv >= BIFF_FIRST_UNICODE:
            sheet_name = unpack_unicode(data, 6, lenlen=1)
        else:
            sheet_name = unpack_string(data, 6, self.encoding, lenlen=1)

    if DEBUG or self.verbosity >= 2:
        fprintf(self.logfile,
                "BOUNDSHEET: inx=%d vis=%r sheet_name=%r abs_posn=%d sheet_type=0x%02x\n",
                self._all_sheets_count, visibility, sheet_name, abs_posn, sheet_type)
    self._all_sheets_count += 1

    if sheet_type == XL_BOUNDSHEET_WORKSHEET:
        snum = len(self._sheet_names)
        self._all_sheets_map.append(snum)
        self._sheet_names.append(sheet_name)
        self._sh_abs_posn.append(abs_posn)
        self._sheet_visibility.append(visibility)
        self._sheet_num_from_name[sheet_name] = snum
        return

    # Not a worksheet: it has no calc-sheet index.
    self._all_sheets_map.append(-1)
    known_types = {
        1: 'Macro sheet',
        2: 'Chart',
        6: 'Visual Basic module',
    }
    descr = known_types.get(sheet_type, 'UNKNOWN')
    if DEBUG or self.verbosity >= 1:
        fprintf(self.logfile,
                "NOTE *** Ignoring non-worksheet data named %r (type 0x%02x = %s)\n",
                sheet_name, sheet_type, descr)

784 

def handle_builtinfmtcount(self, data):
    """
    Store the count held in a BUILTINFMTCOUNT record.

    :param data:
        The record's data bytes; the first two are read as a little-endian
        unsigned 16-bit integer.
    """
    ### N.B. This count appears to be utterly useless.
    # DEBUG = 1
    (count,) = unpack('<H', data[0:2])
    if DEBUG:
        fprintf(self.logfile, "BUILTINFMTCOUNT: %r\n", count)
    self.builtinfmtcount = count

791 

def derive_encoding(self):
    """
    Work out ``self.encoding`` from the override, the codepage and the
    BIFF version, then decode a pending raw user name if there is one.

    Precedence: ``encoding_override`` wins; otherwise a known codepage is
    mapped to an encoding; with no codepage at all, files older than
    BIFF 8 fall back to 'iso-8859-1' and newer files get codepage 1200.

    Unless the codepage ends up as 1200, the encoding is checked by a
    trial decode; a failure is logged and re-raised.

    :returns: ``self.encoding``
    """
    if self.encoding_override:
        self.encoding = self.encoding_override
    elif self.codepage is not None:
        codepage = self.codepage
        if codepage in encoding_from_codepage:
            chosen = encoding_from_codepage[codepage]
        elif 300 <= codepage <= 1999:
            chosen = 'cp' + str(codepage)
        elif self.biff_version >= 80:
            self.codepage = 1200
            chosen = 'utf_16_le'
        else:
            chosen = 'unknown_codepage_' + str(codepage)
        if DEBUG or (self.verbosity and chosen != self.encoding):
            fprintf(self.logfile, "CODEPAGE: codepage %r -> encoding %r\n", codepage, chosen)
        self.encoding = chosen
    elif self.biff_version < 80:
        fprintf(self.logfile,
                "*** No CODEPAGE record, no encoding_override: will use 'iso-8859-1'\n")
        self.encoding = 'iso-8859-1'
    else:
        self.codepage = 1200  # utf16le
        if self.verbosity >= 2:
            fprintf(self.logfile, "*** No CODEPAGE record; assuming 1200 (utf_16_le)\n")

    if self.codepage != 1200:  # utf_16_le
        # If we don't have a codec that can decode ASCII into Unicode,
        # we're well & truly stuffed -- let the punter know ASAP.
        try:
            unicode(b'trial', self.encoding)
        except BaseException as exc:
            fprintf(self.logfile,
                    "ERROR *** codepage %r -> encoding %r -> %s: %s\n",
                    self.codepage, self.encoding, type(exc).__name__.split(".")[-1], exc)
            raise

    if self.raw_user_name:
        # The user name was stored undecoded; decode it now that an
        # encoding is available.
        decoded = unpack_string(self.user_name, 0, self.encoding, lenlen=1)
        self.user_name = decoded.rstrip()
        self.raw_user_name = False
    return self.encoding

836 

def handle_codepage(self, data):
    """
    Store the codepage from a CODEPAGE record and re-derive the encoding.

    :param data:
        The record's data bytes; the first two are read as a little-endian
        unsigned 16-bit integer.
    """
    # DEBUG = 0
    (self.codepage,) = unpack('<H', data[0:2])
    self.derive_encoding()

842 

def handle_country(self, data):
    """
    Store the pair of country codes from a COUNTRY record.

    :param data:
        The record's data bytes; the first four are read as two
        little-endian unsigned 16-bit integers.
    """
    pair = unpack('<HH', data[0:4])
    if self.verbosity:
        print("Countries:", pair, file=self.logfile)
    # Note: in BIFF7 and earlier, country record was put (redundantly?) in each worksheet.
    # A repeated record must therefore agree with what is already stored.
    assert self.countries in ((0, 0), pair)
    self.countries = pair

849 

def handle_datemode(self, data):
    """
    Store the date system flag from a DATEMODE record.

    :param data:
        The record's data bytes; the first two are read as a little-endian
        unsigned 16-bit integer, which must be 0 or 1.
    """
    (mode,) = unpack('<H', data[0:2])
    if DEBUG or self.verbosity:
        fprintf(self.logfile, "DATEMODE: datemode %r\n", mode)
    assert mode in (0, 1)
    self.datemode = mode

856 

def handle_externname(self, data):
    """
    Process an EXTERNNAME record (BIFF 8 and later only).

    When the most recently seen SUPBOOK is of the add-in type, the name is
    appended to ``addin_func_names``. Earlier BIFF versions are ignored.

    :param data: The record's data bytes.
    """
    blah = DEBUG or self.verbosity >= 2
    if self.biff_version < 80:
        return
    option_flags, other_info = unpack("<HI", data[:6])
    # The name follows the 6-byte fixed part.
    name, pos = unpack_unicode_update_pos(data, 6, lenlen=1)
    extra = data[pos:]
    sbktype = self._supbook_types[-1]
    if sbktype == SUPBOOK_ADDIN:
        self.addin_func_names.append(name)
    if blah:
        fprintf(self.logfile,
                "EXTERNNAME: sbktype=%d oflags=0x%04x oinfo=0x%08x name=%r extra=%r\n",
                sbktype, option_flags, other_info, name, extra)

870 

def handle_externsheet(self, data):
    """
    Process one EXTERNSHEET record.

    For BIFF 8 and later the record holds a count followed by that many
    6-byte references; each is appended to ``_externsheet_info``, and
    CONTINUE records are read as needed to obtain all the bytes.

    For earlier versions the record describes one external sheet; its
    type is appended to ``_externsheet_type_b57`` and, for type 3, its
    name is stored in ``_extnsht_name_from_num``.

    :param data: The record's data bytes.
    :raises XLRDError:
        More data is needed but the next record is not a CONTINUE record.
    """
    self.derive_encoding()  # in case CODEPAGE record missing/out of order/wrong
    self._extnsht_count += 1  # for use as a 1-based index
    blah1 = DEBUG or self.verbosity >= 1
    blah2 = DEBUG or self.verbosity >= 2

    if self.biff_version >= 80:
        num_refs = unpack("<H", data[0:2])[0]
        bytes_reqd = num_refs * 6 + 2
        # Keep appending CONTINUE data until every reference is present.
        while len(data) < bytes_reqd:
            if blah1:
                fprintf(
                    self.logfile,
                    "INFO: EXTERNSHEET needs %d bytes, have %d\n",
                    bytes_reqd, len(data),
                )
            code2, length2, data2 = self.get_record_parts()
            if code2 != XL_CONTINUE:
                raise XLRDError("Missing CONTINUE after EXTERNSHEET record")
            data += data2
        # References start after the 2-byte count, 6 bytes apiece.
        for k, pos in enumerate(range(2, bytes_reqd, 6)):
            info = unpack("<HHH", data[pos:pos + 6])
            ref_recordx, ref_first_sheetx, ref_last_sheetx = info
            self._externsheet_info.append(info)
            if blah2:
                fprintf(
                    self.logfile,
                    "EXTERNSHEET(b8): k = %2d, record = %2d, first_sheet = %5d, last sheet = %5d\n",
                    k, ref_recordx, ref_first_sheetx, ref_last_sheetx,
                )
        return

    nc, ty = unpack("<BB", data[:2])
    if blah2:
        print("EXTERNSHEET(b7-):", file=self.logfile)
        hex_char_dump(data, 0, len(data), fout=self.logfile)
        type_descriptions = {
            1: "Encoded URL",
            2: "Current sheet!!",
            3: "Specific sheet in own doc't",
            4: "Nonspecific sheet in own doc't!!",
        }
        msg = type_descriptions.get(ty, "Not encoded")
        print(" %3d chars, type is %d (%s)" % (nc, ty, msg), file=self.logfile)
    if ty == 3:
        sheet_name = unicode(data[2:nc + 2], self.encoding)
        self._extnsht_name_from_num[self._extnsht_count] = sheet_name
        if blah2:
            print(self._extnsht_name_from_num, file=self.logfile)
    # Types outside 1..4 are recorded as 0.
    if ty < 1 or ty > 4:
        ty = 0
    self._externsheet_type_b57.append(ty)

921 

def handle_filepass(self, data):
    """Handle a FILEPASS record; the workbook is encrypted, so always raise.

    At ``verbosity >= 2`` the raw record is dumped to ``self.logfile``
    first and, for BIFF8, the encryption kind found in the record is
    described.  That decoding is purely diagnostic: a record too short to
    decode is reported in the log rather than being allowed to replace
    the ``XLRDError`` with a ``struct.error``, so the exception seen by
    the caller does not depend on the verbosity setting.

    :param data: payload of the FILEPASS record.
    :raises XLRDError: always.
    """
    if self.verbosity >= 2:
        logf = self.logfile
        fprintf(logf, "FILEPASS:\n")
        hex_char_dump(data, 0, len(data), base=0, fout=logf)
        if self.biff_version >= 80:
            try:
                kind1, = unpack('<H', data[:2])
                if kind1 == 0:  # weak XOR encryption
                    # Bounded slice: trailing bytes must not break the dump.
                    key, hash_value = unpack('<HH', data[2:6])
                    fprintf(
                        logf,
                        'weak XOR: key=0x%04x hash=0x%04x\n',
                        key, hash_value)
                elif kind1 == 1:
                    kind2, = unpack('<H', data[4:6])
                    if kind2 == 1:  # BIFF8 standard encryption
                        caption = "BIFF8 std"
                    elif kind2 == 2:
                        caption = "BIFF8 strong"
                    else:
                        caption = "** UNKNOWN ENCRYPTION METHOD **"
                    fprintf(logf, "%s\n", caption)
            except struct.error:
                # Truncated record: note it and fall through to the real error.
                fprintf(
                    logf,
                    "*** FILEPASS record too short to decode (%d bytes)\n",
                    len(data))
    raise XLRDError("Workbook is encrypted")

944 

def handle_name(self, data):
    """Process a NAME record and register a new ``Name`` object.

    Records in files older than BIFF 5.0 are ignored.  Otherwise the
    14-byte fixed header is unpacked, a ``Name`` object is created and
    appended to ``self.name_obj_list``, its flag attributes are derived
    from the option flags, and the name text and the raw formula bytes
    are stored on it.  ``scope`` is left as ``None`` here; it is filled
    in later by ``names_epilogue()``.
    """
    verbose = DEBUG or self.verbosity >= 2
    bv = self.biff_version
    if bv < 50:
        return
    self.derive_encoding()

    fields = unpack("<HBBHHH4B", data[0:14])
    (
        option_flags, _kb_shortcut, name_len, fmla_len, extsht_index, sheet_index,
        _menu_text_len, _description_text_len, _help_topic_text_len, _status_bar_text_len,
    ) = fields

    nobj = Name()
    nobj.book = self  # circular reference
    name_index = len(self.name_obj_list)
    nobj.name_index = name_index
    self.name_obj_list.append(nobj)
    nobj.option_flags = option_flags

    # (attribute name, bit mask, right shift) for each field of option_flags
    flag_layout = (
        ('hidden', 1, 0),
        ('func', 2, 1),
        ('vbasic', 4, 2),
        ('macro', 8, 3),
        ('complex', 0x10, 4),
        ('builtin', 0x20, 5),
        ('funcgroup', 0xFC0, 6),
        ('binary', 0x1000, 12),
    )
    for attr, mask, nshift in flag_layout:
        setattr(nobj, attr, (option_flags & mask) >> nshift)

    macro_flag = "M" if nobj.macro else " "
    if bv >= 80:
        internal_name, pos = unpack_unicode_update_pos(data, 14, known_len=name_len)
    else:
        internal_name, pos = unpack_string_update_pos(data, 14, self.encoding, known_len=name_len)
    nobj.extn_sheet_num = extsht_index
    nobj.excel_sheet_index = sheet_index
    nobj.scope = None  # patched up in the names_epilogue() method
    if verbose:
        fprintf(
            self.logfile,
            "NAME[%d]:%s oflags=%d, name_len=%d, fmla_len=%d, extsht_index=%d, sheet_index=%d, name=%r\n",
            name_index, macro_flag, option_flags, name_len,
            fmla_len, extsht_index, sheet_index, internal_name)

    if nobj.builtin:
        # Built-in names are stored as a code; translate it to its text.
        name = builtin_name_from_code.get(internal_name, "??Unknown??")
        if verbose:
            print(" builtin: %s" % name, file=self.logfile)
    else:
        name = internal_name
    nobj.name = name
    nobj.raw_formula = data[pos:]
    nobj.basic_formula_len = fmla_len
    nobj.evaluated = 0
    if verbose:
        nobj.dump(
            self.logfile,
            header="--- handle_name: name[%d] ---" % name_index,
            footer="-------------------",
        )

1004 

def names_epilogue(self):
    """Finish processing of the Name objects collected by ``handle_name``.

    Three passes are made over ``self.name_obj_list``:

    1. each object's ``scope`` is derived from its sheet index
       (-1 global, -2 valid but not useful, -3 invalid, otherwise an
       internal sheet index);
    2. the formula of every name that is not macro, not binary and not
       yet evaluated is passed to ``evaluate_name_formula``;
    3. ``self.name_and_scope_map`` (keyed by ``(lowercased name, scope)``)
       and ``self.name_map`` (lowercased name -> list of Name objects in
       scope order) are built.
    """
    blah = self.verbosity >= 2
    f = self.logfile
    if blah:
        print("+++++ names_epilogue +++++", file=f)
        print("_all_sheets_map", REPR(self._all_sheets_map), file=f)
        print("_extnsht_name_from_num", REPR(self._extnsht_name_from_num), file=f)
        print("_sheet_num_from_name", REPR(self._sheet_num_from_name), file=f)
    num_names = len(self.name_obj_list)

    # Pass 1: excel_sheet_index -> scope.  Done here rather than in
    # handle_name because in BIFF7 and earlier the BOUNDSHEET records
    # (the source of _all_sheets_map) come after the NAME records.
    for nobj in self.name_obj_list:
        if self.biff_version >= 80:
            sheet_index = nobj.excel_sheet_index
            if sheet_index == 0:
                scope = -1  # global
            elif 1 <= sheet_index <= len(self._all_sheets_map):
                scope = self._all_sheets_map[sheet_index - 1]
                if scope == -1:
                    # maps to a macro or VBA sheet:
                    # a valid sheet reference but not a useful one
                    scope = -2
            else:
                scope = -3  # invalid
        elif 50 <= self.biff_version <= 70:
            sheet_index = nobj.extn_sheet_num
            if sheet_index == 0:
                scope = -1  # global
            else:
                sheet_name = self._extnsht_name_from_num[sheet_index]
                scope = self._sheet_num_from_name.get(sheet_name, -2)
        nobj.scope = scope

    # Pass 2: parse the formulas.
    for namex in range(num_names):
        nobj = self.name_obj_list[namex]
        if nobj.macro or nobj.binary or nobj.evaluated:
            continue
        evaluate_name_formula(self, nobj, namex, blah=blah)

    if self.verbosity >= 2:
        print("---------- name object dump ----------", file=f)
        for namex in range(num_names):
            self.name_obj_list[namex].dump(f, header="--- name[%d] ---" % namex)
        print("--------------------------------------", file=f)

    # Pass 3: build the lookup dicts.
    name_and_scope_map = {}  # (name.lower(), scope): Name_object
    grouped = {}             # name.lower(): list of (scope, namex, Name_object)
    for namex in range(num_names):
        nobj = self.name_obj_list[namex]
        name_lcase = nobj.name.lower()
        key = (name_lcase, nobj.scope)
        if key in name_and_scope_map and self.verbosity:
            fprintf(f, 'Duplicate entry %r in name_and_scope_map\n', key)
        name_and_scope_map[key] = nobj
        # namex is unique, so sorting never has to compare two Name
        # objects (which would be fatal in py3).
        grouped.setdefault(name_lcase, []).append((nobj.scope, namex, nobj))

    name_map = {}  # name.lower(): list of Name_objects (sorted in scope order)
    for name_lcase, entries in grouped.items():
        entries.sort()
        name_map[name_lcase] = [entry[2] for entry in entries]

    self.name_and_scope_map = name_and_scope_map
    self.name_map = name_map

1078 

def handle_obj(self, data):
    """Decode the object type and id from an OBJ record and discard them.

    Very little handling is done here; embedded (BOF ... EOF) substreams
    are dealt with elsewhere.  The unpack still runs, so a record whose
    bytes 4..9 are incomplete raises ``struct.error``.
    """
    header = data[4:10]
    _obj_type, _obj_id = unpack('<HI', header)

1085 

def handle_supbook(self, data):
    """Process a SUPBOOK record (called EXTERNALBOOK in the OOo docs).

    A new entry is appended to ``self._supbook_types`` and then set to
    one of ``SUPBOOK_INTERNAL``, ``SUPBOOK_ADDIN``, ``SUPBOOK_DDEOLE`` or
    ``SUPBOOK_EXTERNAL`` depending on the record contents.  For the
    internal and add-in kinds the index of this record is also saved in
    ``self._supbook_locals_inx`` / ``self._supbook_addins_inx``.
    For an external workbook the sheet names are decoded for logging only.
    """
    self._supbook_types.append(None)
    verbose = DEBUG or self.verbosity >= 2
    if verbose:
        print("SUPBOOK:", file=self.logfile)
        hex_char_dump(data, 0, len(data), fout=self.logfile)
    (num_sheets,) = unpack("<H", data[0:2])
    if verbose:
        print("num_sheets = %d" % num_sheets, file=self.logfile)

    sbn = self._supbook_count  # 0-based index of this SUPBOOK record
    self._supbook_count += 1

    if data[2:4] == b"\x01\x04":
        self._supbook_types[-1] = SUPBOOK_INTERNAL
        self._supbook_locals_inx = sbn
        if verbose:
            print("SUPBOOK[%d]: internal 3D refs; %d sheets" % (sbn, num_sheets), file=self.logfile)
            print(" _all_sheets_map", self._all_sheets_map, file=self.logfile)
        return

    if data[0:4] == b"\x01\x00\x01\x3A":
        self._supbook_types[-1] = SUPBOOK_ADDIN
        self._supbook_addins_inx = sbn
        if verbose:
            print("SUPBOOK[%d]: add-in functions" % sbn, file=self.logfile)
        return

    url, pos = unpack_unicode_update_pos(data, 2, lenlen=2)
    if num_sheets == 0:
        self._supbook_types[-1] = SUPBOOK_DDEOLE
        if verbose:
            fprintf(self.logfile, "SUPBOOK[%d]: DDE/OLE document = %r\n", sbn, url)
        return

    self._supbook_types[-1] = SUPBOOK_EXTERNAL
    if verbose:
        fprintf(self.logfile, "SUPBOOK[%d]: url = %r\n", sbn, url)
    sheet_names = []
    for sheetx in range(num_sheets):
        try:
            shname, pos = unpack_unicode_update_pos(data, pos, lenlen=2)
        except struct.error:
            # FIXME: handling of CONTINUE record(s) is not implemented, so
            # the list of names simply ends at the first unpack failure.
            if self.verbosity:
                print(
                    "*** WARNING: unpack failure in sheet %d of %d in SUPBOOK record for file %r"
                    % (sheetx, num_sheets, url),
                    file=self.logfile,
                )
            break
        sheet_names.append(shname)
        if verbose:
            fprintf(
                self.logfile,
                " sheetx=%d namelen=%d name=%r (next pos=%d)\n",
                sheetx, len(shname), shname, pos,
            )

1132 

def handle_sheethdr(self, data):
    """Process a SHEETHDR record (a BIFF 4W special).

    The record is followed by a (BOF ... EOF) substream containing a
    worksheet.  The sheet name in the record must match the next entry
    of ``self._sheet_names``.  The worksheet is loaded through
    ``get_sheet`` and the read position is then moved past the
    substream, using the length given in the record.
    """
    self.derive_encoding()
    (sheet_len,) = unpack('<i', data[:4])
    sheet_name = unpack_string(data, 4, self.encoding, lenlen=1)
    sheetno = self._sheethdr_count
    assert sheet_name == self._sheet_names[sheetno]
    self._sheethdr_count += 1
    BOF_posn = self._position
    if DEBUG:
        # Offset of the SHEETHDR record itself (4-byte header + payload).
        posn = BOF_posn - 4 - len(data)
        fprintf(self.logfile, 'SHEETHDR %d at posn %d: len=%d name=%r\n', sheetno, posn, sheet_len, sheet_name)
    self.initialise_format_info()
    if DEBUG:
        print('SHEETHDR: xf epilogue flag is %d' % self._xf_epilogue_done, file=self.logfile)
    # get_sheet updates _sheet_list but needs a None placeholder beforehand.
    self._sheet_list.append(None)
    self.get_sheet(sheetno, update_pos=False)
    if DEBUG:
        print('SHEETHDR: posn after get_sheet() =', self._position, file=self.logfile)
    self._position = BOF_posn + sheet_len

1153 

def handle_sheetsoffset(self, data):
    """Store the signed 32-bit value of a SHEETSOFFSET record in
    ``self._sheetsoffset``.
    """
    (offset,) = unpack('<i', data)
    if DEBUG:
        print('SHEETSOFFSET:', offset, file=self.logfile)
    self._sheetsoffset = offset

1159 

def handle_sst(self, data):
    """Process an SST record together with its CONTINUE records.

    The payloads of the SST record and of every immediately following
    CONTINUE record are collected and handed to ``unpack_SST_table``.
    The resulting strings are stored in ``self._sharedstrings``; the
    rich-text run information is kept in ``self._rich_text_runlist_map``
    only when ``self.formatting_info`` is true.
    """
    if DEBUG:
        print("SST Processing", file=self.logfile)
        started = perf_counter()
    total_bytes = len(data)
    chunks = [data]
    (uniquestrings,) = unpack('<i', data[4:8])
    if DEBUG or self.verbosity >= 2:
        fprintf(self.logfile, "SST: unique strings: %d\n", uniquestrings)
    while True:
        code, nb, chunk = self.get_record_parts_conditional(XL_CONTINUE)
        if code is None:
            # The next record is not a CONTINUE: the table is complete.
            break
        total_bytes += nb
        if DEBUG >= 2:
            fprintf(self.logfile, "CONTINUE: adding %d bytes to SST -> %d\n", nb, total_bytes)
        chunks.append(chunk)
    self._sharedstrings, rt_runlist = unpack_SST_table(chunks, uniquestrings)
    if self.formatting_info:
        self._rich_text_runlist_map = rt_runlist
    if DEBUG:
        finished = perf_counter()
        print("SST processing took %.2f seconds" % (finished - started, ), file=self.logfile)

1184 

def handle_writeaccess(self, data):
    """Process a WRITEACCESS record and set ``self.user_name``.

    Before BIFF8, when no encoding is known yet, the raw bytes are
    stored and ``self.raw_user_name`` is set to True.  Otherwise the
    string is decoded and stored with trailing whitespace removed.
    """
    # Local switch; deliberately shadows the module-level DEBUG flag.
    DEBUG = 0
    if self.biff_version >= 80:
        try:
            strg = unpack_unicode(data, 0, lenlen=2)
        except UnicodeDecodeError:
            # The record may carry invalid trailing characters.
            strg = unpack_unicode(data.strip(), 0, lenlen=2)
    elif self.encoding:
        strg = unpack_string(data, 0, self.encoding, lenlen=1)
    else:
        self.raw_user_name = True
        self.user_name = data
        return
    if DEBUG:
        fprintf(self.logfile, "WRITEACCESS: %d bytes; raw=%s %r\n", len(data), self.raw_user_name, strg)
    self.user_name = strg.rstrip()

1202 

def parse_globals(self):
    """Read the workbook-globals substream up to and including its EOF.

    Reading starts at the current position (just after the BOF).  Each
    record with a known code is passed to the matching ``handle_*``
    method; unknown codes are ignored.  At the EOF record the XF, name
    and palette epilogues are run, the encoding is derived if it is
    still unset, and the method returns.
    """
    formatting.initialise_book(self)

    # Ordered (record code, handler method name) pairs; the first match
    # wins.  Handlers are looked up by name at dispatch time.
    # XL_FORMAT2 is BIFF <= 3.0 and can't appear in globals.
    dispatch = (
        (XL_SST, "handle_sst"),
        (XL_FONT, "handle_font"),
        (XL_FONT_B3B4, "handle_font"),
        (XL_FORMAT, "handle_format"),
        (XL_XF, "handle_xf"),
        (XL_BOUNDSHEET, "handle_boundsheet"),
        (XL_DATEMODE, "handle_datemode"),
        (XL_CODEPAGE, "handle_codepage"),
        (XL_COUNTRY, "handle_country"),
        (XL_EXTERNNAME, "handle_externname"),
        (XL_EXTERNSHEET, "handle_externsheet"),
        (XL_FILEPASS, "handle_filepass"),
        (XL_WRITEACCESS, "handle_writeaccess"),
        (XL_SHEETSOFFSET, "handle_sheetsoffset"),
        (XL_SHEETHDR, "handle_sheethdr"),
        (XL_SUPBOOK, "handle_supbook"),
        (XL_NAME, "handle_name"),
        (XL_PALETTE, "handle_palette"),
        (XL_STYLE, "handle_style"),
    )

    while True:
        rc, length, data = self.get_record_parts()
        if DEBUG:
            print("parse_globals: record code is 0x%04x" % rc, file=self.logfile)

        handler_name = None
        for code, method_name in dispatch:
            if rc == code:
                handler_name = method_name
                break
        if handler_name is not None:
            getattr(self, handler_name)(data)
            continue

        if rc & 0xff == 9 and self.verbosity:
            fprintf(
                self.logfile,
                "*** Unexpected BOF at posn %d: 0x%04x len=%d data=%r\n",
                self._position - length - 4, rc, length, data)
            continue

        if rc != XL_EOF:
            # Any other record code is silently ignored.
            continue

        self.xf_epilogue()
        self.names_epilogue()
        self.palette_epilogue()
        if not self.encoding:
            self.derive_encoding()
        if self.biff_version == 45:
            if DEBUG:
                print("global EOF: position", self._position, file=self.logfile)
        return

1266 

def read(self, pos, length):
    """Return up to ``length`` bytes of ``self.mem`` starting at ``pos``.

    ``self._position`` is moved to just past the bytes actually
    returned, which may be fewer than requested near the end of the
    buffer.
    """
    chunk = self.mem[pos:pos + length]
    self._position = pos + len(chunk)
    return chunk

1271 

def getbof(self, rqd_stream):
    """Read a BOF record at the current position and return the BIFF
    version number derived from it.

    The version is returned when the stream type found matches
    ``rqd_stream`` (workbook globals also match the 4W globals type for
    version 45), or when the version is below 50 and the stream is a
    worksheet.

    :raises XLRDError: if no valid BOF record is found, the record is
        truncated or has an invalid length, or the stream type is not
        acceptable.
    """
    if DEBUG:
        print("reqd: 0x%04x" % rqd_stream, file=self.logfile)

    def bof_error(msg):
        raise XLRDError('Unsupported format, or corrupt file: ' + msg)

    savpos = self._position
    opcode = self.get2bytes()
    if opcode == MY_EOF:
        bof_error('Expected BOF record; met end of file')
    if opcode not in bofcodes:
        bof_error('Expected BOF record; found %r' % self.mem[savpos:savpos+8])
    length = self.get2bytes()
    if length == MY_EOF:
        bof_error('Incomplete BOF record[1]; met end of file')
    if length < 4 or length > 20:
        bof_error(
            'Invalid length (%d) for BOF record type 0x%04x'
            % (length, opcode))
    # Zero bytes that bring a short record up to the size expected
    # for this opcode.
    shortfall = boflen[opcode] - length
    padding = b'\0' * max(0, shortfall)
    data = self.read(self._position, length)
    if DEBUG:
        fprintf(self.logfile, "\ngetbof(): data=%r\n", data)
    if len(data) < length:
        bof_error('Incomplete BOF record[2]; met end of file')
    data += padding

    version1 = opcode >> 8
    version2, streamtype = unpack('<HH', data[0:4])
    if DEBUG:
        print("getbof(): op=0x%04x version2=0x%04x streamtype=0x%04x"
              % (opcode, version2, streamtype), file=self.logfile)
    bof_offset = self._position - 4 - length
    if DEBUG:
        print("getbof(): BOF found at offset %d; savpos=%d"
              % (bof_offset, savpos), file=self.logfile)

    version = build = year = 0
    if version1 == 0x08:
        build, year = unpack('<HH', data[4:8])
        if version2 == 0x0600:
            version = 80
        elif version2 == 0x0500:
            is_biff5 = year < 1994 or build in (2412, 3218, 3321)
            version = 50 if is_biff5 else 70
        else:
            # dodgy one, created by a 3rd-party tool
            odd_versions = {
                0x0000: 21,
                0x0007: 21,
                0x0200: 21,
                0x0300: 30,
                0x0400: 40,
            }
            version = odd_versions.get(version2, 0)
    elif version1 in (0x04, 0x02, 0x00):
        version = {0x04: 40, 0x02: 30, 0x00: 21}[version1]

    if version == 40 and streamtype == XL_WORKBOOK_GLOBALS_4W:
        version = 45  # i.e. 4W

    if DEBUG or self.verbosity >= 2:
        print("BOF: op=0x%04x vers=0x%04x stream=0x%04x buildid=%d buildyr=%d -> BIFF%d"
              % (opcode, version2, streamtype, build, year, version), file=self.logfile)

    got_globals = streamtype == XL_WORKBOOK_GLOBALS or (
        version == 45 and streamtype == XL_WORKBOOK_GLOBALS_4W)
    wanted_globals = rqd_stream == XL_WORKBOOK_GLOBALS and got_globals
    if wanted_globals or streamtype == rqd_stream:
        return version
    if version < 50 and streamtype == XL_WORKSHEET:
        return version
    if version >= 50 and streamtype == 0x0100:
        bof_error("Workspace file -- no spreadsheet data")
    bof_error(
        'BOF not workbook/worksheet: op=0x%04x vers=0x%04x strm=0x%04x build=%d year=%d -> BIFF%d'
        % (opcode, version2, streamtype, build, year, version)
    )

1347 

1348# === helper functions 

1349 

def expand_cell_address(inrow, incol):
    """Split a packed cell address into row, column and relative flags.

    Ref: OOo docs, "4.3.4 Cell Addresses in BIFF8".

    Bit 0x8000 of ``incol`` marks the row as relative and bit 0x4000
    marks the column as relative; the low 8 bits are the column.  A
    relative row of 32768 or more, or a relative column of 128 or more,
    is converted to the corresponding negative offset.

    :returns: ``(outrow, outcol, relrow, relcol)`` where the last two
        items are 0 or 1.
    """
    relrow = 1 if incol & 0x8000 else 0
    relcol = 1 if incol & 0x4000 else 0

    outrow = inrow
    if relrow and outrow >= 32768:
        outrow -= 65536

    outcol = incol & 0xFF
    if relcol and outcol >= 128:
        outcol -= 256

    return outrow, outcol, relrow, relcol

1367 

def colname(colx, _A2Z="ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """Return the spreadsheet column letters for a 0-based column index.

    For example 0 -> "A", 25 -> "Z", 26 -> "AA".

    :param colx: column index, must be >= 0.
    :param _A2Z: the 26 letters used to build the name.
    :raises AssertionError: if ``colx`` is negative.
    """
    if not colx >= 0:
        # Raised explicitly instead of via an assert statement: asserts
        # are stripped under ``python -O``, and with a negative index the
        # loop below never terminates.  The exception type is unchanged.
        raise AssertionError("colx must be >= 0, got %r" % (colx,))
    name = UNICODE_LITERAL('')
    while 1:
        quot, rem = divmod(colx, 26)
        name = _A2Z[rem] + name
        if not quot:
            return name
        # There is no "zero" letter, so the carry is reduced by one.
        colx = quot - 1

1377 

def display_cell_address(rowx, colx, relrow, relcol):
    """Format a cell address for display, column part first.

    An absolute row is shown as ``$`` plus the 1-based row number and an
    absolute column as ``$`` plus its letters (via ``colname``).  A
    relative component is shown as a signed offset, e.g. ``(*+3)`` or
    ``(*-1)``.
    """
    def _offset(delta):
        sign = "-" if delta < 0 else "+"
        return "(*%s%d)" % (sign, abs(delta))

    rowpart = _offset(rowx) if relrow else "$%d" % (rowx + 1,)
    colpart = _offset(colx) if relcol else "$" + colname(colx)
    return colpart + rowpart

1388 

def unpack_SST_table(datatab, nstrings):
    """Decode the shared string table.

    :param datatab: list of byte strings; the first item is the payload of
        the SST record and the rest are the payloads of its CONTINUE
        records, in order.
    :param nstrings: number of strings to decode.
    :returns: ``(strings, richtext_runs)``. ``strings`` is the list of
        decoded strings. ``richtext_runs`` maps the index of a string in
        that list to its list of rich-text runs, each a 2-tuple of
        unsigned 16-bit values; only strings that have runs appear.

    A string may be split across records; every continuation record
    starts with a fresh option byte, so the encoding can change part-way
    through a string.
    """
    datainx = 0  # index into datatab of the record being read
    ndatas = len(datatab)
    data = datatab[0]
    datalen = len(data)
    pos = 8  # string data starts after the first 8 bytes of the SST payload
    strings = []
    strappend = strings.append
    richtext_runs = {}
    # Globals and builtins are bound to local names for use in the loop.
    local_unpack = unpack
    local_min = min
    local_BYTES_ORD = BYTES_ORD
    latin_1 = "latin_1"
    for _unused_i in xrange(nstrings):
        # Per-string header: character count (2 bytes), option flags (1 byte).
        nchars = local_unpack('<H', data[pos:pos+2])[0]
        pos += 2
        options = local_BYTES_ORD(data[pos])
        pos += 1
        rtcount = 0  # number of rich-text runs following the characters
        phosz = 0    # size in bytes of the phonetic data to skip
        if options & 0x08: # richtext
            rtcount = local_unpack('<H', data[pos:pos+2])[0]
            pos += 2
        if options & 0x04: # phonetic
            phosz = local_unpack('<i', data[pos:pos+4])[0]
            pos += 4
        accstrg = UNICODE_LITERAL('')
        charsgot = 0
        # Gather the characters, moving on to the next record whenever the
        # current one runs out before nchars characters have been read.
        while 1:
            charsneed = nchars - charsgot
            if options & 0x01:
                # Uncompressed UTF-16
                charsavail = local_min((datalen - pos) >> 1, charsneed)
                rawstrg = data[pos:pos+2*charsavail]
                try:
                    accstrg += unicode(rawstrg, "utf_16_le")
                except:
                    # Probable cause: dodgy data e.g. unfinished surrogate pair.
                    # E.g. file unicode2.xls in pyExcelerator's examples has cells containing
                    # unichr(i) for i in range(0x100000)
                    # so this will include 0xD800 etc
                    # The error is simply re-raised; nothing is recovered here.
                    raise
                pos += 2*charsavail
            else:
                # Note: this is COMPRESSED (not ASCII!) encoding!!!
                # One byte per character, decoded as latin_1.
                charsavail = local_min(datalen - pos, charsneed)
                rawstrg = data[pos:pos+charsavail]
                accstrg += unicode(rawstrg, latin_1)
                pos += charsavail
            charsgot += charsavail
            if charsgot == nchars:
                break
            # The string continues in the next record, whose first byte is
            # a new option byte for the remaining characters.
            datainx += 1
            data = datatab[datainx]
            datalen = len(data)
            options = local_BYTES_ORD(data[0])
            pos = 1

        if rtcount:
            runs = []
            for runindex in xrange(rtcount):
                if pos == datalen:
                    # Runs continue at the start of the next record
                    # (no option byte here).
                    pos = 0
                    datainx += 1
                    data = datatab[datainx]
                    datalen = len(data)
                runs.append(local_unpack("<HH", data[pos:pos+4]))
                pos += 4
            # Keyed by the index this string will have once appended below.
            richtext_runs[len(strings)] = runs

        pos += phosz # size of the phonetic stuff to skip
        if pos >= datalen:
            # adjust to correct position in next record
            pos = pos - datalen
            datainx += 1
            if datainx < ndatas:
                data = datatab[datainx]
                datalen = len(data)
            else:
                # Running out of records is only expected after the last string.
                assert _unused_i == nstrings - 1
        strappend(accstrg)
    return strings, richtext_runs