Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/biffh.py: 54%

1# -*- coding: utf-8 -*-

3# This module is part of the xlrd package, which is released under a

4# BSD-style licence.

5from __future__ import print_function

7import sys

8from struct import unpack

10from .timemachine import *

12DEBUG = 0

class XLRDError(Exception):
    """
    Raised to signal a problem encountered while reading data from an
    Excel file.
    """

class BaseObject(object):
    """
    Common ancestor of most classes in the package. Supplies a generic
    :meth:`dump` method that is handy when debugging.
    """

    # Attribute names listed here are always printed with their full repr,
    # even when the value is a list or dict (which is otherwise summarised).
    _repr_these = []

    def dump(self, f=None, header=None, footer=None, indent=0):
        """
        Write the attributes of this object, sorted by name, one per line.

        :param f: open file object, to which the dump is written
                  (``sys.stderr`` is used when it is ``None``)
        :param header: text to write before the dump
        :param footer: text to write after the dump
        :param indent: number of leading spaces (for recursive calls)
        """
        out = sys.stderr if f is None else f
        if hasattr(self, "__slots__"):
            pairs = [(name, getattr(self, name)) for name in self.__slots__]
        else:
            pairs = self.__dict__.items()
        pairs = sorted(pairs)
        prefix = " " * indent
        if header is not None:
            print(header, file=out)
        for name, val in pairs:
            if getattr(val, 'dump', None) and name != 'book':
                # Anything that can dump itself is dumped recursively, one
                # level deeper; the attribute named 'book' is never recursed
                # into (presumably to avoid walking back into the owner).
                child_header = "%s%s (%s object):" % (prefix, name, val.__class__.__name__)
                val.dump(out, header=child_header, indent=indent + 4)
            elif name not in self._repr_these and isinstance(val, (list, dict)):
                # Containers are summarised by type and length only.
                print("%s%s: %s, len = %d" % (prefix, name, type(val), len(val)), file=out)
            else:
                fprintf(out, "%s%s: %r\n", prefix, name, val)
        if footer is not None:
            print(footer, file=out)

# Format-type codes.
FUN = 0  # unknown
FDT = 1  # date
FNU = 2  # number
FGE = 3  # general
FTX = 4  # text
DATEFORMAT = FDT
NUMBERFORMAT = FNU

# Cell-type codes.
XL_CELL_EMPTY = 0
XL_CELL_TEXT = 1
XL_CELL_NUMBER = 2
XL_CELL_DATE = 3
XL_CELL_BOOLEAN = 4
XL_CELL_ERROR = 5
XL_CELL_BLANK = 6  # for use in debugging, gathering stats, etc

# Maps the numeric BIFF version code to its display text.
biff_text_from_num = {
    0: "(not BIFF)",
    20: "2.0",
    21: "2.1",
    30: "3",
    40: "4S",
    45: "4W",
    50: "5",
    70: "7",
    80: "8",
    85: "8X",
}

#: This dictionary can be used to produce a text version of the internal codes
#: that Excel uses for error cells.
error_text_from_code = {
    0x00: '#NULL!',   # Intersection of two cell ranges is empty
    0x07: '#DIV/0!',  # Division by zero
    0x0F: '#VALUE!',  # Wrong type of operand
    0x17: '#REF!',    # Illegal or deleted cell reference
    0x1D: '#NAME?',   # Wrong function or range name
    0x24: '#NUM!',    # Value range overflow
    0x2A: '#N/A',     # Argument or function not available
}

101

BIFF_FIRST_UNICODE = 80

XL_WORKBOOK_GLOBALS = WBKBLOBAL = 0x0005
XL_WORKBOOK_GLOBALS_4W = 0x0100
XL_WORKSHEET = WRKSHEET = 0x0010

XL_BOUNDSHEET_WORKSHEET = 0x00
XL_BOUNDSHEET_CHART = 0x02
XL_BOUNDSHEET_VB_MODULE = 0x06

# Record identifiers, all written as four-digit hexadecimal values.
# XL_RK2 = 0x7e
XL_ARRAY = 0x0221
XL_ARRAY2 = 0x0021
XL_BLANK = 0x0201
XL_BLANK_B2 = 0x0001
XL_BOF = 0x0809
XL_BOOLERR = 0x0205
XL_BOOLERR_B2 = 0x0005
XL_BOUNDSHEET = 0x0085
XL_BUILTINFMTCOUNT = 0x0056
XL_CF = 0x01B1
XL_CODEPAGE = 0x0042
XL_COLINFO = 0x007D
XL_COLUMNDEFAULT = 0x0020  # BIFF2 only
XL_COLWIDTH = 0x0024  # BIFF2 only
XL_CONDFMT = 0x01B0
XL_CONTINUE = 0x003C
XL_COUNTRY = 0x008C
XL_DATEMODE = 0x0022
XL_DEFAULTROWHEIGHT = 0x0225
XL_DEFCOLWIDTH = 0x0055
XL_DIMENSION = 0x0200
XL_DIMENSION2 = 0x0000
XL_EFONT = 0x0045
XL_EOF = 0x000A
XL_EXTERNNAME = 0x0023
XL_EXTERNSHEET = 0x0017
XL_EXTSST = 0x00FF
XL_FEAT11 = 0x0872
XL_FILEPASS = 0x002F
XL_FONT = 0x0031
XL_FONT_B3B4 = 0x0231
XL_FORMAT = 0x041E
XL_FORMAT2 = 0x001E  # BIFF2, BIFF3
XL_FORMULA = 0x0006
XL_FORMULA3 = 0x0206
XL_FORMULA4 = 0x0406
XL_GCW = 0x00AB
XL_HLINK = 0x01B8
XL_QUICKTIP = 0x0800
XL_HORIZONTALPAGEBREAKS = 0x001B
XL_INDEX = 0x020B
XL_INTEGER = 0x0002  # BIFF2 only
XL_IXFE = 0x0044  # BIFF2 only
XL_LABEL = 0x0204
XL_LABEL_B2 = 0x0004
XL_LABELRANGES = 0x015F
XL_LABELSST = 0x00FD
XL_LEFTMARGIN = 0x0026
XL_TOPMARGIN = 0x0028
XL_RIGHTMARGIN = 0x0027
XL_BOTTOMMARGIN = 0x0029
XL_HEADER = 0x0014
XL_FOOTER = 0x0015
XL_HCENTER = 0x0083
XL_VCENTER = 0x0084
XL_MERGEDCELLS = 0x00E5
XL_MSO_DRAWING = 0x00EC
XL_MSO_DRAWING_GROUP = 0x00EB
XL_MSO_DRAWING_SELECTION = 0x00ED
XL_MULRK = 0x00BD
XL_MULBLANK = 0x00BE
XL_NAME = 0x0018
XL_NOTE = 0x001C
XL_NUMBER = 0x0203
XL_NUMBER_B2 = 0x0003
XL_OBJ = 0x005D
XL_PAGESETUP = 0x00A1
XL_PALETTE = 0x0092
XL_PANE = 0x0041
XL_PRINTGRIDLINES = 0x002B
XL_PRINTHEADERS = 0x002A
XL_RK = 0x027E
XL_ROW = 0x0208
XL_ROW_B2 = 0x0008
XL_RSTRING = 0x00D6
XL_SCL = 0x00A0
XL_SHEETHDR = 0x008F  # BIFF4W only
XL_SHEETPR = 0x0081
XL_SHEETSOFFSET = 0x008E  # BIFF4W only
XL_SHRFMLA = 0x04BC
XL_SST = 0x00FC
XL_STANDARDWIDTH = 0x0099
XL_STRING = 0x0207
XL_STRING_B2 = 0x0007
XL_STYLE = 0x0293
XL_SUPBOOK = 0x01AE  # aka EXTERNALBOOK in OOo docs
XL_TABLEOP = 0x0236
XL_TABLEOP2 = 0x0037
XL_TABLEOP_B2 = 0x0036
XL_TXO = 0x01B6
XL_UNCALCED = 0x005E
XL_UNKNOWN = 0xFFFF
XL_VERTICALPAGEBREAKS = 0x001A
XL_WINDOW2 = 0x023E
XL_WINDOW2_B2 = 0x003E
XL_WRITEACCESS = 0x005C
XL_WSBOOL = XL_SHEETPR
XL_XF = 0x00E0
XL_XF2 = 0x0043  # BIFF2 version of XF record
XL_XF3 = 0x0243  # BIFF3 version of XF record
XL_XF4 = 0x0443  # BIFF4 version of XF record

# The four BOF record codes, and the length associated with each.
boflen = {0x0809: 8, 0x0409: 6, 0x0209: 6, 0x0009: 4}
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)

XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)

219

# Record identifiers that is_cell_opcode() reports as cell records.
_cell_opcode_list = [
    XL_BOOLERR, XL_FORMULA, XL_FORMULA3, XL_FORMULA4, XL_LABEL,
    XL_LABELSST, XL_MULRK, XL_NUMBER, XL_RK, XL_RSTRING,
]

# The same opcodes as dictionary keys, for fast membership tests
# (every value is 1).
_cell_opcode_dict = {}
for _cell_opcode in _cell_opcode_list:
    _cell_opcode_dict.setdefault(_cell_opcode, 1)

235

def is_cell_opcode(c):
    """
    Tell whether a record opcode is one of the cell-record opcodes.

    :param c: the record opcode to test
    :returns: ``True`` if ``c`` is a key of ``_cell_opcode_dict``,
              otherwise ``False``
    """
    is_cell = c in _cell_opcode_dict
    return is_cell

238

def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """
    Extract bit fields from ``src`` and store them as attributes of
    ``tgt_obj``.

    :param tgt_obj: object on which the attributes are set
    :param src: integer holding the packed bit fields
    :param manifest: iterable of ``(shift, mask, attribute_name)`` tuples;
                     each field is computed as ``(src & mask) >> shift``
    :param local_setattr: attribute setter to use (defaults to ``setattr``)
    """
    for shift, bitmask, attr_name in manifest:
        field = (src & bitmask) >> shift
        local_setattr(tgt_obj, attr_name, field)

242

def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """
    Same as :func:`upkbits`, except that each extracted field is passed
    through ``local_int`` before being stored.

    :param tgt_obj: object on which the attributes are set
    :param src: integer holding the packed bit fields
    :param manifest: iterable of ``(shift, mask, attribute_name)`` tuples;
                     each field is computed as ``(src & mask) >> shift``
    :param local_setattr: attribute setter to use (defaults to ``setattr``)
    :param local_int: conversion applied to each field (defaults to ``int``)
    """
    for shift, bitmask, attr_name in manifest:
        field = local_int((src & bitmask) >> shift)
        local_setattr(tgt_obj, attr_name, field)

246

def unpack_string(data, pos, encoding, lenlen=1):
    """
    Decode a length-prefixed byte string.

    :param data: the bytes containing the string
    :param pos: offset in ``data`` of the length prefix
    :param encoding: name of the encoding used to decode the characters
    :param lenlen: size of the length prefix in bytes; 1 selects an
                   unsigned byte, 2 an unsigned little-endian 16-bit value
    :returns: the decoded string
    """
    length_code = 'BH'[lenlen-1]
    (nchars,) = unpack('<' + length_code, data[pos:pos+lenlen])
    start = pos + lenlen
    return unicode(data[start:start+nchars], encoding)

251

def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """
    Decode a length-prefixed byte string and report where it ends.

    :param data: the bytes containing the string
    :param pos: offset in ``data`` of the length prefix, or of the first
                character when ``known_len`` is given
    :param encoding: name of the encoding used to decode the characters
    :param lenlen: size of the length prefix in bytes; 1 selects an
                   unsigned byte, 2 an unsigned little-endian 16-bit value
    :param known_len: number of bytes in the string when the length is
                      already known; no prefix is read in that case
    :returns: tuple of (decoded string, offset just past the string)
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    end = pos + nchars
    return (unicode(data[pos:end], encoding), end)

261

def unpack_unicode(data, pos, lenlen=2):
    """
    Decode a string stored as a character count, an options byte and the
    character data.

    :param data: the bytes containing the string
    :param pos: offset in ``data`` of the character count
    :param lenlen: size of the character count in bytes; 1 selects an
                   unsigned byte, 2 an unsigned little-endian 16-bit value
    :returns: the decoded string
    """
    (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
    if nchars == 0:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return UNICODE_LITERAL("")
    flags = BYTES_ORD(data[pos + lenlen])
    start = pos + lenlen + 1
    if flags & 0x08:
        # Rich text: step over the 2-byte field that follows (value unused).
        start += 2
    if flags & 0x04:
        # Phonetic: step over the 4-byte field that follows (value unused).
        start += 4
    if flags & 0x01:
        # Uncompressed UTF-16-LE: two bytes per character.
        return unicode(data[start:start + 2*nchars], 'utf_16_le')
    # Note: this is COMPRESSED (not ASCII!) encoding, one byte per character.
    # Returning the raw bytes would only be right for a cp1252 local
    # codepage, so the bytes are decoded as latin_1 and Unicode is returned.
    return unicode(data[start:start + nchars], "latin_1")

300

def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """
    Decode a string stored as a character count, an options byte and the
    character data, and report where the whole structure ends.

    :param data: the bytes containing the string
    :param pos: offset in ``data`` of the character count, or of the
                options byte when ``known_len`` is given
    :param lenlen: size of the character count in bytes; 1 selects an
                   unsigned byte, 2 an unsigned little-endian 16-bit value
    :param known_len: number of characters when already known; no count
                      is read in that case
    :returns: tuple of (decoded string, updated value of ``pos``)
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return (UNICODE_LITERAL(""), pos)
    flags = BYTES_ORD(data[pos])
    pos += 1
    # Number of trailing bytes to step over once the characters are decoded.
    richtext_bytes = 0
    phonetic_bytes = 0
    if flags & 0x08:
        # Rich text: a 2-byte count, with 4 trailing bytes skipped per unit.
        richtext_bytes = 4 * unpack('<H', data[pos:pos+2])[0]
        pos += 2
    if flags & 0x04:
        # Phonetic: a 4-byte signed size of the trailing block.
        phonetic_bytes = unpack('<i', data[pos:pos+4])[0]
        pos += 4
    if flags & 0x01:
        # Uncompressed UTF-16-LE
        nbytes = 2 * nchars
        codec = 'utf_16_le'
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        nbytes = nchars
        codec = "latin_1"
    strg = unicode(data[pos:pos+nbytes], codec)
    pos += nbytes + richtext_bytes + phonetic_bytes
    return (strg, pos)

335

def unpack_cell_range_address_list_update_pos(output_list, data, pos, biff_version, addr_size=6):
    """
    Read a counted list of cell range addresses and append them to
    ``output_list`` (which is updated in situ).

    Each address is stored as (first row, last row, first column, last
    column); it is appended as a tuple with both "last" values increased
    by one.

    :param output_list: list that receives one 4-tuple per range
    :param data: the bytes containing the list
    :param pos: offset in ``data`` of the 2-byte range count
    :param biff_version: not used by this function
    :param addr_size: size in bytes of one address; must be 6 or 8
    :returns: offset just past the last address read
    """
    assert addr_size in (6, 8)
    # Used to assert size == 6 if not BIFF8, but pyWLWriter writes
    # BIFF8-only MERGEDCELLS records in a BIFF5 file!
    (count,) = unpack("<H", data[pos:pos+2])
    cursor = pos + 2
    if count:
        # 6-byte addresses hold the columns in single bytes, 8-byte ones in 16 bits.
        fmt = "<HHBB" if addr_size == 6 else "<HHHH"
        for _unused in xrange(count):
            first_row, last_row, first_col, last_col = unpack(
                fmt, data[cursor:cursor+addr_size])
            output_list.append((first_row, last_row + 1, first_col, last_col + 1))
            cursor += addr_size
    return cursor

353

# One record per line: four hexadecimal digits of record code, whitespace,
# then the record name (which never contains whitespace).
_brecstrg = """\
0000 DIMENSIONS_B2
0001 BLANK_B2
0002 INTEGER_B2_ONLY
0003 NUMBER_B2
0004 LABEL_B2
0005 BOOLERR_B2
0006 FORMULA
0007 STRING_B2
0008 ROW_B2
0009 BOF_B2
000A EOF
000B INDEX_B2_ONLY
000C CALCCOUNT
000D CALCMODE
000E PRECISION
000F REFMODE
0010 DELTA
0011 ITERATION
0012 PROTECT
0013 PASSWORD
0014 HEADER
0015 FOOTER
0016 EXTERNCOUNT
0017 EXTERNSHEET
0018 NAME_B2,5+
0019 WINDOWPROTECT
001A VERTICALPAGEBREAKS
001B HORIZONTALPAGEBREAKS
001C NOTE
001D SELECTION
001E FORMAT_B2-3
001F BUILTINFMTCOUNT_B2
0020 COLUMNDEFAULT_B2_ONLY
0021 ARRAY_B2_ONLY
0022 DATEMODE
0023 EXTERNNAME
0024 COLWIDTH_B2_ONLY
0025 DEFAULTROWHEIGHT_B2_ONLY
0026 LEFTMARGIN
0027 RIGHTMARGIN
0028 TOPMARGIN
0029 BOTTOMMARGIN
002A PRINTHEADERS
002B PRINTGRIDLINES
002F FILEPASS
0031 FONT
0032 FONT2_B2_ONLY
0036 TABLEOP_B2
0037 TABLEOP2_B2
003C CONTINUE
003D WINDOW1
003E WINDOW2_B2
0040 BACKUP
0041 PANE
0042 CODEPAGE
0043 XF_B2
0044 IXFE_B2_ONLY
0045 EFONT_B2_ONLY
004D PLS
0051 DCONREF
0055 DEFCOLWIDTH
0056 BUILTINFMTCOUNT_B3-4
0059 XCT
005A CRN
005B FILESHARING
005C WRITEACCESS
005D OBJECT
005E UNCALCED
005F SAVERECALC
0063 OBJECTPROTECT
007D COLINFO
007E RK2_mythical_?
0080 GUTS
0081 WSBOOL
0082 GRIDSET
0083 HCENTER
0084 VCENTER
0085 BOUNDSHEET
0086 WRITEPROT
008C COUNTRY
008D HIDEOBJ
008E SHEETSOFFSET
008F SHEETHDR
0090 SORT
0092 PALETTE
0099 STANDARDWIDTH
009B FILTERMODE
009C FNGROUPCOUNT
009D AUTOFILTERINFO
009E AUTOFILTER
00A0 SCL
00A1 SETUP
00AB GCW
00BD MULRK
00BE MULBLANK
00C1 MMS
00D6 RSTRING
00D7 DBCELL
00DA BOOKBOOL
00DD SCENPROTECT
00E0 XF
00E1 INTERFACEHDR
00E2 INTERFACEEND
00E5 MERGEDCELLS
00E9 BITMAP
00EB MSO_DRAWING_GROUP
00EC MSO_DRAWING
00ED MSO_DRAWING_SELECTION
00EF PHONETIC
00FC SST
00FD LABELSST
00FF EXTSST
013D TABID
015F LABELRANGES
0160 USESELFS
0161 DSF
01AE SUPBOOK
01AF PROTECTIONREV4
01B0 CONDFMT
01B1 CF
01B2 DVAL
01B6 TXO
01B7 REFRESHALL
01B8 HLINK
01BC PASSWORDREV4
01BE DV
01C0 XL9FILE
01C1 RECALCID
0200 DIMENSIONS
0201 BLANK
0203 NUMBER
0204 LABEL
0205 BOOLERR
0206 FORMULA_B3
0207 STRING
0208 ROW
0209 BOF
020B INDEX_B3+
0218 NAME
0221 ARRAY
0223 EXTERNNAME_B3-4
0225 DEFAULTROWHEIGHT
0231 FONT_B3B4
0236 TABLEOP
023E WINDOW2
0243 XF_B3
027E RK
0293 STYLE
0406 FORMULA_B4
0409 BOF
041E FORMAT
0443 XF_B4
04BC SHRFMLA
0800 QUICKTIP
0809 BOF
0862 SHEETLAYOUT
0867 SHEETPROTECTION
0868 RANGEPROTECTION
"""

# Maps the integer record code to the record name taken from the table above.
biff_rec_name_dict = {}
for _buff in _brecstrg.splitlines():
    _numh, _name = _buff.split()
    biff_rec_name_dict[int(_numh, 16)] = _name
# Drop every temporary used to build the dictionary, so that none of them
# lingers in the module namespace.
del _buff, _numh, _name, _brecstrg

520

def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a hexadecimal and character dump of part of ``strg``, 16 bytes
    per output line.

    Each line holds an optional offset prefix, the bytes as two-digit hex
    values, and the same bytes as characters: a NUL byte is shown as ``~``
    and any other byte outside the range space..tilde as ``?``.

    :param strg: the bytes to dump
    :param ofs: offset in ``strg`` of the first byte to dump
    :param dlen: number of bytes to dump; the dump stops at the end of
                 ``strg`` if that comes first
    :param base: offset number displayed for the byte at ``ofs``
    :param fout: open file object to which all output is written
    :param unnumbered: if true, the offset prefix is left empty
    """
    endpos = min(ofs + dlen, len(strg))
    pos = ofs
    numbered = not unnumbered
    num_prefix = ''
    while pos < endpos:
        endsub = min(pos + 16, endpos)
        substrg = strg[pos:endsub]
        lensub = endsub - pos
        if lensub <= 0 or lensub != len(substrg):
            # The slice is not the size the offsets imply. Report it on the
            # same stream as the dump, so the diagnostic appears alongside
            # the output it refers to, then give up.
            fprintf(
                fout,
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
                ofs, dlen, base, endpos, pos, endsub, substrg)
            break
        hexd = ''.join("%02x " % BYTES_ORD(c) for c in substrg)

        shown = []
        for c in substrg:
            c = chr(BYTES_ORD(c))
            if c == '\0':
                c = '~'
            elif not (' ' <= c <= '~'):
                c = '?'
            shown.append(c)
        chard = ''.join(shown)
        if numbered:
            num_prefix = "%5d: " % (base+pos-ofs)

        fprintf(fout, "%s     %-48s %s\n", num_prefix, hexd, chard)
        pos = endsub

551

def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a record-by-record dump of a BIFF stream held in ``mem``.

    Every record is read as a 4-byte header (record code and data length,
    both unsigned little-endian 16-bit values) followed by ``length`` bytes
    of data, which are shown with :func:`hex_char_dump`. Runs of all-zero
    headers are reported as one "zero bytes skipped" line, and bytes left
    over after the last complete header are dumped as "Misc bytes at end".

    :param mem: the bytes containing the stream
    :param stream_offset: offset in ``mem`` at which the stream starts
    :param stream_len: length of the stream in bytes
    :param base: offset number displayed for the first byte of the stream
    :param fout: open file object to which the dump is written
    :param unnumbered: if true, no offset prefix is written on each line
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    adj = base - stream_offset
    dummies = 0
    numbered = not unnumbered
    num_prefix = ''
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Only the bytes up to stream_end belong to the stream; mem may
            # hold more data after it, which must not take part in the test.
            if mem[pos:stream_end] == b'\0' * (stream_end - pos):
                dummies = stream_end - pos
                savpos = pos
                pos = stream_end
                break
            if dummies:
                dummies += 4
            else:
                # First zero header of a new run: remember where it started.
                savpos = pos
                dummies = 4
            pos += 4
        else:
            if dummies:
                # A real record ends the run of zero headers; report the run.
                if numbered:
                    num_prefix = "%5d: " % (adj + savpos)
                fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
                dummies = 0
            recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
            if numbered:
                num_prefix = "%5d: " % (adj + pos)
            fprintf(fout, "%s%04x %s len = %04x (%d)\n", num_prefix, rc, recname, length, length)
            pos += 4
            hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
            pos += length
    if dummies:
        if numbered:
            num_prefix = "%5d: " % (adj + savpos)
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", num_prefix, dummies)
    if pos < stream_end:
        if numbered:
            num_prefix = "%5d: " % (adj + pos)
        fprintf(fout, "%s---- Misc bytes at end ----\n", num_prefix)
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        # Only reachable after a record whose data ran past the stream end.
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)

597

def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """
    Count the records of each kind in a BIFF stream and write the totals.

    Records are walked by their 4-byte headers (record code and data
    length, both unsigned little-endian 16-bit values). An all-zero header
    is tallied as ``<Dummy (zero)>``, unless everything from there to the
    end of the stream is zero, in which case counting stops. Codes missing
    from ``biff_rec_name_dict`` are tallied as ``Unknown_0xNNNN``.

    :param mem: the bytes containing the stream
    :param stream_offset: offset in ``mem`` at which the stream starts
    :param stream_len: length of the stream in bytes
    :param fout: open file object to which the counts are written, one
                 "count name" line per record name, sorted by name
    """
    pos = stream_offset
    stream_end = stream_offset + stream_len
    tally = {}
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            # Only the bytes up to stream_end belong to the stream; mem may
            # hold more data after it, which must not take part in the test.
            if mem[pos:stream_end] == b'\0' * (stream_end - pos):
                break
            recname = "<Dummy (zero)>"
        else:
            recname = biff_rec_name_dict.get(rc, None)
            if recname is None:
                recname = "Unknown_0x%04X" % rc
        tally[recname] = tally.get(recname, 0) + 1
        pos += length + 4
    for recname, count in sorted(tally.items()):
        print("%8d %s" % (count, recname), file=fout)

620

# Maps a codepage number to the name of the Python codec used for it.
# Entries marked "guess" are unconfirmed.
encoding_from_codepage = {
    1200: 'utf_16_le',
    10000: 'mac_roman',
    10006: 'mac_greek',     # guess
    10007: 'mac_cyrillic',  # guess
    10029: 'mac_latin2',    # guess
    10079: 'mac_iceland',   # guess
    10081: 'mac_turkish',   # guess
    32768: 'mac_roman',
    32769: 'cp1252',
}

632# some more guessing, for Indic scripts

633# codepage 57000 range:

634# 2 Devanagari [0]

635# 3 Bengali [1]

636# 4 Tamil [5]

637# 5 Telegu [6]

638# 6 Assamese [1] c.f. Bengali

639# 7 Oriya [4]

640# 8 Kannada [7]

641# 9 Malayalam [8]

642# 10 Gujarati [3]

643# 11 Gurmukhi [2]