Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/xlrd/biffh.py: 54%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# -*- coding: utf-8 -*-
# Portions copyright © 2005-2010 Stephen John Machin, Lingfo Pty Ltd
# This module is part of the xlrd package, which is released under a
# BSD-style licence.
5from __future__ import print_function
7import sys
8from struct import unpack
10from .timemachine import *
# Debug flag. In the visible part of this module it is only mentioned by
# commented-out code.
DEBUG = 0
class XLRDError(Exception):
    """
    Raised to signal a problem encountered while reading data from an
    Excel file.
    """
class BaseObject(object):
    """
    Common ancestor of most other classes in the package. Supplies a shared
    :meth:`dump` method for debugging.
    """

    # Names of list/dict attributes that dump() prints with repr() instead of
    # summarising them as "<type>, len = N".
    _repr_these = []

    def dump(self, f=None, header=None, footer=None, indent=0):
        """
        Write one line per attribute (sorted by name) describing this object.

        :param f: open file object, to which the dump is written;
                  ``sys.stderr`` is used when this is ``None``
        :param header: text to write before the dump
        :param footer: text to write after the dump
        :param indent: number of leading spaces (for recursive calls)
        """
        out = sys.stderr if f is None else f
        if hasattr(self, "__slots__"):
            pairs = [(name, getattr(self, name)) for name in self.__slots__]
        else:
            pairs = self.__dict__.items()
        pairs = sorted(pairs)
        margin = " " * indent
        if header is not None:
            print(header, file=out)
        for name, val in pairs:
            if getattr(val, 'dump', None) and name != 'book':
                # The value can dump itself: recurse 4 columns deeper.
                # An attribute called 'book' is never recursed into
                # (presumably a back-reference to the owning workbook).
                val.dump(
                    out,
                    header="%s%s (%s object):" % (margin, name, val.__class__.__name__),
                    indent=indent+4,
                )
            elif name not in self._repr_these and isinstance(val, (list, dict)):
                # Containers are summarised rather than printed in full.
                print("%s%s: %s, len = %d" % (margin, name, type(val), len(val)), file=out)
            else:
                fprintf(out, "%s%s: %r\n", margin, name, val)
        if footer is not None:
            print(footer, file=out)
# Format-type codes, in order: unknown, date, number, general, text.
(FUN, FDT, FNU, FGE, FTX) = range(5)
# Longer aliases for the date and number codes.
DATEFORMAT = FDT
NUMBERFORMAT = FNU
# Cell-type codes, numbered consecutively from 0.
# XL_CELL_BLANK is for use in debugging, gathering stats, etc.
(XL_CELL_EMPTY, XL_CELL_TEXT, XL_CELL_NUMBER, XL_CELL_DATE,
 XL_CELL_BOOLEAN, XL_CELL_ERROR, XL_CELL_BLANK) = range(7)
# Maps the internal BIFF version number to its display text.
biff_text_from_num = {
    0: "(not BIFF)",
    20: "2.0", 21: "2.1",
    30: "3",
    40: "4S", 45: "4W",
    50: "5",
    70: "7",
    80: "8", 85: "8X",
}
#: Use this dictionary to turn the internal codes that Excel stores in
#: error cells into their text form.
error_text_from_code = {
    0x00: '#NULL!',   # Intersection of two cell ranges is empty
    0x07: '#DIV/0!',  # Division by zero
    0x0F: '#VALUE!',  # Wrong type of operand
    0x17: '#REF!',    # Illegal or deleted cell reference
    0x1D: '#NAME?',   # Wrong function or range name
    0x24: '#NUM!',    # Value range overflow
    0x2A: '#N/A',     # Argument or function not available
}
BIFF_FIRST_UNICODE = 80

# Each of the next three codes also has a short alias. The spelling
# "WBKBLOBAL" is kept exactly as it is because it is a public name.
XL_WORKBOOK_GLOBALS = 0x5
WBKBLOBAL = XL_WORKBOOK_GLOBALS
XL_WORKBOOK_GLOBALS_4W = 0x100
XL_WORKSHEET = 0x10
WRKSHEET = XL_WORKSHEET

# BOUNDSHEET type codes.
XL_BOUNDSHEET_WORKSHEET = 0x00
XL_BOUNDSHEET_CHART = 0x02
XL_BOUNDSHEET_VB_MODULE = 0x06
# BIFF record opcodes. Several share a numeric value with another name
# (for example XL_DIMENSION2 is 0x0 and XL_WSBOOL aliases XL_SHEETPR).
# XL_RK2 = 0x7e
XL_ARRAY = 0x0221
XL_ARRAY2 = 0x0021
XL_BLANK = 0x0201
XL_BLANK_B2 = 0x01
XL_BOF = 0x809
XL_BOOLERR = 0x205
XL_BOOLERR_B2 = 0x5
XL_BOUNDSHEET = 0x85
XL_BUILTINFMTCOUNT = 0x56
XL_CF = 0x01B1
XL_CODEPAGE = 0x42
XL_COLINFO = 0x7D
XL_COLUMNDEFAULT = 0x20  # BIFF2 only
XL_COLWIDTH = 0x24  # BIFF2 only
XL_CONDFMT = 0x01B0
XL_CONTINUE = 0x3c
XL_COUNTRY = 0x8C
XL_DATEMODE = 0x22
XL_DEFAULTROWHEIGHT = 0x0225
XL_DEFCOLWIDTH = 0x55
XL_DIMENSION = 0x200
XL_DIMENSION2 = 0x0
XL_EFONT = 0x45
XL_EOF = 0x0a
XL_EXTERNNAME = 0x23
XL_EXTERNSHEET = 0x17
XL_EXTSST = 0xff
XL_FEAT11 = 0x872
XL_FILEPASS = 0x2f
XL_FONT = 0x31
XL_FONT_B3B4 = 0x231
XL_FORMAT = 0x41e
XL_FORMAT2 = 0x1E  # BIFF2, BIFF3
XL_FORMULA = 0x6
XL_FORMULA3 = 0x206
XL_FORMULA4 = 0x406
XL_GCW = 0xab
XL_HLINK = 0x01B8
XL_QUICKTIP = 0x0800
XL_HORIZONTALPAGEBREAKS = 0x1b
XL_INDEX = 0x20b
XL_INTEGER = 0x2  # BIFF2 only
XL_IXFE = 0x44  # BIFF2 only
XL_LABEL = 0x204
XL_LABEL_B2 = 0x04
XL_LABELRANGES = 0x15f
XL_LABELSST = 0xfd
XL_LEFTMARGIN = 0x26
XL_TOPMARGIN = 0x28
XL_RIGHTMARGIN = 0x27
XL_BOTTOMMARGIN = 0x29
XL_HEADER = 0x14
XL_FOOTER = 0x15
XL_HCENTER = 0x83
XL_VCENTER = 0x84
XL_MERGEDCELLS = 0xE5
XL_MSO_DRAWING = 0x00EC
XL_MSO_DRAWING_GROUP = 0x00EB
XL_MSO_DRAWING_SELECTION = 0x00ED
XL_MULRK = 0xbd
XL_MULBLANK = 0xbe
XL_NAME = 0x18
XL_NOTE = 0x1c
XL_NUMBER = 0x203
XL_NUMBER_B2 = 0x3
XL_OBJ = 0x5D
XL_PAGESETUP = 0xA1
XL_PALETTE = 0x92
XL_PANE = 0x41
XL_PRINTGRIDLINES = 0x2B
XL_PRINTHEADERS = 0x2A
XL_RK = 0x27e
XL_ROW = 0x208
XL_ROW_B2 = 0x08
XL_RSTRING = 0xd6
XL_SCL = 0x00A0
XL_SHEETHDR = 0x8F  # BIFF4W only
XL_SHEETPR = 0x81
XL_SHEETSOFFSET = 0x8E  # BIFF4W only
XL_SHRFMLA = 0x04bc
XL_SST = 0xfc
XL_STANDARDWIDTH = 0x99
XL_STRING = 0x207
XL_STRING_B2 = 0x7
XL_STYLE = 0x293
XL_SUPBOOK = 0x1AE  # aka EXTERNALBOOK in OOo docs
XL_TABLEOP = 0x236
XL_TABLEOP2 = 0x37
XL_TABLEOP_B2 = 0x36
XL_TXO = 0x1b6
XL_UNCALCED = 0x5e
XL_UNKNOWN = 0xffff
XL_VERTICALPAGEBREAKS = 0x1a
XL_WINDOW2 = 0x023E
XL_WINDOW2_B2 = 0x003E
XL_WRITEACCESS = 0x5C
XL_WSBOOL = XL_SHEETPR
XL_XF = 0xe0
XL_XF2 = 0x0043  # BIFF2 version of XF record
XL_XF3 = 0x0243  # BIFF3 version of XF record
XL_XF4 = 0x0443  # BIFF4 version of XF record
# Value associated with each BOF record opcode (the name suggests a length
# in bytes), plus the same opcodes as a tuple.
boflen = {
    0x0809: 8,
    0x0409: 6,
    0x0209: 6,
    0x0009: 4,
}
bofcodes = (0x0809, 0x0409, 0x0209, 0x0009)

# Opcodes of the FORMULA record variants.
XL_FORMULA_OPCODES = (0x0006, 0x0406, 0x0206)
# Record opcodes that is_cell_opcode() recognises.
_cell_opcode_list = [
    XL_BOOLERR,
    XL_FORMULA, XL_FORMULA3, XL_FORMULA4,
    XL_LABEL, XL_LABELSST,
    XL_MULRK,
    XL_NUMBER,
    XL_RK,
    XL_RSTRING,
]
# Lookup table keyed by opcode; the stored value (1) is never read here.
_cell_opcode_dict = {}
for _cell_opcode in _cell_opcode_list:
    _cell_opcode_dict.setdefault(_cell_opcode, 1)


def is_cell_opcode(c):
    """Return True if ``c`` is one of the opcodes in ``_cell_opcode_list``."""
    found = c in _cell_opcode_dict
    return found
def upkbits(tgt_obj, src, manifest, local_setattr=setattr):
    """
    Unpack bit fields from ``src`` into attributes of ``tgt_obj``.

    :param tgt_obj: object that receives the attributes
    :param src: integer holding the packed bit fields
    :param manifest: iterable of ``(shift, mask, attribute_name)`` triples;
                     each field is ``(src & mask) >> shift``
    :param local_setattr: callable used to store each field
    """
    for shift, bitmask, name in manifest:
        field = (src & bitmask) >> shift
        local_setattr(tgt_obj, name, field)
def upkbitsL(tgt_obj, src, manifest, local_setattr=setattr, local_int=int):
    """
    Like :func:`upkbits`, but pass every extracted field through
    ``local_int`` before storing it.

    :param tgt_obj: object that receives the attributes
    :param src: integer holding the packed bit fields
    :param manifest: iterable of ``(shift, mask, attribute_name)`` triples
    :param local_setattr: callable used to store each field
    :param local_int: callable applied to each field value
    """
    for shift, bitmask, name in manifest:
        field = local_int((src & bitmask) >> shift)
        local_setattr(tgt_obj, name, field)
def unpack_string(data, pos, encoding, lenlen=1):
    """
    Decode a length-prefixed byte string starting at ``data[pos]``.

    :param data: the raw record bytes
    :param pos: offset of the length field
    :param encoding: codec name passed to ``unicode``
    :param lenlen: size of the length field in bytes (1 or 2, little-endian)
    :returns: the decoded text
    """
    length_fmt = '<' + 'BH'[lenlen-1]
    start = pos + lenlen
    (nchars,) = unpack(length_fmt, data[pos:start])
    return unicode(data[start:start+nchars], encoding)
def unpack_string_update_pos(data, pos, encoding, lenlen=1, known_len=None):
    """
    Decode a byte string at ``data[pos]`` and report where it ends.

    :param data: the raw record bytes
    :param pos: offset of the length field, or of the first character when
                ``known_len`` is given
    :param encoding: codec name passed to ``unicode``
    :param lenlen: size of the length field in bytes (1 or 2, little-endian)
    :param known_len: length supplied by the caller; when not ``None`` no
                      length field is read and ``pos`` is not advanced past one
    :returns: ``(text, position just after the string)``
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    newpos = pos + nchars
    text = unicode(data[pos:newpos], encoding)
    return (text, newpos)
def unpack_unicode(data, pos, lenlen=2):
    """
    Return the unicode string stored at ``data[pos]``.

    The layout read here is: a little-endian character count of ``lenlen``
    bytes, one options byte, an optional 2-byte field (options bit 0x08,
    rich text), an optional 4-byte field (options bit 0x04, phonetic), then
    the characters. Options bit 0x01 selects UTF-16-LE; otherwise each
    character is one byte decoded as latin_1. Neither optional field's value
    is used; both are merely skipped.
    """
    (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
    if not nchars:
        # Ambiguous whether 0-length string should have an "options" byte.
        # Avoid crash if missing.
        return UNICODE_LITERAL("")
    options_at = pos + lenlen
    options = BYTES_ORD(data[options_at])
    body = options_at + 1
    if options & 0x08:
        # rich text: step over the 2-byte field
        body += 2
    if options & 0x04:
        # phonetic: step over the 4-byte field
        body += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        return unicode(data[body:body+2*nchars], 'utf_16_le')
    # Note: this is COMPRESSED (not ASCII!) encoding!!!
    # Merely returning the raw bytes would work OK 99.99% of the time
    # if the local codepage was cp1252 -- however this would rapidly go
    # pear-shaped for other codepages, so Unicode is returned instead.
    return unicode(data[body:body+nchars], "latin_1")
def unpack_unicode_update_pos(data, pos, lenlen=2, known_len=None):
    """
    Return (unicode_strg, updated value of pos).

    :param data: the raw record bytes
    :param pos: offset of the character-count field, or of the options byte
                when ``known_len`` is given
    :param lenlen: size of the character-count field in bytes (1 or 2)
    :param known_len: character count supplied by the caller; when not
                      ``None`` no count field is read
    :returns: the decoded text and the offset just past the string,
              including any trailing rich-text and phonetic data
    """
    if known_len is None:
        (nchars,) = unpack('<' + 'BH'[lenlen-1], data[pos:pos+lenlen])
        pos += lenlen
    else:
        # On a NAME record, the length byte is detached from the front of the string.
        nchars = known_len
    if not nchars and not data[pos:]:
        # Zero-length string with no options byte
        return (UNICODE_LITERAL(""), pos)
    options = BYTES_ORD(data[pos])
    pos += 1
    # Both stay 0 unless the matching options bit is set.
    rich_runs = 0
    phonetic_size = 0
    if options & 0x08:
        # rich text: 2-byte count, later multiplied by 4 to skip the runs
        (rich_runs,) = unpack('<H', data[pos:pos+2])
        pos += 2
    if options & 0x04:
        # phonetic: 4-byte signed size of the trailing phonetic data
        (phonetic_size,) = unpack('<i', data[pos:pos+4])
        pos += 4
    if options & 0x01:
        # Uncompressed UTF-16-LE
        width, codec = 2, 'utf_16_le'
    else:
        # Note: this is COMPRESSED (not ASCII!) encoding!!!
        width, codec = 1, "latin_1"
    text_end = pos + width*nchars
    strg = unicode(data[pos:text_end], codec)
    return (strg, text_end + 4 * rich_runs + phonetic_size)
def unpack_cell_range_address_list_update_pos(output_list, data, pos, biff_version, addr_size=6):
    """
    Read a counted list of cell-range addresses and append them to
    ``output_list`` (which is updated in situ).

    :param output_list: list that receives one 4-tuple per address
    :param data: the raw record bytes
    :param pos: offset of the 2-byte little-endian address count
    :param biff_version: not used by this function
    :param addr_size: bytes per address: 6 (``<HHBB``) or 8 (``<HHHH``)
    :returns: the offset just past the last address read
    """
    assert addr_size in (6, 8)
    # Used to assert size == 6 if not BIFF8, but pyWLWriter writes
    # BIFF8-only MERGEDCELLS records in a BIFF5 file!
    (count,) = unpack("<H", data[pos:pos+2])
    pos += 2
    if not count:
        return pos
    fmt = "<HHBB" if addr_size == 6 else "<HHHH"
    for _unused in xrange(count):
        ra, rb, ca, cb = unpack(fmt, data[pos:pos+addr_size])
        # The 2nd and 4th values are stored incremented by one (presumably
        # turning inclusive "last" indexes into exclusive upper bounds).
        output_list.append((ra, rb+1, ca, cb+1))
        pos += addr_size
    return pos
# One "<hex opcode> <record name>" pair per line; parsed into
# biff_rec_name_dict below and then discarded.
_brecstrg = """\
0000 DIMENSIONS_B2
0001 BLANK_B2
0002 INTEGER_B2_ONLY
0003 NUMBER_B2
0004 LABEL_B2
0005 BOOLERR_B2
0006 FORMULA
0007 STRING_B2
0008 ROW_B2
0009 BOF_B2
000A EOF
000B INDEX_B2_ONLY
000C CALCCOUNT
000D CALCMODE
000E PRECISION
000F REFMODE
0010 DELTA
0011 ITERATION
0012 PROTECT
0013 PASSWORD
0014 HEADER
0015 FOOTER
0016 EXTERNCOUNT
0017 EXTERNSHEET
0018 NAME_B2,5+
0019 WINDOWPROTECT
001A VERTICALPAGEBREAKS
001B HORIZONTALPAGEBREAKS
001C NOTE
001D SELECTION
001E FORMAT_B2-3
001F BUILTINFMTCOUNT_B2
0020 COLUMNDEFAULT_B2_ONLY
0021 ARRAY_B2_ONLY
0022 DATEMODE
0023 EXTERNNAME
0024 COLWIDTH_B2_ONLY
0025 DEFAULTROWHEIGHT_B2_ONLY
0026 LEFTMARGIN
0027 RIGHTMARGIN
0028 TOPMARGIN
0029 BOTTOMMARGIN
002A PRINTHEADERS
002B PRINTGRIDLINES
002F FILEPASS
0031 FONT
0032 FONT2_B2_ONLY
0036 TABLEOP_B2
0037 TABLEOP2_B2
003C CONTINUE
003D WINDOW1
003E WINDOW2_B2
0040 BACKUP
0041 PANE
0042 CODEPAGE
0043 XF_B2
0044 IXFE_B2_ONLY
0045 EFONT_B2_ONLY
004D PLS
0051 DCONREF
0055 DEFCOLWIDTH
0056 BUILTINFMTCOUNT_B3-4
0059 XCT
005A CRN
005B FILESHARING
005C WRITEACCESS
005D OBJECT
005E UNCALCED
005F SAVERECALC
0063 OBJECTPROTECT
007D COLINFO
007E RK2_mythical_?
0080 GUTS
0081 WSBOOL
0082 GRIDSET
0083 HCENTER
0084 VCENTER
0085 BOUNDSHEET
0086 WRITEPROT
008C COUNTRY
008D HIDEOBJ
008E SHEETSOFFSET
008F SHEETHDR
0090 SORT
0092 PALETTE
0099 STANDARDWIDTH
009B FILTERMODE
009C FNGROUPCOUNT
009D AUTOFILTERINFO
009E AUTOFILTER
00A0 SCL
00A1 SETUP
00AB GCW
00BD MULRK
00BE MULBLANK
00C1 MMS
00D6 RSTRING
00D7 DBCELL
00DA BOOKBOOL
00DD SCENPROTECT
00E0 XF
00E1 INTERFACEHDR
00E2 INTERFACEEND
00E5 MERGEDCELLS
00E9 BITMAP
00EB MSO_DRAWING_GROUP
00EC MSO_DRAWING
00ED MSO_DRAWING_SELECTION
00EF PHONETIC
00FC SST
00FD LABELSST
00FF EXTSST
013D TABID
015F LABELRANGES
0160 USESELFS
0161 DSF
01AE SUPBOOK
01AF PROTECTIONREV4
01B0 CONDFMT
01B1 CF
01B2 DVAL
01B6 TXO
01B7 REFRESHALL
01B8 HLINK
01BC PASSWORDREV4
01BE DV
01C0 XL9FILE
01C1 RECALCID
0200 DIMENSIONS
0201 BLANK
0203 NUMBER
0204 LABEL
0205 BOOLERR
0206 FORMULA_B3
0207 STRING
0208 ROW
0209 BOF
020B INDEX_B3+
0218 NAME
0221 ARRAY
0223 EXTERNNAME_B3-4
0225 DEFAULTROWHEIGHT
0231 FONT_B3B4
0236 TABLEOP
023E WINDOW2
0243 XF_B3
027E RK
0293 STYLE
0406 FORMULA_B4
0409 BOF
041E FORMAT
0443 XF_B4
04BC SHRFMLA
0800 QUICKTIP
0809 BOF
0862 SHEETLAYOUT
0867 SHEETPROTECTION
0868 RANGEPROTECTION
"""

# Maps integer record opcode -> record name, built from the table above.
biff_rec_name_dict = {}
for _buff in _brecstrg.splitlines():
    _numh, _name = _buff.split()
    biff_rec_name_dict[int(_numh, 16)] = _name
# Remove every parsing temporary from the module namespace. _numh is now
# deleted along with the others; previously it was left behind.
del _buff, _numh, _name, _brecstrg
def hex_char_dump(strg, ofs, dlen, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a hex-plus-printable-character dump of part of ``strg`` to ``fout``,
    16 bytes per output line.

    :param strg: the bytes to dump from
    :param ofs: offset in ``strg`` of the first byte to dump
    :param dlen: number of bytes to dump (clipped to the end of ``strg``)
    :param base: number shown for the first byte; each line is labelled
                 ``base + (line start - ofs)``
    :param fout: open file object that receives all output, including the
                 internal sanity-check diagnostic; the default is the
                 ``sys.stdout`` that was current when this function was defined
    :param unnumbered: if true, lines are written without the numeric label
    """
    endpos = min(ofs + dlen, len(strg))
    pos = ofs
    numbered = not unnumbered
    num_prefix = ''
    while pos < endpos:
        endsub = min(pos + 16, endpos)
        substrg = strg[pos:endsub]
        lensub = endsub - pos
        if lensub <= 0 or lensub != len(substrg):
            # Slice did not yield the expected number of bytes: report and
            # stop. This goes to fout like the rest of the dump (it used to
            # be sent to sys.stdout regardless of the fout argument).
            fprintf(
                fout,
                '??? hex_char_dump: ofs=%d dlen=%d base=%d -> endpos=%d pos=%d endsub=%d substrg=%r\n',
                ofs, dlen, base, endpos, pos, endsub, substrg)
            break
        codes = [BYTES_ORD(c) for c in substrg]
        hexd = ''.join("%02x " % code for code in codes)

        # Character column: NUL shows as '~', anything outside ' '..'~'
        # shows as '?', everything else as itself.
        shown = []
        for code in codes:
            c = chr(code)
            if c == '\0':
                c = '~'
            elif not (' ' <= c <= '~'):
                c = '?'
            shown.append(c)
        chard = ''.join(shown)
        if numbered:
            num_prefix = "%5d: " % (base+pos-ofs)

        fprintf(fout, "%s %-48s %s\n", num_prefix, hexd, chard)
        pos = endsub
def biff_dump(mem, stream_offset, stream_len, base=0, fout=sys.stdout, unnumbered=False):
    """
    Write a record-by-record dump of a BIFF stream to ``fout``.

    Each record is read as a 4-byte little-endian header (opcode, length)
    followed by ``length`` data bytes, which are shown via
    :func:`hex_char_dump`. Runs of all-zero headers are collapsed into a
    single "zero bytes skipped" line.

    :param mem: bytes containing the stream
    :param stream_offset: offset in ``mem`` where the stream starts
    :param stream_len: length of the stream in bytes
    :param base: number shown for the first byte of the stream
    :param fout: open file object that receives the dump
    :param unnumbered: if true, lines are written without the numeric label
    """
    stream_end = stream_offset + stream_len
    adj = base - stream_offset

    def label(at):
        # Numeric label for stream position `at`, or '' when unnumbered.
        if unnumbered:
            return ''
        return "%5d: " % (adj + at)

    def report_zero_run(start, nbytes):
        fprintf(fout, "%s---- %d zero bytes skipped ----\n", label(start), nbytes)

    pos = stream_offset
    zero_run = 0
    while stream_end - pos >= 4:
        rc, length = unpack('<HH', mem[pos:pos+4])
        if rc == 0 and length == 0:
            if mem[pos:] == b'\0' * (stream_end - pos):
                # Nothing but zeros from here on: account for all of it.
                zero_run = stream_end - pos
                zero_start = pos
                pos = stream_end
                break
            if not zero_run:
                zero_start = pos
            zero_run += 4
            pos += 4
            continue
        if zero_run:
            report_zero_run(zero_start, zero_run)
            zero_run = 0
        recname = biff_rec_name_dict.get(rc, '<UNKNOWN>')
        fprintf(fout, "%s%04x %s len = %04x (%d)\n", label(pos), rc, recname, length, length)
        pos += 4
        hex_char_dump(mem, pos, length, adj+pos, fout, unnumbered)
        pos += length
    if zero_run:
        report_zero_run(zero_start, zero_run)
    if pos < stream_end:
        fprintf(fout, "%s---- Misc bytes at end ----\n", label(pos))
        hex_char_dump(mem, pos, stream_end-pos, adj + pos, fout, unnumbered)
    elif pos > stream_end:
        fprintf(fout, "Last dumped record has length (%d) that is too large\n", length)
def biff_count_records(mem, stream_offset, stream_len, fout=sys.stdout):
    """
    Count the records in a BIFF stream by name and print the tallies.

    Records are walked as 4-byte little-endian headers (opcode, length), each
    followed by ``length`` data bytes. An all-zero header counts as
    "<Dummy (zero)>", unless everything from there to the end of ``mem`` is
    zero, which ends the scan. Opcodes missing from ``biff_rec_name_dict``
    are counted as "Unknown_0x....". One "count name" line per name is
    written to ``fout``, sorted by name.

    :param mem: bytes containing the stream
    :param stream_offset: offset in ``mem`` where the stream starts
    :param stream_len: length of the stream in bytes
    :param fout: open file object that receives the report
    """
    stream_end = stream_offset + stream_len
    counts = {}
    cursor = stream_offset
    while stream_end - cursor >= 4:
        rc, length = unpack('<HH', mem[cursor:cursor+4])
        if rc == 0 and length == 0:
            if mem[cursor:] == b'\0' * (stream_end - cursor):
                break
            label = "<Dummy (zero)>"
        else:
            label = biff_rec_name_dict.get(rc)
            if label is None:
                label = "Unknown_0x%04X" % rc
        counts[label] = counts.get(label, 0) + 1
        cursor += length + 4
    for label, total in sorted(counts.items()):
        print("%8d %s" % (total, label), file=fout)
# Maps a codepage number to a Python codec name. Entries marked "guess"
# are unverified, as flagged by the original author.
encoding_from_codepage = {
    1200: 'utf_16_le',
    10000: 'mac_roman',
    10006: 'mac_greek',     # guess
    10007: 'mac_cyrillic',  # guess
    10029: 'mac_latin2',    # guess
    10079: 'mac_iceland',   # guess
    10081: 'mac_turkish',   # guess
    32768: 'mac_roman',
    32769: 'cp1252',
}
# some more guessing, for Indic scripts
# codepage 57000 range:
# 2 Devanagari [0]
# 3 Bengali [1]
# 4 Tamil [5]
# 5 Telugu [6]
# 6 Assamese [1] c.f. Bengali
# 7 Oriya [4]
# 8 Kannada [7]
# 9 Malayalam [8]
# 10 Gujarati [3]
# 11 Gurmukhi [2]