Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/olefile/olefile.py: 44%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1149 statements  

1""" 

2olefile (formerly OleFileIO_PL) 

3 

4Module to read/write Microsoft OLE2 files (also called Structured Storage or 

5Microsoft Compound Document File Format), such as Microsoft Office 97-2003 

6documents, Image Composer and FlashPix files, Outlook messages, ... 

7This version is compatible with Python 2.7 and 3.5+ 

8 

9Project website: https://www.decalage.info/olefile 

10 

11olefile is copyright (c) 2005-2023 Philippe Lagadec 

12(https://www.decalage.info) 

13 

14olefile is based on the OleFileIO module from the PIL library v1.1.7 

15See: http://www.pythonware.com/products/pil/index.htm 

16and http://svn.effbot.org/public/tags/pil-1.1.7/PIL/OleFileIO.py 

17 

18The Python Imaging Library (PIL) is 

19Copyright (c) 1997-2009 by Secret Labs AB 

20Copyright (c) 1995-2009 by Fredrik Lundh 

21 

22See source code and LICENSE.txt for information on usage and redistribution. 

23""" 

24 

25# Since olefile v0.47, only Python 2.7 and 3.5+ are supported 

26# This import enables print() as a function rather than a keyword 

27# (main requirement to be compatible with Python 3.x) 

28# The comment on the line below should be printed on Python 2.5 or older: 

29from __future__ import print_function # This version of olefile requires Python 2.7 or 3.5+. 

30 

31 

32#--- LICENSE ------------------------------------------------------------------ 

33 

34# olefile (formerly OleFileIO_PL) is copyright (c) 2005-2023 Philippe Lagadec 

35# (https://www.decalage.info) 

36# 

37# All rights reserved. 

38# 

39# Redistribution and use in source and binary forms, with or without modification, 

40# are permitted provided that the following conditions are met: 

41# 

42# * Redistributions of source code must retain the above copyright notice, this 

43# list of conditions and the following disclaimer. 

44# * Redistributions in binary form must reproduce the above copyright notice, 

45# this list of conditions and the following disclaimer in the documentation 

46# and/or other materials provided with the distribution. 

47# 

48# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 

49# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 

50# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 

51# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 

52# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 

53# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 

54# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 

55# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 

56# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

57# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

58 

59# ---------- 

60# PIL License: 

61# 

62# olefile is based on source code from the OleFileIO module of the Python 

63# Imaging Library (PIL) published by Fredrik Lundh under the following license: 

64 

65# The Python Imaging Library (PIL) is 

66# Copyright (c) 1997-2009 by Secret Labs AB 

67# Copyright (c) 1995-2009 by Fredrik Lundh 

68# 

69# By obtaining, using, and/or copying this software and/or its associated 

70# documentation, you agree that you have read, understood, and will comply with 

71# the following terms and conditions: 

72# 

73# Permission to use, copy, modify, and distribute this software and its 

74# associated documentation for any purpose and without fee is hereby granted, 

75# provided that the above copyright notice appears in all copies, and that both 

76# that copyright notice and this permission notice appear in supporting 

77# documentation, and that the name of Secret Labs AB or the author(s) not be used 

78# in advertising or publicity pertaining to distribution of the software 

79# without specific, written prior permission. 

80# 

81# SECRET LABS AB AND THE AUTHORS DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS 

82# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 

83# IN NO EVENT SHALL SECRET LABS AB OR THE AUTHORS BE LIABLE FOR ANY SPECIAL, 

84# INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 

85# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 

86# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 

87# PERFORMANCE OF THIS SOFTWARE. 

88 

# Module version information:
__date__ = "2023-12-01"
__version__ = '0.47'
__author__ = "Philippe Lagadec"

# Public API exported by "from olefile import *":
__all__ = [
    'isOleFile', 'OleFileIO', 'OleMetadata', 'enable_logging',
    'MAGIC', 'STGTY_EMPTY',
    'STGTY_STREAM', 'STGTY_STORAGE', 'STGTY_ROOT', 'STGTY_PROPERTY',
    'STGTY_LOCKBYTES', 'MINIMAL_OLEFILE_SIZE',
    'DEFECT_UNSURE', 'DEFECT_POTENTIAL', 'DEFECT_INCORRECT',
    'DEFECT_FATAL', 'DEFAULT_PATH_ENCODING',
    'MAXREGSECT', 'DIFSECT', 'FATSECT', 'ENDOFCHAIN', 'FREESECT',
    'MAXREGSID', 'NOSTREAM', 'UNKNOWN_SIZE', 'WORD_CLSID',
    'OleFileIONotClosed',
]

103 

104import io 

105import sys 

106import struct, array, os.path, datetime, logging, warnings, traceback 

107 

#=== COMPATIBILITY WORKAROUNDS ================================================

# Python 3 removed the "long" type: alias it to int when it is missing.
# (On Python 2, "long" is a builtin and this probe succeeds.)
try:
    long
except NameError:
    long = int

# Use a lazy range on both Python 2 (xrange) and Python 3 (range):
iterrange = range
try:
    iterrange = xrange
except NameError:
    # Python 3: xrange was renamed to range, keep the default above.
    pass

# [PL] Pick an array typecode holding unsigned 32-bit values:
# 'L' is 32 bits on 32-bit platforms, 'I' on 64-bit platforms, and on
# 64-bit Jython only the signed 'i' has a 4-byte itemsize (Java has no
# unsigned types). With 'i', values must be masked with 0xFFFFFFFF to
# compare like unsigned ints: see the 'JYTHON-WORKAROUND' tags elsewhere.
for _typecode in 'LIi':
    if array.array(_typecode).itemsize == 4:
        UINT32 = _typecode
        break
else:
    raise ValueError('Need to fix a bug with 32 bit arrays, please contact author...')


# [PL] These workarounds were inspired from the Path module
# (see http://www.jorendorff.com/articles/python/path/)
# Python 3 removed basestring: alias it to str so isinstance checks still work.
# TODO: remove the use of basestring, as it was removed in Python 3
if str is not bytes:
    basestring = str

# Default encoding for path names: UTF-8 byte strings on Python 2,
# native unicode strings (None) on Python 3.
DEFAULT_PATH_ENCODING = 'utf-8' if sys.version_info[0] < 3 else None

160 

161 

162# === LOGGING ================================================================= 

163 

def get_logger(name, level=logging.CRITICAL+1):
    """
    Return a logger suitable for this module, without touching the root
    logger (so other modules' logs do not show up on screen).

    If a logger with the same name already exists it is reused, because
    creating it again would attach a duplicate handler and double every
    message. The default level CRITICAL+1 is above any real level, which
    effectively silences the logger.

    :param str name: logger name
    :param int level: logging level to apply (default: CRITICAL+1 = silent)
    :returns: logging.Logger instance
    """
    # Check for an existing logger BEFORE calling getLogger, since
    # getLogger itself registers the name in loggerDict:
    already_registered = name in logging.Logger.manager.loggerDict
    logger = logging.getLogger(name)
    if not already_registered:
        # First creation: attach only a NullHandler; actual logging
        # configuration is left to the host application.
        logger.addHandler(logging.NullHandler())
    # In both cases, make sure the requested level is applied:
    logger.setLevel(level)
    return logger


# a global logger object used for debugging:
log = get_logger('olefile')

193 

194 

def enable_logging():
    """
    Enable logging output for this module (it is disabled by default).

    Sets the module-specific logger level to NOTSET, so the logging
    configuration of the host application decides what actually gets
    emitted.
    """
    log.setLevel(logging.NOTSET)

202 

203 

#=== CONSTANTS ===============================================================

#: magic bytes that should be at the beginning of every OLE file:
MAGIC = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'

# [PL]: special sector IDs used in the FAT/DIFAT (from AAF specifications)
MAXREGSECT = 0xFFFFFFFA  #: (-6) maximum SECT
DIFSECT = 0xFFFFFFFC     #: (-4) denotes a DIFAT sector in a FAT
FATSECT = 0xFFFFFFFD     #: (-3) denotes a FAT sector in a FAT
ENDOFCHAIN = 0xFFFFFFFE  #: (-2) end of a virtual stream chain
FREESECT = 0xFFFFFFFF    #: (-1) unallocated sector

# [PL]: special directory entry IDs (from AAF specifications)
MAXREGSID = 0xFFFFFFFA   #: (-6) maximum directory entry ID
NOSTREAM = 0xFFFFFFFF    #: (-1) unallocated directory entry

# [PL] object types in storage (from AAF specifications)
STGTY_EMPTY = 0      #: empty directory entry
STGTY_STORAGE = 1    #: element is a storage object
STGTY_STREAM = 2     #: element is a stream object
STGTY_LOCKBYTES = 3  #: element is an ILockBytes object
STGTY_PROPERTY = 4   #: element is an IPropertyStorage object
STGTY_ROOT = 5       #: element is a root storage

# Placeholder used when a stream size is not known yet (see OleStream):
UNKNOWN_SIZE = 0x7FFFFFFF

# --------------------------------------------------------------------
# OLE property types (for property set streams), one per line:
VT_EMPTY = 0
VT_NULL = 1
VT_I2 = 2
VT_I4 = 3
VT_R4 = 4
VT_R8 = 5
VT_CY = 6
VT_DATE = 7
VT_BSTR = 8
VT_DISPATCH = 9
VT_ERROR = 10
VT_BOOL = 11
VT_VARIANT = 12
VT_UNKNOWN = 13
VT_DECIMAL = 14
VT_I1 = 16
VT_UI1 = 17
VT_UI2 = 18
VT_UI4 = 19
VT_I8 = 20
VT_UI8 = 21
VT_INT = 22
VT_UINT = 23
VT_VOID = 24
VT_HRESULT = 25
VT_PTR = 26
VT_SAFEARRAY = 27
VT_CARRAY = 28
VT_USERDEFINED = 29
VT_LPSTR = 30
VT_LPWSTR = 31
VT_FILETIME = 64
VT_BLOB = 65
VT_STREAM = 66
VT_STORAGE = 67
VT_STREAMED_OBJECT = 68
VT_STORED_OBJECT = 69
VT_BLOB_OBJECT = 70
VT_CF = 71
VT_CLSID = 72
VT_VECTOR = 0x1000

# Map each property type id back to its VT_* name (for debugging purposes).
# The snapshot of vars() is taken before the loop variables are bound, so
# only the VT_* constants above are collected:
VT = {}
for _vt_name, _vt_value in list(vars().items()):
    if _vt_name.startswith("VT_"):
        VT[_vt_value] = _vt_name

# --------------------------------------------------------------------
# Some common document types (root.clsid fields)
WORD_CLSID = "00020900-0000-0000-C000-000000000046"
# TODO: check Excel, PPT, ...

# [PL]: Defect levels to classify parsing errors - see OleFileIO._raise_defect()
DEFECT_UNSURE = 10     # a case which looks weird, but not sure it's a defect
DEFECT_POTENTIAL = 20  # a potential defect
DEFECT_INCORRECT = 30  # an error according to specifications, but parsing can go on
DEFECT_FATAL = 40      # an error which cannot be ignored, parsing is impossible

# Minimal size of an empty OLE file, with 512-bytes sectors = 1536 bytes
# (this is used in isOleFile and OleFileIO.open)
MINIMAL_OLEFILE_SIZE = 1536

269 

270#=== FUNCTIONS =============================================================== 

271 

def isOleFile(filename=None, data=None):
    """
    Test if a file is an OLE container, by checking the magic bytes in its
    header.

    .. note::
        Only the first 8 bytes are inspected, not the rest of the OLE
        structure. When data is provided, the file size must also reach the
        minimal size of an OLE file (1536 bytes). When filename is a path on
        disk, the file is opened just long enough to read the first 8 bytes.

    .. versionadded:: 0.16

    :param filename: filename, contents or file-like object of the OLE file
        (string-like or file-like object)

        - if data is provided, filename is ignored.
        - if filename is a unicode string, it is used as path of the file to open on disk.
        - if filename is a bytes string smaller than 1536 bytes, it is used as path
          of the file to open on disk.
        - [deprecated] if filename is a bytes string longer than 1535 bytes, it is parsed
          as the content of an OLE file in memory (bytes type only). This use case is
          deprecated: pass the new data parameter instead.
        - if filename is a file-like object (with read and seek methods),
          it is parsed as-is.
    :type filename: bytes, str, unicode or file-like object

    :param data: bytes string with the contents of the file to be checked, when the file
        is in memory (added in olefile 0.47)
    :type data: bytes

    :returns: True if OLE, False otherwise.
    :rtype: bool
    """
    if data is not None:
        # in-memory contents: an OLE file can never be smaller than the
        # minimal size (header + FAT + directory), so reject short data early
        if len(data) < MINIMAL_OLEFILE_SIZE:
            return False
        header = data[:len(MAGIC)]
    elif hasattr(filename, 'read') and hasattr(filename, 'seek'):
        # file-like object: read the header directly, then rewind so the
        # caller can still parse the object from the start
        header = filename.read(len(MAGIC))
        filename.seek(0)
    elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE:
        # [deprecated] a long bytes string is treated as in-memory contents
        header = filename[:len(MAGIC)]
    else:
        # otherwise: path of a file on disk
        with open(filename, 'rb') as fp:
            header = fp.read(len(MAGIC))
    return header == MAGIC

331 

332 

if bytes is str:
    # Python 2: indexing a bytes (str) object yields a 1-char string,
    # so convert it to its ordinal value:
    def i8(c):
        return ord(c)
else:
    # Python 3: indexing a bytes object already yields an int, so only
    # convert when we were given a bytes slice instead of an int:
    def i8(c):
        if c.__class__ is int:
            return c
        return c[0]

341 

342 

def i16(c, o=0):
    """
    Convert 2 bytes (16 bits) of a byte string to an unsigned integer,
    using little-endian byte order.

    :param c: string containing bytes to convert
    :param o: offset of bytes to convert in string
    """
    value, = struct.unpack("<H", c[o:o+2])
    return value

351 

352 

def i32(c, o=0):
    """
    Convert 4 bytes (32 bits) of a byte string to an unsigned integer,
    using little-endian byte order.

    :param c: string containing bytes to convert
    :param o: offset of bytes to convert in string
    """
    value, = struct.unpack("<I", c[o:o+4])
    return value

361 

362 

363def _clsid(clsid): 

364 """ 

365 Converts a CLSID to a human-readable string. 

366 

367 :param clsid: string of length 16. 

368 """ 

369 assert len(clsid) == 16 

370 # if clsid is only made of null bytes, return an empty string: 

371 # (PL: why not simply return the string with zeroes?) 

372 if not clsid.strip(b"\0"): 

373 return "" 

374 return (("%08X-%04X-%04X-%02X%02X-" + "%02X" * 6) % 

375 ((i32(clsid, 0), i16(clsid, 4), i16(clsid, 6)) + 

376 tuple(map(i8, clsid[8:16])))) 

377 

378 

379 

def filetime2datetime(filetime):
    """
    Convert a FILETIME value to a Python datetime.datetime object.

    :param filetime: 64-bit integer counting 100-nanosecond intervals
        since January 1, 1601 (the Windows FILETIME epoch).
    :returns: corresponding datetime.datetime object
    """
    # TODO: manage exception when microseconds is too large
    # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/
    # FILETIME ticks are 100 ns each: integer-divide by 10 for microseconds.
    microseconds = filetime // 10
    return datetime.datetime(1601, 1, 1, 0, 0, 0) + datetime.timedelta(microseconds=microseconds)

389 

390 

391 

392#=== CLASSES ================================================================== 

393 

class OleFileError(IOError):
    """
    Base class for all errors raised by this module.
    Subclasses IOError so existing callers catching IOError keep working.
    """
    pass

399 

class NotOleFileError(OleFileError):
    """
    Raised when the opened file turns out not to be an OLE file.
    """
    pass

405 

class OleMetadata:
    """
    Parse and store metadata from the standard property streams of OLE files.

    Available attributes:
    codepage, title, subject, author, keywords, comments, template,
    last_saved_by, revision_number, total_edit_time, last_printed, create_time,
    last_saved_time, num_pages, num_words, num_chars, thumbnail,
    creating_application, security, codepage_doc, category, presentation_target,
    bytes, lines, paragraphs, slides, notes, hidden_slides, mm_clips,
    scale_crop, heading_pairs, titles_of_parts, manager, company, links_dirty,
    chars_with_spaces, unused, shared_doc, link_base, hlinks, hlinks_changed,
    version, dig_sig, content_type, content_status, language, doc_version

    An attribute is None when the corresponding property is absent from the
    OLE file.

    References for SummaryInformation stream:

    - https://msdn.microsoft.com/en-us/library/dd942545.aspx
    - https://msdn.microsoft.com/en-us/library/windows/desktop/aa380376%28v=vs.85%29.aspx
    - http://sedna-soft.de/articles/summary-information-stream/
    - https://poi.apache.org/apidocs/org/apache/poi/hpsf/SummaryInformation.html

    References for DocumentSummaryInformation stream:

    - https://msdn.microsoft.com/en-us/library/dd945671%28v=office.12%29.aspx
    - https://poi.apache.org/apidocs/org/apache/poi/hpsf/DocumentSummaryInformation.html

    New in version 0.25
    """

    # Attribute names for SummaryInformation stream properties,
    # ordered by property id, starting at 1:
    SUMMARY_ATTRIBS = ['codepage', 'title', 'subject', 'author', 'keywords', 'comments',
        'template', 'last_saved_by', 'revision_number', 'total_edit_time',
        'last_printed', 'create_time', 'last_saved_time', 'num_pages',
        'num_words', 'num_chars', 'thumbnail', 'creating_application',
        'security']

    # Attribute names for DocumentSummaryInformation stream properties,
    # ordered by property id, starting at 1:
    DOCSUM_ATTRIBS = ['codepage_doc', 'category', 'presentation_target', 'bytes', 'lines', 'paragraphs',
        'slides', 'notes', 'hidden_slides', 'mm_clips',
        'scale_crop', 'heading_pairs', 'titles_of_parts', 'manager',
        'company', 'links_dirty', 'chars_with_spaces', 'unused', 'shared_doc',
        'link_base', 'hlinks', 'hlinks_changed', 'version', 'dig_sig',
        'content_type', 'content_status', 'language', 'doc_version']

    def __init__(self):
        """
        Constructor for OleMetadata: every known attribute starts as None.
        """
        # The two class-level lists enumerate all attributes in order, so a
        # single loop covers both property streams:
        for attrib in self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS:
            setattr(self, attrib, None)

    def parse_properties(self, ole_file):
        """
        Parse standard properties of an OLE file, from the streams
        ``\\x05SummaryInformation`` and ``\\x05DocumentSummaryInformation``,
        if present.
        Properties are converted to strings, integers or python datetime objects.
        If a property is not present, its value is set to None.

        :param ole_file: OleFileIO object from which to parse properties
        """
        # reset all attributes first, so missing properties read as None:
        for attrib in self.SUMMARY_ATTRIBS + self.DOCSUM_ATTRIBS:
            setattr(self, attrib, None)
        if ole_file.exists("\x05SummaryInformation"):
            # timestamps are converted to python datetime, except
            # total_edit_time (property #10) which is a duration:
            props = ole_file.getproperties("\x05SummaryInformation",
                convert_time=True, no_conversion=[10])
            # ids for standard properties start at 0x01:
            for prop_id, attrib in enumerate(self.SUMMARY_ATTRIBS, 1):
                setattr(self, attrib, props.get(prop_id, None))
        if ole_file.exists("\x05DocumentSummaryInformation"):
            props = ole_file.getproperties("\x05DocumentSummaryInformation",
                convert_time=True)
            # ids for standard properties start at 0x01:
            for prop_id, attrib in enumerate(self.DOCSUM_ATTRIBS, 1):
                setattr(self, attrib, props.get(prop_id, None))

    def dump(self):
        """
        Print all metadata attributes, for debugging purposes.
        """
        print('Properties from SummaryInformation stream:')
        for prop in self.SUMMARY_ATTRIBS:
            print('- {}: {}'.format(prop, repr(getattr(self, prop))))
        print('Properties from DocumentSummaryInformation stream:')
        for prop in self.DOCSUM_ATTRIBS:
            print('- {}: {}'.format(prop, repr(getattr(self, prop))))

559 

class OleFileIONotClosed(RuntimeWarning):
    """
    Warning emitted when an OleFileIO instance is destructed while its
    file handle is still open.
    """
    def __init__(self, stack_of_open=None):
        super(OleFileIONotClosed, self).__init__()
        # traceback captured at open() time, if it was recorded
        # (expected to provide a format() method, like traceback.StackSummary):
        self.stack_of_open = stack_of_open

    def __str__(self):
        msg = ('Deleting OleFileIO instance with open file handle. '
               'You should ensure that OleFileIO is never deleted '
               'without calling close() first. Consider using '
               '"with OleFileIO(...) as ole: ...".')
        if not self.stack_of_open:
            return msg
        # append the recorded stacktrace of the open() call:
        return ''.join([msg, '\n', 'Stacktrace of open() call:\n'] +
                       self.stack_of_open.format())

578 

579 

580# --- OleStream --------------------------------------------------------------- 

581 

class OleStream(io.BytesIO):
    """
    OLE2 Stream

    Returns a read-only file object which can be used to read
    the contents of a OLE stream (instance of the BytesIO class).
    To open a stream, use the openstream method in the OleFileIO class.

    This function can be used with either ordinary streams,
    or ministreams, depending on the offset, sectorsize, and
    fat table arguments.

    Attributes:

        - size: actual size of data stream, after it was opened.
    """
    # FIXME: should store the list of sects obtained by following
    # the fat chain, and load new sectors on demand instead of
    # loading it all in one go.

    def __init__(self, fp, sect, size, offset, sectorsize, fat, filesize, olefileio):
        """
        Constructor for OleStream class.

        Follows the whole sector chain starting at `sect`, reads every
        sector into memory, then initializes the underlying BytesIO with
        the collected (and size-truncated) data.

        :param fp: file object, the OLE container or the MiniFAT stream
        :param sect: sector index of first sector in the stream
        :param size: total size of the stream (may be UNKNOWN_SIZE)
        :param offset: offset in bytes for the first FAT or MiniFAT sector
        :param sectorsize: size of one sector
        :param fat: array/list of sector indexes (FAT or MiniFAT)
        :param filesize: size of OLE file (for debugging)
        :param olefileio: OleFileIO object containing this stream
        :returns: a BytesIO instance containing the OLE stream

        :raises OSError: if the parent OleFileIO is already closed
        :raises OleFileError: (via _raise_defect) on malformed chains,
            depending on the OleFileIO defect-handling policy
        """
        log.debug('OleStream.__init__:')
        log.debug('  sect=%d (%X), size=%d, offset=%d, sectorsize=%d, len(fat)=%d, fp=%s'
            %(sect,sect,size,offset,sectorsize,len(fat), repr(fp)))
        self.ole = olefileio
        # this check is necessary, otherwise when attempting to open a stream
        # from a closed OleFileIO, a stream of size zero is returned without
        # raising an exception. (see issue #81)
        if self.ole.fp.closed:
            raise OSError('Attempting to open a stream from a closed OLE File')
        # [PL] To detect malformed documents with FAT loops, we compute the
        # expected number of sectors in the stream:
        unknown_size = False
        if size == UNKNOWN_SIZE:
            # this is the case when called from OleFileIO._open(), and stream
            # size is not known in advance (for example when reading the
            # Directory stream). Then we can only guess maximum size:
            size = len(fat)*sectorsize
            # and we keep a record that size was unknown:
            unknown_size = True
            log.debug('  stream with UNKNOWN SIZE')
        # ceiling division: number of whole sectors needed to hold `size` bytes
        nb_sectors = (size + (sectorsize-1)) // sectorsize
        log.debug('nb_sectors = %d' % nb_sectors)
        # This number should (at least) be less than the total number of
        # sectors in the given FAT:
        if nb_sectors > len(fat):
            self.ole._raise_defect(DEFECT_INCORRECT, 'malformed OLE document, stream too large')
        # optimization(?): data is first a list of strings, and join() is called
        # at the end to concatenate all in one string.
        # (this may not be really useful with recent Python versions)
        data = []
        # if size is zero, then first sector index should be ENDOFCHAIN:
        if size == 0 and sect != ENDOFCHAIN:
            log.debug('size == 0 and sect != ENDOFCHAIN:')
            self.ole._raise_defect(DEFECT_INCORRECT, 'incorrect OLE sector index for empty stream')
        # [PL] A fixed-length for loop is used instead of an undefined while
        # loop to avoid DoS attacks: a FAT loop can never make us read more
        # than nb_sectors sectors.
        for i in range(nb_sectors):
            log.debug('Reading stream sector[%d] = %Xh' % (i, sect))
            # Sector index may be ENDOFCHAIN, but only if size was unknown
            if sect == ENDOFCHAIN:
                if unknown_size:
                    log.debug('Reached ENDOFCHAIN sector for stream with unknown size')
                    break
                else:
                    # else this means that the stream is smaller than declared:
                    log.debug('sect=ENDOFCHAIN before expected size')
                    self.ole._raise_defect(DEFECT_INCORRECT, 'incomplete OLE stream')
            # sector index should be within FAT:
            if sect<0 or sect>=len(fat):
                log.debug('sect=%d (%X) / len(fat)=%d' % (sect, sect, len(fat)))
                log.debug('i=%d / nb_sectors=%d' %(i, nb_sectors))
                self.ole._raise_defect(DEFECT_INCORRECT, 'incorrect OLE FAT, sector index out of range')
                # stop reading here if the exception is ignored
                # (_raise_defect may only record the defect instead of raising,
                # depending on the OleFileIO defect policy):
                break
            # TODO: merge this code with OleFileIO.getsect() ?
            # TODO: check if this works with 4K sectors:
            try:
                fp.seek(offset + sectorsize * sect)
            except Exception:
                log.debug('sect=%d, seek=%d, filesize=%d' %
                    (sect, offset+sectorsize*sect, filesize))
                self.ole._raise_defect(DEFECT_INCORRECT, 'OLE sector index out of range')
                # stop reading here if the exception is ignored:
                break
            sector_data = fp.read(sectorsize)
            # [PL] check if there was enough data:
            # Note: if sector is the last of the file, sometimes it is not a
            # complete sector (of 512 or 4K), so we may read less than
            # sectorsize.
            if len(sector_data)!=sectorsize and sect!=(len(fat)-1):
                log.debug('sect=%d / len(fat)=%d, seek=%d / filesize=%d, len read=%d' %
                    (sect, len(fat), offset+sectorsize*sect, filesize, len(sector_data)))
                log.debug('seek+len(read)=%d' % (offset+sectorsize*sect+len(sector_data)))
                self.ole._raise_defect(DEFECT_INCORRECT, 'incomplete OLE sector')
            data.append(sector_data)
            # jump to next sector in the FAT:
            try:
                # the 0xFFFFFFFF mask forces an unsigned 32-bit value, needed
                # on Jython where array values are signed:
                sect = fat[sect] & 0xFFFFFFFF  # JYTHON-WORKAROUND
            except IndexError:
                # [PL] if pointer is out of the FAT an exception is raised
                self.ole._raise_defect(DEFECT_INCORRECT, 'incorrect OLE FAT, sector index out of range')
                # stop reading here if the exception is ignored:
                break
        # [PL] Last sector should be a "end of chain" marker:
        # if sect != ENDOFCHAIN:
        #     raise IOError('incorrect last sector index in OLE stream')
        data = b"".join(data)
        # Data is truncated to the actual stream size:
        if len(data) >= size:
            log.debug('Read data of length %d, truncated to stream size %d' % (len(data), size))
            data = data[:size]
            # actual stream size is stored for future use:
            self.size = size
        elif unknown_size:
            # actual stream size was not known, now we know the size of read
            # data:
            log.debug('Read data of length %d, the stream size was unknown' % len(data))
            self.size = len(data)
        else:
            # read data is less than expected:
            log.debug('Read data of length %d, less than expected stream size %d' % (len(data), size))
            # TODO: provide details in exception message
            self.size = len(data)
            self.ole._raise_defect(DEFECT_INCORRECT, 'OLE stream size is less than declared')
        # when all data is read in memory, BytesIO constructor is called
        io.BytesIO.__init__(self, data)
        # Then the OleStream object can be used as a read-only file object.

728 

729 

730# --- OleDirectoryEntry ------------------------------------------------------- 

731 

class OleDirectoryEntry:
    """
    OLE2 Directory Entry pointing to a stream or a storage.

    Each entry is parsed from a fixed 128-byte record in the OLE directory
    stream; entries form a red-black tree (see [MS-CFB] 2.6).
    """
    # struct format to parse a 128-byte directory entry:
    # <: little-endian byte order, standard sizes
    # (note: this should guarantee that Q returns a 64 bits int)
    # 64s: string containing entry name in unicode UTF-16 (max 31 chars) + null char = 64 bytes
    # H: uint16, number of bytes used in name buffer, including null = (len+1)*2
    # B: uint8, dir entry type (between 0 and 5)
    # B: uint8, color: 0=black, 1=red
    # I: uint32, index of left child node in the red-black tree, NOSTREAM if none
    # I: uint32, index of right child node in the red-black tree, NOSTREAM if none
    # I: uint32, index of child root node if it is a storage, else NOSTREAM
    # 16s: CLSID, unique identifier (only used if it is a storage)
    # I: uint32, user flags
    # Q (was 8s): uint64, creation timestamp or zero
    # Q (was 8s): uint64, modification timestamp or zero
    # I: uint32, SID of first sector if stream or ministream, SID of 1st sector
    #    of stream containing ministreams if root entry, 0 otherwise
    # I: uint32, total stream size in bytes if stream (low 32 bits), 0 otherwise
    # I: uint32, total stream size in bytes if stream (high 32 bits), 0 otherwise
    STRUCT_DIRENTRY = '<64sHBBIII16sIQQIII'
    # size of a directory entry: 128 bytes
    DIRENTRY_SIZE = 128
    assert struct.calcsize(STRUCT_DIRENTRY) == DIRENTRY_SIZE

    def __init__(self, entry, sid, ole_file):
        """
        Constructor for an OleDirectoryEntry object.
        Parses a 128-bytes entry from the OLE Directory stream.

        :param bytes entry: bytes string (must be 128 bytes long)
        :param int sid: index of this directory entry in the OLE file directory
        :param OleFileIO ole_file: OleFileIO object containing this directory entry
        """
        self.sid = sid
        # ref to ole_file is stored for future use
        self.olefile = ole_file
        # kids is a list of children entries, if this entry is a storage:
        # (list of OleDirectoryEntry objects)
        self.kids = []
        # kids_dict is a dictionary of children entries, indexed by their
        # name in lowercase: used to quickly find an entry, and to detect
        # duplicates
        self.kids_dict = {}
        # flag used to detect if the entry is referenced more than once in
        # directory:
        self.used = False
        # decode DirEntry
        (
            self.name_raw,    # 64s: entry name in UTF-16LE (max 31 chars) + null char = 64 bytes
            self.namelength,  # H: number of bytes used in name buffer, including null = (len+1)*2
            self.entry_type,
            self.color,
            self.sid_left,
            self.sid_right,
            self.sid_child,
            clsid,
            self.dwUserFlags,
            self.createTime,
            self.modifyTime,
            self.isectStart,
            self.sizeLow,
            self.sizeHigh
        ) = struct.unpack(OleDirectoryEntry.STRUCT_DIRENTRY, entry)
        if self.entry_type not in [STGTY_ROOT, STGTY_STORAGE, STGTY_STREAM, STGTY_EMPTY]:
            ole_file._raise_defect(DEFECT_INCORRECT, 'unhandled OLE storage type')
        # only first directory entry can (and should) be root:
        if self.entry_type == STGTY_ROOT and sid != 0:
            ole_file._raise_defect(DEFECT_INCORRECT, 'duplicate OLE root entry')
        if sid == 0 and self.entry_type != STGTY_ROOT:
            ole_file._raise_defect(DEFECT_INCORRECT, 'incorrect OLE root entry')
        # log.debug(struct.unpack(fmt_entry, entry[:len_entry]))
        # name should be at most 31 unicode characters + null character,
        # so 64 bytes in total (31*2 + 2):
        if self.namelength > 64:
            ole_file._raise_defect(DEFECT_INCORRECT, 'incorrect DirEntry name length >64 bytes')
            # if exception not raised, namelength is set to the maximum value:
            self.namelength = 64
        # only characters without ending null char are kept:
        self.name_utf16 = self.name_raw[:(self.namelength-2)]
        # TODO: check if the name is actually followed by a null unicode character ([MS-CFB] 2.6.1)
        # TODO: check if the name does not contain forbidden characters:
        # [MS-CFB] 2.6.1: "The following characters are illegal and MUST NOT be part of the name: '/', '\', ':', '!'."
        # name is converted from UTF-16LE to the path encoding specified in the OleFileIO:
        self.name = ole_file._decode_utf16_str(self.name_utf16)

        log.debug('DirEntry SID=%d: %s' % (self.sid, repr(self.name)))
        log.debug(' - type: %d' % self.entry_type)
        log.debug(' - sect: %Xh' % self.isectStart)
        log.debug(' - SID left: %d, right: %d, child: %d' % (self.sid_left,
                  self.sid_right, self.sid_child))

        # sizeHigh is only used for 4K sectors, it should be zero for 512 bytes
        # sectors, BUT apparently some implementations set it as 0xFFFFFFFF, 1
        # or some other value so it cannot be raised as a defect in general:
        if ole_file.sectorsize == 512:
            if self.sizeHigh != 0 and self.sizeHigh != 0xFFFFFFFF:
                log.debug('sectorsize=%d, sizeLow=%d, sizeHigh=%d (%X)' %
                          (ole_file.sectorsize, self.sizeLow, self.sizeHigh, self.sizeHigh))
                ole_file._raise_defect(DEFECT_UNSURE, 'incorrect OLE stream size')
            self.size = self.sizeLow
        else:
            # 4K sectors: combine low and high 32-bit halves into a 64-bit size
            # (long is int on Python 3, defined for Python 2 compatibility)
            self.size = self.sizeLow + (long(self.sizeHigh)<<32)
        log.debug(' - size: %d (sizeLow=%d, sizeHigh=%d)' % (self.size, self.sizeLow, self.sizeHigh))

        self.clsid = _clsid(clsid)
        # a storage should have a null size, BUT some implementations such as
        # Word 8 for Mac seem to allow non-null values => Potential defect:
        if self.entry_type == STGTY_STORAGE and self.size != 0:
            ole_file._raise_defect(DEFECT_POTENTIAL, 'OLE storage with size>0')
        # check if stream is not already referenced elsewhere:
        self.is_minifat = False
        if self.entry_type in (STGTY_ROOT, STGTY_STREAM) and self.size>0:
            if self.size < ole_file.minisectorcutoff \
            and self.entry_type==STGTY_STREAM: # only streams can be in MiniFAT
                # ministream object
                self.is_minifat = True
            else:
                self.is_minifat = False
            ole_file._check_duplicate_stream(self.isectStart, self.is_minifat)
        # sector chain, lazily built by build_sect_chain():
        self.sect_chain = None

    def build_sect_chain(self, ole_file):
        """
        Build the sector chain for a stream (from the FAT or the MiniFAT)

        :param OleFileIO ole_file: OleFileIO object containing this directory entry
        :return: nothing
        """
        # TODO: seems to be used only from _write_mini_stream, is it useful?
        # TODO: use self.olefile instead of ole_file
        if self.sect_chain:
            # chain already built, nothing to do:
            return
        if self.entry_type not in (STGTY_ROOT, STGTY_STREAM) or self.size == 0:
            # only non-empty streams/root have a sector chain:
            return

        self.sect_chain = list()

        if self.is_minifat and not ole_file.minifat:
            # MiniFAT is loaded lazily, make sure it is available:
            ole_file.loadminifat()

        next_sect = self.isectStart
        # follow the FAT/MiniFAT linked list until the end-of-chain marker:
        while next_sect != ENDOFCHAIN:
            self.sect_chain.append(next_sect)
            if self.is_minifat:
                next_sect = ole_file.minifat[next_sect]
            else:
                next_sect = ole_file.fat[next_sect]

    def build_storage_tree(self):
        """
        Read and build the red-black tree attached to this OleDirectoryEntry
        object, if it is a storage.
        Note that this method builds a tree of all subentries, so it should
        only be called for the root object once.
        """
        log.debug('build_storage_tree: SID=%d - %s - sid_child=%d'
                  % (self.sid, repr(self.name), self.sid_child))
        if self.sid_child != NOSTREAM:
            # if child SID is not NOSTREAM, then this entry is a storage.
            # Let's walk through the tree of children to fill the kids list:
            self.append_kids(self.sid_child)

            # Note from OpenOffice documentation: the safest way is to
            # recreate the tree because some implementations may store broken
            # red-black trees...

            # in the OLE file, entries are sorted on (length, name).
            # for convenience, we sort them on name instead:
            # (see rich comparison methods in this class)
            self.kids.sort()

    def append_kids(self, child_sid):
        """
        Walk through red-black tree of children of this directory entry to add
        all of them to the kids list. (recursive method)

        :param child_sid: index of child directory entry to use, or None when called
            first time for the root. (only used during recursion)
        """
        log.debug('append_kids: child_sid=%d' % child_sid)
        # [PL] this method was added to use simple recursion instead of a complex
        # algorithm.
        # if this is not a storage or a leaf of the tree, nothing to do:
        if child_sid == NOSTREAM:
            return
        # check if child SID is in the proper range:
        if child_sid<0 or child_sid>=len(self.olefile.direntries):
            self.olefile._raise_defect(DEFECT_INCORRECT, 'OLE DirEntry index out of range')
        else:
            # get child direntry:
            child = self.olefile._load_direntry(child_sid) #direntries[child_sid]
            log.debug('append_kids: child_sid=%d - %s - sid_left=%d, sid_right=%d, sid_child=%d'
                      % (child.sid, repr(child.name), child.sid_left, child.sid_right, child.sid_child))
            # Check if kid was not already referenced in a storage:
            if child.used:
                self.olefile._raise_defect(DEFECT_INCORRECT,
                                           'OLE Entry referenced more than once')
                return
            child.used = True
            # the directory entries are organized as a red-black tree.
            # (cf. Wikipedia for details)
            # First walk through left side of the tree:
            self.append_kids(child.sid_left)
            # Check if its name is not already used (case-insensitive):
            name_lower = child.name.lower()
            if name_lower in self.kids_dict:
                self.olefile._raise_defect(DEFECT_INCORRECT,
                                           "Duplicate filename in OLE storage")
            # Then the child_sid OleDirectoryEntry object is appended to the
            # kids list and dictionary:
            self.kids.append(child)
            self.kids_dict[name_lower] = child
            # Finally walk through right side of the tree:
            self.append_kids(child.sid_right)
            # Afterwards build kid's own tree if it's also a storage:
            child.build_storage_tree()

    def __eq__(self, other):
        "Compare entries by name"
        return self.name == other.name

    def __lt__(self, other):
        "Compare entries by name"
        return self.name < other.name

    def __ne__(self, other):
        "Compare entries by name (inverse of __eq__)"
        return not self.__eq__(other)

    def __le__(self, other):
        "Compare entries by name"
        return self.__eq__(other) or self.__lt__(other)

    # Reflected __lt__() and __le__() will be used for __gt__() and __ge__()

    # TODO: replace by the same function as MS implementation ?
    # (order by name length first, then case-insensitive order)

    def dump(self, tab = 0):
        "Dump this entry, and all its subentries (for debug purposes only)"
        TYPES = ["(invalid)", "(storage)", "(stream)", "(lockbytes)",
                 "(property)", "(root)"]
        try:
            type_name = TYPES[self.entry_type]
        except IndexError:
            # entry_type out of the known range, still dump it:
            type_name = '(UNKNOWN)'
        print(" "*tab + repr(self.name), type_name, end=' ')
        if self.entry_type in (STGTY_STREAM, STGTY_ROOT):
            print(self.size, "bytes", end=' ')
        print()
        if self.entry_type in (STGTY_STORAGE, STGTY_ROOT) and self.clsid:
            print(" "*tab + "{%s}" % self.clsid)

        for kid in self.kids:
            kid.dump(tab + 2)

    def getmtime(self):
        """
        Return modification time of a directory entry.

        :returns: None if modification time is null, a python datetime object
            otherwise (UTC timezone)

        new in version 0.26
        """
        if self.modifyTime == 0:
            return None
        return filetime2datetime(self.modifyTime)


    def getctime(self):
        """
        Return creation time of a directory entry.

        :returns: None if creation time is null, a python datetime object
            otherwise (UTC timezone)

        new in version 0.26
        """
        if self.createTime == 0:
            return None
        return filetime2datetime(self.createTime)

1017#--- OleFileIO ---------------------------------------------------------------- 

1018 

1019class OleFileIO: 

1020 """ 

1021 OLE container object 

1022 

1023 This class encapsulates the interface to an OLE 2 structured 

1024 storage file. Use the listdir and openstream methods to 

1025 access the contents of this file. 

1026 

1027 Object names are given as a list of strings, one for each subentry 

1028 level. The root entry should be omitted. For example, the following 

1029 code extracts all image streams from a Microsoft Image Composer file:: 

1030 

1031 with OleFileIO("fan.mic") as ole: 

1032 

1033 for entry in ole.listdir(): 

1034 if entry[1:2] == "Image": 

1035 fin = ole.openstream(entry) 

1036 fout = open(entry[0:1], "wb") 

1037 while True: 

1038 s = fin.read(8192) 

1039 if not s: 

1040 break 

1041 fout.write(s) 

1042 

1043 You can use the viewer application provided with the Python Imaging 

1044 Library to view the resulting files (which happens to be standard 

1045 TIFF files). 

1046 """ 

1047 

1048 def __init__(self, filename=None, raise_defects=DEFECT_FATAL, 

1049 write_mode=False, debug=False, path_encoding=DEFAULT_PATH_ENCODING): 

1050 """ 

1051 Constructor for the OleFileIO class. 

1052 

1053 :param filename: file to open. 

1054 

1055 - if filename is a string smaller than 1536 bytes, it is the path 

1056 of the file to open. (bytes or unicode string) 

1057 - if filename is a string longer than 1535 bytes, it is parsed 

1058 as the content of an OLE file in memory. (bytes type only) 

1059 - if filename is a file-like object (with read, seek and tell methods), 

1060 it is parsed as-is. The caller is responsible for closing it when done. 

1061 

1062 :param raise_defects: minimal level for defects to be raised as exceptions. 

1063 (use DEFECT_FATAL for a typical application, DEFECT_INCORRECT for a 

1064 security-oriented application, see source code for details) 

1065 

1066 :param write_mode: bool, if True the file is opened in read/write mode instead 

1067 of read-only by default. 

1068 

1069 :param debug: bool, set debug mode (deprecated, not used anymore) 

1070 

1071 :param path_encoding: None or str, name of the codec to use for path 

1072 names (streams and storages), or None for Unicode. 

1073 Unicode by default on Python 3+, UTF-8 on Python 2.x. 

1074 (new in olefile 0.42, was hardcoded to Latin-1 until olefile v0.41) 

1075 """ 

1076 # minimal level for defects to be raised as exceptions: 

1077 self._raise_defects_level = raise_defects 

1078 #: list of defects/issues not raised as exceptions: 

1079 #: tuples of (exception type, message) 

1080 self.parsing_issues = [] 

1081 self.write_mode = write_mode 

1082 self.path_encoding = path_encoding 

1083 # initialize all attributes to default values: 

1084 self._filesize = None 

1085 self.ministream = None 

1086 self._used_streams_fat = [] 

1087 self._used_streams_minifat = [] 

1088 self.byte_order = None 

1089 self.directory_fp = None 

1090 self.direntries = None 

1091 self.dll_version = None 

1092 self.fat = None 

1093 self.first_difat_sector = None 

1094 self.first_dir_sector = None 

1095 self.first_mini_fat_sector = None 

1096 self.fp = None 

1097 self.header_clsid = None 

1098 self.header_signature = None 

1099 self.metadata = None 

1100 self.mini_sector_shift = None 

1101 self.mini_sector_size = None 

1102 self.mini_stream_cutoff_size = None 

1103 self.minifat = None 

1104 self.minifatsect = None 

1105 # TODO: duplicates? 

1106 self.minisectorcutoff = None 

1107 self.minisectorsize = None 

1108 self.ministream = None 

1109 self.minor_version = None 

1110 self.nb_sect = None 

1111 self.num_difat_sectors = None 

1112 self.num_dir_sectors = None 

1113 self.num_fat_sectors = None 

1114 self.num_mini_fat_sectors = None 

1115 self.reserved1 = None 

1116 self.reserved2 = None 

1117 self.root = None 

1118 self.sector_shift = None 

1119 self.sector_size = None 

1120 self.transaction_signature_number = None 

1121 self.warn_if_not_closed = False 

1122 self._we_opened_fp = False 

1123 self._open_stack = None 

1124 if filename: 

1125 # try opening, ensure fp is closed if that fails 

1126 try: 

1127 self.open(filename, write_mode=write_mode) 

1128 except Exception: 

1129 # caller has no chance of calling close() now 

1130 self._close(warn=False) 

1131 raise 

1132 

1133 def __del__(self): 

1134 """Destructor, ensures all file handles are closed that we opened.""" 

1135 self._close(warn=True) 

1136 # super(OleFileIO, self).__del__() # there's no super-class destructor 

1137 

1138 

1139 def __enter__(self): 

1140 return self 

1141 

1142 

1143 def __exit__(self, *args): 

1144 self._close(warn=False) 

1145 

1146 

1147 def _raise_defect(self, defect_level, message, exception_type=OleFileError): 

1148 """ 

1149 This method should be called for any defect found during file parsing. 

1150 It may raise an OleFileError exception according to the minimal level chosen 

1151 for the OleFileIO object. 

1152 

1153 :param defect_level: defect level, possible values are: 

1154 

1155 - DEFECT_UNSURE : a case which looks weird, but not sure it's a defect 

1156 - DEFECT_POTENTIAL : a potential defect 

1157 - DEFECT_INCORRECT : an error according to specifications, but parsing can go on 

1158 - DEFECT_FATAL : an error which cannot be ignored, parsing is impossible 

1159 

1160 :param message: string describing the defect, used with raised exception. 

1161 :param exception_type: exception class to be raised, OleFileError by default 

1162 """ 

1163 # added by [PL] 

1164 if defect_level >= self._raise_defects_level: 

1165 log.error(message) 

1166 raise exception_type(message) 

1167 else: 

1168 # just record the issue, no exception raised: 

1169 self.parsing_issues.append((exception_type, message)) 

1170 log.warning(message) 

1171 

1172 

1173 def _decode_utf16_str(self, utf16_str, errors='replace'): 

1174 """ 

1175 Decode a string encoded in UTF-16 LE format, as found in the OLE 

1176 directory or in property streams. Return a string encoded 

1177 according to the path_encoding specified for the OleFileIO object. 

1178 

1179 :param bytes utf16_str: bytes string encoded in UTF-16 LE format 

1180 :param str errors: str, see python documentation for str.decode() 

1181 :return: str, encoded according to path_encoding 

1182 :rtype: str 

1183 """ 

1184 unicode_str = utf16_str.decode('UTF-16LE', errors) 

1185 if self.path_encoding: 

1186 # an encoding has been specified for path names: 

1187 return unicode_str.encode(self.path_encoding, errors) 

1188 else: 

1189 # path_encoding=None, return the Unicode string as-is: 

1190 return unicode_str 

1191 

1192 

    def open(self, filename, write_mode=False):
        """
        Open an OLE2 file in read-only or read/write mode.
        Read and parse the header, FAT and directory.

        :param filename: string-like or file-like object, OLE file to parse

            - if filename is a string smaller than 1536 bytes, it is the path
              of the file to open. (bytes or unicode string)
            - if filename is a string longer than 1535 bytes, it is parsed
              as the content of an OLE file in memory. (bytes type only)
            - if filename is a file-like object (with read, seek and tell methods),
              it is parsed as-is. The caller is responsible for closing it when done

        :param write_mode: bool, if True the file is opened in read/write mode instead
            of read-only by default. (ignored if filename is not a path)
        """
        self.write_mode = write_mode
        # [PL] check if filename is a string-like or file-like object:
        # (it is better to check for a read() method)
        if hasattr(filename, 'read'):
            # TODO: also check seek and tell methods?
            # file-like object: use it directly
            self.fp = filename
        elif isinstance(filename, bytes) and len(filename) >= MINIMAL_OLEFILE_SIZE:
            # filename is a bytes string containing the OLE file to be parsed:
            # convert it to BytesIO
            self.fp = io.BytesIO(filename)
        else:
            # string-like object: filename of file on disk
            if self.write_mode:
                # open file in mode 'read with update, binary'
                # According to https://docs.python.org/library/functions.html#open
                # 'w' would truncate the file, 'a' may only append on some Unixes
                mode = 'r+b'
            else:
                # read-only mode by default
                mode = 'rb'
            self.fp = open(filename, mode)
            # remember we own this handle, so close()/__del__ will release it:
            self.fp is not None  # NOTE(review): no-op? kept byte-identical below
            self._we_opened_fp = True
            self._open_stack = traceback.extract_stack() # remember for warning
        # obtain the filesize by using seek and tell, which should work on most
        # file-like objects:
        # TODO: do it above, using getsize with filename when possible?
        # TODO: fix code to fail with clear exception when filesize cannot be obtained
        filesize = 0
        self.fp.seek(0, os.SEEK_END)
        try:
            filesize = self.fp.tell()
        finally:
            self.fp.seek(0)
        self._filesize = filesize
        log.debug('File size: %d bytes (%Xh)' % (self._filesize, self._filesize))

        # lists of streams in FAT and MiniFAT, to detect duplicate references
        # (list of indexes of first sectors of each stream)
        self._used_streams_fat = []
        self._used_streams_minifat = []

        header = self.fp.read(512)

        if len(header) != 512 or header[:8] != MAGIC:
            log.debug('Magic = {!r} instead of {!r}'.format(header[:8], MAGIC))
            self._raise_defect(DEFECT_FATAL, "not an OLE2 structured storage file", NotOleFileError)

        # [PL] header structure according to AAF specifications
        # (StructuredStorageHeader, 76 bytes + 109 DIFAT entries):
        # _abSig[8]           [00H,08] magic signature
        # _clsid              [08H,16] reserved, must be zero
        # _uMinorVersion      [18H,02] minor version of the format
        # _uDllVersion        [1AH,02] major version: 3 = 512-byte sectors, 4 = 4 KB
        # _uByteOrder         [1CH,02] 0xFFFE: Intel byte-ordering
        # _uSectorShift       [1EH,02] sector size as power of two (typically 9)
        # _uMiniSectorShift   [20H,02] mini-sector size as power of two (typically 6)
        # _usReserved/_ulReserved1 [22H,06] reserved, must be zero
        # _csectDir           [28H,04] directory sector count (zero for 512-byte sectors)
        # _csectFat           [2CH,04] number of SECTs in the FAT chain
        # _sectDirStart       [30H,04] first SECT in the directory chain
        # _signature          [34H,04] transaction signature, should be zero
        # _ulMiniSectorCutoff [38H,04] max size for a mini stream (typically 4096)
        # _sectMiniFatStart   [3CH,04] first SECT in the MiniFAT chain
        # _csectMiniFat       [40H,04] number of SECTs in the MiniFAT chain
        # _sectDifStart       [44H,04] first SECT in the DIFAT chain
        # _csectDif           [48H,04] number of SECTs in the DIFAT chain
        # _sectFat[109]       [4CH,436] the SECTs of first 109 FAT sectors

        # [PL] header decoding:
        # '<' indicates little-endian byte ordering for Intel (cf. struct module help)
        fmt_header = '<8s16sHHHHHHLLLLLLLLLL'
        header_size = struct.calcsize(fmt_header)
        log.debug( "fmt_header size = %d, +FAT = %d" % (header_size, header_size + 109*4) )
        header1 = header[:header_size]
        (
            self.header_signature,
            self.header_clsid,
            self.minor_version,
            self.dll_version,
            self.byte_order,
            self.sector_shift,
            self.mini_sector_shift,
            self.reserved1,
            self.reserved2,
            self.num_dir_sectors,
            self.num_fat_sectors,
            self.first_dir_sector,
            self.transaction_signature_number,
            self.mini_stream_cutoff_size,
            self.first_mini_fat_sector,
            self.num_mini_fat_sectors,
            self.first_difat_sector,
            self.num_difat_sectors
        ) = struct.unpack(fmt_header, header1)
        log.debug( struct.unpack(fmt_header, header1))

        if self.header_signature != MAGIC:
            # OLE signature should always be present
            self._raise_defect(DEFECT_FATAL, "incorrect OLE signature")
        if self.header_clsid != bytearray(16):
            # according to AAF specs, CLSID should always be zero
            self._raise_defect(DEFECT_INCORRECT, "incorrect CLSID in OLE header")
        log.debug( "Minor Version = %d" % self.minor_version )
        # TODO: according to MS-CFB, minor version should be 0x003E
        log.debug( "DLL Version = %d (expected: 3 or 4)" % self.dll_version )
        if self.dll_version not in [3, 4]:
            # version 3: usual format, 512 bytes per sector
            # version 4: large format, 4K per sector
            self._raise_defect(DEFECT_INCORRECT, "incorrect DllVersion in OLE header")
        log.debug( "Byte Order = %X (expected: FFFE)" % self.byte_order )
        if self.byte_order != 0xFFFE:
            # For now only common little-endian documents are handled correctly
            self._raise_defect(DEFECT_INCORRECT, "incorrect ByteOrder in OLE header")
            # TODO: add big-endian support for documents created on Mac ?
            # But according to [MS-CFB] v20140502, ByteOrder MUST be 0xFFFE.
        self.sector_size = 2**self.sector_shift
        log.debug( "Sector Size = %d bytes (expected: 512 or 4096)" % self.sector_size )
        if self.sector_size not in [512, 4096]:
            self._raise_defect(DEFECT_INCORRECT, "incorrect sector_size in OLE header")
        if (self.dll_version==3 and self.sector_size!=512) \
        or (self.dll_version==4 and self.sector_size!=4096):
            self._raise_defect(DEFECT_INCORRECT, "sector_size does not match DllVersion in OLE header")
        self.mini_sector_size = 2**self.mini_sector_shift
        log.debug( "MiniFAT Sector Size = %d bytes (expected: 64)" % self.mini_sector_size )
        if self.mini_sector_size not in [64]:
            self._raise_defect(DEFECT_INCORRECT, "incorrect mini_sector_size in OLE header")
        if self.reserved1 != 0 or self.reserved2 != 0:
            self._raise_defect(DEFECT_INCORRECT, "incorrect OLE header (non-null reserved bytes)")
        log.debug( "Number of Directory sectors = %d" % self.num_dir_sectors )
        # Number of directory sectors (only allowed if DllVersion != 3)
        if self.sector_size==512 and self.num_dir_sectors!=0:
            self._raise_defect(DEFECT_INCORRECT, "incorrect number of directory sectors in OLE header")
        log.debug( "Number of FAT sectors = %d" % self.num_fat_sectors )
        # num_fat_sectors = number of FAT sectors in the file
        log.debug( "First Directory sector = %Xh" % self.first_dir_sector )
        # first_dir_sector = 1st sector containing the directory
        log.debug( "Transaction Signature Number = %d" % self.transaction_signature_number )
        # Signature should be zero, BUT some implementations do not follow this
        # rule => only a potential defect:
        # (according to MS-CFB, may be != 0 for applications supporting file
        # transactions)
        if self.transaction_signature_number != 0:
            self._raise_defect(DEFECT_POTENTIAL, "incorrect OLE header (transaction_signature_number>0)")
        log.debug( "Mini Stream cutoff size = %Xh (expected: 1000h)" % self.mini_stream_cutoff_size )
        # MS-CFB: This integer field MUST be set to 0x00001000. This field
        # specifies the maximum size of a user-defined data stream allocated
        # from the mini FAT and mini stream, and that cutoff is 4096 bytes.
        # Any user-defined data stream larger than or equal to this cutoff size
        # must be allocated as normal sectors from the FAT.
        if self.mini_stream_cutoff_size != 0x1000:
            self._raise_defect(DEFECT_INCORRECT, "incorrect mini_stream_cutoff_size in OLE header")
            # if no exception is raised, the cutoff size is fixed to 0x1000
            log.warning('Fixing the mini_stream_cutoff_size to 4096 (mandatory value) instead of %d' %
                        self.mini_stream_cutoff_size)
            self.mini_stream_cutoff_size = 0x1000
        # TODO: check if these values are OK
        log.debug( "First MiniFAT sector = %Xh" % self.first_mini_fat_sector )
        log.debug( "Number of MiniFAT sectors = %d" % self.num_mini_fat_sectors )
        log.debug( "First DIFAT sector = %Xh" % self.first_difat_sector )
        log.debug( "Number of DIFAT sectors = %d" % self.num_difat_sectors )

        # calculate the number of sectors in the file
        # (-1 because header doesn't count)
        self.nb_sect = ( (filesize + self.sector_size-1) // self.sector_size) - 1
        log.debug( "Maximum number of sectors in the file: %d (%Xh)" % (self.nb_sect, self.nb_sect))
        # TODO: change this test, because an OLE file MAY contain other data
        # after the last sector.

        # file clsid
        self.header_clsid = _clsid(header[8:24])

        # TODO: remove redundant attributes, and fix the code which uses them?
        self.sectorsize = self.sector_size #1 << i16(header, 30)
        self.minisectorsize = self.mini_sector_size #1 << i16(header, 32)
        self.minisectorcutoff = self.mini_stream_cutoff_size # i32(header, 56)

        # check known streams for duplicate references (these are always in FAT,
        # never in MiniFAT):
        self._check_duplicate_stream(self.first_dir_sector)
        # check MiniFAT only if it is not empty:
        if self.num_mini_fat_sectors:
            self._check_duplicate_stream(self.first_mini_fat_sector)
        # check DIFAT only if it is not empty:
        if self.num_difat_sectors:
            self._check_duplicate_stream(self.first_difat_sector)

        # Load file allocation tables
        self.loadfat(header)
        # Load directory. This sets both the direntries list (ordered by sid)
        # and the root (ordered by hierarchy) members.
        self.loaddirectory(self.first_dir_sector)
        self.minifatsect = self.first_mini_fat_sector

1417 

1418 def close(self): 

1419 """ 

1420 close the OLE file, release the file object if we created it ourselves. 

1421 

1422 Leaves the file handle open if it was provided by the caller. 

1423 """ 

1424 self._close(warn=False) 

1425 

1426 def _close(self, warn=False): 

1427 """Implementation of close() with internal arg `warn`.""" 

1428 if self._we_opened_fp: 

1429 if warn and self.warn_if_not_closed: 

1430 # we only raise a warning if the file was not explicitly closed, 

1431 # and if the option warn_if_not_closed is enabled 

1432 warnings.warn(OleFileIONotClosed(self._open_stack)) 

1433 self.fp.close() 

1434 self._we_opened_fp = False 

1435 

1436 def _check_duplicate_stream(self, first_sect, minifat=False): 

1437 """ 

1438 Checks if a stream has not been already referenced elsewhere. 

1439 This method should only be called once for each known stream, and only 

1440 if stream size is not null. 

1441 

1442 :param first_sect: int, index of first sector of the stream in FAT 

1443 :param minifat: bool, if True, stream is located in the MiniFAT, else in the FAT 

1444 """ 

1445 if minifat: 

1446 log.debug('_check_duplicate_stream: sect=%Xh in MiniFAT' % first_sect) 

1447 used_streams = self._used_streams_minifat 

1448 else: 

1449 log.debug('_check_duplicate_stream: sect=%Xh in FAT' % first_sect) 

1450 # some values can be safely ignored (not a real stream): 

1451 if first_sect in (DIFSECT,FATSECT,ENDOFCHAIN,FREESECT): 

1452 return 

1453 used_streams = self._used_streams_fat 

1454 # TODO: would it be more efficient using a dict or hash values, instead 

1455 # of a list of long ? 

1456 if first_sect in used_streams: 

1457 self._raise_defect(DEFECT_INCORRECT, 'Stream referenced twice') 

1458 else: 

1459 used_streams.append(first_sect) 

1460 

1461 def dumpfat(self, fat, firstindex=0): 

1462 """ 

1463 Display a part of FAT in human-readable form for debugging purposes 

1464 """ 

1465 # dictionary to convert special FAT values in human-readable strings 

1466 VPL = 8 # values per line (8+1 * 8+1 = 81) 

1467 fatnames = { 

1468 FREESECT: "..free..", 

1469 ENDOFCHAIN: "[ END. ]", 

1470 FATSECT: "FATSECT ", 

1471 DIFSECT: "DIFSECT " 

1472 } 

1473 nbsect = len(fat) 

1474 nlines = (nbsect+VPL-1)//VPL 

1475 print("index", end=" ") 

1476 for i in range(VPL): 

1477 print("%8X" % i, end=" ") 

1478 print() 

1479 for l in range(nlines): 

1480 index = l*VPL 

1481 print("%6X:" % (firstindex+index), end=" ") 

1482 for i in range(index, index+VPL): 

1483 if i>=nbsect: 

1484 break 

1485 sect = fat[i] 

1486 aux = sect & 0xFFFFFFFF # JYTHON-WORKAROUND 

1487 if aux in fatnames: 

1488 name = fatnames[aux] 

1489 else: 

1490 if sect == i+1: 

1491 name = " --->" 

1492 else: 

1493 name = "%8X" % sect 

1494 print(name, end=" ") 

1495 print() 

1496 

1497 def dumpsect(self, sector, firstindex=0): 

1498 """ 

1499 Display a sector in a human-readable form, for debugging purposes 

1500 """ 

1501 VPL=8 # number of values per line (8+1 * 8+1 = 81) 

1502 tab = array.array(UINT32, sector) 

1503 if sys.byteorder == 'big': 

1504 tab.byteswap() 

1505 nbsect = len(tab) 

1506 nlines = (nbsect+VPL-1)//VPL 

1507 print("index", end=" ") 

1508 for i in range(VPL): 

1509 print("%8X" % i, end=" ") 

1510 print() 

1511 for l in range(nlines): 

1512 index = l*VPL 

1513 print("%6X:" % (firstindex+index), end=" ") 

1514 for i in range(index, index+VPL): 

1515 if i>=nbsect: 

1516 break 

1517 sect = tab[i] 

1518 name = "%8X" % sect 

1519 print(name, end=" ") 

1520 print() 

1521 

1522 def sect2array(self, sect): 

1523 """ 

1524 convert a sector to an array of 32 bits unsigned integers, 

1525 swapping bytes on big endian CPUs such as PowerPC (old Macs) 

1526 """ 

1527 # TODO: make this a static function 

1528 a = array.array(UINT32, sect) 

1529 # if CPU is big endian, swap bytes: 

1530 if sys.byteorder == 'big': 

1531 a.byteswap() 

1532 return a 

1533 

    def loadfat_sect(self, sect):
        """
        Adds the indexes of the given sector to the FAT

        :param sect: string containing the first FAT sector, or array of long integers
        :returns: index of last FAT sector.
        """
        # a FAT sector is an array of ulong integers.
        if isinstance(sect, array.array):
            # if sect is already an array it is directly used
            fat1 = sect
        else:
            # if it's a raw sector, it is parsed in an array
            fat1 = self.sect2array(sect)
            # Display the sector contents only if the logging level is debug:
            if log.isEnabledFor(logging.DEBUG):
                self.dumpsect(sect)
        # The FAT is a sector chain starting at the first index of itself.
        # initialize isect, just in case the loop body never runs (empty fat1):
        isect = None
        for isect in fat1:
            isect = isect & 0xFFFFFFFF  # JYTHON-WORKAROUND
            log.debug("isect = %X" % isect)
            if isect == ENDOFCHAIN or isect == FREESECT:
                # the end of the sector chain has been reached
                log.debug("found end of sector chain")
                break
            # read the FAT sector
            s = self.getsect(isect)
            # parse it as an array of 32 bits integers, and add it to the
            # global FAT array
            nextfat = self.sect2array(s)
            self.fat = self.fat + nextfat
        return isect

    def loadfat(self, header):
        """
        Load the FAT table.

        :param header: bytes, start of the file including the OLE header,
            whose tail (offset 76 onwards) holds the first 109 FAT sector indexes.
        """
        # The 1st sector of the file contains sector numbers for the first 109
        # FAT sectors, right after the header which is 76 bytes long.
        # (always 109, whatever the sector size: 512 bytes = 76+4*109)
        # Additional sectors are described by DIF blocks

        log.debug('Loading the FAT table, starting with the 1st sector after the header')
        sect = header[76:512]
        log.debug( "len(sect)=%d, so %d integers" % (len(sect), len(sect)//4) )
        # fat = []
        # FAT is an array of 32 bits unsigned ints, it's more effective
        # to use an array than a list in Python.
        # It's initialized as empty first:
        self.fat = array.array(UINT32)
        self.loadfat_sect(sect)
        # self.dumpfat(self.fat)
        # for i in range(0, len(sect), 4):
        #     ix = i32(sect, i)
        #     # [PL] if ix == -2 or ix == -1: # ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
        #     if ix == 0xFFFFFFFE or ix == 0xFFFFFFFF:
        #         break
        #     s = self.getsect(ix)
        #     # fat = fat + [i32(s, i) for i in range(0, len(s), 4)]
        #     fat = fat + array.array(UINT32, s)
        if self.num_difat_sectors != 0:
            log.debug('DIFAT is used, because file size > 6.8MB.')
            # [PL] There's a DIFAT because file is larger than 6.8MB
            # some checks just in case:
            if self.num_fat_sectors <= 109:
                # there must be at least 109 blocks in header and the rest in
                # DIFAT, so number of sectors must be >109.
                self._raise_defect(DEFECT_INCORRECT, 'incorrect DIFAT, not enough sectors')
            if self.first_difat_sector >= self.nb_sect:
                # initial DIFAT block index must be valid
                self._raise_defect(DEFECT_FATAL, 'incorrect DIFAT, first index out of range')
            log.debug( "DIFAT analysis..." )
            # We compute the necessary number of DIFAT sectors :
            # Number of pointers per DIFAT sector = (sectorsize/4)-1
            # (-1 because the last pointer is the next DIFAT sector number)
            nb_difat_sectors = (self.sectorsize//4)-1
            # (if 512 bytes: each DIFAT sector = 127 pointers + 1 towards next DIFAT sector)
            nb_difat = (self.num_fat_sectors-109 + nb_difat_sectors-1)//nb_difat_sectors
            log.debug( "nb_difat = %d" % nb_difat )
            if self.num_difat_sectors != nb_difat:
                raise IOError('incorrect DIFAT')
            # walk the DIFAT sector chain, appending each referenced FAT block:
            isect_difat = self.first_difat_sector
            for i in iterrange(nb_difat):
                log.debug( "DIFAT block %d, sector %X" % (i, isect_difat) )
                # TODO: check if corresponding FAT SID = DIFSECT
                sector_difat = self.getsect(isect_difat)
                difat = self.sect2array(sector_difat)
                # Display the sector contents only if the logging level is debug:
                if log.isEnabledFor(logging.DEBUG):
                    self.dumpsect(sector_difat)
                self.loadfat_sect(difat[:nb_difat_sectors])
                # last DIFAT pointer is next DIFAT sector:
                isect_difat = difat[nb_difat_sectors]
                log.debug( "next DIFAT sector: %X" % isect_difat )
            # checks:
            if isect_difat not in [ENDOFCHAIN, FREESECT]:
                # last DIFAT pointer value must be ENDOFCHAIN or FREESECT
                raise IOError('incorrect end of DIFAT')
            # if len(self.fat) != self.num_fat_sectors:
            #     # FAT should contain num_fat_sectors blocks
            #     print("FAT length: %d instead of %d" % (len(self.fat), self.num_fat_sectors))
            #     raise IOError('incorrect DIFAT')
        else:
            log.debug('No DIFAT, because file size < 6.8MB.')
        # since FAT is read from fixed-size sectors, it may contain more values
        # than the actual number of sectors in the file.
        # Keep only the relevant sector indexes:
        if len(self.fat) > self.nb_sect:
            log.debug('len(fat)=%d, shrunk to nb_sect=%d' % (len(self.fat), self.nb_sect))
            self.fat = self.fat[:self.nb_sect]
        log.debug('FAT references %d sectors / Maximum %d sectors in file' % (len(self.fat), self.nb_sect))
        # Display the FAT contents only if the logging level is debug:
        if log.isEnabledFor(logging.DEBUG):
            log.debug('\nFAT:')
            self.dumpfat(self.fat)

    def loadminifat(self):
        """
        Load the MiniFAT table.
        """
        # MiniFAT is stored in a standard sub-stream, pointed to by a header
        # field.
        # NOTE: there are two sizes to take into account for this stream:
        # 1) Stream size is calculated according to the number of sectors
        #    declared in the OLE header. This allocated stream may be more than
        #    needed to store the actual sector indexes.
        # (self.num_mini_fat_sectors is the number of sectors of size self.sector_size)
        stream_size = self.num_mini_fat_sectors * self.sector_size
        # 2) Actually used size is calculated by dividing the MiniStream size
        #    (given by root entry size) by the size of mini sectors, *4 for
        #    32 bits indexes:
        nb_minisectors = (self.root.size + self.mini_sector_size-1) // self.mini_sector_size
        used_size = nb_minisectors * 4
        log.debug('loadminifat(): minifatsect=%d, nb FAT sectors=%d, used_size=%d, stream_size=%d, nb MiniSectors=%d' %
                  (self.minifatsect, self.num_mini_fat_sectors, used_size, stream_size, nb_minisectors))
        if used_size > stream_size:
            # This is not really a problem, but may indicate a wrong implementation:
            self._raise_defect(DEFECT_INCORRECT, 'OLE MiniStream is larger than MiniFAT')
        # In any case, first read stream_size:
        # (force_FAT=True because the MiniFAT stream itself lives in the FAT)
        s = self._open(self.minifatsect, stream_size, force_FAT=True).read()
        # [PL] Old code replaced by an array:
        # self.minifat = [i32(s, i) for i in range(0, len(s), 4)]
        self.minifat = self.sect2array(s)
        # Then shrink the array to used size, to avoid indexes out of MiniStream:
        log.debug('MiniFAT shrunk from %d to %d sectors' % (len(self.minifat), nb_minisectors))
        self.minifat = self.minifat[:nb_minisectors]
        log.debug('loadminifat(): len=%d' % len(self.minifat))
        # Display the FAT contents only if the logging level is debug:
        if log.isEnabledFor(logging.DEBUG):
            log.debug('\nMiniFAT:')
            self.dumpfat(self.minifat)

1688 def getsect(self, sect): 

1689 """ 

1690 Read given sector from file on disk. 

1691 

1692 :param sect: int, sector index 

1693 :returns: a string containing the sector data. 

1694 """ 

1695 # From [MS-CFB]: A sector number can be converted into a byte offset 

1696 # into the file by using the following formula: 

1697 # (sector number + 1) x Sector Size. 

1698 # This implies that sector #0 of the file begins at byte offset Sector 

1699 # Size, not at 0. 

1700 

1701 # [PL] the original code in PIL was wrong when sectors are 4KB instead of 

1702 # 512 bytes: 

1703 #self.fp.seek(512 + self.sectorsize * sect) 

1704 # [PL]: added safety checks: 

1705 #print("getsect(%X)" % sect) 

1706 try: 

1707 self.fp.seek(self.sectorsize * (sect+1)) 

1708 except Exception: 

1709 log.debug('getsect(): sect=%X, seek=%d, filesize=%d' % 

1710 (sect, self.sectorsize*(sect+1), self._filesize)) 

1711 self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') 

1712 sector = self.fp.read(self.sectorsize) 

1713 if len(sector) != self.sectorsize: 

1714 log.debug('getsect(): sect=%X, read=%d, sectorsize=%d' % 

1715 (sect, len(sector), self.sectorsize)) 

1716 self._raise_defect(DEFECT_FATAL, 'incomplete OLE sector') 

1717 return sector 

1718 

1719 def write_sect(self, sect, data, padding=b'\x00'): 

1720 """ 

1721 Write given sector to file on disk. 

1722 

1723 :param sect: int, sector index 

1724 :param data: bytes, sector data 

1725 :param padding: single byte, padding character if data < sector size 

1726 """ 

1727 if not isinstance(data, bytes): 

1728 raise TypeError("write_sect: data must be a bytes string") 

1729 if not isinstance(padding, bytes) or len(padding)!=1: 

1730 raise TypeError("write_sect: padding must be a bytes string of 1 char") 

1731 # TODO: we could allow padding=None for no padding at all 

1732 try: 

1733 self.fp.seek(self.sectorsize * (sect+1)) 

1734 except Exception: 

1735 log.debug('write_sect(): sect=%X, seek=%d, filesize=%d' % 

1736 (sect, self.sectorsize*(sect+1), self._filesize)) 

1737 self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') 

1738 if len(data) < self.sectorsize: 

1739 # add padding 

1740 data += padding * (self.sectorsize - len(data)) 

1741 elif len(data) > self.sectorsize: 

1742 raise ValueError("Data is larger than sector size") 

1743 self.fp.write(data) 

1744 

1745 def _write_mini_sect(self, fp_pos, data, padding = b'\x00'): 

1746 """ 

1747 Write given sector to file on disk. 

1748 

1749 :param fp_pos: int, file position 

1750 :param data: bytes, sector data 

1751 :param padding: single byte, padding character if data < sector size 

1752 """ 

1753 if not isinstance(data, bytes): 

1754 raise TypeError("write_mini_sect: data must be a bytes string") 

1755 if not isinstance(padding, bytes) or len(padding) != 1: 

1756 raise TypeError("write_mini_sect: padding must be a bytes string of 1 char") 

1757 

1758 try: 

1759 self.fp.seek(fp_pos) 

1760 except Exception: 

1761 log.debug('write_mini_sect(): fp_pos=%d, filesize=%d' % 

1762 (fp_pos, self._filesize)) 

1763 self._raise_defect(DEFECT_FATAL, 'OLE sector index out of range') 

1764 len_data = len(data) 

1765 if len_data < self.mini_sector_size: 

1766 data += padding * (self.mini_sector_size - len_data) 

1767 if self.mini_sector_size < len_data: 

1768 raise ValueError("Data is larger than sector size") 

1769 self.fp.write(data) 

1770 

    def loaddirectory(self, sect):
        """
        Load the directory.

        :param sect: sector index of directory stream.
        """
        log.debug('Loading the Directory:')
        # The directory is stored in a standard
        # substream, independent of its size.

        # open directory stream as a read-only file:
        # (stream size is not known in advance)
        self.directory_fp = self._open(sect, force_FAT=True)

        # [PL] to detect malformed documents and avoid DoS attacks, the maximum
        # number of directory entries can be calculated:
        # (each directory entry is exactly 128 bytes)
        max_entries = self.directory_fp.size // 128
        log.debug('loaddirectory: size=%d, max_entries=%d' %
                  (self.directory_fp.size, max_entries))

        # Create list of directory entries
        # self.direntries = []
        # We start with a list of "None" object
        self.direntries = [None] * max_entries
        # for sid in iterrange(max_entries):
        #     entry = fp.read(128)
        #     if not entry:
        #         break
        #     self.direntries.append(OleDirectoryEntry(entry, sid, self))
        # load root entry:
        root_entry = self._load_direntry(0)
        # Root entry is the first entry:
        self.root = self.direntries[0]
        # TODO: read ALL directory entries (ignore bad entries?)
        # TODO: adapt build_storage_tree to avoid duplicate reads
        # for i in range(1, max_entries):
        #     self._load_direntry(i)
        # read and build all storage trees, starting from the root:
        self.root.build_storage_tree()

    def _load_direntry (self, sid):
        """
        Load a directory entry from the directory.
        This method should only be called once for each storage/stream when
        loading the directory.

        :param sid: index of storage/stream in the directory.
        :returns: a OleDirectoryEntry object

        :exception OleFileError: if the entry has already been referenced.
        """
        # check if SID is OK:
        if sid<0 or sid>=len(self.direntries):
            self._raise_defect(DEFECT_FATAL, "OLE directory index out of range")
        # check if entry was already referenced:
        if self.direntries[sid] is not None:
            self._raise_defect(DEFECT_INCORRECT,
                "double reference for OLE stream/storage")
            # if exception not raised (tolerant parsing mode), return the
            # already-loaded object instead of parsing it again:
            return self.direntries[sid]
        # each directory entry is exactly 128 bytes:
        self.directory_fp.seek(sid * 128)
        entry = self.directory_fp.read(128)
        self.direntries[sid] = OleDirectoryEntry(entry, sid, self)
        return self.direntries[sid]

    def dumpdirectory(self):
        """
        Dump directory (for debugging only)
        """
        # delegate to the root entry, which recursively dumps the whole tree
        self.root.dump()

    def _open(self, start, size = UNKNOWN_SIZE, force_FAT=False):
        """
        Open a stream, either in FAT or MiniFAT according to its size.
        (openstream helper)

        :param start: index of first sector
        :param size: size of stream (or nothing if size is unknown)
        :param force_FAT: if False (default), stream will be opened in FAT or MiniFAT
            according to size. If True, it will always be opened in FAT.
        :returns: OleStream object (read-only file-like object)
        """
        log.debug('OleFileIO.open(): sect=%Xh, size=%d, force_FAT=%s' %
                  (start, size, str(force_FAT)))
        # stream size is compared to the mini_stream_cutoff_size threshold:
        if size < self.minisectorcutoff and not force_FAT:
            # ministream object
            if not self.ministream:
                # load MiniFAT if it wasn't already done:
                self.loadminifat()
                # The first sector index of the miniFAT stream is stored in the
                # root directory entry:
                size_ministream = self.root.size
                log.debug('Opening MiniStream: sect=%Xh, size=%d' %
                          (self.root.isectStart, size_ministream))
                # the MiniStream container itself lives in the FAT,
                # hence force_FAT=True for this recursive call:
                self.ministream = self._open(self.root.isectStart,
                                             size_ministream, force_FAT=True)
            return OleStream(fp=self.ministream, sect=start, size=size,
                             offset=0, sectorsize=self.minisectorsize,
                             fat=self.minifat, filesize=self.ministream.size,
                             olefileio=self)
        else:
            # standard stream
            return OleStream(fp=self.fp, sect=start, size=size,
                             offset=self.sectorsize,
                             sectorsize=self.sectorsize, fat=self.fat,
                             filesize=self._filesize,
                             olefileio=self)

1879 def _list(self, files, prefix, node, streams=True, storages=False): 

1880 """ 

1881 listdir helper 

1882 

1883 :param files: list of files to fill in 

1884 :param prefix: current location in storage tree (list of names) 

1885 :param node: current node (OleDirectoryEntry object) 

1886 :param streams: bool, include streams if True (True by default) - new in v0.26 

1887 :param storages: bool, include storages if True (False by default) - new in v0.26 

1888 (note: the root storage is never included) 

1889 """ 

1890 prefix = prefix + [node.name] 

1891 for entry in node.kids: 

1892 if entry.entry_type == STGTY_STORAGE: 

1893 # this is a storage 

1894 if storages: 

1895 # add it to the list 

1896 files.append(prefix[1:] + [entry.name]) 

1897 # check its kids 

1898 self._list(files, prefix, entry, streams, storages) 

1899 elif entry.entry_type == STGTY_STREAM: 

1900 # this is a stream 

1901 if streams: 

1902 # add it to the list 

1903 files.append(prefix[1:] + [entry.name]) 

1904 else: 

1905 self._raise_defect(DEFECT_INCORRECT, 'The directory tree contains an entry which is not a stream nor a storage.') 

1906 

1907 def listdir(self, streams=True, storages=False): 

1908 """ 

1909 Return a list of streams and/or storages stored in this file 

1910 

1911 :param streams: bool, include streams if True (True by default) - new in v0.26 

1912 :param storages: bool, include storages if True (False by default) - new in v0.26 

1913 (note: the root storage is never included) 

1914 :returns: list of stream and/or storage paths 

1915 """ 

1916 files = [] 

1917 self._list(files, [], self.root, streams, storages) 

1918 return files 

1919 

1920 def _find(self, filename): 

1921 """ 

1922 Returns directory entry of given filename. (openstream helper) 

1923 Note: this method is case-insensitive. 

1924 

1925 :param filename: path of stream in storage tree (except root entry), either: 

1926 

1927 - a string using Unix path syntax, for example: 

1928 'storage_1/storage_1.2/stream' 

1929 - or a list of storage filenames, path to the desired stream/storage. 

1930 Example: ['storage_1', 'storage_1.2', 'stream'] 

1931 

1932 :returns: sid of requested filename 

1933 :exception IOError: if file not found 

1934 """ 

1935 

1936 # if filename is a string instead of a list, split it on slashes to 

1937 # convert to a list: 

1938 if isinstance(filename, basestring): 

1939 filename = filename.split('/') 

1940 # walk across storage tree, following given path: 

1941 node = self.root 

1942 for name in filename: 

1943 for kid in node.kids: 

1944 if kid.name.lower() == name.lower(): 

1945 break 

1946 else: 

1947 raise IOError("file not found") 

1948 node = kid 

1949 return node.sid 

1950 

1951 def openstream(self, filename): 

1952 """ 

1953 Open a stream as a read-only file object (BytesIO). 

1954 Note: filename is case-insensitive. 

1955 

1956 :param filename: path of stream in storage tree (except root entry), either: 

1957 

1958 - a string using Unix path syntax, for example: 

1959 'storage_1/storage_1.2/stream' 

1960 - or a list of storage filenames, path to the desired stream/storage. 

1961 Example: ['storage_1', 'storage_1.2', 'stream'] 

1962 

1963 :returns: file object (read-only) 

1964 :exception IOError: if filename not found, or if this is not a stream. 

1965 """ 

1966 sid = self._find(filename) 

1967 entry = self.direntries[sid] 

1968 if entry.entry_type != STGTY_STREAM: 

1969 raise IOError("this file is not a stream") 

1970 return self._open(entry.isectStart, entry.size) 

1971 

    def _write_mini_stream(self, entry, data_to_write):
        """
        Overwrite an existing stream stored in the MiniStream
        (write_stream helper).

        :param entry: OleDirectoryEntry of the stream to overwrite
        :param data_to_write: bytes, same size as the existing stream
        """
        # make sure the entry's MiniFAT sector chain is available:
        if not entry.sect_chain:
            entry.build_sect_chain(self)
        nb_sectors = len(entry.sect_chain)

        # the MiniStream container itself is chained in the FAT via the root entry:
        if not self.root.sect_chain:
            self.root.build_sect_chain(self)
        # number of mini sectors held by one standard sector:
        block_size = self.sector_size // self.mini_sector_size
        for idx, sect in enumerate(entry.sect_chain):
            # locate which standard sector holds this mini sector, and where:
            sect_base = sect // block_size
            sect_offset = sect % block_size
            # absolute file position (+1 sector to skip the OLE header):
            fp_pos = (self.root.sect_chain[sect_base] + 1)*self.sector_size + sect_offset*self.mini_sector_size
            if idx < (nb_sectors - 1):
                data_per_sector = data_to_write[idx * self.mini_sector_size: (idx + 1) * self.mini_sector_size]
            else:
                # the last mini sector may be shorter than mini_sector_size
                data_per_sector = data_to_write[idx * self.mini_sector_size:]
            self._write_mini_sect(fp_pos, data_per_sector)

    def write_stream(self, stream_name, data):
        """
        Write a stream to disk. For now, it is only possible to replace an
        existing stream by data of the same size.

        :param stream_name: path of stream in storage tree (except root entry), either:

            - a string using Unix path syntax, for example:
              'storage_1/storage_1.2/stream'
            - or a list of storage filenames, path to the desired stream/storage.
              Example: ['storage_1', 'storage_1.2', 'stream']

        :param data: bytes, data to be written, must be the same size as the original
            stream.
        """
        if not isinstance(data, bytes):
            raise TypeError("write_stream: data must be a bytes string")
        sid = self._find(stream_name)
        entry = self.direntries[sid]
        if entry.entry_type != STGTY_STREAM:
            raise IOError("this is not a stream")
        size = entry.size
        if size != len(data):
            raise ValueError("write_stream: data must be the same size as the existing stream")
        # small streams (below the cutoff) are stored in the MiniStream:
        if size < self.minisectorcutoff and entry.entry_type != STGTY_ROOT:
            return self._write_mini_stream(entry = entry, data_to_write = data)

        sect = entry.isectStart
        # number of sectors to write
        nb_sectors = (size + (self.sectorsize-1)) // self.sectorsize
        log.debug('nb_sectors = %d' % nb_sectors)
        for i in range(nb_sectors):
            # try:
            #     self.fp.seek(offset + self.sectorsize * sect)
            # except Exception:
            #     log.debug('sect=%d, seek=%d' %
            #         (sect, offset+self.sectorsize*sect))
            #     raise IOError('OLE sector index out of range')
            # extract one sector from data, the last one being smaller:
            if i<(nb_sectors-1):
                data_sector = data [i*self.sectorsize : (i+1)*self.sectorsize]
                # TODO: comment this if it works
                assert(len(data_sector)==self.sectorsize)
            else:
                data_sector = data [i*self.sectorsize:]
                # TODO: comment this if it works
                log.debug('write_stream: size=%d sectorsize=%d data_sector=%Xh size%%sectorsize=%d'
                          % (size, self.sectorsize, len(data_sector), size % self.sectorsize))
                assert(len(data_sector) % self.sectorsize==size % self.sectorsize)
            self.write_sect(sect, data_sector)
            # self.fp.write(data_sector)
            # jump to next sector in the FAT:
            try:
                sect = self.fat[sect]
            except IndexError:
                # [PL] if pointer is out of the FAT an exception is raised
                raise IOError('incorrect OLE FAT, sector index out of range')
        # [PL] Last sector should be a "end of chain" marker:
        if sect != ENDOFCHAIN:
            raise IOError('incorrect last sector index in OLE stream')

2051 def get_type(self, filename): 

2052 """ 

2053 Test if given filename exists as a stream or a storage in the OLE 

2054 container, and return its type. 

2055 

2056 :param filename: path of stream in storage tree. (see openstream for syntax) 

2057 :returns: False if object does not exist, its entry type (>0) otherwise: 

2058 

2059 - STGTY_STREAM: a stream 

2060 - STGTY_STORAGE: a storage 

2061 - STGTY_ROOT: the root entry 

2062 """ 

2063 try: 

2064 sid = self._find(filename) 

2065 entry = self.direntries[sid] 

2066 return entry.entry_type 

2067 except Exception: 

2068 return False 

2069 

2070 def getclsid(self, filename): 

2071 """ 

2072 Return clsid of a stream/storage. 

2073 

2074 :param filename: path of stream/storage in storage tree. (see openstream for 

2075 syntax) 

2076 :returns: Empty string if clsid is null, a printable representation of the clsid otherwise 

2077 

2078 new in version 0.44 

2079 """ 

2080 sid = self._find(filename) 

2081 entry = self.direntries[sid] 

2082 return entry.clsid 

2083 

2084 def getmtime(self, filename): 

2085 """ 

2086 Return modification time of a stream/storage. 

2087 

2088 :param filename: path of stream/storage in storage tree. (see openstream for 

2089 syntax) 

2090 :returns: None if modification time is null, a python datetime object 

2091 otherwise (UTC timezone) 

2092 

2093 new in version 0.26 

2094 """ 

2095 sid = self._find(filename) 

2096 entry = self.direntries[sid] 

2097 return entry.getmtime() 

2098 

2099 def getctime(self, filename): 

2100 """ 

2101 Return creation time of a stream/storage. 

2102 

2103 :param filename: path of stream/storage in storage tree. (see openstream for 

2104 syntax) 

2105 :returns: None if creation time is null, a python datetime object 

2106 otherwise (UTC timezone) 

2107 

2108 new in version 0.26 

2109 """ 

2110 sid = self._find(filename) 

2111 entry = self.direntries[sid] 

2112 return entry.getctime() 

2113 

2114 def exists(self, filename): 

2115 """ 

2116 Test if given filename exists as a stream or a storage in the OLE 

2117 container. 

2118 Note: filename is case-insensitive. 

2119 

2120 :param filename: path of stream in storage tree. (see openstream for syntax) 

2121 :returns: True if object exist, else False. 

2122 """ 

2123 try: 

2124 sid = self._find(filename) 

2125 return True 

2126 except Exception: 

2127 return False 

2128 

2129 def get_size(self, filename): 

2130 """ 

2131 Return size of a stream in the OLE container, in bytes. 

2132 

2133 :param filename: path of stream in storage tree (see openstream for syntax) 

2134 :returns: size in bytes (long integer) 

2135 :exception IOError: if file not found 

2136 :exception TypeError: if this is not a stream. 

2137 """ 

2138 sid = self._find(filename) 

2139 entry = self.direntries[sid] 

2140 if entry.entry_type != STGTY_STREAM: 

2141 # TODO: Should it return zero instead of raising an exception ? 

2142 raise TypeError('object is not an OLE stream') 

2143 return entry.size 

2144 

    def get_rootentry_name(self):
        """
        Return root entry name. Should usually be 'Root Entry' or 'R' in most
        implementations.
        """
        # root is the top OleDirectoryEntry set by loaddirectory()
        return self.root.name

2152 def getproperties(self, filename, convert_time=False, no_conversion=None): 

2153 """ 

2154 Return properties described in substream. 

2155 

2156 :param filename: path of stream in storage tree (see openstream for syntax) 

2157 :param convert_time: bool, if True timestamps will be converted to Python datetime 

2158 :param no_conversion: None or list of int, timestamps not to be converted 

2159 (for example total editing time is not a real timestamp) 

2160 

2161 :returns: a dictionary of values indexed by id (integer) 

2162 """ 

2163 #REFERENCE: [MS-OLEPS] https://msdn.microsoft.com/en-us/library/dd942421.aspx 

2164 # make sure no_conversion is a list, just to simplify code below: 

2165 if no_conversion == None: 

2166 no_conversion = [] 

2167 # stream path as a string to report exceptions: 

2168 streampath = filename 

2169 if not isinstance(streampath, str): 

2170 streampath = '/'.join(streampath) 

2171 fp = self.openstream(filename) 

2172 data = {} 

2173 try: 

2174 # header 

2175 s = fp.read(28) 

2176 clsid = _clsid(s[8:24]) 

2177 # format id 

2178 s = fp.read(20) 

2179 fmtid = _clsid(s[:16]) 

2180 fp.seek(i32(s, 16)) 

2181 # get section 

2182 s = b"****" + fp.read(i32(fp.read(4))-4) 

2183 # number of properties: 

2184 num_props = i32(s, 4) 

2185 except BaseException as exc: 

2186 # catch exception while parsing property header, and only raise 

2187 # a DEFECT_INCORRECT then return an empty dict, because this is not 

2188 # a fatal error when parsing the whole file 

2189 msg = 'Error while parsing properties header in stream {}: {}'.format( 

2190 repr(streampath), exc) 

2191 self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) 

2192 return data 

2193 # clamp num_props based on the data length 

2194 num_props = min(num_props, int(len(s) / 8)) 

2195 for i in iterrange(num_props): 

2196 property_id = 0 # just in case of an exception 

2197 try: 

2198 property_id = i32(s, 8+i*8) 

2199 offset = i32(s, 12+i*8) 

2200 property_type = i32(s, offset) 

2201 

2202 vt_name = VT.get(property_type, 'UNKNOWN') 

2203 log.debug('property id=%d: type=%d/%s offset=%X' % (property_id, property_type, vt_name, offset)) 

2204 

2205 value = self._parse_property(s, offset+4, property_id, property_type, convert_time, no_conversion) 

2206 data[property_id] = value 

2207 except BaseException as exc: 

2208 # catch exception while parsing each property, and only raise 

2209 # a DEFECT_INCORRECT, because parsing can go on 

2210 msg = 'Error while parsing property id %d in stream %s: %s' % ( 

2211 property_id, repr(streampath), exc) 

2212 self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) 

2213 

2214 return data 

2215 

    def _parse_property(self, s, offset, property_id, property_type, convert_time, no_conversion):
        """
        Parse one property value from a property section (getproperties helper),
        dispatching on the property type, including VT_VECTOR variants.

        :param s: bytes, property section data
        :param offset: int, offset of the property value within s
        :param property_id: int, property identifier (used for logging)
        :param property_type: int, VT_* type code of the property
        :param convert_time: bool, if True timestamps are converted to datetime
        :param no_conversion: list of int, property ids not to be converted
        :returns: the parsed value, a list of values for vectors, or None if
            the property type is not implemented
        """
        v = None
        if property_type <= VT_BLOB or property_type in (VT_CLSID, VT_CF):
            # simple scalar type: delegate to the basic parser
            v, _ = self._parse_property_basic(s, offset, property_id, property_type, convert_time, no_conversion)
        elif property_type == VT_VECTOR | VT_VARIANT:
            log.debug('property_type == VT_VECTOR | VT_VARIANT')
            off = 4
            count = i32(s, offset)
            values = []
            for _ in range(count):
                # heterogeneous vector: each element carries its own type
                # code (4 bytes) right before its value
                property_type = i32(s, offset + off)
                v, sz = self._parse_property_basic(s, offset + off + 4, property_id, property_type, convert_time, no_conversion)
                values.append(v)
                off += sz + 4
            v = values

        elif property_type & VT_VECTOR:
            property_type_base = property_type & ~VT_VECTOR
            log.debug('property_type == VT_VECTOR | %s' % VT.get(property_type_base, 'UNKNOWN'))
            off = 4
            count = i32(s, offset)
            values = []
            for _ in range(count):
                # homogeneous vector: all elements share the base type
                v, sz = self._parse_property_basic(s, offset + off, property_id, property_type & ~VT_VECTOR, convert_time, no_conversion)
                values.append(v)
                off += sz
            v = values
        else:
            log.debug('property id=%d: type=%d not implemented in parser yet' % (property_id, property_type))
        return v

2247 def _parse_property_basic(self, s, offset, property_id, property_type, convert_time, no_conversion): 

2248 value = None 

2249 size = 0 

2250 # test for common types first (should perhaps use 

2251 # a dictionary instead?) 

2252 

2253 if property_type == VT_I2: # 16-bit signed integer 

2254 value = i16(s, offset) 

2255 if value >= 32768: 

2256 value = value - 65536 

2257 size = 2 

2258 elif property_type == VT_UI2: # 2-byte unsigned integer 

2259 value = i16(s, offset) 

2260 size = 2 

2261 elif property_type in (VT_I4, VT_INT, VT_ERROR): 

2262 # VT_I4: 32-bit signed integer 

2263 # VT_ERROR: HRESULT, similar to 32-bit signed integer, 

2264 # see https://msdn.microsoft.com/en-us/library/cc230330.aspx 

2265 value = i32(s, offset) 

2266 size = 4 

2267 elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer 

2268 value = i32(s, offset) # FIXME 

2269 size = 4 

2270 elif property_type in (VT_BSTR, VT_LPSTR): 

2271 # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx 

2272 # size is a 32 bits integer, including the null terminator, and 

2273 # possibly trailing or embedded null chars 

2274 #TODO: if codepage is unicode, the string should be converted as such 

2275 count = i32(s, offset) 

2276 value = s[offset+4:offset+4+count-1] 

2277 # remove all null chars: 

2278 value = value.replace(b'\x00', b'') 

2279 size = 4 + count 

2280 elif property_type == VT_BLOB: 

2281 # binary large object (BLOB) 

2282 # see https://msdn.microsoft.com/en-us/library/dd942282.aspx 

2283 count = i32(s, offset) 

2284 value = s[offset+4:offset+4+count] 

2285 size = 4 + count 

2286 elif property_type == VT_LPWSTR: 

2287 # UnicodeString 

2288 # see https://msdn.microsoft.com/en-us/library/dd942313.aspx 

2289 # "the string should NOT contain embedded or additional trailing 

2290 # null characters." 

2291 count = i32(s, offset+4) 

2292 value = self._decode_utf16_str(s[offset+4:offset+4+count*2]) 

2293 size = 4 + count * 2 

2294 elif property_type == VT_FILETIME: 

2295 value = long(i32(s, offset)) + (long(i32(s, offset+4))<<32) 

2296 # FILETIME is a 64-bit int: "number of 100ns periods 

2297 # since Jan 1,1601". 

2298 if convert_time and property_id not in no_conversion: 

2299 log.debug('Converting property #%d to python datetime, value=%d=%fs' 

2300 %(property_id, value, float(value)/10000000)) 

2301 # convert FILETIME to Python datetime.datetime 

2302 # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ 

2303 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) 

2304 log.debug('timedelta days=%d' % (value//(10*1000000*3600*24))) 

2305 value = _FILETIME_null_date + datetime.timedelta(microseconds=value//10) 

2306 else: 

2307 # legacy code kept for backward compatibility: returns a 

2308 # number of seconds since Jan 1,1601 

2309 value = value // 10000000 # seconds 

2310 size = 8 

2311 elif property_type == VT_UI1: # 1-byte unsigned integer 

2312 value = i8(s[offset]) 

2313 size = 1 

2314 elif property_type == VT_CLSID: 

2315 value = _clsid(s[offset:offset+16]) 

2316 size = 16 

2317 elif property_type == VT_CF: 

2318 # PropertyIdentifier or ClipboardData?? 

2319 # see https://msdn.microsoft.com/en-us/library/dd941945.aspx 

2320 count = i32(s, offset) 

2321 value = s[offset+4:offset+4+count] 

2322 size = 4 + count 

2323 elif property_type == VT_BOOL: 

2324 # VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True 

2325 # see https://msdn.microsoft.com/en-us/library/cc237864.aspx 

2326 value = bool(i16(s, offset)) 

2327 size = 2 

2328 else: 

2329 value = None # everything else yields "None" 

2330 log.debug('property id=%d: type=%d not implemented in parser yet' % (property_id, property_type)) 

2331 

2332 # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, 

2333 # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, 

2334 # see https://msdn.microsoft.com/en-us/library/dd942033.aspx 

2335 

2336 #print("%08x" % property_id, repr(value), end=" ") 

2337 #print("(%s)" % VT[i32(s, offset) & 0xFFF]) 

2338 return value, size 

2339 

2340 

2341 def get_metadata(self): 

2342 """ 

2343 Parse standard properties streams, return an OleMetadata object 

2344 containing all the available metadata. 

2345 (also stored in the metadata attribute of the OleFileIO object) 

2346 

2347 new in version 0.25 

2348 """ 

2349 self.metadata = OleMetadata() 

2350 self.metadata.parse_properties(self) 

2351 return self.metadata 

2352 

2353 def get_userdefined_properties(self, filename, convert_time=False, no_conversion=None): 

2354 """ 

2355 Return properties described in substream. 

2356 

2357 :param filename: path of stream in storage tree (see openstream for syntax) 

2358 :param convert_time: bool, if True timestamps will be converted to Python datetime 

2359 :param no_conversion: None or list of int, timestamps not to be converted 

2360 (for example total editing time is not a real timestamp) 

2361 

2362 :returns: a dictionary of values indexed by id (integer) 

2363 """ 

2364 # REFERENCE: [MS-OLEPS] https://msdn.microsoft.com/en-us/library/dd942421.aspx 

2365 # REFERENCE: https://docs.microsoft.com/en-us/openspecs/office_file_formats/ms-oshared/2ea8be67-a4a0-4e2e-b42f-49a182645562 

2366 #'D5CDD502-2E9C-101B-9397-08002B2CF9AE' 

2367 # TODO: testing the code more rigorously 

2368 # TODO: adding exception handeling 

2369 FMTID_USERDEFINED_PROPERTIES = _clsid(b'\x05\xD5\xCD\xD5\x9C\x2E\x1B\x10\x93\x97\x08\x00\x2B\x2C\xF9\xAE') 

2370 

2371 # make sure no_conversion is a list, just to simplify code below: 

2372 if no_conversion == None: 

2373 no_conversion = [] 

2374 # stream path as a string to report exceptions: 

2375 streampath = filename 

2376 if not isinstance(streampath, str): 

2377 streampath = '/'.join(streampath) 

2378 

2379 fp = self.openstream(filename) 

2380 

2381 data = [] 

2382 

2383 # header 

2384 s = fp.read(28) 

2385 clsid = _clsid(s[8:24]) 

2386 

2387 # PropertySetStream.cSections (4 bytes starts at 1c): number of property sets in this stream 

2388 sections_count = i32(s, 24) 

2389 

2390 section_file_pointers = [] 

2391 

2392 try: 

2393 for i in range(sections_count): 

2394 # format id 

2395 s = fp.read(20) 

2396 fmtid = _clsid(s[:16]) 

2397 

2398 if fmtid == FMTID_USERDEFINED_PROPERTIES: 

2399 file_pointer = i32(s, 16) 

2400 fp.seek(file_pointer) 

2401 # read saved sections 

2402 s = b"****" + fp.read(i32(fp.read(4)) - 4) 

2403 # number of properties: 

2404 num_props = i32(s, 4) 

2405 

2406 PropertyIdentifierAndOffset = s[8: 8+8*num_props] 

2407 

2408 # property names (dictionary) 

2409 # ref: https://docs.microsoft.com/en-us/openspecs/windows_protocols/MS-OLEPS/99127b7f-c440-4697-91a4-c853086d6b33 

2410 index = 8+8*num_props 

2411 entry_count = i32(s[index: index+4]) 

2412 index += 4 

2413 for i in range(entry_count): 

2414 identifier = s[index: index +4] 

2415 str_size = i32(s[index+4: index + 8]) 

2416 string = s[index+8: index+8+str_size].decode('utf_8').strip('\0') 

2417 data.append({'property_name':string, 'value':None}) 

2418 index = index+8+str_size 

2419 # clamp num_props based on the data length 

2420 num_props = min(num_props, int(len(s) / 8)) 

2421 

2422 # property values 

2423 # ref: https://docs.microsoft.com/en-us/openspecs/windows_protocols/MS-OLEPS/f122b9d7-e5cf-4484-8466-83f6fd94b3cc 

2424 for i in iterrange(2, num_props): 

2425 property_id = 0 # just in case of an exception 

2426 try: 

2427 property_id = i32(s, 8 + i * 8) 

2428 offset = i32(s, 12 + i * 8) 

2429 property_type = i32(s, offset) 

2430 

2431 vt_name = VT.get(property_type, 'UNKNOWN') 

2432 log.debug('property id=%d: type=%d/%s offset=%X' % (property_id, property_type, vt_name, offset)) 

2433 

2434 # test for common types first (should perhaps use 

2435 # a dictionary instead?) 

2436 

2437 if property_type == VT_I2: # 16-bit signed integer 

2438 value = i16(s, offset + 4) 

2439 if value >= 32768: 

2440 value = value - 65536 

2441 elif property_type == 1: 

2442 # supposed to be VT_NULL but seems it is not NULL 

2443 str_size = i32(s, offset + 8) 

2444 value = s[offset + 12:offset + 12 + str_size - 1] 

2445 

2446 elif property_type == VT_UI2: # 2-byte unsigned integer 

2447 value = i16(s, offset + 4) 

2448 elif property_type in (VT_I4, VT_INT, VT_ERROR): 

2449 # VT_I4: 32-bit signed integer 

2450 # VT_ERROR: HRESULT, similar to 32-bit signed integer, 

2451 # see https://msdn.microsoft.com/en-us/library/cc230330.aspx 

2452 value = i32(s, offset + 4) 

2453 elif property_type in (VT_UI4, VT_UINT): # 4-byte unsigned integer 

2454 value = i32(s, offset + 4) # FIXME 

2455 elif property_type in (VT_BSTR, VT_LPSTR): 

2456 # CodePageString, see https://msdn.microsoft.com/en-us/library/dd942354.aspx 

2457 # size is a 32 bits integer, including the null terminator, and 

2458 # possibly trailing or embedded null chars 

2459 # TODO: if codepage is unicode, the string should be converted as such 

2460 count = i32(s, offset + 4) 

2461 value = s[offset + 8:offset + 8 + count - 1] 

2462 # remove all null chars: 

2463 value = value.replace(b'\x00', b'') 

2464 elif property_type == VT_BLOB: 

2465 # binary large object (BLOB) 

2466 # see https://msdn.microsoft.com/en-us/library/dd942282.aspx 

2467 count = i32(s, offset + 4) 

2468 value = s[offset + 8:offset + 8 + count] 

2469 elif property_type == VT_LPWSTR: 

2470 # UnicodeString 

2471 # see https://msdn.microsoft.com/en-us/library/dd942313.aspx 

2472 # "the string should NOT contain embedded or additional trailing 

2473 # null characters." 

2474 count = i32(s, offset + 4) 

2475 value = self._decode_utf16_str(s[offset + 8:offset + 8 + count * 2]) 

2476 elif property_type == VT_FILETIME: 

2477 value = long(i32(s, offset + 4)) + (long(i32(s, offset + 8)) << 32) 

2478 # FILETIME is a 64-bit int: "number of 100ns periods 

2479 # since Jan 1,1601". 

2480 if convert_time and property_id not in no_conversion: 

2481 log.debug('Converting property #%d to python datetime, value=%d=%fs' 

2482 % (property_id, value, float(value) / 10000000)) 

2483 # convert FILETIME to Python datetime.datetime 

2484 # inspired from https://code.activestate.com/recipes/511425-filetime-to-datetime/ 

2485 _FILETIME_null_date = datetime.datetime(1601, 1, 1, 0, 0, 0) 

2486 log.debug('timedelta days=%d' % (value // (10 * 1000000 * 3600 * 24))) 

2487 value = _FILETIME_null_date + datetime.timedelta(microseconds=value // 10) 

2488 else: 

2489 # legacy code kept for backward compatibility: returns a 

2490 # number of seconds since Jan 1,1601 

2491 value = value // 10000000 # seconds 

2492 elif property_type == VT_UI1: # 1-byte unsigned integer 

2493 value = i8(s[offset + 4]) 

2494 elif property_type == VT_CLSID: 

2495 value = _clsid(s[offset + 4:offset + 20]) 

2496 elif property_type == VT_CF: 

2497 # PropertyIdentifier or ClipboardData?? 

2498 # see https://msdn.microsoft.com/en-us/library/dd941945.aspx 

2499 count = i32(s, offset + 4) 

2500 value = s[offset + 8:offset + 8 + count] 

2501 elif property_type == VT_BOOL: 

2502 # VARIANT_BOOL, 16 bits bool, 0x0000=Fals, 0xFFFF=True 

2503 # see https://msdn.microsoft.com/en-us/library/cc237864.aspx 

2504 value = bool(i16(s, offset + 4)) 

2505 else: 

2506 value = None # everything else yields "None" 

2507 log.debug( 

2508 'property id=%d: type=%d not implemented in parser yet' % (property_id, property_type)) 

2509 

2510 # missing: VT_EMPTY, VT_NULL, VT_R4, VT_R8, VT_CY, VT_DATE, 

2511 # VT_DECIMAL, VT_I1, VT_I8, VT_UI8, 

2512 # see https://msdn.microsoft.com/en-us/library/dd942033.aspx 

2513 

2514 # FIXME: add support for VT_VECTOR 

2515 # VT_VECTOR is a 32 uint giving the number of items, followed by 

2516 # the items in sequence. The VT_VECTOR value is combined with the 

2517 # type of items, e.g. VT_VECTOR|VT_BSTR 

2518 # see https://msdn.microsoft.com/en-us/library/dd942011.aspx 

2519 

2520 # print("%08x" % property_id, repr(value), end=" ") 

2521 # print("(%s)" % VT[i32(s, offset) & 0xFFF]) 

2522 

2523 data[i-2]['value']=value 

2524 except BaseException as exc: 

2525 # catch exception while parsing each property, and only raise 

2526 # a DEFECT_INCORRECT, because parsing can go on 

2527 msg = 'Error while parsing property id %d in stream %s: %s' % ( 

2528 property_id, repr(streampath), exc) 

2529 self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) 

2530 

2531 except BaseException as exc: 

2532 # catch exception while parsing property header, and only raise 

2533 # a DEFECT_INCORRECT then return an empty dict, because this is not 

2534 # a fatal error when parsing the whole file 

2535 msg = 'Error while parsing properties header in stream %s: %s' % ( 

2536 repr(streampath), exc) 

2537 self._raise_defect(DEFECT_INCORRECT, msg, type(exc)) 

2538 return data 

2539 

2540 return data 

2541 

2542 

2543# -------------------------------------------------------------------- 

2544# This script can be used to dump the directory of any OLE2 structured 

2545# storage file. 

2546 

def main():
    """
    Main function when olefile is run as a script from the command line.
    This will open an OLE2 file and display its structure and properties
    :return: nothing
    """
    import sys
    import optparse

    DEFAULT_LOG_LEVEL = "warning" # Default log level
    LOG_LEVELS = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
        'error': logging.ERROR,
        'critical': logging.CRITICAL
    }

    usage = 'usage: %prog [options] <filename> [filename2 ...]'
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-c", action="store_true", dest="check_streams",
                      help='check all streams (for debugging purposes)')
    # (fixed typo in help text: "propertires" -> "properties")
    parser.add_option("-p", action="store_true", dest="extract_customprop",
                      help='extract all user-defined properties')
    parser.add_option("-d", action="store_true", dest="debug_mode",
                      help='debug mode, shortcut for -l debug (displays a lot of debug information, for developers only)')
    parser.add_option('-l', '--loglevel', dest="loglevel", action="store", default=DEFAULT_LOG_LEVEL,
                      help="logging level debug/info/warning/error/critical (default=%default)")

    (options, args) = parser.parse_args()

    print('olefile version {} {} - https://www.decalage.info/en/olefile\n'.format(__version__, __date__))

    # Print help if no arguments are passed
    if len(args) == 0:
        print(__doc__)
        parser.print_help()
        sys.exit()

    if options.debug_mode:
        options.loglevel = 'debug'

    # setup logging to the console
    logging.basicConfig(level=LOG_LEVELS[options.loglevel], format='%(levelname)-8s %(message)s')

    # also enable the module's logger:
    enable_logging()

    for filename in args:
        try:
            ole = OleFileIO(filename)#, raise_defects=DEFECT_INCORRECT)
            print("-" * 68)
            print(filename)
            print("-" * 68)
            ole.dumpdirectory()
            for streamname in ole.listdir():
                # property streams start with \x05 by convention in this file's parser
                if streamname[-1][0] == "\005":
                    print("%r: properties" % streamname)
                    try:
                        props = ole.getproperties(streamname, convert_time=True)
                        props = sorted(props.items())
                        for k, v in props:
                            # [PL]: avoid to display too large or binary values:
                            if isinstance(v, (basestring, bytes)):
                                if len(v) > 50:
                                    v = v[:50]
                            if isinstance(v, bytes):
                                # quick and dirty binary check:
                                for c in (1,2,3,4,5,6,7,11,12,14,15,16,17,18,19,20,
                                          21,22,23,24,25,26,27,28,29,30,31):
                                    if c in bytearray(v):
                                        v = '(binary data)'
                                        break
                            print("   ", k, v)
                    except Exception:
                        log.exception('Error while parsing property stream %r' % streamname)

                    try:
                        if options.extract_customprop:
                            variables = ole.get_userdefined_properties(streamname, convert_time=True)
                            if len(variables):
                                print("%r: user-defined properties" % streamname)
                                for index, variable in enumerate(variables):
                                    print('\t{} {}: {}'.format(index, variable['property_name'],variable['value']))
                    # BUGFIX: was a bare "except:", which also swallowed
                    # SystemExit/KeyboardInterrupt; catch Exception instead.
                    except Exception:
                        log.exception('Error while parsing user-defined property stream %r' % streamname)

            if options.check_streams:
                # Read all streams to check if there are errors:
                print('\nChecking streams...')
                for streamname in ole.listdir():
                    # print name using repr() to convert binary chars to \xNN:
                    print('-', repr('/'.join(streamname)),'-', end=' ')
                    st_type = ole.get_type(streamname)
                    if st_type == STGTY_STREAM:
                        print('size %d' % ole.get_size(streamname))
                        # just try to read stream in memory:
                        ole.openstream(streamname)
                    else:
                        print('NOT a stream : type=%d' % st_type)
                print()

            # for streamname in ole.listdir():
            #     # print name using repr() to convert binary chars to \xNN:
            #     print('-', repr('/'.join(streamname)),'-', end=' ')
            #     print(ole.getmtime(streamname))
            # print()

            print('Modification/Creation times of all directory entries:')
            for entry in ole.direntries:
                if entry is not None:
                    print('- {}: mtime={} ctime={}'.format(entry.name,
                                                           entry.getmtime(), entry.getctime()))
            print()

            # parse and display metadata:
            try:
                meta = ole.get_metadata()
                meta.dump()
            except Exception:
                log.exception('Error while parsing metadata')
            print()
            # [PL] Test a few new methods:
            root = ole.get_rootentry_name()
            print('Root entry name: "%s"' % root)
            if ole.exists('worddocument'):
                print("This is a Word document.")
                print("type of stream 'WordDocument':", ole.get_type('worddocument'))
                print("size :", ole.get_size('worddocument'))
            if ole.exists('macros/vba'):
                print("This document may contain VBA macros.")

            # print parsing issues:
            print('\nNon-fatal issues raised during parsing:')
            if ole.parsing_issues:
                for exctype, msg in ole.parsing_issues:
                    print('- {}: {}'.format(exctype.__name__, msg))
            else:
                print('None')
            ole.close()
        except Exception:
            log.exception('Error while parsing file %r' % filename)

2691 

2692 

# Standard script entry point: run main() only when olefile is executed
# directly (e.g. "python olefile.py file.doc"), not when it is imported.
if __name__ == "__main__":
    main()

2695 

2696# this code was developed while listening to The Wedding Present "Sea Monsters"