Line | Count | Source |
1 | | /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * |
2 | | * |
3 | | * Copyright 2004 Komarov Valery |
4 | | * Copyright 2006 Christophe Leitienne |
5 | | * Copyright 2008-2017 David Hoerl |
6 | | * Copyright 2013 Bob Colbert |
7 | | * Copyright 2013-2018 Evan Miller |
8 | | * |
9 | | * This file is part of libxls -- A multiplatform, C/C++ library for parsing |
10 | | * Excel(TM) files. |
11 | | * |
12 | | * Redistribution and use in source and binary forms, with or without |
13 | | * modification, are permitted provided that the following conditions are met: |
14 | | * |
15 | | * 1. Redistributions of source code must retain the above copyright notice, |
16 | | * this list of conditions and the following disclaimer. |
17 | | * |
18 | | * 2. Redistributions in binary form must reproduce the above copyright |
19 | | * notice, this list of conditions and the following disclaimer in the |
20 | | * documentation and/or other materials provided with the distribution. |
21 | | * |
22 | | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ''AS |
23 | | * IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, |
24 | | * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
25 | | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR |
26 | | * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
27 | | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
28 | | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
29 | | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
30 | | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
31 | | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
32 | | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
33 | | * |
34 | | */ |
35 | | |
36 | | #include "config.h" |
37 | | |
38 | | #include <memory.h> |
39 | | #include <string.h> |
40 | | #include <stdio.h> |
41 | | #include <stdlib.h> |
42 | | |
43 | | #include "../include/libxls/ole.h" |
44 | | #include "../include/libxls/xlstool.h" |
45 | | #include "../include/libxls/endian.h" |
46 | | |
47 | | extern int xls_debug; |
48 | | |
49 | | //#define OLE_DEBUG |
50 | | |
51 | | //static const DWORD MSATSECT = 0xFFFFFFFC; // -4 |
52 | | //static const DWORD FATSECT = 0xFFFFFFFD; // -3 |
53 | | static const DWORD ENDOFCHAIN = 0xFFFFFFFE; // -2 |
54 | | static const DWORD FREESECT = 0xFFFFFFFF; // -1 |
55 | | |
56 | | static size_t sector_pos(OLE2* ole2, DWORD sid); |
57 | | static ssize_t sector_read(OLE2* ole2, void *buffer, size_t buffer_len, DWORD sid); |
58 | | static ssize_t read_MSAT(OLE2* ole2, OLE2Header *oleh); |
59 | | static void *ole_malloc(size_t len); |
60 | | static void *ole_realloc(void *ptr, size_t len); |
61 | | |
/*
 * Size-limited malloc.
 *
 * Rejects empty requests and anything larger than 1<<24 bytes by returning
 * NULL; otherwise forwards to malloc() and returns its result.
 */
static void *ole_malloc(size_t len) {
    const size_t limit = (size_t)1 << 24;

    if (len == 0 || len > limit)
        return NULL;

    return malloc(len);
}
68 | | |
/*
 * Size-limited realloc.
 *
 * A request of zero bytes or of more than 1<<24 bytes releases `ptr` and
 * yields NULL. Any other request is forwarded to realloc() unchanged.
 */
static void *ole_realloc(void *ptr, size_t len) {
    const size_t limit = (size_t)1 << 24;

    if (len != 0 && len <= limit)
        return realloc(ptr, len);

    free(ptr);
    return NULL;
}
76 | | |
77 | 2.91k | static int ole2_validate_sector_chain(DWORD *chain, DWORD chain_count, DWORD chain_start) { |
78 | 2.91k | DWORD count = 0; |
79 | 2.91k | DWORD sector = chain_start; |
80 | 16.5M | while (sector != ENDOFCHAIN) { |
81 | 16.5M | if (sector >= chain_count) |
82 | 154 | return 0; |
83 | | |
84 | 16.5M | if (++count >= chain_count) |
85 | 26 | return 0; |
86 | | |
87 | 16.5M | sector = xlsIntVal(chain[sector]); |
88 | 16.5M | } |
89 | 2.73k | return 1; |
90 | 2.91k | } |
91 | | |
92 | 3.62M | static int ole2_validate_sector(DWORD sector, OLE2 *ole) { |
93 | 3.62M | if (sector >= ole->SecIDCount) { |
94 | 2.99k | if (xls_debug) fprintf(stderr, "Error: fatpos %d out-of-bounds for SecID[%d]\n", |
95 | 0 | (int)sector, ole->SecIDCount); |
96 | 2.99k | return 0; |
97 | 2.99k | } |
98 | | |
99 | 3.62M | if (sector == xlsIntVal(ole->SecID[sector])) { |
100 | 4 | if (xls_debug) fprintf(stderr, "Error: Sector loop detected, SecID[%d] = %d\n", |
101 | 0 | (int)sector, (int)sector); |
102 | 4 | return 0; |
103 | 4 | } |
104 | | |
105 | 3.62M | return 1; |
106 | 3.62M | } |
107 | | |
108 | | // Read next sector of stream |
109 | | static int ole2_bufread(OLE2Stream* olest) |
110 | 855k | { |
111 | 855k | BYTE *ptr; |
112 | | |
113 | | #ifdef OLE_DEBUG |
114 | | fprintf(stderr, "----------------------------------------------\n"); |
115 | | fprintf(stderr, "ole2_bufread (start)\n"); |
116 | | #endif |
117 | | |
118 | 855k | if (olest == NULL || olest->ole == NULL) |
119 | 0 | return -1; |
120 | | |
121 | 855k | if ((DWORD)olest->fatpos!=ENDOFCHAIN) |
122 | 845k | { |
123 | 845k | if(olest->sfat) { |
124 | 216k | if (olest->ole->SSAT == NULL || olest->buf == NULL || olest->ole->SSecID == NULL) |
125 | 18 | return -1; |
126 | | |
127 | 215k | if (olest->fatpos*olest->ole->lssector + olest->bufsize > olest->ole->SSATCount) { |
128 | 153 | if (xls_debug) fprintf(stderr, "Error: fatpos %d out-of-bounds for SSAT\n", (int)olest->fatpos); |
129 | 153 | return -1; |
130 | 153 | } |
131 | | |
132 | 215k | ptr = olest->ole->SSAT + olest->fatpos*olest->ole->lssector; |
133 | 215k | memcpy(olest->buf, ptr, olest->bufsize); |
134 | | |
135 | 215k | if (olest->fatpos >= olest->ole->SSecIDCount) { |
136 | 0 | if (xls_debug) fprintf(stderr, "Error: fatpos %d out-of-bounds for SSecID[%d]\n", |
137 | 0 | (int)olest->fatpos, olest->ole->SSecIDCount); |
138 | 0 | return -1; |
139 | 0 | } |
140 | | |
141 | 215k | olest->fatpos=xlsIntVal(olest->ole->SSecID[olest->fatpos]); |
142 | 215k | olest->pos=0; |
143 | 215k | olest->cfat++; |
144 | 629k | } else { |
145 | 629k | if ((int)olest->fatpos < 0 || |
146 | 629k | sector_read(olest->ole, olest->buf, olest->bufsize, olest->fatpos) == -1) { |
147 | 7.16k | if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", (int)olest->fatpos); |
148 | 7.16k | return -1; |
149 | 7.16k | } |
150 | | |
151 | 622k | if (!ole2_validate_sector(olest->fatpos, olest->ole)) { |
152 | 0 | return -1; |
153 | 0 | } |
154 | | |
155 | 622k | olest->fatpos = xlsIntVal(olest->ole->SecID[olest->fatpos]); |
156 | 622k | olest->pos=0; |
157 | 622k | olest->cfat++; |
158 | 622k | } |
159 | 845k | } |
160 | | #ifdef OLE_DEBUG |
161 | | fprintf(stderr, "----------------------------------------------\n"); |
162 | | fprintf(stderr, "ole2_bufread (end)\n"); |
163 | | #endif |
164 | | // else printf("ENDOFCHAIN!!!\n"); |
165 | 848k | return 0; |
166 | 855k | } |
167 | | |
168 | | // Read part of stream |
169 | | ssize_t ole2_read(void* buf, size_t size, size_t count, OLE2Stream* olest) |
170 | 8.23M | { |
171 | 8.23M | size_t didReadCount=0; |
172 | 8.23M | size_t totalReadCount; |
173 | | |
174 | 8.23M | totalReadCount=size*count; |
175 | | |
176 | | // olest->size inited to -1 |
177 | | // printf("===== ole2_read(%ld bytes)\n", totalReadCount); |
178 | | |
179 | 8.23M | if ((long)olest->size>=0 && !olest->sfat) // directory is -1 |
180 | 6.33M | { |
181 | 6.33M | size_t rem; |
182 | 6.33M | rem = olest->size - (olest->cfat*olest->ole->lsector+olest->pos); |
183 | 6.33M | totalReadCount = rem<totalReadCount?rem:totalReadCount; |
184 | 6.33M | if (rem<=0) olest->eof=1; |
185 | | |
186 | | // printf(" rem=%ld olest->size=%d - subfunc=%d\n", rem, olest->size, (olest->cfat*olest->ole->lsector+olest->pos) ); |
187 | | //printf(" totalReadCount=%d (rem=%d size*count=%ld)\n", totalReadCount, rem, size*count); |
188 | 6.33M | } |
189 | | |
190 | 17.2M | while ((!olest->eof) && (didReadCount < totalReadCount)) |
191 | 8.97M | { |
192 | 8.97M | unsigned long remainingBytes; |
193 | 8.97M | size_t needToReadCount; |
194 | | |
195 | 8.97M | needToReadCount = totalReadCount - didReadCount; |
196 | 8.97M | remainingBytes = olest->bufsize - olest->pos; |
197 | | |
198 | 8.97M | if (needToReadCount < remainingBytes) { // does the current sector contain all the data I need? |
199 | 8.15M | memcpy((BYTE*)buf + didReadCount, olest->buf + olest->pos, needToReadCount); |
200 | 8.15M | olest->pos += needToReadCount; |
201 | 8.15M | didReadCount += needToReadCount; |
202 | 8.15M | } else { |
203 | 817k | memcpy((BYTE*)buf + didReadCount, olest->buf + olest->pos, remainingBytes); |
204 | 817k | olest->pos += remainingBytes; |
205 | 817k | didReadCount += remainingBytes; |
206 | 817k | if (ole2_bufread(olest) == -1) |
207 | 6.85k | return -1; |
208 | 817k | } |
209 | 8.96M | if (((DWORD)olest->fatpos == ENDOFCHAIN) && (olest->pos >= olest->bufsize)) { |
210 | 8.97k | olest->eof=1; |
211 | 8.97k | } |
212 | 8.96M | } |
213 | 8.23M | if (didReadCount > totalReadCount) |
214 | 0 | return -1; |
215 | | |
216 | | // printf(" didReadCount=%ld EOF=%d\n", didReadCount, olest->eof); |
217 | | // printf("=====\n"); |
218 | | |
219 | | #ifdef OLE_DEBUG |
220 | | fprintf(stderr, "----------------------------------------------\n"); |
221 | | fprintf(stderr, "ole2_read (end)\n"); |
222 | | fprintf(stderr, "start: %d \n",olest->start); |
223 | | fprintf(stderr, "pos: %d \n",(int)olest->pos); |
224 | | fprintf(stderr, "cfat: %d \n",(int)olest->cfat); |
225 | | fprintf(stderr, "size: %d \n",(int)olest->size); |
226 | | fprintf(stderr, "fatpos: %d \n",(int)olest->fatpos); |
227 | | fprintf(stderr, "bufsize: %d \n",(int)olest->bufsize); |
228 | | fprintf(stderr, "eof: %d \n",olest->eof); |
229 | | #endif |
230 | | |
231 | 8.23M | return didReadCount; |
232 | 8.23M | } |
233 | | |
234 | | // Open stream in logical ole file |
235 | | OLE2Stream* ole2_sopen(OLE2* ole,DWORD start, size_t size) |
236 | 2.91k | { |
237 | 2.91k | OLE2Stream* olest=NULL; |
238 | 2.91k | int success = 1; |
239 | | |
240 | | #ifdef OLE_DEBUG |
241 | | fprintf(stderr, "----------------------------------------------\n"); |
242 | | fprintf(stderr, "ole2_sopen start=%Xh\n", start); |
243 | | #endif |
244 | | |
245 | 2.91k | olest = calloc(1, sizeof(OLE2Stream)); |
246 | 2.91k | olest->ole=ole; |
247 | 2.91k | olest->size=size; |
248 | 2.91k | olest->fatpos=start; |
249 | 2.91k | olest->start=start; |
250 | 2.91k | olest->cfat=-1; |
251 | 2.91k | if((long)size > 0 && size < (size_t)ole->sectorcutoff) { |
252 | 585 | olest->bufsize=ole->lssector; |
253 | 585 | olest->sfat = 1; |
254 | 2.33k | } else { |
255 | 2.33k | olest->bufsize=ole->lsector; |
256 | 2.33k | } |
257 | 2.91k | if ((olest->buf = ole_malloc(olest->bufsize)) == NULL) { |
258 | 0 | success = 0; |
259 | 0 | goto cleanup; |
260 | 0 | } |
261 | | |
262 | 2.91k | if (olest->sfat) { |
263 | 585 | if (!ole2_validate_sector_chain(ole->SSecID, ole->SSecIDCount, start)) { |
264 | 72 | success = 0; |
265 | 72 | goto cleanup; |
266 | 72 | } |
267 | 2.33k | } else { |
268 | 2.33k | if (!ole2_validate_sector_chain(ole->SecID, ole->SecIDCount, start)) { |
269 | 108 | success = 0; |
270 | 108 | goto cleanup; |
271 | 108 | } |
272 | 2.33k | } |
273 | | |
274 | 2.73k | if (ole2_bufread(olest) == -1) { |
275 | 45 | success = 0; |
276 | 45 | goto cleanup; |
277 | 45 | } |
278 | | |
279 | 2.91k | cleanup: |
280 | 2.91k | if (!success) { |
281 | 225 | ole2_fclose(olest); |
282 | 225 | olest = NULL; |
283 | 225 | } |
284 | | |
285 | | // if(xls_debug) printf("sopen: sector=%d next=%d\n", start, olest->fatpos); |
286 | 2.91k | return olest; |
287 | 2.73k | } |
288 | | |
289 | | // Move in stream |
290 | | int ole2_seek(OLE2Stream* olest,DWORD ofs) |
291 | 38.5k | { |
292 | | #ifdef OLE_DEBUG |
293 | | fprintf(stderr, "SEEK %x\n", ofs); |
294 | | #endif |
295 | 38.5k | if(olest->sfat) { |
296 | 8.87k | ldiv_t div_rez=ldiv(ofs,olest->ole->lssector); |
297 | 8.87k | int i; |
298 | 8.87k | olest->fatpos=olest->start; |
299 | | |
300 | 8.87k | if (div_rez.quot!=0) |
301 | 5.85k | { |
302 | 67.3k | for (i=0;i<div_rez.quot;i++) { |
303 | 62.3k | if (olest->fatpos >= olest->ole->SSecIDCount) |
304 | 790 | return -1; |
305 | 61.5k | olest->fatpos=xlsIntVal(olest->ole->SSecID[olest->fatpos]); |
306 | 61.5k | } |
307 | 5.85k | } |
308 | | |
309 | 8.08k | if (ole2_bufread(olest) == -1) |
310 | 57 | return -1; |
311 | | |
312 | 8.02k | olest->pos=div_rez.rem; |
313 | 8.02k | olest->eof=0; |
314 | 8.02k | olest->cfat=div_rez.quot; |
315 | | //printf("%i=%i %i\n",ofs,div_rez.quot,div_rez.rem); |
316 | 29.6k | } else { |
317 | 29.6k | ldiv_t div_rez=ldiv(ofs,olest->ole->lsector); |
318 | 29.6k | int i; |
319 | | #ifdef OLE_DEBUG |
320 | | fprintf(stderr, "seeking fatpos%lu start %u\n", olest->fatpos, olest->start); |
321 | | #endif |
322 | 29.6k | olest->fatpos=olest->start; |
323 | | |
324 | 29.6k | if (div_rez.quot!=0) |
325 | 4.88k | { |
326 | 165k | for (i=0;i<div_rez.quot;i++) { |
327 | 163k | if (!ole2_validate_sector(olest->fatpos, olest->ole)) |
328 | 2.95k | return -1; |
329 | 160k | olest->fatpos=xlsIntVal(olest->ole->SecID[olest->fatpos]); |
330 | 160k | } |
331 | 4.88k | } |
332 | | |
333 | 26.6k | if (ole2_bufread(olest) == -1) |
334 | 378 | return -1; |
335 | | |
336 | 26.2k | olest->pos=div_rez.rem; |
337 | 26.2k | olest->eof=0; |
338 | 26.2k | olest->cfat=div_rez.quot; |
339 | | //printf("%i=%i %i\n",ofs,div_rez.quot,div_rez.rem); |
340 | 26.2k | } |
341 | 34.3k | return 0; |
342 | 38.5k | } |
343 | | |
344 | | // Open logical file contained in physical OLE file |
345 | | OLE2Stream* ole2_fopen(OLE2* ole, const char *file) |
346 | 4.15k | { |
347 | 4.15k | int i; |
348 | | |
349 | | #ifdef OLE_DEBUG |
350 | | fprintf(stderr, "----------------------------------------------\n"); |
351 | | fprintf(stderr, "ole2_fopen %s\n", file); |
352 | | #endif |
353 | | |
354 | 11.0k | for (i=0;i<ole->files.count;i++) { |
355 | 8.32k | char *str = ole->files.file[i].name; |
356 | | #ifdef OLE_DEBUG |
357 | | fprintf(stderr, "----------------------------------------------\n"); |
358 | | fprintf(stderr, "ole2_fopen found %s\n", str); |
359 | | #endif |
360 | 8.32k | if (str && strcmp(str,file)==0) // newer versions of Excel don't write the "Root Entry" string for the first set of data |
361 | 1.41k | { |
362 | 1.41k | return ole2_sopen(ole,ole->files.file[i].start,ole->files.file[i].size); |
363 | 1.41k | } |
364 | 8.32k | } |
365 | 2.73k | return NULL; |
366 | 4.15k | } |
367 | | |
368 | 3.67M | static int ole2_fseek(OLE2 *ole2, size_t pos) { |
369 | 3.67M | if (ole2->file) |
370 | 0 | return fseek(ole2->file, pos, SEEK_SET); |
371 | | |
372 | 3.67M | if (pos > ole2->buffer_len) |
373 | 6.85k | return -1; |
374 | | |
375 | 3.66M | ole2->buffer_pos = pos; |
376 | 3.66M | return 0; |
377 | 3.67M | } |
378 | | |
379 | | // Will read up to `size' bytes from the input, and pad the rest of `size' with |
380 | | // zeros if the input file or buffer is short. |
381 | 3.66M | static size_t ole2_fread(OLE2 *ole2, void *buffer, size_t buffer_len, size_t size) { |
382 | 3.66M | if (size > buffer_len) |
383 | 7 | return 0; |
384 | | |
385 | 3.66M | memset(buffer, 0, size); |
386 | | |
387 | 3.66M | if (ole2->file) |
388 | 0 | return fread(buffer, 1, size, ole2->file) > 0; |
389 | | |
390 | 3.66M | if (ole2->buffer_pos >= ole2->buffer_len) |
391 | 443 | return 0; |
392 | | |
393 | 3.66M | if (ole2->buffer_pos + size > ole2->buffer_len) |
394 | 1.40M | size = ole2->buffer_len - ole2->buffer_pos; |
395 | | |
396 | 3.66M | memcpy(buffer, (const char *)ole2->buffer + ole2->buffer_pos, size); |
397 | 3.66M | ole2->buffer_pos += size; |
398 | | |
399 | 3.66M | return 1; |
400 | 3.66M | } |
401 | | |
402 | | // read header and check magic numbers |
403 | 1.74k | static ssize_t ole2_read_header(OLE2 *ole) { |
404 | 1.74k | ssize_t bytes_read = 0, total_bytes_read = 0; |
405 | 1.74k | OLE2Header *oleh = malloc(sizeof(OLE2Header)); |
406 | 1.74k | if (ole2_fread(ole, oleh, sizeof(OLE2Header), sizeof(OLE2Header)) != 1) { |
407 | 0 | total_bytes_read = -1; |
408 | 0 | goto cleanup; |
409 | 0 | } |
410 | 1.74k | total_bytes_read += sizeof(OLE2Header); |
411 | 1.74k | xlsConvertHeader(oleh); |
412 | | |
413 | | // make sure the file looks good. Note: this code only works on Little Endian machines |
414 | 1.74k | if(oleh->id[0] != 0xE011CFD0 || oleh->id[1] != 0xE11AB1A1 || oleh->byteorder != 0xFFFE) { |
415 | 40 | if (xls_debug) fprintf(stderr, "Not an excel file\n"); |
416 | 40 | total_bytes_read = -1; |
417 | 40 | goto cleanup; |
418 | 40 | } |
419 | | |
420 | | //ole->lsector=(WORD)pow(2,oleh->lsector); |
421 | | //ole->lssector=(WORD)pow(2,oleh->lssector); |
422 | 1.70k | ole->lsector=512; |
423 | 1.70k | ole->lssector=64; |
424 | | |
425 | 1.70k | if (oleh->lsectorB != 9 || oleh->lssectorB != 6) { // 2**9 == 512, 2**6 == 64 |
426 | 11 | if (xls_debug) fprintf(stderr, "Unexpected sector size\n"); |
427 | 11 | total_bytes_read = -1; |
428 | 11 | goto cleanup; |
429 | 11 | } |
430 | | |
431 | 1.69k | ole->cfat=oleh->cfat; |
432 | 1.69k | ole->dirstart=oleh->dirstart; |
433 | 1.69k | ole->sectorcutoff=oleh->sectorcutoff; |
434 | 1.69k | ole->sfatstart=oleh->sfatstart; |
435 | 1.69k | ole->csfat=oleh->csfat; |
436 | 1.69k | ole->difstart=oleh->difstart; |
437 | 1.69k | ole->cdif=oleh->cdif; |
438 | 1.69k | ole->files.count=0; |
439 | | |
440 | | #ifdef OLE_DEBUG |
441 | | fprintf(stderr, "==== OLE HEADER ====\n"); |
442 | | //printf ("Header Size: %i \n", sizeof(OLE2Header)); |
443 | | //printf ("id[0]-id[1]: %X-%X \n", oleh->id[0], oleh->id[1]); |
444 | | fprintf(stderr, "verminor: %X \n",oleh->verminor); |
445 | | fprintf(stderr, "verdll: %X \n",oleh->verdll); |
446 | | //printf ("Byte order: %X \n",oleh->byteorder); |
447 | | fprintf(stderr, "sect len: %X (%i)\n",ole->lsector,ole->lsector); // ole |
448 | | fprintf(stderr, "mini len: %X (%i)\n",ole->lssector,ole->lssector); // ole |
449 | | fprintf(stderr, "Fat sect.: %i \n",oleh->cfat); |
450 | | fprintf(stderr, "Dir Start: %i \n",oleh->dirstart); |
451 | | |
452 | | fprintf(stderr, "Mini Cutoff: %i \n",oleh->sectorcutoff); |
453 | | fprintf(stderr, "MiniFat Start: %X \n",oleh->sfatstart); |
454 | | fprintf(stderr, "Count MFat: %i \n",oleh->csfat); |
455 | | fprintf(stderr, "Dif start: %X \n",oleh->difstart); |
456 | | fprintf(stderr, "Count Dif: %i \n",oleh->cdif); |
457 | | fprintf(stderr, "Fat Size: %u (0x%X) \n",oleh->cfat*ole->lsector,oleh->cfat*ole->lsector); |
458 | | #endif |
459 | | // read directory entries |
460 | 1.69k | if ((bytes_read = read_MSAT(ole, oleh)) == -1) { |
461 | 194 | total_bytes_read = -1; |
462 | 194 | goto cleanup; |
463 | 194 | } |
464 | 1.50k | total_bytes_read += bytes_read; |
465 | | |
466 | 1.74k | cleanup: |
467 | 1.74k | free(oleh); |
468 | | |
469 | 1.74k | return total_bytes_read; |
470 | 1.50k | } |
471 | | |
472 | 1.50k | static ssize_t ole2_read_body(OLE2 *ole) { |
473 | | // reuse this buffer |
474 | 1.50k | PSS *pss = NULL; |
475 | 1.50k | OLE2Stream *olest = NULL; |
476 | 1.50k | char* name = NULL; |
477 | 1.50k | ssize_t bytes_read = 0, total_bytes_read = 0; |
478 | | |
479 | 1.50k | if ((olest = ole2_sopen(ole,ole->dirstart, -1)) == NULL) { |
480 | 73 | total_bytes_read = -1; |
481 | 73 | goto cleanup; |
482 | 73 | } |
483 | 1.43k | pss = malloc(sizeof(PSS)); |
484 | 7.29k | do { |
485 | 7.29k | if ((bytes_read = ole2_read(pss,1,sizeof(PSS),olest)) == -1) { |
486 | 10 | total_bytes_read = -1; |
487 | 10 | goto cleanup; |
488 | 10 | } |
489 | 7.28k | total_bytes_read += bytes_read; |
490 | 7.28k | xlsConvertPss(pss); |
491 | 7.28k | if (pss->bsize > sizeof(pss->name)) { |
492 | 14 | total_bytes_read = -1; |
493 | 14 | goto cleanup; |
494 | 14 | } |
495 | 7.26k | name=transcode_utf16_to_utf8(pss->name, pss->bsize); |
496 | | #ifdef OLE_DEBUG |
497 | | fprintf(stderr, "OLE NAME: %s count=%d\n", name, (int)ole->files.count); |
498 | | #endif |
499 | 7.26k | if (pss->type == PS_USER_ROOT || pss->type == PS_USER_STREAM) // (name!=NULL) // |
500 | 3.01k | { |
501 | | |
502 | | #ifdef OLE_DEBUG |
503 | | fprintf(stderr, "OLE TYPE: %s file=%d size=%d\n", |
504 | | pss->type == PS_USER_ROOT ? "root" : "user", |
505 | | (int)ole->files.count, (int)pss->size); |
506 | | #endif |
507 | 3.01k | ole->files.file = realloc(ole->files.file,(ole->files.count+1)*sizeof(struct st_olefiles_data)); |
508 | 3.01k | ole->files.file[ole->files.count].name=name; |
509 | 3.01k | ole->files.file[ole->files.count].start=pss->sstart; |
510 | 3.01k | ole->files.file[ole->files.count].size=pss->size; |
511 | 3.01k | ole->files.count++; |
512 | | |
513 | | #ifdef OLE_DEBUG |
514 | | fprintf(stderr, "----------------------------------------------\n"); |
515 | | fprintf(stderr, "name: %s (size=%d [c=%c])\n", name, pss->bsize, name ? name[0]:' '); |
516 | | fprintf(stderr, "bsize %i\n",pss->bsize); |
517 | | fprintf(stderr, "type %i\n",pss->type); |
518 | | fprintf(stderr, "flag %i\n",pss->flag); |
519 | | fprintf(stderr, "left %X\n",pss->left); |
520 | | fprintf(stderr, "right %X\n",pss->right); |
521 | | fprintf(stderr, "child %X\n",pss->child); |
522 | | fprintf(stderr, "guid %.4X-%.4X-%.4X-%.4X %.4X-%.4X-%.4X-%.4X\n", |
523 | | pss->guid[0],pss->guid[1],pss->guid[2],pss->guid[3], |
524 | | pss->guid[4],pss->guid[5],pss->guid[6],pss->guid[7]); |
525 | | fprintf(stderr, "user flag %.4X\n",pss->userflags); |
526 | | fprintf(stderr, "sstart %.4d\n",pss->sstart); |
527 | | fprintf(stderr, "size %.4d\n",pss->size); |
528 | | #endif |
529 | 3.01k | if(pss->sstart == ENDOFCHAIN) { |
530 | 55 | if (xls_debug) verbose("END OF CHAIN\n"); |
531 | 2.95k | } else if(pss->type == PS_USER_STREAM) { |
532 | 2.20k | } else if(pss->type == PS_USER_ROOT) { |
533 | 752 | DWORD sector, k, blocks; |
534 | 752 | BYTE *wptr; |
535 | 752 | size_t bytes_left; |
536 | | |
537 | 752 | blocks = (pss->size + (ole->lsector - 1)) / ole->lsector; // count partial |
538 | | #ifdef OLE_DEBUG |
539 | | fprintf(stderr, "OLE BLOCKS: %d = (%d + (%d - 1))/%d\n", |
540 | | (int)blocks, (int)pss->size, (int)ole->lsector, (int)ole->lsector); |
541 | | #endif |
542 | 752 | if ((ole->SSAT = ole_realloc(ole->SSAT, blocks*ole->lsector)) == NULL) { |
543 | 11 | total_bytes_read = -1; |
544 | 11 | goto cleanup; |
545 | 11 | } |
546 | 741 | ole->SSATCount = blocks*ole->lsector; |
547 | | // printf("blocks %d\n", blocks); |
548 | | |
549 | 741 | sector = pss->sstart; |
550 | 741 | wptr = (BYTE*)ole->SSAT; |
551 | 741 | bytes_left = blocks*ole->lsector; |
552 | 1.29M | for(k=0; k<blocks; ++k) { |
553 | | // printf("block %d sector %d\n", k, sector); |
554 | 1.29M | if (sector == ENDOFCHAIN || sector_read(ole, wptr, bytes_left, sector) == -1) { |
555 | 37 | if (xls_debug) fprintf(stderr, "Unable to read sector #%d\n", sector); |
556 | 37 | total_bytes_read = -1; |
557 | 37 | goto cleanup; |
558 | 37 | } |
559 | 1.29M | if (!ole2_validate_sector(sector, ole)) { |
560 | 19 | total_bytes_read = -1; |
561 | 19 | goto cleanup; |
562 | 19 | } |
563 | 1.29M | total_bytes_read += ole->lsector; |
564 | 1.29M | wptr += ole->lsector; |
565 | 1.29M | bytes_left -= ole->lsector; |
566 | 1.29M | sector = xlsIntVal(ole->SecID[sector]); |
567 | 1.29M | } |
568 | 741 | } |
569 | 4.25k | } else { |
570 | 4.25k | free(name); |
571 | 4.25k | } |
572 | 7.26k | } while (!olest->eof); |
573 | | |
574 | 1.50k | cleanup: |
575 | 1.50k | if (olest) |
576 | 1.43k | ole2_fclose(olest); |
577 | 1.50k | if (pss) |
578 | 1.43k | free(pss); |
579 | | |
580 | | #ifdef OLE_DEBUG |
581 | | fprintf(stderr, "----------------------------------------------\n"); |
582 | | fprintf(stderr, "ole2_read_body: %d bytes\n", (int)total_bytes_read); |
583 | | #endif |
584 | | |
585 | 1.50k | return total_bytes_read; |
586 | 1.43k | } |
587 | | |
588 | 1.74k | OLE2 *ole2_read_header_and_body(OLE2 *ole) { |
589 | 1.74k | if (ole2_read_header(ole) == -1) { |
590 | 245 | ole2_close(ole); |
591 | 245 | return NULL; |
592 | 245 | } |
593 | | |
594 | 1.50k | if (ole2_read_body(ole) == -1) { |
595 | 164 | ole2_close(ole); |
596 | 164 | return NULL; |
597 | 164 | } |
598 | | |
599 | 1.33k | return ole; |
600 | 1.50k | } |
601 | | |
602 | | // Open in-memory buffer |
603 | 1.74k | OLE2 *ole2_open_buffer(const void *buffer, size_t len) { |
604 | 1.74k | OLE2 *ole = calloc(1, sizeof(OLE2)); |
605 | | |
606 | 1.74k | ole->buffer = buffer; |
607 | 1.74k | ole->buffer_len = len; |
608 | | |
609 | 1.74k | return ole2_read_header_and_body(ole); |
610 | 1.74k | } |
611 | | |
612 | | // Open physical file |
613 | | OLE2* ole2_open_file(const char *file) |
614 | 0 | { |
615 | 0 | OLE2* ole = NULL; |
616 | |
|
617 | | #ifdef OLE_DEBUG |
618 | | fprintf(stderr, "----------------------------------------------\n"); |
619 | | fprintf(stderr, "ole2_open_file %s\n", file); |
620 | | #endif |
621 | |
|
622 | 0 | if(xls_debug) printf("ole2_open: %s\n", file); |
623 | 0 | ole = calloc(1, sizeof(OLE2)); |
624 | |
|
625 | 0 | if (!(ole->file=fopen(file, "rb"))) { |
626 | 0 | if(xls_debug) fprintf(stderr, "File not found\n"); |
627 | 0 | free(ole); |
628 | 0 | return NULL; |
629 | 0 | } |
630 | | |
631 | 0 | return ole2_read_header_and_body(ole); |
632 | 0 | } |
633 | | |
634 | | void ole2_close(OLE2* ole2) |
635 | 1.74k | { |
636 | 1.74k | int i; |
637 | 1.74k | if (ole2->file) |
638 | 0 | fclose(ole2->file); |
639 | | |
640 | 4.76k | for(i=0; i<ole2->files.count; ++i) { |
641 | 3.01k | free(ole2->files.file[i].name); |
642 | 3.01k | } |
643 | 1.74k | free(ole2->files.file); |
644 | 1.74k | free(ole2->SecID); |
645 | 1.74k | free(ole2->SSecID); |
646 | 1.74k | free(ole2->SSAT); |
647 | 1.74k | free(ole2); |
648 | 1.74k | } |
649 | | |
650 | | void ole2_fclose(OLE2Stream* ole2st) |
651 | 2.91k | { |
652 | 2.91k | free(ole2st->buf); |
653 | 2.91k | free(ole2st); |
654 | 2.91k | } |
655 | | |
656 | | // Return offset in bytes of a sector from its sid |
657 | | static size_t sector_pos(OLE2* ole2, DWORD sid) |
658 | 3.67M | { |
659 | 3.67M | return 512 + sid * ole2->lsector; |
660 | 3.67M | } |
661 | | // Read one sector from its sid |
662 | | static ssize_t sector_read(OLE2* ole2, void *buffer, size_t buffer_len, DWORD sid) |
663 | 3.67M | { |
664 | 3.67M | size_t num; |
665 | 3.67M | size_t seeked; |
666 | | |
667 | 3.67M | if ((seeked = ole2_fseek(ole2, sector_pos(ole2, sid))) != 0) { |
668 | 6.85k | if (xls_debug) fprintf(stderr, "Error: wanted to seek to sector %u (0x%x) loc=%u\n", sid, sid, |
669 | 0 | (unsigned int)sector_pos(ole2, sid)); |
670 | 6.85k | return -1; |
671 | 6.85k | } |
672 | | |
673 | 3.66M | if ((num = ole2_fread(ole2, buffer, buffer_len, ole2->lsector)) != 1) { |
674 | 450 | if (xls_debug) fprintf(stderr, "Error: fread wanted 1 got %lu loc=%u\n", (unsigned long)num, |
675 | 0 | (unsigned int)sector_pos(ole2, sid)); |
676 | 450 | return -1; |
677 | 450 | } |
678 | | |
679 | 3.66M | return ole2->lsector; |
680 | 3.66M | } |
681 | | |
682 | | // read first 109 sectors of MSAT from header |
683 | 1.67k | static ssize_t read_MSAT_header(OLE2* ole2, OLE2Header* oleh, DWORD sectorCount) { |
684 | 1.67k | BYTE *sector = (BYTE*)ole2->SecID; |
685 | 1.67k | ssize_t bytes_read = 0, total_bytes_read = 0; |
686 | 1.67k | size_t bytes_left = ole2->SecIDCount * sizeof(DWORD); |
687 | 1.67k | DWORD sectorNum; |
688 | | |
689 | 17.0k | for (sectorNum = 0; sectorNum < sectorCount && sectorNum < 109; sectorNum++) |
690 | 15.3k | { |
691 | 15.3k | if ((bytes_read = sector_read(ole2, sector, bytes_left, oleh->MSAT[sectorNum])) == -1) { |
692 | 21 | if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", oleh->MSAT[sectorNum]); |
693 | 21 | return -1; |
694 | 21 | } |
695 | 15.3k | sector += ole2->lsector; |
696 | 15.3k | bytes_left -= ole2->lsector; |
697 | 15.3k | total_bytes_read += bytes_read; |
698 | 15.3k | } |
699 | 1.65k | return total_bytes_read; |
700 | 1.67k | } |
701 | | |
702 | | // Add additional sectors of the MSAT |
703 | 1.65k | static ssize_t read_MSAT_body(OLE2 *ole2, DWORD sectorOffset, DWORD sectorCount) { |
704 | 1.65k | DWORD sid = ole2->difstart; |
705 | 1.65k | ssize_t bytes_read = 0, total_bytes_read = 0; |
706 | 1.65k | DWORD sectorNum = sectorOffset; |
707 | | |
708 | 1.65k | DWORD *sector = ole_malloc(ole2->lsector); |
709 | | //printf("sid=%u (0x%x) sector=%u\n", sid, sid, ole2->lsector); |
710 | 3.51k | while (sid != ENDOFCHAIN && sid != FREESECT) // FREESECT only here due to an actual file that requires it (old Apple Numbers bug) |
711 | 1.93k | { |
712 | 1.93k | int posInSector; |
713 | | // read MSAT sector |
714 | 1.93k | if ((bytes_read = sector_read(ole2, sector, ole2->lsector, sid)) == -1) { |
715 | 23 | total_bytes_read = -1; |
716 | 23 | if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", sid); |
717 | 23 | goto cleanup; |
718 | 23 | } |
719 | 1.90k | total_bytes_read += bytes_read; |
720 | | |
721 | | // read content |
722 | 239k | for (posInSector = 0; posInSector < (ole2->lsector-4)/4; posInSector++) |
723 | 237k | { |
724 | 237k | DWORD s = sector[posInSector]; |
725 | | //printf(" s[%d]=%d (0x%x)\n", posInSector, s, s); |
726 | | |
727 | 237k | if (s != ENDOFCHAIN && s != FREESECT) // see patch in Bug 31. For very large files |
728 | 183k | { |
729 | 183k | if (sectorNum == sectorCount) { |
730 | 21 | if (xls_debug) fprintf(stderr, "Error: Unable to seek to sector #%d\n", s); |
731 | 21 | total_bytes_read = -1; |
732 | 21 | goto cleanup; |
733 | 21 | } |
734 | 183k | if ((bytes_read = sector_read(ole2, (BYTE*)(ole2->SecID)+sectorNum*ole2->lsector, |
735 | 183k | (ole2->SecIDCount * sizeof(DWORD) - sectorNum*ole2->lsector), s)) == -1) { |
736 | 22 | if (xls_debug) fprintf(stderr, "Error: Unable to read sector #%d\n", s); |
737 | 22 | total_bytes_read = -1; |
738 | 22 | goto cleanup; |
739 | 22 | } |
740 | 183k | total_bytes_read += bytes_read; |
741 | 183k | sectorNum++; |
742 | 183k | } |
743 | 237k | } |
744 | 1.86k | if (sid == sector[posInSector]) { |
745 | 11 | if (xls_debug) fprintf(stderr, "Error: Loop detected in sector #%d\n", sid); |
746 | 11 | total_bytes_read = -1; |
747 | 11 | goto cleanup; |
748 | 11 | } |
749 | 1.85k | sid = sector[posInSector]; |
750 | | //printf(" s[%d]=%d (0x%x)\n", posInSector, sid, sid); |
751 | 1.85k | } |
752 | | #ifdef OLE_DEBUG |
753 | | if(xls_debug) { |
754 | | //printf("==== READ IN SECTORS FOR MSAT TABLE====\n"); |
755 | | int i; |
756 | | for(i=0; i<512/4; ++i) { // just the first block |
757 | | if(ole2->SecID[i] != FREESECT) printf("SecID[%d]=%d\n", i, ole2->SecID[i]); |
758 | | } |
759 | | } |
760 | | //exit(0); |
761 | | #endif |
762 | | |
763 | 1.65k | cleanup: |
764 | 1.65k | free(sector); |
765 | 1.65k | return total_bytes_read; |
766 | 1.65k | } |
767 | | |
768 | | // read in short table |
769 | 1.58k | static ssize_t read_MSAT_trailer(OLE2 *ole2) { |
770 | 1.58k | ssize_t total_bytes_read = 0; |
771 | 1.58k | DWORD sector, k; |
772 | 1.58k | BYTE *wptr; |
773 | 1.58k | size_t bytes_left; |
774 | | |
775 | 1.58k | if(ole2->sfatstart == ENDOFCHAIN) |
776 | 80 | return 0; |
777 | | |
778 | 1.50k | if ((ole2->SSecID = ole_malloc(ole2->csfat*(size_t)ole2->lsector)) == NULL) { |
779 | 14 | return -1; |
780 | 14 | } |
781 | 1.48k | ole2->SSecIDCount = ole2->csfat*(size_t)ole2->lsector/4; |
782 | 1.48k | sector = ole2->sfatstart; |
783 | 1.48k | wptr=(BYTE*)ole2->SSecID; |
784 | 1.48k | bytes_left = ole2->SSecIDCount * sizeof(DWORD); |
785 | 1.54M | for(k=0; k<ole2->csfat; ++k) { |
786 | 1.54M | if (sector == ENDOFCHAIN || sector_read(ole2, wptr, bytes_left, sector) == -1) { |
787 | 42 | total_bytes_read = -1; |
788 | 42 | goto cleanup; |
789 | 42 | } |
790 | 1.54M | if (!ole2_validate_sector(sector, ole2)) { |
791 | 21 | total_bytes_read = -1; |
792 | 21 | goto cleanup; |
793 | 21 | } |
794 | 1.54M | wptr += ole2->lsector; |
795 | 1.54M | bytes_left -= ole2->lsector; |
796 | 1.54M | total_bytes_read += ole2->lsector; |
797 | 1.54M | sector = xlsIntVal(ole2->SecID[sector]); |
798 | 1.54M | } |
799 | | #ifdef OLE_DEBUG |
800 | | if(xls_debug) { |
801 | | int i; |
802 | | for(i=0; i<ole2->csfat; ++i) { |
803 | | if(ole2->SSecID[i] != FREESECT) fprintf(stderr, "SSecID[%d]=%d\n", i, ole2->SSecID[i]); |
804 | | } |
805 | | } |
806 | | #endif |
807 | | |
808 | 1.48k | cleanup: |
809 | 1.48k | return total_bytes_read; |
810 | 1.48k | } |
811 | | |
812 | | |
813 | | // Read MSAT |
814 | | static ssize_t read_MSAT(OLE2* ole2, OLE2Header* oleh) |
815 | 1.69k | { |
816 | | // reconstitution of the MSAT |
817 | 1.69k | DWORD count = ole2->cfat; |
818 | 1.69k | if(count == 0 || count > (1 << 24)) { |
819 | 11 | if (xls_debug) fprintf(stderr, "Error: MSAT count %u out-of-bounds\n", count); |
820 | 11 | return -1; |
821 | 11 | } |
822 | | |
823 | 1.68k | ssize_t total_bytes_read = 0; |
824 | 1.68k | ssize_t bytes_read = 0; |
825 | | |
826 | 1.68k | ole2->SecIDCount = count*ole2->lsector/4; |
827 | 1.68k | if ((ole2->SecID = ole_malloc(ole2->SecIDCount * sizeof(DWORD))) == NULL) { |
828 | 8 | total_bytes_read = -1; |
829 | 8 | goto cleanup; |
830 | 8 | } |
831 | | |
832 | 1.67k | if ((bytes_read = read_MSAT_header(ole2, oleh, count)) == -1) { |
833 | 21 | total_bytes_read = -1; |
834 | 21 | goto cleanup; |
835 | 21 | } |
836 | 1.65k | total_bytes_read += bytes_read; |
837 | | |
838 | 1.65k | if ((bytes_read = read_MSAT_body(ole2, total_bytes_read / ole2->lsector, count)) == -1) { |
839 | 77 | total_bytes_read = -1; |
840 | 77 | goto cleanup; |
841 | 77 | } |
842 | 1.58k | total_bytes_read += bytes_read; |
843 | | |
844 | 1.58k | if ((bytes_read = read_MSAT_trailer(ole2)) == -1) { |
845 | 77 | total_bytes_read = -1; |
846 | 77 | goto cleanup; |
847 | 77 | } |
848 | 1.50k | total_bytes_read += bytes_read; |
849 | | |
850 | 1.68k | cleanup: |
851 | 1.68k | if (total_bytes_read == -1) { |
852 | 183 | if (ole2->SecID) { |
853 | 175 | free(ole2->SecID); |
854 | 175 | ole2->SecID = NULL; |
855 | 175 | } |
856 | 183 | if (ole2->SSecID) { |
857 | 63 | free(ole2->SSecID); |
858 | 63 | ole2->SSecID = NULL; |
859 | 63 | } |
860 | 183 | } |
861 | | |
862 | 1.68k | return total_bytes_read; |
863 | 1.50k | } |