/src/wireshark/wiretap/file_wrappers.c
Line | Count | Source |
1 | | /* file_wrappers.c |
2 | | * |
3 | | * Wiretap Library |
4 | | * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu> |
5 | | * |
6 | | * SPDX-License-Identifier: GPL-2.0-or-later |
7 | | */ |
8 | | |
9 | | /* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib |
10 | | * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler |
11 | | * under licence: |
12 | | * |
13 | | * SPDX-License-Identifier: Zlib |
14 | | * |
15 | | */ |
16 | | |
17 | | #include "config.h" |
18 | 0 | #define WS_LOG_DOMAIN LOG_DOMAIN_WIRETAP |
19 | | |
20 | | #include "file_wrappers.h" |
21 | | |
22 | | #include <assert.h> |
23 | | #include <errno.h> |
24 | | #include <string.h> |
25 | | #include "wtap_module.h" |
26 | | |
27 | | #include <wsutil/file_util.h> |
28 | | #include <wsutil/zlib_compat.h> |
29 | | #include <wsutil/file_compressed.h> |
30 | | |
31 | | #ifdef HAVE_ZSTD |
32 | | #include <zstd.h> |
33 | | #endif /* HAVE_ZSTD */ |
34 | | |
35 | | #ifdef HAVE_LZ4FRAME_H |
36 | | #include <lz4.h> |
37 | | #include <lz4frame.h> |
38 | | #ifndef LZ4F_BLOCK_HEADER_SIZE /* Added in LZ4_VERSION_NUMBER 10902 */ |
39 | | #define LZ4F_BLOCK_HEADER_SIZE 4 |
40 | | #endif /* LZ4F_BLOCK_HEADER_SIZE */ |
41 | | #endif /* HAVE_LZ4FRAME_H */ |
42 | | |
43 | | static ws_compression_type file_get_compression_type(FILE_T stream); |
44 | | |
45 | | ws_compression_type |
46 | | wtap_get_compression_type(wtap *wth) |
47 | 0 | { |
48 | 0 | return file_get_compression_type((wth->fh == NULL) ? wth->random_fh : wth->fh); |
49 | 0 | } |
50 | | |
/*
 * Buffer size constants.  Their users are outside this part of the file;
 * presumably GZBUFSIZE is the default I/O buffer size and LZ4BUFSIZE the
 * buffer size needed for LZ4 - TODO confirm against the open/allocation code.
 */
/* #define GZBUFSIZE 8192 */
#define GZBUFSIZE 4096
#define LZ4BUFSIZE 4194304 // 4MiB, maximum block size
54 | | |
/* values for wtap_reader compression */
typedef enum {
    UNKNOWN,            /* unknown - look for a compression header */
    UNCOMPRESSED,       /* uncompressed - copy input directly */
    ZLIB,               /* decompress a zlib stream */
    GZIP_AFTER_HEADER,  /* recorded in the fast-seek point that
                           check_for_zlib_compression() adds once a gzip
                           header has been parsed */
    ZSTD,               /* decompress a Zstandard stream */
    LZ4,                /* start of a LZ4 Frame */
    LZ4_AFTER_HEADER,   /* start of a LZ4 Block */
} compression_t;
65 | | |
/*
 * Upper bound on the size of the read buffers.
 *
 * We limit the size of our input and output buffers to 2^30 bytes,
 * because:
 *
 *    1) on Windows with MSVC, the return value of _read() is int,
 *       so the biggest read you can do is INT_MAX, and the biggest
 *       power of 2 below that is 2^30;
 *
 *    2) the "avail_in" and "avail_out" values in a z_stream structure
 *       in zlib are uInts, and those are unsigned ints, and that
 *       imposes a limit on the buffer size when we're reading a
 *       gzipped file.
 *
 * Thus, we use unsigned for the buffer sizes, offsets, amount available
 * from the buffer, etc.
 *
 * If we want an even bigger buffer for uncompressed data, or for
 * some other form of compression, then the unsigned-sized values should
 * be in structure values used only for reading gzipped files, and
 * other values should be used for uncompressed data or data
 * compressed using other algorithms (e.g., in a union).
 */
#define MAX_READ_BUF_SIZE (1U << 30)
89 | | |
/*
 * A data buffer with a read cursor.  The undelivered data occupies
 * the bytes [next, next + avail); everything between buf and next has
 * already been delivered.
 */
struct wtap_reader_buf {
    uint8_t *buf;  /* buffer */
    uint8_t *next; /* next byte to deliver from buffer */
    unsigned avail; /* number of bytes available to deliver at next */
};
95 | | |
/*
 * State of one file being read: the descriptor, the raw and uncompressed
 * positions, the input/output buffers, the detected compression type,
 * pending-seek and error information, per-algorithm decompression
 * contexts, and the fast-seek index.
 */
struct wtap_reader {
    int fd;                     /* file descriptor */
    int64_t raw_pos;            /* current position in file (just to not call lseek()) */
    int64_t pos;                /* current position in uncompressed data */
    unsigned size;              /* buffer size */

    struct wtap_reader_buf in;  /* input buffer, containing compressed data */
    struct wtap_reader_buf out; /* output buffer, containing uncompressed data */

    bool eof;                   /* true if end of input file reached */
    int64_t start;              /* where the gzip data started, for rewinding */
    int64_t raw;                /* where the raw data started, for seeking */
    compression_t compression;  /* type of compression, if any */
    compression_t last_compression; /* last known compression type */
    bool is_compressed;         /* false if completely uncompressed, true otherwise */

    /* seek request */
    int64_t skip;               /* amount to skip (already rewound if backwards) */
    bool seek_pending;          /* true if seek request pending */

    /* error information */
    int err;                    /* error code */
    const char *err_info;       /* additional error information string for some errors */

    /*
     * Decompression stream information.
     *
     * XXX - should this be a union?
     */
#ifdef USE_ZLIB_OR_ZLIBNG
    /* zlib inflate stream */
    zlib_stream strm;           /* stream structure in-place (not a pointer) */
    bool dont_check_crc;        /* true if we aren't supposed to check the CRC */
#endif /* USE_ZLIB_OR_ZLIBNG */
#ifdef HAVE_ZSTD
    ZSTD_DCtx *zstd_dctx;       /* Zstandard streaming decompression context */
#endif /* HAVE_ZSTD */
#ifdef HAVE_LZ4FRAME_H
    LZ4F_dctx *lz4_dctx;        /* LZ4 frame decompression context */
    LZ4F_frameInfo_t lz4_info;  /* frame info; copied into LZ4 fast-seek points */
    unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX]; /* frame header bytes; copied into LZ4 fast-seek points */
#endif /* HAVE_LZ4FRAME_H */

    /* fast seeking */
    GPtrArray *fast_seek;       /* array of struct fast_seek_point *, ordered by
                                   uncompressed offset; NULL if fast seeking
                                   is not in use */
    void *fast_seek_cur;        /* sliding-window state for the stream being
                                   decompressed (a struct zlib_cur_seek_point
                                   for zlib), or NULL */
};
143 | | |
144 | | /* Current read offset within a buffer. */ |
145 | | static unsigned |
146 | | offset_in_buffer(struct wtap_reader_buf *buf) |
147 | 0 | { |
148 | | /* buf->next points to the next byte to read, and buf->buf points |
149 | | to the first byte in the buffer, so the difference between them |
150 | | is the offset. |
151 | | |
152 | | This will fit in an unsigned int, because it can't be bigger |
153 | | than the size of the buffer, which is an unsigned int. */ |
154 | 0 | return (unsigned)(buf->next - buf->buf); |
155 | 0 | } |
156 | | |
157 | | /* Number of bytes of data that are in a buffer. */ |
158 | | static unsigned |
159 | | bytes_in_buffer(struct wtap_reader_buf *buf) |
160 | 0 | { |
161 | | /* buf->next + buf->avail points just past the last byte of data in |
162 | | the buffer. |
163 | | Thus, (buf->next + buf->avail) - buf->buf is the number of bytes |
164 | | of data in the buffer. |
165 | | |
166 | | This will fit in an unsigned, because it can't be bigger |
167 | | than the size of the buffer, which is a unsigned. */ |
168 | 0 | return (unsigned)((buf->next + buf->avail) - buf->buf); |
169 | 0 | } |
170 | | |
171 | | /* Reset a buffer, discarding all data in the buffer, so we read into |
172 | | it starting at the beginning. */ |
173 | | static void |
174 | | buf_reset(struct wtap_reader_buf *buf) |
175 | 0 | { |
176 | 0 | buf->next = buf->buf; |
177 | 0 | buf->avail = 0; |
178 | 0 | } |
179 | | |
180 | | static int |
181 | | buf_read(FILE_T state, struct wtap_reader_buf *buf) |
182 | 0 | { |
183 | 0 | unsigned space_left, to_read; |
184 | 0 | unsigned char *read_ptr; |
185 | 0 | ssize_t ret; |
186 | | |
187 | | /* How much space is left at the end of the buffer? |
188 | | XXX - the output buffer actually has state->size * 2 bytes. */ |
189 | 0 | space_left = state->size - bytes_in_buffer(buf); |
190 | 0 | if (space_left == 0) { |
191 | | /* There's no space left, so we start fresh at the beginning |
192 | | of the buffer. */ |
193 | 0 | buf_reset(buf); |
194 | |
|
195 | 0 | read_ptr = buf->buf; |
196 | 0 | to_read = state->size; |
197 | 0 | } else { |
198 | | /* There's some space left; try to read as much data as we |
199 | | can into that space. We may get less than that if we're |
200 | | reading from a pipe or if we're near the end of the file. */ |
201 | 0 | read_ptr = buf->next + buf->avail; |
202 | 0 | to_read = space_left; |
203 | 0 | } |
204 | |
|
205 | 0 | ret = ws_read(state->fd, read_ptr, to_read); |
206 | 0 | if (ret < 0) { |
207 | 0 | state->err = errno; |
208 | 0 | state->err_info = NULL; |
209 | 0 | return -1; |
210 | 0 | } |
211 | 0 | if (ret == 0) |
212 | 0 | state->eof = true; |
213 | 0 | state->raw_pos += ret; |
214 | 0 | buf->avail += (unsigned)ret; |
215 | 0 | return 0; |
216 | 0 | } |
217 | | |
218 | | static int /* gz_avail */ |
219 | | fill_in_buffer(FILE_T state) |
220 | 0 | { |
221 | 0 | if (state->err != 0) |
222 | 0 | return -1; |
223 | 0 | if (!state->eof) { |
224 | 0 | if (buf_read(state, &state->in) < 0) |
225 | 0 | return -1; |
226 | 0 | } |
227 | 0 | return 0; |
228 | 0 | } |
229 | | |
/* Sizes of the history windows saved in fast-seek points. */
#define ZLIB_WINSIZE 32768
#define LZ4_WINSIZE 65536

/*
 * One entry of the fast-seek index: a pair of corresponding offsets in
 * the input file and in the uncompressed data, plus the
 * algorithm-specific state saved when the point was created.
 */
struct fast_seek_point {
    int64_t out;    /* corresponding offset in uncompressed data */
    int64_t in;     /* offset in input file of first full byte */

    compression_t compression; /* compression_t value recorded when the point was created */
    union {
        struct {
#ifdef HAVE_INFLATEPRIME
            int bits;   /* number of bits (1-7) from byte at in - 1, or 0 */
#endif /* HAVE_INFLATEPRIME */
            unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */

            /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
            uint32_t adler;     /* low 32 bits of strm.adler at this point */
            uint32_t total_out; /* low 32 bits of strm.total_out at this point */
        } zlib;
#ifdef HAVE_LZ4FRAME_H
        struct {
            LZ4F_frameInfo_t lz4_info; /* copy of the reader's lz4_info */
            unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX]; /* copy of the reader's lz4_hdr */
            unsigned char window[LZ4_WINSIZE]; /* preceding 64K of uncompressed data */
        } lz4;
#endif
    } data;
};
258 | | |
/*
 * Sliding window of the most recently inflated data, kept as a circular
 * buffer while a zlib stream is being read so that fast-seek points can
 * be created from it.
 */
struct zlib_cur_seek_point {
    unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
    unsigned int pos;   /* index in window where the next data is written */
    unsigned int have;  /* number of valid bytes in window, capped at ZLIB_WINSIZE */
};
264 | | |
/*
 * Sliding window of recently decompressed LZ4 data.
 * NOTE(review): presumably the LZ4 counterpart of
 * struct zlib_cur_seek_point; its users are outside this part of the
 * file - confirm.
 */
struct lz4_cur_seek_point {
    unsigned char window[LZ4_WINSIZE]; /* preceding 64K of uncompressed data */
    unsigned pos; /* start position in circular buffer */
    unsigned have; /* presumably the number of valid bytes in window - TODO confirm */
};
270 | | |
/* Minimum distance, in bytes of uncompressed data, that zlib_fast_seek_add()
   requires between the last fast-seek point and a new one. */
#define SPAN INT64_C(1048576)
272 | | static struct fast_seek_point * |
273 | | fast_seek_find(FILE_T file, int64_t pos) |
274 | 0 | { |
275 | 0 | struct fast_seek_point *smallest = NULL; |
276 | 0 | struct fast_seek_point *item; |
277 | 0 | unsigned low, i, max; |
278 | |
|
279 | 0 | if (!file->fast_seek) |
280 | 0 | return NULL; |
281 | | |
282 | 0 | for (low = 0, max = file->fast_seek->len; low < max; ) { |
283 | 0 | i = (low + max) / 2; |
284 | 0 | item = (struct fast_seek_point *)file->fast_seek->pdata[i]; |
285 | |
|
286 | 0 | if (pos < item->out) |
287 | 0 | max = i; |
288 | 0 | else if (pos > item->out) { |
289 | 0 | smallest = item; |
290 | 0 | low = i + 1; |
291 | 0 | } else { |
292 | 0 | return item; |
293 | 0 | } |
294 | 0 | } |
295 | 0 | return smallest; |
296 | 0 | } |
297 | | |
298 | | static void |
299 | | fast_seek_header(FILE_T file, int64_t in_pos, int64_t out_pos, |
300 | | compression_t compression) |
301 | 0 | { |
302 | 0 | struct fast_seek_point *item = NULL; |
303 | |
|
304 | 0 | if (!file->fast_seek) { |
305 | 0 | return; |
306 | 0 | } |
307 | | |
308 | 0 | if (file->fast_seek->len != 0) |
309 | 0 | item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1]; |
310 | | |
311 | | /* fast_seek_header always adds a fast seek point, even if less than |
312 | | * SPAN from the last one. That is because it used for new streams |
313 | | * (including concatenated streams) where the compression type |
314 | | * or, for LZ4, compression options, may change. |
315 | | */ |
316 | 0 | if (!item || item->out < out_pos) { |
317 | 0 | struct fast_seek_point *val = g_new(struct fast_seek_point,1); |
318 | 0 | val->in = in_pos; |
319 | 0 | val->out = out_pos; |
320 | 0 | val->compression = compression; |
321 | |
|
322 | | #ifdef HAVE_LZ4FRAME_H |
323 | | if (compression == LZ4) { |
324 | | val->data.lz4.lz4_info = file->lz4_info; |
325 | | memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX); |
326 | | } |
327 | | #endif /* HAVE_LZ4FRAME_H */ |
328 | 0 | g_ptr_array_add(file->fast_seek, val); |
329 | 0 | } |
330 | 0 | } |
331 | | |
332 | | static void |
333 | | fast_seek_reset(FILE_T state) |
334 | 0 | { |
335 | 0 | switch (state->compression) { |
336 | | |
337 | 0 | case UNKNOWN: |
338 | 0 | break; |
339 | | |
340 | 0 | case UNCOMPRESSED: |
341 | | /* Nothing to do */ |
342 | 0 | break; |
343 | | |
344 | 0 | case ZLIB: |
345 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
346 | 0 | if (state->fast_seek_cur != NULL) { |
347 | 0 | struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur; |
348 | |
|
349 | 0 | cur->have = 0; |
350 | 0 | } |
351 | | #else |
352 | | /* This "cannot happen" */ |
353 | | ws_assert_not_reached(); |
354 | | #endif /* USE_ZLIB_OR_ZLIBNG */ |
355 | 0 | break; |
356 | | |
357 | 0 | case GZIP_AFTER_HEADER: |
358 | 0 | break; |
359 | | |
360 | 0 | case ZSTD: |
361 | | #ifdef HAVE_ZSTD |
362 | | /* Anything to do? */ |
363 | | #else |
364 | | /* This "cannot happen" */ |
365 | 0 | ws_assert_not_reached(); |
366 | 0 | #endif /* HAVE_ZSTD */ |
367 | 0 | break; |
368 | | |
369 | 0 | case LZ4: |
370 | 0 | case LZ4_AFTER_HEADER: |
371 | | #ifdef HAVE_LZ4 |
372 | | /* Anything to do? */ |
373 | | #else |
374 | | /* This "cannot happen" */ |
375 | 0 | ws_assert_not_reached(); |
376 | 0 | #endif /* HAVE_LZ4 */ |
377 | 0 | break; |
378 | | |
379 | | /* Add other compression types here */ |
380 | | |
381 | 0 | default: |
382 | | /* This "cannot happen" */ |
383 | 0 | ws_assert_not_reached(); |
384 | 0 | break; |
385 | 0 | } |
386 | 0 | } |
387 | | |
388 | | static bool |
389 | | uncompressed_fill_out_buffer(FILE_T state) |
390 | 0 | { |
391 | 0 | if (buf_read(state, &state->out) < 0) |
392 | 0 | return false; |
393 | 0 | return true; |
394 | 0 | } |
395 | | |
/* Get next byte from input, or -1 if end or error.
 *
 * The macro operates on a variable named "state" that must be in scope
 * at the point of use.  If the input buffer is empty it is refilled
 * first; on success one byte is consumed from state->in.
 *
 * Note:
 *
 *    1) errors from buf_read(), and thus from fill_in_buffer(), are
 *       "sticky", and fill_in_buffer() won't do any reading if there's
 *       an error;
 *
 *    2) GZ_GETC() returns -1 on an EOF;
 *
 * so it's safe to make multiple GZ_GETC() calls and only check the
 * last one for an error. */
#define GZ_GETC() ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 : \
                   (state->in.avail == 0 ? -1 : \
                    (state->in.avail--, *(state->in.next)++)))
411 | | |
412 | | |
413 | | /* |
414 | | * Gzipped files, using compression from zlib or zlib-ng. |
415 | | * |
416 | | * https://tools.ietf.org/html/rfc1952 (RFC 1952) |
417 | | */ |
418 | | #ifdef USE_ZLIB_OR_ZLIBNG |
419 | | |
420 | | /* Get a one-byte integer and return 0 on success and the value in *ret. |
421 | | Otherwise -1 is returned, state->err is set, and *ret is not modified. */ |
422 | | static int |
423 | | gz_next1(FILE_T state, uint8_t *ret) |
424 | 0 | { |
425 | 0 | int ch; |
426 | |
|
427 | 0 | ch = GZ_GETC(); |
428 | 0 | if (ch == -1) { |
429 | 0 | if (state->err == 0) { |
430 | | /* EOF */ |
431 | 0 | state->err = WTAP_ERR_SHORT_READ; |
432 | 0 | state->err_info = NULL; |
433 | 0 | } |
434 | 0 | return -1; |
435 | 0 | } |
436 | 0 | *ret = ch; |
437 | 0 | return 0; |
438 | 0 | } |
439 | | |
440 | | /* Get a two-byte little-endian integer and return 0 on success and the value |
441 | | in *ret. Otherwise -1 is returned, state->err is set, and *ret is not |
442 | | modified. */ |
443 | | static int |
444 | | gz_next2(FILE_T state, uint16_t *ret) |
445 | 0 | { |
446 | 0 | uint16_t val; |
447 | 0 | int ch; |
448 | |
|
449 | 0 | val = GZ_GETC(); |
450 | 0 | ch = GZ_GETC(); |
451 | 0 | if (ch == -1) { |
452 | 0 | if (state->err == 0) { |
453 | | /* EOF */ |
454 | 0 | state->err = WTAP_ERR_SHORT_READ; |
455 | 0 | state->err_info = NULL; |
456 | 0 | } |
457 | 0 | return -1; |
458 | 0 | } |
459 | 0 | val += (uint16_t)ch << 8; |
460 | 0 | *ret = val; |
461 | 0 | return 0; |
462 | 0 | } |
463 | | |
464 | | /* Get a four-byte little-endian integer and return 0 on success and the value |
465 | | in *ret. Otherwise -1 is returned, state->err is set, and *ret is not |
466 | | modified. */ |
467 | | static int |
468 | | gz_next4(FILE_T state, uint32_t *ret) |
469 | 0 | { |
470 | 0 | uint32_t val; |
471 | 0 | int ch; |
472 | |
|
473 | 0 | val = GZ_GETC(); |
474 | 0 | val += (unsigned)GZ_GETC() << 8; |
475 | 0 | val += (uint32_t)GZ_GETC() << 16; |
476 | 0 | ch = GZ_GETC(); |
477 | 0 | if (ch == -1) { |
478 | 0 | if (state->err == 0) { |
479 | | /* EOF */ |
480 | 0 | state->err = WTAP_ERR_SHORT_READ; |
481 | 0 | state->err_info = NULL; |
482 | 0 | } |
483 | 0 | return -1; |
484 | 0 | } |
485 | 0 | val += (uint32_t)ch << 24; |
486 | 0 | *ret = val; |
487 | 0 | return 0; |
488 | 0 | } |
489 | | |
490 | | /* Skip the specified number of bytes and return 0 on success. Otherwise -1 |
491 | | is returned. */ |
492 | | static int |
493 | | gz_skipn(FILE_T state, size_t n) |
494 | 0 | { |
495 | 0 | while (n != 0) { |
496 | 0 | if (GZ_GETC() == -1) { |
497 | 0 | if (state->err == 0) { |
498 | | /* EOF */ |
499 | 0 | state->err = WTAP_ERR_SHORT_READ; |
500 | 0 | state->err_info = NULL; |
501 | 0 | } |
502 | 0 | return -1; |
503 | 0 | } |
504 | 0 | n--; |
505 | 0 | } |
506 | 0 | return 0; |
507 | 0 | } |
508 | | |
509 | | /* Skip a null-terminated string and return 0 on success. Otherwise -1 |
510 | | is returned. */ |
511 | | static int |
512 | | gz_skipzstr(FILE_T state) |
513 | 0 | { |
514 | 0 | int ch; |
515 | | |
516 | | /* It's null-terminated, so scan until we read a byte with |
517 | | the value 0 or get an error. */ |
518 | 0 | while ((ch = GZ_GETC()) > 0) |
519 | 0 | ; |
520 | 0 | if (ch == -1) { |
521 | 0 | if (state->err == 0) { |
522 | | /* EOF */ |
523 | 0 | state->err = WTAP_ERR_SHORT_READ; |
524 | 0 | state->err_info = NULL; |
525 | 0 | } |
526 | 0 | return -1; |
527 | 0 | } |
528 | 0 | return 0; |
529 | 0 | } |
530 | | |
531 | | static void |
532 | | zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, int64_t in_pos, int64_t out_pos) |
533 | 0 | { |
534 | | /* it's for sure after gzip header, so file->fast_seek->len != 0 */ |
535 | 0 | struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1]; |
536 | |
|
537 | | #ifndef HAVE_INFLATEPRIME |
538 | | if (bits) |
539 | | return; |
540 | | #endif /* HAVE_INFLATEPRIME */ |
541 | | |
542 | | /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do) |
543 | | * Inserting value in middle of sorted array is expensive, so we want to add only in the end. |
544 | | * It's not big deal, cause first-read don't usually invoke seeking |
545 | | */ |
546 | 0 | if (item->out + SPAN < out_pos) { |
547 | 0 | struct fast_seek_point *val = g_new(struct fast_seek_point,1); |
548 | 0 | val->in = in_pos; |
549 | 0 | val->out = out_pos; |
550 | 0 | val->compression = ZLIB; |
551 | 0 | #ifdef HAVE_INFLATEPRIME |
552 | 0 | val->data.zlib.bits = bits; |
553 | 0 | #endif /* HAVE_INFLATEPRIME */ |
554 | 0 | if (point->pos != 0) { |
555 | 0 | unsigned int left = ZLIB_WINSIZE - point->pos; |
556 | |
|
557 | 0 | memcpy(val->data.zlib.window, point->window + point->pos, left); |
558 | 0 | memcpy(val->data.zlib.window + left, point->window, point->pos); |
559 | 0 | } else |
560 | 0 | memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE); |
561 | | |
562 | | /* |
563 | | * XXX - strm.adler is a uLong in at least some versions |
564 | | * of zlib, and uLong is an unsigned long in at least |
565 | | * some of those versions, which means it's 64-bit |
566 | | * on LP64 platforms, even though the checksum is |
567 | | * 32-bit. We assume the actual Adler checksum |
568 | | * is in the lower 32 bits of strm.adler; as the |
569 | | * checksum in the file is only 32 bits, we save only |
570 | | * those lower 32 bits, and cast away any additional |
571 | | * bits to squelch warnings. |
572 | | * |
573 | | * The same applies to strm.total_out. |
574 | | */ |
575 | 0 | val->data.zlib.adler = (uint32_t) file->strm.adler; |
576 | 0 | val->data.zlib.total_out = (uint32_t) file->strm.total_out; |
577 | 0 | g_ptr_array_add(file->fast_seek, val); |
578 | 0 | } |
579 | 0 | } |
580 | | |
/*
 * Inflate data from the input buffer into the output buffer until the
 * output buffer (state->size * 2 bytes) is full, the deflate stream
 * ends, or an error occurs.
 *
 * Based on what gz_decomp() in zlib does.
 *
 * There is no return value: on return state->out describes whatever
 * data was produced, and any failure is recorded in state->err and
 * state->err_info.  At the end of the deflate stream the gzip trailer
 * (CRC and length) is read and checked, and state->compression is set
 * back to UNKNOWN so that a following stream can be detected.
 */
static void
zlib_fill_out_buffer(FILE_T state)
{
    int ret = 0;        /* XXX */
    uint32_t crc, len;
    zlib_streamp strm = &(state->strm);
    unsigned char *buf = state->out.buf;
    unsigned int count = state->size << 1;

    /* buf2/count2 describe the part of the output buffer that is still
       unfilled before each inflate() call, so that after the call
       "count2 - strm->avail_out" is the amount that call produced,
       starting at buf2. */
    unsigned char *buf2 = buf;
    unsigned int count2 = count;

    strm->avail_out = count;
    strm->next_out = buf;

    /* fill output buffer up to end of deflate stream or error */
    do {
        /* get more input for inflate() */
        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
            break;
        if (state->in.avail == 0) {
            /* EOF */
            state->err = WTAP_ERR_SHORT_READ;
            state->err_info = NULL;
            break;
        }

        strm->avail_in = state->in.avail;
        strm->next_in = state->in.next;
        /* decompress and handle errors */
#ifdef Z_BLOCK
        /* Z_BLOCK makes inflate() stop at deflate block boundaries,
           which is where fast-seek points can be taken. */
        ret = ZLIB_PREFIX(inflate)(strm, Z_BLOCK);
#else /* Z_BLOCK */
        ret = ZLIB_PREFIX(inflate)(strm, Z_NO_FLUSH);
#endif /* Z_BLOCK */
        /* Record how much input inflate() consumed. */
        state->in.avail = strm->avail_in;
#ifdef z_const
DIAG_OFF(cast-qual)
        state->in.next = (unsigned char *)strm->next_in;
DIAG_ON(cast-qual)
#else /* z_const */
        state->in.next = strm->next_in;
#endif /* z_const */
        if (ret == Z_STREAM_ERROR) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = strm->msg;
            break;
        }
        if (ret == Z_NEED_DICT) {
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = "preset dictionary needed";
            break;
        }
        if (ret == Z_MEM_ERROR) {
            /* This means "not enough memory". */
            state->err = ENOMEM;
            state->err_info = NULL;
            break;
        }
        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
            state->err = WTAP_ERR_DECOMPRESS;
            state->err_info = strm->msg;
            break;
        }
        /*
         * XXX - Z_BUF_ERROR?
         */

        /* Fold the newly produced bytes into the running CRC-32, which
           is kept in strm->adler. */
        strm->adler = ZLIB_PREFIX(crc32)(strm->adler, buf2, count2 - strm->avail_out);
#ifdef Z_BLOCK
        if (state->fast_seek_cur != NULL) {
            struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
            unsigned int ready = count2 - strm->avail_out;

            /* Append the new bytes to the circular history window. */
            if (ready < ZLIB_WINSIZE) {
                unsigned left = ZLIB_WINSIZE - cur->pos;

                if (ready >= left) {
                    /* The new data reaches the end of the window;
                       wrap the remainder around to the start. */
                    memcpy(cur->window + cur->pos, buf2, left);
                    if (ready != left)
                        memcpy(cur->window, buf2 + left, ready - left);

                    cur->pos = ready - left;
                    cur->have += ready;
                } else {
                    memcpy(cur->window + cur->pos, buf2, ready);
                    cur->pos += ready;
                    cur->have += ready;
                }

                if (cur->have >= ZLIB_WINSIZE)
                    cur->have = ZLIB_WINSIZE;

            } else {
                /* A whole window's worth or more was produced; keep
                   only the last ZLIB_WINSIZE bytes. */
                memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
                cur->pos = 0;
                cur->have = ZLIB_WINSIZE;
            }

            /* Per zlib's inflate() documentation, after Z_BLOCK bit 7
               (128) of data_type is set when inflate() returned right
               after an end-of-block code or the header, bit 6 (64) is
               set while the last block is being decoded, and the low
               bits give the number of unused bits in the last input
               byte.  So: with a full window, at a block boundary that
               is not in the final block, offer a fast-seek point. */
            if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
                zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
        }
#endif /* Z_BLOCK */
        /* Advance past what this call produced. */
        buf2 = (buf2 + count2 - strm->avail_out);
        count2 = strm->avail_out;

    } while (strm->avail_out && ret != Z_STREAM_END);

    /* update available output and crc check value */
    state->out.next = buf;
    state->out.avail = count - strm->avail_out;

    /* Check gzip trailer if at end of deflate stream.
       We don't fail immediately here, we just set an error
       indication, so that we try to process what data we
       got before the error.  The next attempt to read
       something past that data will get the error. */
    if (ret == Z_STREAM_END) {
        if (gz_next4(state, &crc) != -1 &&
            gz_next4(state, &len) != -1) {
            if (crc != strm->adler && !state->dont_check_crc) {
                state->err = WTAP_ERR_DECOMPRESS;
                state->err_info = "bad CRC";
            } else if (len != (strm->total_out & 0xffffffffUL)) {
                state->err = WTAP_ERR_DECOMPRESS;
                state->err_info = "length field wrong";
            }
        }
        state->last_compression = state->compression;
        state->compression = UNKNOWN;      /* ready for next stream, once have is 0 */
        /* The history window belongs to the stream that just ended. */
        g_free(state->fast_seek_cur);
        state->fast_seek_cur = NULL;
    }
}
718 | | #endif /* USE_ZLIB_OR_ZLIBNG */ |
719 | | |
720 | | /* |
721 | | * Check for a gzip header. |
722 | | * |
723 | | * Based on the gzip-specific stuff gz_head() from zlib does. |
724 | | */ |
725 | | static int |
726 | | check_for_zlib_compression(FILE_T state) |
727 | 0 | { |
728 | | /* |
729 | | * Look for the gzip header. The first two bytes are 31 and 139, |
730 | | * and if we find it, return success if we support gzip and an |
731 | | * error if we don't. |
732 | | */ |
733 | 0 | if (state->in.next[0] == 31) { |
734 | 0 | state->in.avail--; |
735 | 0 | state->in.next++; |
736 | | |
737 | | /* Make sure the byte after the first byte is present */ |
738 | 0 | if (state->in.avail == 0 && fill_in_buffer(state) == -1) { |
739 | | /* Read error. */ |
740 | 0 | return -1; |
741 | 0 | } |
742 | 0 | if (state->in.avail != 0) { |
743 | 0 | if (state->in.next[0] == 139) { |
744 | | /* |
745 | | * We have what looks like the ID1 and ID2 bytes of a gzip |
746 | | * header. |
747 | | * Continue processing the file. |
748 | | * |
749 | | * XXX - some capture file formats (I'M LOOKING AT YOU, |
750 | | * ENDACE!) can have 31 in the first byte of the file |
751 | | * and 139 in the second byte of the file. For now, in |
752 | | * those cases, you lose. |
753 | | */ |
754 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
755 | 0 | uint8_t cm; |
756 | 0 | uint8_t flags; |
757 | 0 | uint16_t len; |
758 | 0 | uint16_t hcrc; |
759 | |
|
760 | 0 | state->in.avail--; |
761 | 0 | state->in.next++; |
762 | | |
763 | | /* read rest of header */ |
764 | | |
765 | | /* compression method (CM) */ |
766 | 0 | if (gz_next1(state, &cm) == -1) |
767 | 0 | return -1; |
768 | 0 | if (cm != 8) { |
769 | 0 | state->err = WTAP_ERR_DECOMPRESS; |
770 | 0 | state->err_info = "unknown compression method"; |
771 | 0 | return -1; |
772 | 0 | } |
773 | | |
774 | | /* flags (FLG) */ |
775 | 0 | if (gz_next1(state, &flags) == -1) { |
776 | | /* Read error. */ |
777 | 0 | return -1; |
778 | 0 | } |
779 | 0 | if (flags & 0xe0) { /* reserved flag bits */ |
780 | 0 | state->err = WTAP_ERR_DECOMPRESS; |
781 | 0 | state->err_info = "reserved flag bits set"; |
782 | 0 | return -1; |
783 | 0 | } |
784 | | |
785 | | /* modification time (MTIME) */ |
786 | 0 | if (gz_skipn(state, 4) == -1) { |
787 | | /* Read error. */ |
788 | 0 | return -1; |
789 | 0 | } |
790 | | |
791 | | /* extra flags (XFL) */ |
792 | 0 | if (gz_skipn(state, 1) == -1) { |
793 | | /* Read error. */ |
794 | 0 | return -1; |
795 | 0 | } |
796 | | |
797 | | /* operating system (OS) */ |
798 | 0 | if (gz_skipn(state, 1) == -1) { |
799 | | /* Read error. */ |
800 | 0 | return -1; |
801 | 0 | } |
802 | | |
803 | 0 | if (flags & 4) { |
804 | | /* extra field - get XLEN */ |
805 | 0 | if (gz_next2(state, &len) == -1) { |
806 | | /* Read error. */ |
807 | 0 | return -1; |
808 | 0 | } |
809 | | |
810 | | /* skip the extra field */ |
811 | 0 | if (gz_skipn(state, len) == -1) { |
812 | | /* Read error. */ |
813 | 0 | return -1; |
814 | 0 | } |
815 | 0 | } |
816 | 0 | if (flags & 8) { |
817 | | /* file name */ |
818 | 0 | if (gz_skipzstr(state) == -1) { |
819 | | /* Read error. */ |
820 | 0 | return -1; |
821 | 0 | } |
822 | 0 | } |
823 | 0 | if (flags & 16) { |
824 | | /* comment */ |
825 | 0 | if (gz_skipzstr(state) == -1) { |
826 | | /* Read error. */ |
827 | 0 | return -1; |
828 | 0 | } |
829 | 0 | } |
830 | 0 | if (flags & 2) { |
831 | | /* header crc */ |
832 | 0 | if (gz_next2(state, &hcrc) == -1) { |
833 | | /* Read error. */ |
834 | 0 | return -1; |
835 | 0 | } |
836 | | /* XXX - check the CRC? */ |
837 | 0 | } |
838 | | |
839 | | /* set up for decompression */ |
840 | 0 | ZLIB_PREFIX(inflateReset)(&(state->strm)); |
841 | 0 | state->strm.adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0); |
842 | 0 | state->compression = ZLIB; |
843 | 0 | state->is_compressed = true; |
844 | 0 | #ifdef Z_BLOCK |
845 | 0 | if (state->fast_seek) { |
846 | 0 | struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1); |
847 | |
|
848 | 0 | cur->pos = cur->have = 0; |
849 | 0 | g_free(state->fast_seek_cur); |
850 | 0 | state->fast_seek_cur = cur; |
851 | 0 | fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER); |
852 | 0 | } |
853 | 0 | #endif /* Z_BLOCK */ |
854 | 0 | return 1; |
855 | | #else /* USE_ZLIB_OR_ZLIBNG */ |
856 | | state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED; |
857 | | state->err_info = "reading gzip-compressed files isn't supported"; |
858 | | return -1; |
859 | | #endif /* USE_ZLIB_OR_ZLIBNG */ |
860 | 0 | } |
861 | | |
862 | | /* |
863 | | * Not a gzip file. "Unget" the first character; either: |
864 | | * |
865 | | * 1) we read both of the first two bytes into the |
866 | | * buffer with the first ws_read, so we can just back |
867 | | * up by one byte; |
868 | | * |
869 | | * 2) we only read the first byte into the buffer with |
870 | | * the first ws_read (e.g., because we're reading from |
871 | | * a pipe and only the first byte had been written to |
872 | | * the pipe at that point), and read the second byte |
873 | | * into the buffer after the first byte in the |
874 | | * fill_in_buffer call, so we now have two bytes in |
875 | | * the buffer, and can just back up by one byte. |
876 | | */ |
877 | 0 | state->in.avail++; |
878 | 0 | state->in.next--; |
879 | 0 | } |
880 | 0 | } |
881 | 0 | return 0; |
882 | 0 | } |
883 | | |
884 | | |
885 | | /* |
886 | | * Zstandard compression. |
887 | | * |
888 | | * https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md |
889 | | */ |
890 | | #ifdef HAVE_ZSTD |
891 | | static bool |
892 | | zstd_fill_out_buffer(FILE_T state) |
893 | | { |
894 | | ws_assert(state->out.avail == 0); |
895 | | |
896 | | if (state->in.avail == 0 && fill_in_buffer(state) == -1) |
897 | | return false; |
898 | | |
899 | | ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0}; |
900 | | ZSTD_inBuffer input = {state->in.next, state->in.avail, 0}; |
901 | | const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input); |
902 | | if (ZSTD_isError(ret)) { |
903 | | state->err = WTAP_ERR_DECOMPRESS; |
904 | | state->err_info = ZSTD_getErrorName(ret); |
905 | | return false; |
906 | | } |
907 | | |
908 | | state->in.next = state->in.next + input.pos; |
909 | | state->in.avail -= (unsigned)input.pos; |
910 | | |
911 | | state->out.next = output.dst; |
912 | | state->out.avail = (unsigned)output.pos; |
913 | | |
914 | | if (ret == 0) { |
915 | | state->last_compression = state->compression; |
916 | | state->compression = UNKNOWN; |
917 | | } |
918 | | return true; |
919 | | } |
920 | | #endif /* HAVE_ZSTD */ |
921 | | |
922 | | /* |
923 | | * Check for a Zstandard header. |
924 | | */ |
925 | | static int |
926 | | check_for_zstd_compression(FILE_T state) |
927 | 0 | { |
928 | | /* |
929 | | * Look for the Zstandard header, and, if we find it, return |
930 | | * success if we support Zstandard and an error if we don't. |
931 | | */ |
932 | 0 | if (state->in.avail >= 4 |
933 | 0 | && state->in.next[0] == 0x28 && state->in.next[1] == 0xb5 |
934 | 0 | && state->in.next[2] == 0x2f && state->in.next[3] == 0xfd) { |
935 | | #ifdef HAVE_ZSTD |
936 | | const size_t ret = ZSTD_initDStream(state->zstd_dctx); |
937 | | if (ZSTD_isError(ret)) { |
938 | | state->err = WTAP_ERR_DECOMPRESS; |
939 | | state->err_info = ZSTD_getErrorName(ret); |
940 | | return -1; |
941 | | } |
942 | | |
943 | | fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, ZSTD); |
944 | | state->compression = ZSTD; |
945 | | state->is_compressed = true; |
946 | | return 1; |
947 | | #else /* HAVE_ZSTD */ |
948 | 0 | state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED; |
949 | 0 | state->err_info = "reading zstd-compressed files isn't supported"; |
950 | 0 | return -1; |
951 | 0 | #endif /* HAVE_ZSTD */ |
952 | 0 | } |
953 | 0 | return 0; |
954 | 0 | } |
955 | | |
956 | | /* |
957 | | * lz4 compression. |
958 | | * |
959 | | * https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md |
960 | | */ |
961 | | #ifdef HAVE_LZ4FRAME_H |
962 | | static void |
963 | | lz4_fast_seek_add(FILE_T file, struct lz4_cur_seek_point *point, int64_t in_pos, int64_t out_pos) |
964 | | { |
965 | | if (!file->fast_seek) { |
966 | | return; |
967 | | } |
968 | | |
969 | | struct fast_seek_point *item = NULL; |
970 | | |
971 | | if (file->fast_seek->len != 0) |
972 | | item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1]; |
973 | | |
974 | | /* As of Glib 2.68 GTree has g_tree_upper_bound, or we could use a |
975 | | * wmem_tree. However, since our initial read is usually sequential |
976 | | * only adding seek points at the end of the ptr array is fast and fine. |
977 | | */ |
978 | | |
979 | | /* don't bother adding jump points between very small blocks (min SPAN) */ |
980 | | if (!item || item->out + SPAN < out_pos) { |
981 | | struct fast_seek_point *val = g_new(struct fast_seek_point,1); |
982 | | val->in = in_pos; |
983 | | val->out = out_pos; |
984 | | val->compression = LZ4_AFTER_HEADER; |
985 | | |
986 | | if (point != NULL) { |
987 | | if (point->pos != 0) { |
988 | | unsigned int left = LZ4_WINSIZE - point->pos; |
989 | | |
990 | | memcpy(val->data.lz4.window, point->window + point->pos, left); |
991 | | memcpy(val->data.lz4.window + left, point->window, point->pos); |
992 | | } else { |
993 | | memcpy(val->data.lz4.window, point->window, LZ4_WINSIZE); |
994 | | } |
995 | | } |
996 | | |
997 | | val->data.lz4.lz4_info = file->lz4_info; |
998 | | memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX); |
999 | | g_ptr_array_add(file->fast_seek, val); |
1000 | | } |
1001 | | } |
1002 | | |
1003 | | static void |
1004 | | lz4_fill_out_buffer(FILE_T state) |
1005 | | { |
1006 | | ws_assert(state->out.avail == 0); |
1007 | | |
1008 | | /* |
1009 | | * This works similar to the Z_BLOCK flush type in zlib that stops after |
1010 | | * each block. LZ4F_getFrameInfo() returns the number of bytes expected |
1011 | | * to finish the current block, plus the header for the next block, when |
1012 | | * called when already in a frame and the compression context is set up. |
1013 | | * We pass in no more than that many bytes of input, and if we do stop |
1014 | | * on a block end, add a fast seek point (but *before* the header.) |
1015 | | */ |
1016 | | |
1017 | | unsigned count = state->size << 1; |
1018 | | unsigned char *buf2; |
1019 | | size_t outBufSize = 0; // Zero so we don't actually consume the block |
1020 | | size_t inBufSize; |
1021 | | |
1022 | | size_t compressedSize = 0; |
1023 | | size_t ret = SIZE_MAX; // 0 indicates end of frame, initialize to something else |
1024 | | |
1025 | | state->out.next = state->out.buf; |
1026 | | |
1027 | | do { |
1028 | | /* get more input for decompress() */ |
1029 | | if (state->in.avail == 0 && fill_in_buffer(state) == -1) |
1030 | | break; |
1031 | | if (state->eof) { |
1032 | | /* EOF */ |
1033 | | state->err = WTAP_ERR_SHORT_READ; |
1034 | | state->err_info = NULL; |
1035 | | break; |
1036 | | } |
1037 | | |
1038 | | inBufSize = state->in.avail; |
1039 | | compressedSize = LZ4F_getFrameInfo(state->lz4_dctx, &state->lz4_info, state->in.next, &inBufSize); |
1040 | | |
1041 | | // We only call this when we're in the middle of decoding a frame, not |
1042 | | // before the start of a frame, so this shouldn't consume any bytes. |
1043 | | ws_assert(inBufSize == 0); |
1044 | | |
1045 | | if (LZ4F_isError(compressedSize)) { |
1046 | | state->err = WTAP_ERR_DECOMPRESS; |
1047 | | state->err_info = LZ4F_getErrorName(compressedSize); |
1048 | | break; |
1049 | | } |
1050 | | |
1051 | | if (compressedSize > state->size) { |
1052 | | /* |
1053 | | * What is this? Either bogus, or some new variant of LZ4 Frames with |
1054 | | * a larger block size we don't support. We could have a buffer |
1055 | | * overrun if we try to process it. |
1056 | | * |
1057 | | * TODO - We could realloc here. |
1058 | | */ |
1059 | | state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED; |
1060 | | state->err_info = "lz4 compressed block size too large"; |
1061 | | break; |
1062 | | } |
1063 | | |
1064 | | /* Now, read that size */ |
1065 | | |
1066 | | outBufSize = count - state->out.avail; |
1067 | | inBufSize = MIN(state->in.avail, compressedSize); |
1068 | | |
1069 | | buf2 = state->out.buf + state->out.avail; |
1070 | | ret = LZ4F_decompress(state->lz4_dctx, buf2, &outBufSize, state->in.next, &inBufSize, NULL); |
1071 | | |
1072 | | if (LZ4F_isError(ret)) { |
1073 | | state->err = WTAP_ERR_DECOMPRESS; |
1074 | | state->err_info = LZ4F_getErrorName(ret); |
1075 | | break; |
1076 | | } |
1077 | | |
1078 | | state->in.next += (unsigned)inBufSize; |
1079 | | state->in.avail -= (unsigned)inBufSize; |
1080 | | compressedSize -= inBufSize; |
1081 | | |
1082 | | state->out.avail += (unsigned)outBufSize; |
1083 | | |
1084 | | if (state->fast_seek_cur != NULL) { |
1085 | | struct lz4_cur_seek_point *cur = (struct lz4_cur_seek_point *) state->fast_seek_cur; |
1086 | | switch (state->lz4_info.blockMode) { |
1087 | | |
1088 | | case LZ4F_blockIndependent: |
1089 | | /* We don't need the history, always create a fast seek point. */ |
1090 | | cur = NULL; |
1091 | | break; |
1092 | | |
1093 | | #if LZ4_VERSION_NUMBER >= 11000 |
1094 | | case LZ4F_blockLinked: |
1095 | | { |
1096 | | /* Save recent history to the current fast seek point. */ |
1097 | | unsigned int ready = (unsigned)outBufSize; |
1098 | | |
1099 | | /* Do we have a full dictionary's worth of decompressed |
1100 | | * history to copy? */ |
1101 | | if (ready < LZ4_WINSIZE) { |
1102 | | /* No. Can we fit it to the right of the current |
1103 | | * circular buffer position? |
1104 | | */ |
1105 | | unsigned left = LZ4_WINSIZE - cur->pos; |
1106 | | |
1107 | | if (ready <= left) { |
1108 | | /* Yes. Do so. */ |
1109 | | memcpy(cur->window + cur->pos, buf2, ready); |
1110 | | cur->pos += ready; |
1111 | | cur->have += ready; |
1112 | | } else { |
1113 | | /* No. Fill the circular buffer, then start over |
1114 | | * at the beginning. |
1115 | | */ |
1116 | | memcpy(cur->window + cur->pos, buf2, left); |
1117 | | memcpy(cur->window, buf2, ready - left); |
1118 | | cur->pos = ready - left; |
1119 | | cur->have += ready; |
1120 | | } |
1121 | | if (cur->have >= LZ4_WINSIZE) { |
1122 | | cur->have = LZ4_WINSIZE; |
1123 | | } |
1124 | | } else { |
1125 | | /* Yes. Just copy the last 64 KB. */ |
1126 | | memcpy(cur->window, buf2 + (ready - LZ4_WINSIZE), LZ4_WINSIZE); |
1127 | | cur->pos = 0; |
1128 | | cur->have = LZ4_WINSIZE; |
1129 | | } |
1130 | | break; |
1131 | | } |
1132 | | #endif /* LZ4_VERSION_NUMBER >= 11000 */ |
1133 | | |
1134 | | default: |
1135 | | /* Do nothing. Since cur will be non-NULL but have 0, |
1136 | | * we won't create a fast seek point below. |
1137 | | */ |
1138 | | break; |
1139 | | } |
1140 | | |
1141 | | if (compressedSize == 0 && ret > LZ4F_BLOCK_HEADER_SIZE) { |
1142 | | /* End of block plus the next block header. We want to add a fast |
1143 | | * seek point to the beginning of a block, before the header. We |
1144 | | * don't add a fast seek point after before the EndMark / footer, |
1145 | | * which has no data. This also has the effect of preventing us |
1146 | | * from calculating the frame Content Checksum after doing fast |
1147 | | * seeks and random access, which is good because the LZ4 Frame |
1148 | | * API also doesn't have a method to update the running checksum |
1149 | | * value. |
1150 | | */ |
1151 | | |
1152 | | if (cur == NULL || cur->have >= LZ4_WINSIZE) { |
1153 | | /* There's little point in adding a fast seek point with |
1154 | | * less than a full 64 KB of dictionary, as that's too |
1155 | | * close to the frame start to be useful. |
1156 | | */ |
1157 | | lz4_fast_seek_add(state, cur, state->raw_pos - state->in.avail - LZ4F_BLOCK_HEADER_SIZE, state->pos + state->out.avail); |
1158 | | } |
1159 | | } |
1160 | | } |
1161 | | |
1162 | | outBufSize = count - state->out.avail; |
1163 | | } while (ret != 0 && outBufSize); |
1164 | | |
1165 | | state->out.next = state->out.buf; |
1166 | | |
1167 | | if (ret == 0) { |
1168 | | /* End of Frame */ |
1169 | | state->last_compression = state->compression; |
1170 | | state->compression = UNKNOWN; |
1171 | | g_free(state->fast_seek_cur); |
1172 | | state->fast_seek_cur = NULL; |
1173 | | } |
1174 | | } |
1175 | | #endif /* HAVE_LZ4FRAME_H */ |
1176 | | |
1177 | | /* |
1178 | | * Check for an lz4 header. |
1179 | | */ |
1180 | | static int |
1181 | | check_for_lz4_compression(FILE_T state) |
1182 | 0 | { |
1183 | | /* |
1184 | | * Look for the lz4 header, and, if we find it, return success |
1185 | | * if we support lz4 and an error if we don't. |
1186 | | */ |
1187 | 0 | if (state->in.avail >= 4 |
1188 | 0 | && state->in.next[0] == 0x04 && state->in.next[1] == 0x22 |
1189 | 0 | && state->in.next[2] == 0x4d && state->in.next[3] == 0x18) { |
1190 | | #ifdef HAVE_LZ4FRAME_H |
1191 | | LZ4F_resetDecompressionContext(state->lz4_dctx); |
1192 | | size_t headerSize = LZ4F_HEADER_SIZE_MAX; |
1193 | | #if LZ4_VERSION_NUMBER >= 10903 |
1194 | | /* |
1195 | | * In 1.9.3+ we can handle a silly edge case of a tiny valid |
1196 | | * frame at the end of a file that is smaller than the maximum |
1197 | | * header size. (lz4frame.h added the function in 1.9.0, but |
1198 | | * only for the static library; it wasn't exported until 1.9.3) |
1199 | | */ |
1200 | | while (state->in.avail < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH) { |
1201 | | if (fill_in_buffer(state) == -1) { |
1202 | | return -1; |
1203 | | } |
1204 | | if (state->eof) { |
1205 | | state->err = WTAP_ERR_SHORT_READ; |
1206 | | state->err_info = NULL; |
1207 | | return 0; |
1208 | | } |
1209 | | } |
1210 | | headerSize = LZ4F_headerSize(state->in.next, state->in.avail); |
1211 | | if (LZ4F_isError(headerSize)) { |
1212 | | state->err = WTAP_ERR_DECOMPRESS; |
1213 | | state->err_info = LZ4F_getErrorName(headerSize); |
1214 | | return -1; |
1215 | | } |
1216 | | #endif /* LZ4_VERSION_NUMBER >= 10903 */ |
1217 | | while (state->in.avail < headerSize) { |
1218 | | if (fill_in_buffer(state) == -1) { |
1219 | | return -1; |
1220 | | } |
1221 | | if (state->eof) { |
1222 | | state->err = WTAP_ERR_SHORT_READ; |
1223 | | state->err_info = NULL; |
1224 | | return 0; |
1225 | | } |
1226 | | } |
1227 | | size_t inBufSize = state->in.avail; |
1228 | | memcpy(state->lz4_hdr, state->in.next, headerSize); |
1229 | | const LZ4F_errorCode_t err = LZ4F_getFrameInfo(state->lz4_dctx, &state->lz4_info, state->in.next, &inBufSize); |
1230 | | if (LZ4F_isError(err)) { |
1231 | | state->err = WTAP_ERR_DECOMPRESS; |
1232 | | state->err_info = LZ4F_getErrorName(err); |
1233 | | return -1; |
1234 | | } |
1235 | | |
1236 | | /* |
1237 | | * XXX - We could check state->lz4_info.blockSizeID here, and |
1238 | | * only realloc the buffers to a larger value if the max |
1239 | | * block size is bigger than state->size. Also we could fail |
1240 | | * on unknown values? |
1241 | | */ |
1242 | | state->in.avail -= (unsigned)inBufSize; |
1243 | | state->in.next += (unsigned)inBufSize; |
1244 | | |
1245 | | #if LZ4_VERSION_NUMBER >= 11000 |
1246 | | if (state->fast_seek && state->lz4_info.blockMode == LZ4F_blockLinked) { |
1247 | | struct lz4_cur_seek_point *cur = g_new(struct lz4_cur_seek_point,1); |
1248 | | |
1249 | | cur->pos = cur->have = 0; |
1250 | | g_free(state->fast_seek_cur); |
1251 | | state->fast_seek_cur = cur; |
1252 | | } |
1253 | | #endif /* LZ4_VERSION_NUMBER >= 11000 */ |
1254 | | fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, LZ4); |
1255 | | state->compression = LZ4; |
1256 | | state->is_compressed = true; |
1257 | | return 1; |
1258 | | #else /* HAVE_LZ4FRAME_H */ |
1259 | 0 | state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED; |
1260 | 0 | state->err_info = "reading lz4-compressed files isn't supported"; |
1261 | 0 | return -1; |
1262 | 0 | #endif /* HAVE_LZ4FRAME_H */ |
1263 | 0 | } |
1264 | 0 | return 0; |
1265 | 0 | } |
1266 | | |
1267 | | typedef int (*compression_type_test)(FILE_T); |
1268 | | |
1269 | | static compression_type_test const compression_type_tests[] = { |
1270 | | check_for_zlib_compression, |
1271 | | check_for_zstd_compression, |
1272 | | check_for_lz4_compression, |
1273 | | }; |
1274 | | |
1275 | | /* |
1276 | | * Used when we haven't yet determined whether we have a compressed file |
1277 | | * and, if we do, what sort of compressed file it is. |
1278 | | * |
1279 | | * Based on the non-gzip-specific stuff that gz_head() from zlib does. |
1280 | | */ |
1281 | | static int |
1282 | | check_for_compression(FILE_T state) |
1283 | 0 | { |
1284 | | /* |
1285 | | * If this isn't the first frame / compressed stream, ensure that |
1286 | | * we're starting at the beginning of the buffer. This shouldn't |
1287 | | * get called much. |
1288 | | * |
1289 | | * This is to avoid edge cases where a previous frame finished but |
1290 | | * state->in.next is close to the end of the buffer so there isn't |
1291 | | * much room to put the start of the next frame. |
1292 | | * This also lets us put back bytes if things go wrong. |
1293 | | */ |
1294 | 0 | if (state->in.next != state->in.buf) { |
1295 | 0 | memmove(state->in.buf, state->in.next, state->in.avail); |
1296 | 0 | state->in.next = state->in.buf; |
1297 | 0 | } |
1298 | | |
1299 | | /* get some data in the input buffer */ |
1300 | 0 | if (state->in.avail == 0) { |
1301 | 0 | if (fill_in_buffer(state) == -1) |
1302 | 0 | return -1; |
1303 | 0 | if (state->in.avail == 0) |
1304 | 0 | return 0; |
1305 | 0 | } |
1306 | | |
1307 | | /* |
1308 | | * Check for the compression types we support. |
1309 | | */ |
1310 | 0 | for (size_t i = 0; i < G_N_ELEMENTS(compression_type_tests); i++) { |
1311 | 0 | int ret; |
1312 | |
|
1313 | 0 | ret = compression_type_tests[i](state); |
1314 | 0 | if (ret == -1) |
1315 | 0 | return -1; /* error */ |
1316 | 0 | if (ret == 1) |
1317 | 0 | return 0; /* found it */ |
1318 | 0 | } |
1319 | | |
1320 | | /* |
1321 | | * Some other compressed file formats we might want to support: |
1322 | | * |
1323 | | * XZ format: |
1324 | | * https://tukaani.org/xz/ |
1325 | | * https://github.com/tukaani-project/xz |
1326 | | * https://github.com/tukaani-project/xz/blob/master/doc/xz-file-format.txt |
1327 | | * |
1328 | | * Bzip2 format: |
1329 | | * https://www.sourceware.org/bzip2/ |
1330 | | * https://gitlab.com/bzip2/bzip2/ |
1331 | | * https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf |
1332 | | * (GitHub won't render it; download and open it) |
1333 | | * |
1334 | | * Lzip format: |
1335 | | * https://www.nongnu.org/lzip/ |
1336 | | */ |
1337 | | |
1338 | | /* |
1339 | | * We didn't see anything that looks like a header for any type of |
1340 | | * compressed file that we support, so just do uncompressed I/O. |
1341 | | * |
1342 | | * XXX - This fast seek data is for the case where a compressed stream |
1343 | | * ends and is followed by an uncompressed portion. It only works if |
1344 | | * the uncompressed portion is at the end, as we don't constantly scan |
1345 | | * for magic bytes in the middle of uncompressed data. (Concatenated |
1346 | | * compressed streams _do_ work, even streams of different compression types.) |
1347 | | */ |
1348 | 0 | if (state->fast_seek) |
1349 | 0 | fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, UNCOMPRESSED); |
1350 | | |
1351 | | |
1352 | | /* doing raw i/o, save start of raw data for seeking, copy any leftover |
1353 | | input to output -- this assumes that the output buffer is larger than |
1354 | | the input buffer, which also assures space for gzungetc() */ |
1355 | 0 | state->raw = state->pos; |
1356 | 0 | state->out.next = state->out.buf; |
1357 | | /* not a compressed file -- copy everything we've read into the |
1358 | | input buffer to the output buffer and fall to raw i/o */ |
1359 | 0 | if (state->in.avail) { |
1360 | 0 | memcpy(state->out.buf, state->in.next, state->in.avail); |
1361 | 0 | state->out.avail = state->in.avail; |
1362 | | |
1363 | | /* Now discard everything in the input buffer */ |
1364 | 0 | buf_reset(&state->in); |
1365 | 0 | } |
1366 | 0 | state->compression = UNCOMPRESSED; |
1367 | 0 | return 0; |
1368 | 0 | } |
1369 | | |
1370 | | /* |
1371 | | * Based on what gz_make() in zlib does. |
1372 | | */ |
1373 | | static int |
1374 | | fill_out_buffer(FILE_T state) |
1375 | 0 | { |
1376 | 0 | if (state->compression == UNKNOWN) { |
1377 | | /* |
1378 | | * We don't yet know whether the file is compressed, |
1379 | | * so check for a compressed-file header. |
1380 | | */ |
1381 | 0 | if (check_for_compression(state) == -1) |
1382 | 0 | return -1; |
1383 | 0 | if (state->out.avail != 0) /* got some data from check_for_compression() */ |
1384 | 0 | return 0; |
1385 | 0 | } |
1386 | | |
1387 | | /* |
1388 | | * We got no data from check_for_compression(), or we didn't call |
1389 | | * it as we already know the compression type, so read some more |
1390 | | * data. |
1391 | | */ |
1392 | 0 | switch (state->compression) { |
1393 | | |
1394 | 0 | case UNCOMPRESSED: |
1395 | | /* straight copy */ |
1396 | 0 | if (!uncompressed_fill_out_buffer(state)) |
1397 | 0 | return -1; |
1398 | 0 | break; |
1399 | | |
1400 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1401 | 0 | case ZLIB: |
1402 | | /* zlib (gzip) decompress */ |
1403 | 0 | zlib_fill_out_buffer(state); |
1404 | 0 | break; |
1405 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1406 | | |
1407 | | #ifdef HAVE_ZSTD |
1408 | | case ZSTD: |
1409 | | /* zstd decompress */ |
1410 | | if (!zstd_fill_out_buffer(state)) |
1411 | | return -1; |
1412 | | break; |
1413 | | #endif /* HAVE_ZSTD */ |
1414 | | |
1415 | | #ifdef HAVE_LZ4FRAME_H |
1416 | | case LZ4: |
1417 | | /* lz4 decompress */ |
1418 | | lz4_fill_out_buffer(state); |
1419 | | break; |
1420 | | #endif /* HAVE_LZ4FRAME_H */ |
1421 | | |
1422 | 0 | default: |
1423 | | /* Unknown compression type; keep reading */ |
1424 | 0 | break; |
1425 | 0 | } |
1426 | 0 | return 0; |
1427 | 0 | } |
1428 | | |
1429 | | static int |
1430 | | gz_skip(FILE_T state, int64_t len) |
1431 | 0 | { |
1432 | 0 | unsigned n; |
1433 | | |
1434 | | /* skip over len bytes or reach end-of-file, whichever comes first */ |
1435 | 0 | while (len) |
1436 | 0 | if (state->out.avail != 0) { |
1437 | | /* We have stuff in the output buffer; skip over |
1438 | | it. */ |
1439 | 0 | n = (int64_t)state->out.avail > len ? (unsigned)len : state->out.avail; |
1440 | 0 | state->out.avail -= n; |
1441 | 0 | state->out.next += n; |
1442 | 0 | state->pos += n; |
1443 | 0 | len -= n; |
1444 | 0 | } else if (state->err != 0) { |
1445 | | /* We have nothing in the output buffer, and |
1446 | | we have an error that may not have been |
1447 | | reported yet; that means we can't generate |
1448 | | any more data into the output buffer, so |
1449 | | return an error indication. */ |
1450 | 0 | return -1; |
1451 | 0 | } else if (state->eof && state->in.avail == 0) { |
1452 | | /* We have nothing in the output buffer, and |
1453 | | we're at the end of the input; just return. */ |
1454 | 0 | break; |
1455 | 0 | } else { |
1456 | | /* We have nothing in the output buffer, and |
1457 | | we can generate more data; get more output, |
1458 | | looking for header if required. */ |
1459 | 0 | if (fill_out_buffer(state) == -1) |
1460 | 0 | return -1; |
1461 | 0 | } |
1462 | 0 | return 0; |
1463 | 0 | } |
1464 | | |
1465 | | static void |
1466 | | gz_reset(FILE_T state) |
1467 | 0 | { |
1468 | 0 | buf_reset(&state->out); /* no output data available */ |
1469 | 0 | state->eof = false; /* not at end of file */ |
1470 | 0 | state->compression = UNKNOWN; /* look for compression header */ |
1471 | |
|
1472 | 0 | state->seek_pending = false; /* no seek request pending */ |
1473 | 0 | state->err = 0; /* clear error */ |
1474 | 0 | state->err_info = NULL; |
1475 | 0 | state->pos = 0; /* no uncompressed data yet */ |
1476 | 0 | buf_reset(&state->in); /* no input data yet */ |
1477 | 0 | } |
1478 | | |
1479 | | FILE_T |
1480 | | file_fdopen(int fd) |
1481 | 0 | { |
1482 | | /* |
1483 | | * XXX - we now check whether we have st_blksize in struct stat; |
1484 | | * it's not available on all platforms. |
1485 | | * |
1486 | | * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not |
1487 | | * set on all platforms that have st_blksize in struct stat. |
1488 | | * (Not all platforms have st_blksize in struct stat.) |
1489 | | * |
1490 | | * Is there some reason *not* to make the buffer size the maximum |
1491 | | * of GBUFSIZE and st_blksize? On most UN*Xes, the standard I/O |
1492 | | * library does I/O with st_blksize as the buffer size; on others, |
1493 | | * and on Windows, it's a 4K buffer size. If st_blksize is bigger |
1494 | | * than GBUFSIZE (which is currently 4KB), that's probably a |
1495 | | * hint that reading in st_blksize chunks is considered a good |
1496 | | * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize |
1497 | | * being 8K, or APFS, where st_blksize is big on at least some |
1498 | | * versions of macOS). |
1499 | | */ |
1500 | 0 | #ifdef _STATBUF_ST_BLKSIZE |
1501 | 0 | ws_statb64 st; |
1502 | 0 | #endif /* _STATBUF_ST_BLKSIZE */ |
1503 | | #ifdef HAVE_ZSTD |
1504 | | size_t zstd_buf_size; |
1505 | | #endif /* HAVE_ZSTD */ |
1506 | 0 | unsigned want = GZBUFSIZE; |
1507 | 0 | FILE_T state; |
1508 | | #ifdef HAVE_LZ4FRAME_H |
1509 | | size_t ret; |
1510 | | #endif /* HAVE_LZ4FRAME_H */ |
1511 | |
|
1512 | 0 | if (fd == -1) |
1513 | 0 | return NULL; |
1514 | | |
1515 | | /* allocate FILE_T structure to return */ |
1516 | 0 | state = (FILE_T)g_try_malloc0(sizeof *state); |
1517 | 0 | if (state == NULL) |
1518 | 0 | return NULL; |
1519 | | |
1520 | 0 | state->fast_seek_cur = NULL; |
1521 | 0 | state->fast_seek = NULL; |
1522 | | |
1523 | | /* open the file with the appropriate mode (or just use fd) */ |
1524 | 0 | state->fd = fd; |
1525 | | |
1526 | | /* we don't yet know whether it's compressed */ |
1527 | 0 | state->is_compressed = false; |
1528 | 0 | state->last_compression = UNKNOWN; |
1529 | | |
1530 | | /* save the current position for rewinding (only if reading) */ |
1531 | 0 | state->start = ws_lseek64(state->fd, 0, SEEK_CUR); |
1532 | 0 | if (state->start == -1) state->start = 0; |
1533 | 0 | state->raw_pos = state->start; |
1534 | | |
1535 | | /* initialize stream */ |
1536 | 0 | gz_reset(state); |
1537 | |
|
1538 | 0 | #ifdef _STATBUF_ST_BLKSIZE |
1539 | | /* |
1540 | | * See what I/O size the file system recommends using, and if |
1541 | | * it's bigger than what we're using and isn't too big, use |
1542 | | * it. |
1543 | | */ |
1544 | 0 | if (ws_fstat64(fd, &st) >= 0) { |
1545 | | /* |
1546 | | * Yes, st_blksize can be bigger than an int; apparently, |
1547 | | * it's a long on LP64 Linux, for example. |
1548 | | * |
1549 | | * If the value is too big to fit into a unsigned, |
1550 | | * just use the maximum read buffer size. |
1551 | | * |
1552 | | * On top of that, the Single UNIX Specification says that |
1553 | | * st_blksize is of type blksize_t, which is a *signed* |
1554 | | * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's |
1555 | | * include/uapi/asm-generic/stat.h define it as such. |
1556 | | * |
1557 | | * However, other OSes might make it unsigned, and older versions |
1558 | | * of OSes that currently make it signed might make it unsigned, |
1559 | | * so we try to avoid warnings from that. |
1560 | | * |
1561 | | * We cast MAX_READ_BUF_SIZE to long in order to avoid the |
1562 | | * warning, although it might introduce warnings on platforms |
1563 | | * where st_blocksize is unsigned; we'll deal with that if |
1564 | | * it ever shows up as an issue. |
1565 | | * |
1566 | | * MAX_READ_BUF_SIZE is < the largest *signed* 32-bt integer, |
1567 | | * so casting it to long won't turn it into a negative number. |
1568 | | * (We only support 32-bit and 64-bit 2's-complement platforms.) |
1569 | | */ |
1570 | 0 | if (st.st_blksize <= (long)MAX_READ_BUF_SIZE) |
1571 | 0 | want = (unsigned)st.st_blksize; |
1572 | 0 | else |
1573 | 0 | want = MAX_READ_BUF_SIZE; |
1574 | | /* XXX, verify result? */ |
1575 | 0 | } |
1576 | 0 | #endif /* _STATBUF_ST_BLKSIZE */ |
1577 | | #ifdef HAVE_ZSTD |
1578 | | /* we should have separate input and output buf sizes */ |
1579 | | zstd_buf_size = ZSTD_DStreamInSize(); |
1580 | | if (zstd_buf_size > want) { |
1581 | | if (zstd_buf_size <= MAX_READ_BUF_SIZE) |
1582 | | want = (unsigned)zstd_buf_size; |
1583 | | else |
1584 | | want = MAX_READ_BUF_SIZE; |
1585 | | } |
1586 | | zstd_buf_size = ZSTD_DStreamOutSize(); |
1587 | | if (zstd_buf_size > want) { |
1588 | | if (zstd_buf_size <= MAX_READ_BUF_SIZE) |
1589 | | want = (unsigned)zstd_buf_size; |
1590 | | else |
1591 | | want = MAX_READ_BUF_SIZE; |
1592 | | } |
1593 | | #endif /* HAVE_ZSTD */ |
1594 | | #ifdef HAVE_LZ4FRAME_H |
1595 | | if (LZ4BUFSIZE > want) { |
1596 | | if (LZ4BUFSIZE <= MAX_READ_BUF_SIZE) { |
1597 | | want = LZ4BUFSIZE; |
1598 | | } else { |
1599 | | goto err; |
1600 | | } |
1601 | | } |
1602 | | #endif /* HAVE_LZ4FRAME_H */ |
1603 | | |
1604 | | /* allocate buffers */ |
1605 | 0 | state->in.buf = (unsigned char *)g_try_malloc(want); |
1606 | 0 | state->in.next = state->in.buf; |
1607 | 0 | state->in.avail = 0; |
1608 | 0 | state->out.buf = (unsigned char *)g_try_malloc(want << 1); |
1609 | 0 | state->out.next = state->out.buf; |
1610 | 0 | state->out.avail = 0; |
1611 | 0 | state->size = want; |
1612 | 0 | if (state->in.buf == NULL || state->out.buf == NULL) { |
1613 | 0 | goto err; |
1614 | 0 | } |
1615 | | |
1616 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1617 | | /* allocate inflate memory */ |
1618 | 0 | state->strm.zalloc = Z_NULL; |
1619 | 0 | state->strm.zfree = Z_NULL; |
1620 | 0 | state->strm.opaque = Z_NULL; |
1621 | 0 | state->strm.avail_in = 0; |
1622 | 0 | state->strm.next_in = Z_NULL; |
1623 | 0 | if (ZLIB_PREFIX(inflateInit2)(&(state->strm), -15) != Z_OK) { /* raw inflate */ |
1624 | 0 | goto err; |
1625 | 0 | } |
1626 | | |
1627 | | /* for now, assume we should check the crc */ |
1628 | 0 | state->dont_check_crc = false; |
1629 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1630 | |
|
1631 | | #ifdef HAVE_ZSTD |
1632 | | state->zstd_dctx = ZSTD_createDCtx(); |
1633 | | if (state->zstd_dctx == NULL) { |
1634 | | goto err; |
1635 | | } |
1636 | | #endif /* HAVE_ZSTD */ |
1637 | |
|
1638 | | #ifdef HAVE_LZ4FRAME_H |
1639 | | ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION); |
1640 | | if (LZ4F_isError(ret)) { |
1641 | | goto err; |
1642 | | } |
1643 | | #endif /* HAVE_LZ4FRAME_H */ |
1644 | | |
1645 | | /* return stream */ |
1646 | 0 | return state; |
1647 | | |
1648 | 0 | err: |
1649 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1650 | 0 | ZLIB_PREFIX(inflateEnd)(&state->strm); |
1651 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1652 | | #ifdef HAVE_ZSTD |
1653 | | ZSTD_freeDCtx(state->zstd_dctx); |
1654 | | #endif /* HAVE_ZSTD */ |
1655 | | #ifdef HAVE_LZ4FRAME_H |
1656 | | LZ4F_freeDecompressionContext(state->lz4_dctx); |
1657 | | #endif /* HAVE_LZ4FRAME_H */ |
1658 | 0 | g_free(state->out.buf); |
1659 | 0 | g_free(state->in.buf); |
1660 | 0 | g_free(state); |
1661 | 0 | errno = ENOMEM; |
1662 | 0 | return NULL; |
1663 | 0 | } |
1664 | | |
1665 | | FILE_T |
1666 | | file_open(const char *path) |
1667 | 0 | { |
1668 | 0 | int fd; |
1669 | 0 | FILE_T ft; |
1670 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1671 | 0 | const char *suffixp; |
1672 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1673 | | |
1674 | | /* open file and do correct filename conversions. |
1675 | | |
1676 | | XXX - do we need O_LARGEFILE? On UN*X, if we need to do |
1677 | | something special to get large file support, the configure |
1678 | | script should have set us up with the appropriate #defines, |
1679 | | so we should be getting a large-file-enabled file descriptor |
1680 | | here. Pre-Large File Summit UN*Xes, and possibly even some |
1681 | | post-LFS UN*Xes, might require O_LARGEFILE here, though. |
1682 | | If so, we should probably handle that in ws_open(). */ |
1683 | 0 | if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1) |
1684 | 0 | return NULL; |
1685 | | |
1686 | | /* open file handle */ |
1687 | 0 | ft = file_fdopen(fd); |
1688 | 0 | if (ft == NULL) { |
1689 | 0 | ws_close(fd); |
1690 | 0 | return NULL; |
1691 | 0 | } |
1692 | | |
1693 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1694 | | /* |
1695 | | * If this file's name ends in ".caz", it's probably a compressed |
1696 | | * Windows Sniffer file. The compression is gzip, but if we |
1697 | | * process the CRC as specified by RFC 1952, the computed CRC |
1698 | | * doesn't match the stored CRC. |
1699 | | * |
1700 | | * Compressed Windows Sniffer files don't all have the same CRC |
1701 | | * value; is it just random crap, or are they running the CRC on |
1702 | | * a different set of data than you're supposed to (e.g., not |
1703 | | * CRCing some of the data), or something such as that? |
1704 | | * |
1705 | | * For now, we just set a flag to ignore CRC errors. |
1706 | | */ |
1707 | 0 | suffixp = strrchr(path, '.'); |
1708 | 0 | if (suffixp != NULL) { |
1709 | 0 | if (g_ascii_strcasecmp(suffixp, ".caz") == 0) |
1710 | 0 | ft->dont_check_crc = true; |
1711 | 0 | } |
1712 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1713 | |
|
1714 | 0 | return ft; |
1715 | 0 | } |
1716 | | |
1717 | | void |
1718 | | file_set_random_access(FILE_T stream, bool random_flag _U_, GPtrArray *seek) |
1719 | 0 | { |
1720 | 0 | stream->fast_seek = seek; |
1721 | 0 | } |
1722 | | |
1723 | | int64_t |
1724 | | file_seek(FILE_T file, int64_t offset, int whence, int *err) |
1725 | 0 | { |
1726 | 0 | struct fast_seek_point *here; |
1727 | 0 | unsigned n; |
1728 | |
|
1729 | 0 | if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) { |
1730 | 0 | ws_assert_not_reached(); |
1731 | | /* |
1732 | | *err = EINVAL; |
1733 | | return -1; |
1734 | | */ |
1735 | 0 | } |
1736 | | |
1737 | | /* Normalize offset to a SEEK_CUR specification */ |
1738 | 0 | if (whence == SEEK_END) { |
1739 | | /* Seek relative to the end of the file; given that we might be |
1740 | | reading from a compressed file, we do that by seeking to the |
1741 | | end of the file, making an offset relative to the end of |
1742 | | the file an offset relative to the current position. |
1743 | | |
1744 | | XXX - we don't actually use this yet, but, for uncompressed |
1745 | | files, we could optimize it, if desired, by directly using |
1746 | | ws_lseek64(). */ |
1747 | 0 | if (gz_skip(file, INT64_MAX) == -1) { |
1748 | 0 | *err = file->err; |
1749 | 0 | return -1; |
1750 | 0 | } |
1751 | 0 | if (offset == 0) { |
1752 | | /* We are done */ |
1753 | 0 | return file->pos; |
1754 | 0 | } |
1755 | 0 | } else if (whence == SEEK_SET) |
1756 | 0 | offset -= file->pos; |
1757 | 0 | else if (file->seek_pending) { |
1758 | | /* There's a forward-skip pending, so file->pos doesn't reflect |
1759 | | the actual file position, it represents the position from |
1760 | | which we're skipping; update the offset to include that. */ |
1761 | 0 | offset += file->skip; |
1762 | 0 | } |
1763 | 0 | file->seek_pending = false; |
1764 | | |
1765 | | /* |
1766 | | * Are we moving at all? |
1767 | | */ |
1768 | 0 | if (offset == 0) { |
1769 | | /* No. Just return the current position. */ |
1770 | 0 | return file->pos; |
1771 | 0 | } |
1772 | | |
1773 | | /* |
1774 | | * Are we seeking backwards? |
1775 | | */ |
1776 | 0 | if (offset < 0) { |
1777 | | /* |
1778 | | * Yes. |
1779 | | * |
1780 | | * Do we have enough data before the current position in the |
1781 | | * buffer that we can seek backwards within the buffer? |
1782 | | */ |
1783 | 0 | if (-offset <= offset_in_buffer(&file->out)) { |
1784 | | /* |
1785 | | * Yes. Adjust appropriately. |
1786 | | * |
1787 | | * offset is negative, so -offset is non-negative, and |
1788 | | * -offset is <= an unsigned and thus fits in an unsigned. |
1789 | | * Get that value and adjust appropriately. |
1790 | | * |
1791 | | * (Casting offset to unsigned makes it positive, which |
1792 | | * is not what we would want, so we cast -offset instead.) |
1793 | | * |
1794 | | * XXX - this won't work with -offset = 2^63, as its |
1795 | | * negative isn't a valid 64-bit integer, but we are |
1796 | | * not at all likely to see files big enough to ever |
1797 | | * see a negative offset that large. |
1798 | | */ |
1799 | 0 | unsigned adjustment = (unsigned)(-offset); |
1800 | |
|
1801 | 0 | file->out.avail += adjustment; |
1802 | 0 | file->out.next -= adjustment; |
1803 | 0 | file->pos -= adjustment; |
1804 | 0 | return file->pos; |
1805 | 0 | } |
1806 | 0 | } else { |
1807 | | /* |
1808 | | * No. Offset is positive; we're seeking forwards. |
1809 | | * |
1810 | | * Do we have enough data after the current position in the |
1811 | | * buffer that we can seek forwards within the buffer? |
1812 | | */ |
1813 | 0 | if (offset < file->out.avail) { |
1814 | | /* |
1815 | | * Yes. Adjust appropriately. |
1816 | | * |
1817 | | * offset is < an unsigned and thus fits in an unsigned, |
1818 | | * so we can cast it to unsigned safely. |
1819 | | */ |
1820 | 0 | file->out.avail -= (unsigned)offset; |
1821 | 0 | file->out.next += offset; |
1822 | 0 | file->pos += offset; |
1823 | 0 | return file->pos; |
1824 | 0 | } |
1825 | 0 | } |
1826 | | |
1827 | | /* |
1828 | | * We're not seeking within the buffer. Do we have "fast seek" data |
1829 | | * for the location to which we will be seeking, and are we either |
1830 | | * seeking backwards or is the fast seek point past what is in the |
1831 | | * buffer? (We don't want to "fast seek" backwards to a point that |
1832 | | * we've already read and buffered if we're actually seeking forwards.) |
1833 | | * |
1834 | | * It might in certain cases be faster to continue reading linearly |
1835 | | * forward rather than jump to the fast seek point if the distance |
1836 | | * to the fast seek point is small, but we might only be able to do that |
1837 | | * if the compression context doesn't change (which for LZ4 includes if |
1838 | | * we jump to a LZ4 with different options.) |
1839 | | * XXX - profile different buffer and SPAN sizes |
1840 | | */ |
1841 | 0 | if ((here = fast_seek_find(file, file->pos + offset)) && |
1842 | 0 | (offset < 0 || here->out >= file->pos + file->out.avail)) { |
1843 | 0 | int64_t off, off2; |
1844 | | |
1845 | | /* |
1846 | | * Yes. Use that data to do the seek. |
1847 | | * Note that this will be true only if file_set_random_access() |
1848 | | * has been called on this file, which should never be the case |
1849 | | * for a pipe. |
1850 | | */ |
1851 | 0 | switch (here->compression) { |
1852 | | |
1853 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1854 | 0 | case ZLIB: |
1855 | 0 | #ifdef HAVE_INFLATEPRIME |
1856 | 0 | off = here->in - (here->data.zlib.bits ? 1 : 0); |
1857 | | #else /* HAVE_INFLATEPRIME */ |
1858 | | off = here->in; |
1859 | | #endif /* HAVE_INFLATEPRIME */ |
1860 | 0 | off2 = here->out; |
1861 | 0 | break; |
1862 | | |
1863 | 0 | case GZIP_AFTER_HEADER: |
1864 | 0 | off = here->in; |
1865 | 0 | off2 = here->out; |
1866 | 0 | break; |
1867 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1868 | | |
1869 | | #ifdef HAVE_LZ4FRAME_H |
1870 | | case LZ4: |
1871 | | case LZ4_AFTER_HEADER: |
1872 | | ws_debug("fast seek lz4"); |
1873 | | off = here->in; |
1874 | | off2 = here->out; |
1875 | | break; |
1876 | | #endif /* HAVE_LZ4FRAME_H */ |
1877 | | |
1878 | 0 | case UNCOMPRESSED: |
1879 | | /* In an uncompressed portion, seek directly to the offset */ |
1880 | 0 | off2 = (file->pos + offset); |
1881 | 0 | off = here->in + (off2 - here->out); |
1882 | 0 | break; |
1883 | | |
1884 | 0 | default: |
1885 | | /* Otherwise, seek to the fast seek point to do any needed setup. */ |
1886 | 0 | off = here->in; |
1887 | 0 | off2 = here->out; |
1888 | 0 | break; |
1889 | 0 | } |
1890 | | |
1891 | 0 | if (ws_lseek64(file->fd, off, SEEK_SET) == -1) { |
1892 | 0 | *err = errno; |
1893 | 0 | return -1; |
1894 | 0 | } |
1895 | 0 | fast_seek_reset(file); |
1896 | |
|
1897 | 0 | file->raw_pos = off; |
1898 | 0 | buf_reset(&file->out); |
1899 | 0 | file->eof = false; |
1900 | 0 | file->seek_pending = false; |
1901 | 0 | file->err = 0; |
1902 | 0 | file->err_info = NULL; |
1903 | 0 | buf_reset(&file->in); |
1904 | |
|
1905 | 0 | switch (here->compression) { |
1906 | | |
1907 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
1908 | 0 | case ZLIB: { |
1909 | 0 | zlib_stream*strm = &file->strm; |
1910 | 0 | ZLIB_PREFIX(inflateReset)(strm); |
1911 | 0 | strm->adler = here->data.zlib.adler; |
1912 | 0 | strm->total_out = here->data.zlib.total_out; |
1913 | 0 | #ifdef HAVE_INFLATEPRIME |
1914 | 0 | if (here->data.zlib.bits) { |
1915 | 0 | FILE_T state = file; |
1916 | 0 | int ret = GZ_GETC(); |
1917 | |
|
1918 | 0 | if (ret == -1) { |
1919 | 0 | if (state->err == 0) { |
1920 | | /* EOF */ |
1921 | 0 | *err = WTAP_ERR_SHORT_READ; |
1922 | 0 | } else |
1923 | 0 | *err = state->err; |
1924 | 0 | return -1; |
1925 | 0 | } |
1926 | 0 | (void)ZLIB_PREFIX(inflatePrime)(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits)); |
1927 | 0 | } |
1928 | 0 | #endif /* HAVE_INFLATEPRIME */ |
1929 | 0 | (void)ZLIB_PREFIX(inflateSetDictionary)(strm, here->data.zlib.window, ZLIB_WINSIZE); |
1930 | 0 | file->compression = ZLIB; |
1931 | 0 | break; |
1932 | 0 | } |
1933 | | |
1934 | 0 | case GZIP_AFTER_HEADER: { |
1935 | 0 | zlib_stream* strm = &file->strm; |
1936 | 0 | ZLIB_PREFIX(inflateReset)(strm); |
1937 | 0 | strm->adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0); |
1938 | 0 | file->compression = ZLIB; |
1939 | 0 | break; |
1940 | 0 | } |
1941 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
1942 | | |
1943 | | #ifdef HAVE_LZ4FRAME_H |
1944 | | case LZ4: |
1945 | | case LZ4_AFTER_HEADER: |
1946 | | /* At the start of a frame, reset the context and re-read it. |
1947 | | * Unfortunately the API doesn't provide a method to set the |
1948 | | * context options explicitly based on an already read |
1949 | | * LZ4F_frameInfo_t. |
1950 | | */ |
1951 | | LZ4F_resetDecompressionContext(file->lz4_dctx); |
1952 | | size_t hdr_size = LZ4F_HEADER_SIZE_MAX; |
1953 | | const LZ4F_errorCode_t frame_err = LZ4F_getFrameInfo(file->lz4_dctx, &file->lz4_info, here->data.lz4.lz4_hdr, &hdr_size); |
1954 | | if (LZ4F_isError(frame_err)) { |
1955 | | file->err = WTAP_ERR_DECOMPRESS; |
1956 | | file->err_info = LZ4F_getErrorName(frame_err); |
1957 | | return -1; |
1958 | | } |
1959 | | file->lz4_info = here->data.lz4.lz4_info; |
1960 | | file->compression = LZ4; |
1961 | | #if LZ4_VERSION_NUMBER >= 11000 |
1962 | | if (here->compression == LZ4_AFTER_HEADER && here->data.lz4.lz4_info.blockMode == LZ4F_blockLinked) { |
1963 | | size_t dstSize = 0, srcSize = 0; |
1964 | | LZ4F_decompress_usingDict(file->lz4_dctx, NULL, &dstSize, NULL, &srcSize, here->data.lz4.window, LZ4_WINSIZE, NULL); |
1965 | | } |
1966 | | #endif /* LZ4_VERSION_NUMBER >= 11000 */ |
1967 | | break; |
1968 | | #endif /* HAVE_LZ4FRAME_H */ |
1969 | | |
1970 | | #ifdef HAVE_ZSTD |
1971 | | case ZSTD: |
1972 | | { |
1973 | | const size_t ret = ZSTD_initDStream(file->zstd_dctx); |
1974 | | if (ZSTD_isError(ret)) { |
1975 | | file->err = WTAP_ERR_DECOMPRESS; |
1976 | | file->err_info = ZSTD_getErrorName(ret); |
1977 | | return -1; |
1978 | | } |
1979 | | file->compression = ZSTD; |
1980 | | break; |
1981 | | } |
1982 | | #endif /* HAVE_ZSTD */ |
1983 | | |
1984 | 0 | default: |
1985 | 0 | file->compression = here->compression; |
1986 | 0 | break; |
1987 | 0 | } |
1988 | | |
1989 | 0 | offset = (file->pos + offset) - off2; |
1990 | 0 | file->pos = off2; |
1991 | 0 | ws_debug("Fast seek OK! %"PRId64, offset); |
1992 | |
|
1993 | 0 | if (offset) { |
1994 | | /* Don't skip forward yet, wait until we want to read from |
1995 | | the file; that way, if we do multiple seeks in a row, |
1996 | | all involving forward skips, they will be combined. */ |
1997 | 0 | file->seek_pending = true; |
1998 | 0 | file->skip = offset; |
1999 | 0 | } |
2000 | 0 | return file->pos + offset; |
2001 | 0 | } |
2002 | | |
2003 | | /* |
2004 | | * Is this an uncompressed file, are we within the raw area, |
2005 | | * are we either seeking backwards or seeking past the end |
2006 | | * of the buffer, and are we set up for random access with |
2007 | | * file_set_random_access()? |
2008 | | * |
2009 | | * Again, note that this will never be true on a pipe, as |
2010 | | * file_set_random_access() should never be called if we're |
2011 | | * reading from a pipe. |
2012 | | */ |
2013 | 0 | if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw |
2014 | 0 | && (offset < 0 || offset >= file->out.avail) |
2015 | 0 | && (file->fast_seek != NULL)) |
2016 | 0 | { |
2017 | | /* |
2018 | | * Yes. Just seek there within the file. |
2019 | | */ |
2020 | 0 | if (ws_lseek64(file->fd, offset - file->out.avail, SEEK_CUR) == -1) { |
2021 | 0 | *err = errno; |
2022 | 0 | return -1; |
2023 | 0 | } |
2024 | 0 | file->raw_pos += (offset - file->out.avail); |
2025 | 0 | buf_reset(&file->out); |
2026 | 0 | file->eof = false; |
2027 | 0 | file->seek_pending = false; |
2028 | 0 | file->err = 0; |
2029 | 0 | file->err_info = NULL; |
2030 | 0 | buf_reset(&file->in); |
2031 | 0 | file->pos += offset; |
2032 | 0 | return file->pos; |
2033 | 0 | } |
2034 | | |
2035 | | /* |
2036 | | * Are we seeking backwards? |
2037 | | */ |
2038 | 0 | if (offset < 0) { |
2039 | | /* |
2040 | | * Yes. We have no fast seek data, so we have to rewind and |
2041 | | * seek forward. |
2042 | | * XXX - true only for compressed files. |
2043 | | * |
2044 | | * Calculate the amount to skip forward after rewinding. |
2045 | | */ |
2046 | 0 | offset += file->pos; |
2047 | 0 | if (offset < 0) { /* before start of file! */ |
2048 | 0 | *err = EINVAL; |
2049 | 0 | return -1; |
2050 | 0 | } |
2051 | | /* rewind, then skip to offset */ |
2052 | | |
2053 | | /* back up and start over */ |
2054 | 0 | if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) { |
2055 | 0 | *err = errno; |
2056 | 0 | return -1; |
2057 | 0 | } |
2058 | 0 | fast_seek_reset(file); |
2059 | 0 | file->raw_pos = file->start; |
2060 | 0 | gz_reset(file); |
2061 | 0 | } |
2062 | | |
2063 | | /* |
2064 | | * Either we're seeking backwards, but have rewound and now need to |
2065 | | * skip forwards, or we're seeking forwards. |
2066 | | * |
2067 | | * Skip what's in output buffer (one less gzgetc() check). |
2068 | | */ |
2069 | 0 | n = (int64_t)file->out.avail > offset ? (unsigned)offset : file->out.avail; |
2070 | 0 | file->out.avail -= n; |
2071 | 0 | file->out.next += n; |
2072 | 0 | file->pos += n; |
2073 | 0 | offset -= n; |
2074 | | |
2075 | | /* request skip (if not zero) */ |
2076 | 0 | if (offset) { |
2077 | | /* Don't skip forward yet, wait until we want to read from |
2078 | | the file; that way, if we do multiple seeks in a row, |
2079 | | all involving forward skips, they will be combined. */ |
2080 | 0 | file->seek_pending = true; |
2081 | 0 | file->skip = offset; |
2082 | 0 | } |
2083 | 0 | return file->pos + offset; |
2084 | 0 | } |
2085 | | |
2086 | | int64_t |
2087 | | file_tell(FILE_T stream) |
2088 | 0 | { |
2089 | | /* return position */ |
2090 | 0 | return stream->pos + (stream->seek_pending ? stream->skip : 0); |
2091 | 0 | } |
2092 | | |
2093 | | int64_t |
2094 | | file_tell_raw(FILE_T stream) |
2095 | 0 | { |
2096 | 0 | return stream->raw_pos; |
2097 | 0 | } |
2098 | | |
2099 | | int |
2100 | | file_fstat(FILE_T stream, ws_statb64 *statb, int *err) |
2101 | 0 | { |
2102 | 0 | if (ws_fstat64(stream->fd, statb) == -1) { |
2103 | 0 | if (err != NULL) |
2104 | 0 | *err = errno; |
2105 | 0 | return -1; |
2106 | 0 | } |
2107 | 0 | return 0; |
2108 | 0 | } |
2109 | | |
2110 | | bool |
2111 | | file_iscompressed(FILE_T stream) |
2112 | 0 | { |
2113 | 0 | return stream->is_compressed; |
2114 | 0 | } |
2115 | | |
2116 | | /* Returns a wtap compression type. If we don't know the compression type, |
2117 | | * return WS_FILE_UNCOMPRESSED, but if our compression state is temporarily |
2118 | | * UNKNOWN because we need to reread compression headers, return the last |
2119 | | * known compression type. |
2120 | | */ |
2121 | | static ws_compression_type |
2122 | | file_get_compression_type(FILE_T stream) |
2123 | 0 | { |
2124 | 0 | if (stream->is_compressed) { |
2125 | 0 | switch ((stream->compression == UNKNOWN) ? stream->last_compression : stream->compression) { |
2126 | | |
2127 | 0 | case ZLIB: |
2128 | 0 | case GZIP_AFTER_HEADER: |
2129 | 0 | return WS_FILE_GZIP_COMPRESSED; |
2130 | | |
2131 | 0 | case ZSTD: |
2132 | 0 | return WS_FILE_ZSTD_COMPRESSED; |
2133 | | |
2134 | 0 | case LZ4: |
2135 | 0 | case LZ4_AFTER_HEADER: |
2136 | 0 | return WS_FILE_LZ4_COMPRESSED; |
2137 | | |
2138 | 0 | case UNCOMPRESSED: |
2139 | 0 | return WS_FILE_UNCOMPRESSED; |
2140 | | |
2141 | 0 | default: /* UNKNOWN, should never happen if is_compressed is set */ |
2142 | 0 | ws_assert_not_reached(); |
2143 | 0 | return WS_FILE_UNCOMPRESSED; |
2144 | 0 | } |
2145 | 0 | } |
2146 | 0 | return WS_FILE_UNCOMPRESSED; |
2147 | 0 | } |
2148 | | |
2149 | | int |
2150 | | file_read(void *buf, unsigned int len, FILE_T file) |
2151 | 0 | { |
2152 | 0 | unsigned got, n; |
2153 | | |
2154 | | /* if len is zero, avoid unnecessary operations */ |
2155 | 0 | if (len == 0) |
2156 | 0 | return 0; |
2157 | | |
2158 | | /* process a skip request */ |
2159 | 0 | if (file->seek_pending) { |
2160 | 0 | file->seek_pending = false; |
2161 | 0 | if (gz_skip(file, file->skip) == -1) |
2162 | 0 | return -1; |
2163 | 0 | } |
2164 | | |
2165 | | /* |
2166 | | * Get len bytes to buf, or less than len if at the end; |
2167 | | * if buf is null, just throw the bytes away. |
2168 | | */ |
2169 | 0 | got = 0; |
2170 | 0 | do { |
2171 | 0 | if (file->out.avail != 0) { |
2172 | | /* We have stuff in the output buffer; copy |
2173 | | what we have. */ |
2174 | 0 | n = file->out.avail > len ? len : file->out.avail; |
2175 | 0 | if (buf != NULL) { |
2176 | 0 | memcpy(buf, file->out.next, n); |
2177 | 0 | buf = (char *)buf + n; |
2178 | 0 | } |
2179 | 0 | file->out.next += n; |
2180 | 0 | file->out.avail -= n; |
2181 | 0 | len -= n; |
2182 | 0 | got += n; |
2183 | 0 | file->pos += n; |
2184 | 0 | } else if (file->err != 0) { |
2185 | | /* We have nothing in the output buffer, and |
2186 | | we have an error that may not have been |
2187 | | reported yet; that means we can't generate |
2188 | | any more data into the output buffer, so |
2189 | | return an error indication. */ |
2190 | 0 | return -1; |
2191 | 0 | } else if (file->eof && file->in.avail == 0) { |
2192 | | /* We have nothing in the output buffer, and |
2193 | | we're at the end of the input; just return |
2194 | | with what we've gotten so far. */ |
2195 | 0 | break; |
2196 | 0 | } else { |
2197 | | /* We have nothing in the output buffer, and |
2198 | | we can generate more data; get more output, |
2199 | | looking for header if required, and |
2200 | | keep looping to process the new stuff |
2201 | | in the output buffer. */ |
2202 | 0 | if (fill_out_buffer(file) == -1) |
2203 | 0 | return -1; |
2204 | 0 | } |
2205 | 0 | } while (len); |
2206 | | |
2207 | 0 | return (int)got; |
2208 | 0 | } |
2209 | | |
2210 | | /* |
2211 | | * XXX - this *peeks* at next byte, not a character. |
2212 | | */ |
2213 | | int |
2214 | | file_peekc(FILE_T file) |
2215 | 0 | { |
2216 | 0 | int ret = 0; |
2217 | | |
2218 | | /* check that we're reading and that there's no error */ |
2219 | 0 | if (file->err != 0) |
2220 | 0 | return -1; |
2221 | | |
2222 | | /* try output buffer (no need to check for skip request) */ |
2223 | 0 | if (file->out.avail != 0) { |
2224 | 0 | return *(file->out.next); |
2225 | 0 | } |
2226 | | |
2227 | | /* process a skip request */ |
2228 | 0 | if (file->seek_pending) { |
2229 | 0 | file->seek_pending = false; |
2230 | 0 | if (gz_skip(file, file->skip) == -1) |
2231 | 0 | return -1; |
2232 | 0 | } |
2233 | | /* if we processed a skip request, there may be data in the buffer, |
2234 | | * or an error could have occurred; likewise if we didn't do seek but |
2235 | | * now call fill_out_buffer, the errors can occur. So we do this while |
2236 | | * loop to check before and after - this is basically the logic from |
2237 | | * file_read() but only for peeking not consuming a byte |
2238 | | */ |
2239 | 0 | while (1) { |
2240 | 0 | if (file->out.avail != 0) { |
2241 | 0 | return *(file->out.next); |
2242 | 0 | } |
2243 | 0 | else if (file->err != 0) { |
2244 | 0 | return -1; |
2245 | 0 | } |
2246 | 0 | else if (file->eof && file->in.avail == 0) { |
2247 | 0 | return -1; |
2248 | 0 | } |
2249 | 0 | else if (fill_out_buffer(file) == -1) { |
2250 | 0 | return -1; |
2251 | 0 | } |
2252 | 0 | } |
2253 | | /* it's actually impossible to get here */ |
2254 | 0 | return ret; |
2255 | 0 | } |
2256 | | |
2257 | | /* |
2258 | | * XXX - this gets a byte, not a character. |
2259 | | */ |
2260 | | int |
2261 | | file_getc(FILE_T file) |
2262 | 0 | { |
2263 | 0 | unsigned char buf[1]; |
2264 | 0 | int ret; |
2265 | | |
2266 | | /* check that we're reading and that there's no error */ |
2267 | 0 | if (file->err != 0) |
2268 | 0 | return -1; |
2269 | | |
2270 | | /* try output buffer (no need to check for skip request) */ |
2271 | 0 | if (file->out.avail != 0) { |
2272 | 0 | file->out.avail--; |
2273 | 0 | file->pos++; |
2274 | 0 | return *(file->out.next)++; |
2275 | 0 | } |
2276 | | |
2277 | 0 | ret = file_read(buf, 1, file); |
2278 | 0 | return ret < 1 ? -1 : buf[0]; |
2279 | 0 | } |
2280 | | |
2281 | | /* |
2282 | | * Like file_gets, but returns a pointer to the terminating NUL |
2283 | | * on success and NULL on failure. |
2284 | | */ |
2285 | | char * |
2286 | | file_getsp(char *buf, int len, FILE_T file) |
2287 | 0 | { |
2288 | 0 | unsigned left, n; |
2289 | 0 | char *curp; |
2290 | 0 | unsigned char *eol; |
2291 | | |
2292 | | /* check parameters */ |
2293 | 0 | if (buf == NULL || len < 1) |
2294 | 0 | return NULL; |
2295 | | |
2296 | | /* check that there's no error */ |
2297 | 0 | if (file->err != 0) |
2298 | 0 | return NULL; |
2299 | | |
2300 | | /* process a skip request */ |
2301 | 0 | if (file->seek_pending) { |
2302 | 0 | file->seek_pending = false; |
2303 | 0 | if (gz_skip(file, file->skip) == -1) |
2304 | 0 | return NULL; |
2305 | 0 | } |
2306 | | |
2307 | | /* copy output bytes up to new line or len - 1, whichever comes first -- |
2308 | | append a terminating zero to the string (we don't check for a zero in |
2309 | | the contents, let the user worry about that) */ |
2310 | 0 | curp = buf; |
2311 | 0 | left = (unsigned)len - 1; |
2312 | 0 | if (left) do { |
2313 | | /* assure that something is in the output buffer */ |
2314 | 0 | if (file->out.avail == 0) { |
2315 | | /* We have nothing in the output buffer. */ |
2316 | 0 | if (file->err != 0) { |
2317 | | /* We have an error that may not have |
2318 | | been reported yet; that means we |
2319 | | can't generate any more data into |
2320 | | the output buffer, so return an |
2321 | | error indication. */ |
2322 | 0 | return NULL; |
2323 | 0 | } |
2324 | 0 | if (fill_out_buffer(file) == -1) |
2325 | 0 | return NULL; /* error */ |
2326 | 0 | if (file->out.avail == 0) { /* end of file */ |
2327 | 0 | if (curp == buf) /* got bupkus */ |
2328 | 0 | return NULL; |
2329 | 0 | break; /* got something -- return it */ |
2330 | 0 | } |
2331 | 0 | } |
2332 | | |
2333 | | /* look for end-of-line in current output buffer */ |
2334 | 0 | n = file->out.avail > left ? left : file->out.avail; |
2335 | 0 | eol = (unsigned char *)memchr(file->out.next, '\n', n); |
2336 | 0 | if (eol != NULL) |
2337 | 0 | n = (unsigned)(eol - file->out.next) + 1; |
2338 | | |
2339 | | /* copy through end-of-line, or remainder if not found */ |
2340 | 0 | memcpy(curp, file->out.next, n); |
2341 | 0 | file->out.avail -= n; |
2342 | 0 | file->out.next += n; |
2343 | 0 | file->pos += n; |
2344 | 0 | left -= n; |
2345 | 0 | curp += n; |
2346 | 0 | } while (left && eol == NULL); |
2347 | | |
2348 | | /* found end-of-line or out of space -- add a terminator and return |
2349 | | a pointer to it */ |
2350 | 0 | *curp = '\0'; |
2351 | 0 | return curp; |
2352 | 0 | } |
2353 | | |
2354 | | /* |
2355 | | * Returns a pointer to the beginning of the buffer on success |
2356 | | * and NULL on failure. |
2357 | | */ |
2358 | | char * |
2359 | | file_gets(char *buf, int len, FILE_T file) |
2360 | 0 | { |
2361 | 0 | if (!file_getsp(buf, len, file)) return NULL; |
2362 | 0 | return buf; |
2363 | 0 | } |
2364 | | |
2365 | | bool |
2366 | | file_eof(FILE_T file) |
2367 | 0 | { |
2368 | | /* return end-of-file state */ |
2369 | 0 | return (file->eof && file->in.avail == 0 && file->out.avail == 0); |
2370 | 0 | } |
2371 | | |
2372 | | /* |
2373 | | * Routine to return a Wiretap error code (0 for no error, an errno |
2374 | | * for a file error, or a WTAP_ERR_ code for other errors) for an |
2375 | | * I/O stream. Also returns an error string for some errors. |
2376 | | */ |
2377 | | int |
2378 | | file_error(FILE_T fh, char **err_info) |
2379 | 0 | { |
2380 | 0 | if (fh->err!=0 && err_info) { |
2381 | | /* g_strdup() returns NULL for NULL argument */ |
2382 | 0 | *err_info = g_strdup(fh->err_info); |
2383 | 0 | } |
2384 | 0 | return fh->err; |
2385 | 0 | } |
2386 | | |
2387 | | void |
2388 | | file_clearerr(FILE_T stream) |
2389 | 0 | { |
2390 | | /* clear error and end-of-file */ |
2391 | 0 | stream->err = 0; |
2392 | 0 | stream->err_info = NULL; |
2393 | 0 | stream->eof = false; |
2394 | 0 | } |
2395 | | |
2396 | | void |
2397 | | file_fdclose(FILE_T file) |
2398 | 0 | { |
2399 | 0 | if (file->fd != -1) |
2400 | 0 | ws_close(file->fd); |
2401 | 0 | file->fd = -1; |
2402 | 0 | } |
2403 | | |
2404 | | bool |
2405 | | file_fdreopen(FILE_T file, const char *path) |
2406 | 0 | { |
2407 | 0 | int fd; |
2408 | |
|
2409 | 0 | if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1) |
2410 | 0 | return false; |
2411 | 0 | file->fd = fd; |
2412 | 0 | return true; |
2413 | 0 | } |
2414 | | |
2415 | | void |
2416 | | file_close(FILE_T file) |
2417 | 0 | { |
2418 | 0 | int fd = file->fd; |
2419 | | |
2420 | | /* free memory and close file */ |
2421 | 0 | if (file->size) { |
2422 | 0 | #ifdef USE_ZLIB_OR_ZLIBNG |
2423 | 0 | ZLIB_PREFIX(inflateEnd)(&(file->strm)); |
2424 | 0 | #endif /* USE_ZLIB_OR_ZLIBNG */ |
2425 | | #ifdef HAVE_ZSTD |
2426 | | ZSTD_freeDCtx(file->zstd_dctx); |
2427 | | #endif /* HAVE_ZSTD */ |
2428 | | #ifdef HAVE_LZ4FRAME_H |
2429 | | LZ4F_freeDecompressionContext(file->lz4_dctx); |
2430 | | #endif /* HAVE_LZ4FRAME_H */ |
2431 | 0 | g_free(file->out.buf); |
2432 | 0 | g_free(file->in.buf); |
2433 | 0 | } |
2434 | 0 | g_free(file->fast_seek_cur); |
2435 | 0 | file->err = 0; |
2436 | 0 | file->err_info = NULL; |
2437 | 0 | g_free(file); |
2438 | | /* |
2439 | | * If fd is -1, somebody's done a file_closefd() on us, so |
2440 | | * we don't need to close the FD itself, and shouldn't do |
2441 | | * so. |
2442 | | */ |
2443 | 0 | if (fd != -1) |
2444 | 0 | ws_close(fd); |
2445 | 0 | } |
2446 | | |
2447 | | /* |
2448 | | * Editor modelines - https://www.wireshark.org/tools/modelines.html |
2449 | | * |
2450 | | * Local variables: |
2451 | | * c-basic-offset: 4 |
2452 | | * tab-width: 8 |
2453 | | * indent-tabs-mode: nil |
2454 | | * End: |
2455 | | * |
2456 | | * vi: set shiftwidth=4 tabstop=8 expandtab: |
2457 | | * :indentSize=4:tabSize=8:noTabs=true: |
2458 | | */ |