Coverage Report

Created: 2026-03-30 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/wireshark/wiretap/file_wrappers.c
Line
Count
Source
1
/* file_wrappers.c
2
 *
3
 * Wiretap Library
4
 * Copyright (c) 1998 by Gilbert Ramirez <gram@alumni.rice.edu>
5
 *
6
 * SPDX-License-Identifier: GPL-2.0-or-later
7
 */
8
9
/* file_access interface based heavily on zlib gzread.c and gzlib.c from zlib
10
 * Copyright (C) 1995-2010 Jean-loup Gailly and Mark Adler
11
 * under licence:
12
 *
13
 * SPDX-License-Identifier: Zlib
14
 *
15
 */
16
17
#include "config.h"
18
0
#define WS_LOG_DOMAIN LOG_DOMAIN_WIRETAP
19
20
#include "file_wrappers.h"
21
22
#include <assert.h>
23
#include <errno.h>
24
#include <string.h>
25
#include "wtap_module.h"
26
27
#include <wsutil/file_util.h>
28
#include <wsutil/zlib_compat.h>
29
#include <wsutil/file_compressed.h>
30
31
#ifdef HAVE_ZSTD
32
#include <zstd.h>
33
#endif /* HAVE_ZSTD */
34
35
#ifdef HAVE_LZ4FRAME_H
36
#include <lz4.h>
37
#include <lz4frame.h>
38
#ifndef LZ4F_BLOCK_HEADER_SIZE /* Added in LZ4_VERSION_NUMBER 10902 */
39
#define LZ4F_BLOCK_HEADER_SIZE 4
40
#endif /* LZ4F_BLOCK_HEADER_SIZE */
41
#endif /* HAVE_LZ4FRAME_H */
42
43
static ws_compression_type file_get_compression_type(FILE_T stream);
44
45
ws_compression_type
46
wtap_get_compression_type(wtap *wth)
47
0
{
48
0
  return file_get_compression_type((wth->fh == NULL) ? wth->random_fh : wth->fh);
49
0
}
50
51
/* #define GZBUFSIZE 8192 */
52
0
#define GZBUFSIZE 4096
53
#define LZ4BUFSIZE 4194304 // 4MiB, maximum block size
54
55
/* Possible values of the "compression" members of struct wtap_reader
   and struct fast_seek_point. */
typedef enum {
    UNKNOWN,           /* not yet determined; a compression header must be looked for */
    UNCOMPRESSED,      /* no compression; input is copied straight through */
    ZLIB,              /* a zlib stream is being decompressed */
    GZIP_AFTER_HEADER, /* recorded in the seek point added after a gzip header */
    ZSTD,              /* Zstandard */
    LZ4,               /* positioned at the start of an LZ4 Frame */
    LZ4_AFTER_HEADER,  /* positioned at the start of an LZ4 Block */
} compression_t;
65
66
/*
67
 * We limit the size of our input and output buffers to 2^30 bytes,
68
 * because:
69
 *
70
 *    1) on Windows with MSVC, the return value of _read() is int,
71
 *       so the biggest read you can do is INT_MAX, and the biggest
72
 *       power of 2 below that is 2^30;
73
 *
74
 *    2) the "avail_in" and "avail_out" values in a z_stream structure
75
 *       in zlib are uInts, and those are unsigned ints, and that
76
 *       imposes a limit on the buffer size when we're reading a
77
 *       gzipped file.
78
 *
79
 * Thus, we use unsigned for the buffer sizes, offsets, amount available
80
 * from the buffer, etc.
81
 *
82
 * If we want an even bigger buffer for uncompressed data, or for
83
 * some other form of compression, then the unsigned-sized values should
84
 * be in structure values used only for reading gzipped files, and
85
 * other values should be used for uncompressed data or data
86
 * compressed using other algorithms (e.g., in a union).
87
 */
88
0
#define MAX_READ_BUF_SIZE (1U << 30)
89
90
/* A data buffer together with a read cursor into it. */
struct wtap_reader_buf {
    uint8_t *buf;    /* start of the buffer's storage */
    uint8_t *next;   /* read cursor: the next byte to hand out */
    unsigned avail;  /* how many bytes, starting at next, can be handed out */
};
95
96
struct wtap_reader {
97
    int fd;                     /* file descriptor */
98
    int64_t raw_pos;            /* current position in file (just to not call lseek()) */
99
    int64_t pos;                /* current position in uncompressed data */
100
    unsigned size;              /* buffer size */
101
102
    struct wtap_reader_buf in;  /* input buffer, containing compressed data */
103
    struct wtap_reader_buf out; /* output buffer, containing uncompressed data */
104
105
    bool eof;                   /* true if end of input file reached */
106
    int64_t start;              /* where the gzip data started, for rewinding */
107
    int64_t raw;                /* where the raw data started, for seeking */
108
    compression_t compression;  /* type of compression, if any */
109
    compression_t last_compression; /* last known compression type */
110
    bool is_compressed;         /* false if completely uncompressed, true otherwise */
111
112
    /* seek request */
113
    int64_t skip;               /* amount to skip (already rewound if backwards) */
114
    bool seek_pending;          /* true if seek request pending */
115
116
    /* error information */
117
    int err;                    /* error code */
118
    const char *err_info;       /* additional error information string for some errors */
119
120
    /*
121
     * Decompression stream information.
122
     *
123
     * XXX - should this be a union?
124
     */
125
#ifdef USE_ZLIB_OR_ZLIBNG
126
    /* zlib inflate stream */
127
    zlib_stream strm;           /* stream structure in-place (not a pointer) */
128
    bool dont_check_crc;        /* true if we aren't supposed to check the CRC */
129
#endif /* USE_ZLIB_OR_ZLIBNG */
130
#ifdef HAVE_ZSTD
131
    ZSTD_DCtx *zstd_dctx;
132
#endif /* HAVE_ZSTD */
133
#ifdef HAVE_LZ4FRAME_H
134
    LZ4F_dctx *lz4_dctx;
135
    LZ4F_frameInfo_t lz4_info;
136
    unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX];
137
#endif /* HAVE_LZ4FRAME_H */
138
139
    /* fast seeking */
140
    GPtrArray *fast_seek;
141
    void *fast_seek_cur;
142
};
143
144
/* Current read offset within a buffer. */
145
static unsigned
146
offset_in_buffer(struct wtap_reader_buf *buf)
147
0
{
148
    /* buf->next points to the next byte to read, and buf->buf points
149
       to the first byte in the buffer, so the difference between them
150
       is the offset.
151
152
       This will fit in an unsigned int, because it can't be bigger
153
       than the size of the buffer, which is an unsigned int. */
154
0
    return (unsigned)(buf->next - buf->buf);
155
0
}
156
157
/* Number of bytes of data that are in a buffer. */
158
static unsigned
159
bytes_in_buffer(struct wtap_reader_buf *buf)
160
0
{
161
    /* buf->next + buf->avail points just past the last byte of data in
162
       the buffer.
163
       Thus, (buf->next + buf->avail) - buf->buf is the number of bytes
164
       of data in the buffer.
165
166
       This will fit in an unsigned, because it can't be bigger
167
       than the size of the buffer, which is a unsigned. */
168
0
    return (unsigned)((buf->next + buf->avail) - buf->buf);
169
0
}
170
171
/* Reset a buffer, discarding all data in the buffer, so we read into
172
   it starting at the beginning. */
173
static void
174
buf_reset(struct wtap_reader_buf *buf)
175
0
{
176
0
    buf->next = buf->buf;
177
0
    buf->avail = 0;
178
0
}
179
180
static int
181
buf_read(FILE_T state, struct wtap_reader_buf *buf)
182
0
{
183
0
    unsigned space_left, to_read;
184
0
    unsigned char *read_ptr;
185
0
    ssize_t ret;
186
187
    /* How much space is left at the end of the buffer?
188
       XXX - the output buffer actually has state->size * 2 bytes. */
189
0
    space_left = state->size - bytes_in_buffer(buf);
190
0
    if (space_left == 0) {
191
        /* There's no space left, so we start fresh at the beginning
192
           of the buffer. */
193
0
        buf_reset(buf);
194
195
0
        read_ptr = buf->buf;
196
0
        to_read = state->size;
197
0
    } else {
198
        /* There's some space left; try to read as much data as we
199
           can into that space.  We may get less than that if we're
200
           reading from a pipe or if we're near the end of the file. */
201
0
        read_ptr = buf->next + buf->avail;
202
0
        to_read = space_left;
203
0
    }
204
205
0
    ret = ws_read(state->fd, read_ptr, to_read);
206
0
    if (ret < 0) {
207
0
        state->err = errno;
208
0
        state->err_info = NULL;
209
0
        return -1;
210
0
    }
211
0
    if (ret == 0)
212
0
        state->eof = true;
213
0
    state->raw_pos += ret;
214
0
    buf->avail += (unsigned)ret;
215
0
    return 0;
216
0
}
217
218
static int /* gz_avail */
219
fill_in_buffer(FILE_T state)
220
0
{
221
0
    if (state->err != 0)
222
0
        return -1;
223
0
    if (!state->eof) {
224
0
        if (buf_read(state, &state->in) < 0)
225
0
            return -1;
226
0
    }
227
0
    return 0;
228
0
}
229
230
0
#define ZLIB_WINSIZE 32768
231
#define  LZ4_WINSIZE 65536
232
233
struct fast_seek_point {
234
    int64_t out;         /* corresponding offset in uncompressed data */
235
    int64_t in;          /* offset in input file of first full byte */
236
237
    compression_t compression;
238
    union {
239
        struct {
240
#ifdef HAVE_INFLATEPRIME
241
            int bits;   /* number of bits (1-7) from byte at in - 1, or 0 */
242
#endif /* HAVE_INFLATEPRIME */
243
            unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
244
245
            /* be gentle with Z_STREAM_END, 8 bytes more... Another solution would be to comment checks out */
246
            uint32_t adler;
247
            uint32_t total_out;
248
        } zlib;
249
#ifdef HAVE_LZ4FRAME_H
250
        struct {
251
            LZ4F_frameInfo_t lz4_info;
252
            unsigned char lz4_hdr[LZ4F_HEADER_SIZE_MAX];
253
            unsigned char window[LZ4_WINSIZE]; /* preceding 64K of uncompressed data */
254
        } lz4;
255
#endif
256
    } data;
257
};
258
259
struct zlib_cur_seek_point {
260
    unsigned char window[ZLIB_WINSIZE]; /* preceding 32K of uncompressed data */
261
    unsigned int pos;
262
    unsigned int have;
263
};
264
265
struct lz4_cur_seek_point {
266
    unsigned char window[LZ4_WINSIZE]; /* preceding 64K of uncompressed data */
267
    unsigned pos; /* start position in circular buffer */
268
    unsigned have;
269
};
270
271
0
#define SPAN INT64_C(1048576)
272
static struct fast_seek_point *
273
fast_seek_find(FILE_T file, int64_t pos)
274
0
{
275
0
    struct fast_seek_point *smallest = NULL;
276
0
    struct fast_seek_point *item;
277
0
    unsigned low, i, max;
278
279
0
    if (!file->fast_seek)
280
0
        return NULL;
281
282
0
    for (low = 0, max = file->fast_seek->len; low < max; ) {
283
0
        i = (low + max) / 2;
284
0
        item = (struct fast_seek_point *)file->fast_seek->pdata[i];
285
286
0
        if (pos < item->out)
287
0
            max = i;
288
0
        else if (pos > item->out) {
289
0
            smallest = item;
290
0
            low = i + 1;
291
0
        } else {
292
0
            return item;
293
0
        }
294
0
    }
295
0
    return smallest;
296
0
}
297
298
static void
299
fast_seek_header(FILE_T file, int64_t in_pos, int64_t out_pos,
300
                 compression_t compression)
301
0
{
302
0
    struct fast_seek_point *item = NULL;
303
304
0
    if (!file->fast_seek) {
305
0
        return;
306
0
    }
307
308
0
    if (file->fast_seek->len != 0)
309
0
        item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
310
311
    /* fast_seek_header always adds a fast seek point, even if less than
312
     * SPAN from the last one. That is because it used for new streams
313
     * (including concatenated streams) where the compression type
314
     * or, for LZ4, compression options, may change.
315
     */
316
0
    if (!item || item->out < out_pos) {
317
0
        struct fast_seek_point *val = g_new(struct fast_seek_point,1);
318
0
        val->in = in_pos;
319
0
        val->out = out_pos;
320
0
        val->compression = compression;
321
322
#ifdef HAVE_LZ4FRAME_H
323
        if (compression == LZ4) {
324
            val->data.lz4.lz4_info = file->lz4_info;
325
            memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX);
326
        }
327
#endif /* HAVE_LZ4FRAME_H */
328
0
        g_ptr_array_add(file->fast_seek, val);
329
0
    }
330
0
}
331
332
static void
333
fast_seek_reset(FILE_T state)
334
0
{
335
0
    switch (state->compression) {
336
337
0
    case UNKNOWN:
338
0
        break;
339
340
0
    case UNCOMPRESSED:
341
        /* Nothing to do */
342
0
        break;
343
344
0
    case ZLIB:
345
0
#ifdef USE_ZLIB_OR_ZLIBNG
346
0
        if (state->fast_seek_cur != NULL) {
347
0
            struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
348
349
0
            cur->have = 0;
350
0
        }
351
#else
352
        /* This "cannot happen" */
353
        ws_assert_not_reached();
354
#endif /* USE_ZLIB_OR_ZLIBNG */
355
0
        break;
356
357
0
    case GZIP_AFTER_HEADER:
358
0
        break;
359
360
0
    case ZSTD:
361
#ifdef HAVE_ZSTD
362
        /* Anything to do? */
363
#else
364
        /* This "cannot happen" */
365
0
        ws_assert_not_reached();
366
0
#endif /* HAVE_ZSTD */
367
0
        break;
368
369
0
    case LZ4:
370
0
    case LZ4_AFTER_HEADER:
371
#ifdef HAVE_LZ4
372
        /* Anything to do? */
373
#else
374
        /* This "cannot happen" */
375
0
        ws_assert_not_reached();
376
0
#endif /* HAVE_LZ4 */
377
0
        break;
378
379
    /* Add other compression types here */
380
381
0
    default:
382
        /* This "cannot happen" */
383
0
        ws_assert_not_reached();
384
0
        break;
385
0
    }
386
0
}
387
388
static bool
389
uncompressed_fill_out_buffer(FILE_T state)
390
0
{
391
0
    if (buf_read(state, &state->out) < 0)
392
0
        return false;
393
0
    return true;
394
0
}
395
396
/* Fetch the next byte of input; evaluates to -1 at end of input or on
 * error.  Expects a variable named "state" to be in scope.
 *
 * Two properties make it safe to issue several GZ_GETC() calls in a
 * row and test only the last result:
 *
 *      1) an error recorded by buf_read() sticks: fill_in_buffer()
 *      returns -1 without reading anything once state->err is set;
 *
 *      2) at EOF, GZ_GETC() keeps evaluating to -1.
 */
#define GZ_GETC()                                                       \
    ((state->in.avail == 0 && fill_in_buffer(state) == -1) ? -1 :       \
     ((state->in.avail == 0) ? -1 :                                     \
      (state->in.avail--, *(state->in.next)++)))
411
412
413
/*
414
 * Gzipped files, using compression from zlib or zlib-ng.
415
 *
416
 * https://tools.ietf.org/html/rfc1952 (RFC 1952)
417
 */
418
#ifdef USE_ZLIB_OR_ZLIBNG
419
420
/* Get a one-byte integer and return 0 on success and the value in *ret.
421
   Otherwise -1 is returned, state->err is set, and *ret is not modified. */
422
static int
423
gz_next1(FILE_T state, uint8_t *ret)
424
0
{
425
0
    int ch;
426
427
0
    ch = GZ_GETC();
428
0
    if (ch == -1) {
429
0
        if (state->err == 0) {
430
            /* EOF */
431
0
            state->err = WTAP_ERR_SHORT_READ;
432
0
            state->err_info = NULL;
433
0
        }
434
0
        return -1;
435
0
    }
436
0
    *ret = ch;
437
0
    return 0;
438
0
}
439
440
/* Get a two-byte little-endian integer and return 0 on success and the value
441
   in *ret.  Otherwise -1 is returned, state->err is set, and *ret is not
442
   modified. */
443
static int
444
gz_next2(FILE_T state, uint16_t *ret)
445
0
{
446
0
    uint16_t val;
447
0
    int ch;
448
449
0
    val = GZ_GETC();
450
0
    ch = GZ_GETC();
451
0
    if (ch == -1) {
452
0
        if (state->err == 0) {
453
            /* EOF */
454
0
            state->err = WTAP_ERR_SHORT_READ;
455
0
            state->err_info = NULL;
456
0
        }
457
0
        return -1;
458
0
    }
459
0
    val += (uint16_t)ch << 8;
460
0
    *ret = val;
461
0
    return 0;
462
0
}
463
464
/* Get a four-byte little-endian integer and return 0 on success and the value
465
   in *ret.  Otherwise -1 is returned, state->err is set, and *ret is not
466
   modified. */
467
static int
468
gz_next4(FILE_T state, uint32_t *ret)
469
0
{
470
0
    uint32_t val;
471
0
    int ch;
472
473
0
    val = GZ_GETC();
474
0
    val += (unsigned)GZ_GETC() << 8;
475
0
    val += (uint32_t)GZ_GETC() << 16;
476
0
    ch = GZ_GETC();
477
0
    if (ch == -1) {
478
0
        if (state->err == 0) {
479
            /* EOF */
480
0
            state->err = WTAP_ERR_SHORT_READ;
481
0
            state->err_info = NULL;
482
0
        }
483
0
        return -1;
484
0
    }
485
0
    val += (uint32_t)ch << 24;
486
0
    *ret = val;
487
0
    return 0;
488
0
}
489
490
/* Skip the specified number of bytes and return 0 on success.  Otherwise -1
491
   is returned. */
492
static int
493
gz_skipn(FILE_T state, size_t n)
494
0
{
495
0
    while (n != 0) {
496
0
        if (GZ_GETC() == -1) {
497
0
            if (state->err == 0) {
498
                /* EOF */
499
0
                state->err = WTAP_ERR_SHORT_READ;
500
0
                state->err_info = NULL;
501
0
            }
502
0
            return -1;
503
0
        }
504
0
        n--;
505
0
    }
506
0
    return 0;
507
0
}
508
509
/* Skip a null-terminated string and return 0 on success.  Otherwise -1
510
   is returned. */
511
static int
512
gz_skipzstr(FILE_T state)
513
0
{
514
0
    int ch;
515
516
    /* It's null-terminated, so scan until we read a byte with
517
       the value 0 or get an error. */
518
0
    while ((ch = GZ_GETC()) > 0)
519
0
        ;
520
0
    if (ch == -1) {
521
0
        if (state->err == 0) {
522
            /* EOF */
523
0
            state->err = WTAP_ERR_SHORT_READ;
524
0
            state->err_info = NULL;
525
0
        }
526
0
        return -1;
527
0
    }
528
0
    return 0;
529
0
}
530
531
static void
532
zlib_fast_seek_add(FILE_T file, struct zlib_cur_seek_point *point, int bits, int64_t in_pos, int64_t out_pos)
533
0
{
534
    /* it's for sure after gzip header, so file->fast_seek->len != 0 */
535
0
    struct fast_seek_point *item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
536
537
#ifndef HAVE_INFLATEPRIME
538
    if (bits)
539
        return;
540
#endif /* HAVE_INFLATEPRIME */
541
542
    /* Glib has got Balanced Binary Trees (GTree) but I couldn't find a way to do quick search for nearest (and smaller) value to seek (It's what fast_seek_find() do)
543
     *      Inserting value in middle of sorted array is expensive, so we want to add only in the end.
544
     *      It's not big deal, cause first-read don't usually invoke seeking
545
     */
546
0
    if (item->out + SPAN < out_pos) {
547
0
        struct fast_seek_point *val = g_new(struct fast_seek_point,1);
548
0
        val->in = in_pos;
549
0
        val->out = out_pos;
550
0
        val->compression = ZLIB;
551
0
#ifdef HAVE_INFLATEPRIME
552
0
        val->data.zlib.bits = bits;
553
0
#endif /* HAVE_INFLATEPRIME */
554
0
        if (point->pos != 0) {
555
0
            unsigned int left = ZLIB_WINSIZE - point->pos;
556
557
0
            memcpy(val->data.zlib.window, point->window + point->pos, left);
558
0
            memcpy(val->data.zlib.window + left, point->window, point->pos);
559
0
        } else
560
0
            memcpy(val->data.zlib.window, point->window, ZLIB_WINSIZE);
561
562
        /*
563
         * XXX - strm.adler is a uLong in at least some versions
564
         * of zlib, and uLong is an unsigned long in at least
565
         * some of those versions, which means it's 64-bit
566
         * on LP64 platforms, even though the checksum is
567
         * 32-bit.  We assume the actual Adler checksum
568
         * is in the lower 32 bits of strm.adler; as the
569
         * checksum in the file is only 32 bits, we save only
570
         * those lower 32 bits, and cast away any additional
571
         * bits to squelch warnings.
572
         *
573
         * The same applies to strm.total_out.
574
         */
575
0
        val->data.zlib.adler = (uint32_t) file->strm.adler;
576
0
        val->data.zlib.total_out = (uint32_t) file->strm.total_out;
577
0
        g_ptr_array_add(file->fast_seek, val);
578
0
    }
579
0
}
580
581
/*
582
 * Based on what gz_decomp() in zlib does.
583
 */
584
static void
585
zlib_fill_out_buffer(FILE_T state)
586
0
{
587
0
    int ret = 0;        /* XXX */
588
0
    uint32_t crc, len;
589
0
    zlib_streamp strm = &(state->strm);
590
0
    unsigned char *buf = state->out.buf;
591
0
    unsigned int count = state->size << 1;
592
593
0
    unsigned char *buf2 = buf;
594
0
    unsigned int count2 = count;
595
596
0
    strm->avail_out = count;
597
0
    strm->next_out = buf;
598
599
    /* fill output buffer up to end of deflate stream or error */
600
0
    do {
601
        /* get more input for inflate() */
602
0
        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
603
0
            break;
604
0
        if (state->in.avail == 0) {
605
            /* EOF */
606
0
            state->err = WTAP_ERR_SHORT_READ;
607
0
            state->err_info = NULL;
608
0
            break;
609
0
        }
610
611
0
        strm->avail_in = state->in.avail;
612
0
        strm->next_in = state->in.next;
613
        /* decompress and handle errors */
614
0
#ifdef Z_BLOCK
615
0
        ret = ZLIB_PREFIX(inflate)(strm, Z_BLOCK);
616
#else /* Z_BLOCK */
617
        ret = ZLIB_PREFIX(inflate)(strm, Z_NO_FLUSH);
618
#endif /* Z_BLOCK */
619
0
        state->in.avail = strm->avail_in;
620
0
#ifdef z_const
621
0
DIAG_OFF(cast-qual)
622
0
        state->in.next = (unsigned char *)strm->next_in;
623
0
DIAG_ON(cast-qual)
624
#else /* z_const */
625
        state->in.next = strm->next_in;
626
#endif /* z_const */
627
0
        if (ret == Z_STREAM_ERROR) {
628
0
            state->err = WTAP_ERR_DECOMPRESS;
629
0
            state->err_info = strm->msg;
630
0
            break;
631
0
        }
632
0
        if (ret == Z_NEED_DICT) {
633
0
            state->err = WTAP_ERR_DECOMPRESS;
634
0
            state->err_info = "preset dictionary needed";
635
0
            break;
636
0
        }
637
0
        if (ret == Z_MEM_ERROR) {
638
            /* This means "not enough memory". */
639
0
            state->err = ENOMEM;
640
0
            state->err_info = NULL;
641
0
            break;
642
0
        }
643
0
        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
644
0
            state->err = WTAP_ERR_DECOMPRESS;
645
0
            state->err_info = strm->msg;
646
0
            break;
647
0
        }
648
        /*
649
         * XXX - Z_BUF_ERROR?
650
         */
651
652
0
        strm->adler = ZLIB_PREFIX(crc32)(strm->adler, buf2, count2 - strm->avail_out);
653
0
#ifdef Z_BLOCK
654
0
        if (state->fast_seek_cur != NULL) {
655
0
            struct zlib_cur_seek_point *cur = (struct zlib_cur_seek_point *) state->fast_seek_cur;
656
0
            unsigned int ready = count2 - strm->avail_out;
657
658
0
            if (ready < ZLIB_WINSIZE) {
659
0
                unsigned left = ZLIB_WINSIZE - cur->pos;
660
661
0
                if (ready >= left) {
662
0
                    memcpy(cur->window + cur->pos, buf2, left);
663
0
                    if (ready != left)
664
0
                        memcpy(cur->window, buf2 + left, ready - left);
665
666
0
                    cur->pos = ready - left;
667
0
                    cur->have += ready;
668
0
                } else {
669
0
                    memcpy(cur->window + cur->pos, buf2, ready);
670
0
                    cur->pos += ready;
671
0
                    cur->have += ready;
672
0
                }
673
674
0
                if (cur->have >= ZLIB_WINSIZE)
675
0
                    cur->have = ZLIB_WINSIZE;
676
677
0
            } else {
678
0
                memcpy(cur->window, buf2 + (ready - ZLIB_WINSIZE), ZLIB_WINSIZE);
679
0
                cur->pos = 0;
680
0
                cur->have = ZLIB_WINSIZE;
681
0
            }
682
683
0
            if (cur->have >= ZLIB_WINSIZE && ret != Z_STREAM_END && (strm->data_type & 128) && !(strm->data_type & 64))
684
0
                zlib_fast_seek_add(state, cur, (strm->data_type & 7), state->raw_pos - strm->avail_in, state->pos + (count - strm->avail_out));
685
0
        }
686
0
#endif /* Z_BLOCK */
687
0
        buf2 = (buf2 + count2 - strm->avail_out);
688
0
        count2 = strm->avail_out;
689
690
0
    } while (strm->avail_out && ret != Z_STREAM_END);
691
692
    /* update available output and crc check value */
693
0
    state->out.next = buf;
694
0
    state->out.avail = count - strm->avail_out;
695
696
    /* Check gzip trailer if at end of deflate stream.
697
       We don't fail immediately here, we just set an error
698
       indication, so that we try to process what data we
699
       got before the error.  The next attempt to read
700
       something past that data will get the error. */
701
0
    if (ret == Z_STREAM_END) {
702
0
        if (gz_next4(state, &crc) != -1 &&
703
0
            gz_next4(state, &len) != -1) {
704
0
            if (crc != strm->adler && !state->dont_check_crc) {
705
0
                state->err = WTAP_ERR_DECOMPRESS;
706
0
                state->err_info = "bad CRC";
707
0
            } else if (len != (strm->total_out & 0xffffffffUL)) {
708
0
                state->err = WTAP_ERR_DECOMPRESS;
709
0
                state->err_info = "length field wrong";
710
0
            }
711
0
        }
712
0
        state->last_compression = state->compression;
713
0
        state->compression = UNKNOWN;      /* ready for next stream, once have is 0 */
714
0
        g_free(state->fast_seek_cur);
715
0
        state->fast_seek_cur = NULL;
716
0
    }
717
0
}
718
#endif /* USE_ZLIB_OR_ZLIBNG */
719
720
/*
721
 * Check for a gzip header.
722
 *
723
 * Based on the gzip-specific stuff gz_head() from zlib does.
724
 */
725
static int
726
check_for_zlib_compression(FILE_T state)
727
0
{
728
    /*
729
     * Look for the gzip header.  The first two bytes are 31 and 139,
730
     * and if we find it, return success if we support gzip and an
731
     * error if we don't.
732
     */
733
0
    if (state->in.next[0] == 31) {
734
0
        state->in.avail--;
735
0
        state->in.next++;
736
737
        /* Make sure the byte after the first byte is present */
738
0
        if (state->in.avail == 0 && fill_in_buffer(state) == -1) {
739
            /* Read error. */
740
0
            return -1;
741
0
        }
742
0
        if (state->in.avail != 0) {
743
0
            if (state->in.next[0] == 139) {
744
                /*
745
                 * We have what looks like the ID1 and ID2 bytes of a gzip
746
                 * header.
747
                 * Continue processing the file.
748
                 *
749
                 * XXX - some capture file formats (I'M LOOKING AT YOU,
750
                 * ENDACE!) can have 31 in the first byte of the file
751
                 * and 139 in the second byte of the file.  For now, in
752
                 * those cases, you lose.
753
                 */
754
0
#ifdef USE_ZLIB_OR_ZLIBNG
755
0
                uint8_t cm;
756
0
                uint8_t flags;
757
0
                uint16_t len;
758
0
                uint16_t hcrc;
759
760
0
                state->in.avail--;
761
0
                state->in.next++;
762
763
                /* read rest of header */
764
765
                /* compression method (CM) */
766
0
                if (gz_next1(state, &cm) == -1)
767
0
                    return -1;
768
0
                if (cm != 8) {
769
0
                    state->err = WTAP_ERR_DECOMPRESS;
770
0
                    state->err_info = "unknown compression method";
771
0
                    return -1;
772
0
                }
773
774
                /* flags (FLG) */
775
0
                if (gz_next1(state, &flags) == -1) {
776
                    /* Read error. */
777
0
                    return -1;
778
0
                }
779
0
                if (flags & 0xe0) {     /* reserved flag bits */
780
0
                    state->err = WTAP_ERR_DECOMPRESS;
781
0
                    state->err_info = "reserved flag bits set";
782
0
                    return -1;
783
0
                }
784
785
                /* modification time (MTIME) */
786
0
                if (gz_skipn(state, 4) == -1) {
787
                    /* Read error. */
788
0
                    return -1;
789
0
                }
790
791
                /* extra flags (XFL) */
792
0
                if (gz_skipn(state, 1) == -1) {
793
                    /* Read error. */
794
0
                    return -1;
795
0
                }
796
797
                /* operating system (OS) */
798
0
                if (gz_skipn(state, 1) == -1) {
799
                    /* Read error. */
800
0
                    return -1;
801
0
                }
802
803
0
                if (flags & 4) {
804
                    /* extra field - get XLEN */
805
0
                    if (gz_next2(state, &len) == -1) {
806
                        /* Read error. */
807
0
                        return -1;
808
0
                    }
809
810
                    /* skip the extra field */
811
0
                    if (gz_skipn(state, len) == -1) {
812
                        /* Read error. */
813
0
                        return -1;
814
0
                    }
815
0
                }
816
0
                if (flags & 8) {
817
                    /* file name */
818
0
                    if (gz_skipzstr(state) == -1) {
819
                        /* Read error. */
820
0
                        return -1;
821
0
                    }
822
0
                }
823
0
                if (flags & 16) {
824
                    /* comment */
825
0
                    if (gz_skipzstr(state) == -1) {
826
                        /* Read error. */
827
0
                        return -1;
828
0
                    }
829
0
                }
830
0
                if (flags & 2) {
831
                    /* header crc */
832
0
                    if (gz_next2(state, &hcrc) == -1) {
833
                        /* Read error. */
834
0
                        return -1;
835
0
                    }
836
                    /* XXX - check the CRC? */
837
0
                }
838
839
                /* set up for decompression */
840
0
                ZLIB_PREFIX(inflateReset)(&(state->strm));
841
0
                state->strm.adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0);
842
0
                state->compression = ZLIB;
843
0
                state->is_compressed = true;
844
0
#ifdef Z_BLOCK
845
0
                if (state->fast_seek) {
846
0
                    struct zlib_cur_seek_point *cur = g_new(struct zlib_cur_seek_point,1);
847
848
0
                    cur->pos = cur->have = 0;
849
0
                    g_free(state->fast_seek_cur);
850
0
                    state->fast_seek_cur = cur;
851
0
                    fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, GZIP_AFTER_HEADER);
852
0
                }
853
0
#endif /* Z_BLOCK */
854
0
                return 1;
855
#else /* USE_ZLIB_OR_ZLIBNG */
856
                state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
857
                state->err_info = "reading gzip-compressed files isn't supported";
858
                return -1;
859
#endif /* USE_ZLIB_OR_ZLIBNG */
860
0
            }
861
862
            /*
863
             * Not a gzip file.  "Unget" the first character; either:
864
             *
865
             *    1) we read both of the first two bytes into the
866
             *    buffer with the first ws_read, so we can just back
867
             *    up by one byte;
868
             *
869
             *    2) we only read the first byte into the buffer with
870
             *    the first ws_read (e.g., because we're reading from
871
             *    a pipe and only the first byte had been written to
872
             *    the pipe at that point), and read the second byte
873
             *    into the buffer after the first byte in the
874
             *    fill_in_buffer call, so we now have two bytes in
875
             *    the buffer, and can just back up by one byte.
876
             */
877
0
            state->in.avail++;
878
0
            state->in.next--;
879
0
        }
880
0
    }
881
0
    return 0;
882
0
}
883
884
885
/*
886
 * Zstandard compression.
887
 *
888
 * https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md
889
 */
890
#ifdef HAVE_ZSTD
891
static bool
892
zstd_fill_out_buffer(FILE_T state)
893
{
894
    ws_assert(state->out.avail == 0);
895
896
    if (state->in.avail == 0 && fill_in_buffer(state) == -1)
897
        return false;
898
899
    ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0};
900
    ZSTD_inBuffer input = {state->in.next, state->in.avail, 0};
901
    const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
902
    if (ZSTD_isError(ret)) {
903
        state->err = WTAP_ERR_DECOMPRESS;
904
        state->err_info = ZSTD_getErrorName(ret);
905
        return false;
906
    }
907
908
    state->in.next = state->in.next + input.pos;
909
    state->in.avail -= (unsigned)input.pos;
910
911
    state->out.next = output.dst;
912
    state->out.avail = (unsigned)output.pos;
913
914
    if (ret == 0) {
915
        state->last_compression = state->compression;
916
        state->compression = UNKNOWN;
917
    }
918
    return true;
919
}
920
#endif /* HAVE_ZSTD */
921
922
/*
923
 * Check for a Zstandard header.
924
 */
925
static int
926
check_for_zstd_compression(FILE_T state)
927
0
{
928
    /*
929
     * Look for the Zstandard header, and, if we find it, return
930
     * success if we support Zstandard and an error if we don't.
931
     */
932
0
    if (state->in.avail >= 4
933
0
        && state->in.next[0] == 0x28 && state->in.next[1] == 0xb5
934
0
        && state->in.next[2] == 0x2f && state->in.next[3] == 0xfd) {
935
#ifdef HAVE_ZSTD
936
        const size_t ret = ZSTD_initDStream(state->zstd_dctx);
937
        if (ZSTD_isError(ret)) {
938
            state->err = WTAP_ERR_DECOMPRESS;
939
            state->err_info = ZSTD_getErrorName(ret);
940
            return -1;
941
        }
942
943
        fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, ZSTD);
944
        state->compression = ZSTD;
945
        state->is_compressed = true;
946
        return 1;
947
#else /* HAVE_ZSTD */
948
0
        state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
949
0
        state->err_info = "reading zstd-compressed files isn't supported";
950
0
        return -1;
951
0
#endif /* HAVE_ZSTD */
952
0
    }
953
0
    return 0;
954
0
}
955
956
/*
957
 * lz4 compression.
958
 *
959
 * https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
960
 */
961
#ifdef HAVE_LZ4FRAME_H
962
static void
963
lz4_fast_seek_add(FILE_T file, struct lz4_cur_seek_point *point, int64_t in_pos, int64_t out_pos)
964
{
965
    if (!file->fast_seek) {
966
        return;
967
    }
968
969
    struct fast_seek_point *item = NULL;
970
971
    if (file->fast_seek->len != 0)
972
        item = (struct fast_seek_point *)file->fast_seek->pdata[file->fast_seek->len - 1];
973
974
    /* As of Glib 2.68 GTree has g_tree_upper_bound, or we could use a
975
     * wmem_tree. However, since our initial read is usually sequential
976
     * only adding seek points at the end of the ptr array is fast and fine.
977
     */
978
979
    /* don't bother adding jump points between very small blocks (min SPAN) */
980
    if (!item || item->out + SPAN < out_pos) {
981
        struct fast_seek_point *val = g_new(struct fast_seek_point,1);
982
        val->in = in_pos;
983
        val->out = out_pos;
984
        val->compression = LZ4_AFTER_HEADER;
985
986
        if (point != NULL) {
987
            if (point->pos != 0) {
988
                unsigned int left = LZ4_WINSIZE - point->pos;
989
990
                memcpy(val->data.lz4.window, point->window + point->pos, left);
991
                memcpy(val->data.lz4.window + left, point->window, point->pos);
992
            } else {
993
                memcpy(val->data.lz4.window, point->window, LZ4_WINSIZE);
994
            }
995
        }
996
997
        val->data.lz4.lz4_info = file->lz4_info;
998
        memcpy(val->data.lz4.lz4_hdr, file->lz4_hdr, LZ4F_HEADER_SIZE_MAX);
999
        g_ptr_array_add(file->fast_seek, val);
1000
    }
1001
}
1002
1003
static void
1004
lz4_fill_out_buffer(FILE_T state)
1005
{
1006
    ws_assert(state->out.avail == 0);
1007
1008
    /*
1009
     * This works similar to the Z_BLOCK flush type in zlib that stops after
1010
     * each block. LZ4F_getFrameInfo() returns the number of bytes expected
1011
     * to finish the current block, plus the header for the next block, when
1012
     * called when already in a frame and the compression context is set up.
1013
     * We pass in no more than that many bytes of input, and if we do stop
1014
     * on a block end, add a fast seek point (but *before* the header.)
1015
     */
1016
1017
    unsigned count = state->size << 1;
1018
    unsigned char *buf2;
1019
    size_t outBufSize = 0; // Zero so we don't actually consume the block
1020
    size_t inBufSize;
1021
1022
    size_t compressedSize = 0;
1023
    size_t ret = SIZE_MAX; // 0 indicates end of frame, initialize to something else
1024
1025
    state->out.next = state->out.buf;
1026
1027
    do {
1028
        /* get more input for decompress() */
1029
        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
1030
            break;
1031
        if (state->eof) {
1032
            /* EOF */
1033
            state->err = WTAP_ERR_SHORT_READ;
1034
            state->err_info = NULL;
1035
            break;
1036
        }
1037
1038
        inBufSize = state->in.avail;
1039
        compressedSize = LZ4F_getFrameInfo(state->lz4_dctx, &state->lz4_info, state->in.next, &inBufSize);
1040
1041
        // We only call this when we're in the middle of decoding a frame, not
1042
        // before the start of a frame, so this shouldn't consume any bytes.
1043
        ws_assert(inBufSize == 0);
1044
1045
        if (LZ4F_isError(compressedSize)) {
1046
            state->err = WTAP_ERR_DECOMPRESS;
1047
            state->err_info = LZ4F_getErrorName(compressedSize);
1048
            break;
1049
        }
1050
1051
        if (compressedSize > state->size) {
1052
            /*
1053
             * What is this? Either bogus, or some new variant of LZ4 Frames with
1054
             * a larger block size we don't support. We could have a buffer
1055
             * overrun if we try to process it.
1056
             *
1057
             * TODO - We could realloc here.
1058
             */
1059
            state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1060
            state->err_info = "lz4 compressed block size too large";
1061
            break;
1062
        }
1063
1064
        /* Now, read that size */
1065
1066
        outBufSize = count - state->out.avail;
1067
        inBufSize = MIN(state->in.avail, compressedSize);
1068
1069
        buf2 = state->out.buf + state->out.avail;
1070
        ret = LZ4F_decompress(state->lz4_dctx, buf2, &outBufSize, state->in.next, &inBufSize, NULL);
1071
1072
        if (LZ4F_isError(ret)) {
1073
            state->err = WTAP_ERR_DECOMPRESS;
1074
            state->err_info = LZ4F_getErrorName(ret);
1075
            break;
1076
        }
1077
1078
        state->in.next  += (unsigned)inBufSize;
1079
        state->in.avail -= (unsigned)inBufSize;
1080
        compressedSize -= inBufSize;
1081
1082
        state->out.avail += (unsigned)outBufSize;
1083
1084
        if (state->fast_seek_cur != NULL) {
1085
            struct lz4_cur_seek_point *cur = (struct lz4_cur_seek_point *) state->fast_seek_cur;
1086
            switch (state->lz4_info.blockMode) {
1087
1088
            case LZ4F_blockIndependent:
1089
                /* We don't need the history, always create a fast seek point. */
1090
                cur = NULL;
1091
                break;
1092
1093
#if LZ4_VERSION_NUMBER >= 11000
1094
            case LZ4F_blockLinked:
1095
            {
1096
                /* Save recent history to the current fast seek point. */
1097
                unsigned int ready = (unsigned)outBufSize;
1098
1099
                /* Do we have a full dictionary's worth of decompressed
1100
                 * history to copy? */
1101
                if (ready < LZ4_WINSIZE) {
1102
                    /* No. Can we fit it to the right of the current
1103
                     * circular buffer position?
1104
                     */
1105
                    unsigned left = LZ4_WINSIZE - cur->pos;
1106
1107
                    if (ready <= left) {
1108
                        /* Yes. Do so. */
1109
                        memcpy(cur->window + cur->pos, buf2, ready);
1110
                        cur->pos += ready;
1111
                        cur->have += ready;
1112
                    } else {
1113
                        /* No. Fill the circular buffer, then start over
1114
                         * at the beginning.
1115
                         */
1116
                        memcpy(cur->window + cur->pos, buf2, left);
1117
                        memcpy(cur->window, buf2, ready - left);
1118
                        cur->pos = ready - left;
1119
                        cur->have += ready;
1120
                    }
1121
                    if (cur->have >= LZ4_WINSIZE) {
1122
                        cur->have = LZ4_WINSIZE;
1123
                    }
1124
                } else {
1125
                    /* Yes. Just copy the last 64 KB. */
1126
                    memcpy(cur->window, buf2 + (ready - LZ4_WINSIZE), LZ4_WINSIZE);
1127
                    cur->pos = 0;
1128
                    cur->have = LZ4_WINSIZE;
1129
                }
1130
                break;
1131
            }
1132
#endif /* LZ4_VERSION_NUMBER >= 11000 */
1133
1134
            default:
1135
                /* Do nothing. Since cur will be non-NULL but have 0,
1136
                 * we won't create a fast seek point below.
1137
                 */
1138
                break;
1139
            }
1140
1141
            if (compressedSize == 0 && ret > LZ4F_BLOCK_HEADER_SIZE) {
1142
                /* End of block plus the next block header. We want to add a fast
1143
                 * seek point to the beginning of a block, before the header. We
1144
                 * don't add a fast seek point after before the EndMark / footer,
1145
                 * which has no data. This also has the effect of preventing us
1146
                 * from calculating the frame Content Checksum after doing fast
1147
                 * seeks and random access, which is good because the LZ4 Frame
1148
                 * API also doesn't have a method to update the running checksum
1149
                 * value.
1150
                 */
1151
1152
                if (cur == NULL || cur->have >= LZ4_WINSIZE) {
1153
                    /* There's little point in adding a fast seek point with
1154
                     * less than a full 64 KB of dictionary, as that's too
1155
                     * close to the frame start to be useful.
1156
                     */
1157
                    lz4_fast_seek_add(state, cur, state->raw_pos - state->in.avail - LZ4F_BLOCK_HEADER_SIZE, state->pos + state->out.avail);
1158
                }
1159
            }
1160
        }
1161
1162
        outBufSize = count - state->out.avail;
1163
    } while (ret != 0 && outBufSize);
1164
1165
    state->out.next  = state->out.buf;
1166
1167
    if (ret == 0) {
1168
        /* End of Frame */
1169
        state->last_compression = state->compression;
1170
        state->compression = UNKNOWN;
1171
        g_free(state->fast_seek_cur);
1172
        state->fast_seek_cur = NULL;
1173
    }
1174
}
1175
#endif /* HAVE_LZ4FRAME_H */
1176
1177
/*
1178
 * Check for an lz4 header.
1179
 */
1180
static int
1181
check_for_lz4_compression(FILE_T state)
1182
0
{
1183
    /*
1184
     * Look for the lz4 header, and, if we find it, return success
1185
     * if we support lz4 and an error if we don't.
1186
     */
1187
0
    if (state->in.avail >= 4
1188
0
        && state->in.next[0] == 0x04 && state->in.next[1] == 0x22
1189
0
        && state->in.next[2] == 0x4d && state->in.next[3] == 0x18) {
1190
#ifdef HAVE_LZ4FRAME_H
1191
        LZ4F_resetDecompressionContext(state->lz4_dctx);
1192
        size_t headerSize = LZ4F_HEADER_SIZE_MAX;
1193
#if LZ4_VERSION_NUMBER >= 10903
1194
        /*
1195
         * In 1.9.3+ we can handle a silly edge case of a tiny valid
1196
         * frame at the end of a file that is smaller than the maximum
1197
         * header size. (lz4frame.h added the function in 1.9.0, but
1198
         * only for the static library; it wasn't exported until 1.9.3)
1199
         */
1200
        while (state->in.avail < LZ4F_MIN_SIZE_TO_KNOW_HEADER_LENGTH) {
1201
            if (fill_in_buffer(state) == -1) {
1202
                return -1;
1203
            }
1204
            if (state->eof) {
1205
                state->err = WTAP_ERR_SHORT_READ;
1206
                state->err_info = NULL;
1207
                return 0;
1208
            }
1209
        }
1210
        headerSize = LZ4F_headerSize(state->in.next, state->in.avail);
1211
        if (LZ4F_isError(headerSize)) {
1212
            state->err = WTAP_ERR_DECOMPRESS;
1213
            state->err_info = LZ4F_getErrorName(headerSize);
1214
            return -1;
1215
        }
1216
#endif /* LZ4_VERSION_NUMBER >= 10903 */
1217
        while (state->in.avail < headerSize) {
1218
            if (fill_in_buffer(state) == -1) {
1219
                return -1;
1220
            }
1221
            if (state->eof) {
1222
                state->err = WTAP_ERR_SHORT_READ;
1223
                state->err_info = NULL;
1224
                return 0;
1225
            }
1226
        }
1227
        size_t inBufSize = state->in.avail;
1228
        memcpy(state->lz4_hdr, state->in.next, headerSize);
1229
        const LZ4F_errorCode_t err = LZ4F_getFrameInfo(state->lz4_dctx, &state->lz4_info, state->in.next, &inBufSize);
1230
        if (LZ4F_isError(err)) {
1231
            state->err = WTAP_ERR_DECOMPRESS;
1232
            state->err_info = LZ4F_getErrorName(err);
1233
            return -1;
1234
        }
1235
1236
        /*
1237
         * XXX - We could check state->lz4_info.blockSizeID here, and
1238
         * only realloc the buffers to a larger value if the max
1239
         * block size is bigger than state->size. Also we could fail
1240
         * on unknown values?
1241
         */
1242
        state->in.avail -= (unsigned)inBufSize;
1243
        state->in.next += (unsigned)inBufSize;
1244
1245
#if LZ4_VERSION_NUMBER >= 11000
1246
        if (state->fast_seek && state->lz4_info.blockMode == LZ4F_blockLinked) {
1247
            struct lz4_cur_seek_point *cur = g_new(struct lz4_cur_seek_point,1);
1248
1249
            cur->pos = cur->have = 0;
1250
            g_free(state->fast_seek_cur);
1251
            state->fast_seek_cur = cur;
1252
        }
1253
#endif /* LZ4_VERSION_NUMBER >= 11000 */
1254
        fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, LZ4);
1255
        state->compression = LZ4;
1256
        state->is_compressed = true;
1257
        return 1;
1258
#else /* HAVE_LZ4FRAME_H */
1259
0
        state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
1260
0
        state->err_info = "reading lz4-compressed files isn't supported";
1261
0
        return -1;
1262
0
#endif /* HAVE_LZ4FRAME_H */
1263
0
    }
1264
0
    return 0;
1265
0
}
1266
1267
typedef int (*compression_type_test)(FILE_T);
1268
1269
static compression_type_test const compression_type_tests[] = {
1270
    check_for_zlib_compression,
1271
    check_for_zstd_compression,
1272
    check_for_lz4_compression,
1273
};
1274
1275
/*
1276
 * Used when we haven't yet determined whether we have a compressed file
1277
 * and, if we do, what sort of compressed file it is.
1278
 *
1279
 * Based on the non-gzip-specific stuff that gz_head() from zlib does.
1280
 */
1281
static int
1282
check_for_compression(FILE_T state)
1283
0
{
1284
    /*
1285
     * If this isn't the first frame / compressed stream, ensure that
1286
     * we're starting at the beginning of the buffer. This shouldn't
1287
     * get called much.
1288
     *
1289
     * This is to avoid edge cases where a previous frame finished but
1290
     * state->in.next is close to the end of the buffer so there isn't
1291
     * much room to put the start of the next frame.
1292
     * This also lets us put back bytes if things go wrong.
1293
     */
1294
0
    if (state->in.next != state->in.buf) {
1295
0
        memmove(state->in.buf, state->in.next, state->in.avail);
1296
0
        state->in.next = state->in.buf;
1297
0
    }
1298
1299
    /* get some data in the input buffer */
1300
0
    if (state->in.avail == 0) {
1301
0
        if (fill_in_buffer(state) == -1)
1302
0
            return -1;
1303
0
        if (state->in.avail == 0)
1304
0
            return 0;
1305
0
    }
1306
1307
    /*
1308
     * Check for the compression types we support.
1309
     */
1310
0
    for (size_t i = 0; i < G_N_ELEMENTS(compression_type_tests); i++) {
1311
0
        int ret;
1312
1313
0
        ret = compression_type_tests[i](state);
1314
0
        if (ret == -1)
1315
0
            return -1;    /* error */
1316
0
        if (ret == 1)
1317
0
            return 0;     /* found it */
1318
0
    }
1319
1320
    /*
1321
     * Some other compressed file formats we might want to support:
1322
     *
1323
     *   XZ format:
1324
     *     https://tukaani.org/xz/
1325
     *     https://github.com/tukaani-project/xz
1326
     *     https://github.com/tukaani-project/xz/blob/master/doc/xz-file-format.txt
1327
     *
1328
     *    Bzip2 format:
1329
     *      https://www.sourceware.org/bzip2/
1330
     *      https://gitlab.com/bzip2/bzip2/
1331
     *      https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
1332
     *        (GitHub won't render it; download and open it)
1333
     *
1334
     *    Lzip format:
1335
     *      https://www.nongnu.org/lzip/
1336
     */
1337
1338
    /*
1339
     * We didn't see anything that looks like a header for any type of
1340
     * compressed file that we support, so just do uncompressed I/O.
1341
     *
1342
     * XXX - This fast seek data is for the case where a compressed stream
1343
     * ends and is followed by an uncompressed portion.  It only works if
1344
     * the uncompressed portion is at the end, as we don't constantly scan
1345
     * for magic bytes in the middle of uncompressed data. (Concatenated
1346
     * compressed streams _do_ work, even streams of different compression types.)
1347
     */
1348
0
    if (state->fast_seek)
1349
0
        fast_seek_header(state, state->raw_pos - state->in.avail, state->pos, UNCOMPRESSED);
1350
1351
1352
    /* doing raw i/o, save start of raw data for seeking, copy any leftover
1353
       input to output -- this assumes that the output buffer is larger than
1354
       the input buffer, which also assures space for gzungetc() */
1355
0
    state->raw = state->pos;
1356
0
    state->out.next = state->out.buf;
1357
    /* not a compressed file -- copy everything we've read into the
1358
       input buffer to the output buffer and fall to raw i/o */
1359
0
    if (state->in.avail) {
1360
0
        memcpy(state->out.buf, state->in.next, state->in.avail);
1361
0
        state->out.avail = state->in.avail;
1362
1363
        /* Now discard everything in the input buffer */
1364
0
        buf_reset(&state->in);
1365
0
    }
1366
0
    state->compression = UNCOMPRESSED;
1367
0
    return 0;
1368
0
}
1369
1370
/*
1371
 * Based on what gz_make() in zlib does.
1372
 */
1373
static int
1374
fill_out_buffer(FILE_T state)
1375
0
{
1376
0
    if (state->compression == UNKNOWN) {
1377
        /*
1378
         * We don't yet know whether the file is compressed,
1379
         * so check for a compressed-file header.
1380
         */
1381
0
        if (check_for_compression(state) == -1)
1382
0
            return -1;
1383
0
        if (state->out.avail != 0)                /* got some data from check_for_compression() */
1384
0
            return 0;
1385
0
    }
1386
1387
    /*
1388
     * We got no data from check_for_compression(), or we didn't call
1389
     * it as we already know the compression type, so read some more
1390
     * data.
1391
     */
1392
0
    switch (state->compression) {
1393
1394
0
    case UNCOMPRESSED:
1395
        /* straight copy */
1396
0
        if (!uncompressed_fill_out_buffer(state))
1397
0
            return -1;
1398
0
        break;
1399
1400
0
#ifdef USE_ZLIB_OR_ZLIBNG
1401
0
    case ZLIB:
1402
        /* zlib (gzip) decompress */
1403
0
        zlib_fill_out_buffer(state);
1404
0
        break;
1405
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1406
1407
#ifdef HAVE_ZSTD
1408
    case ZSTD:
1409
        /* zstd decompress */
1410
        if (!zstd_fill_out_buffer(state))
1411
            return -1;
1412
        break;
1413
#endif /* HAVE_ZSTD */
1414
1415
#ifdef HAVE_LZ4FRAME_H
1416
    case LZ4:
1417
        /* lz4 decompress */
1418
        lz4_fill_out_buffer(state);
1419
        break;
1420
#endif /* HAVE_LZ4FRAME_H */
1421
1422
0
    default:
1423
        /* Unknown compression type; keep reading */
1424
0
        break;
1425
0
    }
1426
0
    return 0;
1427
0
}
1428
1429
static int
1430
gz_skip(FILE_T state, int64_t len)
1431
0
{
1432
0
    unsigned n;
1433
1434
    /* skip over len bytes or reach end-of-file, whichever comes first */
1435
0
    while (len)
1436
0
        if (state->out.avail != 0) {
1437
            /* We have stuff in the output buffer; skip over
1438
               it. */
1439
0
            n = (int64_t)state->out.avail > len ? (unsigned)len : state->out.avail;
1440
0
            state->out.avail -= n;
1441
0
            state->out.next += n;
1442
0
            state->pos += n;
1443
0
            len -= n;
1444
0
        } else if (state->err != 0) {
1445
            /* We have nothing in the output buffer, and
1446
               we have an error that may not have been
1447
               reported yet; that means we can't generate
1448
               any more data into the output buffer, so
1449
               return an error indication. */
1450
0
            return -1;
1451
0
        } else if (state->eof && state->in.avail == 0) {
1452
            /* We have nothing in the output buffer, and
1453
               we're at the end of the input; just return. */
1454
0
            break;
1455
0
        } else {
1456
            /* We have nothing in the output buffer, and
1457
               we can generate more data; get more output,
1458
               looking for header if required. */
1459
0
            if (fill_out_buffer(state) == -1)
1460
0
                return -1;
1461
0
        }
1462
0
    return 0;
1463
0
}
1464
1465
static void
1466
gz_reset(FILE_T state)
1467
0
{
1468
0
    buf_reset(&state->out);       /* no output data available */
1469
0
    state->eof = false;           /* not at end of file */
1470
0
    state->compression = UNKNOWN; /* look for compression header */
1471
1472
0
    state->seek_pending = false;  /* no seek request pending */
1473
0
    state->err = 0;               /* clear error */
1474
0
    state->err_info = NULL;
1475
0
    state->pos = 0;               /* no uncompressed data yet */
1476
0
    buf_reset(&state->in);        /* no input data yet */
1477
0
}
1478
1479
FILE_T
1480
file_fdopen(int fd)
1481
0
{
1482
    /*
1483
     * XXX - we now check whether we have st_blksize in struct stat;
1484
     * it's not available on all platforms.
1485
     *
1486
     * I'm not sure why we're testing _STATBUF_ST_BLKSIZE; it's not
1487
     * set on all platforms that have st_blksize in struct stat.
1488
     * (Not all platforms have st_blksize in struct stat.)
1489
     *
1490
     * Is there some reason *not* to make the buffer size the maximum
1491
     * of GBUFSIZE and st_blksize?  On most UN*Xes, the standard I/O
1492
     * library does I/O with st_blksize as the buffer size; on others,
1493
     * and on Windows, it's a 4K buffer size.  If st_blksize is bigger
1494
     * than GBUFSIZE (which is currently 4KB), that's probably a
1495
     * hint that reading in st_blksize chunks is considered a good
1496
     * idea (e.g., an 8K/1K Berkeley fast file system with st_blksize
1497
     * being 8K, or APFS, where st_blksize is big on at least some
1498
     * versions of macOS).
1499
     */
1500
0
#ifdef _STATBUF_ST_BLKSIZE
1501
0
    ws_statb64 st;
1502
0
#endif /* _STATBUF_ST_BLKSIZE */
1503
#ifdef HAVE_ZSTD
1504
    size_t zstd_buf_size;
1505
#endif /* HAVE_ZSTD */
1506
0
    unsigned want = GZBUFSIZE;
1507
0
    FILE_T state;
1508
#ifdef HAVE_LZ4FRAME_H
1509
    size_t ret;
1510
#endif /* HAVE_LZ4FRAME_H */
1511
1512
0
    if (fd == -1)
1513
0
        return NULL;
1514
1515
    /* allocate FILE_T structure to return */
1516
0
    state = (FILE_T)g_try_malloc0(sizeof *state);
1517
0
    if (state == NULL)
1518
0
        return NULL;
1519
1520
0
    state->fast_seek_cur = NULL;
1521
0
    state->fast_seek = NULL;
1522
1523
    /* open the file with the appropriate mode (or just use fd) */
1524
0
    state->fd = fd;
1525
1526
    /* we don't yet know whether it's compressed */
1527
0
    state->is_compressed = false;
1528
0
    state->last_compression = UNKNOWN;
1529
1530
    /* save the current position for rewinding (only if reading) */
1531
0
    state->start = ws_lseek64(state->fd, 0, SEEK_CUR);
1532
0
    if (state->start == -1) state->start = 0;
1533
0
    state->raw_pos = state->start;
1534
1535
    /* initialize stream */
1536
0
    gz_reset(state);
1537
1538
0
#ifdef _STATBUF_ST_BLKSIZE
1539
    /*
1540
     * See what I/O size the file system recommends using, and if
1541
     * it's bigger than what we're using and isn't too big, use
1542
     * it.
1543
     */
1544
0
    if (ws_fstat64(fd, &st) >= 0) {
1545
        /*
1546
         * Yes, st_blksize can be bigger than an int; apparently,
1547
         * it's a long on LP64 Linux, for example.
1548
         *
1549
         * If the value is too big to fit into a unsigned,
1550
         * just use the maximum read buffer size.
1551
         *
1552
         * On top of that, the Single UNIX Specification says that
1553
         * st_blksize is of type blksize_t, which is a *signed*
1554
         * integer type, and, at minimum, macOS 11.6 and Linux 5.14.11's
1555
         * include/uapi/asm-generic/stat.h define it as such.
1556
         *
1557
         * However, other OSes might make it unsigned, and older versions
1558
         * of OSes that currently make it signed might make it unsigned,
1559
         * so we try to avoid warnings from that.
1560
         *
1561
         * We cast MAX_READ_BUF_SIZE to long in order to avoid the
1562
         * warning, although it might introduce warnings on platforms
1563
         * where st_blocksize is unsigned; we'll deal with that if
1564
         * it ever shows up as an issue.
1565
         *
1566
         * MAX_READ_BUF_SIZE is < the largest *signed* 32-bt integer,
1567
         * so casting it to long won't turn it into a negative number.
1568
         * (We only support 32-bit and 64-bit 2's-complement platforms.)
1569
         */
1570
0
        if (st.st_blksize <= (long)MAX_READ_BUF_SIZE)
1571
0
            want = (unsigned)st.st_blksize;
1572
0
        else
1573
0
            want = MAX_READ_BUF_SIZE;
1574
        /* XXX, verify result? */
1575
0
    }
1576
0
#endif /* _STATBUF_ST_BLKSIZE */
1577
#ifdef HAVE_ZSTD
1578
    /* we should have separate input and output buf sizes */
1579
    zstd_buf_size = ZSTD_DStreamInSize();
1580
    if (zstd_buf_size > want) {
1581
        if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1582
            want = (unsigned)zstd_buf_size;
1583
        else
1584
            want = MAX_READ_BUF_SIZE;
1585
    }
1586
    zstd_buf_size = ZSTD_DStreamOutSize();
1587
    if (zstd_buf_size > want) {
1588
        if (zstd_buf_size <= MAX_READ_BUF_SIZE)
1589
            want = (unsigned)zstd_buf_size;
1590
        else
1591
            want = MAX_READ_BUF_SIZE;
1592
    }
1593
#endif /* HAVE_ZSTD */
1594
#ifdef HAVE_LZ4FRAME_H
1595
    if (LZ4BUFSIZE > want) {
1596
        if (LZ4BUFSIZE <= MAX_READ_BUF_SIZE) {
1597
            want = LZ4BUFSIZE;
1598
        } else {
1599
            goto err;
1600
        }
1601
    }
1602
#endif /* HAVE_LZ4FRAME_H */
1603
1604
    /* allocate buffers */
1605
0
    state->in.buf = (unsigned char *)g_try_malloc(want);
1606
0
    state->in.next = state->in.buf;
1607
0
    state->in.avail = 0;
1608
0
    state->out.buf = (unsigned char *)g_try_malloc(want << 1);
1609
0
    state->out.next = state->out.buf;
1610
0
    state->out.avail = 0;
1611
0
    state->size = want;
1612
0
    if (state->in.buf == NULL || state->out.buf == NULL) {
1613
0
       goto err;
1614
0
    }
1615
1616
0
#ifdef USE_ZLIB_OR_ZLIBNG
1617
    /* allocate inflate memory */
1618
0
    state->strm.zalloc = Z_NULL;
1619
0
    state->strm.zfree = Z_NULL;
1620
0
    state->strm.opaque = Z_NULL;
1621
0
    state->strm.avail_in = 0;
1622
0
    state->strm.next_in = Z_NULL;
1623
0
    if (ZLIB_PREFIX(inflateInit2)(&(state->strm), -15) != Z_OK) {    /* raw inflate */
1624
0
        goto err;
1625
0
    }
1626
1627
    /* for now, assume we should check the crc */
1628
0
    state->dont_check_crc = false;
1629
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1630
1631
#ifdef HAVE_ZSTD
1632
    state->zstd_dctx = ZSTD_createDCtx();
1633
    if (state->zstd_dctx == NULL) {
1634
        goto err;
1635
    }
1636
#endif /* HAVE_ZSTD */
1637
1638
#ifdef HAVE_LZ4FRAME_H
1639
    ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
1640
    if (LZ4F_isError(ret)) {
1641
        goto err;
1642
    }
1643
#endif /* HAVE_LZ4FRAME_H */
1644
1645
    /* return stream */
1646
0
    return state;
1647
1648
0
err:
1649
0
#ifdef USE_ZLIB_OR_ZLIBNG
1650
0
    ZLIB_PREFIX(inflateEnd)(&state->strm);
1651
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1652
#ifdef HAVE_ZSTD
1653
    ZSTD_freeDCtx(state->zstd_dctx);
1654
#endif /* HAVE_ZSTD */
1655
#ifdef HAVE_LZ4FRAME_H
1656
    LZ4F_freeDecompressionContext(state->lz4_dctx);
1657
#endif /* HAVE_LZ4FRAME_H */
1658
0
    g_free(state->out.buf);
1659
0
    g_free(state->in.buf);
1660
0
    g_free(state);
1661
0
    errno = ENOMEM;
1662
0
    return NULL;
1663
0
}
1664
1665
FILE_T
1666
file_open(const char *path)
1667
0
{
1668
0
    int fd;
1669
0
    FILE_T ft;
1670
0
#ifdef USE_ZLIB_OR_ZLIBNG
1671
0
    const char *suffixp;
1672
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1673
1674
    /* open file and do correct filename conversions.
1675
1676
       XXX - do we need O_LARGEFILE?  On UN*X, if we need to do
1677
       something special to get large file support, the configure
1678
       script should have set us up with the appropriate #defines,
1679
       so we should be getting a large-file-enabled file descriptor
1680
       here.  Pre-Large File Summit UN*Xes, and possibly even some
1681
       post-LFS UN*Xes, might require O_LARGEFILE here, though.
1682
       If so, we should probably handle that in ws_open(). */
1683
0
    if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
1684
0
        return NULL;
1685
1686
    /* open file handle */
1687
0
    ft = file_fdopen(fd);
1688
0
    if (ft == NULL) {
1689
0
        ws_close(fd);
1690
0
        return NULL;
1691
0
    }
1692
1693
0
#ifdef USE_ZLIB_OR_ZLIBNG
1694
    /*
1695
     * If this file's name ends in ".caz", it's probably a compressed
1696
     * Windows Sniffer file.  The compression is gzip, but if we
1697
     * process the CRC as specified by RFC 1952, the computed CRC
1698
     * doesn't match the stored CRC.
1699
     *
1700
     * Compressed Windows Sniffer files don't all have the same CRC
1701
     * value; is it just random crap, or are they running the CRC on
1702
     * a different set of data than you're supposed to (e.g., not
1703
     * CRCing some of the data), or something such as that?
1704
     *
1705
     * For now, we just set a flag to ignore CRC errors.
1706
     */
1707
0
    suffixp = strrchr(path, '.');
1708
0
    if (suffixp != NULL) {
1709
0
        if (g_ascii_strcasecmp(suffixp, ".caz") == 0)
1710
0
            ft->dont_check_crc = true;
1711
0
    }
1712
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1713
1714
0
    return ft;
1715
0
}
1716
1717
void
1718
file_set_random_access(FILE_T stream, bool random_flag _U_, GPtrArray *seek)
1719
0
{
1720
0
    stream->fast_seek = seek;
1721
0
}
1722
1723
int64_t
1724
file_seek(FILE_T file, int64_t offset, int whence, int *err)
1725
0
{
1726
0
    struct fast_seek_point *here;
1727
0
    unsigned n;
1728
1729
0
    if (whence != SEEK_SET && whence != SEEK_CUR && whence != SEEK_END) {
1730
0
        ws_assert_not_reached();
1731
/*
1732
 *err = EINVAL;
1733
 return -1;
1734
*/
1735
0
    }
1736
1737
    /* Normalize offset to a SEEK_CUR specification */
1738
0
    if (whence == SEEK_END) {
1739
        /* Seek relative to the end of the file; given that we might be
1740
           reading from a compressed file, we do that by seeking to the
1741
           end of the file, making an offset relative to the end of
1742
           the file an offset relative to the current position.
1743
1744
           XXX - we don't actually use this yet, but, for uncompressed
1745
           files, we could optimize it, if desired, by directly using
1746
           ws_lseek64(). */
1747
0
        if (gz_skip(file, INT64_MAX) == -1) {
1748
0
            *err = file->err;
1749
0
            return -1;
1750
0
        }
1751
0
        if (offset == 0) {
1752
            /* We are done */
1753
0
            return file->pos;
1754
0
        }
1755
0
    } else if (whence == SEEK_SET)
1756
0
        offset -= file->pos;
1757
0
    else if (file->seek_pending) {
1758
        /* There's a forward-skip pending, so file->pos doesn't reflect
1759
           the actual file position, it represents the position from
1760
           which we're skipping; update the offset to include that. */
1761
0
        offset += file->skip;
1762
0
    }
1763
0
    file->seek_pending = false;
1764
1765
    /*
1766
     * Are we moving at all?
1767
     */
1768
0
    if (offset == 0) {
1769
        /* No.  Just return the current position. */
1770
0
        return file->pos;
1771
0
    }
1772
1773
    /*
1774
     * Are we seeking backwards?
1775
     */
1776
0
    if (offset < 0) {
1777
        /*
1778
         * Yes.
1779
         *
1780
         * Do we have enough data before the current position in the
1781
         * buffer that we can seek backwards within the buffer?
1782
         */
1783
0
        if (-offset <= offset_in_buffer(&file->out)) {
1784
            /*
1785
             * Yes.  Adjust appropriately.
1786
             *
1787
             * offset is negative, so -offset is non-negative, and
1788
             * -offset is <= an unsigned and thus fits in an unsigned.
1789
             * Get that value and adjust appropriately.
1790
             *
1791
             * (Casting offset to unsigned makes it positive, which
1792
             * is not what we would want, so we cast -offset instead.)
1793
             *
1794
             * XXX - this won't work with -offset = 2^63, as its
1795
             * negative isn't a valid 64-bit integer, but we are
1796
             * not at all likely to see files big enough to ever
1797
             * see a negative offset that large.
1798
             */
1799
0
            unsigned adjustment = (unsigned)(-offset);
1800
1801
0
            file->out.avail += adjustment;
1802
0
            file->out.next -= adjustment;
1803
0
            file->pos -= adjustment;
1804
0
            return file->pos;
1805
0
        }
1806
0
    } else {
1807
        /*
1808
         * No.  Offset is positive; we're seeking forwards.
1809
         *
1810
         * Do we have enough data after the current position in the
1811
         * buffer that we can seek forwards within the buffer?
1812
         */
1813
0
        if (offset < file->out.avail) {
1814
            /*
1815
             * Yes.  Adjust appropriately.
1816
             *
1817
             * offset is < an unsigned and thus fits in an unsigned,
1818
             * so we can cast it to unsigned safely.
1819
             */
1820
0
            file->out.avail -= (unsigned)offset;
1821
0
            file->out.next += offset;
1822
0
            file->pos += offset;
1823
0
            return file->pos;
1824
0
        }
1825
0
    }
1826
1827
    /*
1828
     * We're not seeking within the buffer.  Do we have "fast seek" data
1829
     * for the location to which we will be seeking, and are we either
1830
     * seeking backwards or is the fast seek point past what is in the
1831
     * buffer? (We don't want to "fast seek" backwards to a point that
1832
     * we've already read and buffered if we're actually seeking forwards.)
1833
     *
1834
     * It might in certain cases be faster to continue reading linearly
1835
     * forward rather than jump to the fast seek point if the distance
1836
     * to the fast seek point is small, but we might only be able to do that
1837
     * if the compression context doesn't change (which for LZ4 includes if
1838
     * we jump to a LZ4 with different options.)
1839
     * XXX - profile different buffer and SPAN sizes
1840
     */
1841
0
    if ((here = fast_seek_find(file, file->pos + offset)) &&
1842
0
        (offset < 0 || here->out >= file->pos + file->out.avail)) {
1843
0
        int64_t off, off2;
1844
1845
        /*
1846
         * Yes.  Use that data to do the seek.
1847
         * Note that this will be true only if file_set_random_access()
1848
         * has been called on this file, which should never be the case
1849
         * for a pipe.
1850
         */
1851
0
        switch (here->compression) {
1852
1853
0
#ifdef USE_ZLIB_OR_ZLIBNG
1854
0
        case ZLIB:
1855
0
#ifdef HAVE_INFLATEPRIME
1856
0
            off = here->in - (here->data.zlib.bits ? 1 : 0);
1857
#else /* HAVE_INFLATEPRIME */
1858
            off = here->in;
1859
#endif /* HAVE_INFLATEPRIME */
1860
0
            off2 = here->out;
1861
0
            break;
1862
1863
0
        case GZIP_AFTER_HEADER:
1864
0
            off = here->in;
1865
0
            off2 = here->out;
1866
0
            break;
1867
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1868
1869
#ifdef HAVE_LZ4FRAME_H
1870
        case LZ4:
1871
        case LZ4_AFTER_HEADER:
1872
            ws_debug("fast seek lz4");
1873
            off = here->in;
1874
            off2 = here->out;
1875
            break;
1876
#endif /* HAVE_LZ4FRAME_H */
1877
1878
0
        case UNCOMPRESSED:
1879
            /* In an uncompressed portion, seek directly to the offset */
1880
0
            off2 = (file->pos + offset);
1881
0
            off = here->in + (off2 - here->out);
1882
0
            break;
1883
1884
0
        default:
1885
            /* Otherwise, seek to the fast seek point to do any needed setup. */
1886
0
            off = here->in;
1887
0
            off2 = here->out;
1888
0
            break;
1889
0
        }
1890
1891
0
        if (ws_lseek64(file->fd, off, SEEK_SET) == -1) {
1892
0
            *err = errno;
1893
0
            return -1;
1894
0
        }
1895
0
        fast_seek_reset(file);
1896
1897
0
        file->raw_pos = off;
1898
0
        buf_reset(&file->out);
1899
0
        file->eof = false;
1900
0
        file->seek_pending = false;
1901
0
        file->err = 0;
1902
0
        file->err_info = NULL;
1903
0
        buf_reset(&file->in);
1904
1905
0
        switch (here->compression) {
1906
1907
0
#ifdef USE_ZLIB_OR_ZLIBNG
1908
0
        case ZLIB: {
1909
0
            zlib_stream*strm = &file->strm;
1910
0
            ZLIB_PREFIX(inflateReset)(strm);
1911
0
            strm->adler = here->data.zlib.adler;
1912
0
            strm->total_out = here->data.zlib.total_out;
1913
0
#ifdef HAVE_INFLATEPRIME
1914
0
            if (here->data.zlib.bits) {
1915
0
                FILE_T state = file;
1916
0
                int ret = GZ_GETC();
1917
1918
0
                if (ret == -1) {
1919
0
                    if (state->err == 0) {
1920
                        /* EOF */
1921
0
                        *err = WTAP_ERR_SHORT_READ;
1922
0
                    } else
1923
0
                        *err = state->err;
1924
0
                    return -1;
1925
0
                }
1926
0
                (void)ZLIB_PREFIX(inflatePrime)(strm, here->data.zlib.bits, ret >> (8 - here->data.zlib.bits));
1927
0
            }
1928
0
#endif /* HAVE_INFLATEPRIME */
1929
0
            (void)ZLIB_PREFIX(inflateSetDictionary)(strm, here->data.zlib.window, ZLIB_WINSIZE);
1930
0
            file->compression = ZLIB;
1931
0
            break;
1932
0
        }
1933
1934
0
        case GZIP_AFTER_HEADER: {
1935
0
            zlib_stream* strm = &file->strm;
1936
0
            ZLIB_PREFIX(inflateReset)(strm);
1937
0
            strm->adler = ZLIB_PREFIX(crc32)(0L, Z_NULL, 0);
1938
0
            file->compression = ZLIB;
1939
0
            break;
1940
0
        }
1941
0
#endif /* USE_ZLIB_OR_ZLIBNG */
1942
1943
#ifdef HAVE_LZ4FRAME_H
1944
        case LZ4:
1945
        case LZ4_AFTER_HEADER:
1946
            /* At the start of a frame, reset the context and re-read it.
1947
             * Unfortunately the API doesn't provide a method to set the
1948
             * context options explicitly based on an already read
1949
             * LZ4F_frameInfo_t.
1950
             */
1951
            LZ4F_resetDecompressionContext(file->lz4_dctx);
1952
            size_t hdr_size = LZ4F_HEADER_SIZE_MAX;
1953
            const LZ4F_errorCode_t frame_err = LZ4F_getFrameInfo(file->lz4_dctx, &file->lz4_info, here->data.lz4.lz4_hdr, &hdr_size);
1954
            if (LZ4F_isError(frame_err)) {
1955
                file->err = WTAP_ERR_DECOMPRESS;
1956
                file->err_info = LZ4F_getErrorName(frame_err);
1957
                return -1;
1958
            }
1959
            file->lz4_info = here->data.lz4.lz4_info;
1960
            file->compression = LZ4;
1961
#if LZ4_VERSION_NUMBER >= 11000
1962
            if (here->compression == LZ4_AFTER_HEADER && here->data.lz4.lz4_info.blockMode == LZ4F_blockLinked) {
1963
                size_t dstSize = 0, srcSize = 0;
1964
                LZ4F_decompress_usingDict(file->lz4_dctx, NULL, &dstSize, NULL, &srcSize, here->data.lz4.window, LZ4_WINSIZE, NULL);
1965
            }
1966
#endif /* LZ4_VERSION_NUMBER >= 11000 */
1967
            break;
1968
#endif /* HAVE_LZ4FRAME_H */
1969
1970
#ifdef HAVE_ZSTD
1971
        case ZSTD:
1972
        {
1973
            const size_t ret = ZSTD_initDStream(file->zstd_dctx);
1974
            if (ZSTD_isError(ret)) {
1975
                file->err = WTAP_ERR_DECOMPRESS;
1976
                file->err_info = ZSTD_getErrorName(ret);
1977
                return -1;
1978
            }
1979
            file->compression = ZSTD;
1980
            break;
1981
        }
1982
#endif /* HAVE_ZSTD */
1983
1984
0
        default:
1985
0
            file->compression = here->compression;
1986
0
            break;
1987
0
        }
1988
1989
0
        offset = (file->pos + offset) - off2;
1990
0
        file->pos = off2;
1991
0
        ws_debug("Fast seek OK! %"PRId64, offset);
1992
1993
0
        if (offset) {
1994
            /* Don't skip forward yet, wait until we want to read from
1995
               the file; that way, if we do multiple seeks in a row,
1996
               all involving forward skips, they will be combined. */
1997
0
            file->seek_pending = true;
1998
0
            file->skip = offset;
1999
0
        }
2000
0
        return file->pos + offset;
2001
0
    }
2002
2003
    /*
2004
     * Is this an uncompressed file, are we within the raw area,
2005
     * are we either seeking backwards or seeking past the end
2006
     * of the buffer, and are we set up for random access with
2007
     * file_set_random_access()?
2008
     *
2009
     * Again, note that this will never be true on a pipe, as
2010
     * file_set_random_access() should never be called if we're
2011
     * reading from a pipe.
2012
     */
2013
0
    if (file->compression == UNCOMPRESSED && file->pos + offset >= file->raw
2014
0
        && (offset < 0 || offset >= file->out.avail)
2015
0
        && (file->fast_seek != NULL))
2016
0
    {
2017
        /*
2018
         * Yes.  Just seek there within the file.
2019
         */
2020
0
        if (ws_lseek64(file->fd, offset - file->out.avail, SEEK_CUR) == -1) {
2021
0
            *err = errno;
2022
0
            return -1;
2023
0
        }
2024
0
        file->raw_pos += (offset - file->out.avail);
2025
0
        buf_reset(&file->out);
2026
0
        file->eof = false;
2027
0
        file->seek_pending = false;
2028
0
        file->err = 0;
2029
0
        file->err_info = NULL;
2030
0
        buf_reset(&file->in);
2031
0
        file->pos += offset;
2032
0
        return file->pos;
2033
0
    }
2034
2035
    /*
2036
     * Are we seeking backwards?
2037
     */
2038
0
    if (offset < 0) {
2039
        /*
2040
         * Yes.  We have no fast seek data, so we have to rewind and
2041
         * seek forward.
2042
         * XXX - true only for compressed files.
2043
         *
2044
         * Calculate the amount to skip forward after rewinding.
2045
         */
2046
0
        offset += file->pos;
2047
0
        if (offset < 0) {                    /* before start of file! */
2048
0
            *err = EINVAL;
2049
0
            return -1;
2050
0
        }
2051
        /* rewind, then skip to offset */
2052
2053
        /* back up and start over */
2054
0
        if (ws_lseek64(file->fd, file->start, SEEK_SET) == -1) {
2055
0
            *err = errno;
2056
0
            return -1;
2057
0
        }
2058
0
        fast_seek_reset(file);
2059
0
        file->raw_pos = file->start;
2060
0
        gz_reset(file);
2061
0
    }
2062
2063
    /*
2064
     * Either we're seeking backwards, but have rewound and now need to
2065
     * skip forwards, or we're seeking forwards.
2066
     *
2067
     * Skip what's in output buffer (one less gzgetc() check).
2068
     */
2069
0
    n = (int64_t)file->out.avail > offset ? (unsigned)offset : file->out.avail;
2070
0
    file->out.avail -= n;
2071
0
    file->out.next += n;
2072
0
    file->pos += n;
2073
0
    offset -= n;
2074
2075
    /* request skip (if not zero) */
2076
0
    if (offset) {
2077
        /* Don't skip forward yet, wait until we want to read from
2078
           the file; that way, if we do multiple seeks in a row,
2079
           all involving forward skips, they will be combined. */
2080
0
        file->seek_pending = true;
2081
0
        file->skip = offset;
2082
0
    }
2083
0
    return file->pos + offset;
2084
0
}
2085
2086
int64_t
2087
file_tell(FILE_T stream)
2088
0
{
2089
    /* return position */
2090
0
    return stream->pos + (stream->seek_pending ? stream->skip : 0);
2091
0
}
2092
2093
int64_t
2094
file_tell_raw(FILE_T stream)
2095
0
{
2096
0
    return stream->raw_pos;
2097
0
}
2098
2099
int
2100
file_fstat(FILE_T stream, ws_statb64 *statb, int *err)
2101
0
{
2102
0
    if (ws_fstat64(stream->fd, statb) == -1) {
2103
0
        if (err != NULL)
2104
0
            *err = errno;
2105
0
        return -1;
2106
0
    }
2107
0
    return 0;
2108
0
}
2109
2110
bool
2111
file_iscompressed(FILE_T stream)
2112
0
{
2113
0
    return stream->is_compressed;
2114
0
}
2115
2116
/* Returns a wtap compression type. If we don't know the compression type,
2117
 * return WS_FILE_UNCOMPRESSED, but if our compression state is temporarily
2118
 * UNKNOWN because we need to reread compression headers, return the last
2119
 * known compression type.
2120
 */
2121
static ws_compression_type
2122
file_get_compression_type(FILE_T stream)
2123
0
{
2124
0
    if (stream->is_compressed) {
2125
0
        switch ((stream->compression == UNKNOWN) ? stream->last_compression : stream->compression) {
2126
2127
0
        case ZLIB:
2128
0
        case GZIP_AFTER_HEADER:
2129
0
            return WS_FILE_GZIP_COMPRESSED;
2130
2131
0
        case ZSTD:
2132
0
            return WS_FILE_ZSTD_COMPRESSED;
2133
2134
0
        case LZ4:
2135
0
        case LZ4_AFTER_HEADER:
2136
0
            return WS_FILE_LZ4_COMPRESSED;
2137
2138
0
        case UNCOMPRESSED:
2139
0
            return WS_FILE_UNCOMPRESSED;
2140
2141
0
        default: /* UNKNOWN, should never happen if is_compressed is set */
2142
0
            ws_assert_not_reached();
2143
0
            return WS_FILE_UNCOMPRESSED;
2144
0
        }
2145
0
    }
2146
0
    return WS_FILE_UNCOMPRESSED;
2147
0
}
2148
2149
int
2150
file_read(void *buf, unsigned int len, FILE_T file)
2151
0
{
2152
0
    unsigned got, n;
2153
2154
    /* if len is zero, avoid unnecessary operations */
2155
0
    if (len == 0)
2156
0
        return 0;
2157
2158
    /* process a skip request */
2159
0
    if (file->seek_pending) {
2160
0
        file->seek_pending = false;
2161
0
        if (gz_skip(file, file->skip) == -1)
2162
0
            return -1;
2163
0
    }
2164
2165
    /*
2166
     * Get len bytes to buf, or less than len if at the end;
2167
     * if buf is null, just throw the bytes away.
2168
     */
2169
0
    got = 0;
2170
0
    do {
2171
0
        if (file->out.avail != 0) {
2172
            /* We have stuff in the output buffer; copy
2173
               what we have. */
2174
0
            n = file->out.avail > len ? len : file->out.avail;
2175
0
            if (buf != NULL) {
2176
0
                memcpy(buf, file->out.next, n);
2177
0
                buf = (char *)buf + n;
2178
0
            }
2179
0
            file->out.next += n;
2180
0
            file->out.avail -= n;
2181
0
            len -= n;
2182
0
            got += n;
2183
0
            file->pos += n;
2184
0
        } else if (file->err != 0) {
2185
            /* We have nothing in the output buffer, and
2186
               we have an error that may not have been
2187
               reported yet; that means we can't generate
2188
               any more data into the output buffer, so
2189
               return an error indication. */
2190
0
            return -1;
2191
0
        } else if (file->eof && file->in.avail == 0) {
2192
            /* We have nothing in the output buffer, and
2193
               we're at the end of the input; just return
2194
               with what we've gotten so far. */
2195
0
            break;
2196
0
        } else {
2197
            /* We have nothing in the output buffer, and
2198
               we can generate more data; get more output,
2199
               looking for header if required, and
2200
               keep looping to process the new stuff
2201
               in the output buffer. */
2202
0
            if (fill_out_buffer(file) == -1)
2203
0
                return -1;
2204
0
        }
2205
0
    } while (len);
2206
2207
0
    return (int)got;
2208
0
}
2209
2210
/*
2211
 * XXX - this *peeks* at next byte, not a character.
2212
 */
2213
int
2214
file_peekc(FILE_T file)
2215
0
{
2216
0
    int ret = 0;
2217
2218
    /* check that we're reading and that there's no error */
2219
0
    if (file->err != 0)
2220
0
        return -1;
2221
2222
    /* try output buffer (no need to check for skip request) */
2223
0
    if (file->out.avail != 0) {
2224
0
        return *(file->out.next);
2225
0
    }
2226
2227
    /* process a skip request */
2228
0
    if (file->seek_pending) {
2229
0
        file->seek_pending = false;
2230
0
        if (gz_skip(file, file->skip) == -1)
2231
0
            return -1;
2232
0
    }
2233
    /* if we processed a skip request, there may be data in the buffer,
2234
     * or an error could have occurred; likewise if we didn't do seek but
2235
     * now call fill_out_buffer, the errors can occur.  So we do this while
2236
     * loop to check before and after - this is basically the logic from
2237
     * file_read() but only for peeking not consuming a byte
2238
     */
2239
0
    while (1) {
2240
0
        if (file->out.avail != 0) {
2241
0
            return *(file->out.next);
2242
0
        }
2243
0
        else if (file->err != 0) {
2244
0
            return -1;
2245
0
        }
2246
0
        else if (file->eof && file->in.avail == 0) {
2247
0
            return -1;
2248
0
        }
2249
0
        else if (fill_out_buffer(file) == -1) {
2250
0
            return -1;
2251
0
        }
2252
0
    }
2253
    /* it's actually impossible to get here */
2254
0
    return ret;
2255
0
}
2256
2257
/*
2258
 * XXX - this gets a byte, not a character.
2259
 */
2260
int
2261
file_getc(FILE_T file)
2262
0
{
2263
0
    unsigned char buf[1];
2264
0
    int ret;
2265
2266
    /* check that we're reading and that there's no error */
2267
0
    if (file->err != 0)
2268
0
        return -1;
2269
2270
    /* try output buffer (no need to check for skip request) */
2271
0
    if (file->out.avail != 0) {
2272
0
        file->out.avail--;
2273
0
        file->pos++;
2274
0
        return *(file->out.next)++;
2275
0
    }
2276
2277
0
    ret = file_read(buf, 1, file);
2278
0
    return ret < 1 ? -1 : buf[0];
2279
0
}
2280
2281
/*
2282
 * Like file_gets, but returns a pointer to the terminating NUL
2283
 * on success and NULL on failure.
2284
 */
2285
char *
2286
file_getsp(char *buf, int len, FILE_T file)
2287
0
{
2288
0
    unsigned left, n;
2289
0
    char *curp;
2290
0
    unsigned char *eol;
2291
2292
    /* check parameters */
2293
0
    if (buf == NULL || len < 1)
2294
0
        return NULL;
2295
2296
    /* check that there's no error */
2297
0
    if (file->err != 0)
2298
0
        return NULL;
2299
2300
    /* process a skip request */
2301
0
    if (file->seek_pending) {
2302
0
        file->seek_pending = false;
2303
0
        if (gz_skip(file, file->skip) == -1)
2304
0
            return NULL;
2305
0
    }
2306
2307
    /* copy output bytes up to new line or len - 1, whichever comes first --
2308
       append a terminating zero to the string (we don't check for a zero in
2309
       the contents, let the user worry about that) */
2310
0
    curp = buf;
2311
0
    left = (unsigned)len - 1;
2312
0
    if (left) do {
2313
            /* assure that something is in the output buffer */
2314
0
            if (file->out.avail == 0) {
2315
                /* We have nothing in the output buffer. */
2316
0
                if (file->err != 0) {
2317
                    /* We have an error that may not have
2318
                       been reported yet; that means we
2319
                       can't generate any more data into
2320
                       the output buffer, so return an
2321
                       error indication. */
2322
0
                    return NULL;
2323
0
                }
2324
0
                if (fill_out_buffer(file) == -1)
2325
0
                    return NULL;            /* error */
2326
0
                if (file->out.avail == 0)  {     /* end of file */
2327
0
                    if (curp == buf)        /* got bupkus */
2328
0
                        return NULL;
2329
0
                    break;                  /* got something -- return it */
2330
0
                }
2331
0
            }
2332
2333
            /* look for end-of-line in current output buffer */
2334
0
            n = file->out.avail > left ? left : file->out.avail;
2335
0
            eol = (unsigned char *)memchr(file->out.next, '\n', n);
2336
0
            if (eol != NULL)
2337
0
                n = (unsigned)(eol - file->out.next) + 1;
2338
2339
            /* copy through end-of-line, or remainder if not found */
2340
0
            memcpy(curp, file->out.next, n);
2341
0
            file->out.avail -= n;
2342
0
            file->out.next += n;
2343
0
            file->pos += n;
2344
0
            left -= n;
2345
0
            curp += n;
2346
0
        } while (left && eol == NULL);
2347
2348
    /* found end-of-line or out of space -- add a terminator and return
2349
       a pointer to it */
2350
0
    *curp = '\0';
2351
0
    return curp;
2352
0
}
2353
2354
/*
2355
 * Returns a pointer to the beginning of the buffer on success
2356
 * and NULL on failure.
2357
 */
2358
char *
2359
file_gets(char *buf, int len, FILE_T file)
2360
0
{
2361
0
    if (!file_getsp(buf, len, file)) return NULL;
2362
0
    return buf;
2363
0
}
2364
2365
bool
2366
file_eof(FILE_T file)
2367
0
{
2368
    /* return end-of-file state */
2369
0
    return (file->eof && file->in.avail == 0 && file->out.avail == 0);
2370
0
}
2371
2372
/*
2373
 * Routine to return a Wiretap error code (0 for no error, an errno
2374
 * for a file error, or a WTAP_ERR_ code for other errors) for an
2375
 * I/O stream.  Also returns an error string for some errors.
2376
 */
2377
int
2378
file_error(FILE_T fh, char **err_info)
2379
0
{
2380
0
    if (fh->err!=0 && err_info) {
2381
        /* g_strdup() returns NULL for NULL argument */
2382
0
        *err_info = g_strdup(fh->err_info);
2383
0
    }
2384
0
    return fh->err;
2385
0
}
2386
2387
void
2388
file_clearerr(FILE_T stream)
2389
0
{
2390
    /* clear error and end-of-file */
2391
0
    stream->err = 0;
2392
0
    stream->err_info = NULL;
2393
0
    stream->eof = false;
2394
0
}
2395
2396
void
2397
file_fdclose(FILE_T file)
2398
0
{
2399
0
    if (file->fd != -1)
2400
0
        ws_close(file->fd);
2401
0
    file->fd = -1;
2402
0
}
2403
2404
bool
2405
file_fdreopen(FILE_T file, const char *path)
2406
0
{
2407
0
    int fd;
2408
2409
0
    if ((fd = ws_open(path, O_RDONLY|O_BINARY, 0000)) == -1)
2410
0
        return false;
2411
0
    file->fd = fd;
2412
0
    return true;
2413
0
}
2414
2415
void
2416
file_close(FILE_T file)
2417
0
{
2418
0
    int fd = file->fd;
2419
2420
    /* free memory and close file */
2421
0
    if (file->size) {
2422
0
#ifdef USE_ZLIB_OR_ZLIBNG
2423
0
        ZLIB_PREFIX(inflateEnd)(&(file->strm));
2424
0
#endif /* USE_ZLIB_OR_ZLIBNG */
2425
#ifdef HAVE_ZSTD
2426
        ZSTD_freeDCtx(file->zstd_dctx);
2427
#endif /* HAVE_ZSTD */
2428
#ifdef HAVE_LZ4FRAME_H
2429
        LZ4F_freeDecompressionContext(file->lz4_dctx);
2430
#endif /* HAVE_LZ4FRAME_H */
2431
0
        g_free(file->out.buf);
2432
0
        g_free(file->in.buf);
2433
0
    }
2434
0
    g_free(file->fast_seek_cur);
2435
0
    file->err = 0;
2436
0
    file->err_info = NULL;
2437
0
    g_free(file);
2438
    /*
2439
     * If fd is -1, somebody's done a file_closefd() on us, so
2440
     * we don't need to close the FD itself, and shouldn't do
2441
     * so.
2442
     */
2443
0
    if (fd != -1)
2444
0
        ws_close(fd);
2445
0
}
2446
2447
/*
2448
 * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
2449
 *
2450
 * Local variables:
2451
 * c-basic-offset: 4
2452
 * tab-width: 8
2453
 * indent-tabs-mode: nil
2454
 * End:
2455
 *
2456
 * vi: set shiftwidth=4 tabstop=8 expandtab:
2457
 * :indentSize=4:tabSize=8:noTabs=true:
2458
 */