Coverage Report

Created: 2025-07-23 06:42

/src/sleuthkit/tsk/fs/lzvn.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2015-2016, Apple Inc. All rights reserved.
3
4
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
6
1.  Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
8
2.  Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer
9
    in the documentation and/or other materials provided with the distribution.
10
11
3.  Neither the name of the copyright holder(s) nor the names of any contributors may be used to endorse or promote products derived
12
    from this software without specific prior written permission.
13
14
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
15
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
16
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
18
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
19
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
20
21
NOTE: This is distributed in licenses/bsd.txt
22
23
*/
24
25
// LZVN low-level decoder
26
27
#include "lzvn.h"
28
29
#include <assert.h>
30
#include <string.h>
31
32
#if defined(_MSC_VER) && !defined(__clang__)
33
#  define LZFSE_INLINE __forceinline
34
#  define __builtin_expect(X, Y) (X)
35
#  define __attribute__(X)
36
#  pragma warning(disable : 4068) // warning C4068: unknown pragma
37
#else
38
#  define LZFSE_INLINE static inline __attribute__((__always_inline__))
39
#endif
40
41
/*! @abstract Signed offset in buffers, stored on either 32 or 64 bits. */
42
#if defined(_M_AMD64) || defined(__x86_64__) || defined(__arm64__)
43
typedef int64_t lzvn_offset;
44
#else
45
typedef int32_t lzvn_offset;
46
#endif
47
48
/*! @abstract Base decoder state. */
49
typedef struct {
50
51
  // Decoder I/O
52
53
  // Next byte to read in source buffer
54
  const unsigned char *src;
55
  // Next byte after source buffer
56
  const unsigned char *src_end;
57
58
  // Next byte to write in destination buffer (by decoder)
59
  unsigned char *dst;
60
  // Valid range for destination buffer is [dst_begin, dst_end - 1]
61
  unsigned char *dst_begin;
62
  unsigned char *dst_end;
63
  // Next byte to read in destination buffer (modified by caller)
64
  unsigned char *dst_current;
65
66
  // Decoder state
67
68
  // Partially expanded match, or 0,0,0.
69
  // In that case, src points to the next literal to copy, or the next op-code
70
  // if L==0.
71
  size_t L, M, D;
72
73
  // Distance for last emitted match, or 0
74
  lzvn_offset d_prev;
75
76
  // Did we decode end-of-stream?
77
  int end_of_stream;
78
79
} lzvn_decoder_state;
80
81
/*! @abstract Load bytes from memory location SRC. */
82
0
LZFSE_INLINE uint16_t load2(const void *ptr) {
83
0
  uint16_t data;
84
0
  memcpy(&data, ptr, sizeof data);
85
0
  return data;
86
0
}
87
88
0
LZFSE_INLINE uint32_t load4(const void *ptr) {
89
0
  uint32_t data;
90
0
  memcpy(&data, ptr, sizeof data);
91
0
  return data;
92
0
}
93
94
0
LZFSE_INLINE uint64_t load8(const void *ptr) {
95
0
  uint64_t data;
96
0
  memcpy(&data, ptr, sizeof data);
97
0
  return data;
98
0
}
99
100
/*! @abstract Store bytes to memory location DST. */
101
/*
102
LZFSE_INLINE void store2(void *ptr, uint16_t data) {
103
  memcpy(ptr, &data, sizeof data);
104
}
105
*/
106
107
0
LZFSE_INLINE void store4(void *ptr, uint32_t data) {
108
0
  memcpy(ptr, &data, sizeof data);
109
0
}
110
111
0
LZFSE_INLINE void store8(void *ptr, uint64_t data) {
112
0
  memcpy(ptr, &data, sizeof data);
113
0
}
114
115
/*! @abstract Extracts \p width bits from \p container, starting with \p lsb; if
116
 * we view \p container as a bit array, we extract \c container[lsb:lsb+width]. */
117
LZFSE_INLINE uintmax_t extract(uintmax_t container, unsigned lsb,
118
0
                               unsigned width) {
119
0
  static const size_t container_width = sizeof container * 8;
120
0
  assert(lsb < container_width);
121
0
  assert(width > 0 && width <= container_width);
122
0
  assert(lsb + width <= container_width);
123
0
  if (width == container_width)
124
0
    return container;
125
0
  return (container >> lsb) & (((uintmax_t)1 << width) - 1);
126
0
}
127
128
#if !defined(HAVE_LABELS_AS_VALUES)
129
#  if defined(__GNUC__) || defined(__clang__)
130
#    define HAVE_LABELS_AS_VALUES 1
131
#  else
132
#    define HAVE_LABELS_AS_VALUES 0
133
#  endif
134
#endif
135
136
//  Both the source and destination buffers are represented by a pointer and
137
//  a length; they are *always* updated in concert using this macro; however
138
//  many bytes the pointer is advanced, the length is decremented by the same
139
//  amount. Thus, pointer + length always points to the byte one past the end
140
//  of the buffer.
141
#define PTR_LEN_INC(_pointer, _length, _increment)                             \
142
0
  (_pointer += _increment, _length -= _increment)
143
144
//  Update state with current positions and distance, corresponding to the
145
//  beginning of an instruction in both streams
146
#define UPDATE_GOOD                                                            \
147
0
  (state->src = src_ptr, state->dst = dst_ptr, state->d_prev = D)
148
149
/*! @abstract Decode source to destination.
150
 *  Updates \p state (src,dst,d_prev). */
151
0
void lzvn_decode(lzvn_decoder_state *state) {
152
0
#if HAVE_LABELS_AS_VALUES
153
  // Jump table for all instructions
154
0
  static const void *opc_tbl[256] = {
155
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&eos,   &&lrg_d,
156
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop,   &&lrg_d,
157
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&nop,   &&lrg_d,
158
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef,  &&lrg_d,
159
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef,  &&lrg_d,
160
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef,  &&lrg_d,
161
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef,  &&lrg_d,
162
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&udef,  &&lrg_d,
163
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
164
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
165
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
166
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
167
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
168
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
169
0
      &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,
170
0
      &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,
171
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
172
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
173
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
174
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
175
0
      &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
176
0
      &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
177
0
      &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
178
0
      &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d, &&med_d,
179
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
180
0
      &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&sml_d, &&pre_d, &&lrg_d,
181
0
      &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,
182
0
      &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,  &&udef,
183
0
      &&lrg_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
184
0
      &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l, &&sml_l,
185
0
      &&lrg_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m,
186
0
      &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m, &&sml_m};
187
0
#endif
188
0
  size_t src_len = state->src_end - state->src;
189
0
  size_t dst_len = state->dst_end - state->dst;
190
0
  if (src_len == 0 || dst_len == 0)
191
0
    return; // empty buffer
192
193
0
  const unsigned char *src_ptr = state->src;
194
0
  unsigned char *dst_ptr = state->dst;
195
0
  size_t D = state->d_prev;
196
0
  size_t M;
197
0
  size_t L;
198
0
  size_t opc_len;
199
200
  // Do we have a partially expanded match saved in state?
201
0
  if (state->L != 0 || state->M != 0) {
202
0
    L = state->L;
203
0
    M = state->M;
204
0
    D = state->D;
205
0
    opc_len = 0; // we already skipped the op
206
0
    state->L = state->M = state->D = 0;
207
0
    if (M == 0)
208
0
      goto copy_literal;
209
0
    if (L == 0)
210
0
      goto copy_match;
211
0
    goto copy_literal_and_match;
212
0
  }
213
214
0
  unsigned char opc = src_ptr[0];
215
216
0
#if HAVE_LABELS_AS_VALUES
217
0
  goto *opc_tbl[opc];
218
#else
219
  for (;;) {
220
    switch (opc) {
221
#endif
222
//  ===============================================================
223
//  These four opcodes (sml_d, med_d, lrg_d, and pre_d) encode both a
224
//  literal and a match. The bulk of their implementations are shared;
225
//  each label here only does the work of setting the opcode length (not
226
//  including any literal bytes), and extracting the literal length, match
227
//  length, and match distance (except in pre_d). They then jump into the
228
//  shared implementation to actually output the literal and match bytes.
229
//
230
//  No error checking happens in the first stage, except for ensuring that
231
//  the source has enough length to represent the full opcode before
232
//  reading past the first byte.
233
0
#if HAVE_LABELS_AS_VALUES
234
0
sml_d:
235
#else
236
  case 0:
237
  case 1:
238
  case 2:
239
  case 3:
240
  case 4:
241
  case 5:
242
  case 8:
243
  case 9:
244
  case 10:
245
  case 11:
246
  case 12:
247
  case 13:
248
  case 16:
249
  case 17:
250
  case 18:
251
  case 19:
252
  case 20:
253
  case 21:
254
  case 24:
255
  case 25:
256
  case 26:
257
  case 27:
258
  case 28:
259
  case 29:
260
  case 32:
261
  case 33:
262
  case 34:
263
  case 35:
264
  case 36:
265
  case 37:
266
  case 40:
267
  case 41:
268
  case 42:
269
  case 43:
270
  case 44:
271
  case 45:
272
  case 48:
273
  case 49:
274
  case 50:
275
  case 51:
276
  case 52:
277
  case 53:
278
  case 56:
279
  case 57:
280
  case 58:
281
  case 59:
282
  case 60:
283
  case 61:
284
  case 64:
285
  case 65:
286
  case 66:
287
  case 67:
288
  case 68:
289
  case 69:
290
  case 72:
291
  case 73:
292
  case 74:
293
  case 75:
294
  case 76:
295
  case 77:
296
  case 80:
297
  case 81:
298
  case 82:
299
  case 83:
300
  case 84:
301
  case 85:
302
  case 88:
303
  case 89:
304
  case 90:
305
  case 91:
306
  case 92:
307
  case 93:
308
  case 96:
309
  case 97:
310
  case 98:
311
  case 99:
312
  case 100:
313
  case 101:
314
  case 104:
315
  case 105:
316
  case 106:
317
  case 107:
318
  case 108:
319
  case 109:
320
  case 128:
321
  case 129:
322
  case 130:
323
  case 131:
324
  case 132:
325
  case 133:
326
  case 136:
327
  case 137:
328
  case 138:
329
  case 139:
330
  case 140:
331
  case 141:
332
  case 144:
333
  case 145:
334
  case 146:
335
  case 147:
336
  case 148:
337
  case 149:
338
  case 152:
339
  case 153:
340
  case 154:
341
  case 155:
342
  case 156:
343
  case 157:
344
  case 192:
345
  case 193:
346
  case 194:
347
  case 195:
348
  case 196:
349
  case 197:
350
  case 200:
351
  case 201:
352
  case 202:
353
  case 203:
354
  case 204:
355
  case 205:
356
#endif
357
0
  UPDATE_GOOD;
358
  // "small distance": This opcode has the structure LLMMMDDD DDDDDDDD LITERAL
359
  //  where the length of literal (0-3 bytes) is encoded by the high 2 bits of
360
  //  the first byte. We first extract the literal length so we know how long
361
  //  the opcode is, then check that the source can hold both this opcode and
362
  //  at least one byte of the next (because any valid input stream must be
363
  //  terminated with an eos token).
364
0
  opc_len = 2;
365
0
  L = (size_t)extract(opc, 6, 2);
366
0
  M = (size_t)extract(opc, 3, 3) + 3;
367
  //  We need to ensure that the source buffer is long enough that we can
368
  //  safely read this entire opcode, the literal that follows, and the first
369
  //  byte of the next opcode.  Once we satisfy this requirement, we can
370
  //  safely unpack the match distance. A check similar to this one is
371
  //  present in all the opcode implementations.
372
0
  if (src_len <= opc_len + L)
373
0
    return; // source truncated
374
0
  D = (size_t)extract(opc, 0, 3) << 8 | src_ptr[1];
375
0
  goto copy_literal_and_match;
376
377
0
#if HAVE_LABELS_AS_VALUES
378
0
med_d:
379
#else
380
  case 160:
381
  case 161:
382
  case 162:
383
  case 163:
384
  case 164:
385
  case 165:
386
  case 166:
387
  case 167:
388
  case 168:
389
  case 169:
390
  case 170:
391
  case 171:
392
  case 172:
393
  case 173:
394
  case 174:
395
  case 175:
396
  case 176:
397
  case 177:
398
  case 178:
399
  case 179:
400
  case 180:
401
  case 181:
402
  case 182:
403
  case 183:
404
  case 184:
405
  case 185:
406
  case 186:
407
  case 187:
408
  case 188:
409
  case 189:
410
  case 190:
411
  case 191:
412
#endif
413
0
  UPDATE_GOOD;
414
  //  "medium distance": This is a minor variant of the "small distance"
415
  //  encoding, where we will now use two extra bytes instead of one to encode
416
  //  the restof the match length and distance. This allows an extra two bits
417
  //  for the match length, and an extra three bits for the match distance. The
418
  //  full structure of the opcode is 101LLMMM DDDDDDMM DDDDDDDD LITERAL.
419
0
  opc_len = 3;
420
0
  L = (size_t)extract(opc, 3, 2);
421
0
  if (src_len <= opc_len + L)
422
0
    return; // source truncated
423
0
  uint16_t opc23 = load2(&src_ptr[1]);
424
0
  M = (size_t)((extract(opc, 0, 3) << 2 | extract(opc23, 0, 2)) + 3);
425
0
  D = (size_t)extract(opc23, 2, 14);
426
0
  goto copy_literal_and_match;
427
428
0
#if HAVE_LABELS_AS_VALUES
429
0
lrg_d:
430
#else
431
  case 7:
432
  case 15:
433
  case 23:
434
  case 31:
435
  case 39:
436
  case 47:
437
  case 55:
438
  case 63:
439
  case 71:
440
  case 79:
441
  case 87:
442
  case 95:
443
  case 103:
444
  case 111:
445
  case 135:
446
  case 143:
447
  case 151:
448
  case 159:
449
  case 199:
450
  case 207:
451
#endif
452
0
  UPDATE_GOOD;
453
  //  "large distance": This is another variant of the "small distance"
454
  //  encoding, where we will now use two extra bytes to encode the match
455
  //  distance, which allows distances up to 65535 to be represented. The full
456
  //  structure of the opcode is LLMMM111 DDDDDDDD DDDDDDDD LITERAL.
457
0
  opc_len = 3;
458
0
  L = (size_t)extract(opc, 6, 2);
459
0
  M = (size_t)extract(opc, 3, 3) + 3;
460
0
  if (src_len <= opc_len + L)
461
0
    return; // source truncated
462
0
  D = load2(&src_ptr[1]);
463
0
  goto copy_literal_and_match;
464
465
0
#if HAVE_LABELS_AS_VALUES
466
0
pre_d:
467
#else
468
  case 70:
469
  case 78:
470
  case 86:
471
  case 94:
472
  case 102:
473
  case 110:
474
  case 134:
475
  case 142:
476
  case 150:
477
  case 158:
478
  case 198:
479
  case 206:
480
#endif
481
0
  UPDATE_GOOD;
482
  //  "previous distance": This opcode has the structure LLMMM110, where the
483
  //  length of the literal (0-3 bytes) is encoded by the high 2 bits of the
484
  //  first byte. We first extract the literal length so we know how long
485
  //  the opcode is, then check that the source can hold both this opcode and
486
  //  at least one byte of the next (because any valid input stream must be
487
  //  terminated with an eos token).
488
0
  opc_len = 1;
489
0
  L = (size_t)extract(opc, 6, 2);
490
0
  M = (size_t)extract(opc, 3, 3) + 3;
491
0
  if (src_len <= opc_len + L)
492
0
    return; // source truncated
493
0
  goto copy_literal_and_match;
494
495
0
copy_literal_and_match:
496
  //  Common implementation of writing data for opcodes that have both a
497
  //  literal and a match. We begin by advancing the source pointer past
498
  //  the opcode, so that it points at the first literal byte (if L
499
  //  is non-zero; otherwise it points at the next opcode).
500
0
  PTR_LEN_INC(src_ptr, src_len, opc_len);
501
  //  Now we copy the literal from the source pointer to the destination.
502
0
  if (__builtin_expect(dst_len >= 4 && src_len >= 4, 1)) {
503
    //  The literal is 0-3 bytes; if we are not near the end of the buffer,
504
    //  we can safely just do a 4 byte copy (which is guaranteed to cover
505
    //  the complete literal, and may include some other bytes as well).
506
0
    store4(dst_ptr, load4(src_ptr));
507
0
  } else if (L <= dst_len) {
508
    //  We are too close to the end of either the input or output stream
509
    //  to be able to safely use a four-byte copy, but we will not exhaust
510
    //  either stream (we already know that the source will not be
511
    //  exhausted from checks in the individual opcode implementations,
512
    //  and we just tested that dst_len > L). Thus, we need to do a
513
    //  byte-by-byte copy of the literal. This is slow, but it can only ever
514
    //  happen near the very end of a buffer, so it is not an important case to
515
    //  optimize.
516
0
    size_t i;
517
0
    for (i = 0; i < L; ++i)
518
0
      dst_ptr[i] = src_ptr[i];
519
0
  } else {
520
    // Destination truncated: fill DST, and store partial match
521
522
    // Copy partial literal
523
0
    size_t i;
524
0
    for (i = 0; i < dst_len; ++i)
525
0
      dst_ptr[i] = src_ptr[i];
526
    // Save state
527
0
    state->src = src_ptr + dst_len;
528
0
    state->dst = dst_ptr + dst_len;
529
0
    state->L = L - dst_len;
530
0
    state->M = M;
531
0
    state->D = D;
532
0
    return; // destination truncated
533
0
  }
534
  //  Having completed the copy of the literal, we advance both the source
535
  //  and destination pointers by the number of literal bytes.
536
0
  PTR_LEN_INC(dst_ptr, dst_len, L);
537
0
  PTR_LEN_INC(src_ptr, src_len, L);
538
  //  Check if the match distance is valid; matches must not reference
539
  //  bytes that preceed the start of the output buffer, nor can the match
540
  //  distance be zero.
541
0
  if (D > (size_t)(dst_ptr - state->dst_begin) || D == 0)
542
0
    goto invalid_match_distance;
543
0
copy_match:
544
  //  Now we copy the match from dst_ptr - D to dst_ptr. It is important to keep
545
  //  in mind that we may have D < M, in which case the source and destination
546
  //  windows overlap in the copy. The semantics of the match copy are *not*
547
  //  those of memmove( ); if the buffers overlap it needs to behave as though
548
  //  we were copying byte-by-byte in increasing address order. If, for example,
549
  //  D is 1, the copy operation is equivalent to:
550
  //
551
  //      memset(dst_ptr, dst_ptr[-1], M);
552
  //
553
  //  i.e. it splats the previous byte. This means that we need to be very
554
  //  careful about using wide loads or stores to perform the copy operation.
555
0
  if (__builtin_expect(dst_len >= M + 7 && D >= 8, 1)) {
556
    //  We are not near the end of the buffer, and the match distance
557
    //  is at least eight. Thus, we can safely loop using eight byte
558
    //  copies. The last of these may slop over the intended end of
559
    //  the match, but this is OK because we know we have a safety bound
560
    //  away from the end of the destination buffer.
561
0
    size_t i;
562
0
    for (i = 0; i < M; i += 8)
563
0
      store8(&dst_ptr[i], load8(&dst_ptr[i - D]));
564
0
  } else if (M <= dst_len) {
565
    //  Either the match distance is too small, or we are too close to
566
    //  the end of the buffer to safely use eight byte copies. Fall back
567
    //  on a simple byte-by-byte implementation.
568
0
    size_t i;
569
0
    for (i = 0; i < M; ++i)
570
0
      dst_ptr[i] = dst_ptr[i - D];
571
0
  } else {
572
    // Destination truncated: fill DST, and store partial match
573
574
    // Copy partial match
575
0
    size_t i;
576
0
    for (i = 0; i < dst_len; ++i)
577
0
      dst_ptr[i] = dst_ptr[i - D];
578
    // Save state
579
0
    state->src = src_ptr;
580
0
    state->dst = dst_ptr + dst_len;
581
0
    state->L = 0;
582
0
    state->M = M - dst_len;
583
0
    state->D = D;
584
0
    return; // destination truncated
585
0
  }
586
  //  Update the destination pointer and length to account for the bytes
587
  //  written by the match, then load the next opcode byte and branch to
588
  //  the appropriate implementation.
589
0
  PTR_LEN_INC(dst_ptr, dst_len, M);
590
0
  opc = src_ptr[0];
591
0
#if HAVE_LABELS_AS_VALUES
592
0
  goto *opc_tbl[opc];
593
#else
594
  break;
595
#endif
596
597
// ===============================================================
598
// Opcodes representing only a match (no literal).
599
//  These two opcodes (lrg_m and sml_m) encode only a match. The match
600
//  distance is carried over from the previous opcode, so all they need
601
//  to encode is the match length. We are able to reuse the match copy
602
//  sequence from the literal and match opcodes to perform the actual
603
//  copy implementation.
604
0
#if HAVE_LABELS_AS_VALUES
605
0
sml_m:
606
#else
607
  case 241:
608
  case 242:
609
  case 243:
610
  case 244:
611
  case 245:
612
  case 246:
613
  case 247:
614
  case 248:
615
  case 249:
616
  case 250:
617
  case 251:
618
  case 252:
619
  case 253:
620
  case 254:
621
  case 255:
622
#endif
623
0
  UPDATE_GOOD;
624
  //  "small match": This opcode has no literal, and uses the previous match
625
  //  distance (i.e. it encodes only the match length), in a single byte as
626
  //  1111MMMM.
627
0
  opc_len = 1;
628
0
  if (src_len <= opc_len)
629
0
    return; // source truncated
630
0
  M = (size_t)extract(opc, 0, 4);
631
0
  PTR_LEN_INC(src_ptr, src_len, opc_len);
632
0
  goto copy_match;
633
634
0
#if HAVE_LABELS_AS_VALUES
635
0
lrg_m:
636
#else
637
  case 240:
638
#endif
639
0
  UPDATE_GOOD;
640
  //  "large match": This opcode has no literal, and uses the previous match
641
  //  distance (i.e. it encodes only the match length). It is encoded in two
642
  //  bytes as 11110000 MMMMMMMM.  Because matches smaller than 16 bytes can
643
  //  be represented by sml_m, there is an implicit bias of 16 on the match
644
  //  length; the representable values are [16,271].
645
0
  opc_len = 2;
646
0
  if (src_len <= opc_len)
647
0
    return; // source truncated
648
0
  M = src_ptr[1] + 16;
649
0
  PTR_LEN_INC(src_ptr, src_len, opc_len);
650
0
  goto copy_match;
651
652
// ===============================================================
653
// Opcodes representing only a literal (no match).
654
//  These two opcodes (lrg_l and sml_l) encode only a literal.  There is no
655
//  match length or match distance to worry about (but we need to *not*
656
//  touch D, as it must be preserved between opcodes).
657
0
#if HAVE_LABELS_AS_VALUES
658
0
sml_l:
659
#else
660
  case 225:
661
  case 226:
662
  case 227:
663
  case 228:
664
  case 229:
665
  case 230:
666
  case 231:
667
  case 232:
668
  case 233:
669
  case 234:
670
  case 235:
671
  case 236:
672
  case 237:
673
  case 238:
674
  case 239:
675
#endif
676
0
  UPDATE_GOOD;
677
  //  "small literal": This opcode has no match, and encodes only a literal
678
  //  of length up to 15 bytes. The format is 1110LLLL LITERAL.
679
0
  opc_len = 1;
680
0
  L = (size_t)extract(opc, 0, 4);
681
0
  goto copy_literal;
682
683
0
#if HAVE_LABELS_AS_VALUES
684
0
lrg_l:
685
#else
686
  case 224:
687
#endif
688
0
  UPDATE_GOOD;
689
  //  "large literal": This opcode has no match, and uses the previous match
690
  //  distance (i.e. it encodes only the match length). It is encoded in two
691
  //  bytes as 11100000 LLLLLLLL LITERAL.  Because literals smaller than 16
692
  //  bytes can be represented by sml_l, there is an implicit bias of 16 on
693
  //  the literal length; the representable values are [16,271].
694
0
  opc_len = 2;
695
0
  if (src_len <= 2)
696
0
    return; // source truncated
697
0
  L = src_ptr[1] + 16;
698
0
  goto copy_literal;
699
700
0
copy_literal:
701
  //  Check that the source buffer is large enough to hold the complete
702
  //  literal and at least the first byte of the next opcode. If so, advance
703
  //  the source pointer to point to the first byte of the literal and adjust
704
  //  the source length accordingly.
705
0
  if (src_len <= opc_len + L)
706
0
    return; // source truncated
707
0
  PTR_LEN_INC(src_ptr, src_len, opc_len);
708
  //  Now we copy the literal from the source pointer to the destination.
709
0
  if (dst_len >= L + 7 && src_len >= L + 7) {
710
    //  We are not near the end of the source or destination buffers; thus
711
    //  we can safely copy the literal using wide copies, without worrying
712
    //  about reading or writing past the end of either buffer.
713
0
    size_t i;
714
0
    for (i = 0; i < L; i += 8)
715
0
      store8(&dst_ptr[i], load8(&src_ptr[i]));
716
0
  } else if (L <= dst_len) {
717
    //  We are too close to the end of either the input or output stream
718
    //  to be able to safely use an eight-byte copy. Instead we copy the
719
    //  literal byte-by-byte.
720
0
    size_t i;
721
0
    for (i = 0; i < L; ++i)
722
0
      dst_ptr[i] = src_ptr[i];
723
0
  } else {
724
    // Destination truncated: fill DST, and store partial match
725
726
    // Copy partial literal
727
0
    size_t i;
728
0
    for (i = 0; i < dst_len; ++i)
729
0
      dst_ptr[i] = src_ptr[i];
730
    // Save state
731
0
    state->src = src_ptr + dst_len;
732
0
    state->dst = dst_ptr + dst_len;
733
0
    state->L = L - dst_len;
734
0
    state->M = 0;
735
0
    state->D = D;
736
0
    return; // destination truncated
737
0
  }
738
  //  Having completed the copy of the literal, we advance both the source
739
  //  and destination pointers by the number of literal bytes.
740
0
  PTR_LEN_INC(dst_ptr, dst_len, L);
741
0
  PTR_LEN_INC(src_ptr, src_len, L);
742
  //  Load the first byte of the next opcode, and jump to its implementation.
743
0
  opc = src_ptr[0];
744
0
#if HAVE_LABELS_AS_VALUES
745
0
  goto *opc_tbl[opc];
746
#else
747
  break;
748
#endif
749
750
// ===============================================================
751
// Other opcodes
752
0
#if HAVE_LABELS_AS_VALUES
753
0
nop:
754
#else
755
  case 14:
756
  case 22:
757
#endif
758
0
  UPDATE_GOOD;
759
0
  opc_len = 1;
760
0
  if (src_len <= opc_len)
761
0
    return; // source truncated
762
0
  PTR_LEN_INC(src_ptr, src_len, opc_len);
763
0
  opc = src_ptr[0];
764
0
#if HAVE_LABELS_AS_VALUES
765
0
  goto *opc_tbl[opc];
766
#else
767
  break;
768
#endif
769
770
0
#if HAVE_LABELS_AS_VALUES
771
0
eos:
772
#else
773
  case 6:
774
#endif
775
0
  opc_len = 8;
776
0
  if (src_len < opc_len)
777
0
    return; // source truncated (here we don't need an extra byte for next op
778
            // code)
779
0
  PTR_LEN_INC(src_ptr, src_len, opc_len);
780
0
  state->end_of_stream = 1;
781
0
  UPDATE_GOOD;
782
0
  return; // end-of-stream
783
784
// ===============================================================
785
// Return on error
786
0
#if HAVE_LABELS_AS_VALUES
787
0
udef:
788
#else
789
  case 30:
790
  case 38:
791
  case 46:
792
  case 54:
793
  case 62:
794
  case 112:
795
  case 113:
796
  case 114:
797
  case 115:
798
  case 116:
799
  case 117:
800
  case 118:
801
  case 119:
802
  case 120:
803
  case 121:
804
  case 122:
805
  case 123:
806
  case 124:
807
  case 125:
808
  case 126:
809
  case 127:
810
  case 208:
811
  case 209:
812
  case 210:
813
  case 211:
814
  case 212:
815
  case 213:
816
  case 214:
817
  case 215:
818
  case 216:
819
  case 217:
820
  case 218:
821
  case 219:
822
  case 220:
823
  case 221:
824
  case 222:
825
  case 223:
826
#endif
827
0
invalid_match_distance:
828
829
0
  return; // we already updated state
830
#if !HAVE_LABELS_AS_VALUES
831
    }
832
  }
833
#endif
834
0
}
835
836
size_t lzvn_decode_buffer(void *dst, size_t dst_size,
837
0
                          const void *src, size_t src_size) {
838
  // Init LZVN decoder state
839
0
  lzvn_decoder_state dstate;
840
0
  memset(&dstate, 0x00, sizeof(dstate));
841
0
  dstate.src = src;
842
0
  dstate.src_end = (const unsigned char*) src + src_size;
843
844
0
  dstate.dst_begin = dst;
845
0
  dstate.dst = dst;
846
0
  dstate.dst_end = (unsigned char*) dst + dst_size;
847
848
0
  dstate.d_prev = 0;
849
0
  dstate.end_of_stream = 0;
850
851
  // Run LZVN decoder
852
0
  lzvn_decode(&dstate);
853
854
  // This is how much we decompressed
855
0
  return dstate.dst - (unsigned char*) dst;
856
0
}