Coverage Report

Created: 2023-12-08 06:48

/src/clamav/libclamunrar/unpack.hpp
Line
Count
Source (jump to first uncovered line)
1
#ifndef _RAR_UNPACK_
2
#define _RAR_UNPACK_
3
4
// Maximum allowed number of compressed bits processed in quick mode.
// DecodeTable::QuickLen and DecodeTable::QuickNum are sized as
// 1<<MAX_QUICK_DECODE_BITS entries.
5
1.06M
#define MAX_QUICK_DECODE_BITS      10
6
7
// Maximum number of filters per entire data block. Must be at least
8
// twice MAX_PACK_FILTERS to store filters from two data blocks.
9
4.05k
#define MAX_UNPACK_FILTERS       8192
10
11
// Maximum number of filters per entire data block for RAR3 unpack.
12
// Must be at least twice v3_MAX_PACK_FILTERS to store filters
13
// from two data blocks.
14
128k
#define MAX3_UNPACK_FILTERS      8192
15
16
// Limit the maximum number of channels in the RAR3 delta filter to a
17
// reasonable value, so corrupt archives with an invalid channel count
18
// are not processed too slowly. Must be equal or larger than v3_MAX_FILTER_CHANNELS.
19
// No need to provide it for RAR5, which uses only 5 bits to store channels.
20
0
#define MAX3_UNPACK_CHANNELS      1024
21
22
// Maximum size of a single filter block (0x400000 bytes = 4 MB). We restrict
23
// it to limit memory allocation. Must be equal or larger than MAX_ANALYZE_SIZE.
24
4.05k
#define MAX_FILTER_BLOCK_SIZE 0x400000
25
26
// Write data in 4 MB or smaller blocks. Must not exceed PACK_MAX_READ,
27
// so we keep the number of buffered filters in unpacker reasonable.
28
#define UNPACK_MAX_WRITE      0x400000
29
30
// Decode compressed bit fields to alphabet numbers.
// Holds two lookup paths for one code: a "quick" path indexed directly by
// up to QuickBits bits (QuickLen/QuickNum) and a path for longer bit fields
// (DecodeLen/DecodePos/DecodeNum).
// NOTE(review): presumably filled by Unpack::MakeDecodeTables and read by
// Unpack::DecodeNumber; only their declarations are visible here - confirm.
31
struct DecodeTable:PackDef
32
{
33
  // Real size of DecodeNum table.
34
  uint MaxNum;
35
36
  // Left aligned start and upper limit codes defining code space 
37
  // ranges for bit lengths. DecodeLen[BitLength-1] defines the start of
38
  // range for bit length and DecodeLen[BitLength] defines next code
39
  // after the end of range or in other words the upper limit code
40
  // for specified bit length.
41
  uint DecodeLen[16]; 
42
43
  // Every item of this array contains the sum of all preceding items.
44
  // So it contains the start position in code list for every bit length. 
45
  uint DecodePos[16];
46
47
  // Number of compressed bits processed in quick mode.
48
  // Must not exceed MAX_QUICK_DECODE_BITS.
49
  uint QuickBits;
50
51
  // Translates compressed bits (up to QuickBits length)
52
  // to bit length in quick mode.
53
  byte QuickLen[1<<MAX_QUICK_DECODE_BITS];
54
55
  // Translates compressed bits (up to QuickBits length)
56
  // to position in alphabet in quick mode.
57
  // 'ushort' saves some memory and even provides a little speed gain
58
  // compared to 'uint' here.
59
  ushort QuickNum[1<<MAX_QUICK_DECODE_BITS];
60
61
  // Translate the position in code list to position in alphabet.
62
  // We do not allocate it dynamically to avoid performance overhead
63
  // introduced by pointer, so we use the largest possible table size
64
  // as array dimension. Real size of this array is defined in MaxNum.
65
  // We use this array if compressed bit field is too lengthy
66
  // for QuickLen based translation.
67
  // 'ushort' saves some memory and even provides a little speed gain
68
  // compared to 'uint' here.
69
  ushort DecodeNum[LARGEST_TABLE_SIZE];
70
};
71
72
73
// Header fields of one compressed data block. Instances are passed by
// reference to Unpack::ReadBlockHeader and Unpack::ReadTables, and are stored
// as members of both Unpack and UnpackThreadData.
// NOTE(review): the field meanings below are inferred from the names only;
// confirm against the ReadBlockHeader implementation.
struct UnpackBlockHeader
74
{
75
  int BlockSize;        // presumably the block size in bytes - TODO confirm
76
  int BlockBitSize;     // presumably a bit-granular part of the size - TODO confirm
77
  int BlockStart;       // presumably the block start position in the input buffer - TODO confirm
78
  int HeaderSize;       // presumably the size of the header itself - TODO confirm
79
  bool LastBlockInFile; // presumably set for the final block of a file - TODO confirm
80
  bool TablePresent;    // presumably set if decode tables follow the header - TODO confirm
81
};
82
83
84
// The set of decode tables belonging to one data block. Passed by reference
// to Unpack::ReadTables and stored as a member of both Unpack and
// UnpackThreadData.
struct UnpackBlockTables
85
{
86
  DecodeTable LD;  // Decode literals.
87
  DecodeTable DD;  // Decode distances.
88
  DecodeTable LDD; // Decode lower bits of distances.
89
  DecodeTable RD;  // Decode repeating distances.
90
  DecodeTable BD;  // Decode bit lengths in Huffman table.
91
};
92
93
94
#ifdef RAR_SMP
95
// Kinds of decoded items (compiled only with RAR_SMP). Values start at 0.
// NOTE(review): presumably the values stored in UnpackDecodedItem::Type,
// which is declared as 'byte' instead of an enum type - confirm.
enum UNP_DEC_TYPE {
96
  UNPDT_LITERAL=0,UNPDT_MATCH,UNPDT_FULLREP,UNPDT_REP,UNPDT_FILTER
97
};
98
99
// One decoded item (compiled only with RAR_SMP). UnpackThreadData keeps a
// buffer of these in its Decoded pointer.
struct UnpackDecodedItem
100
{
101
  byte Type; // 'byte' instead of enum type to reduce memory use.
102
  ushort Length;
103
  // Distance and Literal share the same storage; which one is valid
  // presumably depends on Type - TODO confirm against the decoder.
  union
104
  {
105
    uint Distance;
106
    byte Literal[8]; // Store up to 8 chars here to speed up extraction.
107
  };
108
};
109
110
111
struct UnpackThreadData
112
{
113
  Unpack *UnpackPtr;
114
  BitInput Inp;
115
  bool HeaderRead;
116
  UnpackBlockHeader BlockHeader;
117
  bool TableRead;
118
  UnpackBlockTables BlockTables;
119
  int DataSize;    // Data left in buffer. Can be less than block size.
120
  bool DamagedData;
121
  bool LargeBlock;
122
  bool NoDataLeft; // 'true' if file is read completely.
123
  bool Incomplete; // Not entire block was processed, need to read more data.
124
125
  UnpackDecodedItem *Decoded;
126
  uint DecodedSize;
127
  uint DecodedAllocated;
128
  uint ThreadNumber; // For debugging.
129
130
  UnpackThreadData()
131
  :Inp(false)
132
  {
133
    Decoded=NULL;
134
  }
135
  ~UnpackThreadData()
136
  {
137
    if (Decoded!=NULL)
138
      free(Decoded);
139
  }
140
};
141
#endif
142
143
144
// Description of one filter. Stored in Unpack::Filters and passed to
// Unpack::ApplyFilter, Unpack::ReadFilter and Unpack::AddFilter.
// NOTE(review): field meanings are inferred from names only - confirm
// against ReadFilter/ApplyFilter.
struct UnpackFilter
145
{
146
  byte Type;        // presumably the filter kind selector - TODO confirm
147
  uint BlockStart;  // presumably the start of the filtered data - TODO confirm
148
  uint BlockLength; // presumably the length of the filtered data - TODO confirm
149
  byte Channels;    // presumably the channel count for a delta filter - TODO confirm
150
//  uint Width;
151
//  byte PosR;
152
  bool NextWindow;
153
};
154
155
156
// Filter description for the v3.0 unpack code. Unpack keeps pointers to these
// in its Filters30 and PrgStack arrays.
struct UnpackFilter30
157
{
158
  unsigned int BlockStart;
159
  unsigned int BlockLength;
160
  bool NextWindow;
161
162
  // Position of parent filter in Filters array used as prototype for filter
163
  // in PrgStack array. Not defined for filters in Filters array.
164
  unsigned int ParentFilter;
165
166
  // Prepared VM program for this filter; Unpack::ExecuteCode takes a
  // VM_PreparedProgram pointer.
  VM_PreparedProgram Prg;
167
};
168
169
170
// Unpack holds four of these in AudV[4], alongside the four-entry MD table
// array commented as "up to 4 channels".
// NOTE(review): presumably the per-channel predictor state used by
// Unpack::DecodeAudio - confirm against the v2.0 implementation.
struct AudioVariables // For RAR 2.0 archives only.
171
{
172
  int K1,K2,K3,K4,K5;
173
  int D1,D2,D3,D4;
174
  int LastDelta;
175
  unsigned int Dif[11];
176
  unsigned int ByteCount;
177
  int LastChar;
178
};
179
180
181
// We can use the fragmented dictionary in case the heap does not have a single
182
// large enough memory block. It is slower than the normal dictionary.
183
class FragmentedWindow
184
{
185
  private:
186
    // Upper bound on the number of fragments; sizes the Mem and MemSize arrays.
    enum {MAX_MEM_BLOCKS=32};
187
188
    void Reset();
189
    // Fragment pointers and, in the parallel array, a size per fragment.
    byte *Mem[MAX_MEM_BLOCKS];
190
    size_t MemSize[MAX_MEM_BLOCKS];
191
  public:
192
    FragmentedWindow();
193
    ~FragmentedWindow();
194
    void Init(size_t WinSize);
195
    // Returns a reference to one window byte by index.
    byte& operator [](size_t Item);
196
    void CopyString(uint Length,uint Distance,size_t &UnpPtr,size_t MaxWinMask);
197
    void CopyData(byte *Dest,size_t WinPos,size_t Size);
198
    size_t GetBlockSize(size_t StartPos,size_t RequiredSize);
199
};
200
201
202
// RAR decompressor state and entry points. The private part is grouped by
// format version: the Unpack5*/50 functions first, then the sections marked
// "Unpack v 1.5", "Unpack v 2.0" and "Unpack v 3.0". Inherits privately from
// PackDef. Only GetChar and three small accessors are defined inline here.
class Unpack:PackDef
203
{
204
  private:
205
206
    void Unpack5(bool Solid);
207
    void Unpack5MT(bool Solid);
208
    bool UnpReadBuf();
209
    void UnpWriteBuf();
210
    byte* ApplyFilter(byte *Data,uint DataSize,UnpackFilter *Flt);
211
    void UnpWriteArea(size_t StartPtr,size_t EndPtr);
212
    void UnpWriteData(byte *Data,size_t Size);
213
    _forceinline uint SlotToLength(BitInput &Inp,uint Slot);
214
    void UnpInitData50(bool Solid);
215
    bool ReadBlockHeader(BitInput &Inp,UnpackBlockHeader &Header);
216
    bool ReadTables(BitInput &Inp,UnpackBlockHeader &Header,UnpackBlockTables &Tables);
217
    void MakeDecodeTables(byte *LengthTable,DecodeTable *Dec,uint Size);
218
    _forceinline uint DecodeNumber(BitInput &Inp,DecodeTable *Dec);
219
    void CopyString();
220
    inline void InsertOldDist(unsigned int Distance);
221
    void UnpInitData(bool Solid);
222
    _forceinline void CopyString(uint Length,uint Distance);
223
    uint ReadFilterData(BitInput &Inp);
224
    bool ReadFilter(BitInput &Inp,UnpackFilter &Filter);
225
    bool AddFilter(UnpackFilter &Filter);
226
    bool AddFilter();
227
    void InitFilters();
228
229
    ComprDataIO *UnpIO;
230
    // Input buffer read by GetChar below.
    BitInput Inp;
231
232
#ifdef RAR_SMP
233
    void InitMT();
234
    bool UnpackLargeBlock(UnpackThreadData &D);
235
    bool ProcessDecoded(UnpackThreadData &D);
236
237
    ThreadPool *UnpThreadPool;
238
    UnpackThreadData *UnpThreadData;
239
    uint MaxUserThreads;
240
    byte *ReadBufMT;
241
#endif
242
243
    Array<byte> FilterSrcMemory;
244
    Array<byte> FilterDstMemory;
245
246
    // Filters code, one entry per filter.
247
    Array<UnpackFilter> Filters;
248
249
    uint OldDist[4],OldDistPtr;
250
    uint LastLength;
251
252
    // LastDist is necessary only for RAR2 and older with circular OldDist
253
    // array. In RAR3 last distance is always stored in OldDist[0].
254
    uint LastDist;
255
256
    size_t UnpPtr,WrPtr;
257
    
258
    // Top border of read packed data.
259
    int ReadTop; 
260
261
    // Border to call UnpReadBuf. We use it instead of (ReadTop-C)
262
    // for optimization reasons. Ensures that we have C bytes in buffer
263
    // unless we are at the end of file.
264
    int ReadBorder;
265
266
    UnpackBlockHeader BlockHeader;
267
    UnpackBlockTables BlockTables;
268
269
    size_t WriteBorder;
270
271
    byte *Window;
272
273
    FragmentedWindow FragWindow;
274
    bool Fragmented;
275
276
277
    // Set by SetDestSize.
    int64 DestUnpSize;
278
279
    // Set by SetSuspended.
    bool Suspended;
280
    bool UnpAllBuf;
281
    bool UnpSomeRead;
282
    int64 WrittenFileSize;
283
    // Reported by IsFileExtracted; cleared by SetDestSize.
    bool FileExtracted;
284
285
286
/***************************** Unpack v 1.5 *********************************/
287
    void Unpack15(bool Solid);
288
    void ShortLZ();
289
    void LongLZ();
290
    void HuffDecode();
291
    void GetFlagsBuf();
292
    // NOTE(review): takes 'int Solid' while the other UnpInitData* variants
    // (except UnpInitData20) take 'bool Solid'.
    void UnpInitData15(int Solid);
293
    void InitHuff();
294
    void CorrHuff(ushort *CharSet,byte *NumToPlace);
295
    void CopyString15(uint Distance,uint Length);
296
    uint DecodeNum(uint Num,uint StartPos,uint *DecTab,uint *PosTab);
297
298
    ushort ChSet[256],ChSetA[256],ChSetB[256],ChSetC[256];
299
    byte NToPl[256],NToPlB[256],NToPlC[256];
300
    uint FlagBuf,AvrPlc,AvrPlcB,AvrLn1,AvrLn2,AvrLn3;
301
    int Buf60,NumHuf,StMode,LCount,FlagsCnt;
302
    uint Nhfb,Nlzb,MaxDist3;
303
/***************************** Unpack v 1.5 *********************************/
304
305
/***************************** Unpack v 2.0 *********************************/
306
    void Unpack20(bool Solid);
307
308
    DecodeTable MD[4]; // Decode multimedia data, up to 4 channels.
309
310
    unsigned char UnpOldTable20[MC20*4];
311
    bool UnpAudioBlock;
312
    uint UnpChannels,UnpCurChannel;
313
    int UnpChannelDelta;
314
    // Note the parameter order (Length,Distance); CopyString15 above takes
    // (Distance,Length).
    void CopyString20(uint Length,uint Distance);
315
    bool ReadTables20();
316
    void UnpWriteBuf20();
317
    void UnpInitData20(int Solid);
318
    void ReadLastTables();
319
    byte DecodeAudio(int Delta);
320
    struct AudioVariables AudV[4];
321
/***************************** Unpack v 2.0 *********************************/
322
323
/***************************** Unpack v 3.0 *********************************/
324
    enum BLOCK_TYPES {BLOCK_LZ,BLOCK_PPM};
325
326
    void UnpInitData30(bool Solid);
327
    void Unpack29(bool Solid);
328
    void InitFilters30(bool Solid);
329
    bool ReadEndOfBlock();
330
    bool ReadVMCode();
331
    bool ReadVMCodePPM();
332
    bool AddVMCode(uint FirstByte,byte *Code,uint CodeSize);
333
    int SafePPMDecodeChar();
334
    bool ReadTables30();
335
    bool UnpReadBuf30();
336
    void UnpWriteBuf30();
337
    void ExecuteCode(VM_PreparedProgram *Prg);
338
339
    int PrevLowDist,LowDistRepCount;
340
341
    ModelPPM PPM;
342
    int PPMEscChar;
343
344
    byte UnpOldTable[HUFF_TABLE_SIZE30];
345
    // NOTE(review): presumably holds a BLOCK_TYPES value - confirm.
    int UnpBlockType;
346
347
    // If we already read decoding tables for Unpack v2,v3,v5.
348
    // We should not use a single variable for all algorithm versions,
349
    // because we can have a corrupt archive with one algorithm file
350
    // followed by another algorithm file with "solid" flag and we do not
351
    // want to reuse tables from one algorithm in another.
352
    bool TablesRead2,TablesRead3,TablesRead5;
353
354
    // Virtual machine to execute filters code.
355
    RarVM VM;
356
  
357
    // Buffer to read VM filters code. We moved it here from AddVMCode
358
    // function to reduce time spent in BitInput constructor.
359
    BitInput VMCodeInp;
360
361
    // Filters code, one entry per filter.
362
    Array<UnpackFilter30 *> Filters30;
363
364
    // Filters stack, several entrances of same filter are possible.
365
    Array<UnpackFilter30 *> PrgStack;
366
367
    // Lengths of preceding data blocks, one length of one last block
368
    // for every filter. Used to reduce the size required to write
369
    // the data block length if lengths are repeating.
370
    Array<int> OldFilterLengths;
371
372
    int LastFilter;
373
/***************************** Unpack v 3.0 *********************************/
374
375
  public:
376
    Unpack(ComprDataIO *DataIO);
377
    ~Unpack();
378
    void Init(size_t WinSize,bool Solid);
379
    void DoUnpack(uint Method,bool Solid);
380
0
    // Returns the current FileExtracted flag.
    bool IsFileExtracted() {return(FileExtracted);}
381
212k
    // Stores the expected unpacked size and clears the FileExtracted flag.
    void SetDestSize(int64 DestSize) {DestUnpSize=DestSize;FileExtracted=false;}
382
0
    // The parameter shadows the member, hence the Unpack:: qualification.
    void SetSuspended(bool Suspended) {Unpack::Suspended=Suspended;}
383
384
#ifdef RAR_SMP
385
    void SetThreads(uint Threads);
386
    void UnpackDecode(UnpackThreadData &D);
387
#endif
388
389
    size_t MaxWinSize;
390
    size_t MaxWinMask;
391
392
    // Returns the next byte of Inp.InBuf and advances Inp.InAddr.
    // If InAddr is past BitInput::MAX_SIZE-30, UnpReadBuf() is called first;
    // its return value is ignored and InAddr is rechecked instead.
    // Returns 0 if InAddr is still at or beyond MAX_SIZE after that call.
    uint GetChar()
393
1.82M
    {
394
1.82M
      if (Inp.InAddr>BitInput::MAX_SIZE-30)
395
0
      {
396
0
        UnpReadBuf();
397
0
        if (Inp.InAddr>=BitInput::MAX_SIZE) // If nothing was read.
398
0
          return 0;
399
0
      }
400
1.82M
      return Inp.InBuf[Inp.InAddr++];
401
1.82M
    }
402
};
403
404
#endif