/src/clamav/libclamunrar/unpack.hpp
Line | Count | Source (jump to first uncovered line) |
1 | | #ifndef _RAR_UNPACK_ |
2 | | #define _RAR_UNPACK_ |
3 | | |
4 | | // Maximum allowed number of compressed bits processed in quick mode. |
5 | 1.06M | #define MAX_QUICK_DECODE_BITS 10 |
6 | | |
7 | | // Maximum number of filters per entire data block. Must be at least |
8 | | // twice as large as MAX_PACK_FILTERS to store filters from two data blocks. |
9 | 4.05k | #define MAX_UNPACK_FILTERS 8192 |
10 | | |
11 | | // Maximum number of filters per entire data block for RAR3 unpack. |
12 | | // Must be at least twice as large as v3_MAX_PACK_FILTERS to store filters |
13 | | // from two data blocks. |
14 | 128k | #define MAX3_UNPACK_FILTERS 8192 |
15 | | |
16 | | // Limit the maximum number of channels in the RAR3 delta filter to a |
17 | | // reasonable value, so that corrupt archives with an invalid channel count |
18 | | // are not processed too slowly. Must be equal to or larger than v3_MAX_FILTER_CHANNELS. |
19 | | // No need to provide it for RAR5, which uses only 5 bits to store channels. |
20 | 0 | #define MAX3_UNPACK_CHANNELS 1024 |
21 | | |
22 | | // Maximum size of a single filter block. We restrict it to limit memory |
23 | | // allocation. Must be equal to or larger than MAX_ANALYZE_SIZE. |
24 | 4.05k | #define MAX_FILTER_BLOCK_SIZE 0x400000 |
25 | | |
26 | | // Write data in 4 MB or smaller blocks. Must not exceed PACK_MAX_READ, |
27 | | // so we keep the number of buffered filters in unpacker reasonable. |
28 | | #define UNPACK_MAX_WRITE 0x400000 |
29 | | |
30 | | // Tables used to decode compressed bit fields to alphabet numbers. |
31 | | struct DecodeTable:PackDef |
32 | | { |
33 | | // Real size of DecodeNum table. |
34 | | uint MaxNum; |
35 | | |
36 | | // Left aligned start and upper limit codes defining code space |
37 | | // ranges for bit lengths. DecodeLen[BitLength-1] defines the start of |
38 | | // range for bit length and DecodeLen[BitLength] defines next code |
39 | | // after the end of range or in other words the upper limit code |
40 | | // for specified bit length. |
41 | | uint DecodeLen[16]; |
42 | | |
43 | | // Every item of this array contains the sum of all preceding items. |
44 | | // So it contains the start position in code list for every bit length. |
45 | | uint DecodePos[16]; |
46 | | |
47 | | // Number of compressed bits processed in quick mode. Must not exceed |
48 | | // MAX_QUICK_DECODE_BITS: QuickLen and QuickNum have 1<<MAX_QUICK_DECODE_BITS entries. |
49 | | uint QuickBits; |
50 | | |
51 | | // Translates compressed bits (up to QuickBits length) |
52 | | // to bit length in quick mode. |
53 | | byte QuickLen[1<<MAX_QUICK_DECODE_BITS]; |
54 | | |
55 | | // Translates compressed bits (up to QuickBits length) |
56 | | // to position in alphabet in quick mode. |
57 | | // 'ushort' saves some memory and even provides a little speed gain |
58 | | // compared to 'uint' here. |
59 | | ushort QuickNum[1<<MAX_QUICK_DECODE_BITS]; |
60 | | |
61 | | // Translate the position in code list to position in alphabet. |
62 | | // We do not allocate it dynamically to avoid performance overhead |
63 | | // introduced by pointer, so we use the largest possible table size |
64 | | // as array dimension. Real size of this array is defined in MaxNum. |
65 | | // We use this array if compressed bit field is too lengthy |
66 | | // for QuickLen based translation. |
67 | | // 'ushort' saves some memory and even provides a little speed gain |
68 | | // compared to 'uint' here. |
69 | | ushort DecodeNum[LARGEST_TABLE_SIZE]; |
70 | | }; |
71 | | |
72 | | |
73 | | struct UnpackBlockHeader /* Block header fields; passed by reference as the Header argument of Unpack::ReadBlockHeader and Unpack::ReadTables. */ |
74 | | { |
75 | | int BlockSize; |
76 | | int BlockBitSize; |
77 | | int BlockStart; |
78 | | int HeaderSize; |
79 | | bool LastBlockInFile; /* NOTE(review): presumably marks the final block of a file - confirm in ReadBlockHeader. */ |
80 | | bool TablePresent; /* NOTE(review): presumably tells whether decode tables accompany the block - confirm in ReadTables. */ |
81 | | }; |
82 | | |
83 | | |
84 | | struct UnpackBlockTables /* Group of five DecodeTable instances; passed by reference as the Tables argument of Unpack::ReadTables. */ |
85 | | { |
86 | | DecodeTable LD; // Decode literals. |
87 | | DecodeTable DD; // Decode distances. |
88 | | DecodeTable LDD; // Decode lower bits of distances. |
89 | | DecodeTable RD; // Decode repeating distances. |
90 | | DecodeTable BD; // Decode bit lengths in Huffman table. |
91 | | }; |
92 | | |
93 | | |
94 | | #ifdef RAR_SMP |
95 | | enum UNP_DEC_TYPE { /* Only compiled when RAR_SMP is defined. NOTE(review): presumably the values stored in UnpackDecodedItem::Type - confirm. */ |
96 | | UNPDT_LITERAL=0,UNPDT_MATCH,UNPDT_FULLREP,UNPDT_REP,UNPDT_FILTER |
97 | | }; |
98 | | |
99 | | struct UnpackDecodedItem /* Element type of UnpackThreadData::Decoded. Only compiled when RAR_SMP is defined. */ |
100 | | { |
101 | | byte Type; // 'byte' instead of enum type to reduce memory use. |
102 | | ushort Length; |
103 | | union /* Anonymous union: Distance and Literal share the same storage. */ |
104 | | { |
105 | | uint Distance; |
106 | | byte Literal[8]; // Store up to 8 chars here to speed up extraction. |
107 | | }; |
108 | | }; |
109 | | |
110 | | |
111 | | struct UnpackThreadData /* State passed by reference to Unpack::UnpackDecode, UnpackLargeBlock and ProcessDecoded. Only compiled when RAR_SMP is defined. */ |
112 | | { |
113 | | Unpack *UnpackPtr; |
114 | | BitInput Inp; |
115 | | bool HeaderRead; |
116 | | UnpackBlockHeader BlockHeader; |
117 | | bool TableRead; |
118 | | UnpackBlockTables BlockTables; |
119 | | int DataSize; // Data left in buffer. Can be less than block size. |
120 | | bool DamagedData; |
121 | | bool LargeBlock; |
122 | | bool NoDataLeft; // 'true' if file is read completely. |
123 | | bool Incomplete; // Not entire block was processed, need to read more data. |
124 | | |
125 | | UnpackDecodedItem *Decoded; /* Released with free() in the destructor. NOTE(review): no copy constructor or assignment is declared, so a copied object would free this pointer twice. */ |
126 | | uint DecodedSize; |
127 | | uint DecodedAllocated; |
128 | | uint ThreadNumber; // For debugging. |
129 | | |
130 | | UnpackThreadData() /* Constructs Inp with 'false' and sets Decoded to NULL; no other member is assigned here. */ |
131 | | :Inp(false) |
132 | | { |
133 | | Decoded=NULL; |
134 | | } |
135 | | ~UnpackThreadData() /* Frees Decoded with free() when it is not NULL, so the buffer must come from a malloc-family allocator. */ |
136 | | { |
137 | | if (Decoded!=NULL) |
138 | | free(Decoded); |
139 | | } |
140 | | }; |
141 | | #endif |
142 | | |
143 | | |
144 | | struct UnpackFilter /* Filter record taken by Unpack::ReadFilter, Unpack::AddFilter and Unpack::ApplyFilter; element type of Unpack::Filters. */ |
145 | | { |
146 | | byte Type; |
147 | | uint BlockStart; |
148 | | uint BlockLength; |
149 | | byte Channels; |
150 | | // uint Width; |
151 | | // byte PosR; |
152 | | bool NextWindow; |
153 | | }; |
154 | | |
155 | | |
156 | | struct UnpackFilter30 /* Filter record for the "Unpack v 3.0" section of Unpack; pointers to it are kept in Unpack::Filters30 and Unpack::PrgStack. */ |
157 | | { |
158 | | unsigned int BlockStart; |
159 | | unsigned int BlockLength; |
160 | | bool NextWindow; |
161 | | |
162 | | // Position of parent filter in Filters array used as prototype for filter |
163 | | // in PrgStack array. Not defined for filters in Filters array. |
164 | | unsigned int ParentFilter; |
165 | | |
166 | | VM_PreparedProgram Prg; /* Same type that Unpack::ExecuteCode takes by pointer. */ |
167 | | }; |
168 | | |
169 | | |
170 | | struct AudioVariables // For RAR 2.0 archives only. Unpack keeps four of these in AudV[4]. |
171 | | { |
172 | | int K1,K2,K3,K4,K5; |
173 | | int D1,D2,D3,D4; |
174 | | int LastDelta; |
175 | | unsigned int Dif[11]; |
176 | | unsigned int ByteCount; |
177 | | int LastChar; |
178 | | }; |
179 | | |
180 | | |
181 | | // We can use the fragmented dictionary when the heap does not have a single |
182 | | // large enough memory block. It is slower than the normal dictionary. |
183 | | class FragmentedWindow |
184 | | { |
185 | | private: |
186 | | enum {MAX_MEM_BLOCKS=32}; /* Number of slots in both Mem and MemSize. */ |
187 | | |
188 | | void Reset(); |
189 | | byte *Mem[MAX_MEM_BLOCKS]; |
190 | | size_t MemSize[MAX_MEM_BLOCKS]; |
191 | | public: |
192 | | FragmentedWindow(); |
193 | | ~FragmentedWindow(); |
194 | | void Init(size_t WinSize); |
195 | | byte& operator [](size_t Item); |
196 | | void CopyString(uint Length,uint Distance,size_t &UnpPtr,size_t MaxWinMask); |
197 | | void CopyData(byte *Dest,size_t WinPos,size_t Size); |
198 | | size_t GetBlockSize(size_t StartPos,size_t RequiredSize); |
199 | | }; |
200 | | |
201 | | |
202 | | class Unpack:PackDef /* Unpacker with separate sections for Unpack v 1.5, v 2.0, v 3.0 and the Unpack5 routines. Inherits PackDef privately (class default). */ |
203 | | { |
204 | | private: |
205 | | |
206 | | void Unpack5(bool Solid); |
207 | | void Unpack5MT(bool Solid); |
208 | | bool UnpReadBuf(); |
209 | | void UnpWriteBuf(); |
210 | | byte* ApplyFilter(byte *Data,uint DataSize,UnpackFilter *Flt); |
211 | | void UnpWriteArea(size_t StartPtr,size_t EndPtr); |
212 | | void UnpWriteData(byte *Data,size_t Size); |
213 | | _forceinline uint SlotToLength(BitInput &Inp,uint Slot); |
214 | | void UnpInitData50(bool Solid); |
215 | | bool ReadBlockHeader(BitInput &Inp,UnpackBlockHeader &Header); |
216 | | bool ReadTables(BitInput &Inp,UnpackBlockHeader &Header,UnpackBlockTables &Tables); |
217 | | void MakeDecodeTables(byte *LengthTable,DecodeTable *Dec,uint Size); |
218 | | _forceinline uint DecodeNumber(BitInput &Inp,DecodeTable *Dec); |
219 | | void CopyString(); |
220 | | inline void InsertOldDist(unsigned int Distance); |
221 | | void UnpInitData(bool Solid); |
222 | | _forceinline void CopyString(uint Length,uint Distance); |
223 | | uint ReadFilterData(BitInput &Inp); |
224 | | bool ReadFilter(BitInput &Inp,UnpackFilter &Filter); |
225 | | bool AddFilter(UnpackFilter &Filter); |
226 | | bool AddFilter(); |
227 | | void InitFilters(); |
228 | | |
229 | | ComprDataIO *UnpIO; |
230 | | BitInput Inp; |
231 | | |
232 | | #ifdef RAR_SMP |
233 | | void InitMT(); |
234 | | bool UnpackLargeBlock(UnpackThreadData &D); |
235 | | bool ProcessDecoded(UnpackThreadData &D); |
236 | | |
237 | | ThreadPool *UnpThreadPool; |
238 | | UnpackThreadData *UnpThreadData; |
239 | | uint MaxUserThreads; |
240 | | byte *ReadBufMT; |
241 | | #endif |
242 | | |
243 | | Array<byte> FilterSrcMemory; |
244 | | Array<byte> FilterDstMemory; |
245 | | |
246 | | // Filters code, one entry per filter. |
247 | | Array<UnpackFilter> Filters; |
248 | | |
249 | | uint OldDist[4],OldDistPtr; |
250 | | uint LastLength; |
251 | | |
252 | | // LastDist is necessary only for RAR2 and older with circular OldDist |
253 | | // array. In RAR3 last distance is always stored in OldDist[0]. |
254 | | uint LastDist; |
255 | | |
256 | | size_t UnpPtr,WrPtr; |
257 | | |
258 | | // Top border of read packed data. |
259 | | int ReadTop; |
260 | | |
261 | | // Border to call UnpReadBuf. We use it instead of (ReadTop-C) |
262 | | // for optimization reasons. Ensures that we have C bytes in buffer |
263 | | // unless we are at the end of file. |
264 | | int ReadBorder; |
265 | | |
266 | | UnpackBlockHeader BlockHeader; |
267 | | UnpackBlockTables BlockTables; |
268 | | |
269 | | size_t WriteBorder; |
270 | | |
271 | | byte *Window; |
272 | | |
273 | | FragmentedWindow FragWindow; |
274 | | bool Fragmented; |
275 | | |
276 | | |
277 | | int64 DestUnpSize; |
278 | | |
279 | | bool Suspended; |
280 | | bool UnpAllBuf; |
281 | | bool UnpSomeRead; |
282 | | int64 WrittenFileSize; |
283 | | bool FileExtracted; |
284 | | |
285 | | |
286 | | /***************************** Unpack v 1.5 *********************************/ |
287 | | void Unpack15(bool Solid); |
288 | | void ShortLZ(); |
289 | | void LongLZ(); |
290 | | void HuffDecode(); |
291 | | void GetFlagsBuf(); |
292 | | void UnpInitData15(int Solid); /* NOTE(review): Solid is int here and in UnpInitData20, but bool in UnpInitData, UnpInitData30 and UnpInitData50. */ |
293 | | void InitHuff(); |
294 | | void CorrHuff(ushort *CharSet,byte *NumToPlace); |
295 | | void CopyString15(uint Distance,uint Length); /* Argument order is (Distance, Length), the reverse of CopyString and CopyString20. */ |
296 | | uint DecodeNum(uint Num,uint StartPos,uint *DecTab,uint *PosTab); |
297 | | |
298 | | ushort ChSet[256],ChSetA[256],ChSetB[256],ChSetC[256]; |
299 | | byte NToPl[256],NToPlB[256],NToPlC[256]; |
300 | | uint FlagBuf,AvrPlc,AvrPlcB,AvrLn1,AvrLn2,AvrLn3; |
301 | | int Buf60,NumHuf,StMode,LCount,FlagsCnt; |
302 | | uint Nhfb,Nlzb,MaxDist3; |
303 | | /***************************** Unpack v 1.5 *********************************/ |
304 | | |
305 | | /***************************** Unpack v 2.0 *********************************/ |
306 | | void Unpack20(bool Solid); |
307 | | |
308 | | DecodeTable MD[4]; // Decode multimedia data, up to 4 channels. |
309 | | |
310 | | unsigned char UnpOldTable20[MC20*4]; |
311 | | bool UnpAudioBlock; |
312 | | uint UnpChannels,UnpCurChannel; |
313 | | int UnpChannelDelta; |
314 | | void CopyString20(uint Length,uint Distance); |
315 | | bool ReadTables20(); |
316 | | void UnpWriteBuf20(); |
317 | | void UnpInitData20(int Solid); |
318 | | void ReadLastTables(); |
319 | | byte DecodeAudio(int Delta); |
320 | | struct AudioVariables AudV[4]; |
321 | | /***************************** Unpack v 2.0 *********************************/ |
322 | | |
323 | | /***************************** Unpack v 3.0 *********************************/ |
324 | | enum BLOCK_TYPES {BLOCK_LZ,BLOCK_PPM}; |
325 | | |
326 | | void UnpInitData30(bool Solid); |
327 | | void Unpack29(bool Solid); |
328 | | void InitFilters30(bool Solid); |
329 | | bool ReadEndOfBlock(); |
330 | | bool ReadVMCode(); |
331 | | bool ReadVMCodePPM(); |
332 | | bool AddVMCode(uint FirstByte,byte *Code,uint CodeSize); |
333 | | int SafePPMDecodeChar(); |
334 | | bool ReadTables30(); |
335 | | bool UnpReadBuf30(); |
336 | | void UnpWriteBuf30(); |
337 | | void ExecuteCode(VM_PreparedProgram *Prg); |
338 | | |
339 | | int PrevLowDist,LowDistRepCount; |
340 | | |
341 | | ModelPPM PPM; |
342 | | int PPMEscChar; |
343 | | |
344 | | byte UnpOldTable[HUFF_TABLE_SIZE30]; |
345 | | int UnpBlockType; |
346 | | |
347 | | // Whether we have already read decoding tables for Unpack v2,v3,v5. |
348 | | // We should not use a single variable for all algorithm versions, |
349 | | // because we can have a corrupt archive with one algorithm file |
350 | | // followed by another algorithm file with "solid" flag and we do not |
351 | | // want to reuse tables from one algorithm in another. |
352 | | bool TablesRead2,TablesRead3,TablesRead5; |
353 | | |
354 | | // Virtual machine to execute filters code. |
355 | | RarVM VM; |
356 | | |
357 | | // Buffer to read VM filters code. We moved it here from AddVMCode |
358 | | // function to reduce time spent in BitInput constructor. |
359 | | BitInput VMCodeInp; |
360 | | |
361 | | // Filters code, one entry per filter. |
362 | | Array<UnpackFilter30 *> Filters30; |
363 | | |
364 | | // Filters stack, several occurrences of the same filter are possible. |
365 | | Array<UnpackFilter30 *> PrgStack; |
366 | | |
367 | | // Lengths of preceding data blocks, one length of one last block |
368 | | // for every filter. Used to reduce the size required to write |
369 | | // the data block length if lengths are repeating. |
370 | | Array<int> OldFilterLengths; |
371 | | |
372 | | int LastFilter; |
373 | | /***************************** Unpack v 3.0 *********************************/ |
374 | | |
375 | | public: |
376 | | Unpack(ComprDataIO *DataIO); |
377 | | ~Unpack(); |
378 | | void Init(size_t WinSize,bool Solid); |
379 | | void DoUnpack(uint Method,bool Solid); |
380 | 0 | bool IsFileExtracted() {return(FileExtracted);} |
381 | 212k | void SetDestSize(int64 DestSize) {DestUnpSize=DestSize;FileExtracted=false;} /* Stores DestSize in DestUnpSize and also resets FileExtracted to false. */ |
382 | 0 | void SetSuspended(bool Suspended) {Unpack::Suspended=Suspended;} /* The parameter shadows the member, hence the Unpack:: qualification. */ |
383 | | |
384 | | #ifdef RAR_SMP |
385 | | void SetThreads(uint Threads); |
386 | | void UnpackDecode(UnpackThreadData &D); |
387 | | #endif |
388 | | |
389 | | size_t MaxWinSize; |
390 | | size_t MaxWinMask; |
391 | | |
392 | | uint GetChar() /* Returns the byte at Inp.InBuf[Inp.InAddr] and advances InAddr; returns 0 if InAddr is still at or past BitInput::MAX_SIZE after a refill attempt. */ |
393 | 1.82M | { |
394 | 1.82M | if (Inp.InAddr>BitInput::MAX_SIZE-30) /* Read position is within the last 30 positions before MAX_SIZE: try to refill. */ |
395 | 0 | { |
396 | 0 | UnpReadBuf(); /* The bool result is ignored; the InAddr check below decides. */ |
397 | 0 | if (Inp.InAddr>=BitInput::MAX_SIZE) // If nothing was read. |
398 | 0 | return 0; |
399 | 0 | } |
400 | 1.82M | return Inp.InBuf[Inp.InAddr++]; |
401 | 1.82M | } |
402 | | }; |
403 | | |
404 | | #endif |