1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 """A WKdm decompressor.
19
20 This code is very closely based on the C implementation by
21
22 Paul Wilson -- wilson@cs.utexas.edu
23
24 and
25
26 Scott F. Kaplan -- sfkaplan@cs.utexas.edu
27
28 from September 1997.
29 """
30
31 __author__ = "Andreas Moser <amoser@google.com>"
32
33 import itertools
34 import math
35 import struct
36 import time
37
38 DICTIONARY_SIZE = 16
39
40 TAGS_AREA_OFFSET = 4
41 TAGS_AREA_SIZE = 64
42
43 NUM_LOW_BITS = 10
44 LOW_BITS_MASK = 0x3FF
45
46 ZERO_TAG = 0x0
47 PARTIAL_TAG = 0x1
48 MISS_TAG = 0x2
49 EXACT_TAG = 0x3
50
51
52
53
54
55
56
57
58
59
60 HASH_LOOKUP_TABLE_CONTENTS = [
61 0, 13, 2, 14, 4, 3, 7, 5, 1, 9, 12, 6, 11, 10, 8, 15,
62 2, 3, 7, 5, 1, 15, 4, 9, 6, 12, 11, 8, 13, 14, 10, 3,
63 2, 12, 4, 13, 15, 7, 14, 8, 5, 6, 9, 10, 11, 1, 2, 10,
64 15, 8, 5, 11, 1, 9, 13, 6, 4, 14, 12, 3, 7, 4, 2, 10,
65 9, 7, 8, 3, 1, 11, 13, 5, 6, 12, 15, 14, 10, 12, 2, 8,
66 7, 9, 1, 11, 5, 14, 15, 6, 13, 4, 3, 3, 1, 12, 5, 2,
67 13, 4, 15, 6, 9, 11, 7, 14, 10, 8, 9, 5, 6, 15, 10, 11,
68 13, 4, 8, 1, 12, 2, 7, 14, 3, 7, 8, 10, 13, 9, 4, 5,
69 12, 2, 1, 15, 6, 14, 11, 3, 2, 9, 6, 7, 4, 15, 5, 14,
70 8, 10, 12, 3, 1, 11, 13, 11, 10, 3, 14, 2, 9, 6, 15, 7,
71 12, 1, 8, 5, 4, 13, 15, 3, 6, 9, 2, 1, 4, 14, 12, 11,
72 10, 13, 8, 5, 7, 8, 3, 9, 7, 6, 14, 10, 4, 13, 11, 1,
73 5, 15, 2, 12, 12, 13, 3, 5, 8, 11, 9, 7, 1, 10, 6, 2,
74 14, 15, 4, 9, 8, 2, 10, 1, 13, 6, 11, 5, 3, 7, 12, 14,
75 4, 15, 1, 13, 15, 12, 5, 4, 14, 11, 6, 2, 10, 3, 8, 7,
76 9, 6, 8, 3, 1, 5, 4, 15, 9, 7, 2, 13, 10, 12, 11, 14
77 ]
78
79
80
81
82
84 res = []
85 it = itertools.izip(*([iter(source_buf)] * 16))
86 for (in1, in2, in3, in4, in5, in6, in7, in8,
87 in9, in10, in11, in12, in13, in14, in15, in16) in it:
88 res.extend([
89 in1 + (in5 << 2) + (in9 << 4) + (in13 << 6),
90 in2 + (in6 << 2) + (in10 << 4) + (in14 << 6),
91 in3 + (in7 << 2) + (in11 << 4) + (in15 << 6),
92 in4 + (in8 << 2) + (in12 << 4) + (in16 << 6)
93 ])
94
95 return res
96
97
98
99
100
101
102
103
105 res = []
106 it = itertools.izip(*([iter(source_buf)] * 8))
107 for in1, in2, in3, in4, in5, in6, in7, in8 in it:
108 res.extend([
109 in1 + (in5 << 4),
110 in2 + (in6 << 4),
111 in3 + (in7 << 4),
112 in4 + (in8 << 4)
113 ])
114
115 return res
116
117
118
119
120
121
123
124 packed_input = []
125 for in1, in2, in3 in itertools.izip(*([iter(source_buf)] * 3)):
126 packed_input.append(in1 | (in2 << 10) | (in3 << 20))
127
128 return packed_input
129
130
131
132
133
134
135
136
137
138
139
140
141
142 and3_sh0 = []
143 and3_sh2 = []
144 and3_sh4 = []
145 and3_sh6 = []
146 and_f = []
147 sh4_and_f = []
148
149 for i in xrange(256):
150 and3_sh0.append((i >> 0) & 3)
151 and3_sh2.append((i >> 2) & 3)
152 and3_sh4.append((i >> 4) & 3)
153 and3_sh6.append((i >> 6) & 3)
154 and_f.append(i & 0xf)
155 sh4_and_f.append((i >> 4) & 0xf)
156
158
159 output = []
160 for in1, in2, in3, in4 in itertools.izip(*([iter(input_buf)] * 4)):
161 output.extend([
162 and3_sh0[in1], and3_sh0[in2], and3_sh0[in3], and3_sh0[in4],
163 and3_sh2[in1], and3_sh2[in2], and3_sh2[in3], and3_sh2[in4],
164 and3_sh4[in1], and3_sh4[in2], and3_sh4[in3], and3_sh4[in4],
165 and3_sh6[in1], and3_sh6[in2], and3_sh6[in3], and3_sh6[in4]
166 ])
167 return output
168
169
170
171
172
173
174
175
177 output = []
178 for in1, in2, in3, in4 in itertools.izip(*([iter(input_buf)] * 4)):
179 output.extend([
180 and_f[in1],
181 and_f[in2],
182 and_f[in3],
183 and_f[in4],
184 sh4_and_f[in1],
185 sh4_and_f[in2],
186 sh4_and_f[in3],
187 sh4_and_f[in4]])
188
189 return output
190
191
192
193
194
196 output = []
197 for in1, in2, in3, in4 in itertools.izip(*([iter(input_buf)] * 4)):
198 output.extend([
199 in1 & 0x3FF, (in1 >> 10) & 0x3FF, (in1 >> 20) & 0x3FF,
200 in2 & 0x3FF, (in2 >> 10) & 0x3FF, (in2 >> 20) & 0x3FF,
201 in3 & 0x3FF, (in3 >> 10) & 0x3FF, (in3 >> 20) & 0x3FF,
202 in4 & 0x3FF, (in4 >> 10) & 0x3FF, (in4 >> 20) & 0x3FF
203 ])
204
205 return output
206
208 dictionary = []
209 for _ in xrange(DICTIONARY_SIZE):
210 dictionary.append((1, 0))
211 hashLookupTable = HASH_LOOKUP_TABLE_CONTENTS
212
213 tempTagsArray = []
214 tempQPosArray = []
215
216
217 tempLowBitsArray = []
218
219
220 full_patterns = []
221
222 input_words = struct.unpack("I" * (len(src_buf) / 4), src_buf)
223
224 for input_word in input_words:
225
226
227 input_high_bits = input_word / 1024
228 dict_location = hashLookupTable[input_high_bits % 256]
229 dict_word, dict_high = dictionary[dict_location]
230
231 if (input_word == dict_word):
232 tempTagsArray.append(EXACT_TAG)
233 tempQPosArray.append(dict_location)
234 elif (input_word == 0):
235 tempTagsArray.append(ZERO_TAG)
236 else:
237 if input_high_bits == dict_high:
238 tempTagsArray.append(PARTIAL_TAG)
239 tempQPosArray.append(dict_location)
240 tempLowBitsArray.append((input_word % 1024))
241 else:
242 tempTagsArray.append(MISS_TAG)
243 full_patterns.append(input_word)
244
245 dictionary[dict_location] = (input_word, input_high_bits)
246
247 qpos_start = len(full_patterns) + TAGS_AREA_OFFSET + (len(src_buf) / 64)
248
249 packed_tags = WK_pack_2bits(tempTagsArray)
250
251 num_bytes_to_pack = len(tempQPosArray)
252 num_packed_words = math.ceil(num_bytes_to_pack / 8.0)
253 num_source_bytes = int(num_packed_words * 8)
254
255 tempQPosArray += [0] * (num_source_bytes - len(tempQPosArray))
256
257 packed_qp = WK_pack_4bits(tempQPosArray)
258
259 low_start = qpos_start + int(num_packed_words)
260
261 num_packed_words = len(tempLowBitsArray) / 3
262
263 while len(tempLowBitsArray) % 3:
264 tempLowBitsArray.append(0)
265
266 packed_low = WK_pack_3_tenbits(tempLowBitsArray)
267
268 low_end = low_start + len(packed_low)
269
270 header = [0, qpos_start, low_start, low_end]
271
272 return struct.pack(
273 "IIII" +
274 "B" * len(packed_tags) +
275 "I" * len(full_patterns) +
276 "B" * len(packed_qp) +
277 "I" * len(packed_low),
278 * (header + packed_tags + full_patterns + packed_qp + packed_low))
279
281 qpos_start, low_start, low_end = struct.unpack("III", src_buf[:12])
282
283 return _WKdm_decompress(src_buf, qpos_start, low_start, low_end, 12)
284
286 qpos_start, low_start, low_end = struct.unpack("III", src_buf[4:16])
287
288 return _WKdm_decompress(src_buf, qpos_start, low_start, low_end, 16)
289
291
292 if max(qpos_start, low_start, low_end) > len(src_buf):
293 return None
294
295 if qpos_start > low_start or low_start > low_end:
296 return None
297
298 dictionary = [1] * DICTIONARY_SIZE
299 hashLookupTable = HASH_LOOKUP_TABLE_CONTENTS
300
301 tags_str = src_buf[header_size : header_size + 256]
302 tags_array = WK_unpack_2bits(struct.unpack("B" * len(tags_str), tags_str))
303
304 qpos_str = src_buf[qpos_start * 4:low_start * 4]
305 tempQPosArray = WK_unpack_4bits(
306 struct.unpack("B" * len(qpos_str), qpos_str))
307
308 lowbits_str = src_buf[low_start * 4:low_end * 4]
309 num_lowbits_bytes = len(lowbits_str)
310 num_lowbits_words = num_lowbits_bytes / 4
311 num_packed_lowbits = num_lowbits_words * 3
312
313 rem = len(lowbits_str) % 16
314 if rem:
315 lowbits_str += "\x00" * (16 - rem)
316
317 packed_lowbits = struct.unpack("I" * (len(lowbits_str) / 4), lowbits_str)
318
319 tempLowBitsArray = WK_unpack_3_tenbits(packed_lowbits)[:num_packed_lowbits]
320
321 patterns_str = src_buf[256 + header_size:qpos_start * 4]
322 full_patterns = struct.unpack("I" * (len(patterns_str) / 4), patterns_str)
323
324 p_tempQPosArray = iter(tempQPosArray)
325 p_tempLowBitsArray = iter(tempLowBitsArray)
326 p_full_patterns = iter(full_patterns)
327
328 output = []
329
330 for tag in tags_array:
331
332 if tag == ZERO_TAG:
333 output.append(0)
334 elif tag == EXACT_TAG:
335 output.append(dictionary[p_tempQPosArray.next()])
336 elif tag == PARTIAL_TAG:
337
338 dict_idx = p_tempQPosArray.next()
339 temp = ((dictionary[dict_idx] / 1024) * 1024)
340 temp += p_tempLowBitsArray.next()
341
342 dictionary[dict_idx] = temp
343 output.append(temp)
344 elif tag == MISS_TAG:
345 missed_word = p_full_patterns.next()
346 dict_idx = hashLookupTable[(missed_word / 1024) % 256]
347 dictionary[dict_idx] = missed_word
348 output.append(missed_word)
349
350 for p in [p_tempQPosArray, p_tempLowBitsArray, p_full_patterns]:
351 for leftover in p:
352 if leftover != 0:
353
354 return None
355
356 return struct.pack("I" * len(output), *output)
357