/src/git/ewah/ewah_bitmap.c
Line | Count | Source (jump to first uncovered line) |
1 | | /** |
2 | | * Copyright 2013, GitHub, Inc |
3 | | * Copyright 2009-2013, Daniel Lemire, Cliff Moon, |
4 | | * David McIntosh, Robert Becho, Google Inc. and Veronika Zenz |
5 | | * |
6 | | * This program is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU General Public License |
8 | | * as published by the Free Software Foundation; either version 2 |
9 | | * of the License, or (at your option) any later version. |
10 | | * |
11 | | * This program is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
14 | | * GNU General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU General Public License |
17 | | * along with this program; if not, see <http://www.gnu.org/licenses/>. |
18 | | */ |
19 | | #include "git-compat-util.h" |
20 | | #include "ewok.h" |
21 | | #include "ewok_rlw.h" |
22 | | |
23 | | static inline size_t min_size(size_t a, size_t b) |
24 | 0 | { |
25 | 0 | return a < b ? a : b; |
26 | 0 | } |
27 | | |
28 | | static inline size_t max_size(size_t a, size_t b) |
29 | 0 | { |
30 | 0 | return a > b ? a : b; |
31 | 0 | } |
32 | | |
33 | | static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size) |
34 | 0 | { |
35 | 0 | size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer; |
36 | 0 | ALLOC_GROW(self->buffer, new_size, self->alloc_size); |
37 | 0 | self->rlw = self->buffer + (rlw_offset / sizeof(eword_t)); |
38 | 0 | } |
39 | | |
40 | | static inline void buffer_push(struct ewah_bitmap *self, eword_t value) |
41 | 0 | { |
42 | 0 | buffer_grow(self, self->buffer_size + 1); |
43 | 0 | self->buffer[self->buffer_size++] = value; |
44 | 0 | } |
45 | | |
46 | | static void buffer_push_rlw(struct ewah_bitmap *self, eword_t value) |
47 | 0 | { |
48 | 0 | buffer_push(self, value); |
49 | 0 | self->rlw = self->buffer + self->buffer_size - 1; |
50 | 0 | } |
51 | | |
52 | | static size_t add_empty_words(struct ewah_bitmap *self, int v, size_t number) |
53 | 0 | { |
54 | 0 | size_t added = 0; |
55 | 0 | eword_t runlen, can_add; |
56 | |
|
57 | 0 | if (rlw_get_run_bit(self->rlw) != v && rlw_size(self->rlw) == 0) { |
58 | 0 | rlw_set_run_bit(self->rlw, v); |
59 | 0 | } else if (rlw_get_literal_words(self->rlw) != 0 || |
60 | 0 | rlw_get_run_bit(self->rlw) != v) { |
61 | 0 | buffer_push_rlw(self, 0); |
62 | 0 | if (v) rlw_set_run_bit(self->rlw, v); |
63 | 0 | added++; |
64 | 0 | } |
65 | |
|
66 | 0 | runlen = rlw_get_running_len(self->rlw); |
67 | 0 | can_add = min_size(number, RLW_LARGEST_RUNNING_COUNT - runlen); |
68 | |
|
69 | 0 | rlw_set_running_len(self->rlw, runlen + can_add); |
70 | 0 | number -= can_add; |
71 | |
|
72 | 0 | while (number >= RLW_LARGEST_RUNNING_COUNT) { |
73 | 0 | buffer_push_rlw(self, 0); |
74 | 0 | added++; |
75 | 0 | if (v) rlw_set_run_bit(self->rlw, v); |
76 | 0 | rlw_set_running_len(self->rlw, RLW_LARGEST_RUNNING_COUNT); |
77 | 0 | number -= RLW_LARGEST_RUNNING_COUNT; |
78 | 0 | } |
79 | |
|
80 | 0 | if (number > 0) { |
81 | 0 | buffer_push_rlw(self, 0); |
82 | 0 | added++; |
83 | |
|
84 | 0 | if (v) rlw_set_run_bit(self->rlw, v); |
85 | 0 | rlw_set_running_len(self->rlw, number); |
86 | 0 | } |
87 | |
|
88 | 0 | return added; |
89 | 0 | } |
90 | | |
91 | | size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number) |
92 | 0 | { |
93 | 0 | if (number == 0) |
94 | 0 | return 0; |
95 | | |
96 | 0 | self->bit_size += number * BITS_IN_EWORD; |
97 | 0 | return add_empty_words(self, v, number); |
98 | 0 | } |
99 | | |
100 | | static size_t add_literal(struct ewah_bitmap *self, eword_t new_data) |
101 | 0 | { |
102 | 0 | eword_t current_num = rlw_get_literal_words(self->rlw); |
103 | |
|
104 | 0 | if (current_num >= RLW_LARGEST_LITERAL_COUNT) { |
105 | 0 | buffer_push_rlw(self, 0); |
106 | |
|
107 | 0 | rlw_set_literal_words(self->rlw, 1); |
108 | 0 | buffer_push(self, new_data); |
109 | 0 | return 2; |
110 | 0 | } |
111 | | |
112 | 0 | rlw_set_literal_words(self->rlw, current_num + 1); |
113 | | |
114 | | /* sanity check */ |
115 | 0 | assert(rlw_get_literal_words(self->rlw) == current_num + 1); |
116 | | |
117 | 0 | buffer_push(self, new_data); |
118 | 0 | return 1; |
119 | 0 | } |
120 | | |
121 | | void ewah_add_dirty_words( |
122 | | struct ewah_bitmap *self, const eword_t *buffer, |
123 | | size_t number, int negate) |
124 | 0 | { |
125 | 0 | size_t literals, can_add; |
126 | |
|
127 | 0 | while (1) { |
128 | 0 | literals = rlw_get_literal_words(self->rlw); |
129 | 0 | can_add = min_size(number, RLW_LARGEST_LITERAL_COUNT - literals); |
130 | |
|
131 | 0 | rlw_set_literal_words(self->rlw, literals + can_add); |
132 | |
|
133 | 0 | buffer_grow(self, self->buffer_size + can_add); |
134 | |
|
135 | 0 | if (negate) { |
136 | 0 | size_t i; |
137 | 0 | for (i = 0; i < can_add; ++i) |
138 | 0 | self->buffer[self->buffer_size++] = ~buffer[i]; |
139 | 0 | } else { |
140 | 0 | memcpy(self->buffer + self->buffer_size, |
141 | 0 | buffer, can_add * sizeof(eword_t)); |
142 | 0 | self->buffer_size += can_add; |
143 | 0 | } |
144 | |
|
145 | 0 | self->bit_size += can_add * BITS_IN_EWORD; |
146 | |
|
147 | 0 | if (number - can_add == 0) |
148 | 0 | break; |
149 | | |
150 | 0 | buffer_push_rlw(self, 0); |
151 | 0 | buffer += can_add; |
152 | 0 | number -= can_add; |
153 | 0 | } |
154 | 0 | } |
155 | | |
156 | | static size_t add_empty_word(struct ewah_bitmap *self, int v) |
157 | 0 | { |
158 | 0 | int no_literal = (rlw_get_literal_words(self->rlw) == 0); |
159 | 0 | eword_t run_len = rlw_get_running_len(self->rlw); |
160 | |
|
161 | 0 | if (no_literal && run_len == 0) { |
162 | 0 | rlw_set_run_bit(self->rlw, v); |
163 | 0 | assert(rlw_get_run_bit(self->rlw) == v); |
164 | 0 | } |
165 | | |
166 | 0 | if (no_literal && rlw_get_run_bit(self->rlw) == v && |
167 | 0 | run_len < RLW_LARGEST_RUNNING_COUNT) { |
168 | 0 | rlw_set_running_len(self->rlw, run_len + 1); |
169 | 0 | assert(rlw_get_running_len(self->rlw) == run_len + 1); |
170 | 0 | return 0; |
171 | 0 | } else { |
172 | 0 | buffer_push_rlw(self, 0); |
173 | |
|
174 | 0 | assert(rlw_get_running_len(self->rlw) == 0); |
175 | 0 | assert(rlw_get_run_bit(self->rlw) == 0); |
176 | 0 | assert(rlw_get_literal_words(self->rlw) == 0); |
177 | | |
178 | 0 | rlw_set_run_bit(self->rlw, v); |
179 | 0 | assert(rlw_get_run_bit(self->rlw) == v); |
180 | | |
181 | 0 | rlw_set_running_len(self->rlw, 1); |
182 | 0 | assert(rlw_get_running_len(self->rlw) == 1); |
183 | 0 | assert(rlw_get_literal_words(self->rlw) == 0); |
184 | 0 | return 1; |
185 | 0 | } |
186 | 0 | } |
187 | | |
188 | | size_t ewah_add(struct ewah_bitmap *self, eword_t word) |
189 | 0 | { |
190 | 0 | self->bit_size += BITS_IN_EWORD; |
191 | |
|
192 | 0 | if (word == 0) |
193 | 0 | return add_empty_word(self, 0); |
194 | | |
195 | 0 | if (word == (eword_t)(~0)) |
196 | 0 | return add_empty_word(self, 1); |
197 | | |
198 | 0 | return add_literal(self, word); |
199 | 0 | } |
200 | | |
201 | | void ewah_set(struct ewah_bitmap *self, size_t i) |
202 | 0 | { |
203 | 0 | const size_t dist = |
204 | 0 | DIV_ROUND_UP(i + 1, BITS_IN_EWORD) - |
205 | 0 | DIV_ROUND_UP(self->bit_size, BITS_IN_EWORD); |
206 | |
|
207 | 0 | assert(i >= self->bit_size); |
208 | | |
209 | 0 | self->bit_size = i + 1; |
210 | |
|
211 | 0 | if (dist > 0) { |
212 | 0 | if (dist > 1) |
213 | 0 | add_empty_words(self, 0, dist - 1); |
214 | |
|
215 | 0 | add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD)); |
216 | 0 | return; |
217 | 0 | } |
218 | | |
219 | 0 | if (rlw_get_literal_words(self->rlw) == 0) { |
220 | 0 | rlw_set_running_len(self->rlw, |
221 | 0 | rlw_get_running_len(self->rlw) - 1); |
222 | 0 | add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD)); |
223 | 0 | return; |
224 | 0 | } |
225 | | |
226 | 0 | self->buffer[self->buffer_size - 1] |= |
227 | 0 | ((eword_t)1 << (i % BITS_IN_EWORD)); |
228 | | |
229 | | /* check if we just completed a stream of 1s */ |
230 | 0 | if (self->buffer[self->buffer_size - 1] == (eword_t)(~0)) { |
231 | 0 | self->buffer[--self->buffer_size] = 0; |
232 | 0 | rlw_set_literal_words(self->rlw, |
233 | 0 | rlw_get_literal_words(self->rlw) - 1); |
234 | 0 | add_empty_word(self, 1); |
235 | 0 | } |
236 | 0 | } |
237 | | |
238 | | void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload) |
239 | 0 | { |
240 | 0 | size_t pos = 0; |
241 | 0 | size_t pointer = 0; |
242 | 0 | size_t k; |
243 | |
|
244 | 0 | while (pointer < self->buffer_size) { |
245 | 0 | eword_t *word = &self->buffer[pointer]; |
246 | |
|
247 | 0 | if (rlw_get_run_bit(word)) { |
248 | 0 | size_t len = rlw_get_running_len(word) * BITS_IN_EWORD; |
249 | 0 | for (k = 0; k < len; ++k, ++pos) |
250 | 0 | callback(pos, payload); |
251 | 0 | } else { |
252 | 0 | pos += rlw_get_running_len(word) * BITS_IN_EWORD; |
253 | 0 | } |
254 | |
|
255 | 0 | ++pointer; |
256 | |
|
257 | 0 | for (k = 0; k < rlw_get_literal_words(word); ++k) { |
258 | 0 | int c; |
259 | | |
260 | | /* todo: zero count optimization */ |
261 | 0 | for (c = 0; c < BITS_IN_EWORD; ++c, ++pos) { |
262 | 0 | if ((self->buffer[pointer] & ((eword_t)1 << c)) != 0) |
263 | 0 | callback(pos, payload); |
264 | 0 | } |
265 | |
|
266 | 0 | ++pointer; |
267 | 0 | } |
268 | 0 | } |
269 | 0 | } |
270 | | |
271 | | /** |
272 | | * Clear all the bits in the bitmap. Does not free or resize |
273 | | * memory. |
274 | | */ |
275 | | static void ewah_clear(struct ewah_bitmap *self) |
276 | 0 | { |
277 | 0 | self->buffer_size = 1; |
278 | 0 | self->buffer[0] = 0; |
279 | 0 | self->bit_size = 0; |
280 | 0 | self->rlw = self->buffer; |
281 | 0 | } |
282 | | |
283 | | struct ewah_bitmap *ewah_new(void) |
284 | 0 | { |
285 | 0 | struct ewah_bitmap *self; |
286 | |
|
287 | 0 | self = xmalloc(sizeof(struct ewah_bitmap)); |
288 | 0 | self->alloc_size = 32; |
289 | 0 | ALLOC_ARRAY(self->buffer, self->alloc_size); |
290 | |
|
291 | 0 | ewah_clear(self); |
292 | 0 | return self; |
293 | 0 | } |
294 | | |
295 | | void ewah_free(struct ewah_bitmap *self) |
296 | 0 | { |
297 | 0 | if (!self) |
298 | 0 | return; |
299 | | |
300 | 0 | if (self->alloc_size) |
301 | 0 | free(self->buffer); |
302 | |
|
303 | 0 | free(self); |
304 | 0 | } |
305 | | |
306 | | static void read_new_rlw(struct ewah_iterator *it) |
307 | 0 | { |
308 | 0 | const eword_t *word = NULL; |
309 | |
|
310 | 0 | it->literals = 0; |
311 | 0 | it->compressed = 0; |
312 | |
|
313 | 0 | while (1) { |
314 | 0 | word = &it->buffer[it->pointer]; |
315 | |
|
316 | 0 | it->rl = rlw_get_running_len(word); |
317 | 0 | it->lw = rlw_get_literal_words(word); |
318 | 0 | it->b = rlw_get_run_bit(word); |
319 | |
|
320 | 0 | if (it->rl || it->lw) |
321 | 0 | return; |
322 | | |
323 | 0 | if (it->pointer < it->buffer_size - 1) { |
324 | 0 | it->pointer++; |
325 | 0 | } else { |
326 | 0 | it->pointer = it->buffer_size; |
327 | 0 | return; |
328 | 0 | } |
329 | 0 | } |
330 | 0 | } |
331 | | |
332 | | int ewah_iterator_next(eword_t *next, struct ewah_iterator *it) |
333 | 0 | { |
334 | 0 | if (it->pointer >= it->buffer_size) |
335 | 0 | return 0; |
336 | | |
337 | 0 | if (it->compressed < it->rl) { |
338 | 0 | it->compressed++; |
339 | 0 | *next = it->b ? (eword_t)(~0) : 0; |
340 | 0 | } else { |
341 | 0 | assert(it->literals < it->lw); |
342 | | |
343 | 0 | it->literals++; |
344 | 0 | it->pointer++; |
345 | |
|
346 | 0 | assert(it->pointer < it->buffer_size); |
347 | | |
348 | 0 | *next = it->buffer[it->pointer]; |
349 | 0 | } |
350 | | |
351 | 0 | if (it->compressed == it->rl && it->literals == it->lw) { |
352 | 0 | if (++it->pointer < it->buffer_size) |
353 | 0 | read_new_rlw(it); |
354 | 0 | } |
355 | |
|
356 | 0 | return 1; |
357 | 0 | } |
358 | | |
359 | | void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent) |
360 | 0 | { |
361 | 0 | it->buffer = parent->buffer; |
362 | 0 | it->buffer_size = parent->buffer_size; |
363 | 0 | it->pointer = 0; |
364 | |
|
365 | 0 | it->lw = 0; |
366 | 0 | it->rl = 0; |
367 | 0 | it->compressed = 0; |
368 | 0 | it->literals = 0; |
369 | 0 | it->b = 0; |
370 | |
|
371 | 0 | if (it->pointer < it->buffer_size) |
372 | 0 | read_new_rlw(it); |
373 | 0 | } |
374 | | |
375 | | void ewah_xor( |
376 | | struct ewah_bitmap *ewah_i, |
377 | | struct ewah_bitmap *ewah_j, |
378 | | struct ewah_bitmap *out) |
379 | 0 | { |
380 | 0 | struct rlw_iterator rlw_i; |
381 | 0 | struct rlw_iterator rlw_j; |
382 | 0 | size_t literals; |
383 | |
|
384 | 0 | rlwit_init(&rlw_i, ewah_i); |
385 | 0 | rlwit_init(&rlw_j, ewah_j); |
386 | |
|
387 | 0 | while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) { |
388 | 0 | while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) { |
389 | 0 | struct rlw_iterator *prey, *predator; |
390 | 0 | size_t index; |
391 | 0 | int negate_words; |
392 | |
|
393 | 0 | if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) { |
394 | 0 | prey = &rlw_i; |
395 | 0 | predator = &rlw_j; |
396 | 0 | } else { |
397 | 0 | prey = &rlw_j; |
398 | 0 | predator = &rlw_i; |
399 | 0 | } |
400 | |
|
401 | 0 | negate_words = !!predator->rlw.running_bit; |
402 | 0 | index = rlwit_discharge(prey, out, |
403 | 0 | predator->rlw.running_len, negate_words); |
404 | |
|
405 | 0 | ewah_add_empty_words(out, negate_words, |
406 | 0 | predator->rlw.running_len - index); |
407 | |
|
408 | 0 | rlwit_discard_first_words(predator, |
409 | 0 | predator->rlw.running_len); |
410 | 0 | } |
411 | |
|
412 | 0 | literals = min_size( |
413 | 0 | rlw_i.rlw.literal_words, |
414 | 0 | rlw_j.rlw.literal_words); |
415 | |
|
416 | 0 | if (literals) { |
417 | 0 | size_t k; |
418 | |
|
419 | 0 | for (k = 0; k < literals; ++k) { |
420 | 0 | ewah_add(out, |
421 | 0 | rlw_i.buffer[rlw_i.literal_word_start + k] ^ |
422 | 0 | rlw_j.buffer[rlw_j.literal_word_start + k] |
423 | 0 | ); |
424 | 0 | } |
425 | |
|
426 | 0 | rlwit_discard_first_words(&rlw_i, literals); |
427 | 0 | rlwit_discard_first_words(&rlw_j, literals); |
428 | 0 | } |
429 | 0 | } |
430 | |
|
431 | 0 | if (rlwit_word_size(&rlw_i) > 0) |
432 | 0 | rlwit_discharge(&rlw_i, out, ~0, 0); |
433 | 0 | else |
434 | 0 | rlwit_discharge(&rlw_j, out, ~0, 0); |
435 | |
|
436 | 0 | out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size); |
437 | 0 | } |
438 | | |
439 | 0 | #define BITMAP_POOL_MAX 16 |
440 | | static struct ewah_bitmap *bitmap_pool[BITMAP_POOL_MAX]; |
441 | | static size_t bitmap_pool_size; |
442 | | |
443 | | struct ewah_bitmap *ewah_pool_new(void) |
444 | 0 | { |
445 | 0 | if (bitmap_pool_size) |
446 | 0 | return bitmap_pool[--bitmap_pool_size]; |
447 | | |
448 | 0 | return ewah_new(); |
449 | 0 | } |
450 | | |
451 | | void ewah_pool_free(struct ewah_bitmap *self) |
452 | 0 | { |
453 | 0 | if (!self) |
454 | 0 | return; |
455 | | |
456 | 0 | if (bitmap_pool_size == BITMAP_POOL_MAX || |
457 | 0 | self->alloc_size == 0) { |
458 | 0 | ewah_free(self); |
459 | 0 | return; |
460 | 0 | } |
461 | | |
462 | 0 | ewah_clear(self); |
463 | 0 | bitmap_pool[bitmap_pool_size++] = self; |
464 | 0 | } |
465 | | |
466 | | uint32_t ewah_checksum(struct ewah_bitmap *self) |
467 | 0 | { |
468 | 0 | const uint8_t *p = (uint8_t *)self->buffer; |
469 | 0 | uint32_t crc = (uint32_t)self->bit_size; |
470 | 0 | size_t size = self->buffer_size * sizeof(eword_t); |
471 | |
|
472 | 0 | while (size--) |
473 | 0 | crc = (crc << 5) - crc + (uint32_t)*p++; |
474 | |
|
475 | 0 | return crc; |
476 | 0 | } |