Coverage Report

Created: 2024-09-16 06:12

/src/git/ewah/ewah_bitmap.c
Line
Count
Source (jump to first uncovered line)
1
/**
2
 * Copyright 2013, GitHub, Inc
3
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
4
 *  David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
#include "git-compat-util.h"
20
#include "ewok.h"
21
#include "ewok_rlw.h"
22
23
static inline size_t min_size(size_t a, size_t b)
24
0
{
25
0
  return a < b ? a : b;
26
0
}
27
28
static inline size_t max_size(size_t a, size_t b)
29
0
{
30
0
  return a > b ? a : b;
31
0
}
32
33
static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size)
34
0
{
35
0
  size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer;
36
0
  ALLOC_GROW(self->buffer, new_size, self->alloc_size);
37
0
  self->rlw = self->buffer + (rlw_offset / sizeof(eword_t));
38
0
}
39
40
static inline void buffer_push(struct ewah_bitmap *self, eword_t value)
41
0
{
42
0
  buffer_grow(self, self->buffer_size + 1);
43
0
  self->buffer[self->buffer_size++] = value;
44
0
}
45
46
static void buffer_push_rlw(struct ewah_bitmap *self, eword_t value)
47
0
{
48
0
  buffer_push(self, value);
49
0
  self->rlw = self->buffer + self->buffer_size - 1;
50
0
}
51
52
static size_t add_empty_words(struct ewah_bitmap *self, int v, size_t number)
53
0
{
54
0
  size_t added = 0;
55
0
  eword_t runlen, can_add;
56
57
0
  if (rlw_get_run_bit(self->rlw) != v && rlw_size(self->rlw) == 0) {
58
0
    rlw_set_run_bit(self->rlw, v);
59
0
  } else if (rlw_get_literal_words(self->rlw) != 0 ||
60
0
      rlw_get_run_bit(self->rlw) != v) {
61
0
    buffer_push_rlw(self, 0);
62
0
    if (v) rlw_set_run_bit(self->rlw, v);
63
0
    added++;
64
0
  }
65
66
0
  runlen = rlw_get_running_len(self->rlw);
67
0
  can_add = min_size(number, RLW_LARGEST_RUNNING_COUNT - runlen);
68
69
0
  rlw_set_running_len(self->rlw, runlen + can_add);
70
0
  number -= can_add;
71
72
0
  while (number >= RLW_LARGEST_RUNNING_COUNT) {
73
0
    buffer_push_rlw(self, 0);
74
0
    added++;
75
0
    if (v) rlw_set_run_bit(self->rlw, v);
76
0
    rlw_set_running_len(self->rlw, RLW_LARGEST_RUNNING_COUNT);
77
0
    number -= RLW_LARGEST_RUNNING_COUNT;
78
0
  }
79
80
0
  if (number > 0) {
81
0
    buffer_push_rlw(self, 0);
82
0
    added++;
83
84
0
    if (v) rlw_set_run_bit(self->rlw, v);
85
0
    rlw_set_running_len(self->rlw, number);
86
0
  }
87
88
0
  return added;
89
0
}
90
91
size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number)
92
0
{
93
0
  if (number == 0)
94
0
    return 0;
95
96
0
  self->bit_size += number * BITS_IN_EWORD;
97
0
  return add_empty_words(self, v, number);
98
0
}
99
100
static size_t add_literal(struct ewah_bitmap *self, eword_t new_data)
101
0
{
102
0
  eword_t current_num = rlw_get_literal_words(self->rlw);
103
104
0
  if (current_num >= RLW_LARGEST_LITERAL_COUNT) {
105
0
    buffer_push_rlw(self, 0);
106
107
0
    rlw_set_literal_words(self->rlw, 1);
108
0
    buffer_push(self, new_data);
109
0
    return 2;
110
0
  }
111
112
0
  rlw_set_literal_words(self->rlw, current_num + 1);
113
114
  /* sanity check */
115
0
  assert(rlw_get_literal_words(self->rlw) == current_num + 1);
116
117
0
  buffer_push(self, new_data);
118
0
  return 1;
119
0
}
120
121
void ewah_add_dirty_words(
122
  struct ewah_bitmap *self, const eword_t *buffer,
123
  size_t number, int negate)
124
0
{
125
0
  size_t literals, can_add;
126
127
0
  while (1) {
128
0
    literals = rlw_get_literal_words(self->rlw);
129
0
    can_add = min_size(number, RLW_LARGEST_LITERAL_COUNT - literals);
130
131
0
    rlw_set_literal_words(self->rlw, literals + can_add);
132
133
0
    buffer_grow(self, self->buffer_size + can_add);
134
135
0
    if (negate) {
136
0
      size_t i;
137
0
      for (i = 0; i < can_add; ++i)
138
0
        self->buffer[self->buffer_size++] = ~buffer[i];
139
0
    } else {
140
0
      memcpy(self->buffer + self->buffer_size,
141
0
        buffer, can_add * sizeof(eword_t));
142
0
      self->buffer_size += can_add;
143
0
    }
144
145
0
    self->bit_size += can_add * BITS_IN_EWORD;
146
147
0
    if (number - can_add == 0)
148
0
      break;
149
150
0
    buffer_push_rlw(self, 0);
151
0
    buffer += can_add;
152
0
    number -= can_add;
153
0
  }
154
0
}
155
156
static size_t add_empty_word(struct ewah_bitmap *self, int v)
157
0
{
158
0
  int no_literal = (rlw_get_literal_words(self->rlw) == 0);
159
0
  eword_t run_len = rlw_get_running_len(self->rlw);
160
161
0
  if (no_literal && run_len == 0) {
162
0
    rlw_set_run_bit(self->rlw, v);
163
0
    assert(rlw_get_run_bit(self->rlw) == v);
164
0
  }
165
166
0
  if (no_literal && rlw_get_run_bit(self->rlw) == v &&
167
0
    run_len < RLW_LARGEST_RUNNING_COUNT) {
168
0
    rlw_set_running_len(self->rlw, run_len + 1);
169
0
    assert(rlw_get_running_len(self->rlw) == run_len + 1);
170
0
    return 0;
171
0
  } else {
172
0
    buffer_push_rlw(self, 0);
173
174
0
    assert(rlw_get_running_len(self->rlw) == 0);
175
0
    assert(rlw_get_run_bit(self->rlw) == 0);
176
0
    assert(rlw_get_literal_words(self->rlw) == 0);
177
178
0
    rlw_set_run_bit(self->rlw, v);
179
0
    assert(rlw_get_run_bit(self->rlw) == v);
180
181
0
    rlw_set_running_len(self->rlw, 1);
182
0
    assert(rlw_get_running_len(self->rlw) == 1);
183
0
    assert(rlw_get_literal_words(self->rlw) == 0);
184
0
    return 1;
185
0
  }
186
0
}
187
188
size_t ewah_add(struct ewah_bitmap *self, eword_t word)
189
0
{
190
0
  self->bit_size += BITS_IN_EWORD;
191
192
0
  if (word == 0)
193
0
    return add_empty_word(self, 0);
194
195
0
  if (word == (eword_t)(~0))
196
0
    return add_empty_word(self, 1);
197
198
0
  return add_literal(self, word);
199
0
}
200
201
void ewah_set(struct ewah_bitmap *self, size_t i)
202
0
{
203
0
  const size_t dist =
204
0
    DIV_ROUND_UP(i + 1, BITS_IN_EWORD) -
205
0
    DIV_ROUND_UP(self->bit_size, BITS_IN_EWORD);
206
207
0
  assert(i >= self->bit_size);
208
209
0
  self->bit_size = i + 1;
210
211
0
  if (dist > 0) {
212
0
    if (dist > 1)
213
0
      add_empty_words(self, 0, dist - 1);
214
215
0
    add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
216
0
    return;
217
0
  }
218
219
0
  if (rlw_get_literal_words(self->rlw) == 0) {
220
0
    rlw_set_running_len(self->rlw,
221
0
      rlw_get_running_len(self->rlw) - 1);
222
0
    add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
223
0
    return;
224
0
  }
225
226
0
  self->buffer[self->buffer_size - 1] |=
227
0
    ((eword_t)1 << (i % BITS_IN_EWORD));
228
229
  /* check if we just completed a stream of 1s */
230
0
  if (self->buffer[self->buffer_size - 1] == (eword_t)(~0)) {
231
0
    self->buffer[--self->buffer_size] = 0;
232
0
    rlw_set_literal_words(self->rlw,
233
0
      rlw_get_literal_words(self->rlw) - 1);
234
0
    add_empty_word(self, 1);
235
0
  }
236
0
}
237
238
void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload)
239
0
{
240
0
  size_t pos = 0;
241
0
  size_t pointer = 0;
242
0
  size_t k;
243
244
0
  while (pointer < self->buffer_size) {
245
0
    eword_t *word = &self->buffer[pointer];
246
247
0
    if (rlw_get_run_bit(word)) {
248
0
      size_t len = rlw_get_running_len(word) * BITS_IN_EWORD;
249
0
      for (k = 0; k < len; ++k, ++pos)
250
0
        callback(pos, payload);
251
0
    } else {
252
0
      pos += rlw_get_running_len(word) * BITS_IN_EWORD;
253
0
    }
254
255
0
    ++pointer;
256
257
0
    for (k = 0; k < rlw_get_literal_words(word); ++k) {
258
0
      int c;
259
260
      /* todo: zero count optimization */
261
0
      for (c = 0; c < BITS_IN_EWORD; ++c, ++pos) {
262
0
        if ((self->buffer[pointer] & ((eword_t)1 << c)) != 0)
263
0
          callback(pos, payload);
264
0
      }
265
266
0
      ++pointer;
267
0
    }
268
0
  }
269
0
}
270
271
/**
272
 * Clear all the bits in the bitmap. Does not free or resize
273
 * memory.
274
 */
275
static void ewah_clear(struct ewah_bitmap *self)
276
0
{
277
0
  self->buffer_size = 1;
278
0
  self->buffer[0] = 0;
279
0
  self->bit_size = 0;
280
0
  self->rlw = self->buffer;
281
0
}
282
283
struct ewah_bitmap *ewah_new(void)
284
0
{
285
0
  struct ewah_bitmap *self;
286
287
0
  self = xmalloc(sizeof(struct ewah_bitmap));
288
0
  self->alloc_size = 32;
289
0
  ALLOC_ARRAY(self->buffer, self->alloc_size);
290
291
0
  ewah_clear(self);
292
0
  return self;
293
0
}
294
295
void ewah_free(struct ewah_bitmap *self)
296
0
{
297
0
  if (!self)
298
0
    return;
299
300
0
  if (self->alloc_size)
301
0
    free(self->buffer);
302
303
0
  free(self);
304
0
}
305
306
static void read_new_rlw(struct ewah_iterator *it)
307
0
{
308
0
  const eword_t *word = NULL;
309
310
0
  it->literals = 0;
311
0
  it->compressed = 0;
312
313
0
  while (1) {
314
0
    word = &it->buffer[it->pointer];
315
316
0
    it->rl = rlw_get_running_len(word);
317
0
    it->lw = rlw_get_literal_words(word);
318
0
    it->b = rlw_get_run_bit(word);
319
320
0
    if (it->rl || it->lw)
321
0
      return;
322
323
0
    if (it->pointer < it->buffer_size - 1) {
324
0
      it->pointer++;
325
0
    } else {
326
0
      it->pointer = it->buffer_size;
327
0
      return;
328
0
    }
329
0
  }
330
0
}
331
332
int ewah_iterator_next(eword_t *next, struct ewah_iterator *it)
333
0
{
334
0
  if (it->pointer >= it->buffer_size)
335
0
    return 0;
336
337
0
  if (it->compressed < it->rl) {
338
0
    it->compressed++;
339
0
    *next = it->b ? (eword_t)(~0) : 0;
340
0
  } else {
341
0
    assert(it->literals < it->lw);
342
343
0
    it->literals++;
344
0
    it->pointer++;
345
346
0
    assert(it->pointer < it->buffer_size);
347
348
0
    *next = it->buffer[it->pointer];
349
0
  }
350
351
0
  if (it->compressed == it->rl && it->literals == it->lw) {
352
0
    if (++it->pointer < it->buffer_size)
353
0
      read_new_rlw(it);
354
0
  }
355
356
0
  return 1;
357
0
}
358
359
void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent)
360
0
{
361
0
  it->buffer = parent->buffer;
362
0
  it->buffer_size = parent->buffer_size;
363
0
  it->pointer = 0;
364
365
0
  it->lw = 0;
366
0
  it->rl = 0;
367
0
  it->compressed = 0;
368
0
  it->literals = 0;
369
0
  it->b = 0;
370
371
0
  if (it->pointer < it->buffer_size)
372
0
    read_new_rlw(it);
373
0
}
374
375
void ewah_xor(
376
  struct ewah_bitmap *ewah_i,
377
  struct ewah_bitmap *ewah_j,
378
  struct ewah_bitmap *out)
379
0
{
380
0
  struct rlw_iterator rlw_i;
381
0
  struct rlw_iterator rlw_j;
382
0
  size_t literals;
383
384
0
  rlwit_init(&rlw_i, ewah_i);
385
0
  rlwit_init(&rlw_j, ewah_j);
386
387
0
  while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) {
388
0
    while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) {
389
0
      struct rlw_iterator *prey, *predator;
390
0
      size_t index;
391
0
      int negate_words;
392
393
0
      if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) {
394
0
        prey = &rlw_i;
395
0
        predator = &rlw_j;
396
0
      } else {
397
0
        prey = &rlw_j;
398
0
        predator = &rlw_i;
399
0
      }
400
401
0
      negate_words = !!predator->rlw.running_bit;
402
0
      index = rlwit_discharge(prey, out,
403
0
        predator->rlw.running_len, negate_words);
404
405
0
      ewah_add_empty_words(out, negate_words,
406
0
        predator->rlw.running_len - index);
407
408
0
      rlwit_discard_first_words(predator,
409
0
        predator->rlw.running_len);
410
0
    }
411
412
0
    literals = min_size(
413
0
      rlw_i.rlw.literal_words,
414
0
      rlw_j.rlw.literal_words);
415
416
0
    if (literals) {
417
0
      size_t k;
418
419
0
      for (k = 0; k < literals; ++k) {
420
0
        ewah_add(out,
421
0
          rlw_i.buffer[rlw_i.literal_word_start + k] ^
422
0
          rlw_j.buffer[rlw_j.literal_word_start + k]
423
0
        );
424
0
      }
425
426
0
      rlwit_discard_first_words(&rlw_i, literals);
427
0
      rlwit_discard_first_words(&rlw_j, literals);
428
0
    }
429
0
  }
430
431
0
  if (rlwit_word_size(&rlw_i) > 0)
432
0
    rlwit_discharge(&rlw_i, out, ~0, 0);
433
0
  else
434
0
    rlwit_discharge(&rlw_j, out, ~0, 0);
435
436
0
  out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size);
437
0
}
438
439
0
#define BITMAP_POOL_MAX 16
440
static struct ewah_bitmap *bitmap_pool[BITMAP_POOL_MAX];
441
static size_t bitmap_pool_size;
442
443
struct ewah_bitmap *ewah_pool_new(void)
444
0
{
445
0
  if (bitmap_pool_size)
446
0
    return bitmap_pool[--bitmap_pool_size];
447
448
0
  return ewah_new();
449
0
}
450
451
void ewah_pool_free(struct ewah_bitmap *self)
452
0
{
453
0
  if (!self)
454
0
    return;
455
456
0
  if (bitmap_pool_size == BITMAP_POOL_MAX ||
457
0
    self->alloc_size == 0) {
458
0
    ewah_free(self);
459
0
    return;
460
0
  }
461
462
0
  ewah_clear(self);
463
0
  bitmap_pool[bitmap_pool_size++] = self;
464
0
}
465
466
uint32_t ewah_checksum(struct ewah_bitmap *self)
467
0
{
468
0
  const uint8_t *p = (uint8_t *)self->buffer;
469
0
  uint32_t crc = (uint32_t)self->bit_size;
470
0
  size_t size = self->buffer_size * sizeof(eword_t);
471
472
0
  while (size--)
473
0
    crc = (crc << 5) - crc + (uint32_t)*p++;
474
475
0
  return crc;
476
0
}