Coverage Report

Created: 2025-12-31 07:01

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/git/ewah/ewah_bitmap.c
Line
Count
Source
1
/**
2
 * Copyright 2013, GitHub, Inc
3
 * Copyright 2009-2013, Daniel Lemire, Cliff Moon,
4
 *  David McIntosh, Robert Becho, Google Inc. and Veronika Zenz
5
 *
6
 * This program is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU General Public License
8
 * as published by the Free Software Foundation; either version 2
9
 * of the License, or (at your option) any later version.
10
 *
11
 * This program is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
 * GNU General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU General Public License
17
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
20
#include "git-compat-util.h"
21
#include "ewok.h"
22
#include "ewok_rlw.h"
23
24
static inline size_t min_size(size_t a, size_t b)
25
0
{
26
0
  return a < b ? a : b;
27
0
}
28
29
static inline size_t max_size(size_t a, size_t b)
30
0
{
31
0
  return a > b ? a : b;
32
0
}
33
34
static inline void buffer_grow(struct ewah_bitmap *self, size_t new_size)
35
0
{
36
0
  size_t rlw_offset = (uint8_t *)self->rlw - (uint8_t *)self->buffer;
37
0
  ALLOC_GROW(self->buffer, new_size, self->alloc_size);
38
0
  self->rlw = self->buffer + (rlw_offset / sizeof(eword_t));
39
0
}
40
41
static inline void buffer_push(struct ewah_bitmap *self, eword_t value)
42
0
{
43
0
  buffer_grow(self, self->buffer_size + 1);
44
0
  self->buffer[self->buffer_size++] = value;
45
0
}
46
47
static void buffer_push_rlw(struct ewah_bitmap *self, eword_t value)
48
0
{
49
0
  buffer_push(self, value);
50
0
  self->rlw = self->buffer + self->buffer_size - 1;
51
0
}
52
53
static size_t add_empty_words(struct ewah_bitmap *self, int v, size_t number)
54
0
{
55
0
  size_t added = 0;
56
0
  eword_t runlen, can_add;
57
58
0
  if (rlw_get_run_bit(self->rlw) != v && rlw_size(self->rlw) == 0) {
59
0
    rlw_set_run_bit(self->rlw, v);
60
0
  } else if (rlw_get_literal_words(self->rlw) != 0 ||
61
0
      rlw_get_run_bit(self->rlw) != v) {
62
0
    buffer_push_rlw(self, 0);
63
0
    if (v) rlw_set_run_bit(self->rlw, v);
64
0
    added++;
65
0
  }
66
67
0
  runlen = rlw_get_running_len(self->rlw);
68
0
  can_add = min_size(number, RLW_LARGEST_RUNNING_COUNT - runlen);
69
70
0
  rlw_set_running_len(self->rlw, runlen + can_add);
71
0
  number -= can_add;
72
73
0
  while (number >= RLW_LARGEST_RUNNING_COUNT) {
74
0
    buffer_push_rlw(self, 0);
75
0
    added++;
76
0
    if (v) rlw_set_run_bit(self->rlw, v);
77
0
    rlw_set_running_len(self->rlw, RLW_LARGEST_RUNNING_COUNT);
78
0
    number -= RLW_LARGEST_RUNNING_COUNT;
79
0
  }
80
81
0
  if (number > 0) {
82
0
    buffer_push_rlw(self, 0);
83
0
    added++;
84
85
0
    if (v) rlw_set_run_bit(self->rlw, v);
86
0
    rlw_set_running_len(self->rlw, number);
87
0
  }
88
89
0
  return added;
90
0
}
91
92
size_t ewah_add_empty_words(struct ewah_bitmap *self, int v, size_t number)
93
0
{
94
0
  if (number == 0)
95
0
    return 0;
96
97
0
  self->bit_size += number * BITS_IN_EWORD;
98
0
  return add_empty_words(self, v, number);
99
0
}
100
101
static size_t add_literal(struct ewah_bitmap *self, eword_t new_data)
102
0
{
103
0
  eword_t current_num = rlw_get_literal_words(self->rlw);
104
105
0
  if (current_num >= RLW_LARGEST_LITERAL_COUNT) {
106
0
    buffer_push_rlw(self, 0);
107
108
0
    rlw_set_literal_words(self->rlw, 1);
109
0
    buffer_push(self, new_data);
110
0
    return 2;
111
0
  }
112
113
0
  rlw_set_literal_words(self->rlw, current_num + 1);
114
115
  /* sanity check */
116
0
  assert(rlw_get_literal_words(self->rlw) == current_num + 1);
117
118
0
  buffer_push(self, new_data);
119
0
  return 1;
120
0
}
121
122
void ewah_add_dirty_words(
123
  struct ewah_bitmap *self, const eword_t *buffer,
124
  size_t number, int negate)
125
0
{
126
0
  size_t literals, can_add;
127
128
0
  while (1) {
129
0
    literals = rlw_get_literal_words(self->rlw);
130
0
    can_add = min_size(number, RLW_LARGEST_LITERAL_COUNT - literals);
131
132
0
    rlw_set_literal_words(self->rlw, literals + can_add);
133
134
0
    buffer_grow(self, self->buffer_size + can_add);
135
136
0
    if (negate) {
137
0
      size_t i;
138
0
      for (i = 0; i < can_add; ++i)
139
0
        self->buffer[self->buffer_size++] = ~buffer[i];
140
0
    } else {
141
0
      memcpy(self->buffer + self->buffer_size,
142
0
        buffer, can_add * sizeof(eword_t));
143
0
      self->buffer_size += can_add;
144
0
    }
145
146
0
    self->bit_size += can_add * BITS_IN_EWORD;
147
148
0
    if (number - can_add == 0)
149
0
      break;
150
151
0
    buffer_push_rlw(self, 0);
152
0
    buffer += can_add;
153
0
    number -= can_add;
154
0
  }
155
0
}
156
157
static size_t add_empty_word(struct ewah_bitmap *self, int v)
158
0
{
159
0
  int no_literal = (rlw_get_literal_words(self->rlw) == 0);
160
0
  eword_t run_len = rlw_get_running_len(self->rlw);
161
162
0
  if (no_literal && run_len == 0) {
163
0
    rlw_set_run_bit(self->rlw, v);
164
0
    assert(rlw_get_run_bit(self->rlw) == v);
165
0
  }
166
167
0
  if (no_literal && rlw_get_run_bit(self->rlw) == v &&
168
0
    run_len < RLW_LARGEST_RUNNING_COUNT) {
169
0
    rlw_set_running_len(self->rlw, run_len + 1);
170
0
    assert(rlw_get_running_len(self->rlw) == run_len + 1);
171
0
    return 0;
172
0
  } else {
173
0
    buffer_push_rlw(self, 0);
174
175
0
    assert(rlw_get_running_len(self->rlw) == 0);
176
0
    assert(rlw_get_run_bit(self->rlw) == 0);
177
0
    assert(rlw_get_literal_words(self->rlw) == 0);
178
179
0
    rlw_set_run_bit(self->rlw, v);
180
0
    assert(rlw_get_run_bit(self->rlw) == v);
181
182
0
    rlw_set_running_len(self->rlw, 1);
183
0
    assert(rlw_get_running_len(self->rlw) == 1);
184
0
    assert(rlw_get_literal_words(self->rlw) == 0);
185
0
    return 1;
186
0
  }
187
0
}
188
189
size_t ewah_add(struct ewah_bitmap *self, eword_t word)
190
0
{
191
0
  self->bit_size += BITS_IN_EWORD;
192
193
0
  if (word == 0)
194
0
    return add_empty_word(self, 0);
195
196
0
  if (word == (eword_t)(~0))
197
0
    return add_empty_word(self, 1);
198
199
0
  return add_literal(self, word);
200
0
}
201
202
void ewah_set(struct ewah_bitmap *self, size_t i)
203
0
{
204
0
  const size_t dist =
205
0
    DIV_ROUND_UP(i + 1, BITS_IN_EWORD) -
206
0
    DIV_ROUND_UP(self->bit_size, BITS_IN_EWORD);
207
208
0
  assert(i >= self->bit_size);
209
210
0
  self->bit_size = i + 1;
211
212
0
  if (dist > 0) {
213
0
    if (dist > 1)
214
0
      add_empty_words(self, 0, dist - 1);
215
216
0
    add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
217
0
    return;
218
0
  }
219
220
0
  if (rlw_get_literal_words(self->rlw) == 0) {
221
0
    rlw_set_running_len(self->rlw,
222
0
      rlw_get_running_len(self->rlw) - 1);
223
0
    add_literal(self, (eword_t)1 << (i % BITS_IN_EWORD));
224
0
    return;
225
0
  }
226
227
0
  self->buffer[self->buffer_size - 1] |=
228
0
    ((eword_t)1 << (i % BITS_IN_EWORD));
229
230
  /* check if we just completed a stream of 1s */
231
0
  if (self->buffer[self->buffer_size - 1] == (eword_t)(~0)) {
232
0
    self->buffer[--self->buffer_size] = 0;
233
0
    rlw_set_literal_words(self->rlw,
234
0
      rlw_get_literal_words(self->rlw) - 1);
235
0
    add_empty_word(self, 1);
236
0
  }
237
0
}
238
239
void ewah_each_bit(struct ewah_bitmap *self, void (*callback)(size_t, void*), void *payload)
240
0
{
241
0
  size_t pos = 0;
242
0
  size_t pointer = 0;
243
0
  size_t k;
244
245
0
  while (pointer < self->buffer_size) {
246
0
    eword_t *word = &self->buffer[pointer];
247
248
0
    if (rlw_get_run_bit(word)) {
249
0
      size_t len = rlw_get_running_len(word) * BITS_IN_EWORD;
250
0
      for (k = 0; k < len; ++k, ++pos)
251
0
        callback(pos, payload);
252
0
    } else {
253
0
      pos += rlw_get_running_len(word) * BITS_IN_EWORD;
254
0
    }
255
256
0
    ++pointer;
257
258
0
    for (k = 0; k < rlw_get_literal_words(word); ++k) {
259
      /* todo: zero count optimization */
260
0
      for (size_t c = 0; c < BITS_IN_EWORD; ++c, ++pos) {
261
0
        if ((self->buffer[pointer] & ((eword_t)1 << c)) != 0)
262
0
          callback(pos, payload);
263
0
      }
264
265
0
      ++pointer;
266
0
    }
267
0
  }
268
0
}
269
270
/**
271
 * Clear all the bits in the bitmap. Does not free or resize
272
 * memory.
273
 */
274
static void ewah_clear(struct ewah_bitmap *self)
275
0
{
276
0
  self->buffer_size = 1;
277
0
  self->buffer[0] = 0;
278
0
  self->bit_size = 0;
279
0
  self->rlw = self->buffer;
280
0
}
281
282
struct ewah_bitmap *ewah_new(void)
283
0
{
284
0
  struct ewah_bitmap *self;
285
286
0
  self = xmalloc(sizeof(struct ewah_bitmap));
287
0
  self->alloc_size = 32;
288
0
  ALLOC_ARRAY(self->buffer, self->alloc_size);
289
290
0
  ewah_clear(self);
291
0
  return self;
292
0
}
293
294
void ewah_free(struct ewah_bitmap *self)
295
0
{
296
0
  if (!self)
297
0
    return;
298
299
0
  if (self->alloc_size)
300
0
    free(self->buffer);
301
302
0
  free(self);
303
0
}
304
305
static void read_new_rlw(struct ewah_iterator *it)
306
0
{
307
0
  const eword_t *word = NULL;
308
309
0
  it->literals = 0;
310
0
  it->compressed = 0;
311
312
0
  while (1) {
313
0
    word = &it->buffer[it->pointer];
314
315
0
    it->rl = rlw_get_running_len(word);
316
0
    it->lw = rlw_get_literal_words(word);
317
0
    it->b = rlw_get_run_bit(word);
318
319
0
    if (it->rl || it->lw)
320
0
      return;
321
322
0
    if (it->pointer < it->buffer_size - 1) {
323
0
      it->pointer++;
324
0
    } else {
325
0
      it->pointer = it->buffer_size;
326
0
      return;
327
0
    }
328
0
  }
329
0
}
330
331
int ewah_iterator_next(eword_t *next, struct ewah_iterator *it)
332
0
{
333
0
  if (it->pointer >= it->buffer_size)
334
0
    return 0;
335
336
0
  if (it->compressed < it->rl) {
337
0
    it->compressed++;
338
0
    *next = it->b ? (eword_t)(~0) : 0;
339
0
  } else {
340
0
    assert(it->literals < it->lw);
341
342
0
    it->literals++;
343
0
    it->pointer++;
344
345
0
    assert(it->pointer < it->buffer_size);
346
347
0
    *next = it->buffer[it->pointer];
348
0
  }
349
350
0
  if (it->compressed == it->rl && it->literals == it->lw) {
351
0
    if (++it->pointer < it->buffer_size)
352
0
      read_new_rlw(it);
353
0
  }
354
355
0
  return 1;
356
0
}
357
358
void ewah_iterator_init(struct ewah_iterator *it, struct ewah_bitmap *parent)
359
0
{
360
0
  it->buffer = parent->buffer;
361
0
  it->buffer_size = parent->buffer_size;
362
0
  it->pointer = 0;
363
364
0
  it->lw = 0;
365
0
  it->rl = 0;
366
0
  it->compressed = 0;
367
0
  it->literals = 0;
368
0
  it->b = 0;
369
370
0
  if (it->pointer < it->buffer_size)
371
0
    read_new_rlw(it);
372
0
}
373
374
void ewah_or_iterator_init(struct ewah_or_iterator *it,
375
         struct ewah_bitmap **parents, size_t nr)
376
0
{
377
0
  size_t i;
378
379
0
  memset(it, 0, sizeof(*it));
380
381
0
  ALLOC_ARRAY(it->its, nr);
382
0
  for (i = 0; i < nr; i++)
383
0
    ewah_iterator_init(&it->its[it->nr++], parents[i]);
384
0
}
385
386
int ewah_or_iterator_next(eword_t *next, struct ewah_or_iterator *it)
387
0
{
388
0
  eword_t buf, out = 0;
389
0
  size_t i;
390
0
  int ret = 0;
391
392
0
  for (i = 0; i < it->nr; i++)
393
0
    if (ewah_iterator_next(&buf, &it->its[i])) {
394
0
      out |= buf;
395
0
      ret = 1;
396
0
    }
397
398
0
  *next = out;
399
0
  return ret;
400
0
}
401
402
void ewah_or_iterator_release(struct ewah_or_iterator *it)
403
0
{
404
0
  free(it->its);
405
0
}
406
407
void ewah_xor(
408
  struct ewah_bitmap *ewah_i,
409
  struct ewah_bitmap *ewah_j,
410
  struct ewah_bitmap *out)
411
0
{
412
0
  struct rlw_iterator rlw_i;
413
0
  struct rlw_iterator rlw_j;
414
0
  size_t literals;
415
416
0
  rlwit_init(&rlw_i, ewah_i);
417
0
  rlwit_init(&rlw_j, ewah_j);
418
419
0
  while (rlwit_word_size(&rlw_i) > 0 && rlwit_word_size(&rlw_j) > 0) {
420
0
    while (rlw_i.rlw.running_len > 0 || rlw_j.rlw.running_len > 0) {
421
0
      struct rlw_iterator *prey, *predator;
422
0
      size_t index;
423
0
      int negate_words;
424
425
0
      if (rlw_i.rlw.running_len < rlw_j.rlw.running_len) {
426
0
        prey = &rlw_i;
427
0
        predator = &rlw_j;
428
0
      } else {
429
0
        prey = &rlw_j;
430
0
        predator = &rlw_i;
431
0
      }
432
433
0
      negate_words = !!predator->rlw.running_bit;
434
0
      index = rlwit_discharge(prey, out,
435
0
        predator->rlw.running_len, negate_words);
436
437
0
      ewah_add_empty_words(out, negate_words,
438
0
        predator->rlw.running_len - index);
439
440
0
      rlwit_discard_first_words(predator,
441
0
        predator->rlw.running_len);
442
0
    }
443
444
0
    literals = min_size(
445
0
      rlw_i.rlw.literal_words,
446
0
      rlw_j.rlw.literal_words);
447
448
0
    if (literals) {
449
0
      size_t k;
450
451
0
      for (k = 0; k < literals; ++k) {
452
0
        ewah_add(out,
453
0
          rlw_i.buffer[rlw_i.literal_word_start + k] ^
454
0
          rlw_j.buffer[rlw_j.literal_word_start + k]
455
0
        );
456
0
      }
457
458
0
      rlwit_discard_first_words(&rlw_i, literals);
459
0
      rlwit_discard_first_words(&rlw_j, literals);
460
0
    }
461
0
  }
462
463
0
  if (rlwit_word_size(&rlw_i) > 0)
464
0
    rlwit_discharge(&rlw_i, out, ~0, 0);
465
0
  else
466
0
    rlwit_discharge(&rlw_j, out, ~0, 0);
467
468
0
  out->bit_size = max_size(ewah_i->bit_size, ewah_j->bit_size);
469
0
}
470
471
0
#define BITMAP_POOL_MAX 16
472
static struct ewah_bitmap *bitmap_pool[BITMAP_POOL_MAX];
473
static size_t bitmap_pool_size;
474
475
struct ewah_bitmap *ewah_pool_new(void)
476
0
{
477
0
  if (bitmap_pool_size)
478
0
    return bitmap_pool[--bitmap_pool_size];
479
480
0
  return ewah_new();
481
0
}
482
483
void ewah_pool_free(struct ewah_bitmap *self)
484
0
{
485
0
  if (!self)
486
0
    return;
487
488
0
  if (bitmap_pool_size == BITMAP_POOL_MAX ||
489
0
    self->alloc_size == 0) {
490
0
    ewah_free(self);
491
0
    return;
492
0
  }
493
494
0
  ewah_clear(self);
495
0
  bitmap_pool[bitmap_pool_size++] = self;
496
0
}
497
498
uint32_t ewah_checksum(struct ewah_bitmap *self)
499
0
{
500
0
  const uint8_t *p = (uint8_t *)self->buffer;
501
0
  uint32_t crc = (uint32_t)self->bit_size;
502
0
  size_t size = self->buffer_size * sizeof(eword_t);
503
504
0
  while (size--)
505
0
    crc = (crc << 5) - crc + (uint32_t)*p++;
506
507
0
  return crc;
508
0
}