Coverage Report

Created: 2025-07-12 06:32

/src/h2o/deps/libgkc/gkc.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2016 Fastly, Inc.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a copy
5
 * of this software and associated documentation files (the "Software"), to
6
 * deal in the Software without restriction, including without limitation the
7
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8
 * sell copies of the Software, and to permit persons to whom the Software is
9
 * furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included in
12
 * all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20
 * IN THE SOFTWARE.
21
 */
22
23
/*
24
 * A streaming quantile library.
25
 *
26
 *
27
 * A `gkc_summary` structure is used to summarize observations
28
 * within a given error range. Observations are inserted using
29
 * `gkc_insert_value`, quantile queries can then be performed with
30
 * `gkc_query` against the summary.
31
 * Provided two summaries are using the same epsilon, they can be merged
32
 * by calling `gkc_combine`.
33
 *
34
 * The algorithm guaranties a bounded memory usage to:
35
 * (11/(2 x epsilon))*log(2 * epsilon * N)
36
 *
37
 * For epsilon = 0.01 and N = 2^64, this is only 10k max in the
38
 * theoritical worse case. In practice, it's reliably using less:
39
 * inserting random data gets us * ~100 max insertions for > 50 millions
40
 * of entries.
41
 *
42
 * See www.cis.upenn.edu/~sanjeev/papers/sigmod01_quantiles.pdf for
43
 * the paper describing this algorithm and data structure.
44
 *
45
 */
46
47
#include <stdio.h>
48
#include <stdlib.h>
49
#include <string.h>
50
#include <limits.h>
51
#include <inttypes.h>
52
53
struct list {
54
    struct list *prev, *next;
55
};
56
57
struct gkc_summary {
58
    size_t nr_elems;
59
    double epsilon;
60
    uint64_t alloced;
61
    uint64_t max_alloced;
62
    struct list head;
63
    struct freelist *fl;
64
};
65
66
67
static inline int list_empty(struct list *l)
68
0
{
69
0
    return l->next == l;
70
0
}
71
static inline void list_init(struct list *n)
72
0
{
73
0
    n->next = n;
74
0
    n->prev = n;
75
0
}
76
77
static inline void list_del(struct list *n)
78
0
{
79
0
    n->next->prev = n->prev;
80
0
    n->prev->next = n->next;
81
0
}
82
83
static inline void list_add(struct list *l, struct list *n)
84
0
{
85
0
    n->next = l->next;
86
0
    n->next->prev = n;
87
0
    l->next = n;
88
0
    n->prev = l;
89
0
}
90
91
static inline void list_add_tail(struct list *l, struct list *n)
92
0
{
93
0
    list_add(l->prev, n);
94
0
}
95
96
#undef offsetof
97
#ifdef __compiler_offsetof
98
#define offsetof(TYPE,MEMBER) __compiler_offsetof(TYPE,MEMBER)
99
#else
100
0
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
101
#endif
102
103
0
#define container_of(ptr, type, member) ((type *)((char *)(ptr) - offsetof(type, member)))
104
105
struct freelist {
106
    struct freelist *next;
107
};
108
109
static int ullog2(uint64_t x)
110
0
{
111
0
    return 63 - __builtin_clzll(x);
112
0
}
113
114
struct gkc_tuple {
115
    uint64_t value;
116
    double g;
117
    uint64_t delta;
118
    struct list node;
119
};
120
0
#define list_to_tuple(ln) (container_of((ln), struct gkc_tuple, node))
121
122
123
void gkc_summary_init(struct gkc_summary *s, double epsilon)
124
0
{
125
0
    list_init(&s->head);
126
0
    s->epsilon = epsilon;
127
0
}
128
129
struct gkc_summary *gkc_summary_alloc(double epsilon)
130
0
{
131
0
    struct gkc_summary *s;
132
0
    s = calloc(1, sizeof(*s));
133
0
    gkc_summary_init(s, epsilon);
134
0
    return s;
135
0
}
136
137
#include <assert.h>
138
/* debug only, checks a number of properties that s must satisfy at all times */
139
void gkc_sanity_check(struct gkc_summary *s)
140
0
{
141
0
    uint64_t nr_elems, nr_alloced;
142
0
    struct list *cur;
143
0
    struct gkc_tuple *tcur;
144
145
0
    nr_elems = 0;
146
0
    nr_alloced = 0;
147
0
    cur = s->head.next;
148
0
    while (cur != &s->head) {
149
0
        tcur = list_to_tuple(cur);
150
0
        cur = cur->next;
151
0
        nr_elems += tcur->g;
152
0
        nr_alloced++;
153
0
        if (s->nr_elems > (1/s->epsilon)) {
154
            /* there must be enough observations for this to become true */
155
0
            assert(tcur->g + tcur->delta <= (s->nr_elems * s->epsilon * 2));
156
0
        }
157
0
        assert(nr_alloced <= s->alloced);
158
0
    }
159
0
    assert(nr_elems == s->nr_elems);
160
0
    assert(nr_alloced == s->alloced);
161
0
}
162
163
static struct gkc_tuple *gkc_alloc(struct gkc_summary *s)
164
0
{
165
0
    s->alloced++;
166
0
    if (s->alloced > s->max_alloced) {
167
0
        s->max_alloced = s->alloced;
168
0
    }
169
170
0
    if (s->fl) {
171
0
        void *ret;
172
0
        ret = s->fl;
173
0
        s->fl = s->fl->next;
174
0
        return ret;
175
0
    }
176
0
    return malloc(sizeof(struct gkc_tuple));
177
0
}
178
179
static void gkc_free(struct gkc_summary *s, struct gkc_tuple *p)
180
0
{
181
0
    struct freelist *flp = (void *)p;
182
0
    s->alloced--;
183
184
0
    flp->next = s->fl;
185
0
    s->fl = flp;
186
0
}
187
188
void gkc_summary_free(struct gkc_summary *s)
189
0
{
190
0
    struct freelist *fl;
191
0
    struct list *cur;
192
193
0
    cur = s->head.next;
194
0
    while (cur != &s->head) {
195
0
        struct list *next;
196
0
        next = cur->next;
197
0
        gkc_free(s, list_to_tuple(cur));
198
0
        cur = next;
199
0
    }
200
0
    fl = s->fl;
201
0
    while (fl) {
202
0
        void *p;
203
0
        p = fl;
204
0
        fl = fl->next;
205
0
        free(p);
206
0
    }
207
0
    free(s);
208
0
}
209
210
uint64_t gkc_query(struct gkc_summary *s, double q)
211
0
{
212
0
    struct list *cur, *next;
213
0
    int rank;
214
0
    double gi;
215
0
    double ne;
216
217
0
    rank = 0.5 + q * s->nr_elems;
218
0
    ne = s->nr_elems * s->epsilon;
219
0
    gi = 0;
220
0
    if (list_empty(&s->head)) {
221
0
        return 0;
222
0
    }
223
224
0
    cur = s->head.next;
225
226
0
    while (1) {
227
0
        struct gkc_tuple *tcur, *tnext;
228
229
0
        tcur = list_to_tuple(cur);
230
0
        next = cur->next;
231
0
        if (next == &s->head) {
232
0
            return tcur->value;
233
0
        }
234
0
        tnext = list_to_tuple(next);
235
236
0
        gi += tcur->g;
237
0
        if ((rank + ne) < (gi + tnext->g + tnext->delta)) {
238
0
            if ((rank + ne) < (gi + tnext->g)) {
239
0
                return tcur->value;
240
0
            }
241
0
            return tnext->value;
242
0
        }
243
0
        cur = next;
244
0
    }
245
0
}
246
247
static uint64_t band(struct gkc_summary *s, uint64_t delta)
248
0
{
249
0
    uint64_t diff;
250
251
0
    diff = 1 + (s->epsilon * s->nr_elems * 2) - delta;
252
253
0
    return ullog2(diff);
254
0
}
255
256
static void gkc_compress(struct gkc_summary *s)
257
0
{
258
0
    int max_compress;
259
0
    struct list *cur, *prev;
260
0
    struct gkc_tuple *tcur, *tprev;
261
0
    uint64_t bi, b_plus_1;
262
263
0
    max_compress = 2 * s->epsilon * s->nr_elems;
264
0
    if (s->nr_elems < 2) {
265
0
        return;
266
0
    }
267
268
0
    prev = s->head.prev;
269
0
    cur = prev->prev;
270
271
0
    while (cur != &s->head) {
272
0
        tcur = list_to_tuple(cur);
273
0
        tprev = list_to_tuple(prev);
274
275
0
        b_plus_1 = band(s, tprev->delta);
276
0
        bi = band(s, tcur->delta);
277
278
0
        if (bi <= b_plus_1 && ((tcur->g + tprev->g + tprev->delta) <= max_compress)) {
279
0
            tprev->g += tcur->g;
280
0
            list_del(cur);
281
0
            gkc_free(s, tcur);
282
0
            cur = prev->prev;
283
0
            continue;
284
0
        }
285
0
        prev = cur;
286
0
        cur = cur->prev;
287
0
    }
288
0
}
289
290
void gkc_insert_value(struct gkc_summary *s, double value)
291
0
{
292
0
    struct list *cur = NULL;
293
0
    struct gkc_tuple *new, *tcur, *tnext = NULL;
294
295
0
    new = gkc_alloc(s);
296
0
    memset(new, 0, sizeof(*new));
297
0
    new->value = value;
298
0
    new->g = 1;
299
0
    list_init(&new->node);
300
301
302
0
    s->nr_elems++;
303
304
305
    /* first insert */
306
0
    if (list_empty(&s->head)) {
307
0
        list_add(&s->head, &new->node);
308
0
        return;
309
0
    }
310
311
0
    cur = s->head.next;
312
0
    tcur = list_to_tuple(cur);
313
    /* v < v0, new min */
314
0
    if (tcur->value > new->value) {
315
0
        list_add(&s->head, &new->node);
316
0
        goto out;
317
0
    }
318
319
0
    double gi = 0;
320
0
    while (cur->next != &s->head) {
321
0
        tnext = list_to_tuple(cur->next);
322
0
        tcur = list_to_tuple(cur);
323
324
0
        gi += tcur->g;
325
0
        if (tcur->value <= new->value && new->value < tnext->value) {
326
            /*     INSERT "(v, 1, Δ)" into S between vi and vi+1; */
327
0
            new->delta = tcur->g + tcur->delta - 1;
328
0
            list_add(cur, &new->node);
329
0
            goto out;
330
0
        }
331
0
        cur = cur->next;
332
0
    }
333
    /* v > vs-1, new max */
334
0
    list_add_tail(&s->head, &new->node);
335
0
out:
336
0
    if (s->nr_elems % (int)(1/(2*s->epsilon))) {
337
0
        gkc_compress(s);
338
0
    }
339
0
}
340
341
void gkc_print_summary(struct gkc_summary *s)
342
0
{
343
0
    struct gkc_tuple *tcur;
344
0
    struct list *cur;
345
346
0
    fprintf(stderr, "nr_elems: %zu, epsilon: %.02f, alloced: %" PRIu64 ", overfilled: %.02f, max_alloced: %" PRIu64 "\n",
347
0
            s->nr_elems, s->epsilon, s->alloced, 2 * s->epsilon * s->nr_elems, s->max_alloced);
348
0
    if (list_empty(&s->head)) {
349
0
        fprintf(stderr, "Empty summary\n");
350
0
        return;
351
0
    }
352
353
0
    cur = s->head.next;
354
0
    while (cur != &s->head) {
355
0
        tcur = list_to_tuple(cur);
356
0
        fprintf(stderr, "(v: %" PRIu64 ", g: %.02f, d: %" PRIu64 ") ", tcur->value, tcur->g, tcur->delta);
357
0
        cur = cur->next;
358
0
    }
359
0
    fprintf(stderr, "\n");
360
0
}
361
362
struct gkc_summary *gkc_combine(struct gkc_summary *s1, struct gkc_summary *s2)
363
0
{
364
0
    struct gkc_summary *snew;
365
0
    struct list *cur1, *cur2;
366
0
    struct gkc_tuple *tcur1, *tcur2, *tnew;
367
368
0
    if (s1->epsilon != s2->epsilon) {
369
0
        return NULL;
370
0
    }
371
0
    snew = gkc_summary_alloc(s1->epsilon);
372
373
0
    cur1 = s1->head.next;
374
0
    cur2 = s2->head.next;
375
0
    while (cur1 != &s1->head && cur2 != &s2->head) {
376
0
        tcur1 = list_to_tuple(cur1);
377
0
        tcur2 = list_to_tuple(cur2);
378
379
0
        tnew = gkc_alloc(snew);
380
0
        if (tcur1->value < tcur2->value) {
381
0
            tnew->value = tcur1->value;
382
0
            tnew->g = tcur1->g;
383
0
            tnew->delta = tcur1->delta;
384
0
            cur1 = cur1->next;
385
0
        } else {
386
0
            tnew->value = tcur2->value;
387
0
            tnew->g = tcur2->g;
388
0
            tnew->delta = tcur2->delta;
389
0
            cur2 = cur2->next;
390
0
        }
391
0
        list_add_tail(&snew->head, &tnew->node);
392
0
        snew->nr_elems += tnew->g;
393
0
    }
394
0
    while (cur1 != &s1->head) {
395
0
        tcur1 = list_to_tuple(cur1);
396
397
0
        tnew = gkc_alloc(snew);
398
0
        tnew->value = tcur1->value;
399
0
        tnew->g = tcur1->g;
400
0
        tnew->delta = tcur1->delta;
401
0
        list_add_tail(&snew->head, &tnew->node);
402
0
        snew->nr_elems += tnew->g;
403
0
        cur1 = cur1->next;
404
0
    }
405
0
    while (cur2 != &s2->head) {
406
0
        tcur2 = list_to_tuple(cur2);
407
408
0
        tnew = gkc_alloc(snew);
409
0
        tnew->value = tcur2->value;
410
0
        tnew->g = tcur2->g;
411
0
        tnew->delta = tcur2->delta;
412
0
        list_add_tail(&snew->head, &tnew->node);
413
0
        snew->nr_elems += tnew->g;
414
0
        cur2 = cur2->next;
415
0
    }
416
0
    snew->max_alloced = snew->alloced;
417
0
    gkc_compress(snew);
418
419
0
    return snew;
420
0
}