Coverage Report

Created: 2025-07-01 06:50

/src/openvswitch/lib/cmap.c
Line
Count
Source
1
/*
2
 * Copyright (c) 2014, 2016 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include "cmap.h"
19
#include "coverage.h"
20
#include "bitmap.h"
21
#include "hash.h"
22
#include "ovs-rcu.h"
23
#include "random.h"
24
#include "util.h"
25
26
COVERAGE_DEFINE(cmap_expand);
27
COVERAGE_DEFINE(cmap_shrink);
28
29
/* Optimistic Concurrent Cuckoo Hash
30
 * =================================
31
 *
32
 * A "cuckoo hash" is an open addressing hash table schema, designed such that
33
 * a given element can be in one of only a small number of buckets 'd', each of
34
 * which holds up to a small number 'k' elements.  Thus, the expected and
35
 * worst-case lookup times are O(1) because they require comparing no more than
36
 * a fixed number of elements (k * d).  Inserting a new element can require
37
 * moving around existing elements, but it is also O(1) amortized expected
38
 * time.
39
 *
40
 * An optimistic concurrent hash table goes one step further, making it
41
 * possible for a single writer to execute concurrently with any number of
42
 * readers without requiring the readers to take any locks.
43
 *
44
 * This cuckoo hash implementation uses:
45
 *
46
 *    - Two hash functions (d=2).  More hash functions allow for a higher load
47
 *      factor, but increasing 'k' is easier and the benefits of increasing 'd'
48
 *      quickly fall off with the 'k' values used here.  Also, the method of
49
 *      generating hashes used in this implementation is hard to reasonably
50
 *      extend beyond d=2.  Finally, each additional hash function means that a
51
 *      lookup has to examine at least one extra cache line.
52
 *
53
 *    - 5 or 7 elements per bucket (k=5 or k=7), chosen to make buckets
54
 *      exactly one cache line in size.
55
 *
56
 * According to Erlingsson [4], these parameters suggest a maximum load factor
57
 * of about 93%.  The current implementation is conservative, expanding the
58
 * hash table when it is over 85% full.
59
 *
60
 * When the load factor falls below 20%, the hash table is shrunk by half.
61
 * This is to reduce the memory utilization of the hash table and to avoid
62
 * the hash table occupying the top-of-heap chunk, which prevents the heap
63
 * from being trimmed.
64
 *
65
 * Hash Functions
66
 * ==============
67
 *
68
 * A cuckoo hash requires multiple hash functions.  When reorganizing the hash
69
 * becomes too difficult, it also requires the ability to change the hash
70
 * functions.  Requiring the client to provide multiple hashes and to be able
71
 * to change them to new hashes upon insertion is inconvenient.
72
 *
73
 * This implementation takes another approach.  The client provides a single,
74
 * fixed hash.  The cuckoo hash internally "rehashes" this hash against a
75
 * randomly selected basis value (see rehash()).  This rehashed value is one of
76
 * the two hashes.  The other hash is computed by 16-bit circular rotation of
77
 * the rehashed value.  Updating the basis changes the hash functions.
78
 *
79
 * To work properly, the hash functions used by a cuckoo hash must be
80
 * independent.  If one hash function is a function of the other (e.g. h2(x) =
81
 * h1(x) + 1, or h2(x) = hash(h1(x))), then insertion will eventually fail
82
 * catastrophically (loop forever) because of collisions.  With this rehashing
83
 * technique, the two hashes are completely independent for masks up to 16 bits
84
 * wide.  For masks wider than 16 bits, only 32-n bits are independent between
85
 * the two hashes.  Thus, it becomes risky to grow a cuckoo hash table beyond
86
 * about 2**24 buckets (about 71 million elements with k=5 and maximum load
87
 * 85%).  Fortunately, Open vSwitch does not normally deal with hash tables
88
 * this large.
89
 *
90
 *
91
 * Handling Duplicates
92
 * ===================
93
 *
94
 * This cuckoo hash table implementation deals with duplicate client-provided
95
 * hash values by chaining: the second and subsequent cmap_nodes with a given
96
 * hash are chained off the initially inserted node's 'next' member.  The hash
97
 * table maintains the invariant that a single client-provided hash value
98
 * exists in only a single chain in a single bucket (even though that hash
99
 * could be stored in two buckets).
100
 *
101
 *
102
 * References
103
 * ==========
104
 *
105
 * [1] D. Zhou, B. Fan, H. Lim, M. Kaminsky, D. G. Andersen, "Scalable, High
106
 *     Performance Ethernet Forwarding with CuckooSwitch".  In Proc. 9th
107
 *     CoNEXT, Dec. 2013.
108
 *
109
 * [2] B. Fan, D. G. Andersen, and M. Kaminsky. "MemC3: Compact and concurrent
110
 *     memcache with dumber caching and smarter hashing".  In Proc. 10th USENIX
111
 *     NSDI, Apr. 2013
112
 *
113
 * [3] R. Pagh and F. Rodler. "Cuckoo hashing". Journal of Algorithms, 51(2):
114
 *     122-144, May 2004.
115
 *
116
 * [4] U. Erlingsson, M. Manasse, F. McSherry, "A Cool and Practical
117
 *     Alternative to Traditional Hash Tables".  In Proc. 7th Workshop on
118
 *     Distributed Data and Structures (WDAS'06), 2006.
119
 */
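/* Illustrative usage sketch, not part of the original file: a client embeds a
 * 'struct cmap_node' in its own element type and supplies one fixed hash per
 * element; the cmap derives both cuckoo hashes internally as described under
 * "Hash Functions" above.  'example_flow', its 'port' key, and
 * example_flow_add() are hypothetical names used only for illustration. */
struct example_flow {
    struct cmap_node node;      /* Links the element into a cmap. */
    uint32_t port;              /* Hypothetical lookup key. */
};

static void
example_flow_add(struct cmap *table, struct example_flow *flow)
{
    cmap_insert(table, &flow->node, hash_int(flow->port, 0));
}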
120
/* An entry is an int and a pointer: 8 bytes on 32-bit, 12 bytes on 64-bit. */
121
0
#define CMAP_ENTRY_SIZE (4 + (UINTPTR_MAX == UINT32_MAX ? 4 : 8))
122
123
/* Number of entries per bucket: 7 on 32-bit, 5 on 64-bit for 64B cacheline. */
124
0
#define CMAP_K ((CACHE_LINE_SIZE - 4) / CMAP_ENTRY_SIZE)
125
126
/* A cuckoo hash bucket.  Designed to be cache-aligned and exactly one cache
127
 * line long. */
128
struct cmap_bucket {
129
    /* Padding to make cmap_bucket exactly one cache line long. */
130
    PADDED_MEMBERS(CACHE_LINE_SIZE,
131
        /* Allows readers to track in-progress changes.  Initially zero, each
132
         * writer increments this value just before and just after each change
133
         * (see cmap_set_bucket()).  Thus, a reader can ensure that it gets a
134
         * consistent snapshot by waiting for the counter to become even (see
135
         * read_even_counter()), then checking that its value does not change
136
         * while examining the bucket (see cmap_find()). */
137
        atomic_uint32_t counter;
138
139
        /* (hash, node) slots.  They are parallel arrays instead of an array of
140
         * structs to reduce the amount of space lost to padding.
141
         *
142
         * The slots are in no particular order.  A null pointer indicates that
143
         * a pair is unused.  In-use slots are not necessarily in the earliest
144
         * slots. */
145
        uint32_t hashes[CMAP_K];
146
        struct cmap_node nodes[CMAP_K];
147
    );
148
};
149
BUILD_ASSERT_DECL(sizeof(struct cmap_bucket) == CACHE_LINE_SIZE);
150
151
/* Default maximum load factor (as a fraction of UINT32_MAX + 1) before
152
 * enlarging a cmap.  Reasonable values lie between about 75% and 93%.  Smaller
153
 * values waste memory; larger values increase the average insertion time. */
154
0
#define CMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))
155
156
/* Default minimum load factor (as a fraction of UINT32_MAX + 1) before
157
 * shrinking a cmap.  Currently the value is chosen to be 20%, which
158
 * means the cmap will have a 40% load factor after shrinking. */
159
0
#define CMAP_MIN_LOAD ((uint32_t) (UINT32_MAX * .20))
160
161
/* The implementation of a concurrent hash map. */
162
struct cmap_impl {
163
    PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
164
        unsigned int n;             /* Number of in-use elements. */
165
        unsigned int max_n;         /* Max elements before enlarging. */
166
        unsigned int min_n;         /* Min elements before shrinking. */
167
        uint32_t mask;              /* Number of 'buckets', minus one. */
168
        uint32_t basis;             /* Basis for rehashing client's
169
                                       hash values. */
170
    );
171
172
    PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
173
        struct cmap_bucket buckets[1];
174
    );
175
};
176
BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE * 2);
177
178
/* An empty cmap. */
179
OVS_ALIGNED_VAR(CACHE_LINE_SIZE) const struct cmap_impl empty_cmap;
180
181
static struct cmap_impl *cmap_rehash(struct cmap *, uint32_t mask);
182
183
/* Explicit inline keywords in utility functions seem to be necessary
184
 * to prevent performance regression on cmap_find(). */
185
186
/* Given a rehashed value 'hash', returns the other hash for that rehashed
187
 * value.  This is symmetric: other_hash(other_hash(x)) == x.  (See also "Hash
188
 * Functions" at the top of this file.) */
189
static inline uint32_t
190
other_hash(uint32_t hash)
191
0
{
192
0
    return (hash << 16) | (hash >> 16);
193
0
}
194
195
/* Returns the rehashed value for 'hash' within 'impl'.  (See also "Hash
196
 * Functions" at the top of this file.) */
197
static inline uint32_t
198
rehash(const struct cmap_impl *impl, uint32_t hash)
199
0
{
200
0
    return hash_finish(impl->basis, hash);
201
0
}
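/* Illustrative sketch, not part of the original file: how the two candidate
 * bucket indexes for a client-provided hash are derived, mirroring
 * cmap_find().  example_bucket_pair() is a hypothetical helper that exists
 * only to show the derivation. */
static inline void
example_bucket_pair(const struct cmap_impl *impl, uint32_t hash,
                    uint32_t *b1, uint32_t *b2)
{
    uint32_t h1 = rehash(impl, hash);   /* First hash: client hash rehashed
                                         * against 'impl->basis'. */
    uint32_t h2 = other_hash(h1);       /* Second hash: 16-bit rotation. */

    *b1 = h1 & impl->mask;
    *b2 = h2 & impl->mask;
}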
202
203
/* Not always inlined without the inline keyword. */
204
static inline struct cmap_impl *
205
cmap_get_impl(const struct cmap *cmap)
206
0
{
207
0
    return ovsrcu_get(struct cmap_impl *, &cmap->impl);
208
0
}
209
210
static uint32_t
211
calc_max_n(uint32_t mask)
212
0
{
213
0
    return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MAX_LOAD) >> 32;
214
0
}
215
216
static uint32_t
217
calc_min_n(uint32_t mask)
218
0
{
219
0
    return ((uint64_t) (mask + 1) * CMAP_K * CMAP_MIN_LOAD) >> 32;
220
0
}
221
222
static struct cmap_impl *
223
cmap_impl_create(uint32_t mask)
224
0
{
225
0
    struct cmap_impl *impl;
226
227
0
    ovs_assert(is_pow2(mask + 1));
228
229
    /* There are 'mask + 1' buckets but struct cmap_impl has one bucket built
230
     * in, so we only need to add space for the extra 'mask' buckets. */
231
0
    impl = xzalloc_cacheline(sizeof *impl + mask * sizeof *impl->buckets);
232
0
    impl->n = 0;
233
0
    impl->max_n = calc_max_n(mask);
234
0
    impl->min_n = calc_min_n(mask);
235
0
    impl->mask = mask;
236
0
    impl->basis = random_uint32();
237
238
0
    return impl;
239
0
}
240
241
/* Initializes 'cmap' as an empty concurrent hash map. */
242
void
243
cmap_init(struct cmap *cmap)
244
0
{
245
0
    ovsrcu_set(&cmap->impl, CONST_CAST(struct cmap_impl *, &empty_cmap));
246
0
}
247
248
/* Destroys 'cmap'.
249
 *
250
 * The client is responsible for destroying any data previously held in
251
 * 'cmap'. */
252
void
253
cmap_destroy(struct cmap *cmap)
254
0
{
255
0
    if (cmap) {
256
0
        struct cmap_impl *impl = cmap_get_impl(cmap);
257
0
        if (impl != &empty_cmap) {
258
0
            ovsrcu_postpone(free_cacheline, impl);
259
0
        }
260
0
    }
261
0
}
262
263
/* Returns the number of elements in 'cmap'. */
264
size_t
265
cmap_count(const struct cmap *cmap)
266
0
{
267
0
    return cmap_get_impl(cmap)->n;
268
0
}
269
270
/* Returns true if 'cmap' is empty, false otherwise. */
271
bool
272
cmap_is_empty(const struct cmap *cmap)
273
0
{
274
0
    return cmap_count(cmap) == 0;
275
0
}
276
277
static inline uint32_t
278
read_counter(const struct cmap_bucket *bucket_)
279
0
{
280
0
    struct cmap_bucket *bucket = CONST_CAST(struct cmap_bucket *, bucket_);
281
0
    uint32_t counter;
282
283
0
    atomic_read_explicit(&bucket->counter, &counter, memory_order_acquire);
284
285
0
    return counter;
286
0
}
287
288
static inline uint32_t
289
read_even_counter(const struct cmap_bucket *bucket)
290
0
{
291
0
    uint32_t counter;
292
293
0
    do {
294
0
        counter = read_counter(bucket);
295
0
    } while (OVS_UNLIKELY(counter & 1));
296
297
0
    return counter;
298
0
}
299
300
static inline bool
301
counter_changed(const struct cmap_bucket *b_, uint32_t c)
302
0
{
303
0
    struct cmap_bucket *b = CONST_CAST(struct cmap_bucket *, b_);
304
0
    uint32_t counter;
305
306
    /* Need to make sure the counter read is not moved up, before the hash and
307
     * cmap_node_next().  Using atomic_read_explicit with memory_order_acquire
308
     * would allow prior reads to be moved after the barrier.
309
     * atomic_thread_fence prevents all following memory accesses from moving
310
     * prior to preceding loads. */
311
0
    atomic_thread_fence(memory_order_acquire);
312
0
    atomic_read_relaxed(&b->counter, &counter);
313
314
0
    return OVS_UNLIKELY(counter != c);
315
0
}
316
317
static inline const struct cmap_node *
318
cmap_find_in_bucket(const struct cmap_bucket *bucket, uint32_t hash)
319
0
{
320
0
    for (int i = 0; i < CMAP_K; i++) {
321
0
        if (bucket->hashes[i] == hash) {
322
0
            return cmap_node_next(&bucket->nodes[i]);
323
0
        }
324
0
    }
325
0
    return NULL;
326
0
}
327
328
static inline const struct cmap_node *
329
cmap_find__(const struct cmap_bucket *b1, const struct cmap_bucket *b2,
330
            uint32_t hash)
331
0
{
332
0
    uint32_t c1, c2;
333
0
    const struct cmap_node *node;
334
335
0
    do {
336
0
        do {
337
0
            c1 = read_even_counter(b1);
338
0
            node = cmap_find_in_bucket(b1, hash);
339
0
        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
340
0
        if (node) {
341
0
            break;
342
0
        }
343
0
        do {
344
0
            c2 = read_even_counter(b2);
345
0
            node = cmap_find_in_bucket(b2, hash);
346
0
        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
347
0
        if (node) {
348
0
            break;
349
0
        }
350
0
    } while (OVS_UNLIKELY(counter_changed(b1, c1)));
351
352
0
    return node;
353
0
}
354
355
/* Searches 'cmap' for an element with the specified 'hash'.  If one or more is
356
 * found, returns a pointer to the first one, otherwise a null pointer.  All of
357
 * the nodes on the returned list are guaranteed to have exactly the given
358
 * 'hash'.
359
 *
360
 * This function works even if 'cmap' is changing concurrently.  If 'cmap' is
361
 * not changing, then cmap_find_protected() is slightly faster.
362
 *
363
 * CMAP_FOR_EACH_WITH_HASH is usually more convenient. */
364
const struct cmap_node *
365
cmap_find(const struct cmap *cmap, uint32_t hash)
366
0
{
367
0
    const struct cmap_impl *impl = cmap_get_impl(cmap);
368
0
    uint32_t h1 = rehash(impl, hash);
369
0
    uint32_t h2 = other_hash(h1);
370
371
0
    return cmap_find__(&impl->buckets[h1 & impl->mask],
372
0
                       &impl->buckets[h2 & impl->mask],
373
0
                       hash);
374
0
}
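/* Illustrative sketch, not part of the original file: because different keys
 * may share a hash, a caller iterates over the chain that cmap_find() returns
 * and checks the actual key.  CMAP_FOR_EACH_WITH_HASH (from cmap.h) wraps
 * that pattern; this reuses the hypothetical 'example_flow' from the sketch
 * near the top of this file. */
static struct example_flow *
example_flow_find(const struct cmap *table, uint32_t port)
{
    struct example_flow *flow;

    CMAP_FOR_EACH_WITH_HASH (flow, node, hash_int(port, 0), table) {
        if (flow->port == port) {
            return flow;
        }
    }
    return NULL;
}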
375
376
/* Finds a node by the index of its entry in the cmap.  Index N refers to the
377
 * N/CMAP_K'th bucket and the N%CMAP_K'th entry in that bucket.
378
 * Notice that it is not protected by the optimistic lock (versioning) because
379
 * it does not compare the hashes. Currently it is only used by the datapath
380
 * SMC cache.
381
 *
382
 * Returns the node for the entry at 'index', or NULL if 'index' is out of range. */
383
const struct cmap_node *
384
cmap_find_by_index(const struct cmap *cmap, uint32_t index)
385
0
{
386
0
    const struct cmap_impl *impl = cmap_get_impl(cmap);
387
388
0
    uint32_t b = index / CMAP_K;
389
0
    uint32_t e = index % CMAP_K;
390
391
0
    if (b > impl->mask) {
392
0
        return NULL;
393
0
    }
394
395
0
    const struct cmap_bucket *bucket = &impl->buckets[b];
396
397
0
    return cmap_node_next(&bucket->nodes[e]);
398
0
}
399
400
/* Finds the index of the entry with a certain hash value.  Currently only used
401
 * by the datapath SMC cache.
402
 *
403
 * Returns the index of the entry if found, or UINT32_MAX if not found.  The
404
 * function assumes entry index cannot be larger than UINT32_MAX. */
405
uint32_t
406
cmap_find_index(const struct cmap *cmap, uint32_t hash)
407
0
{
408
0
    const struct cmap_impl *impl = cmap_get_impl(cmap);
409
0
    uint32_t h1 = rehash(impl, hash);
410
0
    uint32_t h2 = other_hash(h1);
411
412
0
    uint32_t b_index1 = h1 & impl->mask;
413
0
    uint32_t b_index2 = h2 & impl->mask;
414
415
0
    uint32_t c1, c2;
416
0
    uint32_t index = UINT32_MAX;
417
418
0
    const struct cmap_bucket *b1 = &impl->buckets[b_index1];
419
0
    const struct cmap_bucket *b2 = &impl->buckets[b_index2];
420
421
0
    do {
422
0
        do {
423
0
            c1 = read_even_counter(b1);
424
0
            for (int i = 0; i < CMAP_K; i++) {
425
0
                if (b1->hashes[i] == hash) {
426
0
                    index = b_index1 * CMAP_K + i;
427
0
                }
428
0
            }
429
0
        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
430
0
        if (index != UINT32_MAX) {
431
0
            break;
432
0
        }
433
0
        do {
434
0
            c2 = read_even_counter(b2);
435
0
            for (int i = 0; i < CMAP_K; i++) {
436
0
                if (b2->hashes[i] == hash) {
437
0
                    index = b_index2 * CMAP_K + i;
438
0
                }
439
0
            }
440
0
        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
441
442
0
        if (index != UINT32_MAX) {
443
0
            break;
444
0
        }
445
0
    } while (OVS_UNLIKELY(counter_changed(b1, c1)));
446
447
0
    return index;
448
0
}
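/* Illustrative sketch, not part of the original file: cmap_find_index() and
 * cmap_find_by_index() can be paired to stash a compact entry index (as the
 * datapath SMC cache does) and later recover the node chain.  Since
 * cmap_find_by_index() does not compare hashes, the caller must still verify
 * the key.  example_index_round_trip() is a hypothetical helper. */
static const struct cmap_node *
example_index_round_trip(const struct cmap *table, uint32_t hash)
{
    uint32_t index = cmap_find_index(table, hash);

    return index != UINT32_MAX ? cmap_find_by_index(table, index) : NULL;
}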
449
450
/* Looks up multiple 'hashes', when the corresponding bit in 'map' is 1,
451
 * and sets the corresponding pointer in 'nodes', if the hash value was
452
 * found in the 'cmap'.  In other cases the 'nodes' values are not changed,
453
 * i.e., no NULL pointers are stored there.
454
 * Returns a map where a bit is set to 1 if the corresponding 'nodes' pointer
455
 * was stored, 0 otherwise.
456
 * Generally, the caller wants to use CMAP_NODE_FOR_EACH to verify for
457
 * hash collisions. */
458
unsigned long
459
cmap_find_batch(const struct cmap *cmap, unsigned long map,
460
                uint32_t hashes[], const struct cmap_node *nodes[])
461
0
{
462
0
    const struct cmap_impl *impl = cmap_get_impl(cmap);
463
0
    unsigned long result = map;
464
0
    int i;
465
0
    uint32_t h1s[sizeof map * CHAR_BIT];
466
0
    const struct cmap_bucket *b1s[sizeof map * CHAR_BIT];
467
0
    const struct cmap_bucket *b2s[sizeof map * CHAR_BIT];
468
0
    uint32_t c1s[sizeof map * CHAR_BIT];
469
470
    /* Compute hashes and prefetch 1st buckets. */
471
0
    ULLONG_FOR_EACH_1(i, map) {
472
0
        h1s[i] = rehash(impl, hashes[i]);
473
0
        b1s[i] = &impl->buckets[h1s[i] & impl->mask];
474
0
        OVS_PREFETCH(b1s[i]);
475
0
    }
476
    /* Lookups, Round 1. Only look up at the first bucket. */
477
0
    ULLONG_FOR_EACH_1(i, map) {
478
0
        uint32_t c1;
479
0
        const struct cmap_bucket *b1 = b1s[i];
480
0
        const struct cmap_node *node;
481
482
0
        do {
483
0
            c1 = read_even_counter(b1);
484
0
            node = cmap_find_in_bucket(b1, hashes[i]);
485
0
        } while (OVS_UNLIKELY(counter_changed(b1, c1)));
486
487
0
        if (!node) {
488
            /* Not found (yet); Prefetch the 2nd bucket. */
489
0
            b2s[i] = &impl->buckets[other_hash(h1s[i]) & impl->mask];
490
0
            OVS_PREFETCH(b2s[i]);
491
0
            c1s[i] = c1; /* We may need to check this after Round 2. */
492
0
            continue;
493
0
        }
494
        /* Found. */
495
0
        ULLONG_SET0(map, i); /* Ignore this on round 2. */
496
0
        OVS_PREFETCH(node);
497
0
        nodes[i] = node;
498
0
    }
499
    /* Round 2. Look into the 2nd bucket, if needed. */
500
0
    ULLONG_FOR_EACH_1(i, map) {
501
0
        uint32_t c2;
502
0
        const struct cmap_bucket *b2 = b2s[i];
503
0
        const struct cmap_node *node;
504
505
0
        do {
506
0
            c2 = read_even_counter(b2);
507
0
            node = cmap_find_in_bucket(b2, hashes[i]);
508
0
        } while (OVS_UNLIKELY(counter_changed(b2, c2)));
509
510
0
        if (!node) {
511
            /* Not found, but the node may have been moved from b2 to b1 right
512
             * after we finished with b1 earlier.  We just got a clean reading
513
             * of the 2nd bucket, so we check the counter of the 1st bucket
514
             * only.  However, we need to check both buckets again, as the
515
             * entry may be moved again to the 2nd bucket.  Basically, we
516
             * need to loop as long as it takes to get stable readings of
517
             * both buckets.  cmap_find__() does that, and now that we have
518
             * fetched both buckets we can just use it. */
519
0
            if (OVS_UNLIKELY(counter_changed(b1s[i], c1s[i]))) {
520
0
                node = cmap_find__(b1s[i], b2s[i], hashes[i]);
521
0
                if (node) {
522
0
                    goto found;
523
0
                }
524
0
            }
525
            /* Not found. */
526
0
            ULLONG_SET0(result, i); /* Fix the result. */
527
0
            continue;
528
0
        }
529
0
found:
530
0
        OVS_PREFETCH(node);
531
0
        nodes[i] = node;
532
0
    }
533
0
    return result;
534
0
}
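/* Illustrative sketch, not part of the original file: batching lookups.  Each
 * bit set in 'map' requests a lookup of the corresponding hash; bits remain
 * set in the result only where a chain was found.  example_batch_count() and
 * its 'keys' array are hypothetical. */
static size_t
example_batch_count(const struct cmap *table, const uint32_t keys[], size_t n)
{
    uint32_t hashes[sizeof(unsigned long) * CHAR_BIT];
    const struct cmap_node *nodes[sizeof(unsigned long) * CHAR_BIT];
    unsigned long map = 0;
    size_t found = 0;
    int i;

    ovs_assert(n <= sizeof map * CHAR_BIT);
    for (i = 0; i < (int) n; i++) {
        hashes[i] = hash_int(keys[i], 0);
        map |= 1UL << i;
    }

    map = cmap_find_batch(table, map, hashes, nodes);
    ULLONG_FOR_EACH_1 (i, map) {
        found++;        /* nodes[i] heads the chain for hashes[i]. */
    }
    return found;
}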
535
536
static int
537
cmap_find_slot_protected(struct cmap_bucket *b, uint32_t hash)
538
0
{
539
0
    int i;
540
541
0
    for (i = 0; i < CMAP_K; i++) {
542
0
        if (b->hashes[i] == hash && cmap_node_next_protected(&b->nodes[i])) {
543
0
            return i;
544
0
        }
545
0
    }
546
0
    return -1;
547
0
}
548
549
static struct cmap_node *
550
cmap_find_bucket_protected(struct cmap_impl *impl, uint32_t hash, uint32_t h)
551
0
{
552
0
    struct cmap_bucket *b = &impl->buckets[h & impl->mask];
553
0
    int i;
554
555
0
    for (i = 0; i < CMAP_K; i++) {
556
0
        if (b->hashes[i] == hash) {
557
0
            return cmap_node_next_protected(&b->nodes[i]);
558
0
        }
559
0
    }
560
0
    return NULL;
561
0
}
562
563
/* Like cmap_find(), but only for use if 'cmap' cannot change concurrently.
564
 *
565
 * CMAP_FOR_EACH_WITH_HASH_PROTECTED is usually more convenient. */
566
struct cmap_node *
567
cmap_find_protected(const struct cmap *cmap, uint32_t hash)
568
0
{
569
0
    struct cmap_impl *impl = cmap_get_impl(cmap);
570
0
    uint32_t h1 = rehash(impl, hash);
571
0
    uint32_t h2 = other_hash(h1);
572
0
    struct cmap_node *node;
573
574
0
    node = cmap_find_bucket_protected(impl, hash, h1);
575
0
    if (node) {
576
0
        return node;
577
0
    }
578
0
    return cmap_find_bucket_protected(impl, hash, h2);
579
0
}
580
581
static int
582
cmap_find_empty_slot_protected(const struct cmap_bucket *b)
583
0
{
584
0
    int i;
585
586
0
    for (i = 0; i < CMAP_K; i++) {
587
0
        if (!cmap_node_next_protected(&b->nodes[i])) {
588
0
            return i;
589
0
        }
590
0
    }
591
0
    return -1;
592
0
}
593
594
static void
595
cmap_set_bucket(struct cmap_bucket *b, int i,
596
                struct cmap_node *node, uint32_t hash)
597
0
{
598
0
    uint32_t c;
599
600
0
    atomic_read_explicit(&b->counter, &c, memory_order_acquire);
601
0
    atomic_store_explicit(&b->counter, c + 1, memory_order_relaxed);
602
    /* Need to make sure setting hash is not moved up before counter update. */
603
0
    atomic_thread_fence(memory_order_release);
604
0
    ovsrcu_set(&b->nodes[i].next, node); /* Also atomic. */
605
0
    b->hashes[i] = hash;
606
0
    atomic_store_explicit(&b->counter, c + 2, memory_order_release);
607
0
}
608
609
/* Searches 'b' for a node with the given 'hash'.  If it finds one, adds
610
 * 'new_node' to the node's linked list and returns true.  If it does not find
611
 * one, returns false. */
612
static bool
613
cmap_insert_dup(struct cmap_node *new_node, uint32_t hash,
614
                struct cmap_bucket *b)
615
0
{
616
0
    int i;
617
618
0
    for (i = 0; i < CMAP_K; i++) {
619
0
        if (b->hashes[i] == hash) {
620
0
            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);
621
622
0
            if (node) {
623
0
                struct cmap_node *p;
624
625
                /* The common case is that 'new_node' is a singleton,
626
                 * with a null 'next' pointer.  Rehashing can add a
627
                 * longer chain, but due to our invariant of always
628
                 * having all nodes with the same (user) hash value at
629
                 * a single chain, rehashing will always insert the
630
                 * chain to an empty node.  The only way we can end up
631
                 * here is by the user inserting a chain of nodes at
632
                 * once.  Find the end of the chain starting at
633
                 * 'new_node', then splice 'node' to the end of that
634
                 * chain. */
635
0
                p = new_node;
636
0
                for (;;) {
637
0
                    struct cmap_node *next = cmap_node_next_protected(p);
638
639
0
                    if (!next) {
640
0
                        break;
641
0
                    }
642
0
                    p = next;
643
0
                }
644
0
                ovsrcu_set_hidden(&p->next, node);
645
0
            } else {
646
                /* The hash value is there from some previous insertion, but
647
                 * the associated node has been removed.  We're not really
648
                 * inserting a duplicate, but we can still reuse the slot.
649
                 * Carry on. */
650
0
            }
651
652
            /* Change the bucket to point to 'new_node'.  This is a degenerate
653
             * form of cmap_set_bucket() that doesn't update the counter since
654
             * we're only touching one field and in a way that doesn't change
655
             * the bucket's meaning for readers. */
656
0
            ovsrcu_set(&b->nodes[i].next, new_node);
657
658
0
            return true;
659
0
        }
660
0
    }
661
0
    return false;
662
0
}
663
664
/* Searches 'b' for an empty slot.  If successful, stores 'node' and 'hash' in
665
 * the slot and returns true.  Otherwise, returns false. */
666
static bool
667
cmap_insert_bucket(struct cmap_node *node, uint32_t hash,
668
                   struct cmap_bucket *b)
669
0
{
670
0
    int i;
671
672
0
    for (i = 0; i < CMAP_K; i++) {
673
0
        if (!cmap_node_next_protected(&b->nodes[i])) {
674
0
            cmap_set_bucket(b, i, node, hash);
675
0
            return true;
676
0
        }
677
0
    }
678
0
    return false;
679
0
}
680
681
/* Returns the other bucket that b->nodes[slot] could occupy in 'impl'.  (This
682
 * might be the same as 'b'.) */
683
static struct cmap_bucket *
684
other_bucket_protected(struct cmap_impl *impl, struct cmap_bucket *b, int slot)
685
0
{
686
0
    uint32_t h1 = rehash(impl, b->hashes[slot]);
687
0
    uint32_t h2 = other_hash(h1);
688
0
    uint32_t b_idx = b - impl->buckets;
689
0
    uint32_t other_h = (h1 & impl->mask) == b_idx ? h2 : h1;
690
691
0
    return &impl->buckets[other_h & impl->mask];
692
0
}
693
694
/* 'new_node' is to be inserted into 'impl', but both candidate buckets 'b1'
695
 * and 'b2' are full.  This function attempts to rearrange buckets within
696
 * 'impl' to make room for 'new_node'.
697
 *
698
 * The implementation is a general-purpose breadth-first search.  At first
699
 * glance, this is more complex than a random walk through 'impl' (suggested by
700
 * some references), but random walks have a tendency to loop back through a
701
 * single bucket.  We have to move nodes backward along the path that we find,
702
 * so that no node actually disappears from the hash table, which means a
703
 * random walk would have to be careful to deal with loops.  By contrast, a
704
 * successful breadth-first search always finds a *shortest* path through the
705
 * hash table, and a shortest path will never contain loops, so it avoids that
706
 * problem entirely.
707
 */
708
static bool
709
cmap_insert_bfs(struct cmap_impl *impl, struct cmap_node *new_node,
710
                uint32_t hash, struct cmap_bucket *b1, struct cmap_bucket *b2)
711
0
{
712
0
    enum { MAX_DEPTH = 4 };
713
714
    /* A path from 'start' to 'end' via the 'n' steps in 'slots[]'.
715
     *
716
     * One can follow the path via:
717
     *
718
     *     struct cmap_bucket *b;
719
     *     int i;
720
     *
721
     *     b = path->start;
722
     *     for (i = 0; i < path->n; i++) {
723
     *         b = other_bucket_protected(impl, b, path->slots[i]);
724
     *     }
725
     *     ovs_assert(b == path->end);
726
     */
727
0
    struct cmap_path {
728
0
        struct cmap_bucket *start; /* First bucket along the path. */
729
0
        struct cmap_bucket *end;   /* Last bucket on the path. */
730
0
        uint8_t slots[MAX_DEPTH];  /* Slots used for each hop. */
731
0
        int n;                     /* Number of slots[]. */
732
0
    };
733
734
    /* We need to limit the amount of work we do trying to find a path.  It
735
     * might actually be impossible to rearrange the cmap, and after some time
736
     * it is likely to be easier to rehash the entire cmap.
737
     *
738
     * This value of MAX_QUEUE is an arbitrary limit suggested by one of the
739
     * references.  Empirically, it seems to work OK. */
740
0
    enum { MAX_QUEUE = 500 };
741
0
    struct cmap_path queue[MAX_QUEUE];
742
0
    int head = 0;
743
0
    int tail = 0;
744
745
    /* Add 'b1' and 'b2' as starting points for the search. */
746
0
    queue[head].start = b1;
747
0
    queue[head].end = b1;
748
0
    queue[head].n = 0;
749
0
    head++;
750
0
    if (b1 != b2) {
751
0
        queue[head].start = b2;
752
0
        queue[head].end = b2;
753
0
        queue[head].n = 0;
754
0
        head++;
755
0
    }
756
757
0
    while (tail < head) {
758
0
        const struct cmap_path *path = &queue[tail++];
759
0
        struct cmap_bucket *this = path->end;
760
0
        int i;
761
762
0
        for (i = 0; i < CMAP_K; i++) {
763
0
            struct cmap_bucket *next = other_bucket_protected(impl, this, i);
764
0
            int j;
765
766
0
            if (this == next) {
767
0
                continue;
768
0
            }
769
770
0
            j = cmap_find_empty_slot_protected(next);
771
0
            if (j >= 0) {
772
                /* We've found a path along which we can rearrange the hash
773
                 * table:  Start at path->start, follow all the slots in
774
                 * path->slots[], then follow slot 'i', then the bucket you
775
                 * arrive at has slot 'j' empty. */
776
0
                struct cmap_bucket *buckets[MAX_DEPTH + 2];
777
0
                int slots[MAX_DEPTH + 2];
778
0
                int k;
779
780
                /* Figure out the full sequence of slots. */
781
0
                for (k = 0; k < path->n; k++) {
782
0
                    slots[k] = path->slots[k];
783
0
                }
784
0
                slots[path->n] = i;
785
0
                slots[path->n + 1] = j;
786
787
                /* Figure out the full sequence of buckets. */
788
0
                buckets[0] = path->start;
789
0
                for (k = 0; k <= path->n; k++) {
790
0
                    buckets[k + 1] = other_bucket_protected(impl, buckets[k], slots[k]);
791
0
                }
792
793
                /* Now the path is fully expressed.  One can start from
794
                 * buckets[0], go via slots[0] to buckets[1], via slots[1] to
795
                 * buckets[2], and so on.
796
                 *
797
                 * Move all the nodes across the path "backward".  After each
798
                 * step some node appears in two buckets.  Thus, every node is
799
                 * always visible to a concurrent search. */
800
0
                for (k = path->n + 1; k > 0; k--) {
801
0
                    int slot = slots[k - 1];
802
803
0
                    cmap_set_bucket(
804
0
                        buckets[k], slots[k],
805
0
                        cmap_node_next_protected(&buckets[k - 1]->nodes[slot]),
806
0
                        buckets[k - 1]->hashes[slot]);
807
0
                }
808
809
                /* Finally, replace the first node on the path by
810
                 * 'new_node'. */
811
0
                cmap_set_bucket(buckets[0], slots[0], new_node, hash);
812
813
0
                return true;
814
0
            }
815
816
0
            if (path->n < MAX_DEPTH && head < MAX_QUEUE) {
817
0
                struct cmap_path *new_path = &queue[head++];
818
819
0
                *new_path = *path;
820
0
                new_path->end = next;
821
0
                new_path->slots[new_path->n++] = i;
822
0
            }
823
0
        }
824
0
    }
825
826
0
    return false;
827
0
}
828
829
/* Adds 'node', with the given 'hash', to 'impl'.
830
 *
831
 * 'node' is ordinarily a single node, with a null 'next' pointer.  When
832
 * rehashing, however, it may be a longer chain of nodes. */
833
static bool
834
cmap_try_insert(struct cmap_impl *impl, struct cmap_node *node, uint32_t hash)
835
0
{
836
0
    uint32_t h1 = rehash(impl, hash);
837
0
    uint32_t h2 = other_hash(h1);
838
0
    struct cmap_bucket *b1 = &impl->buckets[h1 & impl->mask];
839
0
    struct cmap_bucket *b2 = &impl->buckets[h2 & impl->mask];
840
841
0
    return (OVS_UNLIKELY(cmap_insert_dup(node, hash, b1) ||
842
0
                         cmap_insert_dup(node, hash, b2)) ||
843
0
            OVS_LIKELY(cmap_insert_bucket(node, hash, b1) ||
844
0
                       cmap_insert_bucket(node, hash, b2)) ||
845
0
            cmap_insert_bfs(impl, node, hash, b1, b2));
846
0
}
847
848
/* Inserts 'node', with the given 'hash', into 'cmap'.  The caller must ensure
849
 * that 'cmap' cannot change concurrently (from another thread).  If duplicates
850
 * are undesirable, the caller must have already verified that 'cmap' does not
851
 * contain a duplicate of 'node'.
852
 *
853
 * Returns the current number of nodes in the cmap after the insertion. */
854
size_t
855
cmap_insert(struct cmap *cmap, struct cmap_node *node, uint32_t hash)
856
0
{
857
0
    struct cmap_impl *impl = cmap_get_impl(cmap);
858
859
0
    ovsrcu_set_hidden(&node->next, NULL);
860
861
0
    if (OVS_UNLIKELY(impl->n >= impl->max_n)) {
862
0
        COVERAGE_INC(cmap_expand);
863
0
        impl = cmap_rehash(cmap, (impl->mask << 1) | 1);
864
0
    }
865
866
0
    while (OVS_UNLIKELY(!cmap_try_insert(impl, node, hash))) {
867
0
        impl = cmap_rehash(cmap, impl->mask);
868
0
    }
869
0
    return ++impl->n;
870
0
}
871
872
static bool
873
cmap_replace__(struct cmap_impl *impl, struct cmap_node *node,
874
               struct cmap_node *replacement, uint32_t hash, uint32_t h)
875
0
{
876
0
    struct cmap_bucket *b = &impl->buckets[h & impl->mask];
877
0
    int slot;
878
879
0
    slot = cmap_find_slot_protected(b, hash);
880
0
    if (slot < 0) {
881
0
        return false;
882
0
    }
883
884
    /* The pointer to 'node' is changed to point to 'replacement',
885
     * which is the next node if no replacement node is given. */
886
0
    if (!replacement) {
887
0
        replacement = cmap_node_next_protected(node);
888
0
    } else {
889
        /* 'replacement' takes the position of 'node' in the list. */
890
0
        ovsrcu_set_hidden(&replacement->next, cmap_node_next_protected(node));
891
0
    }
892
893
0
    struct cmap_node *iter = &b->nodes[slot];
894
0
    for (;;) {
895
0
        struct cmap_node *next = cmap_node_next_protected(iter);
896
897
0
        if (next == node) {
898
0
            ovsrcu_set(&iter->next, replacement);
899
0
            return true;
900
0
        }
901
0
        iter = next;
902
0
    }
903
0
}
904
905
/* Replaces 'old_node' in 'cmap' with 'new_node'.  The caller must
906
 * ensure that 'cmap' cannot change concurrently (from another thread).
907
 *
908
 * 'old_node' must not be destroyed or modified or inserted back into 'cmap' or
909
 * into any other concurrent hash map while any other thread might be accessing
910
 * it.  One correct way to do this is to free it from an RCU callback with
911
 * ovsrcu_postpone().
912
 *
913
 * Returns the current number of nodes in the cmap after the replacement.  The
914
 * number of nodes decreases by one if 'new_node' is NULL. */
915
size_t
916
cmap_replace(struct cmap *cmap, struct cmap_node *old_node,
917
             struct cmap_node *new_node, uint32_t hash)
918
0
{
919
0
    struct cmap_impl *impl = cmap_get_impl(cmap);
920
0
    uint32_t h1 = rehash(impl, hash);
921
0
    uint32_t h2 = other_hash(h1);
922
923
0
    ovs_assert(cmap_replace__(impl, old_node, new_node, hash, h1) ||
924
0
               cmap_replace__(impl, old_node, new_node, hash, h2));
925
926
0
    if (!new_node) {
927
0
        impl->n--;
928
0
        if (OVS_UNLIKELY(impl->n < impl->min_n)) {
929
0
            COVERAGE_INC(cmap_shrink);
930
0
            impl = cmap_rehash(cmap, impl->mask >> 1);
931
0
        }
932
0
    }
933
0
    return impl->n;
934
0
}
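/* Illustrative sketch, not part of the original file: removing an element.
 * cmap_remove() (a cmap.h inline that calls cmap_replace() with a NULL
 * replacement) unlinks the node, and the memory is freed only after an RCU
 * grace period, as the comment above requires.  Reuses the hypothetical
 * 'example_flow' from the sketch near the top of this file. */
static void
example_flow_remove(struct cmap *table, struct example_flow *flow)
{
    cmap_remove(table, &flow->node, hash_int(flow->port, 0));
    ovsrcu_postpone(free, flow);
}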
935
936
static bool
937
cmap_try_rehash(const struct cmap_impl *old, struct cmap_impl *new)
938
0
{
939
0
    const struct cmap_bucket *b;
940
941
0
    for (b = old->buckets; b <= &old->buckets[old->mask]; b++) {
942
0
        int i;
943
944
0
        for (i = 0; i < CMAP_K; i++) {
945
            /* possible optimization here because we know the hashes are
946
             * unique */
947
0
            struct cmap_node *node = cmap_node_next_protected(&b->nodes[i]);
948
949
0
            if (node && !cmap_try_insert(new, node, b->hashes[i])) {
950
0
                return false;
951
0
            }
952
0
        }
953
0
    }
954
0
    return true;
955
0
}
956
957
static struct cmap_impl *
958
cmap_rehash(struct cmap *cmap, uint32_t mask)
959
0
{
960
0
    struct cmap_impl *old = cmap_get_impl(cmap);
961
0
    struct cmap_impl *new;
962
963
0
    new = cmap_impl_create(mask);
964
0
    ovs_assert(old->n < new->max_n);
965
966
0
    while (!cmap_try_rehash(old, new)) {
967
0
        memset(new->buckets, 0, (mask + 1) * sizeof *new->buckets);
968
0
        new->basis = random_uint32();
969
0
    }
970
971
0
    new->n = old->n;
972
0
    ovsrcu_set(&cmap->impl, new);
973
0
    if (old != &empty_cmap) {
974
0
        ovsrcu_postpone(free_cacheline, old);
975
0
    }
976
977
0
    return new;
978
0
}
979
980
struct cmap_cursor
981
cmap_cursor_start(const struct cmap *cmap)
982
0
{
983
0
    struct cmap_cursor cursor;
984
985
0
    cursor.impl = cmap_get_impl(cmap);
986
0
    cursor.bucket_idx = 0;
987
0
    cursor.entry_idx = 0;
988
0
    cursor.node = NULL;
989
0
    cmap_cursor_advance(&cursor);
990
991
0
    return cursor;
992
0
}
993
994
void
995
cmap_cursor_advance(struct cmap_cursor *cursor)
996
0
{
997
0
    const struct cmap_impl *impl = cursor->impl;
998
999
0
    if (cursor->node) {
1000
0
        cursor->node = cmap_node_next(cursor->node);
1001
0
        if (cursor->node) {
1002
0
            return;
1003
0
        }
1004
0
    }
1005
1006
0
    while (cursor->bucket_idx <= impl->mask) {
1007
0
        const struct cmap_bucket *b = &impl->buckets[cursor->bucket_idx];
1008
1009
0
        while (cursor->entry_idx < CMAP_K) {
1010
0
            cursor->node = cmap_node_next(&b->nodes[cursor->entry_idx++]);
1011
0
            if (cursor->node) {
1012
0
                return;
1013
0
            }
1014
0
        }
1015
1016
0
        cursor->bucket_idx++;
1017
0
        cursor->entry_idx = 0;
1018
0
    }
1019
0
}
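/* Illustrative sketch, not part of the original file: the cursor functions
 * above back the CMAP_FOR_EACH family of macros in cmap.h, which is the usual
 * way to iterate over every element.  Reuses the hypothetical 'example_flow'
 * from the sketch near the top of this file. */
static size_t
example_count_flows(const struct cmap *table)
{
    struct example_flow *flow;
    size_t n = 0;

    CMAP_FOR_EACH (flow, node, table) {
        n++;
    }
    return n;
}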
1020
1021
/* Returns the next node in 'cmap' in hash order, or NULL if no nodes remain in
1022
 * 'cmap'.  Uses '*pos' to determine where to begin iteration, and updates
1023
 * '*pos' so that the next call continues from where this one left off.
1024
 *
1025
 * It's better to use plain CMAP_FOR_EACH and related functions, since they are
1026
 * faster and better at dealing with cmaps that change during iteration.
1027
 *
1028
 * Before beginning iteration, set '*pos' to all zeros. */
1029
struct cmap_node *
1030
cmap_next_position(const struct cmap *cmap,
1031
                   struct cmap_position *pos)
1032
0
{
1033
0
    struct cmap_impl *impl = cmap_get_impl(cmap);
1034
0
    unsigned int bucket = pos->bucket;
1035
0
    unsigned int entry = pos->entry;
1036
0
    unsigned int offset = pos->offset;
1037
1038
0
    while (bucket <= impl->mask) {
1039
0
        const struct cmap_bucket *b = &impl->buckets[bucket];
1040
1041
0
        while (entry < CMAP_K) {
1042
0
            const struct cmap_node *node = cmap_node_next(&b->nodes[entry]);
1043
0
            unsigned int i;
1044
1045
0
            for (i = 0; node; i++, node = cmap_node_next(node)) {
1046
0
                if (i == offset) {
1047
0
                    if (cmap_node_next(node)) {
1048
0
                        offset++;
1049
0
                    } else {
1050
0
                        entry++;
1051
0
                        offset = 0;
1052
0
                    }
1053
0
                    pos->bucket = bucket;
1054
0
                    pos->entry = entry;
1055
0
                    pos->offset = offset;
1056
0
                    return CONST_CAST(struct cmap_node *, node);
1057
0
                }
1058
0
            }
1059
1060
0
            entry++;
1061
0
            offset = 0;
1062
0
        }
1063
1064
0
        bucket++;
1065
0
        entry = offset = 0;
1066
0
    }
1067
1068
0
    pos->bucket = pos->entry = pos->offset = 0;
1069
0
    return NULL;
1070
0
}