Coverage Report

Created: 2023-03-26 07:42

/src/openvswitch/lib/ovs-numa.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2014 Nicira, Inc.
3
 *
4
 * Licensed under the Apache License, Version 2.0 (the "License");
5
 * you may not use this file except in compliance with the License.
6
 * You may obtain a copy of the License at:
7
 *
8
 *     http://www.apache.org/licenses/LICENSE-2.0
9
 *
10
 * Unless required by applicable law or agreed to in writing, software
11
 * distributed under the License is distributed on an "AS IS" BASIS,
12
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
 * See the License for the specific language governing permissions and
14
 * limitations under the License.
15
 */
16
17
#include <config.h>
18
#include "ovs-numa.h"
19
20
#include <ctype.h>
21
#include <errno.h>
22
#ifdef __linux__
23
#include <dirent.h>
24
#include <stddef.h>
25
#include <string.h>
26
#include <sys/types.h>
27
#include <unistd.h>
28
#endif /* __linux__ */
29
30
#include "hash.h"
31
#include "openvswitch/hmap.h"
32
#include "openvswitch/list.h"
33
#include "ovs-thread.h"
34
#include "openvswitch/vlog.h"
35
#include "util.h"
36
37
VLOG_DEFINE_THIS_MODULE(ovs_numa);
38
39
/* ovs-numa module
 * ===============
 *
 * This module stores the affinity information of numa nodes and cpu cores.
 * It also provides functions to keep track of the pinning of threads to cpu
 * cores.
 *
 * It is assumed that the numa node ids and cpu core ids all start from 0.
 * There is no guarantee that node and cpu ids are numbered consecutively.
 * So, for example, if two nodes exist with ids 0 and 8,
 * 'ovs_numa_get_n_numas()' will return 2; no assumption of node numbering
 * should be made.
 *
 * NOTE, this module should only be used by the main thread.
 *
 * NOTE, if cpu hotplug is used, 'all_numa_nodes' and 'all_cpu_cores' must be
 * invalidated whenever the system topology changes.  Support for detecting
 * topology changes has not been included.  For now, add a TODO entry for
 * addressing it in the future.
 *
 * TODO: Fix ovs-numa when cpu hotplug is used.
 */
60
61
62
/* numa node. */
63
struct numa_node {
64
    struct hmap_node hmap_node;     /* In the 'all_numa_nodes'. */
65
    struct ovs_list cores;          /* List of cpu cores on the numa node. */
66
    int numa_id;                    /* numa node id. */
67
};
68
69
/* Cpu core on a numa node. */
70
struct cpu_core {
71
    struct hmap_node hmap_node;/* In the 'all_cpu_cores'. */
72
    struct ovs_list list_node; /* In 'numa_node->cores' list. */
73
    struct numa_node *numa;    /* numa node containing the core. */
74
    unsigned core_id;          /* Core id. */
75
};
76
77
/* Contains all 'struct numa_node's. */
78
static struct hmap all_numa_nodes = HMAP_INITIALIZER(&all_numa_nodes);
79
/* Contains all 'struct cpu_core's. */
80
static struct hmap all_cpu_cores = HMAP_INITIALIZER(&all_cpu_cores);
81
/* True if numa node and core info are correctly extracted. */
82
static bool found_numa_and_core;
83
/* True if the module was initialized with dummy options. In this case, the
84
 * module must not interact with the actual cpus/nodes in the system. */
85
static bool dummy_numa = false;
86
/* If 'dummy_numa' is true, contains a copy of the dummy numa configuration
87
 * parameter */
88
static char *dummy_config;
89
90
static struct numa_node *get_numa_by_numa_id(int numa_id);
91
92
#ifdef __linux__
93
/* Returns true if every character of 'str' is a decimal digit, false
 * otherwise.  An empty string contains no non-digit character, so it also
 * yields true. */
static bool
contain_all_digits(const char *str)
{
    const char *p = str;

    /* Skip the leading run of digits; the string is all digits exactly when
     * that run reaches the terminator. */
    while (*p >= '0' && *p <= '9') {
        p++;
    }

    return *p == '\0';
}
99
#endif /* __linux__ */
100
101
static struct numa_node *
102
insert_new_numa_node(int numa_id)
103
0
{
104
0
    struct numa_node *n = xzalloc(sizeof *n);
105
106
0
    hmap_insert(&all_numa_nodes, &n->hmap_node, hash_int(numa_id, 0));
107
0
    ovs_list_init(&n->cores);
108
0
    n->numa_id = numa_id;
109
110
0
    return n;
111
0
}
112
113
static struct cpu_core *
114
insert_new_cpu_core(struct numa_node *n, unsigned core_id)
115
0
{
116
0
    struct cpu_core *c = xzalloc(sizeof *c);
117
118
0
    hmap_insert(&all_cpu_cores, &c->hmap_node, hash_int(core_id, 0));
119
0
    ovs_list_insert(&n->cores, &c->list_node);
120
0
    c->core_id = core_id;
121
0
    c->numa = n;
122
123
0
    return c;
124
0
}
125
126
/* Has the same effect as discover_numa_and_core(), but instead of
127
 * reading sysfs entries, extracts the info from the global variable
128
 * 'dummy_config', which is set with ovs_numa_set_dummy().
129
 *
130
 * 'dummy_config' lists the numa_ids of each CPU separated by a comma, e.g.
131
 * - "0,0,0,0": four cores on numa socket 0.
132
 * - "0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1": 16 cores on two numa sockets.
133
 * - "0,0,0,0,1,1,1,1": 8 cores on two numa sockets.
134
 * - "0,0,0,0,8,8,8,8": 8 cores on two numa sockets, non-contiguous.
135
 */
136
static void
137
discover_numa_and_core_dummy(void)
138
0
{
139
0
    char *conf = xstrdup(dummy_config);
140
0
    char *id, *saveptr = NULL;
141
0
    unsigned i = 0;
142
143
0
    for (id = strtok_r(conf, ",", &saveptr); id;
144
0
         id = strtok_r(NULL, ",", &saveptr)) {
145
0
        struct hmap_node *hnode;
146
0
        struct numa_node *n;
147
0
        long numa_id;
148
149
0
        numa_id = strtol(id, NULL, 10);
150
0
        if (numa_id < 0 || numa_id >= MAX_NUMA_NODES) {
151
0
            VLOG_WARN("Invalid numa node %ld", numa_id);
152
0
            continue;
153
0
        }
154
155
0
        hnode = hmap_first_with_hash(&all_numa_nodes, hash_int(numa_id, 0));
156
157
0
        if (hnode) {
158
0
            n = CONTAINER_OF(hnode, struct numa_node, hmap_node);
159
0
        } else {
160
0
            n = insert_new_numa_node(numa_id);
161
0
        }
162
163
0
        insert_new_cpu_core(n, i);
164
165
0
        i++;
166
0
    }
167
168
0
    free(conf);
169
170
0
}
171
172
#ifdef __linux__
173
/* Check if a CPU is detected and online. */
174
static int
175
cpu_detected(unsigned int core_id)
176
0
{
177
0
    char path[PATH_MAX];
178
0
    int len = snprintf(path, sizeof(path),
179
0
                       "/sys/devices/system/cpu/cpu%d/topology/core_id",
180
0
                       core_id);
181
0
    if (len <= 0 || (unsigned) len >= sizeof(path)) {
182
0
        return 0;
183
0
    }
184
0
    if (access(path, F_OK) != 0) {
185
0
        return 0;
186
0
    }
187
188
0
    return 1;
189
0
}
190
#endif /* __linux__ */
191
192
/* Discovers all numa nodes and the corresponding cpu cores.
193
 * Constructs the 'struct numa_node' and 'struct cpu_core'. */
194
static void
195
discover_numa_and_core(void)
196
0
{
197
0
#ifdef __linux__
198
0
    int i;
199
0
    DIR *dir;
200
0
    bool numa_supported = true;
201
202
    /* Check if NUMA supported on this system. */
203
0
    dir = opendir("/sys/devices/system/node");
204
205
0
    if (!dir && errno == ENOENT) {
206
0
        numa_supported = false;
207
0
    }
208
0
    if (dir) {
209
0
        closedir(dir);
210
0
    }
211
212
0
    for (i = 0; i < MAX_NUMA_NODES; i++) {
213
0
        char* path;
214
215
0
        if (numa_supported) {
216
            /* Constructs the path to node /sys/devices/system/nodeX. */
217
0
            path = xasprintf("/sys/devices/system/node/node%d", i);
218
0
        } else {
219
0
            path = xasprintf("/sys/devices/system/cpu/");
220
0
        }
221
222
0
        dir = opendir(path);
223
224
        /* Creates 'struct numa_node' if the 'dir' is non-null. */
225
0
        if (dir) {
226
0
            struct numa_node *n;
227
0
            struct dirent *subdir;
228
229
0
            n = insert_new_numa_node(i);
230
231
0
            while ((subdir = readdir(dir)) != NULL) {
232
0
                if (!strncmp(subdir->d_name, "cpu", 3)
233
0
                    && contain_all_digits(subdir->d_name + 3)) {
234
0
                    unsigned core_id;
235
236
0
                    core_id = strtoul(subdir->d_name + 3, NULL, 10);
237
0
                    if (cpu_detected(core_id)) {
238
0
                        insert_new_cpu_core(n, core_id);
239
0
                    }
240
0
                }
241
0
            }
242
0
            closedir(dir);
243
0
        } else if (errno != ENOENT) {
244
0
            VLOG_WARN("opendir(%s) failed (%s)", path,
245
0
                      ovs_strerror(errno));
246
0
        }
247
248
0
        free(path);
249
0
        if (!numa_supported) {
250
0
            break;
251
0
        }
252
0
    }
253
0
#endif /* __linux__ */
254
0
}
255
256
/* Gets 'struct cpu_core' by 'core_id'. */
257
static struct cpu_core*
258
get_core_by_core_id(unsigned core_id)
259
0
{
260
0
    struct cpu_core *core;
261
262
0
    HMAP_FOR_EACH_WITH_HASH (core, hmap_node, hash_int(core_id, 0),
263
0
                             &all_cpu_cores) {
264
0
        if (core->core_id == core_id) {
265
0
            return core;
266
0
        }
267
0
    }
268
269
0
    return NULL;
270
0
}
271
272
/* Gets 'struct numa_node' by 'numa_id'. */
273
static struct numa_node*
274
get_numa_by_numa_id(int numa_id)
275
0
{
276
0
    struct numa_node *numa;
277
278
0
    HMAP_FOR_EACH_WITH_HASH (numa, hmap_node, hash_int(numa_id, 0),
279
0
                             &all_numa_nodes) {
280
0
        if (numa->numa_id == numa_id) {
281
0
            return numa;
282
0
        }
283
0
    }
284
285
0
    return NULL;
286
0
}
287
288

289
/* Initializes the numa module. */
290
void
291
ovs_numa_init(void)
292
0
{
293
0
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
294
295
0
    if (ovsthread_once_start(&once)) {
296
0
        const struct numa_node *n;
297
298
0
        if (dummy_numa) {
299
0
            discover_numa_and_core_dummy();
300
0
        } else {
301
0
            discover_numa_and_core();
302
0
        }
303
304
0
        HMAP_FOR_EACH(n, hmap_node, &all_numa_nodes) {
305
0
            VLOG_INFO("Discovered %"PRIuSIZE" CPU cores on NUMA node %d",
306
0
                      ovs_list_size(&n->cores), n->numa_id);
307
0
        }
308
309
0
        VLOG_INFO("Discovered %"PRIuSIZE" NUMA nodes and %"PRIuSIZE" CPU cores",
310
0
                   hmap_count(&all_numa_nodes), hmap_count(&all_cpu_cores));
311
312
0
        if (hmap_count(&all_numa_nodes) && hmap_count(&all_cpu_cores)) {
313
0
            found_numa_and_core = true;
314
0
        }
315
316
0
        ovsthread_once_done(&once);
317
0
    }
318
0
}
319
320
/* Extracts the numa node and core info from the 'config'.  This is useful for
321
 * testing purposes.  The function must be called once, before ovs_numa_init().
322
 *
323
 * The format of 'config' is explained in the comment above
324
 * discover_numa_and_core_dummy().*/
325
void
326
ovs_numa_set_dummy(const char *config)
327
0
{
328
0
    dummy_numa = true;
329
0
    ovs_assert(config);
330
0
    free(dummy_config);
331
0
    dummy_config = xstrdup(config);
332
0
}
333
334
bool
335
ovs_numa_numa_id_is_valid(int numa_id)
336
0
{
337
0
    return found_numa_and_core && numa_id < ovs_numa_get_n_numas();
338
0
}
339
340
bool
341
ovs_numa_core_id_is_valid(unsigned core_id)
342
0
{
343
0
    return found_numa_and_core && core_id < ovs_numa_get_n_cores();
344
0
}
345
346
/* Returns the number of numa nodes. */
347
int
348
ovs_numa_get_n_numas(void)
349
0
{
350
0
    return found_numa_and_core ? hmap_count(&all_numa_nodes)
351
0
                               : OVS_NUMA_UNSPEC;
352
0
}
353
354
/* Returns the number of cpu cores. */
355
int
356
ovs_numa_get_n_cores(void)
357
0
{
358
0
    return found_numa_and_core ? hmap_count(&all_cpu_cores)
359
0
                               : OVS_CORE_UNSPEC;
360
0
}
361
362
/* Given 'core_id', returns the corresponding numa node id.  Returns
363
 * OVS_NUMA_UNSPEC if 'core_id' is invalid. */
364
int
365
ovs_numa_get_numa_id(unsigned core_id)
366
0
{
367
0
    struct cpu_core *core = get_core_by_core_id(core_id);
368
369
0
    if (core) {
370
0
        return core->numa->numa_id;
371
0
    }
372
373
0
    return OVS_NUMA_UNSPEC;
374
0
}
375
376
/* Returns the number of cpu cores on numa node.  Returns OVS_CORE_UNSPEC
377
 * if 'numa_id' is invalid. */
378
int
379
ovs_numa_get_n_cores_on_numa(int numa_id)
380
0
{
381
0
    struct numa_node *numa = get_numa_by_numa_id(numa_id);
382
383
0
    if (numa) {
384
0
        return ovs_list_size(&numa->cores);
385
0
    }
386
387
0
    return OVS_CORE_UNSPEC;
388
0
}
389
390
/* Returns the largest core_id.
391
 *
392
 * Return OVS_CORE_UNSPEC, if core_id information is not found.
393
 *
394
 * Returning OVS_CORE_UNSPEC comes at a caveat.  The caller function
395
 * must remember to check the return value of this callee function
396
 * against OVS_CORE_UNSPEC.  OVS_CORE_UNSPEC is a positive integer
397
 * INT_MAX, which the caller may interpret it as the largest
398
 * core_id if it's not checking for it.
399
 */
400
unsigned
401
ovs_numa_get_largest_core_id(void)
402
0
{
403
0
    struct cpu_core *core;
404
0
    unsigned max_id = 0;
405
406
0
    if (!found_numa_and_core) {
407
0
        return OVS_CORE_UNSPEC;
408
0
    }
409
410
0
    HMAP_FOR_EACH (core, hmap_node, &all_cpu_cores) {
411
0
        if (core->core_id > max_id) {
412
0
            max_id = core->core_id;
413
0
        }
414
0
    }
415
416
0
    return max_id;
417
0
}
418
419
static struct ovs_numa_dump *
420
ovs_numa_dump_create(void)
421
0
{
422
0
    struct ovs_numa_dump *dump = xmalloc(sizeof *dump);
423
424
0
    hmap_init(&dump->cores);
425
0
    hmap_init(&dump->numas);
426
427
0
    return dump;
428
0
}
429
430
static void
431
ovs_numa_dump_add(struct ovs_numa_dump *dump, int numa_id, int core_id)
432
0
{
433
0
    struct ovs_numa_info_core *c = xzalloc(sizeof *c);
434
0
    struct ovs_numa_info_numa *n;
435
436
0
    c->numa_id = numa_id;
437
0
    c->core_id = core_id;
438
0
    hmap_insert(&dump->cores, &c->hmap_node, hash_2words(numa_id, core_id));
439
440
0
    HMAP_FOR_EACH_WITH_HASH (n, hmap_node, hash_int(numa_id, 0),
441
0
                             &dump->numas) {
442
0
        if (n->numa_id == numa_id) {
443
0
            n->n_cores++;
444
0
            return;
445
0
        }
446
0
    }
447
448
0
    n = xzalloc(sizeof *n);
449
0
    n->numa_id = numa_id;
450
0
    n->n_cores = 1;
451
0
    hmap_insert(&dump->numas, &n->hmap_node, hash_int(numa_id, 0));
452
0
}
453
454
/* Given the 'numa_id', returns dump of all cores on the numa node. */
455
struct ovs_numa_dump *
456
ovs_numa_dump_cores_on_numa(int numa_id)
457
0
{
458
0
    struct ovs_numa_dump *dump = ovs_numa_dump_create();
459
0
    struct numa_node *numa = get_numa_by_numa_id(numa_id);
460
461
0
    if (numa) {
462
0
        struct cpu_core *core;
463
464
0
        LIST_FOR_EACH (core, list_node, &numa->cores) {
465
0
            ovs_numa_dump_add(dump, numa->numa_id, core->core_id);
466
0
        }
467
0
    }
468
469
0
    return dump;
470
0
}
471
472
struct ovs_numa_dump *
473
ovs_numa_dump_cores_with_cmask(const char *cmask)
474
0
{
475
0
    struct ovs_numa_dump *dump = ovs_numa_dump_create();
476
0
    int core_id = 0;
477
0
    int end_idx;
478
479
    /* Ignore leading 0x. */
480
0
    end_idx = 0;
481
0
    if (!strncmp(cmask, "0x", 2) || !strncmp(cmask, "0X", 2)) {
482
0
        end_idx = 2;
483
0
    }
484
485
0
    for (int i = strlen(cmask) - 1; i >= end_idx; i--) {
486
0
        char hex = cmask[i];
487
0
        int bin;
488
489
0
        bin = hexit_value(hex);
490
0
        if (bin == -1) {
491
0
            VLOG_WARN("Invalid cpu mask: %c", cmask[i]);
492
0
            bin = 0;
493
0
        }
494
495
0
        for (int j = 0; j < 4; j++) {
496
0
            if ((bin >> j) & 0x1) {
497
0
                struct cpu_core *core = get_core_by_core_id(core_id);
498
499
0
                if (core) {
500
0
                    ovs_numa_dump_add(dump,
501
0
                                      core->numa->numa_id,
502
0
                                      core->core_id);
503
0
                }
504
0
            }
505
506
0
            core_id++;
507
0
        }
508
0
    }
509
510
0
    return dump;
511
0
}
512
513
struct ovs_numa_dump *
514
ovs_numa_dump_n_cores_per_numa(int cores_per_numa)
515
0
{
516
0
    struct ovs_numa_dump *dump = ovs_numa_dump_create();
517
0
    const struct numa_node *n;
518
519
0
    HMAP_FOR_EACH (n, hmap_node, &all_numa_nodes) {
520
0
        const struct cpu_core *core;
521
0
        int i = 0;
522
523
0
        LIST_FOR_EACH (core, list_node, &n->cores) {
524
0
            if (i++ >= cores_per_numa) {
525
0
                break;
526
0
            }
527
528
0
            ovs_numa_dump_add(dump, core->numa->numa_id, core->core_id);
529
0
        }
530
0
    }
531
532
0
    return dump;
533
0
}
534
535
bool
536
ovs_numa_dump_contains_core(const struct ovs_numa_dump *dump,
537
                            int numa_id, unsigned core_id)
538
0
{
539
0
    struct ovs_numa_info_core *core;
540
541
0
    HMAP_FOR_EACH_WITH_HASH (core, hmap_node, hash_2words(numa_id, core_id),
542
0
                             &dump->cores) {
543
0
        if (core->core_id == core_id && core->numa_id == numa_id) {
544
0
            return true;
545
0
        }
546
0
    }
547
548
0
    return false;
549
0
}
550
551
size_t
552
ovs_numa_dump_count(const struct ovs_numa_dump *dump)
553
0
{
554
0
    return hmap_count(&dump->cores);
555
0
}
556
557
void
558
ovs_numa_dump_destroy(struct ovs_numa_dump *dump)
559
0
{
560
0
    struct ovs_numa_info_core *c;
561
0
    struct ovs_numa_info_numa *n;
562
563
0
    if (!dump) {
564
0
        return;
565
0
    }
566
567
0
    HMAP_FOR_EACH_POP (c, hmap_node, &dump->cores) {
568
0
        free(c);
569
0
    }
570
571
0
    HMAP_FOR_EACH_POP (n, hmap_node, &dump->numas) {
572
0
        free(n);
573
0
    }
574
575
0
    hmap_destroy(&dump->cores);
576
0
    hmap_destroy(&dump->numas);
577
578
0
    free(dump);
579
0
}
580
581
struct ovs_numa_dump *
582
ovs_numa_thread_getaffinity_dump(void)
583
0
{
584
0
    if (dummy_numa) {
585
        /* Nothing to do. */
586
0
        return NULL;
587
0
    }
588
589
#ifndef __linux__
590
    return NULL;
591
#else
592
0
    struct ovs_numa_dump *dump;
593
0
    const struct numa_node *n;
594
0
    cpu_set_t cpuset;
595
0
    int err;
596
597
0
    CPU_ZERO(&cpuset);
598
0
    err = pthread_getaffinity_np(pthread_self(), sizeof cpuset, &cpuset);
599
0
    if (err) {
600
0
        VLOG_ERR("Thread getaffinity error: %s", ovs_strerror(err));
601
0
        return NULL;
602
0
    }
603
604
0
    dump = ovs_numa_dump_create();
605
606
0
    HMAP_FOR_EACH (n, hmap_node, &all_numa_nodes) {
607
0
        const struct cpu_core *core;
608
609
0
        LIST_FOR_EACH (core, list_node, &n->cores) {
610
0
            if (CPU_ISSET(core->core_id, &cpuset)) {
611
0
                ovs_numa_dump_add(dump, core->numa->numa_id, core->core_id);
612
0
            }
613
0
        }
614
0
    }
615
616
0
    if (!ovs_numa_dump_count(dump)) {
617
0
        ovs_numa_dump_destroy(dump);
618
0
        return NULL;
619
0
    }
620
0
    return dump;
621
0
#endif /* __linux__ */
622
0
}
623
624
int
625
ovs_numa_thread_setaffinity_dump(const struct ovs_numa_dump *dump)
626
0
{
627
0
    if (!dump || dummy_numa) {
628
        /* Nothing to do. */
629
0
        return 0;
630
0
    }
631
632
0
#ifdef __linux__
633
0
    const struct ovs_numa_info_core *core;
634
0
    cpu_set_t cpuset;
635
0
    int err;
636
637
0
    CPU_ZERO(&cpuset);
638
0
    FOR_EACH_CORE_ON_DUMP (core, dump) {
639
0
        CPU_SET(core->core_id, &cpuset);
640
0
    }
641
0
    err = pthread_setaffinity_np(pthread_self(), sizeof cpuset, &cpuset);
642
0
    if (err) {
643
0
        VLOG_ERR("Thread setaffinity error: %s", ovs_strerror(err));
644
0
        return err;
645
0
    }
646
647
0
    return 0;
648
#else /* !__linux__ */
649
    return EOPNOTSUPP;
650
#endif /* __linux__ */
651
0
}
652
653
int ovs_numa_thread_setaffinity_core(unsigned core_id)
654
0
{
655
0
    const struct cpu_core *core = get_core_by_core_id(core_id);
656
0
    struct ovs_numa_dump *affinity = ovs_numa_dump_create();
657
0
    int ret = EINVAL;
658
659
0
    if (core) {
660
0
        ovs_numa_dump_add(affinity, core->numa->numa_id, core->core_id);
661
0
        ret = ovs_numa_thread_setaffinity_dump(affinity);
662
0
    }
663
664
0
    ovs_numa_dump_destroy(affinity);
665
0
    return ret;
666
0
}