Line | Count | Source |
1 | | /* Simple NUMA library. |
2 | | Copyright (C) 2003, 2004, 2005, 2008 Andi Kleen, SuSE Labs and |
3 | | Cliff Wickman, SGI. |
4 | | |
5 | | libnuma is free software; you can redistribute it and/or |
6 | | modify it under the terms of the GNU Lesser General Public |
7 | | License as published by the Free Software Foundation; version |
8 | | 2.1. |
9 | | |
10 | | libnuma is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | | Lesser General Public License for more details. |
14 | | |
15 | | You should find a copy of v2.1 of the GNU Lesser General Public License |
16 | | somewhere on your Linux system; if not, write to the Free Software |
17 | | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | | |
19 | | All calls are undefined when numa_available returns an error. */ |
20 | | #define _GNU_SOURCE 1 |
21 | | #include <stdlib.h> |
22 | | #include <stdio.h> |
23 | | #include <unistd.h> |
24 | | #include <string.h> |
25 | | #include <sched.h> |
26 | | #include <dirent.h> |
27 | | #include <errno.h> |
28 | | #include <stdarg.h> |
29 | | #include <ctype.h> |
30 | | #include <assert.h> |
31 | | |
32 | | #include <sys/mman.h> |
33 | | #include <limits.h> |
34 | | |
35 | | #include "config.h" |
36 | | #include "numa.h" |
37 | | #include "numaif.h" |
38 | | #include "numaint.h" |
39 | | #include "util.h" |
40 | | #include "affinity.h" |
41 | | |
42 | | #define WEAK __attribute__((weak)) |
43 | | |
44 | 0 | #define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */ |
45 | | |
46 | | /* these are the old (version 1) masks */ |
47 | | nodemask_t numa_no_nodes; |
48 | | nodemask_t numa_all_nodes; |
49 | | /* these are now pointers to the default bitmasks (version 2) */ |
50 | | struct bitmask *numa_no_nodes_ptr = NULL; |
51 | | struct bitmask *numa_all_nodes_ptr = NULL; |
52 | | struct bitmask *numa_possible_nodes_ptr = NULL; |
53 | | struct bitmask *numa_all_cpus_ptr = NULL; |
54 | | struct bitmask *numa_possible_cpus_ptr = NULL; |
55 | | /* I would prefer to use symbol versioning to create v1 and v2 versions |
56 | | of numa_no_nodes and numa_all_nodes, but the loader does not correctly |
57 | | handle versioning of BSS versus small data items */ |
58 | | |
59 | | struct bitmask *numa_nodes_ptr = NULL; |
60 | | static struct bitmask *numa_memnode_ptr = NULL; |
61 | | static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES]; |
62 | | static char node_cpu_mask_v1_stale = 1; |
63 | | static struct bitmask **node_cpu_mask_v2; |
64 | | static char node_cpu_mask_v2_stale = 1; |
65 | | |
66 | | WEAK void numa_error(char *where); |
67 | | |
68 | | #ifndef TLS |
69 | | #warning "not threadsafe" |
70 | | #define __thread |
71 | | #endif |
72 | | |
73 | | static __thread int bind_policy = MPOL_BIND; |
74 | | static __thread unsigned int mbind_flags = 0; |
75 | | static int sizes_set=0; |
76 | | static int maxconfigurednode = -1; |
77 | | static int maxconfiguredcpu = -1; |
78 | | static int numprocnode = -1; |
79 | | static int numproccpu = -1; |
80 | | static int nodemask_sz = 0; |
81 | | static int cpumask_sz = 0; |
82 | | |
83 | | static int has_preferred_many = -1; |
84 | | |
85 | | int numa_exit_on_error = 0; |
86 | | int numa_exit_on_warn = 0; |
87 | | int numa_fail_alloc_on_error = 0; |
88 | | static void set_sizes(void); |
89 | | |
90 | | /* |
91 | | * There are two special functions, _init(void) and _fini(void), which |
92 | | * are called automatically by the dynamic loader whenever a library is loaded. |
93 | | * |
94 | | * The v1 library depends upon nodemask_t's of all nodes and no nodes. |
95 | | */ |
96 | | void __attribute__((constructor)) |
97 | | numa_init(void) |
98 | 2 | { |
99 | 2 | int max,i; |
100 | | |
101 | 2 | if (sizes_set) |
102 | 0 | return; |
103 | | |
104 | 2 | set_sizes(); |
105 | | /* numa_all_nodes should represent existing nodes on this system */ |
106 | 2 | max = numa_num_configured_nodes(); |
107 | 4 | for (i = 0; i < max; i++) |
108 | 2 | nodemask_set_compat((nodemask_t *)&numa_all_nodes, i); |
109 | 2 | memset(&numa_no_nodes, 0, sizeof(numa_no_nodes)); |
110 | | |
111 | | /* clear errno */ |
112 | 2 | errno = 0; |
113 | 2 | } |
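
Because numa_init() is marked __attribute__((constructor)), the loader runs it automatically; applications never call it. A minimal client sketch (the numa_available() check is the one required by the header comment above; everything else is undefined if it fails):

	#include <numa.h>	/* link with -lnuma */
	#include <stdio.h>

	int main(void)
	{
		if (numa_available() < 0) {
			fprintf(stderr, "no NUMA support\n");
			return 1;
		}
		printf("highest configured node: %d\n", numa_max_node());
		return 0;
	}
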
114 | | |
115 | | static void cleanup_node_cpu_mask_v2(void); |
116 | | |
117 | 0 | #define FREE_AND_ZERO(x) if (x) { \ |
118 | 0 | numa_bitmask_free(x); \ |
119 | 0 | x = NULL; \ |
120 | 0 | } |
121 | | |
122 | | void __attribute__((destructor)) |
123 | | numa_fini(void) |
124 | 0 | { |
125 | 0 | FREE_AND_ZERO(numa_all_cpus_ptr); |
126 | 0 | FREE_AND_ZERO(numa_possible_cpus_ptr); |
127 | 0 | FREE_AND_ZERO(numa_all_nodes_ptr); |
128 | 0 | FREE_AND_ZERO(numa_possible_nodes_ptr); |
129 | 0 | FREE_AND_ZERO(numa_no_nodes_ptr); |
130 | 0 | FREE_AND_ZERO(numa_memnode_ptr); |
131 | 0 | FREE_AND_ZERO(numa_nodes_ptr); |
132 | 0 | cleanup_node_cpu_mask_v2(); |
133 | 0 | } |
134 | | |
135 | | static int numa_find_first(struct bitmask *mask) |
136 | 0 | { |
137 | 0 | int i; |
138 | 0 | for (i = 0; i < mask->size; i++) |
139 | 0 | if (numa_bitmask_isbitset(mask, i)) |
140 | 0 | return i; |
141 | 0 | return -1; |
142 | 0 | } |
143 | | |
144 | | /* |
145 | | * The following bitmask declarations, bitmask_*() routines, and associated |
146 | | * _setbit() and _getbit() routines are: |
147 | | * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved. |
148 | | * SGI publishes it under the terms of the GNU Library General Public License, |
149 | | * v2, as published by the Free Software Foundation. |
150 | | */ |
151 | | static unsigned int |
152 | | _getbit(const struct bitmask *bmp, unsigned int n) |
153 | 137k | { |
154 | 137k | if (n < bmp->size) |
155 | 136k | return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1; |
156 | 789 | else |
157 | 789 | return 0; |
158 | 137k | } |
159 | | |
160 | | static void |
161 | | _setbit(struct bitmask *bmp, unsigned int n, unsigned int v) |
162 | 23.9k | { |
163 | 23.9k | if (n < bmp->size) { |
164 | 23.9k | if (v) |
165 | 10.6k | bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong); |
166 | 13.2k | else |
167 | 13.2k | bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong)); |
168 | 23.9k | } |
169 | 23.9k | } |
170 | | |
171 | | int |
172 | | numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i) |
173 | 135k | { |
174 | 135k | return _getbit(bmp, i); |
175 | 135k | } |
176 | | |
177 | | struct bitmask * |
178 | | numa_bitmask_setall(struct bitmask *bmp) |
179 | 0 | { |
180 | 0 | unsigned int i; |
181 | 0 | for (i = 0; i < bmp->size; i++) |
182 | 0 | _setbit(bmp, i, 1); |
183 | 0 | return bmp; |
184 | 0 | } |
185 | | |
186 | | struct bitmask * |
187 | | numa_bitmask_clearall(struct bitmask *bmp) |
188 | 207 | { |
189 | 207 | unsigned int i; |
190 | 13.4k | for (i = 0; i < bmp->size; i++) |
191 | 13.2k | _setbit(bmp, i, 0); |
192 | 207 | return bmp; |
193 | 207 | } |
194 | | |
195 | | struct bitmask * |
196 | | numa_bitmask_setbit(struct bitmask *bmp, unsigned int i) |
197 | 10.6k | { |
198 | 10.6k | _setbit(bmp, i, 1); |
199 | 10.6k | return bmp; |
200 | 10.6k | } |
201 | | |
202 | | struct bitmask * |
203 | | numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i) |
204 | 5 | { |
205 | 5 | _setbit(bmp, i, 0); |
206 | 5 | return bmp; |
207 | 5 | } |
208 | | |
209 | | unsigned int |
210 | | numa_bitmask_nbytes(struct bitmask *bmp) |
211 | 2 | { |
212 | 2 | return longsperbits(bmp->size) * sizeof(unsigned long); |
213 | 2 | } |
214 | | |
215 | | /* where n is the number of bits in the map */ |
216 | | /* This function should not exit on failure, but right now we cannot really |
217 | | recover from this. */ |
218 | | struct bitmask * |
219 | | numa_bitmask_alloc(unsigned int n) |
220 | 1.50k | { |
221 | 1.50k | struct bitmask *bmp; |
222 | | |
223 | 1.50k | if (n < 1) { |
224 | 0 | errno = EINVAL; |
225 | 0 | numa_error("request to allocate mask for invalid number"); |
226 | 0 | return NULL; |
227 | 0 | } |
228 | 1.50k | bmp = malloc(sizeof(*bmp)); |
229 | 1.50k | if (bmp == 0) |
230 | 0 | goto oom; |
231 | 1.50k | bmp->size = n; |
232 | 1.50k | bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); |
233 | 1.50k | if (bmp->maskp == 0) { |
234 | 0 | free(bmp); |
235 | 0 | goto oom; |
236 | 0 | } |
237 | 1.50k | return bmp; |
238 | | |
239 | 0 | oom: |
240 | 0 | numa_error("Out of memory allocating bitmask"); |
241 | 0 | exit(1); |
242 | 1.50k | } |
243 | | |
244 | | void |
245 | | numa_bitmask_free(struct bitmask *bmp) |
246 | 1.48k | { |
247 | 1.48k | if (bmp == 0) |
248 | 0 | return; |
249 | 1.48k | free(bmp->maskp); |
250 | 1.48k | bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ |
251 | 1.48k | free(bmp); |
252 | 1.48k | return; |
253 | 1.48k | } |
254 | | |
255 | | /* True if two bitmasks are equal */ |
256 | | int |
257 | | numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2) |
258 | 0 | { |
259 | 0 | unsigned int i; |
260 | 0 | for (i = 0; i < bmp1->size || i < bmp2->size; i++) |
261 | 0 | if (_getbit(bmp1, i) != _getbit(bmp2, i)) |
262 | 0 | return 0; |
263 | 0 | return 1; |
264 | 0 | } |
265 | | |
266 | | /* Hamming Weight: number of set bits */ |
267 | | unsigned int numa_bitmask_weight(const struct bitmask *bmp) |
268 | 4 | { |
269 | 4 | unsigned int i; |
270 | 4 | unsigned int w = 0; |
271 | 2.18k | for (i = 0; i < bmp->size; i++) |
272 | 2.17k | if (_getbit(bmp, i)) |
273 | 66 | w++; |
274 | 4 | return w; |
275 | 4 | } |
276 | | |
277 | | /* *****end of bitmask_ routines ************ */ |
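
A short sketch of the bitmask API that ends here (assuming at least one possible node; out-of-range bit numbers are silently ignored by _setbit()/_getbit()):

	#include <assert.h>
	#include <numa.h>

	static void bitmask_demo(void)
	{
		struct bitmask *bmp = numa_bitmask_alloc(numa_num_possible_nodes());

		numa_bitmask_setbit(bmp, 0);		/* _setbit(..., 1) */
		assert(numa_bitmask_isbitset(bmp, 0));	/* _getbit() */
		assert(numa_bitmask_weight(bmp) == 1);	/* Hamming weight */
		numa_bitmask_clearall(bmp);		/* _setbit(..., 0) per bit */
		numa_bitmask_free(bmp);			/* poisons maskp first */
	}
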
278 | | |
279 | | /* Next two can be overridden by the application for different error handling */ |
280 | | WEAK void numa_error(char *where) |
281 | 0 | { |
282 | 0 | int olde = errno; |
283 | 0 | perror(where); |
284 | 0 | if (numa_exit_on_error) |
285 | 0 | exit(1); |
286 | 0 | errno = olde; |
287 | 0 | } |
288 | | |
289 | | WEAK void numa_warn(int num, char *fmt, ...) |
290 | 1.14k | { |
291 | 1.14k | static unsigned warned; |
292 | 1.14k | va_list ap; |
293 | 1.14k | int olde = errno; |
294 | | |
295 | | /* Give each warning only once */ |
296 | 1.14k | if ((1<<num) & warned) |
297 | 1.13k | return; |
298 | 9 | warned |= (1<<num); |
299 | | |
300 | 9 | va_start(ap,fmt); |
301 | 9 | fprintf(stderr, "libnuma: Warning: "); |
302 | 9 | vfprintf(stderr, fmt, ap); |
303 | 9 | fputc('\n', stderr); |
304 | 9 | va_end(ap); |
305 | | |
306 | 9 | if (numa_exit_on_warn) |
307 | 0 | exit(1); |
308 | | |
309 | 9 | errno = olde; |
310 | 9 | } |
311 | | |
312 | | static void setpol(int policy, struct bitmask *bmp) |
313 | 0 | { |
314 | 0 | if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0) |
315 | 0 | numa_error("set_mempolicy"); |
316 | 0 | } |
317 | | |
318 | | static void getpol(int *oldpolicy, struct bitmask *bmp) |
319 | 0 | { |
320 | 0 | if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0) |
321 | 0 | numa_error("get_mempolicy"); |
322 | 0 | } |
323 | | |
324 | | static int dombind(void *mem, size_t size, int pol, struct bitmask *bmp) |
325 | 0 | { |
326 | 0 | if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0, |
327 | 0 | mbind_flags) < 0) { |
328 | 0 | numa_error("mbind"); |
329 | 0 | return -1; |
330 | 0 | } |
331 | 0 | return 0; |
332 | 0 | } |
333 | | |
334 | | static void *dombind_or_free(void *mem, size_t size, int pol, struct bitmask *bmp) |
335 | 0 | { |
336 | 0 | if (dombind(mem, size, pol, bmp) < 0 && numa_fail_alloc_on_error) { |
337 | 0 | munmap(mem, size); |
338 | 0 | return NULL; |
339 | 0 | } |
340 | 0 | return mem; |
341 | 0 | } |
342 | | |
343 | | /* (undocumented) */ |
344 | | /* gives the wrong answer for hugetlbfs mappings. */ |
345 | | int numa_pagesize(void) |
346 | 0 | { |
347 | 0 | static int pagesize; |
348 | 0 | if (pagesize > 0) |
349 | 0 | return pagesize; |
350 | 0 | pagesize = getpagesize(); |
351 | 0 | return pagesize; |
352 | 0 | } |
353 | | |
354 | | make_internal_alias(numa_pagesize); |
355 | | |
356 | | /* |
357 | | * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr) |
358 | | * and the highest numbered existing node (maxconfigurednode). |
359 | | */ |
360 | | static void |
361 | | set_configured_nodes(void) |
362 | 2 | { |
363 | 2 | DIR *d; |
364 | 2 | struct dirent *de; |
365 | | |
366 | 2 | numa_memnode_ptr = numa_allocate_nodemask(); |
367 | 2 | numa_nodes_ptr = numa_allocate_nodemask(); |
368 | 2 | if (!numa_memnode_ptr || !numa_nodes_ptr) |
369 | 0 | return; |
370 | | |
371 | 2 | d = opendir("/sys/devices/system/node"); |
372 | 2 | if (!d) { |
373 | 0 | maxconfigurednode = 0; |
374 | 2 | } else { |
375 | 24 | while ((de = readdir(d)) != NULL) { |
376 | 22 | int nd; |
377 | 22 | if (strncmp(de->d_name, "node", 4)) |
378 | 20 | continue; |
379 | 2 | nd = strtoul(de->d_name+4, NULL, 0); |
380 | 2 | numa_bitmask_setbit(numa_nodes_ptr, nd); |
381 | 2 | numa_bitmask_setbit(numa_memnode_ptr, nd); |
382 | 2 | if (maxconfigurednode < nd) |
383 | 2 | maxconfigurednode = nd; |
384 | 2 | } |
385 | 2 | closedir(d); |
386 | 2 | } |
387 | 2 | } |
388 | | |
389 | | static inline int is_digit(char s) |
390 | 574 | { |
391 | 574 | return (s >= '0' && s <= '9') |
392 | 574 | || (s >= 'a' && s <= 'f') |
393 | 574 | || (s >= 'A' && s <= 'F'); |
394 | 574 | } |
395 | | |
396 | | /* Is string 'pre' a prefix of string 's'? */ |
397 | | static int strprefix(const char *s, const char *pre) |
398 | 112 | { |
399 | 112 | return strncmp(s, pre, strlen(pre)) == 0; |
400 | 112 | } |
401 | | |
402 | | static const char *mask_size_file = "/proc/self/status"; |
403 | | static const char *nodemask_prefix = "Mems_allowed:\t"; |
404 | | /* |
405 | | * (do this the way Paul Jackson's libcpuset does it) |
406 | | * The nodemask values in /proc/self/status are in an |
407 | | * ASCII format that uses 9 characters for each 32 bits of mask. |
408 | | * (this could also be used to find the cpumask size) |
409 | | */ |
410 | | static void |
411 | | set_nodemask_size(void) |
412 | 2 | { |
413 | 2 | FILE *fp; |
414 | 2 | char *buf = NULL; |
415 | 2 | char *tmp_buf = NULL; |
416 | 2 | int digit_len = 0; |
417 | 2 | size_t bufsize = 0; |
418 | | |
419 | 2 | if ((fp = fopen(mask_size_file, "r")) == NULL) |
420 | 0 | goto done; |
421 | | |
422 | 114 | while (getline(&buf, &bufsize, fp) > 0) { |
423 | 112 | if (strprefix(buf, nodemask_prefix)) { |
424 | 2 | tmp_buf = buf; |
425 | 2 | tmp_buf += strlen(nodemask_prefix); |
426 | 576 | while (*tmp_buf != '\n' && *tmp_buf != '\0') { |
427 | 574 | if (is_digit(*tmp_buf)) |
428 | 512 | digit_len++; |
429 | 574 | tmp_buf++; |
430 | 574 | } |
431 | 2 | nodemask_sz = digit_len * 4; |
432 | 2 | } |
433 | 112 | } |
434 | 2 | free(buf); |
435 | 2 | fclose(fp); |
436 | 2 | done: |
437 | 2 | if (nodemask_sz == 0) {/* fall back on error */ |
438 | 0 | int pol; |
439 | 0 | unsigned long *mask = NULL, *origmask; |
440 | 0 | nodemask_sz = 16; |
441 | 0 | do { |
442 | 0 | nodemask_sz <<= 1; |
443 | 0 | origmask = mask; |
444 | 0 | mask = realloc(mask, nodemask_sz / 8 + sizeof(unsigned long)); |
445 | 0 | if (!mask) { |
446 | 0 | free(origmask); |
447 | 0 | return; |
448 | 0 | } |
449 | 0 | } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL && |
450 | 0 | nodemask_sz < 4096*8); |
451 | 0 | free(mask); |
452 | 0 | } |
453 | 2 | } |
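
Worked example of the arithmetic above: on a kernel with a 64-bit nodemask the field reads like "Mems_allowed:\t00000000,00000001" - one 9-character group (8 hex digits plus a separator) per 32 bits. is_digit() counts 16 hex digits, so nodemask_sz = 16 * 4 = 64 bits.
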
454 | | |
455 | | /* |
456 | | * Read a mask consisting of a sequence of hexadecimal longs separated by |
457 | | * commas. Order them correctly and return the number of bits set. |
458 | | */ |
459 | | static int |
460 | | read_mask(char *s, struct bitmask *bmp) |
461 | 4 | { |
462 | 4 | char *end = s; |
463 | 4 | int tmplen = (bmp->size + bitsperint - 1) / bitsperint; |
464 | 4 | unsigned int tmp[tmplen]; |
465 | 4 | unsigned int *start = tmp; |
466 | 4 | unsigned int i, n = 0, m = 0; |
467 | | |
468 | 4 | if (!s) |
469 | 0 | return 0; /* shouldn't happen */ |
470 | | |
471 | 4 | i = strtoul(s, &end, 16); |
472 | | |
473 | | /* Skip leading zeros */ |
474 | 66 | while (!i && *end++ == ',') { |
475 | 62 | i = strtoul(end, &end, 16); |
476 | 62 | } |
477 | | |
478 | 4 | if (!i) |
479 | | /* End of string. No mask */ |
480 | 0 | return -1; |
481 | | |
482 | 4 | start[n++] = i; |
483 | | /* Read sequence of ints */ |
484 | 4 | while (*end++ == ',') { |
485 | 0 | i = strtoul(end, &end, 16); |
486 | 0 | start[n++] = i; |
487 | | |
488 | | /* buffer overflow */ |
489 | 0 | if (n > tmplen) |
490 | 0 | return -1; |
491 | 0 | } |
492 | | |
493 | | /* |
494 | | * Invert sequence of ints if necessary since the first int |
495 | | * is the highest and we put it first because we read it first. |
496 | | */ |
497 | 8 | while (n) { |
498 | 4 | int w; |
499 | 4 | unsigned long x = 0; |
500 | | /* read into long values in an endian-safe way */ |
501 | 8 | for (w = 0; n && w < bitsperlong; w += bitsperint) |
502 | 4 | x |= ((unsigned long)start[n-- - 1] << w); |
503 | | |
504 | 4 | bmp->maskp[m++] = x; |
505 | 4 | } |
506 | | /* |
507 | | * Return the number of bits set |
508 | | */ |
509 | 4 | return numa_bitmask_weight(bmp); |
510 | 4 | } |
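
Worked example for read_mask(): given "Cpus_allowed:\t00000000,000000ff", the all-zero leading word is skipped, the single value 0xff lands in maskp[0], and the function returns a Hamming weight of 8 (cpus 0-7).
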
511 | | |
512 | | /* |
513 | | * Read a process's constraints in terms of nodes and cpus from |
514 | | * /proc/self/status. |
515 | | */ |
516 | | static void |
517 | | set_task_constraints(void) |
518 | 2 | { |
519 | 2 | int hicpu = maxconfiguredcpu; |
520 | 2 | int i; |
521 | 2 | char *buffer = NULL; |
522 | 2 | size_t buflen = 0; |
523 | 2 | FILE *f; |
524 | | |
525 | 2 | numa_all_cpus_ptr = numa_allocate_cpumask(); |
526 | 2 | numa_possible_cpus_ptr = numa_allocate_cpumask(); |
527 | 2 | numa_all_nodes_ptr = numa_allocate_nodemask(); |
528 | 2 | numa_possible_nodes_ptr = numa_allocate_cpumask(); |
529 | 2 | numa_no_nodes_ptr = numa_allocate_nodemask(); |
530 | | |
531 | | // a partial leak shouldn't happen because it's transient |
532 | 2 | if (!numa_all_cpus_ptr || !numa_possible_cpus_ptr || |
533 | 2 | !numa_all_nodes_ptr || |
534 | 2 | !numa_possible_nodes_ptr || |
535 | 2 | !numa_no_nodes_ptr) |
536 | 0 | return; |
537 | | |
538 | 2 | f = fopen(mask_size_file, "r"); |
539 | 2 | if (!f) { |
540 | | //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file); |
541 | 0 | return; |
542 | 0 | } |
543 | | |
544 | 114 | while (getline(&buffer, &buflen, f) > 0) { |
545 | | /* mask starts after [last] tab */ |
546 | 112 | char *mask = strrchr(buffer,'\t') + 1; |
547 | | |
548 | 112 | if (strncmp(buffer,"Cpus_allowed:",13) == 0) |
549 | 2 | numproccpu = read_mask(mask, numa_all_cpus_ptr); |
550 | | |
551 | 112 | if (strncmp(buffer,"Mems_allowed:",13) == 0) { |
552 | 2 | numprocnode = read_mask(mask, numa_all_nodes_ptr); |
553 | 2 | } |
554 | 112 | } |
555 | 2 | fclose(f); |
556 | 2 | free(buffer); |
557 | | |
558 | 66 | for (i = 0; i <= hicpu; i++) |
559 | 64 | numa_bitmask_setbit(numa_possible_cpus_ptr, i); |
560 | 4 | for (i = 0; i <= maxconfigurednode; i++) |
561 | 2 | numa_bitmask_setbit(numa_possible_nodes_ptr, i); |
562 | | |
563 | | /* |
564 | | * Cpus_allowed in the kernel can be defined to all f's, |
565 | | * i.e. it may be a superset of the actual available processors. |
566 | | * As such let's reduce numproccpu to the number of actual |
567 | | * available cpus. |
568 | | */ |
569 | 2 | if (numproccpu <= 0) { |
570 | 0 | for (i = 0; i <= hicpu; i++) |
571 | 0 | numa_bitmask_setbit(numa_all_cpus_ptr, i); |
572 | 0 | numproccpu = hicpu+1; |
573 | 0 | } |
574 | | |
575 | 2 | if (numproccpu > hicpu+1) { |
576 | 0 | numproccpu = hicpu+1; |
577 | 0 | for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) { |
578 | 0 | numa_bitmask_clearbit(numa_all_cpus_ptr, i); |
579 | 0 | } |
580 | 0 | } |
581 | | |
582 | 2 | if (numprocnode <= 0) { |
583 | 0 | for (i = 0; i <= maxconfigurednode; i++) |
584 | 0 | numa_bitmask_setbit(numa_all_nodes_ptr, i); |
585 | 0 | numprocnode = maxconfigurednode + 1; |
586 | 0 | } |
587 | | |
588 | 2 | return; |
589 | 2 | } |
590 | | |
591 | | /* |
592 | | * Find the highest cpu number possible (in other words the size |
593 | | * of a kernel cpumask_t (in bits) - 1) |
594 | | */ |
595 | | static void |
596 | | set_numa_max_cpu(void) |
597 | 2 | { |
598 | 2 | int len = 4096; |
599 | 2 | int n; |
600 | 2 | int olde = errno; |
601 | 2 | struct bitmask *buffer; |
602 | | |
603 | 2 | do { |
604 | 2 | buffer = numa_bitmask_alloc(len); |
605 | 2 | if (!buffer) |
606 | 0 | return; |
607 | 2 | n = numa_sched_getaffinity_v2_int(0, buffer); |
608 | | /* on success, returns size of kernel cpumask_t, in bytes */ |
609 | 2 | if (n < 0) { |
610 | 0 | if (errno == EINVAL) { |
611 | 0 | if (len >= 1024*1024) |
612 | 0 | break; |
613 | 0 | len *= 2; |
614 | 0 | numa_bitmask_free(buffer); |
615 | 0 | continue; |
616 | 0 | } else { |
617 | 0 | numa_warn(W_numcpus, "Unable to determine max cpu" |
618 | 0 | " (sched_getaffinity: %s); guessing...", |
619 | 0 | strerror(errno)); |
620 | 0 | n = sizeof(cpu_set_t); |
621 | 0 | break; |
622 | 0 | } |
623 | 0 | } |
624 | 2 | } while (n < 0); |
625 | 2 | numa_bitmask_free(buffer); |
626 | 2 | errno = olde; |
627 | 2 | cpumask_sz = n*8; |
628 | 2 | } |
629 | | |
630 | | /* |
631 | | * get the total (configured) number of cpus - both online and offline |
632 | | */ |
633 | | static void |
634 | | set_configured_cpus(void) |
635 | 2 | { |
636 | 2 | maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1; |
637 | 2 | if (maxconfiguredcpu == -1) |
638 | 0 | numa_error("sysconf(NPROCESSORS_CONF) failed"); |
639 | 2 | } |
640 | | |
641 | | static void |
642 | | set_preferred_many(void) |
643 | 0 | { |
644 | 0 | int oldp; |
645 | 0 | struct bitmask *bmp, *tmp; |
646 | 0 | int old_errno; |
647 | | |
648 | 0 | if (has_preferred_many >= 0) |
649 | 0 | return; |
650 | | |
651 | 0 | old_errno = errno; |
652 | | |
653 | 0 | has_preferred_many = 0; |
654 | | |
655 | 0 | bmp = numa_allocate_nodemask(); |
656 | 0 | tmp = numa_get_mems_allowed(); |
657 | 0 | if (!tmp || !bmp) |
658 | 0 | goto out; |
659 | | |
660 | 0 | if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0) |
661 | 0 | goto out; |
662 | | |
663 | 0 | if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) { |
664 | 0 | has_preferred_many = 1; |
665 | | /* reset the old memory policy ignoring error */ |
666 | 0 | (void)set_mempolicy(oldp, bmp->maskp, bmp->size+1); |
667 | 0 | } |
668 | | |
669 | 0 | out: |
670 | 0 | numa_bitmask_free(tmp); |
671 | 0 | numa_bitmask_free(bmp); |
672 | 0 | errno = old_errno; |
673 | 0 | } |
674 | | |
675 | | /* |
676 | | * Initialize all the sizes. |
677 | | */ |
678 | | static void |
679 | | set_sizes(void) |
680 | 2 | { |
681 | 2 | sizes_set++; |
682 | 2 | set_nodemask_size(); /* size of kernel nodemask_t */ |
683 | 2 | set_configured_nodes(); /* configured nodes listed in /sys */ |
684 | 2 | set_numa_max_cpu(); /* size of kernel cpumask_t */ |
685 | 2 | set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */ |
686 | 2 | set_task_constraints(); /* cpus and nodes for current task */ |
687 | 2 | } |
688 | | |
689 | | int |
690 | | numa_num_configured_nodes(void) |
691 | 744 | { |
692 | | /* |
693 | | * NOTE: this function's behavior matches the documentation (ie: it |
694 | | * returns a count of nodes with memory) despite the poor function |
695 | | * naming. We also cannot use the similarly poorly named |
696 | | * numa_all_nodes_ptr as it only tracks nodes with memory from which |
697 | | * the calling process can allocate. Think sparse nodes, memory-less |
698 | | * nodes, cpusets... |
699 | | */ |
700 | 744 | int memnodecount=0, i; |
701 | | |
702 | 1.48k | for (i=0; i <= maxconfigurednode; i++) { |
703 | 744 | if (numa_bitmask_isbitset(numa_memnode_ptr, i)) |
704 | 744 | memnodecount++; |
705 | 744 | } |
706 | 744 | return memnodecount; |
707 | 744 | } |
708 | | |
709 | | int |
710 | | numa_num_configured_cpus(void) |
711 | 742 | { |
712 | | |
713 | 742 | return maxconfiguredcpu+1; |
714 | 742 | } |
715 | | |
716 | | int |
717 | | numa_num_possible_nodes(void) |
718 | 751 | { |
719 | 751 | return nodemask_sz; |
720 | 751 | } |
721 | | |
722 | | int |
723 | | numa_num_possible_cpus(void) |
724 | 749 | { |
725 | 749 | return cpumask_sz; |
726 | 749 | } |
727 | | |
728 | | int |
729 | | numa_num_task_nodes(void) |
730 | 0 | { |
731 | 0 | return numprocnode; |
732 | 0 | } |
733 | | |
734 | | /* |
735 | | * for backward compatibility |
736 | | */ |
737 | | int |
738 | | numa_num_thread_nodes(void) |
739 | 0 | { |
740 | 0 | return numa_num_task_nodes(); |
741 | 0 | } |
742 | | |
743 | | int |
744 | | numa_num_task_cpus(void) |
745 | 0 | { |
746 | 0 | return numproccpu; |
747 | 0 | } |
748 | | |
749 | | /* |
750 | | * for backward compatibility |
751 | | */ |
752 | | int |
753 | | numa_num_thread_cpus(void) |
754 | 0 | { |
755 | 0 | return numa_num_task_cpus(); |
756 | 0 | } |
757 | | |
758 | | /* |
759 | | * Return the number of the highest node in this running system. |
760 | | */ |
761 | | int |
762 | | numa_max_node(void) |
763 | 207 | { |
764 | 207 | return maxconfigurednode; |
765 | 207 | } |
766 | | |
767 | | make_internal_alias(numa_max_node); |
768 | | |
769 | | /* |
770 | | * Return the number of the highest possible node in a system, |
771 | | * which for v1 is the size of a numa.h nodemask_t (in bits) - 1, |
772 | | * but for v2 is the size of a kernel nodemask_t (in bits) - 1. |
773 | | */ |
774 | | SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1") |
775 | | int |
776 | | numa_max_possible_node_v1(void) |
777 | 0 | { |
778 | 0 | return ((sizeof(nodemask_t)*8)-1); |
779 | 0 | } |
780 | | |
781 | | SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2") |
782 | | int |
783 | | numa_max_possible_node_v2(void) |
784 | 751 | { |
785 | 751 | return numa_num_possible_nodes()-1; |
786 | 751 | } |
787 | | |
788 | | make_internal_alias(numa_max_possible_node_v1); |
789 | | make_internal_alias(numa_max_possible_node_v2); |
790 | | |
791 | | /* |
792 | | * Allocate a bitmask for cpus, of a size large enough to |
793 | | * match the kernel's cpumask_t. |
794 | | */ |
795 | | struct bitmask * |
796 | | numa_allocate_cpumask() |
797 | 749 | { |
798 | 749 | int ncpus = numa_num_possible_cpus(); |
799 | | |
800 | 749 | return numa_bitmask_alloc(ncpus); |
801 | 749 | } |
802 | | |
803 | | /* |
804 | | * Allocate a bitmask the size of a libnuma nodemask_t |
805 | | */ |
806 | | static struct bitmask * |
807 | | allocate_nodemask_v1(void) |
808 | 0 | { |
809 | 0 | int nnodes = numa_max_possible_node_v1_int()+1; |
810 | | |
811 | 0 | return numa_bitmask_alloc(nnodes); |
812 | 0 | } |
813 | | |
814 | | /* |
815 | | * Allocate a bitmask for nodes, of a size large enough to |
816 | | * match the kernel's nodemask_t. |
817 | | */ |
818 | | struct bitmask * |
819 | | numa_allocate_nodemask(void) |
820 | 750 | { |
821 | 750 | struct bitmask *bmp; |
822 | 750 | int nnodes = numa_max_possible_node_v2_int() + 1; |
823 | | |
824 | 750 | bmp = numa_bitmask_alloc(nnodes); |
825 | 750 | return bmp; |
826 | 750 | } |
827 | | |
828 | | /* (cache the result?) */ |
829 | | long long numa_node_size64(int node, long long *freep) |
830 | 0 | { |
831 | 0 | size_t len = 0; |
832 | 0 | char *line = NULL; |
833 | 0 | long long size = -1; |
834 | 0 | FILE *f; |
835 | 0 | char fn[64]; |
836 | 0 | int ok = 0; |
837 | 0 | int required = freep ? 2 : 1; |
838 | | |
839 | 0 | if (freep) |
840 | 0 | *freep = 0; |
841 | 0 | sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); |
842 | 0 | f = fopen(fn, "r"); |
843 | 0 | if (!f) |
844 | 0 | return -1; |
845 | 0 | while (getdelim(&line, &len, '\n', f) > 0) { |
846 | 0 | char *end; |
847 | 0 | char *s = strcasestr(line, "kB"); |
848 | 0 | if (!s) |
849 | 0 | continue; |
850 | 0 | --s; |
851 | 0 | while (s > line && isspace(*s)) |
852 | 0 | --s; |
853 | 0 | while (s > line && isdigit(*s)) |
854 | 0 | --s; |
855 | 0 | if (strstr(line, "MemTotal")) { |
856 | 0 | size = strtoull(s,&end,0) << 10; |
857 | 0 | if (end == s) |
858 | 0 | size = -1; |
859 | 0 | else |
860 | 0 | ok++; |
861 | 0 | } |
862 | 0 | if (freep && strstr(line, "MemFree")) { |
863 | 0 | *freep = strtoull(s,&end,0) << 10; |
864 | 0 | if (end == s) |
865 | 0 | *freep = -1; |
866 | 0 | else |
867 | 0 | ok++; |
868 | 0 | } |
869 | 0 | } |
870 | 0 | fclose(f); |
871 | 0 | free(line); |
872 | 0 | if (ok != required) |
873 | 0 | numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok); |
874 | 0 | return size; |
875 | 0 | } |
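
A usage sketch for the meminfo parser above (node 0 is an assumption; -1 means the node is missing or meminfo was unparsable):

	long long freemem = 0;
	long long total = numa_node_size64(0, &freemem);
	if (total >= 0)
		printf("node 0: %lld bytes total, %lld free\n", total, freemem);
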
876 | | |
877 | | make_internal_alias(numa_node_size64); |
878 | | |
879 | | long numa_node_size(int node, long *freep) |
880 | 0 | { |
881 | 0 | long long f2 = 0; |
882 | 0 | long sz = numa_node_size64_int(node, &f2); |
883 | 0 | if (freep) |
884 | 0 | *freep = f2; |
885 | 0 | return sz; |
886 | 0 | } |
887 | | |
888 | | int numa_available(void) |
889 | 0 | { |
890 | 0 | if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && (errno == ENOSYS || errno == EPERM)) |
891 | 0 | return -1; |
892 | 0 | return 0; |
893 | 0 | } |
894 | | |
895 | | SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1") |
896 | | void |
897 | | numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask) |
898 | 0 | { |
899 | 0 | struct bitmask bitmask; |
900 | | |
901 | 0 | bitmask.size = sizeof(nodemask_t) * 8; |
902 | 0 | bitmask.maskp = (unsigned long *)mask; |
903 | 0 | dombind(mem, size, MPOL_INTERLEAVE, &bitmask); |
904 | 0 | } |
905 | | |
906 | | SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2") |
907 | | void |
908 | | numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp) |
909 | 0 | { |
910 | 0 | dombind(mem, size, MPOL_INTERLEAVE, bmp); |
911 | 0 | } |
912 | | |
913 | | void |
914 | | numa_weighted_interleave_memory(void *mem, size_t size, struct bitmask *bmp) |
915 | 0 | { |
916 | 0 | dombind(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp); |
917 | 0 | } |
918 | | |
919 | | void numa_tonode_memory(void *mem, size_t size, int node) |
920 | 0 | { |
921 | 0 | struct bitmask *nodes; |
922 | | |
923 | 0 | nodes = numa_allocate_nodemask(); |
924 | 0 | if (!nodes) |
925 | 0 | return; |
926 | 0 | numa_bitmask_setbit(nodes, node); |
927 | 0 | dombind(mem, size, bind_policy, nodes); |
928 | 0 | numa_bitmask_free(nodes); |
929 | 0 | } |
930 | | |
931 | | SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1") |
932 | | void |
933 | | numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask) |
934 | 0 | { |
935 | 0 | struct bitmask bitmask; |
936 | | |
937 | 0 | bitmask.maskp = (unsigned long *)mask; |
938 | 0 | bitmask.size = sizeof(nodemask_t); |
939 | 0 | dombind(mem, size, bind_policy, &bitmask); |
940 | 0 | } |
941 | | |
942 | | SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2") |
943 | | void |
944 | | numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp) |
945 | 0 | { |
946 | 0 | dombind(mem, size, bind_policy, bmp); |
947 | 0 | } |
948 | | |
949 | | void numa_setlocal_memory(void *mem, size_t size) |
950 | 0 | { |
951 | 0 | dombind(mem, size, MPOL_LOCAL, NULL); |
952 | 0 | } |
953 | | |
954 | | void numa_police_memory(void *mem, size_t size) |
955 | 0 | { |
956 | 0 | int pagesize = numa_pagesize_int(); |
957 | 0 | unsigned long i; |
958 | 0 | char *p = mem; |
959 | 0 | for (i = 0; i < size; i += pagesize, p += pagesize) |
960 | 0 | __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED); |
961 | | |
962 | 0 | } |
963 | | |
964 | | make_internal_alias(numa_police_memory); |
965 | | |
966 | | void *numa_alloc(size_t size) |
967 | 0 | { |
968 | 0 | char *mem; |
969 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
970 | 0 | 0, 0); |
971 | 0 | if (mem == (char *)-1) |
972 | 0 | return NULL; |
973 | 0 | numa_police_memory_int(mem, size); |
974 | 0 | return mem; |
975 | 0 | } |
976 | | |
977 | | void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) |
978 | 0 | { |
979 | 0 | char *mem; |
980 | 0 | mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); |
981 | 0 | if (mem == (char *)-1) |
982 | 0 | return NULL; |
983 | | /* |
984 | | * The memory policy of the allocated pages is preserved by mremap(), so |
985 | | * there is no need to (re)set it here. If the policy of the original |
986 | | * allocation is not set, the new pages will be allocated according to the |
987 | | * process' mempolicy. Trying to allocate explicitly the new pages on the |
988 | | * same node as the original ones would require changing the policy of the |
989 | | * newly allocated pages, which violates the numa_realloc() semantics. |
990 | | */ |
991 | 0 | return mem; |
992 | 0 | } |
993 | | |
994 | | SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1") |
995 | | void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) |
996 | 0 | { |
997 | 0 | char *mem; |
998 | 0 | struct bitmask bitmask; |
999 | | |
1000 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1001 | 0 | 0, 0); |
1002 | 0 | if (mem == (char *)-1) |
1003 | 0 | return NULL; |
1004 | 0 | bitmask.maskp = (unsigned long *)mask; |
1005 | 0 | bitmask.size = sizeof(nodemask_t); |
1006 | 0 | mem = dombind_or_free(mem, size, MPOL_INTERLEAVE, &bitmask); |
1007 | 0 | return mem; |
1008 | 0 | } |
1009 | | |
1010 | | SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2") |
1011 | | void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp) |
1012 | 0 | { |
1013 | 0 | char *mem; |
1014 | | |
1015 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1016 | 0 | 0, 0); |
1017 | 0 | if (mem == (char *)-1) |
1018 | 0 | return NULL; |
1019 | 0 | mem = dombind_or_free(mem, size, MPOL_INTERLEAVE, bmp); |
1020 | 0 | return mem; |
1021 | 0 | } |
1022 | | |
1023 | | make_internal_alias(numa_alloc_interleaved_subset_v1); |
1024 | | make_internal_alias(numa_alloc_interleaved_subset_v2); |
1025 | | |
1026 | | void * |
1027 | | numa_alloc_interleaved(size_t size) |
1028 | 0 | { |
1029 | 0 | return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr); |
1030 | 0 | } |
1031 | | |
1032 | | void * |
1033 | | numa_alloc_weighted_interleaved_subset(size_t size, struct bitmask *bmp) |
1034 | 0 | { |
1035 | 0 | char *mem; |
1036 | | |
1037 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1038 | 0 | 0, 0); |
1039 | 0 | if (mem == (char *)-1) |
1040 | 0 | return NULL; |
1041 | 0 | mem = dombind_or_free(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp); |
1042 | 0 | return mem; |
1043 | 0 | } |
1044 | | |
1045 | | void * |
1046 | | numa_alloc_weighted_interleaved(size_t size) |
1047 | 0 | { |
1048 | 0 | return numa_alloc_weighted_interleaved_subset(size, numa_all_nodes_ptr); |
1049 | 0 | } |
1050 | | |
1051 | | /* |
1052 | | * given a user node mask, set memory policy to use those nodes |
1053 | | */ |
1054 | | SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1") |
1055 | | void |
1056 | | numa_set_interleave_mask_v1(nodemask_t *mask) |
1057 | 0 | { |
1058 | 0 | struct bitmask *bmp; |
1059 | 0 | int nnodes = numa_max_possible_node_v1_int()+1; |
1060 | | |
1061 | 0 | bmp = numa_bitmask_alloc(nnodes); |
1062 | 0 | copy_nodemask_to_bitmask(mask, bmp); |
1063 | 0 | if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
1064 | 0 | setpol(MPOL_DEFAULT, bmp); |
1065 | 0 | else |
1066 | 0 | setpol(MPOL_INTERLEAVE, bmp); |
1067 | 0 | numa_bitmask_free(bmp); |
1068 | 0 | } |
1069 | | |
1070 | | |
1071 | | SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2") |
1072 | | void |
1073 | | numa_set_interleave_mask_v2(struct bitmask *bmp) |
1074 | 0 | { |
1075 | 0 | if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
1076 | 0 | setpol(MPOL_DEFAULT, bmp); |
1077 | 0 | else |
1078 | 0 | setpol(MPOL_INTERLEAVE, bmp); |
1079 | 0 | } |
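
Sketch of the v2 setter above; as the code shows, passing numa_no_nodes_ptr is the way back to MPOL_DEFAULT:

	numa_set_interleave_mask(numa_all_nodes_ptr);	/* spread new pages */
	/* ... allocations to be interleaved across nodes ... */
	numa_set_interleave_mask(numa_no_nodes_ptr);	/* restore MPOL_DEFAULT */
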
1080 | | |
1081 | | void |
1082 | | numa_set_weighted_interleave_mask(struct bitmask *bmp) |
1083 | 0 | { |
1084 | 0 | if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
1085 | 0 | setpol(MPOL_DEFAULT, bmp); |
1086 | 0 | else |
1087 | 0 | setpol(MPOL_WEIGHTED_INTERLEAVE, bmp); |
1088 | 0 | } |
1089 | | |
1090 | | SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1") |
1091 | | nodemask_t |
1092 | | numa_get_interleave_mask_v1(void) |
1093 | 0 | { |
1094 | 0 | int oldpolicy = 0; |
1095 | 0 | struct bitmask *bmp; |
1096 | 0 | nodemask_t mask; |
1097 | | |
1098 | 0 | bmp = allocate_nodemask_v1(); |
1099 | 0 | if (!bmp) |
1100 | 0 | return numa_no_nodes; |
1101 | 0 | getpol(&oldpolicy, bmp); |
1102 | 0 | if (oldpolicy == MPOL_INTERLEAVE) |
1103 | 0 | copy_bitmask_to_nodemask(bmp, &mask); |
1104 | 0 | else |
1105 | 0 | copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask); |
1106 | 0 | numa_bitmask_free(bmp); |
1107 | 0 | return mask; |
1108 | 0 | } |
1109 | | |
1110 | | SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2") |
1111 | | struct bitmask * |
1112 | | numa_get_interleave_mask_v2(void) |
1113 | 0 | { |
1114 | 0 | int oldpolicy = 0; |
1115 | 0 | struct bitmask *bmp; |
1116 | | |
1117 | 0 | bmp = numa_allocate_nodemask(); |
1118 | 0 | if (!bmp) |
1119 | 0 | return NULL; |
1120 | 0 | getpol(&oldpolicy, bmp); |
1121 | 0 | if (oldpolicy != MPOL_INTERLEAVE) |
1122 | 0 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
1123 | 0 | return bmp; |
1124 | 0 | } |
1125 | | |
1126 | | struct bitmask * |
1127 | | numa_get_weighted_interleave_mask(void) |
1128 | 0 | { |
1129 | 0 | int oldpolicy = 0; |
1130 | 0 | struct bitmask *bmp; |
1131 | | |
1132 | 0 | bmp = numa_allocate_nodemask(); |
1133 | 0 | if (!bmp) |
1134 | 0 | return NULL; |
1135 | 0 | getpol(&oldpolicy, bmp); |
1136 | 0 | if (oldpolicy != MPOL_WEIGHTED_INTERLEAVE) |
1137 | 0 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
1138 | 0 | return bmp; |
1139 | 0 | } |
1140 | | |
1141 | | /* (undocumented) */ |
1142 | | int numa_get_interleave_node(void) |
1143 | 0 | { |
1144 | 0 | int nd; |
1145 | 0 | if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0) |
1146 | 0 | return nd; |
1147 | 0 | return 0; |
1148 | 0 | } |
1149 | | |
1150 | | void *numa_alloc_onnode(size_t size, int node) |
1151 | 0 | { |
1152 | 0 | char *mem; |
1153 | 0 | struct bitmask *bmp; |
1154 | | |
1155 | 0 | bmp = numa_allocate_nodemask(); |
1156 | 0 | if (!bmp) |
1157 | 0 | return NULL; |
1158 | 0 | numa_bitmask_setbit(bmp, node); |
1159 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1160 | 0 | 0, 0); |
1161 | 0 | if (mem == (char *)-1) |
1162 | 0 | mem = NULL; |
1163 | 0 | else |
1164 | 0 | mem = dombind_or_free(mem, size, bind_policy, bmp); |
1165 | | |
1166 | 0 | numa_bitmask_free(bmp); |
1167 | 0 | return mem; |
1168 | 0 | } |
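
Sketch pairing the allocator above with numa_free(), here 1 MiB on a hypothetical node 0:

	size_t sz = 1 << 20;
	void *buf = numa_alloc_onnode(sz, 0);
	if (buf) {
		/* ... use buf ... */
		numa_free(buf, sz);	/* size must match the allocation */
	}
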
1169 | | |
1170 | | void *numa_alloc_local(size_t size) |
1171 | 0 | { |
1172 | 0 | char *mem; |
1173 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1174 | 0 | 0, 0); |
1175 | 0 | if (mem == (char *)-1) |
1176 | 0 | mem = NULL; |
1177 | 0 | else |
1178 | 0 | mem = dombind_or_free(mem, size, MPOL_LOCAL, NULL); |
1179 | 0 | return mem; |
1180 | 0 | } |
1181 | | |
1182 | | void numa_set_bind_policy(int strict) |
1183 | 0 | { |
1184 | 0 | set_preferred_many(); |
1185 | 0 | if (strict) |
1186 | 0 | bind_policy = MPOL_BIND; |
1187 | 0 | else if (has_preferred_many) |
1188 | 0 | bind_policy = MPOL_PREFERRED_MANY; |
1189 | 0 | else |
1190 | 0 | bind_policy = MPOL_PREFERRED; |
1191 | 0 | } |
1192 | | |
1193 | | SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1") |
1194 | | void |
1195 | | numa_set_membind_v1(const nodemask_t *mask) |
1196 | 0 | { |
1197 | 0 | struct bitmask bitmask; |
1198 | |
|
1199 | 0 | bitmask.maskp = (unsigned long *)mask; |
1200 | 0 | bitmask.size = sizeof(nodemask_t); |
1201 | 0 | setpol(MPOL_BIND, &bitmask); |
1202 | 0 | } |
1203 | | |
1204 | | SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2") |
1205 | | void |
1206 | | numa_set_membind_v2(struct bitmask *bmp) |
1207 | 0 | { |
1208 | 0 | setpol(MPOL_BIND, bmp); |
1209 | 0 | } |
1210 | | |
1211 | | make_internal_alias(numa_set_membind_v2); |
1212 | | |
1213 | | void |
1214 | | numa_set_membind_balancing(struct bitmask *bmp) |
1215 | 0 | { |
1216 | | /* MPOL_F_NUMA_BALANCING: ignore if unsupported */ |
1217 | 0 | if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING, |
1218 | 0 | bmp->maskp, bmp->size + 1) < 0) { |
1219 | 0 | if (errno == EINVAL) { |
1220 | 0 | errno = 0; |
1221 | 0 | numa_set_membind_v2(bmp); |
1222 | 0 | } else |
1223 | 0 | numa_error("set_mempolicy"); |
1224 | 0 | } |
1225 | 0 | } |
1226 | | |
1227 | | /* |
1228 | | * copy a bitmask map body to a numa.h nodemask_t structure |
1229 | | */ |
1230 | | void |
1231 | | copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp) |
1232 | 0 | { |
1233 | 0 | int max, i; |
1234 | | |
1235 | 0 | memset(nmp, 0, sizeof(nodemask_t)); |
1236 | 0 | max = (sizeof(nodemask_t)*8); |
1237 | 0 | for (i=0; i<bmp->size; i++) { |
1238 | 0 | if (i >= max) |
1239 | 0 | break; |
1240 | 0 | if (numa_bitmask_isbitset(bmp, i)) |
1241 | 0 | nodemask_set_compat((nodemask_t *)nmp, i); |
1242 | 0 | } |
1243 | 0 | } |
1244 | | |
1245 | | /* |
1246 | | * copy a bitmask map body to another bitmask body |
1247 | | * fill a larger destination with zeroes |
1248 | | */ |
1249 | | void |
1250 | | copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto) |
1251 | 210 | { |
1252 | 210 | int bytes; |
1253 | | |
1254 | 210 | if (bmpfrom->size >= bmpto->size) { |
1255 | 210 | memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size)); |
1256 | 210 | } else if (bmpfrom->size < bmpto->size) { |
1257 | 0 | bytes = CPU_BYTES(bmpfrom->size); |
1258 | 0 | memcpy(bmpto->maskp, bmpfrom->maskp, bytes); |
1259 | 0 | memset(((char *)bmpto->maskp)+bytes, 0, |
1260 | 0 | CPU_BYTES(bmpto->size)-bytes); |
1261 | 0 | } |
1262 | 210 | } |
1263 | | |
1264 | | /* |
1265 | | * copy a numa.h nodemask_t structure to a bitmask map body |
1266 | | */ |
1267 | | void |
1268 | | copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp) |
1269 | 0 | { |
1270 | 0 | int max, i; |
1271 | | |
1272 | 0 | numa_bitmask_clearall(bmp); |
1273 | 0 | max = (sizeof(nodemask_t)*8); |
1274 | 0 | if (max > bmp->size) |
1275 | 0 | max = bmp->size; |
1276 | 0 | for (i=0; i<max; i++) { |
1277 | 0 | if (nodemask_isset_compat(nmp, i)) |
1278 | 0 | numa_bitmask_setbit(bmp, i); |
1279 | 0 | } |
1280 | 0 | } |
1281 | | |
1282 | | SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1") |
1283 | | nodemask_t |
1284 | | numa_get_membind_v1(void) |
1285 | 0 | { |
1286 | 0 | int oldpolicy = 0; |
1287 | 0 | struct bitmask *bmp; |
1288 | 0 | nodemask_t nmp; |
1289 | | |
1290 | 0 | bmp = allocate_nodemask_v1(); |
1291 | 0 | if (!bmp) |
1292 | 0 | return numa_no_nodes; |
1293 | 0 | getpol(&oldpolicy, bmp); |
1294 | 0 | if (oldpolicy == MPOL_BIND) { |
1295 | 0 | copy_bitmask_to_nodemask(bmp, &nmp); |
1296 | 0 | } else { |
1297 | | /* copy the body of the map to numa_all_nodes */ |
1298 | 0 | copy_bitmask_to_nodemask(bmp, &numa_all_nodes); |
1299 | 0 | nmp = numa_all_nodes; |
1300 | 0 | } |
1301 | 0 | numa_bitmask_free(bmp); |
1302 | 0 | return nmp; |
1303 | 0 | } |
1304 | | |
1305 | | SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2") |
1306 | | struct bitmask * |
1307 | | numa_get_membind_v2(void) |
1308 | 0 | { |
1309 | 0 | int oldpolicy = 0; |
1310 | 0 | struct bitmask *bmp = NULL; |
1311 | | |
1312 | 0 | bmp = numa_allocate_nodemask(); |
1313 | 0 | if (!bmp) |
1314 | 0 | return NULL; |
1315 | 0 | getpol(&oldpolicy, bmp); |
1316 | 0 | if (oldpolicy != MPOL_BIND) |
1317 | 0 | copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp); |
1318 | 0 | return bmp; |
1319 | 0 | } |
1320 | | |
1321 | | //TODO: do we need a v1 nodemask_t version? |
1322 | | struct bitmask *numa_get_mems_allowed(void) |
1323 | 0 | { |
1324 | 0 | struct bitmask *bmp; |
1325 | | |
1326 | | /* |
1327 | | * mems_allowed can change, so query it on each call. |
1328 | | */ |
1329 | 0 | bmp = numa_allocate_nodemask(); |
1330 | 0 | if (!bmp) |
1331 | 0 | return NULL; |
1332 | 0 | if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0, |
1333 | 0 | MPOL_F_MEMS_ALLOWED) < 0) |
1334 | 0 | numa_error("get_mempolicy"); |
1335 | 0 | return bmp; |
1336 | 0 | } |
1337 | | make_internal_alias(numa_get_mems_allowed); |
1338 | | |
1339 | | void numa_free(void *mem, size_t size) |
1340 | 0 | { |
1341 | 0 | munmap(mem, size); |
1342 | 0 | } |
1343 | | |
1344 | | SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1") |
1345 | | int |
1346 | | numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus) |
1347 | 0 | { |
1348 | 0 | int i; |
1349 | 0 | char *p = strchr(line, '\n'); |
1350 | 0 | if (!p) |
1351 | 0 | return -1; |
1352 | | |
1353 | 0 | for (i = 0; p > line;i++) { |
1354 | 0 | char *oldp, *endp; |
1355 | 0 | oldp = p; |
1356 | 0 | if (*p == ',') |
1357 | 0 | --p; |
1358 | 0 | while (p > line && *p != ',') |
1359 | 0 | --p; |
1360 | | /* Eat two 32bit fields at a time to get longs */ |
1361 | 0 | if (p > line && sizeof(unsigned long) == 8) { |
1362 | 0 | oldp--; |
1363 | 0 | memmove(p, p+1, oldp-p+1); |
1364 | 0 | while (p > line && *p != ',') |
1365 | 0 | --p; |
1366 | 0 | } |
1367 | 0 | if (*p == ',') |
1368 | 0 | p++; |
1369 | 0 | if (i >= CPU_LONGS(ncpus)) |
1370 | 0 | return -1; |
1371 | 0 | mask[i] = strtoul(p, &endp, 16); |
1372 | 0 | if (endp != oldp) |
1373 | 0 | return -1; |
1374 | 0 | p--; |
1375 | 0 | } |
1376 | 0 | return 0; |
1377 | 0 | } |
1378 | | |
1379 | | SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2") |
1380 | | int |
1381 | | numa_parse_bitmap_v2(char *line, struct bitmask *mask) |
1382 | 1 | { |
1383 | 1 | int i, ncpus; |
1384 | 1 | char *p = strchr(line, '\n'); |
1385 | 1 | if (!p) |
1386 | 0 | return -1; |
1387 | 1 | ncpus = mask->size; |
1388 | | |
1389 | 2 | for (i = 0; p > line;i++) { |
1390 | 1 | char *oldp, *endp; |
1391 | 1 | oldp = p; |
1392 | 1 | if (*p == ',') |
1393 | 0 | --p; |
1394 | 9 | while (p > line && *p != ',') |
1395 | 8 | --p; |
1396 | | /* Eat two 32bit fields at a time to get longs */ |
1397 | 1 | if (p > line && sizeof(unsigned long) == 8) { |
1398 | 0 | oldp--; |
1399 | 0 | memmove(p, p+1, oldp-p+1); |
1400 | 0 | while (p > line && *p != ',') |
1401 | 0 | --p; |
1402 | 0 | } |
1403 | 1 | if (*p == ',') |
1404 | 0 | p++; |
1405 | 1 | if (i >= CPU_LONGS(ncpus)) |
1406 | 0 | return -1; |
1407 | 1 | mask->maskp[i] = strtoul(p, &endp, 16); |
1408 | 1 | if (endp != oldp) |
1409 | 0 | return -1; |
1410 | 1 | p--; |
1411 | 1 | } |
1412 | 1 | return 0; |
1413 | 1 | } |
1414 | | |
1415 | | static void init_node_cpu_mask_v2(void) |
1416 | 1 | { |
1417 | 1 | int nnodes = numa_max_possible_node_v2_int() + 1; |
1418 | 1 | struct bitmask **new_ncm, **null_ncm = NULL; |
1419 | 1 | new_ncm = calloc (nnodes, sizeof(struct bitmask *)); |
1420 | | /* Check for races with another thread */ |
1421 | 1 | if (new_ncm && !__atomic_compare_exchange_n(&node_cpu_mask_v2, &null_ncm, |
1422 | 1 | new_ncm, 1, |
1423 | 1 | __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) { |
1424 | 0 | free(new_ncm); |
1425 | 0 | } |
1426 | 1 | } |
1427 | | |
1428 | | static void cleanup_node_cpu_mask_v2(void) |
1429 | 0 | { |
1430 | 0 | if (node_cpu_mask_v2) { |
1431 | 0 | int i; |
1432 | 0 | int nnodes; |
1433 | 0 | nnodes = numa_max_possible_node_v2_int() + 1; |
1434 | 0 | for (i = 0; i < nnodes; i++) { |
1435 | 0 | FREE_AND_ZERO(node_cpu_mask_v2[i]); |
1436 | 0 | } |
1437 | 0 | free(node_cpu_mask_v2); |
1438 | 0 | node_cpu_mask_v2 = NULL; |
1439 | 0 | } |
1440 | 0 | } |
1441 | | |
1442 | | /* This would be better with some locking, but I don't want to make libnuma |
1443 | | dependent on pthreads right now. The races are relatively harmless. */ |
1444 | | SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1") |
1445 | | int |
1446 | | numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen) |
1447 | 0 | { |
1448 | 0 | int err = 0; |
1449 | 0 | char fn[64]; |
1450 | 0 | FILE *f; |
1451 | 0 | char update; |
1452 | 0 | char *line = NULL; |
1453 | 0 | size_t len = 0; |
1454 | 0 | struct bitmask bitmask; |
1455 | 0 | int buflen_needed; |
1456 | 0 | unsigned long *mask; |
1457 | 0 | int ncpus = numa_num_possible_cpus(); |
1458 | 0 | int maxnode = numa_max_node_int(); |
1459 | | |
1460 | 0 | buflen_needed = CPU_BYTES(ncpus); |
1461 | 0 | if ((unsigned)node > maxnode || bufferlen < buflen_needed) { |
1462 | 0 | errno = ERANGE; |
1463 | 0 | return -1; |
1464 | 0 | } |
1465 | 0 | if (bufferlen > buflen_needed) |
1466 | 0 | memset(buffer, 0, bufferlen); |
1467 | 0 | update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED); |
1468 | 0 | if (node_cpu_mask_v1[node] && !update) { |
1469 | 0 | memcpy(buffer, node_cpu_mask_v1[node], buflen_needed); |
1470 | 0 | return 0; |
1471 | 0 | } |
1472 | | |
1473 | 0 | mask = malloc(buflen_needed); |
1474 | 0 | if (!mask) |
1475 | 0 | mask = (unsigned long *)buffer; |
1476 | 0 | memset(mask, 0, buflen_needed); |
1477 | | |
1478 | 0 | sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); |
1479 | 0 | f = fopen(fn, "r"); |
1480 | 0 | if (!f || getdelim(&line, &len, '\n', f) < 1) { |
1481 | 0 | if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { |
1482 | 0 | numa_warn(W_nosysfs2, |
1483 | 0 | "/sys not mounted or invalid. Assuming one node: %s", |
1484 | 0 | strerror(errno)); |
1485 | 0 | numa_warn(W_nosysfs2, |
1486 | 0 | "(cannot open or correctly parse %s)", fn); |
1487 | 0 | } |
1488 | 0 | bitmask.maskp = (unsigned long *)mask; |
1489 | 0 | bitmask.size = buflen_needed * 8; |
1490 | 0 | numa_bitmask_setall(&bitmask); |
1491 | 0 | err = -1; |
1492 | 0 | } |
1493 | 0 | if (f) |
1494 | 0 | fclose(f); |
1495 | | |
1496 | 0 | if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) { |
1497 | 0 | numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); |
1498 | 0 | bitmask.maskp = (unsigned long *)mask; |
1499 | 0 | bitmask.size = buflen_needed * 8; |
1500 | 0 | numa_bitmask_setall(&bitmask); |
1501 | 0 | err = -1; |
1502 | 0 | } |
1503 | | |
1504 | 0 | free(line); |
1505 | 0 | memmove(buffer, mask, buflen_needed); |
1506 | | |
1507 | | /* slightly racy, see above */ |
1508 | 0 | if (node_cpu_mask_v1[node]) { |
1509 | 0 | if (update) { |
1510 | | /* |
1511 | | * There may be readers on node_cpu_mask_v1[], hence it can not |
1512 | | * be freed. |
1513 | | */ |
1514 | 0 | memcpy(node_cpu_mask_v1[node], mask, buflen_needed); |
1515 | 0 | free(mask); |
1516 | 0 | mask = NULL; |
1517 | 0 | } else if (mask != buffer) |
1518 | 0 | free(mask); |
1519 | 0 | } else { |
1520 | 0 | node_cpu_mask_v1[node] = mask; |
1521 | 0 | } |
1522 | 0 | return err; |
1523 | 0 | } |
1524 | | |
1525 | | /* |
1526 | | * test whether a node has cpus |
1527 | | */ |
1528 | | /* This would be better with some locking, but I don't want to make libnuma |
1529 | | dependent on pthreads right now. The races are relatively harmless. */ |
1530 | | /* |
1531 | | * deliver a bitmask of cpus representing the cpus on a given node |
1532 | | */ |
1533 | | SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2") |
1534 | | int |
1535 | | numa_node_to_cpus_v2(int node, struct bitmask *buffer) |
1536 | 207 | { |
1537 | 207 | int err = 0; |
1538 | 207 | int nnodes = numa_max_node(); |
1539 | 207 | char fn[64], *line = NULL; |
1540 | 207 | FILE *f; |
1541 | 207 | char update; |
1542 | 207 | size_t len = 0; |
1543 | 207 | struct bitmask *mask; |
1544 | | |
1545 | 207 | if (!__atomic_load_n(&node_cpu_mask_v2, __ATOMIC_CONSUME)) |
1546 | 1 | init_node_cpu_mask_v2(); |
1547 | | |
1548 | 207 | if (node > nnodes) { |
1549 | 0 | errno = ERANGE; |
1550 | 0 | return -1; |
1551 | 0 | } |
1552 | 207 | numa_bitmask_clearall(buffer); |
1553 | | |
1554 | 207 | update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED); |
1555 | 207 | if (node_cpu_mask_v2[node] && !update) { |
1556 | | /* have already constructed a mask for this node */ |
1557 | 206 | if (buffer->size < node_cpu_mask_v2[node]->size) { |
1558 | 0 | errno = EINVAL; |
1559 | 0 | numa_error("map size mismatch"); |
1560 | 0 | return -1; |
1561 | 0 | } |
1562 | 206 | copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer); |
1563 | 206 | return 0; |
1564 | 206 | } |
1565 | | |
1566 | | /* need a new mask for this node */ |
1567 | 1 | mask = numa_allocate_cpumask(); |
1568 | 1 | if (!mask) |
1569 | 0 | return -1; |
1570 | | |
1571 | | /* this is a kernel cpumask_t (see node_read_cpumap()) */ |
1572 | 1 | sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); |
1573 | 1 | f = fopen(fn, "r"); |
1574 | 1 | if (!f || getdelim(&line, &len, '\n', f) < 1) { |
1575 | 0 | if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { |
1576 | 0 | numa_warn(W_nosysfs2, |
1577 | 0 | "/sys not mounted or invalid. Assuming one node: %s", |
1578 | 0 | strerror(errno)); |
1579 | 0 | numa_warn(W_nosysfs2, |
1580 | 0 | "(cannot open or correctly parse %s)", fn); |
1581 | 0 | } |
1582 | 0 | numa_bitmask_setall(mask); |
1583 | 0 | err = -1; |
1584 | 0 | } |
1585 | 1 | if (f) |
1586 | 1 | fclose(f); |
1587 | | |
1588 | 1 | if (line && (numa_parse_bitmap_v2(line, mask) < 0)) { |
1589 | 0 | numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); |
1590 | 0 | numa_bitmask_setall(mask); |
1591 | 0 | err = -1; |
1592 | 0 | } |
1593 | | |
1594 | 1 | free(line); |
1595 | 1 | copy_bitmask_to_bitmask(mask, buffer); |
1596 | | |
1597 | | /* slightly racy, see above */ |
1598 | | /* save the mask we created */ |
1599 | 1 | if (node_cpu_mask_v2[node]) { |
1600 | 0 | if (update) { |
1601 | 0 | copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]); |
1602 | 0 | numa_bitmask_free(mask); |
1603 | 0 | mask = NULL; |
1604 | | /* how could this be? */ |
1605 | 0 | } else if (mask != buffer) |
1606 | 0 | numa_bitmask_free(mask); |
1607 | 1 | } else { |
1608 | | /* we don't want to cache faulty result */ |
1609 | 1 | if (!err) |
1610 | 1 | node_cpu_mask_v2[node] = mask; |
1611 | 0 | else |
1612 | 0 | numa_bitmask_free(mask); |
1613 | 1 | } |
1614 | 1 | return err; |
1615 | 1 | } |
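
Sketch of the v2 interface above; the buffer should come from numa_allocate_cpumask() so its size matches the kernel cpumask (node 0 is an assumption):

	struct bitmask *cpus = numa_allocate_cpumask();
	if (cpus && numa_node_to_cpus(0, cpus) == 0) {
		unsigned long i;
		for (i = 0; i < cpus->size; i++)
			if (numa_bitmask_isbitset(cpus, i))
				printf("node 0 has cpu %lu\n", i);
	}
	numa_bitmask_free(cpus);	/* NULL-safe, see above */
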
1616 | | |
1617 | | make_internal_alias(numa_node_to_cpus_v1); |
1618 | | make_internal_alias(numa_node_to_cpus_v2); |
1619 | | |
1620 | | void numa_node_to_cpu_update(void) |
1621 | 0 | { |
1622 | 0 | __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED); |
1623 | 0 | __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED); |
1624 | 0 | } |
1625 | | |
1626 | | /* report the node of the specified cpu */ |
1627 | | int numa_node_of_cpu(int cpu) |
1628 | 0 | { |
1629 | 0 | struct bitmask *bmp; |
1630 | 0 | int ncpus, nnodes, node, ret; |
1631 | | |
1632 | 0 | ncpus = numa_num_possible_cpus(); |
1633 | 0 | if (cpu > ncpus){ |
1634 | 0 | errno = EINVAL; |
1635 | 0 | return -1; |
1636 | 0 | } |
1637 | 0 | bmp = numa_bitmask_alloc(ncpus); |
1638 | 0 | nnodes = numa_max_node(); |
1639 | 0 | for (node = 0; node <= nnodes; node++){ |
1640 | 0 | if (numa_node_to_cpus_v2_int(node, bmp) < 0) { |
1641 | | /* It's possible for the node to not exist */ |
1642 | 0 | continue; |
1643 | 0 | } |
1644 | 0 | if (numa_bitmask_isbitset(bmp, cpu)){ |
1645 | 0 | ret = node; |
1646 | 0 | goto end; |
1647 | 0 | } |
1648 | 0 | } |
1649 | 0 | ret = -1; |
1650 | 0 | errno = EINVAL; |
1651 | 0 | end: |
1652 | 0 | numa_bitmask_free(bmp); |
1653 | 0 | return ret; |
1654 | 0 | } |
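
Sketch for the lookup above; sched_getcpu() is glibc, and the answer is only momentary since the thread may migrate:

	int cpu = sched_getcpu();
	int node = (cpu >= 0) ? numa_node_of_cpu(cpu) : -1;
	if (node >= 0)
		printf("running on cpu %d, node %d\n", cpu, node);
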
1655 | | |
1656 | | SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1") |
1657 | | int |
1658 | | numa_run_on_node_mask_v1(const nodemask_t *mask) |
1659 | 0 | { |
1660 | 0 | int ncpus = numa_num_possible_cpus(); |
1661 | 0 | int i, k, err; |
1662 | 0 | unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)]; |
1663 | 0 | memset(cpus, 0, CPU_BYTES(ncpus)); |
1664 | 0 | for (i = 0; i < NUMA_NUM_NODES; i++) { |
1665 | 0 | if (mask->n[i / BITS_PER_LONG] == 0) |
1666 | 0 | continue; |
1667 | 0 | if (nodemask_isset_compat(mask, i)) { |
1668 | 0 | if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) { |
1669 | 0 | numa_warn(W_noderunmask, |
1670 | 0 | "Cannot read node cpumask from sysfs"); |
1671 | 0 | continue; |
1672 | 0 | } |
1673 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) |
1674 | 0 | cpus[k] |= nodecpus[k]; |
1675 | 0 | } |
1676 | 0 | } |
1677 | 0 | err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus); |
1678 | | |
1679 | | /* The sched_setaffinity API is broken because it expects |
1680 | | the user to guess the kernel cpuset size. Do this in a |
1681 | | brute force way. */ |
1682 | 0 | if (err < 0 && errno == EINVAL) { |
1683 | 0 | int savederrno = errno; |
1684 | 0 | char *bigbuf; |
1685 | 0 | static int size = -1; |
1686 | 0 | if (size == -1) |
1687 | 0 | size = CPU_BYTES(ncpus) * 2; |
1688 | 0 | bigbuf = malloc(CPU_BUFFER_SIZE); |
1689 | 0 | if (!bigbuf) { |
1690 | 0 | errno = ENOMEM; |
1691 | 0 | return -1; |
1692 | 0 | } |
1693 | 0 | errno = savederrno; |
1694 | 0 | while (size <= CPU_BUFFER_SIZE) { |
1695 | 0 | memcpy(bigbuf, cpus, CPU_BYTES(ncpus)); |
1696 | 0 | memset(bigbuf + CPU_BYTES(ncpus), 0, |
1697 | 0 | CPU_BUFFER_SIZE - CPU_BYTES(ncpus)); |
1698 | 0 | err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf); |
1699 | 0 | if (err == 0 || errno != EINVAL) |
1700 | 0 | break; |
1701 | 0 | size *= 2; |
1702 | 0 | } |
1703 | 0 | savederrno = errno; |
1704 | 0 | free(bigbuf); |
1705 | 0 | errno = savederrno; |
1706 | 0 | } |
1707 | 0 | return err; |
1708 | 0 | } |
1709 | | |
1710 | | /*
1711 | | * Given a bitmask of nodes (probably populated from a user argument
1712 | | * list), build a map of the cpus ("cpus") on those nodes, then set
1713 | | * this task's affinity to those cpus.
1714 | | */
1715 | | SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2") |
1716 | | int |
1717 | | numa_run_on_node_mask_v2(struct bitmask *bmp) |
1718 | 0 | { |
1719 | 0 | int ncpus, i, k, err; |
1720 | 0 | struct bitmask *cpus, *nodecpus; |
1721 | |
1722 | 0 | cpus = numa_allocate_cpumask();
1723 | 0 | nodecpus = numa_allocate_cpumask();
1724 | 0 | if (!cpus || !nodecpus)
1725 | 0 | return -1;
1726 | 0 | ncpus = cpus->size; /* dereference only after the NULL check */
1727 | | |
1728 | 0 | for (i = 0; i < bmp->size; i++) { |
1729 | 0 | if (bmp->maskp[i / BITS_PER_LONG] == 0) |
1730 | 0 | continue; |
1731 | 0 | if (numa_bitmask_isbitset(bmp, i)) { |
1732 | | /* |
1733 | | * numa_all_nodes_ptr is cpuset aware; use only |
1734 | | * these nodes |
1735 | | */ |
1736 | 0 | if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { |
1737 | 0 | numa_warn(W_noderunmask, |
1738 | 0 | "node %d not allowed", i); |
1739 | 0 | continue; |
1740 | 0 | } |
1741 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1742 | 0 | numa_warn(W_noderunmask, |
1743 | 0 | "Cannot read node cpumask from sysfs"); |
1744 | 0 | continue; |
1745 | 0 | } |
1746 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) |
1747 | 0 | cpus->maskp[k] |= nodecpus->maskp[k]; |
1748 | 0 | } |
1749 | 0 | } |
1750 | 0 | err = numa_sched_setaffinity_v2_int(0, cpus); |
1751 | |
1752 | 0 | numa_bitmask_free(cpus); |
1753 | 0 | numa_bitmask_free(nodecpus); |
1754 | | |
1755 | | /* earlier versions could fail here legitimately - it shouldn't now */
1756 | 0 | if (err < 0) { |
1757 | 0 | numa_error("numa_sched_setaffinity_v2_int() failed"); |
1758 | 0 | } |
1759 | |
1760 | 0 | return err; |
1761 | 0 | } |
1762 | | |
1763 | | make_internal_alias(numa_run_on_node_mask_v2); |
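| | /* Illustrative sketch: the typical v2 calling sequence, restricting the
| |    current task to the cpus of a node string such as "0-1"; the named
| |    nodes are assumed to exist and be allowed in the current cpuset. */
| | static int run_on_nodes(const char *spec)
| | {
| | 	struct bitmask *nodes = numa_parse_nodestring(spec);
| | 	int err;
| | 
| | 	if (!nodes)
| | 		return -1;
| | 	err = numa_run_on_node_mask(nodes);
| | 	numa_bitmask_free(nodes);
| | 	return err;
| | }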
1764 | | |
1765 | | /*
1766 | | * Given a bitmask of nodes (probably populated from a user argument
1767 | | * list), build a map of the cpus ("cpus") on those nodes without any
1768 | | * cpuset awareness, then set this task's affinity to those cpus.
1769 | | */
1770 | | int |
1771 | | numa_run_on_node_mask_all(struct bitmask *bmp) |
1772 | 0 | { |
1773 | 0 | int ncpus, i, k, err; |
1774 | 0 | struct bitmask *cpus, *nodecpus; |
1775 | |
1776 | 0 | cpus = numa_allocate_cpumask();
1777 | 0 | nodecpus = numa_allocate_cpumask();
1778 | 0 | if (!cpus || !nodecpus)
1779 | 0 | return -1;
1780 | 0 | ncpus = cpus->size; /* dereference only after the NULL check */
1781 | | |
1782 | 0 | for (i = 0; i < bmp->size; i++) { |
1783 | 0 | if (bmp->maskp[i / BITS_PER_LONG] == 0) |
1784 | 0 | continue; |
1785 | 0 | if (numa_bitmask_isbitset(bmp, i)) { |
1786 | 0 | if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) { |
1787 | 0 | numa_warn(W_noderunmask, |
1788 | 0 | "node %d not allowed", i); |
1789 | 0 | continue; |
1790 | 0 | } |
1791 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1792 | 0 | numa_warn(W_noderunmask, |
1793 | 0 | "Cannot read node cpumask from sysfs"); |
1794 | 0 | continue; |
1795 | 0 | } |
1796 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) |
1797 | 0 | cpus->maskp[k] |= nodecpus->maskp[k]; |
1798 | 0 | } |
1799 | 0 | } |
1800 | 0 | err = numa_sched_setaffinity_v2_int(0, cpus); |
1801 | |
1802 | 0 | numa_bitmask_free(cpus); |
1803 | 0 | numa_bitmask_free(nodecpus); |
1804 | | |
1805 | | /* with the freedom of all possible nodes, failure can happen easily now */
1806 | 0 | if (err < 0) { |
1807 | 0 | numa_error("numa_sched_setaffinity_v2_int() failed"); |
1808 | 0 | } |
1809 | |
1810 | 0 | return err; |
1811 | 0 | } |
1812 | | |
1813 | | SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1") |
1814 | | nodemask_t |
1815 | | numa_get_run_node_mask_v1(void) |
1816 | 0 | { |
1817 | 0 | int ncpus = numa_num_configured_cpus(); |
1818 | 0 | int i, k; |
1819 | 0 | int max = numa_max_node_int(); |
1820 | 0 | struct bitmask *bmp, *cpus, *nodecpus; |
1821 | 0 | nodemask_t nmp; |
1822 | |
1823 | 0 | cpus = numa_allocate_cpumask(); |
1824 | 0 | if (!cpus) |
1825 | 0 | return numa_no_nodes; |
1826 | 0 | if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ |
1827 | 0 | nmp = numa_no_nodes; |
1828 | 0 | goto free_cpus; |
1829 | 0 | } |
1830 | | |
1831 | 0 | nodecpus = numa_allocate_cpumask(); |
1832 | 0 | if (!nodecpus) { |
1833 | 0 | nmp = numa_no_nodes; |
1834 | 0 | goto free_cpus; |
1835 | 0 | } |
1836 | | |
1837 | 0 | bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */ |
1838 | 0 | if (!bmp) { |
1839 | 0 | nmp = numa_no_nodes; |
1840 | 0 | goto free_cpus2; |
1841 | 0 | } |
1842 | | |
1843 | 0 | for (i = 0; i <= max; i++) { |
1844 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1845 | | /* It's possible for the node to not exist */ |
1846 | 0 | continue; |
1847 | 0 | } |
1848 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) { |
1849 | 0 | if (nodecpus->maskp[k] & cpus->maskp[k]) |
1850 | 0 | numa_bitmask_setbit(bmp, i); |
1851 | 0 | } |
1852 | 0 | } |
1853 | 0 | copy_bitmask_to_nodemask(bmp, &nmp); |
1854 | 0 | numa_bitmask_free(bmp); |
1855 | 0 | free_cpus2: |
1856 | 0 | numa_bitmask_free(nodecpus); |
1857 | 0 | free_cpus: |
1858 | 0 | numa_bitmask_free(cpus); |
1859 | 0 | return nmp; |
1860 | 0 | } |
1861 | | |
1862 | | SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2") |
1863 | | struct bitmask * |
1864 | | numa_get_run_node_mask_v2(void) |
1865 | 0 | { |
1866 | 0 | int i, k; |
1867 | 0 | int ncpus = numa_num_configured_cpus(); |
1868 | 0 | int max = numa_max_node_int(); |
1869 | 0 | struct bitmask *bmp, *cpus, *nodecpus; |
1870 | |
1871 | 0 | bmp = numa_allocate_cpumask(); |
1872 | 0 | cpus = numa_allocate_cpumask(); |
1873 | 0 | if (!bmp || !cpus) |
1874 | 0 | return NULL; |
1875 | 0 | if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ |
1876 | 0 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
1877 | 0 | goto free_cpus; |
1878 | 0 | } |
1879 | | |
1880 | 0 | nodecpus = numa_allocate_cpumask();
| | if (!nodecpus)
| | goto free_cpus;
1881 | 0 | for (i = 0; i <= max; i++) { |
1882 | | /* |
1883 | | * numa_all_nodes_ptr is cpuset aware; show only |
1884 | | * these nodes |
1885 | | */ |
1886 | 0 | if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { |
1887 | 0 | continue; |
1888 | 0 | } |
1889 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1890 | | /* It's possible for the node to not exist */ |
1891 | 0 | continue; |
1892 | 0 | } |
1893 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) { |
1894 | 0 | if (nodecpus->maskp[k] & cpus->maskp[k]) |
1895 | 0 | numa_bitmask_setbit(bmp, i); |
1896 | 0 | } |
1897 | 0 | } |
1898 | 0 | numa_bitmask_free(nodecpus); |
1899 | 0 | free_cpus: |
1900 | 0 | numa_bitmask_free(cpus); |
1901 | 0 | return bmp; |
1902 | 0 | } |
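| | /* Illustrative sketch: report the nodes this task may currently run on,
| |    using the v2 interface above. */
| | static void print_run_nodes(void)
| | {
| | 	struct bitmask *nodes = numa_get_run_node_mask();
| | 	int i;
| | 
| | 	if (!nodes)
| | 		return;
| | 	for (i = 0; i <= numa_max_node(); i++)
| | 		if (numa_bitmask_isbitset(nodes, i))
| | 			printf("may run on node %d\n", i);
| | 	numa_bitmask_free(nodes);
| | }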
1903 | | |
1904 | | int |
1905 | | numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes) |
1906 | 0 | { |
1907 | 0 | int numa_num_nodes = numa_num_possible_nodes(); |
1908 | |
1909 | 0 | return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp, |
1910 | 0 | tonodes->maskp); |
1911 | 0 | } |
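| | /* Illustrative sketch: migrate all pages of the calling process (pid 0)
| |    from node 0 to node 1; both nodes are assumed to exist. */
| | static int drain_node0_to_node1(void)
| | {
| | 	struct bitmask *from = numa_allocate_nodemask();
| | 	struct bitmask *to = numa_allocate_nodemask();
| | 	int err = -1;
| | 
| | 	if (from && to) {
| | 		numa_bitmask_setbit(from, 0);
| | 		numa_bitmask_setbit(to, 1);
| | 		err = numa_migrate_pages(0, from, to);
| | 	}
| | 	numa_bitmask_free(from);	/* freeing a NULL bitmask is a no-op */
| | 	numa_bitmask_free(to);
| | 	return err;
| | }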
1912 | | |
1913 | | int numa_move_pages(int pid, unsigned long count, |
1914 | | void **pages, const int *nodes, int *status, int flags) |
1915 | 0 | { |
1916 | 0 | return move_pages(pid, count, pages, nodes, status, flags); |
1917 | 0 | } |
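| | /* Illustrative sketch: a NULL "nodes" array turns move_pages() into a
| |    pure query; "status" then receives the node of each page, or a
| |    negative errno for pages that cannot be examined. */
| | static int node_of_page(void *addr)
| | {
| | 	void *pages[1] = { addr };
| | 	int status[1] = { -1 };
| | 
| | 	if (numa_move_pages(0, 1, pages, NULL, status, 0) < 0)
| | 		return -1;
| | 	return status[0];
| | }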
1918 | | |
1919 | | int numa_run_on_node(int node) |
1920 | 0 | { |
1921 | 0 | int numa_num_nodes = numa_num_possible_nodes(); |
1922 | 0 | int ret = -1; |
1923 | 0 | struct bitmask *cpus; |
1924 | |
1925 | 0 | if (node >= numa_num_nodes || node < -1){
1926 | 0 | errno = EINVAL; |
1927 | 0 | goto out; |
1928 | 0 | } |
1929 | | |
1930 | 0 | cpus = numa_allocate_cpumask(); |
1931 | 0 | if (!cpus) |
1932 | 0 | return -1; |
1933 | | |
1934 | 0 | if (node == -1) |
1935 | 0 | numa_bitmask_setall(cpus); |
1936 | 0 | else if (numa_node_to_cpus_v2_int(node, cpus) < 0){ |
1937 | 0 | numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs"); |
1938 | 0 | goto free; |
1939 | 0 | } |
1940 | | |
1941 | 0 | ret = numa_sched_setaffinity_v2_int(0, cpus); |
1942 | 0 | free: |
1943 | 0 | numa_bitmask_free(cpus); |
1944 | 0 | out: |
1945 | 0 | return ret; |
1946 | 0 | } |
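| | /* Illustrative sketch: pin execution to the node of the cpu we are
| |    running on right now; passing -1 instead would allow every cpu. */
| | static int run_on_local_node(void)
| | {
| | 	int node = numa_node_of_cpu(sched_getcpu());
| | 
| | 	return node < 0 ? -1 : numa_run_on_node(node);
| | }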
1947 | | |
1948 | | static struct bitmask *__numa_preferred(void) |
1949 | 0 | { |
1950 | 0 | int policy = 0; |
1951 | 0 | struct bitmask *bmp; |
1952 | |
1953 | 0 | bmp = numa_allocate_nodemask(); |
1954 | 0 | if (!bmp) |
1955 | 0 | return NULL; |
1956 | | /* could read the current CPU from /proc/self/status. Probably |
1957 | | not worth it. */ |
1958 | 0 | numa_bitmask_clearall(bmp); |
1959 | 0 | getpol(&policy, bmp); |
1960 | |
1961 | 0 | if (policy != MPOL_PREFERRED && |
1962 | 0 | policy != MPOL_PREFERRED_MANY && |
1963 | 0 | policy != MPOL_BIND) |
1964 | 0 | return bmp; |
1965 | | |
1966 | 0 | if (policy == MPOL_PREFERRED && numa_bitmask_weight(bmp) > 1) { |
1967 | 0 | errno = EINVAL; |
1968 | 0 | numa_error(__FILE__); |
1969 | 0 | } |
1970 | |
1971 | 0 | return bmp; |
1972 | 0 | } |
1973 | | |
1974 | | int numa_preferred_err(void) |
1975 | 0 | { |
1976 | 0 | int first_node = 0; |
1977 | 0 | struct bitmask *bmp; |
1978 | |
1979 | 0 | bmp = __numa_preferred();
| | if (!bmp)
| | return -1;
1980 | 0 | first_node = numa_find_first(bmp);
1981 | 0 | numa_bitmask_free(bmp); |
1982 | | |
1983 | 0 | return first_node; |
1984 | 0 | } |
1985 | | |
1986 | | int numa_preferred(void) |
1987 | 0 | { |
1988 | 0 | int first_node = 0; |
1989 | |
1990 | 0 | first_node = numa_preferred_err(); |
1991 | 0 | first_node = first_node >= 0 ? first_node : 0; |
1992 | |
1993 | 0 | return first_node; |
1994 | 0 | } |
1995 | | |
1996 | | static void __numa_set_preferred(struct bitmask *bmp) |
1997 | 0 | { |
1998 | 0 | int nodes = numa_bitmask_weight(bmp); |
1999 | 0 | if (nodes > 1) { |
2000 | 0 | errno = EINVAL; |
2001 | 0 | numa_error(__FILE__); |
2002 | 0 | } |
2003 | |
2004 | 0 | setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp); |
2005 | 0 | } |
2006 | | |
2007 | | void numa_set_preferred(int node) |
2008 | 0 | { |
2009 | 0 | struct bitmask *bmp = numa_allocate_nodemask(); |
2010 | 0 | if (!bmp) |
2011 | 0 | return; |
2012 | 0 | numa_bitmask_setbit(bmp, node); |
2013 | 0 | __numa_set_preferred(bmp); |
2014 | 0 | numa_bitmask_free(bmp); |
2015 | 0 | } |
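| | /* Illustrative sketch: prefer node 0 for this thread's future
| |    allocations; unlike a bind, the kernel may still fall back to other
| |    nodes when node 0 is full. Release the memory with numa_free(). */
| | static void *alloc_preferring_node0(size_t size)
| | {
| | 	void *p;
| | 
| | 	numa_set_preferred(0);
| | 	p = numa_alloc(size);
| | 	if (p)
| | 		memset(p, 0, size);	/* touch so placement happens now */
| | 	return p;
| | }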
2016 | | |
2017 | | int numa_has_preferred_many(void) |
2018 | 0 | { |
2019 | 0 | set_preferred_many(); |
2020 | 0 | return has_preferred_many; |
2021 | 0 | } |
2022 | | |
2023 | | void numa_set_preferred_many(struct bitmask *bitmask) |
2024 | 0 | { |
2025 | 0 | int first_node = 0; |
2026 | |
2027 | 0 | set_preferred_many(); |
2028 | 0 | if (!has_preferred_many) { |
2029 | 0 | numa_warn(W_nodeparse, |
2030 | 0 | "Unable to handle MANY preferred nodes. Falling back to first node\n"); |
2031 | 0 | first_node = numa_find_first(bitmask); |
2032 | 0 | numa_set_preferred(first_node); |
2033 | 0 | return; |
2034 | 0 | } |
2035 | 0 | setpol(MPOL_PREFERRED_MANY, bitmask); |
2036 | 0 | } |
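| | /* Illustrative sketch: prefer nodes 0 and 2 (assumed to exist); on
| |    kernels without MPOL_PREFERRED_MANY the function above already
| |    degrades to preferring the first node of the mask. */
| | static void prefer_nodes_0_and_2(void)
| | {
| | 	struct bitmask *bmp = numa_allocate_nodemask();
| | 
| | 	if (!bmp)
| | 		return;
| | 	numa_bitmask_setbit(bmp, 0);
| | 	numa_bitmask_setbit(bmp, 2);
| | 	numa_set_preferred_many(bmp);
| | 	numa_bitmask_free(bmp);
| | }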
2037 | | |
2038 | | struct bitmask *numa_preferred_many(void)
2039 | 0 | { |
2040 | 0 | return __numa_preferred(); |
2041 | 0 | } |
2042 | | |
2043 | | void numa_set_localalloc(void) |
2044 | 0 | { |
2045 | 0 | setpol(MPOL_LOCAL, numa_no_nodes_ptr); |
2046 | 0 | } |
2047 | | |
2048 | | SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1") |
2049 | | void numa_bind_v1(const nodemask_t *nodemask) |
2050 | 0 | { |
2051 | 0 | struct bitmask bitmask; |
2052 | |
2053 | 0 | bitmask.maskp = (unsigned long *)nodemask; |
2054 | 0 | bitmask.size = sizeof(nodemask_t) * 8; /* bitmask sizes count bits, not bytes */
2055 | 0 | numa_run_on_node_mask_v2_int(&bitmask); |
2056 | 0 | numa_set_membind_v2_int(&bitmask); |
2057 | 0 | } |
2058 | | |
2059 | | SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2") |
2060 | | void numa_bind_v2(struct bitmask *bmp) |
2061 | 0 | { |
2062 | 0 | numa_run_on_node_mask_v2_int(bmp); |
2063 | 0 | numa_set_membind_v2_int(bmp); |
2064 | 0 | } |
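| | /* Illustrative sketch: the usual one-call entry point, confining both
| |    cpu affinity and memory policy of this task to the nodes named in a
| |    numactl-style string. */
| | static int bind_to_nodestring(const char *spec)
| | {
| | 	struct bitmask *nodes = numa_parse_nodestring(spec);
| | 
| | 	if (!nodes)
| | 		return -1;
| | 	numa_bind(nodes);
| | 	numa_bitmask_free(nodes);
| | 	return 0;
| | }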
2065 | | |
2066 | | void numa_set_strict(int flag) |
2067 | 0 | { |
2068 | 0 | if (flag) |
2069 | 0 | mbind_flags |= MPOL_MF_STRICT; |
2070 | 0 | else |
2071 | 0 | mbind_flags &= ~MPOL_MF_STRICT; |
2072 | 0 | } |
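| | /* Illustrative sketch: with strict mode on, binding memory that cannot
| |    be placed on the requested node reports the failure (via numa_error)
| |    instead of silently falling back. */
| | static void place_strictly(void *mem, size_t size, int node)
| | {
| | 	numa_set_strict(1);
| | 	numa_tonode_memory(mem, size, node);
| | 	numa_set_strict(0);
| | }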
2073 | | |
2074 | | /* |
2075 | | * Extract a node or processor number from the given string. |
2076 | | * Allow a relative node / processor specification within the allowed |
2077 | | * set if "relative" is nonzero |
2078 | | */ |
2079 | | static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative) |
2080 | 11.7k | { |
2081 | 11.7k | long i, nr; |
2082 | | |
2083 | 11.7k | if (!relative) |
2084 | 6.68k | return strtoul(s, end, 0); |
2085 | | |
2086 | 5.06k | nr = strtoul(s, end, 0); |
2087 | 5.06k | if (s == *end) |
2088 | 4 | return nr; |
2089 | | /* Find the nth set bit */ |
2090 | 124k | for (i = 0; nr >= 0 && i <= bmp->size; i++) |
2091 | 119k | if (numa_bitmask_isbitset(bmp, i)) |
2092 | 6.86k | nr--; |
2093 | 5.06k | return i-1; |
2094 | 5.06k | } |
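| | /* Worked example of the relative form: if the allowed mask holds nodes
| |    {1, 3, 5}, the spec "+1" walks the set bits to the one at index 1
| |    (counting from 0) and resolves to node 3, so under that assumption
| |    the two parses below yield equal masks. */
| | static int relative_spec_demo(void)
| | {
| | 	struct bitmask *a = numa_parse_nodestring("+1");
| | 	struct bitmask *b = numa_parse_nodestring("3");
| | 	int same = a && b && numa_bitmask_equal(a, b);
| | 
| | 	numa_bitmask_free(a);
| | 	numa_bitmask_free(b);
| | 	return same;
| | }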
2095 | | |
2096 | | /* |
2097 | | * __numa_parse_nodestring() is called to create a node mask, given |
2098 | | * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2099 | | * (the + indicates that the numbers are nodeset-relative) |
2100 | | * |
2101 | | * The nodes may be specified as absolute, or relative to the current nodeset. |
2102 | | * The list of available nodes is in a map pointed to by "allowed_nodes_ptr", |
2103 | | * which may represent all nodes or the nodes in the current nodeset. |
2104 | | * |
2105 | | * The caller must free the returned bitmask. |
2106 | | */ |
2107 | | static struct bitmask * |
2108 | | __numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr) |
2109 | 742 | { |
2110 | 742 | int invert = 0, relative = 0; |
2111 | 742 | int conf_nodes = numa_num_configured_nodes(); |
2112 | 742 | char *end; |
2113 | 742 | struct bitmask *mask; |
2114 | | |
2115 | 742 | mask = numa_allocate_nodemask(); |
2116 | 742 | if (!mask) |
2117 | 0 | return NULL; |
2118 | | |
2119 | 742 | if (s[0] == 0){ |
2120 | 1 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask); |
2121 | 1 | return mask; /* return freeable mask */ |
2122 | 1 | } |
2123 | 741 | if (*s == '!') { |
2124 | 4 | invert = 1; |
2125 | 4 | s++; |
2126 | 4 | } |
2127 | 741 | if (*s == '+') { |
2128 | 305 | relative++; |
2129 | 305 | s++; |
2130 | 305 | } |
2131 | 3.17k | do { |
2132 | 3.17k | unsigned long arg; |
2133 | 3.17k | int i; |
2134 | 3.17k | if (isalpha(*s)) { |
2135 | 143 | int n; |
2136 | 143 | if (!strcmp(s,"all")) { |
2137 | 1 | copy_bitmask_to_bitmask(allowed_nodes_ptr, |
2138 | 1 | mask); |
2139 | 1 | s+=4; |
2140 | 1 | break; |
2141 | 1 | } |
2142 | 142 | n = resolve_affinity(s, mask); |
2143 | 142 | if (n != NO_IO_AFFINITY) { |
2144 | 59 | if (n < 0) |
2145 | 59 | goto err; |
2146 | 0 | s += strlen(s) + 1; |
2147 | 0 | break; |
2148 | 59 | } |
2149 | 142 | } |
2150 | 3.11k | arg = get_nr(s, &end, allowed_nodes_ptr, relative); |
2151 | 3.11k | if (end == s) { |
2152 | 125 | numa_warn(W_nodeparse, "unparseable node description `%s'\n", s); |
2153 | 125 | goto err; |
2154 | 125 | } |
2155 | 2.98k | if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) { |
2156 | 238 | numa_warn(W_nodeparse, "node argument %ld is out of range\n", arg); |
2157 | 238 | goto err; |
2158 | 238 | } |
2159 | 2.74k | i = arg; |
2160 | 2.74k | numa_bitmask_setbit(mask, i); |
2161 | 2.74k | s = end; |
2162 | 2.74k | if (*s == '-') { |
2163 | 1.62k | char *end2; |
2164 | 1.62k | unsigned long arg2; |
2165 | 1.62k | arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative); |
2166 | 1.62k | if (end2 == s) { |
2167 | 9 | numa_warn(W_nodeparse, "missing node argument %s\n", s); |
2168 | 9 | goto err; |
2169 | 9 | } |
2170 | 1.61k | if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) { |
2171 | 198 | numa_warn(W_nodeparse, "node argument %ld is out of range\n", arg2);
2172 | 198 | goto err; |
2173 | 198 | } |
2174 | 2.57k | while (arg <= arg2) { |
2175 | 1.16k | i = arg; |
2176 | 1.16k | if (numa_bitmask_isbitset(allowed_nodes_ptr,i)) |
2177 | 1.16k | numa_bitmask_setbit(mask, i); |
2178 | 1.16k | arg++; |
2179 | 1.16k | } |
2180 | 1.41k | s = end2; |
2181 | 1.41k | } |
2182 | 2.74k | } while (*s++ == ','); |
2183 | 112 | if (s[-1] != '\0') |
2184 | 14 | goto err; |
2185 | 98 | if (invert) { |
2186 | 1 | int i; |
2187 | 2 | for (i = 0; i < conf_nodes; i++) { |
2188 | 1 | if (numa_bitmask_isbitset(mask, i)) |
2189 | 1 | numa_bitmask_clearbit(mask, i); |
2190 | 0 | else |
2191 | 0 | numa_bitmask_setbit(mask, i); |
2192 | 1 | } |
2193 | 1 | } |
2194 | 98 | return mask; |
2195 | | |
2196 | 643 | err: |
2197 | 643 | numa_bitmask_free(mask); |
2198 | 643 | return NULL; |
2199 | 112 | } |
2200 | | |
2201 | | /* |
2202 | | * numa_parse_nodestring() is called to create a bitmask from nodes available |
2203 | | * for this task. |
2204 | | */ |
2205 | | |
2206 | | struct bitmask * numa_parse_nodestring(const char *s) |
2207 | 742 | { |
2208 | 742 | return __numa_parse_nodestring(s, numa_all_nodes_ptr); |
2209 | 742 | } |
2210 | | |
2211 | | /* |
2212 | | * numa_parse_nodestring_all() is called to create a bitmask from all nodes |
2213 | | * available. |
2214 | | */ |
2215 | | |
2216 | | struct bitmask * numa_parse_nodestring_all(const char *s) |
2217 | 0 | { |
2218 | 0 | return __numa_parse_nodestring(s, numa_possible_nodes_ptr); |
2219 | 0 | } |
2220 | | |
2221 | | /* |
2222 | | * __numa_parse_cpustring() is called to create a bitmask, given |
2223 | | * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2224 | | * (the + indicates that the numbers are cpuset-relative) |
2225 | | * |
2226 | | * The cpus may be specified as absolute, or relative to the current cpuset. |
2227 | | * The list of available cpus for this task is in the map pointed to by |
2228 | | * "allowed_cpus_ptr", which may represent all cpus or the cpus in the |
2229 | | * current cpuset. |
2230 | | * |
2231 | | * The caller must free the returned bitmask. |
2232 | | */ |
2233 | | static struct bitmask * |
2234 | | __numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr) |
2235 | 742 | { |
2236 | 742 | int invert = 0, relative=0; |
2237 | 742 | int conf_cpus = numa_num_configured_cpus(); |
2238 | 742 | char *end; |
2239 | 742 | struct bitmask *mask; |
2240 | 742 | int i; |
2241 | | |
2242 | 742 | mask = numa_allocate_cpumask(); |
2243 | 742 | if (!mask) |
2244 | 0 | return NULL; |
2245 | | |
2246 | 742 | if (s[0] == 0) |
2247 | 1 | return mask; |
2248 | 741 | if (*s == '!') { |
2249 | 4 | invert = 1; |
2250 | 4 | s++; |
2251 | 4 | } |
2252 | 741 | if (*s == '+') { |
2253 | 305 | relative++; |
2254 | 305 | s++; |
2255 | 305 | } |
2256 | 4.43k | do { |
2257 | 4.43k | unsigned long arg; |
2258 | | |
2259 | 4.43k | if (!strcmp(s,"all")) { |
2260 | 1 | copy_bitmask_to_bitmask(allowed_cpus_ptr, mask); |
2261 | 1 | s+=4; |
2262 | 1 | break; |
2263 | 1 | } |
2264 | 4.43k | arg = get_nr(s, &end, allowed_cpus_ptr, relative); |
2265 | 4.43k | if (end == s) { |
2266 | 192 | numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s); |
2267 | 192 | goto err; |
2268 | 192 | } |
2269 | 4.24k | if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) { |
2270 | 150 | numa_warn(W_cpuparse, "cpu argument %ld is out of range\n", arg);
2271 | 150 | goto err; |
2272 | 150 | } |
2273 | 4.09k | i = arg; |
2274 | 4.09k | numa_bitmask_setbit(mask, i); |
2275 | 4.09k | s = end; |
2276 | 4.09k | if (*s == '-') { |
2277 | 2.58k | char *end2; |
2278 | 2.58k | unsigned long arg2; |
2279 | 2.58k | arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative); |
2280 | 2.58k | if (end2 == s) { |
2281 | 16 | numa_warn(W_cpuparse, "missing cpu argument %s\n", s); |
2282 | 16 | goto err; |
2283 | 16 | } |
2284 | 2.57k | if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) { |
2285 | 158 | numa_warn(W_cpuparse, "cpu argument %ld is out of range\n", arg2);
2286 | 158 | goto err; |
2287 | 158 | } |
2288 | 4.90k | while (arg <= arg2) { |
2289 | 2.49k | i = arg; |
2290 | 2.49k | if (numa_bitmask_isbitset(allowed_cpus_ptr, i)) |
2291 | 2.49k | numa_bitmask_setbit(mask, i); |
2292 | 2.49k | arg++; |
2293 | 2.49k | } |
2294 | 2.41k | s = end2; |
2295 | 2.41k | } |
2296 | 4.09k | } while (*s++ == ','); |
2297 | 225 | if (s[-1] != '\0') |
2298 | 19 | goto err; |
2299 | 206 | if (invert) { |
2300 | 99 | for (i = 0; i < conf_cpus; i++) { |
2301 | 96 | if (numa_bitmask_isbitset(mask, i)) |
2302 | 4 | numa_bitmask_clearbit(mask, i); |
2303 | 92 | else |
2304 | 92 | numa_bitmask_setbit(mask, i); |
2305 | 96 | } |
2306 | 3 | } |
2307 | 206 | return mask; |
2308 | | |
2309 | 535 | err: |
2310 | 535 | numa_bitmask_free(mask); |
2311 | 535 | return NULL; |
2312 | 225 | } |
2313 | | |
2314 | | /* |
2315 | | * numa_parse_cpustring() is called to create a bitmask from cpus available |
2316 | | * for this task. |
2317 | | */ |
2318 | | |
2319 | | struct bitmask * numa_parse_cpustring(const char *s) |
2320 | 742 | { |
2321 | 742 | return __numa_parse_cpustring(s, numa_all_cpus_ptr); |
2322 | 742 | } |
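| | /* Illustrative sketch: "!" inverts within the configured cpus, so "!0"
| |    selects every cpu this task may use except cpu 0; applying the mask
| |    moves the task off cpu 0. */
| | static int avoid_cpu0(void)
| | {
| | 	struct bitmask *cpus = numa_parse_cpustring("!0");
| | 	int err;
| | 
| | 	if (!cpus)
| | 		return -1;
| | 	err = numa_sched_setaffinity(0, cpus);
| | 	numa_bitmask_free(cpus);
| | 	return err;
| | }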
2323 | | |
2324 | | /* |
2325 | | * numa_parse_cpustring_all() is called to create a bitmask from all cpus |
2326 | | * available. |
2327 | | */ |
2328 | | |
2329 | | struct bitmask * numa_parse_cpustring_all(const char *s) |
2330 | 0 | { |
2331 | 0 | return __numa_parse_cpustring(s, numa_possible_cpus_ptr); |
2332 | 0 | } |
2333 | | |
2334 | | int numa_has_home_node(void) |
2335 | 0 | { |
2336 | 0 | void *mem; |
2337 | 0 | static int has_home_node = -1; |
2338 | 0 | int page_size = numa_pagesize(); |
2339 | 0 | struct bitmask *tmp = numa_get_mems_allowed(); |
2340 | |
2341 | 0 | if (has_home_node >= 0) |
2342 | 0 | goto out; |
2343 | | |
2344 | 0 | has_home_node = 0; |
2345 | | /* Detect whether home_node is supported */ |
2346 | 0 | mem = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
2347 | 0 | if (mem != MAP_FAILED) { |
2348 | 0 | dombind(mem, page_size, MPOL_BIND, tmp); |
2349 | 0 | if (set_mempolicy_home_node(mem, page_size, numa_find_first(tmp), 0) == 0) |
2350 | 0 | has_home_node = 1; |
2351 | 0 | munmap(mem, page_size); |
2352 | 0 | } |
2353 | |
2354 | 0 | out: |
2355 | 0 | numa_bitmask_free(tmp); |
2356 | 0 | return has_home_node; |
2357 | 0 | } |
2358 | | |
2359 | | int numa_set_mempolicy_home_node(void *start, unsigned long len, int home_node, int flags) |
2360 | 0 | { |
2361 | 0 | if (set_mempolicy_home_node(start, len, home_node, flags)) { |
2362 | 0 | numa_error("set_mempolicy_home_node"); |
2363 | 0 | return -1; |
2364 | 0 | } |
2365 | | |
2366 | 0 | return 0; |
2367 | 0 | } |
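| | /* Illustrative sketch: bind a range to nodes 0-1 (assumed to exist) but
| |    make node 1 its home node for allocation decisions, guarded by the
| |    runtime probe above. */
| | static int bind_with_home(void *mem, unsigned long len)
| | {
| | 	struct bitmask *nodes = numa_parse_nodestring("0-1");
| | 
| | 	if (!nodes)
| | 		return -1;
| | 	numa_tonodemask_memory(mem, len, nodes);
| | 	numa_bitmask_free(nodes);
| | 	if (!numa_has_home_node())
| | 		return 0;	/* kernel lacks support; the bind still holds */
| | 	return numa_set_mempolicy_home_node(mem, len, 1, 0);
| | }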