Line | Count | Source |
1 | | /* Simple NUMA library. |
2 | | Copyright (C) 2003,2004,2005,2008 Andi Kleen, SuSE Labs and |
3 | | Cliff Wickman, SGI. |
4 | | |
5 | | libnuma is free software; you can redistribute it and/or |
6 | | modify it under the terms of the GNU Lesser General Public |
7 | | License as published by the Free Software Foundation; version |
8 | | 2.1. |
9 | | |
10 | | libnuma is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | | Lesser General Public License for more details. |
14 | | |
15 | | You should find a copy of v2.1 of the GNU Lesser General Public License |
16 | | somewhere on your Linux system; if not, write to the Free Software |
17 | | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 | | |
19 | | All calls are undefined when numa_available returns an error. */ |
20 | | #define _GNU_SOURCE 1 |
21 | | #include <stdlib.h> |
22 | | #include <stdio.h> |
23 | | #include <unistd.h> |
24 | | #include <string.h> |
25 | | #include <sched.h> |
26 | | #include <dirent.h> |
27 | | #include <errno.h> |
28 | | #include <stdarg.h> |
29 | | #include <ctype.h> |
30 | | #include <assert.h> |
31 | | |
32 | | #include <sys/mman.h> |
33 | | #include <limits.h> |
34 | | |
35 | | #include "config.h" |
36 | | #include "numa.h" |
37 | | #include "numaif.h" |
38 | | #include "numaint.h" |
39 | | #include "util.h" |
40 | | #include "affinity.h" |
41 | | |
42 | | #define WEAK __attribute__((weak)) |
43 | | |
44 | 0 | #define CPU_BUFFER_SIZE 4096 /* This limits you to 32768 CPUs */ |
45 | | |
46 | | /* these are the old (version 1) masks */ |
47 | | nodemask_t numa_no_nodes; |
48 | | nodemask_t numa_all_nodes; |
49 | | /* these are now the default bitmask (pointers to) (version 2) */ |
50 | | struct bitmask *numa_no_nodes_ptr = NULL; |
51 | | struct bitmask *numa_all_nodes_ptr = NULL; |
52 | | struct bitmask *numa_possible_nodes_ptr = NULL; |
53 | | struct bitmask *numa_all_cpus_ptr = NULL; |
54 | | struct bitmask *numa_possible_cpus_ptr = NULL; |
55 | | /* I would prefer to use symbol versioning to create v1 and v2 versions |
56 | | of numa_no_nodes and numa_all_nodes, but the loader does not correctly |
57 | | handle versioning of BSS versus small data items */ |
58 | | |
59 | | struct bitmask *numa_nodes_ptr = NULL; |
60 | | static struct bitmask *numa_memnode_ptr = NULL; |
61 | | static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES]; |
62 | | static char node_cpu_mask_v1_stale = 1; |
63 | | static struct bitmask **node_cpu_mask_v2; |
64 | | static char node_cpu_mask_v2_stale = 1; |
65 | | |
66 | | WEAK void numa_error(char *where); |
67 | | |
68 | | #ifndef TLS |
69 | | #warning "not threadsafe" |
70 | | #define __thread |
71 | | #endif |
72 | | |
73 | | static __thread int bind_policy = MPOL_BIND; |
74 | | static __thread unsigned int mbind_flags = 0; |
75 | | static int sizes_set=0; |
76 | | static int maxconfigurednode = -1; |
77 | | static int maxconfiguredcpu = -1; |
78 | | static int numprocnode = -1; |
79 | | static int numproccpu = -1; |
80 | | static int nodemask_sz = 0; |
81 | | static int cpumask_sz = 0; |
82 | | |
83 | | static int has_preferred_many = -1; |
84 | | |
85 | | int numa_exit_on_error = 0; |
86 | | int numa_exit_on_warn = 0; |
87 | | static void set_sizes(void); |
88 | | |
89 | | /* |
90 | | * There are two special functions, _init(void) and _fini(void), which |
91 | | * are called automatically by the dynamic loader when a library is loaded or unloaded, respectively. |
92 | | * |
93 | | * The v1 library depends upon nodemask_t's of all nodes and no nodes. |
94 | | */ |
95 | | void __attribute__((constructor)) |
96 | | numa_init(void) |
97 | 2 | { |
98 | 2 | int max,i; |
99 | | |
100 | 2 | if (sizes_set) |
101 | 0 | return; |
102 | | |
103 | 2 | set_sizes(); |
104 | | /* numa_all_nodes should represent existing nodes on this system */ |
105 | 2 | max = numa_num_configured_nodes(); |
106 | 4 | for (i = 0; i < max; i++) |
107 | 2 | nodemask_set_compat((nodemask_t *)&numa_all_nodes, i); |
108 | 2 | memset(&numa_no_nodes, 0, sizeof(numa_no_nodes)); |
109 | | |
110 | | /* clear errno */ |
111 | 2 | errno = 0; |
112 | 2 | } |
113 | | |
114 | | static void cleanup_node_cpu_mask_v2(void); |
115 | | |
116 | 0 | #define FREE_AND_ZERO(x) if (x) { \ |
117 | 0 | numa_bitmask_free(x); \ |
118 | 0 | x = NULL; \ |
119 | 0 | } |
120 | | |
121 | | void __attribute__((destructor)) |
122 | | numa_fini(void) |
123 | 0 | { |
124 | 0 | FREE_AND_ZERO(numa_all_cpus_ptr); |
125 | 0 | FREE_AND_ZERO(numa_possible_cpus_ptr); |
126 | 0 | FREE_AND_ZERO(numa_all_nodes_ptr); |
127 | 0 | FREE_AND_ZERO(numa_possible_nodes_ptr); |
128 | 0 | FREE_AND_ZERO(numa_no_nodes_ptr); |
129 | 0 | FREE_AND_ZERO(numa_memnode_ptr); |
130 | 0 | FREE_AND_ZERO(numa_nodes_ptr); |
131 | 0 | cleanup_node_cpu_mask_v2(); |
132 | 0 | } |
133 | | |
134 | | static int numa_find_first(struct bitmask *mask) |
135 | 0 | { |
136 | 0 | int i; |
137 | 0 | for (i = 0; i < mask->size; i++) |
138 | 0 | if (numa_bitmask_isbitset(mask, i)) |
139 | 0 | return i; |
140 | 0 | return -1; |
141 | 0 | } |
142 | | |
143 | | /* |
144 | | * The following bitmask declarations, bitmask_*() routines, and associated |
145 | | * _setbit() and _getbit() routines are: |
146 | | * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved. |
147 | | * SGI publishes it under the terms of the Library GNU General Public License, |
148 | | * v2, as published by the Free Software Foundation. |
149 | | */ |
150 | | static unsigned int |
151 | | _getbit(const struct bitmask *bmp, unsigned int n) |
152 | 113k | { |
153 | 113k | if (n < bmp->size) |
154 | 113k | return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1; |
155 | 705 | else |
156 | 705 | return 0; |
157 | 113k | } |
158 | | |
159 | | static void |
160 | | _setbit(struct bitmask *bmp, unsigned int n, unsigned int v) |
161 | 21.0k | { |
162 | 21.0k | if (n < bmp->size) { |
163 | 21.0k | if (v) |
164 | 9.42k | bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong); |
165 | 11.5k | else |
166 | 11.5k | bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong)); |
167 | 21.0k | } |
168 | 21.0k | } |
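For example, with 64-bit longs (bitsperlong == 64), bit 70 lives in maskp[70/64] = maskp[1] at position 70 % 64 = 6; _getbit() shifts that word right by 6 and masks with 1.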
169 | | |
170 | | int |
171 | | numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i) |
172 | 111k | { |
173 | 111k | return _getbit(bmp, i); |
174 | 111k | } |
175 | | |
176 | | struct bitmask * |
177 | | numa_bitmask_setall(struct bitmask *bmp) |
178 | 0 | { |
179 | 0 | unsigned int i; |
180 | 0 | for (i = 0; i < bmp->size; i++) |
181 | 0 | _setbit(bmp, i, 1); |
182 | 0 | return bmp; |
183 | 0 | } |
184 | | |
185 | | struct bitmask * |
186 | | numa_bitmask_clearall(struct bitmask *bmp) |
187 | 181 | { |
188 | 181 | unsigned int i; |
189 | 11.7k | for (i = 0; i < bmp->size; i++) |
190 | 11.5k | _setbit(bmp, i, 0); |
191 | 181 | return bmp; |
192 | 181 | } |
193 | | |
194 | | struct bitmask * |
195 | | numa_bitmask_setbit(struct bitmask *bmp, unsigned int i) |
196 | 9.42k | { |
197 | 9.42k | _setbit(bmp, i, 1); |
198 | 9.42k | return bmp; |
199 | 9.42k | } |
200 | | |
201 | | struct bitmask * |
202 | | numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i) |
203 | 4 | { |
204 | 4 | _setbit(bmp, i, 0); |
205 | 4 | return bmp; |
206 | 4 | } |
207 | | |
208 | | unsigned int |
209 | | numa_bitmask_nbytes(struct bitmask *bmp) |
210 | 2 | { |
211 | 2 | return longsperbits(bmp->size) * sizeof(unsigned long); |
212 | 2 | } |
213 | | |
214 | | /* where n is the number of bits in the map */ |
215 | | /* This function should not exit on failure, but right now we cannot really |
216 | | recover from this. */ |
217 | | struct bitmask * |
218 | | numa_bitmask_alloc(unsigned int n) |
219 | 1.36k | { |
220 | 1.36k | struct bitmask *bmp; |
221 | | |
222 | 1.36k | if (n < 1) { |
223 | 0 | errno = EINVAL; |
224 | 0 | numa_error("request to allocate mask for invalid number"); |
225 | 0 | return NULL; |
226 | 0 | } |
227 | 1.36k | bmp = malloc(sizeof(*bmp)); |
228 | 1.36k | if (bmp == 0) |
229 | 0 | goto oom; |
230 | 1.36k | bmp->size = n; |
231 | 1.36k | bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long)); |
232 | 1.36k | if (bmp->maskp == 0) { |
233 | 0 | free(bmp); |
234 | 0 | goto oom; |
235 | 0 | } |
236 | 1.36k | return bmp; |
237 | | |
238 | 0 | oom: |
239 | 0 | numa_error("Out of memory allocating bitmask"); |
240 | 0 | exit(1); |
241 | 1.36k | } |
242 | | |
243 | | void |
244 | | numa_bitmask_free(struct bitmask *bmp) |
245 | 1.35k | { |
246 | 1.35k | if (bmp == 0) |
247 | 0 | return; |
248 | 1.35k | free(bmp->maskp); |
249 | 1.35k | bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */ |
250 | 1.35k | free(bmp); |
251 | 1.35k | return; |
252 | 1.35k | } |
253 | | |
254 | | /* True if two bitmasks are equal */ |
255 | | int |
256 | | numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2) |
257 | 0 | { |
258 | 0 | unsigned int i; |
259 | 0 | for (i = 0; i < bmp1->size || i < bmp2->size; i++) |
260 | 0 | if (_getbit(bmp1, i) != _getbit(bmp2, i)) |
261 | 0 | return 0; |
262 | 0 | return 1; |
263 | 0 | } |
264 | | |
265 | | /* Hamming Weight: number of set bits */ |
266 | | unsigned int numa_bitmask_weight(const struct bitmask *bmp) |
267 | 4 | { |
268 | 4 | unsigned int i; |
269 | 4 | unsigned int w = 0; |
270 | 2.18k | for (i = 0; i < bmp->size; i++) |
271 | 2.17k | if (_getbit(bmp, i)) |
272 | 66 | w++; |
273 | 4 | return w; |
274 | 4 | } |
275 | | |
276 | | /* *****end of bitmask_ routines ************ */ |
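A minimal caller-side sketch of how these bitmask primitives compose (illustrative only, not part of libnuma; assumes a machine with a node 0, links with -lnuma):

    #include <numa.h>
    #include <stdio.h>

    int main(void)
    {
        if (numa_available() < 0)        /* all other libnuma calls are undefined */
            return 1;
        struct bitmask *bmp = numa_allocate_nodemask();
        if (!bmp)
            return 1;
        numa_bitmask_setbit(bmp, 0);     /* mark node 0 */
        printf("%u of %lu bits set\n", numa_bitmask_weight(bmp), bmp->size);
        numa_bitmask_free(bmp);          /* frees maskp, then the struct itself */
        return 0;
    }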
277 | | |
278 | | /* Next two can be overridden by the application for different error handling */ |
279 | | WEAK void numa_error(char *where) |
280 | 0 | { |
281 | 0 | int olde = errno; |
282 | 0 | perror(where); |
283 | 0 | if (numa_exit_on_error) |
284 | 0 | exit(1); |
285 | 0 | errno = olde; |
286 | 0 | } |
287 | | |
288 | | WEAK void numa_warn(int num, char *fmt, ...) |
289 | 1.04k | { |
290 | 1.04k | static unsigned warned; |
291 | 1.04k | va_list ap; |
292 | 1.04k | int olde = errno; |
293 | | |
294 | | /* Give each warning only once */ |
295 | 1.04k | if ((1<<num) & warned) |
296 | 1.03k | return; |
297 | 9 | warned |= (1<<num); |
298 | | |
299 | 9 | va_start(ap,fmt); |
300 | 9 | fprintf(stderr, "libnuma: Warning: "); |
301 | 9 | vfprintf(stderr, fmt, ap); |
302 | 9 | fputc('\n', stderr); |
303 | 9 | va_end(ap); |
304 | | |
305 | 9 | if (numa_exit_on_warn) |
306 | 0 | exit(1); |
307 | | |
308 | 9 | errno = olde; |
309 | 9 | } |
310 | | |
311 | | static void setpol(int policy, struct bitmask *bmp) |
312 | 0 | { |
313 | 0 | if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0) |
314 | 0 | numa_error("set_mempolicy"); |
315 | 0 | } |
316 | | |
317 | | static void getpol(int *oldpolicy, struct bitmask *bmp) |
318 | 0 | { |
319 | 0 | if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0) |
320 | 0 | numa_error("get_mempolicy"); |
321 | 0 | } |
322 | | |
323 | | static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp) |
324 | 0 | { |
325 | 0 | if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0, |
326 | 0 | mbind_flags) < 0) |
327 | 0 | numa_error("mbind"); |
328 | 0 | } |
329 | | |
330 | | /* (undocumented) */ |
331 | | /* gives the wrong answer for hugetlbfs mappings. */ |
332 | | int numa_pagesize(void) |
333 | 0 | { |
334 | 0 | static int pagesize; |
335 | 0 | if (pagesize > 0) |
336 | 0 | return pagesize; |
337 | 0 | pagesize = getpagesize(); |
338 | 0 | return pagesize; |
339 | 0 | } |
340 | | |
341 | | make_internal_alias(numa_pagesize); |
342 | | |
343 | | /* |
344 | | * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr) |
345 | | * and the highest numbered existing node (maxconfigurednode). |
346 | | */ |
347 | | static void |
348 | | set_configured_nodes(void) |
349 | 2 | { |
350 | 2 | DIR *d; |
351 | 2 | struct dirent *de; |
352 | | |
353 | 2 | numa_memnode_ptr = numa_allocate_nodemask(); |
354 | 2 | numa_nodes_ptr = numa_allocate_nodemask(); |
355 | 2 | if (!numa_memnode_ptr || !numa_nodes_ptr) |
356 | 0 | return; |
357 | | |
358 | 2 | d = opendir("/sys/devices/system/node"); |
359 | 2 | if (!d) { |
360 | 0 | maxconfigurednode = 0; |
361 | 2 | } else { |
362 | 24 | while ((de = readdir(d)) != NULL) { |
363 | 22 | int nd; |
364 | 22 | if (strncmp(de->d_name, "node", 4)) |
365 | 20 | continue; |
366 | 2 | nd = strtoul(de->d_name+4, NULL, 0); |
367 | 2 | numa_bitmask_setbit(numa_nodes_ptr, nd); |
368 | 2 | numa_bitmask_setbit(numa_memnode_ptr, nd); |
369 | 2 | if (maxconfigurednode < nd) |
370 | 2 | maxconfigurednode = nd; |
371 | 2 | } |
372 | 2 | closedir(d); |
373 | 2 | } |
374 | 2 | } |
375 | | |
376 | | static inline int is_digit(char s) /* despite the name, this matches hex digits */ |
377 | 574 | { |
378 | 574 | return (s >= '0' && s <= '9') |
379 | 574 | || (s >= 'a' && s <= 'f') |
380 | 574 | || (s >= 'A' && s <= 'F'); |
381 | 574 | } |
382 | | |
383 | | /* Is string 'pre' a prefix of string 's'? */ |
384 | | static int strprefix(const char *s, const char *pre) |
385 | 112 | { |
386 | 112 | return strncmp(s, pre, strlen(pre)) == 0; |
387 | 112 | } |
388 | | |
389 | | static const char *mask_size_file = "/proc/self/status"; |
390 | | static const char *nodemask_prefix = "Mems_allowed:\t"; |
391 | | /* |
392 | | * (do this the way Paul Jackson's libcpuset does it) |
393 | | * The nodemask values in /proc/self/status are in an |
394 | | * ascii format that uses 9 characters for each 32 bits of mask. |
395 | | * (this could also be used to find the cpumask size) |
396 | | */ |
397 | | static void |
398 | | set_nodemask_size(void) |
399 | 2 | { |
400 | 2 | FILE *fp; |
401 | 2 | char *buf = NULL; |
402 | 2 | char *tmp_buf = NULL; |
403 | 2 | int digit_len = 0; |
404 | 2 | size_t bufsize = 0; |
405 | | |
406 | 2 | if ((fp = fopen(mask_size_file, "r")) == NULL) |
407 | 0 | goto done; |
408 | | |
409 | 114 | while (getline(&buf, &bufsize, fp) > 0) { |
410 | 112 | if (strprefix(buf, nodemask_prefix)) { |
411 | 2 | tmp_buf = buf; |
412 | 2 | tmp_buf += strlen(nodemask_prefix); |
413 | 576 | while (*tmp_buf != '\n' && *tmp_buf != '\0') { |
414 | 574 | if (is_digit(*tmp_buf)) |
415 | 512 | digit_len++; |
416 | 574 | tmp_buf++; |
417 | 574 | } |
418 | 2 | nodemask_sz = digit_len * 4; |
419 | 2 | } |
420 | 112 | } |
421 | 2 | free(buf); |
422 | 2 | fclose(fp); |
423 | 2 | done: |
424 | 2 | if (nodemask_sz == 0) {/* fall back on error */ |
425 | 0 | int pol; |
426 | 0 | unsigned long *mask = NULL; |
427 | 0 | nodemask_sz = 16; |
428 | 0 | do { |
429 | 0 | nodemask_sz <<= 1; |
430 | 0 | mask = realloc(mask, nodemask_sz / 8 + sizeof(unsigned long)); |
431 | 0 | if (!mask) |
432 | 0 | return; |
433 | 0 | } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL && |
434 | 0 | nodemask_sz < 4096*8); |
435 | 0 | free(mask); |
436 | 0 | } |
437 | 2 | } |
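Worked example of the digit arithmetic above: a Mems_allowed value of 00000000,00000003 contains 16 hex digits (is_digit() skips the comma), so nodemask_sz = 16 * 4 = 64 bits.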
438 | | |
439 | | /* |
440 | | * Read a mask consisting of a sequence of hexadecimal longs separated by |
441 | | * commas. Order them correctly and return the number of bits set. |
442 | | */ |
443 | | static int |
444 | | read_mask(char *s, struct bitmask *bmp) |
445 | 4 | { |
446 | 4 | char *end = s; |
447 | 4 | int tmplen = (bmp->size + bitsperint - 1) / bitsperint; |
448 | 4 | unsigned int tmp[tmplen]; |
449 | 4 | unsigned int *start = tmp; |
450 | 4 | unsigned int i, n = 0, m = 0; |
451 | | |
452 | 4 | if (!s) |
453 | 0 | return 0; /* shouldn't happen */ |
454 | | |
455 | 4 | i = strtoul(s, &end, 16); |
456 | | |
457 | | /* Skip leading zeros */ |
458 | 66 | while (!i && *end++ == ',') { |
459 | 62 | i = strtoul(end, &end, 16); |
460 | 62 | } |
461 | | |
462 | 4 | if (!i) |
463 | | /* End of string. No mask */ |
464 | 0 | return -1; |
465 | | |
466 | 4 | start[n++] = i; |
467 | | /* Read sequence of ints */ |
468 | 4 | while (*end++ == ',') { |
469 | 0 | i = strtoul(end, &end, 16); |
470 | 0 | start[n++] = i; |
471 | | |
472 | | /* buffer overflow */ |
473 | 0 | if (n > tmplen) |
474 | 0 | return -1; |
475 | 0 | } |
476 | | |
477 | | /* |
478 | | * Invert sequence of ints if necessary since the first int |
479 | | * is the highest and we put it first because we read it first. |
480 | | */ |
481 | 8 | while (n) { |
482 | 4 | int w; |
483 | 4 | unsigned long x = 0; |
484 | | /* read into long values in an endian-safe way */ |
485 | 8 | for (w = 0; n && w < bitsperlong; w += bitsperint) |
486 | 4 | x |= ((unsigned long)start[n-- - 1] << w); |
487 | | |
488 | 4 | bmp->maskp[m++] = x; |
489 | 4 | } |
490 | | /* |
491 | | * Return the number of bits set |
492 | | */ |
493 | 4 | return numa_bitmask_weight(bmp); |
494 | 4 | } |
495 | | |
496 | | /* |
497 | | * Read a process's constraints in terms of nodes and cpus from |
498 | | * /proc/self/status. |
499 | | */ |
500 | | static void |
501 | | set_task_constraints(void) |
502 | 2 | { |
503 | 2 | int hicpu = maxconfiguredcpu; |
504 | 2 | int i; |
505 | 2 | char *buffer = NULL; |
506 | 2 | size_t buflen = 0; |
507 | 2 | FILE *f; |
508 | | |
509 | 2 | numa_all_cpus_ptr = numa_allocate_cpumask(); |
510 | 2 | numa_possible_cpus_ptr = numa_allocate_cpumask(); |
511 | 2 | numa_all_nodes_ptr = numa_allocate_nodemask(); |
512 | 2 | numa_possible_nodes_ptr = numa_allocate_cpumask(); |
513 | 2 | numa_no_nodes_ptr = numa_allocate_nodemask(); |
514 | | |
515 | | // partial leak shouldn't happen because it's transient |
516 | 2 | if (!numa_all_cpus_ptr || !numa_possible_cpus_ptr || |
517 | 2 | !numa_all_nodes_ptr || |
518 | 2 | !numa_possible_nodes_ptr || |
519 | 2 | !numa_no_nodes_ptr) |
520 | 0 | return; |
521 | | |
522 | 2 | f = fopen(mask_size_file, "r"); |
523 | 2 | if (!f) { |
524 | | //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file); |
525 | 0 | return; |
526 | 0 | } |
527 | | |
528 | 114 | while (getline(&buffer, &buflen, f) > 0) { |
529 | | /* mask starts after [last] tab */ |
530 | 112 | char *mask = strrchr(buffer,'\t') + 1; |
531 | | |
532 | 112 | if (strncmp(buffer,"Cpus_allowed:",13) == 0) |
533 | 2 | numproccpu = read_mask(mask, numa_all_cpus_ptr); |
534 | | |
535 | 112 | if (strncmp(buffer,"Mems_allowed:",13) == 0) { |
536 | 2 | numprocnode = read_mask(mask, numa_all_nodes_ptr); |
537 | 2 | } |
538 | 112 | } |
539 | 2 | fclose(f); |
540 | 2 | free(buffer); |
541 | | |
542 | 66 | for (i = 0; i <= hicpu; i++) |
543 | 64 | numa_bitmask_setbit(numa_possible_cpus_ptr, i); |
544 | 4 | for (i = 0; i <= maxconfigurednode; i++) |
545 | 2 | numa_bitmask_setbit(numa_possible_nodes_ptr, i); |
546 | | |
547 | | /* |
548 | | * Cpus_allowed in the kernel can be defined to all f's |
549 | | * i.e. it may be a superset of the actual available processors. |
550 | | * As such let's reduce numproccpu to the number of actual |
551 | | * available cpus. |
552 | | */ |
553 | 2 | if (numproccpu <= 0) { |
554 | 0 | for (i = 0; i <= hicpu; i++) |
555 | 0 | numa_bitmask_setbit(numa_all_cpus_ptr, i); |
556 | 0 | numproccpu = hicpu+1; |
557 | 0 | } |
558 | | |
559 | 2 | if (numproccpu > hicpu+1) { |
560 | 0 | numproccpu = hicpu+1; |
561 | 0 | for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) { |
562 | 0 | numa_bitmask_clearbit(numa_all_cpus_ptr, i); |
563 | 0 | } |
564 | 0 | } |
565 | | |
566 | 2 | if (numprocnode <= 0) { |
567 | 0 | for (i = 0; i <= maxconfigurednode; i++) |
568 | 0 | numa_bitmask_setbit(numa_all_nodes_ptr, i); |
569 | 0 | numprocnode = maxconfigurednode + 1; |
570 | 0 | } |
571 | | |
572 | 2 | return; |
573 | 2 | } |
574 | | |
575 | | /* |
576 | | * Find the highest cpu number possible (in other words the size |
577 | | * of a kernel cpumask_t (in bits) - 1) |
578 | | */ |
579 | | static void |
580 | | set_numa_max_cpu(void) |
581 | 2 | { |
582 | 2 | int len = 4096; |
583 | 2 | int n; |
584 | 2 | int olde = errno; |
585 | 2 | struct bitmask *buffer; |
586 | | |
587 | 2 | do { |
588 | 2 | buffer = numa_bitmask_alloc(len); |
589 | 2 | if (!buffer) |
590 | 0 | return; |
591 | 2 | n = numa_sched_getaffinity_v2_int(0, buffer); |
592 | | /* on success, returns size of kernel cpumask_t, in bytes */ |
593 | 2 | if (n < 0) { |
594 | 0 | if (errno == EINVAL) { |
595 | 0 | if (len >= 1024*1024) |
596 | 0 | break; |
597 | 0 | len *= 2; |
598 | 0 | numa_bitmask_free(buffer); |
599 | 0 | continue; |
600 | 0 | } else { |
601 | 0 | numa_warn(W_numcpus, "Unable to determine max cpu" |
602 | 0 | " (sched_getaffinity: %s); guessing...", |
603 | 0 | strerror(errno)); |
604 | 0 | n = sizeof(cpu_set_t); |
605 | 0 | break; |
606 | 0 | } |
607 | 0 | } |
608 | 2 | } while (n < 0); |
609 | 2 | numa_bitmask_free(buffer); |
610 | 2 | errno = olde; |
611 | 2 | cpumask_sz = n*8; |
612 | 2 | } |
613 | | |
614 | | /* |
615 | | * get the total (configured) number of cpus - both online and offline |
616 | | */ |
617 | | static void |
618 | | set_configured_cpus(void) |
619 | 2 | { |
620 | 2 | maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1; |
621 | 2 | if (maxconfiguredcpu == -1) |
622 | 0 | numa_error("sysconf(NPROCESSORS_CONF) failed"); |
623 | 2 | } |
624 | | |
625 | | static void |
626 | | set_preferred_many(void) |
627 | 0 | { |
628 | 0 | int oldp; |
629 | 0 | struct bitmask *bmp, *tmp; |
630 | 0 | int old_errno; |
631 | |
632 | 0 | if (has_preferred_many >= 0) |
633 | 0 | return; |
634 | | |
635 | 0 | old_errno = errno; |
636 | |
637 | 0 | has_preferred_many = 0; |
638 | |
639 | 0 | bmp = numa_allocate_nodemask(); |
640 | 0 | tmp = numa_get_mems_allowed(); |
641 | 0 | if (!tmp || !bmp) |
642 | 0 | goto out; |
643 | | |
644 | 0 | if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0) |
645 | 0 | goto out; |
646 | | |
647 | 0 | if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) { |
648 | 0 | has_preferred_many = 1; |
649 | | /* reset the old memory policy ignoring error */ |
650 | 0 | (void)set_mempolicy(oldp, bmp->maskp, bmp->size+1); |
651 | 0 | } |
652 | |
653 | 0 | out: |
654 | 0 | numa_bitmask_free(tmp); |
655 | 0 | numa_bitmask_free(bmp); |
656 | 0 | errno = old_errno; |
657 | 0 | } |
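The probe above is ordinary runtime feature detection: on kernels that predate MPOL_PREFERRED_MANY the set_mempolicy() call fails (typically with EINVAL), has_preferred_many stays 0, and numa_set_bind_policy() below falls back to MPOL_PREFERRED for non-strict bindings.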
658 | | |
659 | | /* |
660 | | * Initialize all the sizes. |
661 | | */ |
662 | | static void |
663 | | set_sizes(void) |
664 | 2 | { |
665 | 2 | sizes_set++; |
666 | 2 | set_nodemask_size(); /* size of kernel nodemask_t */ |
667 | 2 | set_configured_nodes(); /* configured nodes listed in /sys */ |
668 | 2 | set_numa_max_cpu(); /* size of kernel cpumask_t */ |
669 | 2 | set_configured_cpus(); /* cpus listed in /sys/devices/system/cpu */ |
670 | 2 | set_task_constraints(); /* cpus and nodes for current task */ |
671 | 2 | } |
672 | | |
673 | | int |
674 | | numa_num_configured_nodes(void) |
675 | 677 | { |
676 | | /* |
677 | | * NOTE: this function's behavior matches the documentation (ie: it |
678 | | * returns a count of nodes with memory) despite the poor function |
679 | | * naming. We also cannot use the similarly poorly named |
680 | | * numa_all_nodes_ptr as it only tracks nodes with memory from which |
681 | | * the calling process can allocate. Think sparse nodes, memory-less |
682 | | * nodes, cpusets... |
683 | | */ |
684 | 677 | int memnodecount=0, i; |
685 | | |
686 | 1.35k | for (i=0; i <= maxconfigurednode; i++) { |
687 | 677 | if (numa_bitmask_isbitset(numa_memnode_ptr, i)) |
688 | 677 | memnodecount++; |
689 | 677 | } |
690 | 677 | return memnodecount; |
691 | 677 | } |
692 | | |
693 | | int |
694 | | numa_num_configured_cpus(void) |
695 | 675 | { |
696 | | |
697 | 675 | return maxconfiguredcpu+1; |
698 | 675 | } |
699 | | |
700 | | int |
701 | | numa_num_possible_nodes(void) |
702 | 684 | { |
703 | 684 | return nodemask_sz; |
704 | 684 | } |
705 | | |
706 | | int |
707 | | numa_num_possible_cpus(void) |
708 | 682 | { |
709 | 682 | return cpumask_sz; |
710 | 682 | } |
711 | | |
712 | | int |
713 | | numa_num_task_nodes(void) |
714 | 0 | { |
715 | 0 | return numprocnode; |
716 | 0 | } |
717 | | |
718 | | /* |
719 | | * for backward compatibility |
720 | | */ |
721 | | int |
722 | | numa_num_thread_nodes(void) |
723 | 0 | { |
724 | 0 | return numa_num_task_nodes(); |
725 | 0 | } |
726 | | |
727 | | int |
728 | | numa_num_task_cpus(void) |
729 | 0 | { |
730 | 0 | return numproccpu; |
731 | 0 | } |
732 | | |
733 | | /* |
734 | | * for backward compatibility |
735 | | */ |
736 | | int |
737 | | numa_num_thread_cpus(void) |
738 | 0 | { |
739 | 0 | return numa_num_task_cpus(); |
740 | 0 | } |
741 | | |
742 | | /* |
743 | | * Return the number of the highest node in this running system. |
744 | | */ |
745 | | int |
746 | | numa_max_node(void) |
747 | 181 | { |
748 | 181 | return maxconfigurednode; |
749 | 181 | } |
750 | | |
751 | | make_internal_alias(numa_max_node); |
752 | | |
753 | | /* |
754 | | * Return the number of the highest possible node in a system, |
755 | | * which for v1 is the size of a numa.h nodemask_t (in bits) - 1, |
756 | | * but for v2 is the size of a kernel nodemask_t (in bits) - 1. |
757 | | */ |
758 | | SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1") |
759 | | int |
760 | | numa_max_possible_node_v1(void) |
761 | 0 | { |
762 | 0 | return ((sizeof(nodemask_t)*8)-1); |
763 | 0 | } |
764 | | |
765 | | SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2") |
766 | | int |
767 | | numa_max_possible_node_v2(void) |
768 | 684 | { |
769 | 684 | return numa_num_possible_nodes()-1; |
770 | 684 | } |
771 | | |
772 | | make_internal_alias(numa_max_possible_node_v1); |
773 | | make_internal_alias(numa_max_possible_node_v2); |
774 | | |
775 | | /* |
776 | | * Allocate a bitmask for cpus, of a size large enough to |
777 | | * match the kernel's cpumask_t. |
778 | | */ |
779 | | struct bitmask * |
780 | | numa_allocate_cpumask() |
781 | 682 | { |
782 | 682 | int ncpus = numa_num_possible_cpus(); |
783 | | |
784 | 682 | return numa_bitmask_alloc(ncpus); |
785 | 682 | } |
786 | | |
787 | | /* |
788 | | * Allocate a bitmask the size of a libnuma nodemask_t |
789 | | */ |
790 | | static struct bitmask * |
791 | | allocate_nodemask_v1(void) |
792 | 0 | { |
793 | 0 | int nnodes = numa_max_possible_node_v1_int()+1; |
794 | |
795 | 0 | return numa_bitmask_alloc(nnodes); |
796 | 0 | } |
797 | | |
798 | | /* |
799 | | * Allocate a bitmask for nodes, of a size large enough to |
800 | | * match the kernel's nodemask_t. |
801 | | */ |
802 | | struct bitmask * |
803 | | numa_allocate_nodemask(void) |
804 | 683 | { |
805 | 683 | struct bitmask *bmp; |
806 | 683 | int nnodes = numa_max_possible_node_v2_int() + 1; |
807 | | |
808 | 683 | bmp = numa_bitmask_alloc(nnodes); |
809 | 683 | return bmp; |
810 | 683 | } |
811 | | |
812 | | /* (cache the result?) */ |
813 | | long long numa_node_size64(int node, long long *freep) |
814 | 0 | { |
815 | 0 | size_t len = 0; |
816 | 0 | char *line = NULL; |
817 | 0 | long long size = -1; |
818 | 0 | FILE *f; |
819 | 0 | char fn[64]; |
820 | 0 | int ok = 0; |
821 | 0 | int required = freep ? 2 : 1; |
822 | |
823 | 0 | if (freep) |
824 | 0 | *freep = 0; |
825 | 0 | sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node); |
826 | 0 | f = fopen(fn, "r"); |
827 | 0 | if (!f) |
828 | 0 | return -1; |
829 | 0 | while (getdelim(&line, &len, '\n', f) > 0) { |
830 | 0 | char *end; |
831 | 0 | char *s = strcasestr(line, "kB"); |
832 | 0 | if (!s) |
833 | 0 | continue; |
834 | 0 | --s; |
835 | 0 | while (s > line && isspace(*s)) |
836 | 0 | --s; |
837 | 0 | while (s > line && isdigit(*s)) |
838 | 0 | --s; |
839 | 0 | if (strstr(line, "MemTotal")) { |
840 | 0 | size = strtoull(s,&end,0) << 10; |
841 | 0 | if (end == s) |
842 | 0 | size = -1; |
843 | 0 | else |
844 | 0 | ok++; |
845 | 0 | } |
846 | 0 | if (freep && strstr(line, "MemFree")) { |
847 | 0 | *freep = strtoull(s,&end,0) << 10; |
848 | 0 | if (end == s) |
849 | 0 | *freep = -1; |
850 | 0 | else |
851 | 0 | ok++; |
852 | 0 | } |
853 | 0 | } |
854 | 0 | fclose(f); |
855 | 0 | free(line); |
856 | 0 | if (ok != required) |
857 | 0 | numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok); |
858 | 0 | return size; |
859 | 0 | } |
860 | | |
861 | | make_internal_alias(numa_node_size64); |
862 | | |
863 | | long numa_node_size(int node, long *freep) |
864 | 0 | { |
865 | 0 | long long f2 = 0; |
866 | 0 | long sz = numa_node_size64_int(node, &f2); |
867 | 0 | if (freep) |
868 | 0 | *freep = f2; |
869 | 0 | return sz; |
870 | 0 | } |
871 | | |
872 | | int numa_available(void) |
873 | 0 | { |
874 | 0 | if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && (errno == ENOSYS || errno == EPERM)) |
875 | 0 | return -1; |
876 | 0 | return 0; |
877 | 0 | } |
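The zero-argument get_mempolicy() here is a cheap capability probe: ENOSYS means the kernel lacks the NUMA system calls, EPERM means they are denied (e.g. by a seccomp filter). Per the header comment, callers must check this before any other libnuma call.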
878 | | |
879 | | SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1") |
880 | | void |
881 | | numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask) |
882 | 0 | { |
883 | 0 | struct bitmask bitmask; |
884 | |
885 | 0 | bitmask.size = sizeof(nodemask_t) * 8; |
886 | 0 | bitmask.maskp = (unsigned long *)mask; |
887 | 0 | dombind(mem, size, MPOL_INTERLEAVE, &bitmask); |
888 | 0 | } |
889 | | |
890 | | SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2") |
891 | | void |
892 | | numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp) |
893 | 0 | { |
894 | 0 | dombind(mem, size, MPOL_INTERLEAVE, bmp); |
895 | 0 | } |
896 | | |
897 | | void |
898 | | numa_weighted_interleave_memory(void *mem, size_t size, struct bitmask *bmp) |
899 | 0 | { |
900 | 0 | dombind(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp); |
901 | 0 | } |
902 | | |
903 | | void numa_tonode_memory(void *mem, size_t size, int node) |
904 | 0 | { |
905 | 0 | struct bitmask *nodes; |
906 | |
907 | 0 | nodes = numa_allocate_nodemask(); |
908 | 0 | if (!nodes) |
909 | 0 | return; |
910 | 0 | numa_bitmask_setbit(nodes, node); |
911 | 0 | dombind(mem, size, bind_policy, nodes); |
912 | 0 | numa_bitmask_free(nodes); |
913 | 0 | } |
914 | | |
915 | | SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1") |
916 | | void |
917 | | numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask) |
918 | 0 | { |
919 | 0 | struct bitmask bitmask; |
920 | |
921 | 0 | bitmask.maskp = (unsigned long *)mask; |
922 | 0 | bitmask.size = sizeof(nodemask_t); |
923 | 0 | dombind(mem, size, bind_policy, &bitmask); |
924 | 0 | } |
925 | | |
926 | | SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2") |
927 | | void |
928 | | numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp) |
929 | 0 | { |
930 | 0 | dombind(mem, size, bind_policy, bmp); |
931 | 0 | } |
932 | | |
933 | | void numa_setlocal_memory(void *mem, size_t size) |
934 | 0 | { |
935 | 0 | dombind(mem, size, MPOL_LOCAL, NULL); |
936 | 0 | } |
937 | | |
938 | | void numa_police_memory(void *mem, size_t size) |
939 | 0 | { |
940 | 0 | int pagesize = numa_pagesize_int(); |
941 | 0 | unsigned long i; |
942 | 0 | char *p = mem; |
943 | 0 | for (i = 0; i < size; i += pagesize, p += pagesize) |
944 | 0 | __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED); |
945 | |
946 | 0 | } |
947 | | |
948 | | make_internal_alias(numa_police_memory); |
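The __atomic_and_fetch(p, 0xff, ...) above is a deliberate no-op write: ANDing a byte with 0xff leaves its value unchanged, but the atomic store still faults each page in and commits it under the current memory policy, without clobbering concurrent writers the way a plain read-then-write could.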
949 | | |
950 | | void *numa_alloc(size_t size) |
951 | 0 | { |
952 | 0 | char *mem; |
953 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
954 | 0 | 0, 0); |
955 | 0 | if (mem == (char *)-1) |
956 | 0 | return NULL; |
957 | 0 | numa_police_memory_int(mem, size); |
958 | 0 | return mem; |
959 | 0 | } |
960 | | |
961 | | void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) |
962 | 0 | { |
963 | 0 | char *mem; |
964 | 0 | mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); |
965 | 0 | if (mem == (char *)-1) |
966 | 0 | return NULL; |
967 | | /* |
968 | | * The memory policy of the allocated pages is preserved by mremap(), so |
969 | | * there is no need to (re)set it here. If the policy of the original |
970 | | * allocation is not set, the new pages will be allocated according to the |
971 | | * process' mempolicy. Trying to allocate explicitly the new pages on the |
972 | | * same node as the original ones would require changing the policy of the |
973 | | * newly allocated pages, which violates the numa_realloc() semantics. |
974 | | */ |
975 | 0 | return mem; |
976 | 0 | } |
977 | | |
978 | | SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1") |
979 | | void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) |
980 | 0 | { |
981 | 0 | char *mem; |
982 | 0 | struct bitmask bitmask; |
983 | |
984 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
985 | 0 | 0, 0); |
986 | 0 | if (mem == (char *)-1) |
987 | 0 | return NULL; |
988 | 0 | bitmask.maskp = (unsigned long *)mask; |
989 | 0 | bitmask.size = sizeof(nodemask_t); |
990 | 0 | dombind(mem, size, MPOL_INTERLEAVE, &bitmask); |
991 | 0 | return mem; |
992 | 0 | } |
993 | | |
994 | | SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2") |
995 | | void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp) |
996 | 0 | { |
997 | 0 | char *mem; |
998 | |
999 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1000 | 0 | 0, 0); |
1001 | 0 | if (mem == (char *)-1) |
1002 | 0 | return NULL; |
1003 | 0 | dombind(mem, size, MPOL_INTERLEAVE, bmp); |
1004 | 0 | return mem; |
1005 | 0 | } |
1006 | | |
1007 | | make_internal_alias(numa_alloc_interleaved_subset_v1); |
1008 | | make_internal_alias(numa_alloc_interleaved_subset_v2); |
1009 | | |
1010 | | void * |
1011 | | numa_alloc_interleaved(size_t size) |
1012 | 0 | { |
1013 | 0 | return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr); |
1014 | 0 | } |
1015 | | |
1016 | | void * |
1017 | | numa_alloc_weighted_interleaved_subset(size_t size, struct bitmask *bmp) |
1018 | 0 | { |
1019 | 0 | char *mem; |
1020 | |
1021 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1022 | 0 | 0, 0); |
1023 | 0 | if (mem == (char *)-1) |
1024 | 0 | return NULL; |
1025 | 0 | dombind(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp); |
1026 | 0 | return mem; |
1027 | 0 | } |
1028 | | |
1029 | | void * |
1030 | | numa_alloc_weighted_interleaved(size_t size) |
1031 | 0 | { |
1032 | 0 | return numa_alloc_weighted_interleaved_subset(size, numa_all_nodes_ptr); |
1033 | 0 | } |
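A hedged caller-side sketch of the interleaving allocators above (the function name and size are illustrative):

    #include <numa.h>
    #include <string.h>

    void interleave_example(void)
    {
        size_t len = 1 << 20;                     /* 1 MiB, illustrative */
        void *buf = numa_alloc_interleaved(len);  /* striped over all allowed nodes */
        if (buf) {
            memset(buf, 0, len);    /* first touch faults pages in, per policy */
            numa_free(buf, len);    /* size must match the allocation */
        }
    }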
1034 | | |
1035 | | /* |
1036 | | * given a user node mask, set memory policy to use those nodes |
1037 | | */ |
1038 | | SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1") |
1039 | | void |
1040 | | numa_set_interleave_mask_v1(nodemask_t *mask) |
1041 | 0 | { |
1042 | 0 | struct bitmask *bmp; |
1043 | 0 | int nnodes = numa_max_possible_node_v1_int()+1; |
1044 | |
1045 | 0 | bmp = numa_bitmask_alloc(nnodes); |
1046 | 0 | copy_nodemask_to_bitmask(mask, bmp); |
1047 | 0 | if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
1048 | 0 | setpol(MPOL_DEFAULT, bmp); |
1049 | 0 | else |
1050 | 0 | setpol(MPOL_INTERLEAVE, bmp); |
1051 | 0 | numa_bitmask_free(bmp); |
1052 | 0 | } |
1053 | | |
1054 | | |
1055 | | SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2") |
1056 | | void |
1057 | | numa_set_interleave_mask_v2(struct bitmask *bmp) |
1058 | 0 | { |
1059 | 0 | if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
1060 | 0 | setpol(MPOL_DEFAULT, bmp); |
1061 | 0 | else |
1062 | 0 | setpol(MPOL_INTERLEAVE, bmp); |
1063 | 0 | } |
1064 | | |
1065 | | void |
1066 | | numa_set_weighted_interleave_mask(struct bitmask *bmp) |
1067 | 0 | { |
1068 | 0 | if (numa_bitmask_equal(bmp, numa_no_nodes_ptr)) |
1069 | 0 | setpol(MPOL_DEFAULT, bmp); |
1070 | 0 | else |
1071 | 0 | setpol(MPOL_WEIGHTED_INTERLEAVE, bmp); |
1072 | 0 | } |
1073 | | |
1074 | | SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1") |
1075 | | nodemask_t |
1076 | | numa_get_interleave_mask_v1(void) |
1077 | 0 | { |
1078 | 0 | int oldpolicy = 0; |
1079 | 0 | struct bitmask *bmp; |
1080 | 0 | nodemask_t mask; |
1081 | |
1082 | 0 | bmp = allocate_nodemask_v1(); |
1083 | 0 | if (!bmp) |
1084 | 0 | return numa_no_nodes; |
1085 | 0 | getpol(&oldpolicy, bmp); |
1086 | 0 | if (oldpolicy == MPOL_INTERLEAVE) |
1087 | 0 | copy_bitmask_to_nodemask(bmp, &mask); |
1088 | 0 | else |
1089 | 0 | copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask); |
1090 | 0 | numa_bitmask_free(bmp); |
1091 | 0 | return mask; |
1092 | 0 | } |
1093 | | |
1094 | | SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2") |
1095 | | struct bitmask * |
1096 | | numa_get_interleave_mask_v2(void) |
1097 | 0 | { |
1098 | 0 | int oldpolicy = 0; |
1099 | 0 | struct bitmask *bmp; |
1100 | |
1101 | 0 | bmp = numa_allocate_nodemask(); |
1102 | 0 | if (!bmp) |
1103 | 0 | return NULL; |
1104 | 0 | getpol(&oldpolicy, bmp); |
1105 | 0 | if (oldpolicy != MPOL_INTERLEAVE) |
1106 | 0 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
1107 | 0 | return bmp; |
1108 | 0 | } |
1109 | | |
1110 | | struct bitmask * |
1111 | | numa_get_weighted_interleave_mask(void) |
1112 | 0 | { |
1113 | 0 | int oldpolicy = 0; |
1114 | 0 | struct bitmask *bmp; |
1115 | |
1116 | 0 | bmp = numa_allocate_nodemask(); |
1117 | 0 | if (!bmp) |
1118 | 0 | return NULL; |
1119 | 0 | getpol(&oldpolicy, bmp); |
1120 | 0 | if (oldpolicy != MPOL_WEIGHTED_INTERLEAVE) |
1121 | 0 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
1122 | 0 | return bmp; |
1123 | 0 | } |
1124 | | |
1125 | | /* (undocumented) */ |
1126 | | int numa_get_interleave_node(void) |
1127 | 0 | { |
1128 | 0 | int nd; |
1129 | 0 | if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0) |
1130 | 0 | return nd; |
1131 | 0 | return 0; |
1132 | 0 | } |
1133 | | |
1134 | | void *numa_alloc_onnode(size_t size, int node) |
1135 | 0 | { |
1136 | 0 | char *mem; |
1137 | 0 | struct bitmask *bmp; |
1138 | |
1139 | 0 | bmp = numa_allocate_nodemask(); |
1140 | 0 | if (!bmp) |
1141 | 0 | return NULL; |
1142 | 0 | numa_bitmask_setbit(bmp, node); |
1143 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1144 | 0 | 0, 0); |
1145 | 0 | if (mem == (char *)-1) |
1146 | 0 | mem = NULL; |
1147 | 0 | else |
1148 | 0 | dombind(mem, size, bind_policy, bmp); |
1149 | 0 | numa_bitmask_free(bmp); |
1150 | 0 | return mem; |
1151 | 0 | } |
1152 | | |
1153 | | void *numa_alloc_local(size_t size) |
1154 | 0 | { |
1155 | 0 | char *mem; |
1156 | 0 | mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, |
1157 | 0 | 0, 0); |
1158 | 0 | if (mem == (char *)-1) |
1159 | 0 | mem = NULL; |
1160 | 0 | else |
1161 | 0 | dombind(mem, size, MPOL_LOCAL, NULL); |
1162 | 0 | return mem; |
1163 | 0 | } |
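Caller-side sketch for the two allocators above (the helper name and node 1 are assumptions; real code should consult numa_max_node() first):

    #include <numa.h>

    void *scratch_on_node1(size_t len)
    {
        void *p = numa_alloc_onnode(len, 1);  /* pages bound to node 1 */
        if (!p)
            p = numa_alloc_local(len);        /* fall back to the local node */
        return p;                             /* release with numa_free(p, len) */
    }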
1164 | | |
1165 | | void numa_set_bind_policy(int strict) |
1166 | 0 | { |
1167 | 0 | set_preferred_many(); |
1168 | 0 | if (strict) |
1169 | 0 | bind_policy = MPOL_BIND; |
1170 | 0 | else if (has_preferred_many) |
1171 | 0 | bind_policy = MPOL_PREFERRED_MANY; |
1172 | 0 | else |
1173 | 0 | bind_policy = MPOL_PREFERRED; |
1174 | 0 | } |
1175 | | |
1176 | | SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1") |
1177 | | void |
1178 | | numa_set_membind_v1(const nodemask_t *mask) |
1179 | 0 | { |
1180 | 0 | struct bitmask bitmask; |
1181 | |
1182 | 0 | bitmask.maskp = (unsigned long *)mask; |
1183 | 0 | bitmask.size = sizeof(nodemask_t); |
1184 | 0 | setpol(MPOL_BIND, &bitmask); |
1185 | 0 | } |
1186 | | |
1187 | | SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2") |
1188 | | void |
1189 | | numa_set_membind_v2(struct bitmask *bmp) |
1190 | 0 | { |
1191 | 0 | setpol(MPOL_BIND, bmp); |
1192 | 0 | } |
1193 | | |
1194 | | make_internal_alias(numa_set_membind_v2); |
1195 | | |
1196 | | void |
1197 | | numa_set_membind_balancing(struct bitmask *bmp) |
1198 | 0 | { |
1199 | | /* MPOL_F_NUMA_BALANCING: ignore if unsupported */ |
1200 | 0 | if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING, |
1201 | 0 | bmp->maskp, bmp->size + 1) < 0) { |
1202 | 0 | if (errno == EINVAL) { |
1203 | 0 | errno = 0; |
1204 | 0 | numa_set_membind_v2(bmp); |
1205 | 0 | } else |
1206 | 0 | numa_error("set_mempolicy"); |
1207 | 0 | } |
1208 | 0 | } |
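Kernels that predate MPOL_F_NUMA_BALANCING reject the combined mode with EINVAL, which is why the code above quietly retries as a plain MPOL_BIND instead of reporting an error.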
1209 | | |
1210 | | /* |
1211 | | * copy a bitmask map body to a numa.h nodemask_t structure |
1212 | | */ |
1213 | | void |
1214 | | copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp) |
1215 | 0 | { |
1216 | 0 | int max, i; |
1217 | |
1218 | 0 | memset(nmp, 0, sizeof(nodemask_t)); |
1219 | 0 | max = (sizeof(nodemask_t)*8); |
1220 | 0 | for (i=0; i<bmp->size; i++) { |
1221 | 0 | if (i >= max) |
1222 | 0 | break; |
1223 | 0 | if (numa_bitmask_isbitset(bmp, i)) |
1224 | 0 | nodemask_set_compat((nodemask_t *)nmp, i); |
1225 | 0 | } |
1226 | 0 | } |
1227 | | |
1228 | | /* |
1229 | | * copy a bitmask map body to another bitmask body |
1230 | | * fill a larger destination with zeroes |
1231 | | */ |
1232 | | void |
1233 | | copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto) |
1234 | 184 | { |
1235 | 184 | int bytes; |
1236 | | |
1237 | 184 | if (bmpfrom->size >= bmpto->size) { |
1238 | 184 | memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size)); |
1239 | 184 | } else if (bmpfrom->size < bmpto->size) { |
1240 | 0 | bytes = CPU_BYTES(bmpfrom->size); |
1241 | 0 | memcpy(bmpto->maskp, bmpfrom->maskp, bytes); |
1242 | 0 | memset(((char *)bmpto->maskp)+bytes, 0, |
1243 | 0 | CPU_BYTES(bmpto->size)-bytes); |
1244 | 0 | } |
1245 | 184 | } |
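Note the asymmetry: copying into a smaller destination silently truncates the high bits (the memcpy is capped at CPU_BYTES(bmpto->size)), while copying into a larger one zero-fills the tail, as the comment above promises.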
1246 | | |
1247 | | /* |
1248 | | * copy a numa.h nodemask_t structure to a bitmask map body |
1249 | | */ |
1250 | | void |
1251 | | copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp) |
1252 | 0 | { |
1253 | 0 | int max, i; |
1254 | |
1255 | 0 | numa_bitmask_clearall(bmp); |
1256 | 0 | max = (sizeof(nodemask_t)*8); |
1257 | 0 | if (max > bmp->size) |
1258 | 0 | max = bmp->size; |
1259 | 0 | for (i=0; i<max; i++) { |
1260 | 0 | if (nodemask_isset_compat(nmp, i)) |
1261 | 0 | numa_bitmask_setbit(bmp, i); |
1262 | 0 | } |
1263 | 0 | } |
1264 | | |
1265 | | SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1") |
1266 | | nodemask_t |
1267 | | numa_get_membind_v1(void) |
1268 | 0 | { |
1269 | 0 | int oldpolicy = 0; |
1270 | 0 | struct bitmask *bmp; |
1271 | 0 | nodemask_t nmp; |
1272 | |
1273 | 0 | bmp = allocate_nodemask_v1(); |
1274 | 0 | if (!bmp) |
1275 | 0 | return numa_no_nodes; |
1276 | 0 | getpol(&oldpolicy, bmp); |
1277 | 0 | if (oldpolicy == MPOL_BIND) { |
1278 | 0 | copy_bitmask_to_nodemask(bmp, &nmp); |
1279 | 0 | } else { |
1280 | | /* copy the body of the map to numa_all_nodes */ |
1281 | 0 | copy_bitmask_to_nodemask(bmp, &numa_all_nodes); |
1282 | 0 | nmp = numa_all_nodes; |
1283 | 0 | } |
1284 | 0 | numa_bitmask_free(bmp); |
1285 | 0 | return nmp; |
1286 | 0 | } |
1287 | | |
1288 | | SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2") |
1289 | | struct bitmask * |
1290 | | numa_get_membind_v2(void) |
1291 | 0 | { |
1292 | 0 | int oldpolicy = 0; |
1293 | 0 | struct bitmask *bmp = NULL; |
1294 | |
1295 | 0 | bmp = numa_allocate_nodemask(); |
1296 | 0 | if (!bmp) |
1297 | 0 | return NULL; |
1298 | 0 | getpol(&oldpolicy, bmp); |
1299 | 0 | if (oldpolicy != MPOL_BIND) |
1300 | 0 | copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp); |
1301 | 0 | return bmp; |
1302 | 0 | } |
1303 | | |
1304 | | //TODO: do we need a v1 nodemask_t version? |
1305 | | struct bitmask *numa_get_mems_allowed(void) |
1306 | 0 | { |
1307 | 0 | struct bitmask *bmp; |
1308 | | |
1309 | | /* |
1310 | | * can change, so query on each call. |
1311 | | */ |
1312 | 0 | bmp = numa_allocate_nodemask(); |
1313 | 0 | if (!bmp) |
1314 | 0 | return NULL; |
1315 | 0 | if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0, |
1316 | 0 | MPOL_F_MEMS_ALLOWED) < 0) |
1317 | 0 | numa_error("get_mempolicy"); |
1318 | 0 | return bmp; |
1319 | 0 | } |
1320 | | make_internal_alias(numa_get_mems_allowed); |
1321 | | |
1322 | | void numa_free(void *mem, size_t size) |
1323 | 0 | { |
1324 | 0 | munmap(mem, size); |
1325 | 0 | } |
1326 | | |
1327 | | SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1") |
1328 | | int |
1329 | | numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus) |
1330 | 0 | { |
1331 | 0 | int i; |
1332 | 0 | char *p = strchr(line, '\n'); |
1333 | 0 | if (!p) |
1334 | 0 | return -1; |
1335 | | |
1336 | 0 | for (i = 0; p > line;i++) { |
1337 | 0 | char *oldp, *endp; |
1338 | 0 | oldp = p; |
1339 | 0 | if (*p == ',') |
1340 | 0 | --p; |
1341 | 0 | while (p > line && *p != ',') |
1342 | 0 | --p; |
1343 | | /* Eat two 32bit fields at a time to get longs */ |
1344 | 0 | if (p > line && sizeof(unsigned long) == 8) { |
1345 | 0 | oldp--; |
1346 | 0 | memmove(p, p+1, oldp-p+1); |
1347 | 0 | while (p > line && *p != ',') |
1348 | 0 | --p; |
1349 | 0 | } |
1350 | 0 | if (*p == ',') |
1351 | 0 | p++; |
1352 | 0 | if (i >= CPU_LONGS(ncpus)) |
1353 | 0 | return -1; |
1354 | 0 | mask[i] = strtoul(p, &endp, 16); |
1355 | 0 | if (endp != oldp) |
1356 | 0 | return -1; |
1357 | 0 | p--; |
1358 | 0 | } |
1359 | 0 | return 0; |
1360 | 0 | } |
1361 | | |
1362 | | SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2") |
1363 | | int |
1364 | | numa_parse_bitmap_v2(char *line, struct bitmask *mask) |
1365 | 1 | { |
1366 | 1 | int i, ncpus; |
1367 | 1 | char *p = strchr(line, '\n'); |
1368 | 1 | if (!p) |
1369 | 0 | return -1; |
1370 | 1 | ncpus = mask->size; |
1371 | | |
1372 | 2 | for (i = 0; p > line;i++) { |
1373 | 1 | char *oldp, *endp; |
1374 | 1 | oldp = p; |
1375 | 1 | if (*p == ',') |
1376 | 0 | --p; |
1377 | 9 | while (p > line && *p != ',') |
1378 | 8 | --p; |
1379 | | /* Eat two 32bit fields at a time to get longs */ |
1380 | 1 | if (p > line && sizeof(unsigned long) == 8) { |
1381 | 0 | oldp--; |
1382 | 0 | memmove(p, p+1, oldp-p+1); |
1383 | 0 | while (p > line && *p != ',') |
1384 | 0 | --p; |
1385 | 0 | } |
1386 | 1 | if (*p == ',') |
1387 | 0 | p++; |
1388 | 1 | if (i >= CPU_LONGS(ncpus)) |
1389 | 0 | return -1; |
1390 | 1 | mask->maskp[i] = strtoul(p, &endp, 16); |
1391 | 1 | if (endp != oldp) |
1392 | 0 | return -1; |
1393 | 1 | p--; |
1394 | 1 | } |
1395 | 1 | return 0; |
1396 | 1 | } |
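Worked example for the parser above: on a 64-bit build a sysfs cpumap line of 00000000,0000ffff has its comma spliced out so that the two 32-bit fields form one long, and strtoul() yields mask->maskp[0] = 0xffff, i.e. cpus 0-15.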
1397 | | |
1398 | | static void init_node_cpu_mask_v2(void) |
1399 | 1 | { |
1400 | 1 | int nnodes = numa_max_possible_node_v2_int() + 1; |
1401 | 1 | node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *)); |
1402 | 1 | } |
1403 | | |
1404 | | static void cleanup_node_cpu_mask_v2(void) |
1405 | 0 | { |
1406 | 0 | if (node_cpu_mask_v2) { |
1407 | 0 | int i; |
1408 | 0 | int nnodes; |
1409 | 0 | nnodes = numa_max_possible_node_v2_int() + 1; |
1410 | 0 | for (i = 0; i < nnodes; i++) { |
1411 | 0 | FREE_AND_ZERO(node_cpu_mask_v2[i]); |
1412 | 0 | } |
1413 | 0 | free(node_cpu_mask_v2); |
1414 | 0 | node_cpu_mask_v2 = NULL; |
1415 | 0 | } |
1416 | 0 | } |
1417 | | |
1418 | | /* This would be better with some locking, but I don't want to make libnuma |
1419 | | dependent on pthreads right now. The races are relatively harmless. */ |
1420 | | SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1") |
1421 | | int |
1422 | | numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen) |
1423 | 0 | { |
1424 | 0 | int err = 0; |
1425 | 0 | char fn[64]; |
1426 | 0 | FILE *f; |
1427 | 0 | char update; |
1428 | 0 | char *line = NULL; |
1429 | 0 | size_t len = 0; |
1430 | 0 | struct bitmask bitmask; |
1431 | 0 | int buflen_needed; |
1432 | 0 | unsigned long *mask; |
1433 | 0 | int ncpus = numa_num_possible_cpus(); |
1434 | 0 | int maxnode = numa_max_node_int(); |
1435 | |
1436 | 0 | buflen_needed = CPU_BYTES(ncpus); |
1437 | 0 | if ((unsigned)node > maxnode || bufferlen < buflen_needed) { |
1438 | 0 | errno = ERANGE; |
1439 | 0 | return -1; |
1440 | 0 | } |
1441 | 0 | if (bufferlen > buflen_needed) |
1442 | 0 | memset(buffer, 0, bufferlen); |
1443 | 0 | update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED); |
1444 | 0 | if (node_cpu_mask_v1[node] && !update) { |
1445 | 0 | memcpy(buffer, node_cpu_mask_v1[node], buflen_needed); |
1446 | 0 | return 0; |
1447 | 0 | } |
1448 | | |
1449 | 0 | mask = malloc(buflen_needed); |
1450 | 0 | if (!mask) |
1451 | 0 | mask = (unsigned long *)buffer; |
1452 | 0 | memset(mask, 0, buflen_needed); |
1453 | |
1454 | 0 | sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); |
1455 | 0 | f = fopen(fn, "r"); |
1456 | 0 | if (!f || getdelim(&line, &len, '\n', f) < 1) { |
1457 | 0 | if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { |
1458 | 0 | numa_warn(W_nosysfs2, |
1459 | 0 | "/sys not mounted or invalid. Assuming one node: %s", |
1460 | 0 | strerror(errno)); |
1461 | 0 | numa_warn(W_nosysfs2, |
1462 | 0 | "(cannot open or correctly parse %s)", fn); |
1463 | 0 | } |
1464 | 0 | bitmask.maskp = (unsigned long *)mask; |
1465 | 0 | bitmask.size = buflen_needed * 8; |
1466 | 0 | numa_bitmask_setall(&bitmask); |
1467 | 0 | err = -1; |
1468 | 0 | } |
1469 | 0 | if (f) |
1470 | 0 | fclose(f); |
1471 | |
1472 | 0 | if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) { |
1473 | 0 | numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); |
1474 | 0 | bitmask.maskp = (unsigned long *)mask; |
1475 | 0 | bitmask.size = buflen_needed * 8; |
1476 | 0 | numa_bitmask_setall(&bitmask); |
1477 | 0 | err = -1; |
1478 | 0 | } |
1479 | |
1480 | 0 | free(line); |
1481 | 0 | memcpy(buffer, mask, buflen_needed); |
1482 | | |
1483 | | /* slightly racy, see above */ |
1484 | 0 | if (node_cpu_mask_v1[node]) { |
1485 | 0 | if (update) { |
1486 | | /* |
1487 | | * There may be readers on node_cpu_mask_v1[], hence it can not |
1488 | | * be freed. |
1489 | | */ |
1490 | 0 | memcpy(node_cpu_mask_v1[node], mask, buflen_needed); |
1491 | 0 | free(mask); |
1492 | 0 | mask = NULL; |
1493 | 0 | } else if (mask != buffer) |
1494 | 0 | free(mask); |
1495 | 0 | } else { |
1496 | 0 | node_cpu_mask_v1[node] = mask; |
1497 | 0 | } |
1498 | 0 | return err; |
1499 | 0 | } |
1500 | | |
1501 | | /* |
1502 | | * test whether a node has cpus |
1503 | | */ |
1504 | | /* This would be better with some locking, but I don't want to make libnuma |
1505 | | dependent on pthreads right now. The races are relatively harmless. */ |
1506 | | /* |
1507 | | * deliver a bitmask of cpus representing the cpus on a given node |
1508 | | */ |
1509 | | SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2") |
1510 | | int |
1511 | | numa_node_to_cpus_v2(int node, struct bitmask *buffer) |
1512 | 181 | { |
1513 | 181 | int err = 0; |
1514 | 181 | int nnodes = numa_max_node(); |
1515 | 181 | char fn[64], *line = NULL; |
1516 | 181 | FILE *f; |
1517 | 181 | char update; |
1518 | 181 | size_t len = 0; |
1519 | 181 | struct bitmask *mask; |
1520 | | |
1521 | 181 | if (!node_cpu_mask_v2) |
1522 | 1 | init_node_cpu_mask_v2(); |
1523 | | |
1524 | 181 | if (node > nnodes) { |
1525 | 0 | errno = ERANGE; |
1526 | 0 | return -1; |
1527 | 0 | } |
1528 | 181 | numa_bitmask_clearall(buffer); |
1529 | | |
1530 | 181 | update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED); |
1531 | 181 | if (node_cpu_mask_v2[node] && !update) { |
1532 | | /* have already constructed a mask for this node */ |
1533 | 180 | if (buffer->size < node_cpu_mask_v2[node]->size) { |
1534 | 0 | errno = EINVAL; |
1535 | 0 | numa_error("map size mismatch"); |
1536 | 0 | return -1; |
1537 | 0 | } |
1538 | 180 | copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer); |
1539 | 180 | return 0; |
1540 | 180 | } |
1541 | | |
1542 | | /* need a new mask for this node */ |
1543 | 1 | mask = numa_allocate_cpumask(); |
1544 | 1 | if (!mask) |
1545 | 0 | return -1; |
1546 | | |
1547 | | /* this is a kernel cpumask_t (see node_read_cpumap()) */ |
1548 | 1 | sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node); |
1549 | 1 | f = fopen(fn, "r"); |
1550 | 1 | if (!f || getdelim(&line, &len, '\n', f) < 1) { |
1551 | 0 | if (numa_bitmask_isbitset(numa_nodes_ptr, node)) { |
1552 | 0 | numa_warn(W_nosysfs2, |
1553 | 0 | "/sys not mounted or invalid. Assuming one node: %s", |
1554 | 0 | strerror(errno)); |
1555 | 0 | numa_warn(W_nosysfs2, |
1556 | 0 | "(cannot open or correctly parse %s)", fn); |
1557 | 0 | } |
1558 | 0 | numa_bitmask_setall(mask); |
1559 | 0 | err = -1; |
1560 | 0 | } |
1561 | 1 | if (f) |
1562 | 1 | fclose(f); |
1563 | | |
1564 | 1 | if (line && (numa_parse_bitmap_v2(line, mask) < 0)) { |
1565 | 0 | numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node"); |
1566 | 0 | numa_bitmask_setall(mask); |
1567 | 0 | err = -1; |
1568 | 0 | } |
1569 | | |
1570 | 1 | free(line); |
1571 | 1 | copy_bitmask_to_bitmask(mask, buffer); |
1572 | | |
1573 | | /* slightly racy, see above */ |
1574 | | /* save the mask we created */ |
1575 | 1 | if (node_cpu_mask_v2[node]) { |
1576 | 0 | if (update) { |
1577 | 0 | copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]); |
1578 | 0 | numa_bitmask_free(mask); |
1579 | 0 | mask = NULL; |
1580 | | /* how could this be? */ |
1581 | 0 | } else if (mask != buffer) |
1582 | 0 | numa_bitmask_free(mask); |
1583 | 1 | } else { |
1584 | | /* we don't want to cache faulty result */ |
1585 | 1 | if (!err) |
1586 | 1 | node_cpu_mask_v2[node] = mask; |
1587 | 0 | else |
1588 | 0 | numa_bitmask_free(mask); |
1589 | 1 | } |
1590 | 1 | return err; |
1591 | 1 | } |
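Caller-side sketch for the v2 interface (exported as numa_node_to_cpus(); the helper name is illustrative, and the buffer must come from numa_allocate_cpumask()):

    #include <numa.h>
    #include <stdio.h>

    void list_node0_cpus(void)
    {
        struct bitmask *cpus = numa_allocate_cpumask();
        unsigned long i;

        if (cpus && numa_node_to_cpus(0, cpus) == 0)
            for (i = 0; i < cpus->size; i++)
                if (numa_bitmask_isbitset(cpus, i))
                    printf("node 0: cpu %lu\n", i);
        numa_bitmask_free(cpus);    /* safe on NULL, see above */
    }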
1592 | | |
1593 | | make_internal_alias(numa_node_to_cpus_v1); |
1594 | | make_internal_alias(numa_node_to_cpus_v2); |
1595 | | |
1596 | | void numa_node_to_cpu_update(void) |
1597 | 0 | { |
1598 | 0 | __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED); |
1599 | 0 | __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED); |
1600 | 0 | } |
1601 | | |
1602 | | /* report the node of the specified cpu */ |
1603 | | int numa_node_of_cpu(int cpu) |
1604 | 0 | { |
1605 | 0 | struct bitmask *bmp; |
1606 | 0 | int ncpus, nnodes, node, ret; |
1607 | |
1608 | 0 | ncpus = numa_num_possible_cpus(); |
1609 | 0 | if (cpu > ncpus){ |
1610 | 0 | errno = EINVAL; |
1611 | 0 | return -1; |
1612 | 0 | } |
1613 | 0 | bmp = numa_bitmask_alloc(ncpus); |
1614 | 0 | nnodes = numa_max_node(); |
1615 | 0 | for (node = 0; node <= nnodes; node++){ |
1616 | 0 | if (numa_node_to_cpus_v2_int(node, bmp) < 0) { |
1617 | | /* It's possible for the node to not exist */ |
1618 | 0 | continue; |
1619 | 0 | } |
1620 | 0 | if (numa_bitmask_isbitset(bmp, cpu)){ |
1621 | 0 | ret = node; |
1622 | 0 | goto end; |
1623 | 0 | } |
1624 | 0 | } |
1625 | 0 | ret = -1; |
1626 | 0 | errno = EINVAL; |
1627 | 0 | end: |
1628 | 0 | numa_bitmask_free(bmp); |
1629 | 0 | return ret; |
1630 | 0 | } |
1631 | | |
1632 | | SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1") |
1633 | | int |
1634 | | numa_run_on_node_mask_v1(const nodemask_t *mask) |
1635 | 0 | { |
1636 | 0 | int ncpus = numa_num_possible_cpus(); |
1637 | 0 | int i, k, err; |
1638 | 0 | unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)]; |
1639 | 0 | memset(cpus, 0, CPU_BYTES(ncpus)); |
1640 | 0 | for (i = 0; i < NUMA_NUM_NODES; i++) { |
1641 | 0 | if (mask->n[i / BITS_PER_LONG] == 0) |
1642 | 0 | continue; |
1643 | 0 | if (nodemask_isset_compat(mask, i)) { |
1644 | 0 | if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) { |
1645 | 0 | numa_warn(W_noderunmask, |
1646 | 0 | "Cannot read node cpumask from sysfs"); |
1647 | 0 | continue; |
1648 | 0 | } |
1649 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) |
1650 | 0 | cpus[k] |= nodecpus[k]; |
1651 | 0 | } |
1652 | 0 | } |
1653 | 0 | err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus); |
1654 | | |
1655 | | /* The sched_setaffinity API is broken because it expects |
1656 | | the user to guess the kernel cpuset size. Do this in a |
1657 | | brute force way. */ |
1658 | 0 | if (err < 0 && errno == EINVAL) { |
1659 | 0 | int savederrno = errno; |
1660 | 0 | char *bigbuf; |
1661 | 0 | static int size = -1; |
1662 | 0 | if (size == -1) |
1663 | 0 | size = CPU_BYTES(ncpus) * 2; |
1664 | 0 | bigbuf = malloc(CPU_BUFFER_SIZE); |
1665 | 0 | if (!bigbuf) { |
1666 | 0 | errno = ENOMEM; |
1667 | 0 | return -1; |
1668 | 0 | } |
1669 | 0 | errno = savederrno; |
1670 | 0 | while (size <= CPU_BUFFER_SIZE) { |
1671 | 0 | memcpy(bigbuf, cpus, CPU_BYTES(ncpus)); |
1672 | 0 | memset(bigbuf + CPU_BYTES(ncpus), 0, |
1673 | 0 | CPU_BUFFER_SIZE - CPU_BYTES(ncpus)); |
1674 | 0 | err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf); |
1675 | 0 | if (err == 0 || errno != EINVAL) |
1676 | 0 | break; |
1677 | 0 | size *= 2; |
1678 | 0 | } |
1679 | 0 | savederrno = errno; |
1680 | 0 | free(bigbuf); |
1681 | 0 | errno = savederrno; |
1682 | 0 | } |
1683 | 0 | return err; |
1684 | 0 | } |
1685 | | |
1686 | | /* |
1687 | | * Given a node mask (size of a kernel nodemask_t) (probably populated by |
1688 | | * a user argument list) set up a map of cpus (map "cpus") on those nodes. |
1689 | | * Then set affinity to those cpus. |
1690 | | */ |
1691 | | SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2") |
1692 | | int |
1693 | | numa_run_on_node_mask_v2(struct bitmask *bmp) |
1694 | 0 | { |
1695 | 0 | int ncpus, i, k, err; |
1696 | 0 | struct bitmask *cpus, *nodecpus; |
1697 | |
1698 | 0 | cpus = numa_allocate_cpumask();
1699 | 0 | nodecpus = numa_allocate_cpumask();
1700 | 0 | if (!cpus || !nodecpus) {
| | numa_bitmask_free(cpus); /* numa_bitmask_free(NULL) is a no-op */
| | numa_bitmask_free(nodecpus);
1701 | 0 | return -1;
| | }
1702 | 0 | ncpus = cpus->size; /* only dereference after the NULL check */
1703 | | |
1704 | 0 | for (i = 0; i < bmp->size; i++) { |
1705 | 0 | if (bmp->maskp[i / BITS_PER_LONG] == 0) |
1706 | 0 | continue; |
1707 | 0 | if (numa_bitmask_isbitset(bmp, i)) { |
1708 | | /* |
1709 | | * numa_all_nodes_ptr is cpuset aware; use only |
1710 | | * these nodes |
1711 | | */ |
1712 | 0 | if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { |
1713 | 0 | numa_warn(W_noderunmask, |
1714 | 0 | "node %d not allowed", i); |
1715 | 0 | continue; |
1716 | 0 | } |
1717 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1718 | 0 | numa_warn(W_noderunmask, |
1719 | 0 | "Cannot read node cpumask from sysfs"); |
1720 | 0 | continue; |
1721 | 0 | } |
1722 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) |
1723 | 0 | cpus->maskp[k] |= nodecpus->maskp[k]; |
1724 | 0 | } |
1725 | 0 | } |
1726 | 0 | err = numa_sched_setaffinity_v2_int(0, cpus); |
1727 | |
1728 | 0 | numa_bitmask_free(cpus); |
1729 | 0 | numa_bitmask_free(nodecpus); |
1730 | | |
1731 | | /* This used to be able to fail legitimately; it no longer should. */
1732 | 0 | if (err < 0) { |
1733 | 0 | numa_error("numa_sched_setaffinity_v2_int() failed"); |
1734 | 0 | } |
1735 | |
1736 | 0 | return err; |
1737 | 0 | } |
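| |
| | /* Illustrative usage sketch (not from the original source; assumes a
| |  * machine where node 1 exists): restrict the calling task to the cpus
| |  * of node 1 via the default (v2) entry point numa_run_on_node_mask().
| |  *
| |  *	struct bitmask *nodes = numa_allocate_nodemask();
| |  *	numa_bitmask_setbit(nodes, 1);
| |  *	if (numa_run_on_node_mask(nodes) < 0)
| |  *		perror("numa_run_on_node_mask");
| |  *	numa_bitmask_free(nodes);
| |  */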
1738 | | |
1739 | | make_internal_alias(numa_run_on_node_mask_v2); |
1740 | | |
1741 | | /* |
1742 | | * Given a bitmask of nodes (typically built from a user-supplied
1743 | | * argument list), compute the map of cpus on those nodes ("cpus")
1744 | | * without any cpuset awareness, then set affinity to those cpus.
1745 | | */ |
1746 | | int |
1747 | | numa_run_on_node_mask_all(struct bitmask *bmp) |
1748 | 0 | { |
1749 | 0 | int ncpus, i, k, err; |
1750 | 0 | struct bitmask *cpus, *nodecpus; |
1751 | |
1752 | 0 | cpus = numa_allocate_cpumask();
1753 | 0 | nodecpus = numa_allocate_cpumask();
1754 | 0 | if (!cpus || !nodecpus) {
| | numa_bitmask_free(cpus); /* numa_bitmask_free(NULL) is a no-op */
| | numa_bitmask_free(nodecpus);
1755 | 0 | return -1;
| | }
1756 | 0 | ncpus = cpus->size; /* only dereference after the NULL check */
1757 | | |
1758 | 0 | for (i = 0; i < bmp->size; i++) { |
1759 | 0 | if (bmp->maskp[i / BITS_PER_LONG] == 0) |
1760 | 0 | continue; |
1761 | 0 | if (numa_bitmask_isbitset(bmp, i)) { |
1762 | 0 | if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) { |
1763 | 0 | numa_warn(W_noderunmask, |
1764 | 0 | "node %d not allowed", i); |
1765 | 0 | continue; |
1766 | 0 | } |
1767 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1768 | 0 | numa_warn(W_noderunmask, |
1769 | 0 | "Cannot read node cpumask from sysfs"); |
1770 | 0 | continue; |
1771 | 0 | } |
1772 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) |
1773 | 0 | cpus->maskp[k] |= nodecpus->maskp[k]; |
1774 | 0 | } |
1775 | 0 | } |
1776 | 0 | err = numa_sched_setaffinity_v2_int(0, cpus); |
1777 | |
1778 | 0 | numa_bitmask_free(cpus); |
1779 | 0 | numa_bitmask_free(nodecpus); |
1780 | | |
1781 | | /* Since any possible node may be requested here, this can easily fail. */
1782 | 0 | if (err < 0) { |
1783 | 0 | numa_error("numa_sched_setaffinity_v2_int() failed"); |
1784 | 0 | } |
1785 | |
1786 | 0 | return err; |
1787 | 0 | } |
1788 | | |
1789 | | SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1") |
1790 | | nodemask_t |
1791 | | numa_get_run_node_mask_v1(void) |
1792 | 0 | { |
1793 | 0 | int ncpus = numa_num_configured_cpus(); |
1794 | 0 | int i, k; |
1795 | 0 | int max = numa_max_node_int(); |
1796 | 0 | struct bitmask *bmp, *cpus, *nodecpus; |
1797 | 0 | nodemask_t nmp; |
1798 | |
1799 | 0 | cpus = numa_allocate_cpumask(); |
1800 | 0 | if (!cpus) |
1801 | 0 | return numa_no_nodes; |
1802 | 0 | if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ |
1803 | 0 | nmp = numa_no_nodes; |
1804 | 0 | goto free_cpus; |
1805 | 0 | } |
1806 | | |
1807 | 0 | nodecpus = numa_allocate_cpumask(); |
1808 | 0 | if (!nodecpus) { |
1809 | 0 | nmp = numa_no_nodes; |
1810 | 0 | goto free_cpus; |
1811 | 0 | } |
1812 | | |
1813 | 0 | bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */ |
1814 | 0 | if (!bmp) { |
1815 | 0 | nmp = numa_no_nodes; |
1816 | 0 | goto free_cpus2; |
1817 | 0 | } |
1818 | | |
1819 | 0 | for (i = 0; i <= max; i++) { |
1820 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1821 | | /* It's possible for the node to not exist */ |
1822 | 0 | continue; |
1823 | 0 | } |
1824 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) { |
1825 | 0 | if (nodecpus->maskp[k] & cpus->maskp[k]) |
1826 | 0 | numa_bitmask_setbit(bmp, i); |
1827 | 0 | } |
1828 | 0 | } |
1829 | 0 | copy_bitmask_to_nodemask(bmp, &nmp); |
1830 | 0 | numa_bitmask_free(bmp); |
1831 | 0 | free_cpus2: |
1832 | 0 | numa_bitmask_free(nodecpus); |
1833 | 0 | free_cpus: |
1834 | 0 | numa_bitmask_free(cpus); |
1835 | 0 | return nmp; |
1836 | 0 | } |
1837 | | |
1838 | | SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2") |
1839 | | struct bitmask * |
1840 | | numa_get_run_node_mask_v2(void) |
1841 | 0 | { |
1842 | 0 | int i, k; |
1843 | 0 | int ncpus = numa_num_configured_cpus(); |
1844 | 0 | int max = numa_max_node_int(); |
1845 | 0 | struct bitmask *bmp, *cpus, *nodecpus; |
1846 | |
1847 | 0 | bmp = numa_allocate_cpumask(); |
1848 | 0 | cpus = numa_allocate_cpumask(); |
1849 | 0 | if (!bmp || !cpus) |
1850 | 0 | return NULL; |
1851 | 0 | if (numa_sched_getaffinity_v2_int(0, cpus) < 0){ |
1852 | 0 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp); |
1853 | 0 | goto free_cpus; |
1854 | 0 | } |
1855 | | |
1856 | 0 | nodecpus = numa_allocate_cpumask();
| | if (!nodecpus) /* return the still-empty node mask on failure */
| | 	goto free_cpus;
1857 | 0 | for (i = 0; i <= max; i++) { |
1858 | | /* |
1859 | | * numa_all_nodes_ptr is cpuset aware; show only |
1860 | | * these nodes |
1861 | | */ |
1862 | 0 | if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) { |
1863 | 0 | continue; |
1864 | 0 | } |
1865 | 0 | if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) { |
1866 | | /* It's possible for the node to not exist */ |
1867 | 0 | continue; |
1868 | 0 | } |
1869 | 0 | for (k = 0; k < CPU_LONGS(ncpus); k++) { |
1870 | 0 | if (nodecpus->maskp[k] & cpus->maskp[k]) |
1871 | 0 | numa_bitmask_setbit(bmp, i); |
1872 | 0 | } |
1873 | 0 | } |
1874 | 0 | numa_bitmask_free(nodecpus); |
1875 | 0 | free_cpus: |
1876 | 0 | numa_bitmask_free(cpus); |
1877 | 0 | return bmp; |
1878 | 0 | } |
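| |
| | /* Illustrative usage sketch (not from the original source): report the
| |  * nodes whose cpus this task may currently run on.
| |  *
| |  *	struct bitmask *run = numa_get_run_node_mask();
| |  *	int n;
| |  *	if (run) {
| |  *		for (n = 0; n <= numa_max_node(); n++)
| |  *			if (numa_bitmask_isbitset(run, n))
| |  *				printf("may run on node %d\n", n);
| |  *		numa_bitmask_free(run);
| |  *	}
| |  */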
1879 | | |
1880 | | int |
1881 | | numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes) |
1882 | 0 | { |
1883 | 0 | int numa_num_nodes = numa_num_possible_nodes(); |
1884 | |
1885 | 0 | return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp, |
1886 | 0 | tonodes->maskp); |
1887 | 0 | } |
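| |
| | /* Illustrative usage sketch (not from the original source): move all of
| |  * the calling process's pages from node 0 to node 1; migrate_pages(2)
| |  * treats pid 0 as "the calling process".
| |  *
| |  *	struct bitmask *from = numa_allocate_nodemask();
| |  *	struct bitmask *to = numa_allocate_nodemask();
| |  *	numa_bitmask_setbit(from, 0);
| |  *	numa_bitmask_setbit(to, 1);
| |  *	if (numa_migrate_pages(0, from, to) < 0)
| |  *		perror("numa_migrate_pages");
| |  *	numa_bitmask_free(from);
| |  *	numa_bitmask_free(to);
| |  */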
1888 | | |
1889 | | int numa_move_pages(int pid, unsigned long count, |
1890 | | void **pages, const int *nodes, int *status, int flags) |
1891 | 0 | { |
1892 | 0 | return move_pages(pid, count, pages, nodes, status, flags); |
1893 | 0 | } |
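| |
| | /* Illustrative usage sketch (not from the original source; "addr" is a
| |  * hypothetical page-aligned address in this process): move one page to
| |  * node 0 and read back where it landed. Each status[] entry receives a
| |  * node number or a negative errno.
| |  *
| |  *	void *pages[1] = { addr };
| |  *	int nodes[1] = { 0 };
| |  *	int status[1];
| |  *	if (numa_move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
| |  *		printf("page now on node %d\n", status[0]);
| |  */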
1894 | | |
1895 | | int numa_run_on_node(int node) |
1896 | 0 | { |
1897 | 0 | int numa_num_nodes = numa_num_possible_nodes(); |
1898 | 0 | int ret = -1; |
1899 | 0 | struct bitmask *cpus; |
1900 | |
1901 | 0 | if (node < -1 || node >= numa_num_nodes){ /* -1 means "any node" below */
1902 | 0 | errno = EINVAL; |
1903 | 0 | goto out; |
1904 | 0 | } |
1905 | | |
1906 | 0 | cpus = numa_allocate_cpumask(); |
1907 | 0 | if (!cpus) |
1908 | 0 | return -1; |
1909 | | |
1910 | 0 | if (node == -1) |
1911 | 0 | numa_bitmask_setall(cpus); |
1912 | 0 | else if (numa_node_to_cpus_v2_int(node, cpus) < 0){ |
1913 | 0 | numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs"); |
1914 | 0 | goto free; |
1915 | 0 | } |
1916 | | |
1917 | 0 | ret = numa_sched_setaffinity_v2_int(0, cpus); |
1918 | 0 | free: |
1919 | 0 | numa_bitmask_free(cpus); |
1920 | 0 | out: |
1921 | 0 | return ret; |
1922 | 0 | } |
1923 | | |
1924 | | static struct bitmask *__numa_preferred(void) |
1925 | 0 | { |
1926 | 0 | int policy = 0; |
1927 | 0 | struct bitmask *bmp; |
1928 | |
1929 | 0 | bmp = numa_allocate_nodemask(); |
1930 | 0 | if (!bmp) |
1931 | 0 | return NULL; |
1932 | | /* could read the current CPU from /proc/self/status. Probably |
1933 | | not worth it. */ |
1934 | 0 | numa_bitmask_clearall(bmp); |
1935 | 0 | getpol(&policy, bmp); |
1936 | |
1937 | 0 | if (policy != MPOL_PREFERRED && |
1938 | 0 | policy != MPOL_PREFERRED_MANY && |
1939 | 0 | policy != MPOL_BIND) |
1940 | 0 | return bmp; |
1941 | | |
1942 | 0 | if (policy == MPOL_PREFERRED && numa_bitmask_weight(bmp) > 1) { |
1943 | 0 | errno = EINVAL; |
1944 | 0 | numa_error(__FILE__); |
1945 | 0 | } |
1946 | |
1947 | 0 | return bmp; |
1948 | 0 | } |
1949 | | |
1950 | | int numa_preferred_err(void) |
1951 | 0 | { |
1952 | 0 | int first_node = 0; |
1953 | 0 | struct bitmask *bmp; |
1954 | |
1955 | 0 | bmp = __numa_preferred();
| | if (!bmp) /* numa_find_first() must not see a NULL mask */
| | 	return -1;
1956 | 0 | first_node = numa_find_first(bmp);
1957 | 0 | numa_bitmask_free(bmp); |
1958 | | |
1959 | 0 | return first_node; |
1960 | 0 | } |
1961 | | |
1962 | | int numa_preferred(void) |
1963 | 0 | { |
1964 | 0 | int first_node = 0; |
1965 | |
1966 | 0 | first_node = numa_preferred_err(); |
1967 | 0 | first_node = first_node >= 0 ? first_node : 0; |
1968 | |
1969 | 0 | return first_node; |
1970 | 0 | } |
1971 | | |
1972 | | static void __numa_set_preferred(struct bitmask *bmp) |
1973 | 0 | { |
1974 | 0 | int nodes = numa_bitmask_weight(bmp); |
1975 | 0 | if (nodes > 1) { |
1976 | 0 | errno = EINVAL; |
1977 | 0 | numa_error(__FILE__); |
1978 | 0 | } |
1979 | |
|
1980 | 0 | setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp); |
1981 | 0 | } |
1982 | | |
1983 | | void numa_set_preferred(int node) |
1984 | 0 | { |
1985 | 0 | struct bitmask *bmp = numa_allocate_nodemask(); |
1986 | 0 | if (!bmp) |
1987 | 0 | return; |
1988 | 0 | numa_bitmask_setbit(bmp, node); |
1989 | 0 | __numa_set_preferred(bmp); |
1990 | 0 | numa_bitmask_free(bmp); |
1991 | 0 | } |
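| |
| | /* Illustrative usage sketch (not from the original source): prefer
| |  * node 0 for future allocations, falling back to other nodes when it
| |  * is full, then allocate with the current task policy.
| |  *
| |  *	numa_set_preferred(0);
| |  *	void *p = numa_alloc(4096);
| |  *	...
| |  *	numa_free(p, 4096);
| |  */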
1992 | | |
1993 | | int numa_has_preferred_many(void) |
1994 | 0 | { |
1995 | 0 | set_preferred_many(); |
1996 | 0 | return has_preferred_many; |
1997 | 0 | } |
1998 | | |
1999 | | void numa_set_preferred_many(struct bitmask *bitmask) |
2000 | 0 | { |
2001 | 0 | int first_node = 0; |
2002 | |
2003 | 0 | set_preferred_many(); |
2004 | 0 | if (!has_preferred_many) { |
2005 | 0 | numa_warn(W_nodeparse, |
2006 | 0 | "Unable to handle MANY preferred nodes. Falling back to first node\n"); |
2007 | 0 | first_node = numa_find_first(bitmask); |
2008 | 0 | numa_set_preferred(first_node); |
2009 | 0 | return; |
2010 | 0 | } |
2011 | 0 | setpol(MPOL_PREFERRED_MANY, bitmask); |
2012 | 0 | } |
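| |
| | /* Illustrative usage sketch (not from the original source; assumes
| |  * nodes 0 and 2 exist): prefer a set of nodes where the kernel supports
| |  * MPOL_PREFERRED_MANY; on older kernels the code above falls back to
| |  * the first node of the set.
| |  *
| |  *	struct bitmask *pref = numa_allocate_nodemask();
| |  *	numa_bitmask_setbit(pref, 0);
| |  *	numa_bitmask_setbit(pref, 2);
| |  *	numa_set_preferred_many(pref);
| |  *	numa_bitmask_free(pref);
| |  */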
2013 | | |
2014 | | struct bitmask *numa_preferred_many(void)
2015 | 0 | { |
2016 | 0 | return __numa_preferred(); |
2017 | 0 | } |
2018 | | |
2019 | | void numa_set_localalloc(void) |
2020 | 0 | { |
2021 | 0 | setpol(MPOL_LOCAL, numa_no_nodes_ptr); |
2022 | 0 | } |
2023 | | |
2024 | | SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1") |
2025 | | void numa_bind_v1(const nodemask_t *nodemask) |
2026 | 0 | { |
2027 | 0 | struct bitmask bitmask; |
2028 | |
2029 | 0 | bitmask.maskp = (unsigned long *)nodemask; |
2030 | 0 | bitmask.size = sizeof(nodemask_t) * 8; /* bitmask sizes are in bits */
2031 | 0 | numa_run_on_node_mask_v2_int(&bitmask); |
2032 | 0 | numa_set_membind_v2_int(&bitmask); |
2033 | 0 | } |
2034 | | |
2035 | | SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2") |
2036 | | void numa_bind_v2(struct bitmask *bmp) |
2037 | 0 | { |
2038 | 0 | numa_run_on_node_mask_v2_int(bmp); |
2039 | 0 | numa_set_membind_v2_int(bmp); |
2040 | 0 | } |
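| |
| | /* Illustrative usage sketch (not from the original source): bind both
| |  * execution and memory of this task to node 0; numa_bind() combines
| |  * numa_run_on_node_mask() and numa_set_membind().
| |  *
| |  *	struct bitmask *nodes = numa_parse_nodestring("0");
| |  *	if (nodes) {
| |  *		numa_bind(nodes);
| |  *		numa_bitmask_free(nodes);
| |  *	}
| |  */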
2041 | | |
2042 | | void numa_set_strict(int flag) |
2043 | 0 | { |
2044 | 0 | if (flag) |
2045 | 0 | mbind_flags |= MPOL_MF_STRICT; |
2046 | 0 | else |
2047 | 0 | mbind_flags &= ~MPOL_MF_STRICT; |
2048 | 0 | } |
2049 | | |
2050 | | /* |
2051 | | * Extract a node or processor number from the given string. |
2052 | | * Allow a relative node / processor specification within the allowed |
2053 | | * set if "relative" is nonzero.
2054 | | */ |
2055 | | static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative) |
2056 | 10.4k | { |
2057 | 10.4k | long i, nr; |
2058 | | |
2059 | 10.4k | if (!relative) |
2060 | 6.20k | return strtoul(s, end, 0); |
2061 | | |
2062 | 4.27k | nr = strtoul(s, end, 0); |
2063 | 4.27k | if (s == *end) |
2064 | 8 | return nr; |
2065 | | /* Find the nth set bit */ |
2066 | 101k | for (i = 0; nr >= 0 && i <= bmp->size; i++) |
2067 | 97.4k | if (numa_bitmask_isbitset(bmp, i)) |
2068 | 5.81k | nr--; |
2069 | 4.26k | return i-1; |
2070 | 4.27k | } |
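| |
| | /* Worked example (assumed allowed set {0,2,4}, i.e. bits 0, 2 and 4
| |  * set): for the relative spec "+1", nr starts at 1 and the loop counts
| |  * it down once per set bit, so it stops after the second allowed node
| |  * and get_nr() returns 2; "+0" returns 0 and "+2" returns 4. */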
2071 | | |
2072 | | /* |
2073 | | * __numa_parse_nodestring() is called to create a node mask, given |
2074 | | * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2075 | | * (the + indicates that the numbers are nodeset-relative) |
2076 | | * |
2077 | | * The nodes may be specified as absolute, or relative to the current nodeset. |
2078 | | * The list of available nodes is in a map pointed to by "allowed_nodes_ptr", |
2079 | | * which may represent all nodes or the nodes in the current nodeset. |
2080 | | * |
2081 | | * The caller must free the returned bitmask. |
2082 | | */ |
2083 | | static struct bitmask * |
2084 | | __numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr) |
2085 | 675 | { |
2086 | 675 | int invert = 0, relative = 0; |
2087 | 675 | int conf_nodes = numa_num_configured_nodes(); |
2088 | 675 | char *end; |
2089 | 675 | struct bitmask *mask; |
2090 | | |
2091 | 675 | mask = numa_allocate_nodemask(); |
2092 | 675 | if (!mask) |
2093 | 0 | return NULL; |
2094 | | |
2095 | 675 | if (s[0] == 0){ |
2096 | 1 | copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask); |
2097 | 1 | return mask; /* return freeable mask */ |
2098 | 1 | } |
2099 | 674 | if (*s == '!') { |
2100 | 3 | invert = 1; |
2101 | 3 | s++; |
2102 | 3 | } |
2103 | 674 | if (*s == '+') { |
2104 | 274 | relative++; |
2105 | 274 | s++; |
2106 | 274 | } |
2107 | 2.80k | do { |
2108 | 2.80k | unsigned long arg; |
2109 | 2.80k | int i; |
2110 | 2.80k | if (isalpha(*s)) { |
2111 | 141 | int n; |
2112 | 141 | if (!strcmp(s,"all")) { |
2113 | 1 | copy_bitmask_to_bitmask(allowed_nodes_ptr, |
2114 | 1 | mask); |
2115 | 1 | s+=4; |
2116 | 1 | break; |
2117 | 1 | } |
2118 | 140 | n = resolve_affinity(s, mask); |
2119 | 140 | if (n != NO_IO_AFFINITY) { |
2120 | 57 | if (n < 0) |
2121 | 57 | goto err; |
2122 | 0 | s += strlen(s) + 1; |
2123 | 0 | break; |
2124 | 57 | } |
2125 | 140 | } |
2126 | 2.74k | arg = get_nr(s, &end, allowed_nodes_ptr, relative); |
2127 | 2.74k | if (end == s) { |
2128 | 116 | numa_warn(W_nodeparse, "unparseable node description `%s'\n", s); |
2129 | 116 | goto err; |
2130 | 116 | } |
2131 | 2.62k | if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) { |
2132 | 201 | numa_warn(W_nodeparse, "node argument %ld is out of range\n", arg); |
2133 | 201 | goto err; |
2134 | 201 | } |
2135 | 2.42k | i = arg; |
2136 | 2.42k | numa_bitmask_setbit(mask, i); |
2137 | 2.42k | s = end; |
2138 | 2.42k | if (*s == '-') { |
2139 | 1.47k | char *end2; |
2140 | 1.47k | unsigned long arg2; |
2141 | 1.47k | arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative); |
2142 | 1.47k | if (end2 == s) { |
2143 | 9 | numa_warn(W_nodeparse, "missing node argument %s\n", s); |
2144 | 9 | goto err; |
2145 | 9 | } |
2146 | 1.46k | if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) { |
2147 | 178 | numa_warn(W_nodeparse, "node argument %ld out of range\n", arg2); |
2148 | 178 | goto err; |
2149 | 178 | } |
2150 | 2.33k | while (arg <= arg2) { |
2151 | 1.04k | i = arg; |
2152 | 1.04k | if (numa_bitmask_isbitset(allowed_nodes_ptr,i)) |
2153 | 1.04k | numa_bitmask_setbit(mask, i); |
2154 | 1.04k | arg++; |
2155 | 1.04k | } |
2156 | 1.29k | s = end2; |
2157 | 1.29k | } |
2158 | 2.42k | } while (*s++ == ','); |
2159 | 113 | if (s[-1] != '\0') |
2160 | 15 | goto err; |
2161 | 98 | if (invert) { |
2162 | 1 | int i; |
2163 | 2 | for (i = 0; i < conf_nodes; i++) { |
2164 | 1 | if (numa_bitmask_isbitset(mask, i)) |
2165 | 1 | numa_bitmask_clearbit(mask, i); |
2166 | 0 | else |
2167 | 0 | numa_bitmask_setbit(mask, i); |
2168 | 1 | } |
2169 | 1 | } |
2170 | 98 | return mask; |
2171 | | |
2172 | 576 | err: |
2173 | 576 | numa_bitmask_free(mask); |
2174 | 576 | return NULL; |
2175 | 113 | } |
2176 | | |
2177 | | /* |
2178 | | * numa_parse_nodestring() is called to create a bitmask from nodes available |
2179 | | * for this task. |
2180 | | */ |
2181 | | |
2182 | | struct bitmask * numa_parse_nodestring(const char *s) |
2183 | 675 | { |
2184 | 675 | return __numa_parse_nodestring(s, numa_all_nodes_ptr); |
2185 | 675 | } |
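| |
| | /* Illustrative usage sketch (not from the original source): parse a
| |  * node list from a command line. "1,3-4" yields nodes {1,3,4}; "+0-1"
| |  * picks the first two nodes of the current nodeset; "!0" selects the
| |  * configured nodes other than node 0.
| |  *
| |  *	struct bitmask *nodes = numa_parse_nodestring("1,3-4");
| |  *	if (!nodes) {
| |  *		fprintf(stderr, "bad node list\n");
| |  *		exit(1);
| |  *	}
| |  *	numa_set_membind(nodes);
| |  *	numa_bitmask_free(nodes);
| |  */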
2186 | | |
2187 | | /* |
2188 | | * numa_parse_nodestring_all() is called to create a bitmask from all nodes |
2189 | | * available. |
2190 | | */ |
2191 | | |
2192 | | struct bitmask * numa_parse_nodestring_all(const char *s) |
2193 | 0 | { |
2194 | 0 | return __numa_parse_nodestring(s, numa_possible_nodes_ptr); |
2195 | 0 | } |
2196 | | |
2197 | | /* |
2198 | | * __numa_parse_cpustring() is called to create a bitmask, given |
2199 | | * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10. |
2200 | | * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2201 | | * |
2202 | | * The cpus may be specified as absolute, or relative to the current cpuset. |
2203 | | * The list of available cpus for this task is in the map pointed to by |
2204 | | * "allowed_cpus_ptr", which may represent all cpus or the cpus in the |
2205 | | * current cpuset. |
2206 | | * |
2207 | | * The caller must free the returned bitmask. |
2208 | | */ |
2209 | | static struct bitmask * |
2210 | | __numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr) |
2211 | 675 | { |
2212 | 675 | int invert = 0, relative=0; |
2213 | 675 | int conf_cpus = numa_num_configured_cpus(); |
2214 | 675 | char *end; |
2215 | 675 | struct bitmask *mask; |
2216 | 675 | int i; |
2217 | | |
2218 | 675 | mask = numa_allocate_cpumask(); |
2219 | 675 | if (!mask) |
2220 | 0 | return NULL; |
2221 | | |
2222 | 675 | if (s[0] == 0) |
2223 | 1 | return mask; |
2224 | 674 | if (*s == '!') { |
2225 | 3 | invert = 1; |
2226 | 3 | s++; |
2227 | 3 | } |
2228 | 674 | if (*s == '+') { |
2229 | 274 | relative++; |
2230 | 274 | s++; |
2231 | 274 | } |
2232 | 3.96k | do { |
2233 | 3.96k | unsigned long arg; |
2234 | | |
2235 | 3.96k | if (!strcmp(s,"all")) { |
2236 | 1 | copy_bitmask_to_bitmask(allowed_cpus_ptr, mask); |
2237 | 1 | s+=4; |
2238 | 1 | break; |
2239 | 1 | } |
2240 | 3.96k | arg = get_nr(s, &end, allowed_cpus_ptr, relative); |
2241 | 3.96k | if (end == s) { |
2242 | 182 | numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s); |
2243 | 182 | goto err; |
2244 | 182 | } |
2245 | 3.78k | if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) { |
2246 | 132 | numa_warn(W_cpuparse, "cpu argument %ld is out of range\n", arg);
2247 | 132 | goto err; |
2248 | 132 | } |
2249 | 3.65k | i = arg; |
2250 | 3.65k | numa_bitmask_setbit(mask, i); |
2251 | 3.65k | s = end; |
2252 | 3.65k | if (*s == '-') { |
2253 | 2.28k | char *end2; |
2254 | 2.28k | unsigned long arg2; |
2255 | 2.28k | arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative); |
2256 | 2.28k | if (end2 == s) { |
2257 | 13 | numa_warn(W_cpuparse, "missing cpu argument %s\n", s); |
2258 | 13 | goto err; |
2259 | 13 | } |
2260 | 2.27k | if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) { |
2261 | 152 | numa_warn(W_cpuparse, "cpu argument %ld out of range\n", arg2);
2262 | 152 | goto err; |
2263 | 152 | } |
2264 | 4.29k | while (arg <= arg2) { |
2265 | 2.16k | i = arg; |
2266 | 2.16k | if (numa_bitmask_isbitset(allowed_cpus_ptr, i)) |
2267 | 2.16k | numa_bitmask_setbit(mask, i); |
2268 | 2.16k | arg++; |
2269 | 2.16k | } |
2270 | 2.12k | s = end2; |
2271 | 2.12k | } |
2272 | 3.65k | } while (*s++ == ','); |
2273 | 195 | if (s[-1] != '\0') |
2274 | 15 | goto err; |
2275 | 180 | if (invert) { |
2276 | 66 | for (i = 0; i < conf_cpus; i++) { |
2277 | 64 | if (numa_bitmask_isbitset(mask, i)) |
2278 | 3 | numa_bitmask_clearbit(mask, i); |
2279 | 61 | else |
2280 | 61 | numa_bitmask_setbit(mask, i); |
2281 | 64 | } |
2282 | 2 | } |
2283 | 180 | return mask; |
2284 | | |
2285 | 494 | err: |
2286 | 494 | numa_bitmask_free(mask); |
2287 | 494 | return NULL; |
2288 | 195 | } |
2289 | | |
2290 | | /* |
2291 | | * numa_parse_cpustring() is called to create a bitmask from cpus available |
2292 | | * for this task. |
2293 | | */ |
2294 | | |
2295 | | struct bitmask * numa_parse_cpustring(const char *s) |
2296 | 675 | { |
2297 | 675 | return __numa_parse_cpustring(s, numa_all_cpus_ptr); |
2298 | 675 | } |
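| |
| | /* Illustrative usage sketch (not from the original source): pin this
| |  * task to a parsed cpu list such as "0-3", or "!0" for all configured
| |  * cpus except cpu 0.
| |  *
| |  *	struct bitmask *cpus = numa_parse_cpustring("0-3");
| |  *	if (cpus) {
| |  *		numa_sched_setaffinity(0, cpus);
| |  *		numa_bitmask_free(cpus);
| |  *	}
| |  */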
2299 | | |
2300 | | /* |
2301 | | * numa_parse_cpustring_all() is called to create a bitmask from all cpus |
2302 | | * available. |
2303 | | */ |
2304 | | |
2305 | | struct bitmask * numa_parse_cpustring_all(const char *s) |
2306 | 0 | { |
2307 | 0 | return __numa_parse_cpustring(s, numa_possible_cpus_ptr); |
2308 | 0 | } |
2309 | | |
2310 | | int numa_has_home_node(void) |
2311 | 0 | { |
2312 | 0 | void *mem; |
2313 | 0 | static int has_home_node = -1; |
2314 | 0 | int page_size = numa_pagesize(); |
2315 | 0 | struct bitmask *tmp = numa_get_mems_allowed(); |
2316 | |
|
2317 | 0 | if (has_home_node >= 0) |
2318 | 0 | goto out; |
2319 | | |
2320 | 0 | has_home_node = 0; |
2321 | | /* Detect whether home_node is supported */ |
2322 | 0 | mem = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); |
2323 | 0 | if (mem != MAP_FAILED) { |
2324 | 0 | dombind(mem, page_size, MPOL_BIND, tmp); |
2325 | 0 | if (set_mempolicy_home_node(mem, page_size, numa_find_first(tmp), 0) == 0) |
2326 | 0 | has_home_node = 1; |
2327 | 0 | munmap(mem, page_size); |
2328 | 0 | } |
2329 | |
|
2330 | 0 | out: |
2331 | 0 | numa_bitmask_free(tmp); |
2332 | 0 | return has_home_node; |
2333 | 0 | } |
2334 | | |
2335 | | int numa_set_mempolicy_home_node(void *start, unsigned long len, int home_node, int flags) |
2336 | 0 | { |
2337 | 0 | if (set_mempolicy_home_node(start, len, home_node, flags)) { |
2338 | 0 | numa_error("set_mempolicy_home_node"); |
2339 | 0 | return -1; |
2340 | 0 | } |
2341 | | |
2342 | 0 | return 0; |
2343 | 0 | } |
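| |
| | /* Illustrative usage sketch (not from the original source; "mem" and
| |  * "len" are a hypothetical page-aligned region and its size): bind the
| |  * range to nodes 0-1, then make node 0 the home node the kernel tries
| |  * first. Guard with numa_has_home_node().
| |  *
| |  *	struct bitmask *nodes = numa_parse_nodestring("0-1");
| |  *	numa_tonodemask_memory(mem, len, nodes);
| |  *	if (numa_has_home_node() &&
| |  *	    numa_set_mempolicy_home_node(mem, len, 0, 0) == 0)
| |  *		printf("home node set\n");
| |  *	numa_bitmask_free(nodes);
| |  */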