Coverage Report

Created: 2025-07-18 06:29

/src/numactl/libnuma.c
Line    Count    Source
1
/* Simple NUMA library.
2
   Copyright (C) 2003,2004,2005,2008 Andi Kleen, SuSE Labs and
3
   Cliff Wickman, SGI.
4
5
   libnuma is free software; you can redistribute it and/or
6
   modify it under the terms of the GNU Lesser General Public
7
   License as published by the Free Software Foundation; version
8
   2.1.
9
10
   libnuma is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
   Lesser General Public License for more details.
14
15
   You should find a copy of v2.1 of the GNU Lesser General Public License
16
   somewhere on your Linux system; if not, write to the Free Software
17
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
   All calls are undefined when numa_available returns an error. */
20
#define _GNU_SOURCE 1
21
#include <stdlib.h>
22
#include <stdio.h>
23
#include <unistd.h>
24
#include <string.h>
25
#include <sched.h>
26
#include <dirent.h>
27
#include <errno.h>
28
#include <stdarg.h>
29
#include <ctype.h>
30
#include <assert.h>
31
32
#include <sys/mman.h>
33
#include <limits.h>
34
35
#include "config.h"
36
#include "numa.h"
37
#include "numaif.h"
38
#include "numaint.h"
39
#include "util.h"
40
#include "affinity.h"
41
42
#define WEAK __attribute__((weak))
43
44
0
#define CPU_BUFFER_SIZE 4096     /* This limits you to 32768 CPUs */
45
46
/* these are the old (version 1) masks */
47
nodemask_t numa_no_nodes;
48
nodemask_t numa_all_nodes;
49
/* these are now the default bitmask (pointers to) (version 2) */
50
struct bitmask *numa_no_nodes_ptr = NULL;
51
struct bitmask *numa_all_nodes_ptr = NULL;
52
struct bitmask *numa_possible_nodes_ptr = NULL;
53
struct bitmask *numa_all_cpus_ptr = NULL;
54
struct bitmask *numa_possible_cpus_ptr = NULL;
55
/* I would prefer to use symbol versioning to create v1 and v2 versions
56
   of numa_no_nodes and numa_all_nodes, but the loader does not correctly
57
   handle versioning of BSS versus small data items */
58
59
struct bitmask *numa_nodes_ptr = NULL;
60
static struct bitmask *numa_memnode_ptr = NULL;
61
static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
62
static char node_cpu_mask_v1_stale = 1;
63
static struct bitmask **node_cpu_mask_v2;
64
static char node_cpu_mask_v2_stale = 1;
65
66
WEAK void numa_error(char *where);
67
68
#ifndef TLS
69
#warning "not threadsafe"
70
#define __thread
71
#endif
72
73
static __thread int bind_policy = MPOL_BIND;
74
static __thread unsigned int mbind_flags = 0;
75
static int sizes_set=0;
76
static int maxconfigurednode = -1;
77
static int maxconfiguredcpu = -1;
78
static int numprocnode = -1;
79
static int numproccpu = -1;
80
static int nodemask_sz = 0;
81
static int cpumask_sz = 0;
82
83
static int has_preferred_many = -1;
84
85
int numa_exit_on_error = 0;
86
int numa_exit_on_warn = 0;
87
int numa_fail_alloc_on_error = 0;
88
static void set_sizes(void);
89
90
/*
91
 * There are two special functions, _init(void) and _fini(void), which
92
 * are called automatically by the dynamic loader whenever a library is loaded.
93
 *
94
 * The v1 library depends upon nodemask_t's of all nodes and no nodes.
95
 */
96
void __attribute__((constructor))
97
numa_init(void)
98
2
{
99
2
  int max,i;
100
101
2
  if (sizes_set)
102
0
    return;
103
104
2
  set_sizes();
105
  /* numa_all_nodes should represent existing nodes on this system */
106
2
        max = numa_num_configured_nodes();
107
4
        for (i = 0; i < max; i++)
108
2
                nodemask_set_compat((nodemask_t *)&numa_all_nodes, i);
109
2
  memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
110
111
  /* clear errno */
112
2
  errno = 0;
113
2
}
114
115
static void cleanup_node_cpu_mask_v2(void);
116
117
0
#define FREE_AND_ZERO(x) if (x) { \
118
0
    numa_bitmask_free(x); \
119
0
    x = NULL;   \
120
0
  }
121
122
void __attribute__((destructor))
123
numa_fini(void)
124
0
{
125
0
  FREE_AND_ZERO(numa_all_cpus_ptr);
126
0
  FREE_AND_ZERO(numa_possible_cpus_ptr);
127
0
  FREE_AND_ZERO(numa_all_nodes_ptr);
128
0
  FREE_AND_ZERO(numa_possible_nodes_ptr);
129
0
  FREE_AND_ZERO(numa_no_nodes_ptr);
130
0
  FREE_AND_ZERO(numa_memnode_ptr);
131
0
  FREE_AND_ZERO(numa_nodes_ptr);
132
0
  cleanup_node_cpu_mask_v2();
133
0
}
134
135
static int numa_find_first(struct bitmask *mask)
136
0
{
137
0
  int i;
138
0
  for (i = 0; i < mask->size; i++)
139
0
    if (numa_bitmask_isbitset(mask, i))
140
0
      return i;
141
0
  return -1;
142
0
}
143
144
/*
145
 * The following bitmask declarations, bitmask_*() routines, and associated
146
 * _setbit() and _getbit() routines are:
147
 * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved.
148
 * SGI publishes it under the terms of the Library GNU General Public License,
149
 * v2, as published by the Free Software Foundation.
150
 */
151
static unsigned int
152
_getbit(const struct bitmask *bmp, unsigned int n)
153
137k
{
154
137k
  if (n < bmp->size)
155
136k
    return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1;
156
789
  else
157
789
    return 0;
158
137k
}
159
160
static void
161
_setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
162
23.9k
{
163
23.9k
  if (n < bmp->size) {
164
23.9k
    if (v)
165
10.6k
      bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong);
166
13.2k
    else
167
13.2k
      bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong));
168
23.9k
  }
169
23.9k
}
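
For example, with 64-bit longs (bitsperlong == 64), _setbit(bmp, 70, 1) selects word 70 / 64 = maskp[1] and sets bit 70 % 64 = 6; _getbit() uses the same arithmetic, and returns 0 for any index at or beyond bmp->size rather than reading out of bounds.
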
170
171
int
172
numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
173
135k
{
174
135k
  return _getbit(bmp, i);
175
135k
}
176
177
struct bitmask *
178
numa_bitmask_setall(struct bitmask *bmp)
179
0
{
180
0
  unsigned int i;
181
0
  for (i = 0; i < bmp->size; i++)
182
0
    _setbit(bmp, i, 1);
183
0
  return bmp;
184
0
}
185
186
struct bitmask *
187
numa_bitmask_clearall(struct bitmask *bmp)
188
207
{
189
207
  unsigned int i;
190
13.4k
  for (i = 0; i < bmp->size; i++)
191
13.2k
    _setbit(bmp, i, 0);
192
207
  return bmp;
193
207
}
194
195
struct bitmask *
196
numa_bitmask_setbit(struct bitmask *bmp, unsigned int i)
197
10.6k
{
198
10.6k
  _setbit(bmp, i, 1);
199
10.6k
  return bmp;
200
10.6k
}
201
202
struct bitmask *
203
numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i)
204
5
{
205
5
  _setbit(bmp, i, 0);
206
5
  return bmp;
207
5
}
208
209
unsigned int
210
numa_bitmask_nbytes(struct bitmask *bmp)
211
2
{
212
2
  return longsperbits(bmp->size) * sizeof(unsigned long);
213
2
}
214
215
/* where n is the number of bits in the map */
216
/* This function should not exit on failure, but right now we cannot really
217
   recover from this. */
218
struct bitmask *
219
numa_bitmask_alloc(unsigned int n)
220
1.50k
{
221
1.50k
  struct bitmask *bmp;
222
223
1.50k
  if (n < 1) {
224
0
    errno = EINVAL;
225
0
    numa_error("request to allocate mask for invalid number");
226
0
    return NULL;
227
0
  }
228
1.50k
  bmp = malloc(sizeof(*bmp));
229
1.50k
  if (bmp == 0)
230
0
    goto oom;
231
1.50k
  bmp->size = n;
232
1.50k
  bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
233
1.50k
  if (bmp->maskp == 0) {
234
0
    free(bmp);
235
0
    goto oom;
236
0
  }
237
1.50k
  return bmp;
238
239
0
oom:
240
0
  numa_error("Out of memory allocating bitmask");
241
0
  exit(1);
242
1.50k
}
243
244
void
245
numa_bitmask_free(struct bitmask *bmp)
246
1.48k
{
247
1.48k
  if (bmp == 0)
248
0
    return;
249
1.48k
  free(bmp->maskp);
250
1.48k
  bmp->maskp = (unsigned long *)0xdeadcdef;  /* double free tripwire */
251
1.48k
  free(bmp);
252
1.48k
  return;
253
1.48k
}
254
255
/* True if two bitmasks are equal */
256
int
257
numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2)
258
0
{
259
0
  unsigned int i;
260
0
  for (i = 0; i < bmp1->size || i < bmp2->size; i++)
261
0
    if (_getbit(bmp1, i) != _getbit(bmp2, i))
262
0
      return 0;
263
0
  return 1;
264
0
}
265
266
/* Hamming Weight: number of set bits */
267
unsigned int numa_bitmask_weight(const struct bitmask *bmp)
268
4
{
269
4
  unsigned int i;
270
4
  unsigned int w = 0;
271
2.18k
  for (i = 0; i < bmp->size; i++)
272
2.17k
    if (_getbit(bmp, i))
273
66
      w++;
274
4
  return w;
275
4
}
276
277
/* *****end of bitmask_  routines ************ */
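
Taken together, the routines above form the public bitmask API. A minimal usage sketch (not part of the measured source; assumes only numa.h and linking with -lnuma):

  /* Sketch: allocate a nodemask, set a bit, query it, free it. */
  #include <numa.h>
  #include <stdio.h>

  int main(void)
  {
    if (numa_available() < 0)
      return 1;              /* all other calls are undefined on error */
    struct bitmask *bmp = numa_allocate_nodemask();
    if (!bmp)
      return 1;
    numa_bitmask_setbit(bmp, 0);                 /* mark node 0 */
    printf("weight=%u node0=%d\n",
           numa_bitmask_weight(bmp),
           numa_bitmask_isbitset(bmp, 0));
    numa_bitmask_free(bmp);
    return 0;
  }
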
278
279
/* Next two can be overwritten by the application for different error handling */
280
WEAK void numa_error(char *where)
281
0
{
282
0
  int olde = errno;
283
0
  perror(where);
284
0
  if (numa_exit_on_error)
285
0
    exit(1);
286
0
  errno = olde;
287
0
}
288
289
WEAK void numa_warn(int num, char *fmt, ...)
290
1.14k
{
291
1.14k
  static unsigned warned;
292
1.14k
  va_list ap;
293
1.14k
  int olde = errno;
294
295
  /* Give each warning only once */
296
1.14k
  if ((1<<num) & warned)
297
1.13k
    return;
298
9
  warned |= (1<<num);
299
300
9
  va_start(ap,fmt);
301
9
  fprintf(stderr, "libnuma: Warning: ");
302
9
  vfprintf(stderr, fmt, ap);
303
9
  fputc('\n', stderr);
304
9
  va_end(ap);
305
306
9
  if (numa_exit_on_warn)
307
0
    exit(1);
308
309
9
  errno = olde;
310
9
}
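
Since both handlers are declared WEAK, an application can replace them at link time. A hedged sketch of a stricter override (the signature is the one declared above):

  /* Sketch: application-supplied numa_error() that aborts instead of
     just calling perror() as the weak default does. */
  #include <numa.h>
  #include <stdio.h>
  #include <stdlib.h>

  void numa_error(char *where)
  {
    fprintf(stderr, "libnuma failure in %s\n", where);
    abort();
  }
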
311
312
static void setpol(int policy, struct bitmask *bmp)
313
0
{
314
0
  if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0)
315
0
    numa_error("set_mempolicy");
316
0
}
317
318
static void getpol(int *oldpolicy, struct bitmask *bmp)
319
0
{
320
0
  if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0)
321
0
    numa_error("get_mempolicy");
322
0
}
323
324
static int dombind(void *mem, size_t size, int pol, struct bitmask *bmp)
325
0
{
326
0
  if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0,
327
0
      mbind_flags) < 0) {
328
0
    numa_error("mbind");
329
0
    return -1;
330
0
  }
331
0
  return 0;
332
0
}
333
334
static void *dombind_or_free(void *mem, size_t size, int pol, struct bitmask *bmp)
335
0
{
336
0
  if (dombind(mem, size, pol, bmp) < 0 && numa_fail_alloc_on_error) {
337
0
    munmap(mem, size);
338
0
    return NULL;
339
0
  }
340
0
  return mem;
341
0
}
342
343
/* (undocumented) */
344
/* gives the wrong answer for hugetlbfs mappings. */
345
int numa_pagesize(void)
346
0
{
347
0
  static int pagesize;
348
0
  if (pagesize > 0)
349
0
    return pagesize;
350
0
  pagesize = getpagesize();
351
0
  return pagesize;
352
0
}
353
354
make_internal_alias(numa_pagesize);
355
356
/*
357
 * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
358
 * and the highest numbered existing node (maxconfigurednode).
359
 */
360
static void
361
set_configured_nodes(void)
362
2
{
363
2
  DIR *d;
364
2
  struct dirent *de;
365
366
2
  numa_memnode_ptr = numa_allocate_nodemask();
367
2
  numa_nodes_ptr = numa_allocate_nodemask();
368
2
  if (!numa_memnode_ptr || !numa_nodes_ptr)
369
0
    return;
370
371
2
  d = opendir("/sys/devices/system/node");
372
2
  if (!d) {
373
0
    maxconfigurednode = 0;
374
2
  } else {
375
24
    while ((de = readdir(d)) != NULL) {
376
22
      int nd;
377
22
      if (strncmp(de->d_name, "node", 4))
378
20
        continue;
379
2
      nd = strtoul(de->d_name+4, NULL, 0);
380
2
      numa_bitmask_setbit(numa_nodes_ptr, nd);
381
2
      numa_bitmask_setbit(numa_memnode_ptr, nd);
382
2
      if (maxconfigurednode < nd)
383
2
        maxconfigurednode = nd;
384
2
    }
385
2
    closedir(d);
386
2
  }
387
2
}
388
389
static inline int is_digit(char s)
390
574
{
391
574
  return (s >= '0' && s <= '9')
392
574
    || (s >= 'a' && s <= 'f')
393
574
    || (s >= 'A' && s <= 'F');
394
574
}
395
396
/* Is string 'pre' a prefix of string 's'? */
397
static int strprefix(const char *s, const char *pre)
398
112
{
399
112
  return strncmp(s, pre, strlen(pre)) == 0;
400
112
}
401
402
static const char *mask_size_file = "/proc/self/status";
403
static const char *nodemask_prefix = "Mems_allowed:\t";
404
/*
405
 * (do this the way Paul Jackson's libcpuset does it)
406
 * The nodemask values in /proc/self/status are in an
407
 * ascii format that uses 9 characters for each 32 bits of mask.
408
 * (this could also be used to find the cpumask size)
409
 */
410
static void
411
set_nodemask_size(void)
412
2
{
413
2
  FILE *fp;
414
2
  char *buf = NULL;
415
2
  char *tmp_buf = NULL;
416
2
  int digit_len = 0;
417
2
  size_t bufsize = 0;
418
419
2
  if ((fp = fopen(mask_size_file, "r")) == NULL)
420
0
    goto done;
421
422
114
  while (getline(&buf, &bufsize, fp) > 0) {
423
112
    if (strprefix(buf, nodemask_prefix)) {
424
2
      tmp_buf = buf;
425
2
      tmp_buf += strlen(nodemask_prefix);
426
576
      while (*tmp_buf != '\n' && *tmp_buf != '\0') {
427
574
        if (is_digit(*tmp_buf))
428
512
          digit_len++;
429
574
        tmp_buf++;
430
574
      }
431
2
      nodemask_sz = digit_len * 4;
432
2
    }
433
112
  }
434
2
  free(buf);
435
2
  fclose(fp);
436
2
done:
437
2
  if (nodemask_sz == 0) {/* fall back on error */
438
0
    int pol;
439
0
    unsigned long *mask = NULL, *origmask;
440
0
    nodemask_sz = 16;
441
0
    do {
442
0
      nodemask_sz <<= 1;
443
0
      origmask = mask;
444
0
      mask = realloc(mask, nodemask_sz / 8 + sizeof(unsigned long));
445
0
      if (!mask) {
446
0
        free(origmask);
447
0
        return;
448
0
      }
449
0
    } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL &&
450
0
        nodemask_sz < 4096*8);
451
0
    free(mask);
452
0
  }
453
2
}
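
As a worked example of the 9-characters-per-32-bits encoding: a status line such as "Mems_allowed:\t00000000,00000003" contains 16 hex digits (is_digit() skips the comma), so digit_len ends up 16 and nodemask_sz = 16 * 4 = 64 bits.
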
454
455
/*
456
 * Read a mask consisting of a sequence of hexadecimal longs separated by
457
 * commas. Order them correctly and return the number of bits set.
458
 */
459
static int
460
read_mask(char *s, struct bitmask *bmp)
461
4
{
462
4
  char *end = s;
463
4
  int tmplen = (bmp->size + bitsperint - 1) / bitsperint;
464
4
  unsigned int tmp[tmplen];
465
4
  unsigned int *start = tmp;
466
4
  unsigned int i, n = 0, m = 0;
467
468
4
  if (!s)
469
0
    return 0; /* shouldn't happen */
470
471
4
  i = strtoul(s, &end, 16);
472
473
  /* Skip leading zeros */
474
66
  while (!i && *end++ == ',') {
475
62
    i = strtoul(end, &end, 16);
476
62
  }
477
478
4
  if (!i)
479
    /* End of string. No mask */
480
0
    return -1;
481
482
4
  start[n++] = i;
483
  /* Read sequence of ints */
484
4
  while (*end++ == ',') {
485
0
    i = strtoul(end, &end, 16);
486
0
    start[n++] = i;
487
488
    /* buffer overflow */
489
0
    if (n > tmplen)
490
0
      return -1;
491
0
  }
492
493
  /*
494
   * Invert sequence of ints if necessary since the first int
495
   * is the highest and we put it first because we read it first.
496
   */
497
8
  while (n) {
498
4
    int w;
499
4
    unsigned long x = 0;
500
    /* read into long values in an endian-safe way */
501
8
    for (w = 0; n && w < bitsperlong; w += bitsperint)
502
4
      x |= ((unsigned long)start[n-- - 1] << w);
503
504
4
    bmp->maskp[m++] = x;
505
4
  }
506
  /*
507
   * Return the number of bits set
508
   */
509
4
  return numa_bitmask_weight(bmp);
510
4
}
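
As a worked example: for the input "00000001,00000003" on a 64-bit system, the two 32-bit fields are read in order into tmp[], then folded into maskp[0] = 0x0000000100000003, so the first comma-separated field becomes the most significant 32 bits; the return value is 3, the Hamming weight of the mask.
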
511
512
/*
513
 * Read a process's constraints in terms of nodes and cpus from
514
 * /proc/self/status.
515
 */
516
static void
517
set_task_constraints(void)
518
2
{
519
2
  int hicpu = maxconfiguredcpu;
520
2
  int i;
521
2
  char *buffer = NULL;
522
2
  size_t buflen = 0;
523
2
  FILE *f;
524
525
2
  numa_all_cpus_ptr = numa_allocate_cpumask();
526
2
  numa_possible_cpus_ptr = numa_allocate_cpumask();
527
2
  numa_all_nodes_ptr = numa_allocate_nodemask();
528
2
  numa_possible_nodes_ptr = numa_allocate_cpumask();
529
2
  numa_no_nodes_ptr = numa_allocate_nodemask();
530
531
  // partial leak shouldn't happen because it's transient
532
2
  if (!numa_all_cpus_ptr || !numa_possible_cpus_ptr ||
533
2
    !numa_all_nodes_ptr ||
534
2
    !numa_possible_nodes_ptr ||
535
2
    !numa_no_nodes_ptr)
536
0
    return;
537
538
2
  f = fopen(mask_size_file, "r");
539
2
  if (!f) {
540
    //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file);
541
0
    return;
542
0
  }
543
544
114
  while (getline(&buffer, &buflen, f) > 0) {
545
    /* mask starts after [last] tab */
546
112
    char  *mask = strrchr(buffer,'\t') + 1;
547
548
112
    if (strncmp(buffer,"Cpus_allowed:",13) == 0)
549
2
      numproccpu = read_mask(mask, numa_all_cpus_ptr);
550
551
112
    if (strncmp(buffer,"Mems_allowed:",13) == 0) {
552
2
      numprocnode = read_mask(mask, numa_all_nodes_ptr);
553
2
    }
554
112
  }
555
2
  fclose(f);
556
2
  free(buffer);
557
558
66
  for (i = 0; i <= hicpu; i++)
559
64
    numa_bitmask_setbit(numa_possible_cpus_ptr, i);
560
4
  for (i = 0; i <= maxconfigurednode; i++)
561
2
    numa_bitmask_setbit(numa_possible_nodes_ptr, i);
562
563
  /*
564
   * Cpus_allowed in the kernel can be defined to all f's
565
   * i.e. it may be a superset of the actual available processors.
566
   * As such let's reduce numproccpu to the number of actual
567
   * available cpus.
568
   */
569
2
  if (numproccpu <= 0) {
570
0
    for (i = 0; i <= hicpu; i++)
571
0
      numa_bitmask_setbit(numa_all_cpus_ptr, i);
572
0
    numproccpu = hicpu+1;
573
0
  }
574
575
2
  if (numproccpu > hicpu+1) {
576
0
    numproccpu = hicpu+1;
577
0
    for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) {
578
0
      numa_bitmask_clearbit(numa_all_cpus_ptr, i);
579
0
    }
580
0
  }
581
582
2
  if (numprocnode <= 0) {
583
0
    for (i = 0; i <= maxconfigurednode; i++)
584
0
      numa_bitmask_setbit(numa_all_nodes_ptr, i);
585
0
    numprocnode = maxconfigurednode + 1;
586
0
  }
587
588
2
  return;
589
2
}
590
591
/*
592
 * Find the highest cpu number possible (in other words the size
593
 * of a kernel cpumask_t (in bits) - 1)
594
 */
595
static void
596
set_numa_max_cpu(void)
597
2
{
598
2
  int len = 4096;
599
2
  int n;
600
2
  int olde = errno;
601
2
  struct bitmask *buffer;
602
603
2
  do {
604
2
    buffer = numa_bitmask_alloc(len);
605
2
    if (!buffer)
606
0
      return;
607
2
    n = numa_sched_getaffinity_v2_int(0, buffer);
608
    /* on success, returns size of kernel cpumask_t, in bytes */
609
2
    if (n < 0) {
610
0
      if (errno == EINVAL) {
611
0
        if (len >= 1024*1024)
612
0
          break;
613
0
        len *= 2;
614
0
        numa_bitmask_free(buffer);
615
0
        continue;
616
0
      } else {
617
0
        numa_warn(W_numcpus, "Unable to determine max cpu"
618
0
            " (sched_getaffinity: %s); guessing...",
619
0
            strerror(errno));
620
0
        n = sizeof(cpu_set_t);
621
0
        break;
622
0
      }
623
0
    }
624
2
  } while (n < 0);
625
2
  numa_bitmask_free(buffer);
626
2
  errno = olde;
627
2
  cpumask_sz = n*8;
628
2
}
629
630
/*
631
 * get the total (configured) number of cpus - both online and offline
632
 */
633
static void
634
set_configured_cpus(void)
635
2
{
636
2
  maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1;
637
2
  if (maxconfiguredcpu == -1)
638
0
    numa_error("sysconf(NPROCESSORS_CONF) failed");
639
2
}
640
641
static void
642
set_preferred_many(void)
643
0
{
644
0
  int oldp;
645
0
  struct bitmask *bmp, *tmp;
646
0
  int old_errno;
647
648
0
  if (has_preferred_many >= 0)
649
0
    return;
650
651
0
  old_errno = errno;
652
653
0
  has_preferred_many = 0;
654
655
0
  bmp = numa_allocate_nodemask();
656
0
  tmp = numa_get_mems_allowed();
657
0
  if (!tmp || !bmp)
658
0
    goto out;
659
660
0
  if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0)
661
0
    goto out;
662
663
0
  if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) {
664
0
    has_preferred_many = 1;
665
    /* reset the old memory policy ignoring error */
666
0
    (void)set_mempolicy(oldp, bmp->maskp, bmp->size+1);
667
0
  }
668
669
0
out:
670
0
  numa_bitmask_free(tmp);
671
0
  numa_bitmask_free(bmp);
672
0
  errno = old_errno;
673
0
}
674
675
/*
676
 * Initialize all the sizes.
677
 */
678
static void
679
set_sizes(void)
680
2
{
681
2
  sizes_set++;
682
2
  set_nodemask_size();  /* size of kernel nodemask_t */
683
2
  set_configured_nodes(); /* configured nodes listed in /sys */
684
2
  set_numa_max_cpu(); /* size of kernel cpumask_t */
685
2
  set_configured_cpus();  /* cpus listed in /sys/devices/system/cpu */
686
2
  set_task_constraints(); /* cpus and nodes for current task */
687
2
}
688
689
int
690
numa_num_configured_nodes(void)
691
744
{
692
  /*
693
  * NOTE: this function's behavior matches the documentation (i.e. it
694
  * returns a count of nodes with memory) despite the poor function
695
  * naming.  We also cannot use the similarly poorly named
696
  * numa_all_nodes_ptr as it only tracks nodes with memory from which
697
  * the calling process can allocate.  Think sparse nodes, memory-less
698
  * nodes, cpusets...
699
  */
700
744
  int memnodecount=0, i;
701
702
1.48k
  for (i=0; i <= maxconfigurednode; i++) {
703
744
    if (numa_bitmask_isbitset(numa_memnode_ptr, i))
704
744
      memnodecount++;
705
744
  }
706
744
  return memnodecount;
707
744
}
708
709
int
710
numa_num_configured_cpus(void)
711
742
{
712
713
742
  return maxconfiguredcpu+1;
714
742
}
715
716
int
717
numa_num_possible_nodes(void)
718
751
{
719
751
  return nodemask_sz;
720
751
}
721
722
int
723
numa_num_possible_cpus(void)
724
749
{
725
749
  return cpumask_sz;
726
749
}
727
728
int
729
numa_num_task_nodes(void)
730
0
{
731
0
  return numprocnode;
732
0
}
733
734
/*
735
 * for backward compatibility
736
 */
737
int
738
numa_num_thread_nodes(void)
739
0
{
740
0
  return numa_num_task_nodes();
741
0
}
742
743
int
744
numa_num_task_cpus(void)
745
0
{
746
0
  return numproccpu;
747
0
}
748
749
/*
750
 * for backward compatibility
751
 */
752
int
753
numa_num_thread_cpus(void)
754
0
{
755
0
  return numa_num_task_cpus();
756
0
}
757
758
/*
759
 * Return the number of the highest node in this running system.
760
 */
761
int
762
numa_max_node(void)
763
207
{
764
207
  return maxconfigurednode;
765
207
}
766
767
make_internal_alias(numa_max_node);
768
769
/*
770
 * Return the number of the highest possible node in a system,
771
 * which for v1 is the size of a numa.h nodemask_t (in bits) - 1,
772
 * but for v2 is the size of a kernel nodemask_t (in bits) - 1.
773
 */
774
SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1")
775
int
776
numa_max_possible_node_v1(void)
777
0
{
778
0
  return ((sizeof(nodemask_t)*8)-1);
779
0
}
780
781
SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2")
782
int
783
numa_max_possible_node_v2(void)
784
751
{
785
751
  return numa_num_possible_nodes()-1;
786
751
}
787
788
make_internal_alias(numa_max_possible_node_v1);
789
make_internal_alias(numa_max_possible_node_v2);
790
791
/*
792
 * Allocate a bitmask for cpus, of a size large enough to
793
 * match the kernel's cpumask_t.
794
 */
795
struct bitmask *
796
numa_allocate_cpumask()
797
749
{
798
749
  int ncpus = numa_num_possible_cpus();
799
800
749
  return numa_bitmask_alloc(ncpus);
801
749
}
802
803
/*
804
 * Allocate a bitmask the size of a libnuma nodemask_t
805
 */
806
static struct bitmask *
807
allocate_nodemask_v1(void)
808
0
{
809
0
  int nnodes = numa_max_possible_node_v1_int()+1;
810
811
0
  return numa_bitmask_alloc(nnodes);
812
0
}
813
814
/*
815
 * Allocate a bitmask for nodes, of a size large enough to
816
 * match the kernel's nodemask_t.
817
 */
818
struct bitmask *
819
numa_allocate_nodemask(void)
820
750
{
821
750
  struct bitmask *bmp;
822
750
  int nnodes = numa_max_possible_node_v2_int() + 1;
823
824
750
  bmp = numa_bitmask_alloc(nnodes);
825
750
  return bmp;
826
750
}
827
828
/* (cache the result?) */
829
long long numa_node_size64(int node, long long *freep)
830
0
{
831
0
  size_t len = 0;
832
0
  char *line = NULL;
833
0
  long long size = -1;
834
0
  FILE *f;
835
0
  char fn[64];
836
0
  int ok = 0;
837
0
  int required = freep ? 2 : 1;
838
839
0
  if (freep)
840
0
    *freep = 0;
841
0
  sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node);
842
0
  f = fopen(fn, "r");
843
0
  if (!f)
844
0
    return -1;
845
0
  while (getdelim(&line, &len, '\n', f) > 0) {
846
0
    char *end;
847
0
    char *s = strcasestr(line, "kB");
848
0
    if (!s)
849
0
      continue;
850
0
    --s;
851
0
    while (s > line && isspace(*s))
852
0
      --s;
853
0
    while (s > line && isdigit(*s))
854
0
      --s;
855
0
    if (strstr(line, "MemTotal")) {
856
0
      size = strtoull(s,&end,0) << 10;
857
0
      if (end == s)
858
0
        size = -1;
859
0
      else
860
0
        ok++;
861
0
    }
862
0
    if (freep && strstr(line, "MemFree")) {
863
0
      *freep = strtoull(s,&end,0) << 10;
864
0
      if (end == s)
865
0
        *freep = -1;
866
0
      else
867
0
        ok++;
868
0
    }
869
0
  }
870
0
  fclose(f);
871
0
  free(line);
872
0
  if (ok != required)
873
0
    numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok);
874
0
  return size;
875
0
}
876
877
make_internal_alias(numa_node_size64);
878
879
long numa_node_size(int node, long *freep)
880
0
{
881
0
  long long f2 = 0;
882
0
  long sz = numa_node_size64_int(node, &f2);
883
0
  if (freep)
884
0
    *freep = f2;
885
0
  return sz;
886
0
}
887
888
int numa_available(void)
889
0
{
890
0
  if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && (errno == ENOSYS || errno == EPERM))
891
0
    return -1;
892
0
  return 0;
893
0
}
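
Per the comment at the top of the file, every other call is undefined once numa_available() reports an error, so callers gate on it first. A minimal sketch:

  /* Sketch: the canonical startup check for a libnuma client. */
  #include <numa.h>
  #include <stdio.h>

  int main(void)
  {
    if (numa_available() < 0) {
      fprintf(stderr, "NUMA not supported here\n");
      return 1;
    }
    printf("highest configured node: %d\n", numa_max_node());
    return 0;
  }
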
894
895
SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1")
896
void
897
numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask)
898
0
{
899
0
  struct bitmask bitmask;
900
901
0
  bitmask.size = sizeof(nodemask_t) * 8;
902
0
  bitmask.maskp = (unsigned long *)mask;
903
0
  dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
904
0
}
905
906
SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2")
907
void
908
numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp)
909
0
{
910
0
  dombind(mem, size, MPOL_INTERLEAVE, bmp);
911
0
}
912
913
void
914
numa_weighted_interleave_memory(void *mem, size_t size, struct bitmask *bmp)
915
0
{
916
0
  dombind(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp);
917
0
}
918
919
void numa_tonode_memory(void *mem, size_t size, int node)
920
0
{
921
0
  struct bitmask *nodes;
922
923
0
  nodes = numa_allocate_nodemask();
924
0
  if (!nodes)
925
0
    return;
926
0
  numa_bitmask_setbit(nodes, node);
927
0
  dombind(mem, size, bind_policy, nodes);
928
0
  numa_bitmask_free(nodes);
929
0
}
930
931
SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1")
932
void
933
numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask)
934
0
{
935
0
  struct bitmask bitmask;
936
937
0
  bitmask.maskp = (unsigned long *)mask;
938
0
  bitmask.size  = sizeof(nodemask_t);
939
0
  dombind(mem, size,  bind_policy, &bitmask);
940
0
}
941
942
SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2")
943
void
944
numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp)
945
0
{
946
0
  dombind(mem, size,  bind_policy, bmp);
947
0
}
948
949
void numa_setlocal_memory(void *mem, size_t size)
950
0
{
951
0
  dombind(mem, size, MPOL_LOCAL, NULL);
952
0
}
953
954
void numa_police_memory(void *mem, size_t size)
955
0
{
956
0
  int pagesize = numa_pagesize_int();
957
0
  unsigned long i;
958
0
  char *p = mem;
959
0
  for (i = 0; i < size; i += pagesize, p += pagesize)
960
0
    __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED);
961
962
0
}
963
964
make_internal_alias(numa_police_memory);
965
966
void *numa_alloc(size_t size)
967
0
{
968
0
  char *mem;
969
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
970
0
       0, 0);
971
0
  if (mem == (char *)-1)
972
0
    return NULL;
973
0
  numa_police_memory_int(mem, size);
974
0
  return mem;
975
0
}
976
977
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
978
0
{
979
0
  char *mem;
980
0
  mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
981
0
  if (mem == (char *)-1)
982
0
    return NULL;
983
  /*
984
   *  The memory policy of the allocated pages is preserved by mremap(), so
985
   *  there is no need to (re)set it here. If the policy of the original
986
   *  allocation is not set, the new pages will be allocated according to the
987
   *  process' mempolicy. Trying to allocate explicitly the new pages on the
988
   *  same node as the original ones would require changing the policy of the
989
   *  newly allocated pages, which violates the numa_realloc() semantics.
990
   */
991
0
  return mem;
992
0
}
993
994
SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1")
995
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
996
0
{
997
0
  char *mem;
998
0
  struct bitmask bitmask;
999
1000
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1001
0
      0, 0);
1002
0
  if (mem == (char *)-1)
1003
0
    return NULL;
1004
0
  bitmask.maskp = (unsigned long *)mask;
1005
0
  bitmask.size  = sizeof(nodemask_t);
1006
0
  mem = dombind_or_free(mem, size, MPOL_INTERLEAVE, &bitmask);
1007
0
  return mem;
1008
0
}
1009
1010
SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2")
1011
void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp)
1012
0
{
1013
0
  char *mem;
1014
1015
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1016
0
       0, 0);
1017
0
  if (mem == (char *)-1)
1018
0
    return NULL;
1019
0
  mem = dombind_or_free(mem, size, MPOL_INTERLEAVE, bmp);
1020
0
  return mem;
1021
0
}
1022
1023
make_internal_alias(numa_alloc_interleaved_subset_v1);
1024
make_internal_alias(numa_alloc_interleaved_subset_v2);
1025
1026
void *
1027
numa_alloc_interleaved(size_t size)
1028
0
{
1029
0
  return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr);
1030
0
}
1031
1032
void *
1033
numa_alloc_weighted_interleaved_subset(size_t size, struct bitmask *bmp)
1034
0
{
1035
0
  char *mem;
1036
1037
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1038
0
       0, 0);
1039
0
  if (mem == (char *)-1)
1040
0
    return NULL;
1041
0
  mem = dombind_or_free(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp);
1042
0
  return mem;
1043
0
}
1044
1045
void *
1046
numa_alloc_weighted_interleaved(size_t size)
1047
0
{
1048
0
  return numa_alloc_weighted_interleaved_subset(size, numa_all_nodes_ptr);
1049
0
}
1050
1051
/*
1052
 * given a user node mask, set memory policy to use those nodes
1053
 */
1054
SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1")
1055
void
1056
numa_set_interleave_mask_v1(nodemask_t *mask)
1057
0
{
1058
0
  struct bitmask *bmp;
1059
0
  int nnodes = numa_max_possible_node_v1_int()+1;
1060
1061
0
  bmp = numa_bitmask_alloc(nnodes);
1062
0
  copy_nodemask_to_bitmask(mask, bmp);
1063
0
  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
1064
0
    setpol(MPOL_DEFAULT, bmp);
1065
0
  else
1066
0
    setpol(MPOL_INTERLEAVE, bmp);
1067
0
  numa_bitmask_free(bmp);
1068
0
}
1069
1070
1071
SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2")
1072
void
1073
numa_set_interleave_mask_v2(struct bitmask *bmp)
1074
0
{
1075
0
  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
1076
0
    setpol(MPOL_DEFAULT, bmp);
1077
0
  else
1078
0
    setpol(MPOL_INTERLEAVE, bmp);
1079
0
}
1080
1081
void
1082
numa_set_weighted_interleave_mask(struct bitmask *bmp)
1083
0
{
1084
0
  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
1085
0
    setpol(MPOL_DEFAULT, bmp);
1086
0
  else
1087
0
    setpol(MPOL_WEIGHTED_INTERLEAVE, bmp);
1088
0
}
1089
1090
SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1")
1091
nodemask_t
1092
numa_get_interleave_mask_v1(void)
1093
0
{
1094
0
  int oldpolicy = 0;
1095
0
  struct bitmask *bmp;
1096
0
  nodemask_t mask;
1097
1098
0
  bmp = allocate_nodemask_v1();
1099
0
  if (!bmp)
1100
0
    return numa_no_nodes;
1101
0
  getpol(&oldpolicy, bmp);
1102
0
  if (oldpolicy == MPOL_INTERLEAVE)
1103
0
    copy_bitmask_to_nodemask(bmp, &mask);
1104
0
  else
1105
0
    copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask);
1106
0
  numa_bitmask_free(bmp);
1107
0
  return mask;
1108
0
}
1109
1110
SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2")
1111
struct bitmask *
1112
numa_get_interleave_mask_v2(void)
1113
0
{
1114
0
  int oldpolicy = 0;
1115
0
  struct bitmask *bmp;
1116
1117
0
  bmp = numa_allocate_nodemask();
1118
0
  if (!bmp)
1119
0
    return NULL;
1120
0
  getpol(&oldpolicy, bmp);
1121
0
  if (oldpolicy != MPOL_INTERLEAVE)
1122
0
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
1123
0
  return bmp;
1124
0
}
1125
1126
struct bitmask *
1127
numa_get_weighted_interleave_mask(void)
1128
0
{
1129
0
  int oldpolicy = 0;
1130
0
  struct bitmask *bmp;
1131
1132
0
  bmp = numa_allocate_nodemask();
1133
0
  if (!bmp)
1134
0
    return NULL;
1135
0
  getpol(&oldpolicy, bmp);
1136
0
  if (oldpolicy != MPOL_WEIGHTED_INTERLEAVE)
1137
0
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
1138
0
  return bmp;
1139
0
}
1140
1141
/* (undocumented) */
1142
int numa_get_interleave_node(void)
1143
0
{
1144
0
  int nd;
1145
0
  if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0)
1146
0
    return nd;
1147
0
  return 0;
1148
0
}
1149
1150
void *numa_alloc_onnode(size_t size, int node)
1151
0
{
1152
0
  char *mem;
1153
0
  struct bitmask *bmp;
1154
1155
0
  bmp = numa_allocate_nodemask();
1156
0
  if (!bmp)
1157
0
    return NULL;
1158
0
  numa_bitmask_setbit(bmp, node);
1159
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1160
0
       0, 0);
1161
0
  if (mem == (char *)-1)
1162
0
    mem = NULL;
1163
0
  else
1164
0
    mem = dombind_or_free(mem, size, bind_policy, bmp);
1165
1166
0
  numa_bitmask_free(bmp);
1167
0
  return mem;
1168
0
}
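
Because these allocators are mmap-backed, memory obtained from them must be released with numa_free(), not free(). A hedged sketch of the pairing:

  /* Sketch: allocate on a node, touch the pages, release with numa_free(). */
  #include <numa.h>
  #include <string.h>

  static void on_node0(size_t len)
  {
    void *p = numa_alloc_onnode(len, 0);
    if (!p)
      return;
    memset(p, 0, len);      /* fault the pages in under the bound policy */
    numa_free(p, len);      /* size must match the original request */
  }
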
1169
1170
void *numa_alloc_local(size_t size)
1171
0
{
1172
0
  char *mem;
1173
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1174
0
       0, 0);
1175
0
  if (mem == (char *)-1)
1176
0
    mem =  NULL;
1177
0
  else
1178
0
    mem = dombind_or_free(mem, size, MPOL_LOCAL, NULL);
1179
0
  return mem;
1180
0
}
1181
1182
void numa_set_bind_policy(int strict)
1183
0
{
1184
0
  set_preferred_many();
1185
0
  if (strict)
1186
0
    bind_policy = MPOL_BIND;
1187
0
  else if (has_preferred_many)
1188
0
    bind_policy = MPOL_PREFERRED_MANY;
1189
0
  else
1190
0
    bind_policy = MPOL_PREFERRED;
1191
0
}
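
numa_set_bind_policy() only flips the thread-local bind_policy consulted by numa_tonode_memory() and the *_onnode allocators; a sketch of the soft-binding variant:

  /* Sketch: prefer a node but allow fallback if it is out of memory. */
  #include <numa.h>

  static void *soft_alloc(size_t len, int node)
  {
    numa_set_bind_policy(0);  /* MPOL_PREFERRED(_MANY) instead of MPOL_BIND */
    return numa_alloc_onnode(len, node);
  }
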
1192
1193
SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1")
1194
void
1195
numa_set_membind_v1(const nodemask_t *mask)
1196
0
{
1197
0
  struct bitmask bitmask;
1198
1199
0
  bitmask.maskp = (unsigned long *)mask;
1200
0
  bitmask.size  = sizeof(nodemask_t);
1201
0
  setpol(MPOL_BIND, &bitmask);
1202
0
}
1203
1204
SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2")
1205
void
1206
numa_set_membind_v2(struct bitmask *bmp)
1207
0
{
1208
0
  setpol(MPOL_BIND, bmp);
1209
0
}
1210
1211
make_internal_alias(numa_set_membind_v2);
1212
1213
void
1214
numa_set_membind_balancing(struct bitmask *bmp)
1215
0
{
1216
  /* MPOL_F_NUMA_BALANCING: ignore if unsupported */
1217
0
  if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING,
1218
0
        bmp->maskp, bmp->size + 1) < 0) {
1219
0
    if (errno == EINVAL) {
1220
0
      errno = 0;
1221
0
      numa_set_membind_v2(bmp);
1222
0
    } else
1223
0
      numa_error("set_mempolicy");
1224
0
  }
1225
0
}
1226
1227
/*
1228
 * copy a bitmask map body to a numa.h nodemask_t structure
1229
 */
1230
void
1231
copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp)
1232
0
{
1233
0
  int max, i;
1234
1235
0
  memset(nmp, 0, sizeof(nodemask_t));
1236
0
        max = (sizeof(nodemask_t)*8);
1237
0
  for (i=0; i<bmp->size; i++) {
1238
0
    if (i >= max)
1239
0
      break;
1240
0
    if (numa_bitmask_isbitset(bmp, i))
1241
0
      nodemask_set_compat((nodemask_t *)nmp, i);
1242
0
  }
1243
0
}
1244
1245
/*
1246
 * copy a bitmask map body to another bitmask body
1247
 * fill a larger destination with zeroes
1248
 */
1249
void
1250
copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto)
1251
210
{
1252
210
  int bytes;
1253
1254
210
  if (bmpfrom->size >= bmpto->size) {
1255
210
    memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size));
1256
210
  } else if (bmpfrom->size < bmpto->size) {
1257
0
    bytes = CPU_BYTES(bmpfrom->size);
1258
0
    memcpy(bmpto->maskp, bmpfrom->maskp, bytes);
1259
0
    memset(((char *)bmpto->maskp)+bytes, 0,
1260
0
          CPU_BYTES(bmpto->size)-bytes);
1261
0
  }
1262
210
}
1263
1264
/*
1265
 * copy a numa.h nodemask_t structure to a bitmask map body
1266
 */
1267
void
1268
copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp)
1269
0
{
1270
0
  int max, i;
1271
1272
0
  numa_bitmask_clearall(bmp);
1273
0
        max = (sizeof(nodemask_t)*8);
1274
0
  if (max > bmp->size)
1275
0
    max = bmp->size;
1276
0
  for (i=0; i<max; i++) {
1277
0
    if (nodemask_isset_compat(nmp, i))
1278
0
      numa_bitmask_setbit(bmp, i);
1279
0
  }
1280
0
}
1281
1282
SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1")
1283
nodemask_t
1284
numa_get_membind_v1(void)
1285
0
{
1286
0
  int oldpolicy = 0;
1287
0
  struct bitmask *bmp;
1288
0
  nodemask_t nmp;
1289
1290
0
  bmp = allocate_nodemask_v1();
1291
0
  if (!bmp)
1292
0
    return numa_no_nodes;
1293
0
  getpol(&oldpolicy, bmp);
1294
0
  if (oldpolicy == MPOL_BIND) {
1295
0
    copy_bitmask_to_nodemask(bmp, &nmp);
1296
0
  } else {
1297
    /* copy the body of the map to numa_all_nodes */
1298
0
    copy_bitmask_to_nodemask(bmp, &numa_all_nodes);
1299
0
    nmp = numa_all_nodes;
1300
0
  }
1301
0
  numa_bitmask_free(bmp);
1302
0
  return nmp;
1303
0
}
1304
1305
SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2")
1306
struct bitmask *
1307
numa_get_membind_v2(void)
1308
0
{
1309
0
  int oldpolicy = 0;
1310
0
  struct bitmask *bmp = NULL;
1311
1312
0
  bmp = numa_allocate_nodemask();
1313
0
  if (!bmp)
1314
0
    return NULL;
1315
0
  getpol(&oldpolicy, bmp);
1316
0
  if (oldpolicy != MPOL_BIND)
1317
0
    copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp);
1318
0
  return bmp;
1319
0
}
1320
1321
//TODO:  do we need a v1 nodemask_t version?
1322
struct bitmask *numa_get_mems_allowed(void)
1323
0
{
1324
0
  struct bitmask *bmp;
1325
1326
  /*
1327
   * can change, so query on each call.
1328
   */
1329
0
  bmp = numa_allocate_nodemask();
1330
0
  if (!bmp)
1331
0
    return NULL;
1332
0
  if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0,
1333
0
        MPOL_F_MEMS_ALLOWED) < 0)
1334
0
    numa_error("get_mempolicy");
1335
0
  return bmp;
1336
0
}
1337
make_internal_alias(numa_get_mems_allowed);
1338
1339
void numa_free(void *mem, size_t size)
1340
0
{
1341
0
  munmap(mem, size);
1342
0
}
1343
1344
SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1")
1345
int
1346
numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus)
1347
0
{
1348
0
  int i;
1349
0
  char *p = strchr(line, '\n');
1350
0
  if (!p)
1351
0
    return -1;
1352
1353
0
  for (i = 0; p > line;i++) {
1354
0
    char *oldp, *endp;
1355
0
    oldp = p;
1356
0
    if (*p == ',')
1357
0
      --p;
1358
0
    while (p > line && *p != ',')
1359
0
      --p;
1360
    /* Eat two 32bit fields at a time to get longs */
1361
0
    if (p > line && sizeof(unsigned long) == 8) {
1362
0
      oldp--;
1363
0
      memmove(p, p+1, oldp-p+1);
1364
0
      while (p > line && *p != ',')
1365
0
        --p;
1366
0
    }
1367
0
    if (*p == ',')
1368
0
      p++;
1369
0
    if (i >= CPU_LONGS(ncpus))
1370
0
      return -1;
1371
0
    mask[i] = strtoul(p, &endp, 16);
1372
0
    if (endp != oldp)
1373
0
      return -1;
1374
0
    p--;
1375
0
  }
1376
0
  return 0;
1377
0
}
1378
1379
SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2")
1380
int
1381
numa_parse_bitmap_v2(char *line, struct bitmask *mask)
1382
1
{
1383
1
  int i, ncpus;
1384
1
  char *p = strchr(line, '\n');
1385
1
  if (!p)
1386
0
    return -1;
1387
1
  ncpus = mask->size;
1388
1389
2
  for (i = 0; p > line;i++) {
1390
1
    char *oldp, *endp;
1391
1
    oldp = p;
1392
1
    if (*p == ',')
1393
0
      --p;
1394
9
    while (p > line && *p != ',')
1395
8
      --p;
1396
    /* Eat two 32bit fields at a time to get longs */
1397
1
    if (p > line && sizeof(unsigned long) == 8) {
1398
0
      oldp--;
1399
0
      memmove(p, p+1, oldp-p+1);
1400
0
      while (p > line && *p != ',')
1401
0
        --p;
1402
0
    }
1403
1
    if (*p == ',')
1404
0
      p++;
1405
1
    if (i >= CPU_LONGS(ncpus))
1406
0
      return -1;
1407
1
    mask->maskp[i] = strtoul(p, &endp, 16);
1408
1
    if (endp != oldp)
1409
0
      return -1;
1410
1
    p--;
1411
1
  }
1412
1
  return 0;
1413
1
}
1414
1415
static void init_node_cpu_mask_v2(void)
1416
1
{
1417
1
  int nnodes = numa_max_possible_node_v2_int() + 1;
1418
1
  struct bitmask **new_ncm, **null_ncm = NULL;
1419
1
  new_ncm = calloc (nnodes, sizeof(struct bitmask *));
1420
  /* Check for races with another thread */
1421
1
  if (new_ncm && !__atomic_compare_exchange_n(&node_cpu_mask_v2, &null_ncm,
1422
1
      new_ncm, 1,
1423
1
      __ATOMIC_ACQUIRE, __ATOMIC_ACQUIRE)) {
1424
0
    free(new_ncm);
1425
0
  }
1426
1
}
1427
1428
static void cleanup_node_cpu_mask_v2(void)
1429
0
{
1430
0
  if (node_cpu_mask_v2) {
1431
0
    int i;
1432
0
    int nnodes;
1433
0
    nnodes = numa_max_possible_node_v2_int() + 1;
1434
0
    for (i = 0; i < nnodes; i++) {
1435
0
      FREE_AND_ZERO(node_cpu_mask_v2[i]);
1436
0
    }
1437
0
    free(node_cpu_mask_v2);
1438
0
    node_cpu_mask_v2 = NULL;
1439
0
  }
1440
0
}
1441
1442
/* This would be better with some locking, but I don't want to make libnuma
1443
   dependent on pthreads right now. The races are relatively harmless. */
1444
SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1")
1445
int
1446
numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
1447
0
{
1448
0
  int err = 0;
1449
0
  char fn[64];
1450
0
  FILE *f;
1451
0
  char update;
1452
0
  char *line = NULL;
1453
0
  size_t len = 0;
1454
0
  struct bitmask bitmask;
1455
0
  int buflen_needed;
1456
0
  unsigned long *mask;
1457
0
  int ncpus = numa_num_possible_cpus();
1458
0
  int maxnode = numa_max_node_int();
1459
1460
0
  buflen_needed = CPU_BYTES(ncpus);
1461
0
  if ((unsigned)node > maxnode || bufferlen < buflen_needed) {
1462
0
    errno = ERANGE;
1463
0
    return -1;
1464
0
  }
1465
0
  if (bufferlen > buflen_needed)
1466
0
    memset(buffer, 0, bufferlen);
1467
0
  update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
1468
0
  if (node_cpu_mask_v1[node] && !update) {
1469
0
    memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
1470
0
    return 0;
1471
0
  }
1472
1473
0
  mask = malloc(buflen_needed);
1474
0
  if (!mask)
1475
0
    mask = (unsigned long *)buffer;
1476
0
  memset(mask, 0, buflen_needed);
1477
1478
0
  sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
1479
0
  f = fopen(fn, "r");
1480
0
  if (!f || getdelim(&line, &len, '\n', f) < 1) {
1481
0
    if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
1482
0
      numa_warn(W_nosysfs2,
1483
0
         "/sys not mounted or invalid. Assuming one node: %s",
1484
0
          strerror(errno));
1485
0
      numa_warn(W_nosysfs2,
1486
0
         "(cannot open or correctly parse %s)", fn);
1487
0
    }
1488
0
    bitmask.maskp = (unsigned long *)mask;
1489
0
    bitmask.size  = buflen_needed * 8;
1490
0
    numa_bitmask_setall(&bitmask);
1491
0
    err = -1;
1492
0
  }
1493
0
  if (f)
1494
0
    fclose(f);
1495
1496
0
  if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) {
1497
0
    numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
1498
0
    bitmask.maskp = (unsigned long *)mask;
1499
0
    bitmask.size  = buflen_needed * 8;
1500
0
    numa_bitmask_setall(&bitmask);
1501
0
    err = -1;
1502
0
  }
1503
1504
0
  free(line);
1505
0
  memmove(buffer, mask, buflen_needed);
1506
1507
  /* slightly racy, see above */
1508
0
  if (node_cpu_mask_v1[node]) {
1509
0
    if (update) {
1510
      /*
1511
       * There may be readers on node_cpu_mask_v1[], hence it can not
1512
       * be freed.
1513
       */
1514
0
      memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
1515
0
      free(mask);
1516
0
      mask = NULL;
1517
0
    } else if (mask != buffer)
1518
0
      free(mask);
1519
0
  } else {
1520
0
    node_cpu_mask_v1[node] = mask;
1521
0
  }
1522
0
  return err;
1523
0
}
1524
1525
/*
1526
 * test whether a node has cpus
1527
 */
1528
/* This would be better with some locking, but I don't want to make libnuma
1529
   dependent on pthreads right now. The races are relatively harmless. */
1530
/*
1531
 * deliver a bitmask of cpus representing the cpus on a given node
1532
 */
1533
SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2")
1534
int
1535
numa_node_to_cpus_v2(int node, struct bitmask *buffer)
1536
207
{
1537
207
  int err = 0;
1538
207
  int nnodes = numa_max_node();
1539
207
  char fn[64], *line = NULL;
1540
207
  FILE *f;
1541
207
  char update;
1542
207
  size_t len = 0;
1543
207
  struct bitmask *mask;
1544
1545
207
  if (!__atomic_load_n(&node_cpu_mask_v2, __ATOMIC_CONSUME))
1546
1
    init_node_cpu_mask_v2();
1547
1548
207
  if (node > nnodes) {
1549
0
    errno = ERANGE;
1550
0
    return -1;
1551
0
  }
1552
207
  numa_bitmask_clearall(buffer);
1553
1554
207
  update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
1555
207
  if (node_cpu_mask_v2[node] && !update) {
1556
    /* have already constructed a mask for this node */
1557
206
    if (buffer->size < node_cpu_mask_v2[node]->size) {
1558
0
      errno = EINVAL;
1559
0
      numa_error("map size mismatch");
1560
0
      return -1;
1561
0
    }
1562
206
    copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer);
1563
206
    return 0;
1564
206
  }
1565
1566
  /* need a new mask for this node */
1567
1
  mask = numa_allocate_cpumask();
1568
1
  if (!mask)
1569
0
    return -1;
1570
1571
  /* this is a kernel cpumask_t (see node_read_cpumap()) */
1572
1
  sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
1573
1
  f = fopen(fn, "r");
1574
1
  if (!f || getdelim(&line, &len, '\n', f) < 1) {
1575
0
    if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
1576
0
      numa_warn(W_nosysfs2,
1577
0
         "/sys not mounted or invalid. Assuming one node: %s",
1578
0
          strerror(errno));
1579
0
      numa_warn(W_nosysfs2,
1580
0
         "(cannot open or correctly parse %s)", fn);
1581
0
    }
1582
0
    numa_bitmask_setall(mask);
1583
0
    err = -1;
1584
0
  }
1585
1
  if (f)
1586
1
    fclose(f);
1587
1588
1
  if (line && (numa_parse_bitmap_v2(line, mask) < 0)) {
1589
0
    numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
1590
0
    numa_bitmask_setall(mask);
1591
0
    err = -1;
1592
0
  }
1593
1594
1
  free(line);
1595
1
  copy_bitmask_to_bitmask(mask, buffer);
1596
1597
  /* slightly racy, see above */
1598
  /* save the mask we created */
1599
1
  if (node_cpu_mask_v2[node]) {
1600
0
    if (update) {
1601
0
      copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
1602
0
      numa_bitmask_free(mask);
1603
0
      mask = NULL;
1604
    /* how could this be? */
1605
0
    } else if (mask != buffer)
1606
0
      numa_bitmask_free(mask);
1607
1
  } else {
1608
    /* we don't want to cache faulty result */
1609
1
    if (!err)
1610
1
      node_cpu_mask_v2[node] = mask;
1611
0
    else
1612
0
      numa_bitmask_free(mask);
1613
1
  }
1614
1
  return err;
1615
1
}
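
A caller-side sketch of the v2 interface (exported as numa_node_to_cpus() via the SYMVER above); the buffer must be a cpumask at least as large as the cached one:

  /* Sketch: print the cpus belonging to node 0. */
  #include <numa.h>
  #include <stdio.h>

  static void print_node0_cpus(void)
  {
    struct bitmask *cpus = numa_allocate_cpumask();
    unsigned int i;
    if (!cpus)
      return;
    if (numa_node_to_cpus(0, cpus) == 0)
      for (i = 0; i < cpus->size; i++)
        if (numa_bitmask_isbitset(cpus, i))
          printf("node0 cpu %u\n", i);
    numa_bitmask_free(cpus);
  }
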
1616
1617
make_internal_alias(numa_node_to_cpus_v1);
1618
make_internal_alias(numa_node_to_cpus_v2);
1619
1620
void numa_node_to_cpu_update(void)
1621
0
{
1622
0
  __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED);
1623
0
  __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED);
1624
0
}
1625
1626
/* report the node of the specified cpu */
1627
int numa_node_of_cpu(int cpu)
1628
0
{
1629
0
  struct bitmask *bmp;
1630
0
  int ncpus, nnodes, node, ret;
1631
1632
0
  ncpus = numa_num_possible_cpus();
1633
0
  if (cpu > ncpus){
1634
0
    errno = EINVAL;
1635
0
    return -1;
1636
0
  }
1637
0
  bmp = numa_bitmask_alloc(ncpus);
1638
0
  nnodes = numa_max_node();
1639
0
  for (node = 0; node <= nnodes; node++){
1640
0
    if (numa_node_to_cpus_v2_int(node, bmp) < 0) {
1641
      /* It's possible for the node to not exist */
1642
0
      continue;
1643
0
    }
1644
0
    if (numa_bitmask_isbitset(bmp, cpu)){
1645
0
      ret = node;
1646
0
      goto end;
1647
0
    }
1648
0
  }
1649
0
  ret = -1;
1650
0
  errno = EINVAL;
1651
0
end:
1652
0
  numa_bitmask_free(bmp);
1653
0
  return ret;
1654
0
}
1655
1656
SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1")
1657
int
1658
numa_run_on_node_mask_v1(const nodemask_t *mask)
1659
0
{
1660
0
  int ncpus = numa_num_possible_cpus();
1661
0
  int i, k, err;
1662
0
  unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)];
1663
0
  memset(cpus, 0, CPU_BYTES(ncpus));
1664
0
  for (i = 0; i < NUMA_NUM_NODES; i++) {
1665
0
    if (mask->n[i / BITS_PER_LONG] == 0)
1666
0
      continue;
1667
0
    if (nodemask_isset_compat(mask, i)) {
1668
0
      if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) {
1669
0
        numa_warn(W_noderunmask,
1670
0
            "Cannot read node cpumask from sysfs");
1671
0
        continue;
1672
0
      }
1673
0
      for (k = 0; k < CPU_LONGS(ncpus); k++)
1674
0
        cpus[k] |= nodecpus[k];
1675
0
    }
1676
0
  }
1677
0
  err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus);
1678
1679
  /* The sched_setaffinity API is broken because it expects
1680
     the user to guess the kernel cpuset size. Do this in a
1681
     brute force way. */
1682
0
  if (err < 0 && errno == EINVAL) {
1683
0
    int savederrno = errno;
1684
0
    char *bigbuf;
1685
0
    static int size = -1;
1686
0
    if (size == -1)
1687
0
      size = CPU_BYTES(ncpus) * 2;
1688
0
    bigbuf = malloc(CPU_BUFFER_SIZE);
1689
0
    if (!bigbuf) {
1690
0
      errno = ENOMEM;
1691
0
      return -1;
1692
0
    }
1693
0
    errno = savederrno;
1694
0
    while (size <= CPU_BUFFER_SIZE) {
1695
0
      memcpy(bigbuf, cpus, CPU_BYTES(ncpus));
1696
0
      memset(bigbuf + CPU_BYTES(ncpus), 0,
1697
0
             CPU_BUFFER_SIZE - CPU_BYTES(ncpus));
1698
0
      err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf);
1699
0
      if (err == 0 || errno != EINVAL)
1700
0
        break;
1701
0
      size *= 2;
1702
0
    }
1703
0
    savederrno = errno;
1704
0
    free(bigbuf);
1705
0
    errno = savederrno;
1706
0
  }
1707
0
  return err;
1708
0
}
1709
1710
/*
1711
 * Given a node mask (size of a kernel nodemask_t) (probably populated by
1712
 * a user argument list) set up a map of cpus (map "cpus") on those nodes.
1713
 * Then set affinity to those cpus.
1714
 */
1715
SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2")
1716
int
1717
numa_run_on_node_mask_v2(struct bitmask *bmp)
1718
0
{
1719
0
  int ncpus, i, k, err;
1720
0
  struct bitmask *cpus, *nodecpus;
1721
1722
0
  cpus = numa_allocate_cpumask();
1723
0
  ncpus = cpus->size;
1724
0
  nodecpus = numa_allocate_cpumask();
1725
0
  if (!cpus || !nodecpus)
1726
0
    return -1;
1727
1728
0
  for (i = 0; i < bmp->size; i++) {
1729
0
    if (bmp->maskp[i / BITS_PER_LONG] == 0)
1730
0
      continue;
1731
0
    if (numa_bitmask_isbitset(bmp, i)) {
1732
      /*
1733
       * numa_all_nodes_ptr is cpuset aware; use only
1734
       * these nodes
1735
       */
1736
0
      if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
1737
0
        numa_warn(W_noderunmask,
1738
0
          "node %d not allowed", i);
1739
0
        continue;
1740
0
      }
1741
0
      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1742
0
        numa_warn(W_noderunmask,
1743
0
          "Cannot read node cpumask from sysfs");
1744
0
        continue;
1745
0
      }
1746
0
      for (k = 0; k < CPU_LONGS(ncpus); k++)
1747
0
        cpus->maskp[k] |= nodecpus->maskp[k];
1748
0
    }
1749
0
  }
1750
0
  err = numa_sched_setaffinity_v2_int(0, cpus);
1751
1752
0
  numa_bitmask_free(cpus);
1753
0
  numa_bitmask_free(nodecpus);
1754
1755
  /* used to have to consider that this could fail - it shouldn't now */
1756
0
  if (err < 0) {
1757
0
    numa_error("numa_sched_setaffinity_v2_int() failed");
1758
0
  }
1759
1760
0
  return err;
1761
0
}
1762
1763
make_internal_alias(numa_run_on_node_mask_v2);
1764
1765
/*
1766
 * Given a node mask (size of a kernel nodemask_t) (probably populated by
1767
 * a user argument list) set up a map of cpus (map "cpus") on those nodes
1768
 * without any cpuset awareness. Then set affinity to those cpus.
1769
 */
1770
int
1771
numa_run_on_node_mask_all(struct bitmask *bmp)
1772
0
{
1773
0
  int ncpus, i, k, err;
1774
0
  struct bitmask *cpus, *nodecpus;
1775
1776
0
  cpus = numa_allocate_cpumask();
1777
0
  ncpus = cpus->size;
1778
0
  nodecpus = numa_allocate_cpumask();
1779
0
  if (!cpus || !nodecpus)
1780
0
    return -1;
1781
1782
0
  for (i = 0; i < bmp->size; i++) {
1783
0
    if (bmp->maskp[i / BITS_PER_LONG] == 0)
1784
0
      continue;
1785
0
    if (numa_bitmask_isbitset(bmp, i)) {
1786
0
      if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) {
1787
0
        numa_warn(W_noderunmask,
1788
0
          "node %d not allowed", i);
1789
0
        continue;
1790
0
      }
1791
0
      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1792
0
        numa_warn(W_noderunmask,
1793
0
          "Cannot read node cpumask from sysfs");
1794
0
        continue;
1795
0
      }
1796
0
      for (k = 0; k < CPU_LONGS(ncpus); k++)
1797
0
        cpus->maskp[k] |= nodecpus->maskp[k];
1798
0
    }
1799
0
  }
1800
0
  err = numa_sched_setaffinity_v2_int(0, cpus);
1801
1802
0
  numa_bitmask_free(cpus);
1803
0
  numa_bitmask_free(nodecpus);
1804
1805
  /* With possible nodes freedom it can happen easily now */
1806
0
  if (err < 0) {
1807
0
    numa_error("numa_sched_setaffinity_v2_int() failed");
1808
0
  }
1809
1810
0
  return err;
1811
0
}
1812
1813
SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1")
1814
nodemask_t
1815
numa_get_run_node_mask_v1(void)
1816
0
{
1817
0
  int ncpus = numa_num_configured_cpus();
1818
0
  int i, k;
1819
0
  int max = numa_max_node_int();
1820
0
  struct bitmask *bmp, *cpus, *nodecpus;
1821
0
  nodemask_t nmp;
1822
1823
0
  cpus = numa_allocate_cpumask();
1824
0
  if (!cpus)
1825
0
    return numa_no_nodes;
1826
0
  if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
1827
0
    nmp = numa_no_nodes;
1828
0
    goto free_cpus;
1829
0
  }
1830
1831
0
  nodecpus = numa_allocate_cpumask();
1832
0
  if (!nodecpus) {
1833
0
    nmp = numa_no_nodes;
1834
0
    goto free_cpus;
1835
0
  }
1836
1837
0
  bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */
1838
0
  if (!bmp) {
1839
0
    nmp = numa_no_nodes;
1840
0
    goto free_cpus2;
1841
0
  }
1842
1843
0
  for (i = 0; i <= max; i++) {
1844
0
    if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1845
      /* It's possible for the node to not exist */
1846
0
      continue;
1847
0
    }
1848
0
    for (k = 0; k < CPU_LONGS(ncpus); k++) {
1849
0
      if (nodecpus->maskp[k] & cpus->maskp[k])
1850
0
        numa_bitmask_setbit(bmp, i);
1851
0
    }
1852
0
  }
1853
0
  copy_bitmask_to_nodemask(bmp, &nmp);
1854
0
  numa_bitmask_free(bmp);
1855
0
free_cpus2:
1856
0
  numa_bitmask_free(nodecpus);
1857
0
free_cpus:
1858
0
  numa_bitmask_free(cpus);
1859
0
  return nmp;
1860
0
}
1861
1862
SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2")
1863
struct bitmask *
1864
numa_get_run_node_mask_v2(void)
1865
0
{
1866
0
  int i, k;
1867
0
  int ncpus = numa_num_configured_cpus();
1868
0
  int max = numa_max_node_int();
1869
0
  struct bitmask *bmp, *cpus, *nodecpus;
1870
1871
0
  bmp = numa_allocate_cpumask();
1872
0
  cpus = numa_allocate_cpumask();
1873
0
  if (!bmp || !cpus)
1874
0
    return NULL;
1875
0
  if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
1876
0
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
1877
0
    goto free_cpus;
1878
0
  }
1879
1880
0
  nodecpus = numa_allocate_cpumask();
1881
0
  for (i = 0; i <= max; i++) {
1882
    /*
1883
     * numa_all_nodes_ptr is cpuset aware; show only
1884
     * these nodes
1885
     */
1886
0
    if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
1887
0
      continue;
1888
0
    }
1889
0
    if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1890
      /* It's possible for the node to not exist */
1891
0
      continue;
1892
0
    }
1893
0
    for (k = 0; k < CPU_LONGS(ncpus); k++) {
1894
0
      if (nodecpus->maskp[k] & cpus->maskp[k])
1895
0
        numa_bitmask_setbit(bmp, i);
1896
0
    }
1897
0
  }
1898
0
  numa_bitmask_free(nodecpus);
1899
0
free_cpus:
1900
0
  numa_bitmask_free(cpus);
1901
0
  return bmp;
1902
0
}

int
numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes)
{
	int numa_num_nodes = numa_num_possible_nodes();

	return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp,
							tonodes->maskp);
}
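
/*
 * Illustrative sketch (not part of libnuma.c): moving all of the calling
 * process's pages from node 0 to node 1 through numa_migrate_pages()
 * above.  The node numbers are assumptions for the example; a return of
 * 0 means every page moved, a positive value counts pages left behind.
 * Built as a standalone program with -lnuma.
 */
#include <numa.h>

static int example_migrate_self(void)
{
	struct bitmask *from = numa_allocate_nodemask();
	struct bitmask *to = numa_allocate_nodemask();
	int rc = -1;

	if (from && to) {
		numa_bitmask_setbit(from, 0);	/* assumed source node */
		numa_bitmask_setbit(to, 1);	/* assumed destination node */
		rc = numa_migrate_pages(0, from, to);	/* pid 0: this process */
	}
	numa_bitmask_free(from);	/* numa_bitmask_free() ignores NULL */
	numa_bitmask_free(to);
	return rc;
}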

int numa_move_pages(int pid, unsigned long count,
	void **pages, const int *nodes, int *status, int flags)
{
	return move_pages(pid, count, pages, nodes, status, flags);
}
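
/*
 * Illustrative sketch (not part of libnuma.c): numa_move_pages() with a
 * NULL "nodes" array only queries placement -- "status" receives each
 * page's node or a negative errno.  Standalone example built with
 * -lnuma; the single-page buffer is an assumption for brevity.
 */
#include <numa.h>
#include <stdio.h>
#include <stdlib.h>

static void example_query_page_node(void)
{
	size_t pagesz = numa_pagesize();
	void *buf = aligned_alloc(pagesz, pagesz);
	void *pages[1] = { buf };
	int status[1] = { -1 };

	if (!buf)
		return;
	((char *)buf)[0] = 1;	/* touch the page so it is actually mapped */
	if (numa_move_pages(0, 1, pages, NULL, status, 0) == 0)
		printf("page lives on node %d\n", status[0]);
	free(buf);
}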

int numa_run_on_node(int node)
{
	int numa_num_nodes = numa_num_possible_nodes();
	int ret = -1;
	struct bitmask *cpus;

	if (node >= numa_num_nodes) {
		errno = EINVAL;
		goto out;
	}

	cpus = numa_allocate_cpumask();
	if (!cpus)
		return -1;

	if (node == -1)
		numa_bitmask_setall(cpus);
	else if (numa_node_to_cpus_v2_int(node, cpus) < 0) {
		numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs");
		goto free;
	}

	ret = numa_sched_setaffinity_v2_int(0, cpus);
free:
	numa_bitmask_free(cpus);
out:
	return ret;
}
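
/*
 * Illustrative sketch (not part of libnuma.c): the usual pairing for
 * numa_run_on_node() -- pin the calling thread to one node's CPUs, then
 * allocate from that same node.  Node 0 is an assumption; passing -1
 * would allow all nodes again.
 */
#include <numa.h>

static void *example_run_and_alloc_on_node0(size_t sz)
{
	if (numa_available() < 0)
		return NULL;
	if (numa_run_on_node(0) < 0)
		return NULL;
	return numa_alloc_onnode(sz, 0);	/* release with numa_free(p, sz) */
}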

static struct bitmask *__numa_preferred(void)
{
	int policy = 0;
	struct bitmask *bmp;

	bmp = numa_allocate_nodemask();
	if (!bmp)
		return NULL;
	/* could read the current CPU from /proc/self/status. Probably
	   not worth it. */
	numa_bitmask_clearall(bmp);
	getpol(&policy, bmp);

	if (policy != MPOL_PREFERRED &&
	    policy != MPOL_PREFERRED_MANY &&
	    policy != MPOL_BIND)
		return bmp;

	if (policy == MPOL_PREFERRED && numa_bitmask_weight(bmp) > 1) {
		errno = EINVAL;
		numa_error(__FILE__);
	}

	return bmp;
}

int numa_preferred_err(void)
{
	int first_node = 0;
	struct bitmask *bmp;

	bmp = __numa_preferred();
	if (!bmp)	/* allocation failure; report the same way as an empty mask */
		return -1;
	first_node = numa_find_first(bmp);
	numa_bitmask_free(bmp);

	return first_node;
}

int numa_preferred(void)
{
	int first_node = 0;

	first_node = numa_preferred_err();
	first_node = first_node >= 0 ? first_node : 0;

	return first_node;
}

static void __numa_set_preferred(struct bitmask *bmp)
{
	int nodes = numa_bitmask_weight(bmp);
	if (nodes > 1) {
		errno = EINVAL;
		numa_error(__FILE__);
	}

	setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp);
}

void numa_set_preferred(int node)
{
	struct bitmask *bmp = numa_allocate_nodemask();
	if (!bmp)
		return;
	numa_bitmask_setbit(bmp, node);
	__numa_set_preferred(bmp);
	numa_bitmask_free(bmp);
}
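
/*
 * Illustrative sketch (not part of libnuma.c): a numa_set_preferred() /
 * numa_preferred() round trip.  After preferring a node, allocations
 * spill to other nodes only when it is out of memory; the choice of
 * numa_max_node() as the preferred node is an assumption for the demo.
 */
#include <numa.h>
#include <stdio.h>

static void example_preferred_roundtrip(void)
{
	if (numa_available() < 0)
		return;
	numa_set_preferred(numa_max_node());
	printf("preferred node is now %d\n", numa_preferred());
	numa_set_localalloc();	/* restore the default local policy */
}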

int numa_has_preferred_many(void)
{
	set_preferred_many();
	return has_preferred_many;
}

void numa_set_preferred_many(struct bitmask *bitmask)
{
	int first_node = 0;

	set_preferred_many();
	if (!has_preferred_many) {
		numa_warn(W_nodeparse,
			"Unable to handle MANY preferred nodes. Falling back to first node\n");
		first_node = numa_find_first(bitmask);
		numa_set_preferred(first_node);
		return;
	}
	setpol(MPOL_PREFERRED_MANY, bitmask);
}
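
/*
 * Illustrative sketch (not part of libnuma.c): calling the fallback
 * logic above.  On kernels without MPOL_PREFERRED_MANY the call
 * degrades to a single preferred node, so the caller needs no special
 * casing.  Node numbers 0 and 1 are assumptions.
 */
#include <numa.h>

static void example_prefer_two_nodes(void)
{
	struct bitmask *bmp = numa_allocate_nodemask();

	if (!bmp)
		return;
	numa_bitmask_setbit(bmp, 0);
	numa_bitmask_setbit(bmp, 1);
	numa_set_preferred_many(bmp);	/* falls back to node 0 if unsupported */
	numa_bitmask_free(bmp);
}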

struct bitmask *numa_preferred_many(void)
{
	return __numa_preferred();
}

void numa_set_localalloc(void)
{
	setpol(MPOL_LOCAL, numa_no_nodes_ptr);
}

SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1")
void numa_bind_v1(const nodemask_t *nodemask)
{
	struct bitmask bitmask;

	bitmask.maskp = (unsigned long *)nodemask;
	bitmask.size  = sizeof(nodemask_t);
	numa_run_on_node_mask_v2_int(&bitmask);
	numa_set_membind_v2_int(&bitmask);
}

SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2")
void numa_bind_v2(struct bitmask *bmp)
{
	numa_run_on_node_mask_v2_int(bmp);
	numa_set_membind_v2_int(bmp);
}
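
/*
 * Illustrative sketch (not part of libnuma.c): numa_bind() combines the
 * two calls above -- CPU affinity plus a strict memory binding -- so a
 * worker touches only the chosen nodes.  It reuses
 * numa_parse_nodestring(), defined later in this file.
 */
#include <numa.h>

static int example_bind(const char *nodestr)	/* e.g. "0" or "0-1" */
{
	struct bitmask *bmp = numa_parse_nodestring(nodestr);

	if (!bmp)
		return -1;	/* unparseable or out-of-range node string */
	numa_bind(bmp);		/* resolves to numa_bind_v2() at link time */
	numa_bitmask_free(bmp);
	return 0;
}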

void numa_set_strict(int flag)
{
	if (flag)
		mbind_flags |= MPOL_MF_STRICT;
	else
		mbind_flags &= ~MPOL_MF_STRICT;
}

/*
 * Extract a node or processor number from the given string.
 * Allow a relative node / processor specification within the allowed
 * set if "relative" is nonzero
 */
static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative)
{
	long i, nr;

	if (!relative)
		return strtoul(s, end, 0);

	nr = strtoul(s, end, 0);
	if (s == *end)
		return nr;
	/* Find the nth set bit */
	for (i = 0; nr >= 0 && i <= bmp->size; i++)
		if (numa_bitmask_isbitset(bmp, i))
			nr--;
	return i - 1;
}
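
/*
 * Worked example (editorial, not in the original source): with an
 * allowed mask of {0, 2, 4}, the relative spec "+1" makes get_nr() scan
 * for the second set bit and return 2; "+0" returns 0 and "+2" returns
 * 4.  An absolute spec like "1" comes straight from strtoul() and is
 * rejected later because bit 1 is not set in the allowed mask.
 */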

/*
 * __numa_parse_nodestring() is called to create a node mask, given
 * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10.
 * (the + indicates that the numbers are nodeset-relative)
 *
 * The nodes may be specified as absolute, or relative to the current nodeset.
 * The list of available nodes is in a map pointed to by "allowed_nodes_ptr",
 * which may represent all nodes or the nodes in the current nodeset.
 *
 * The caller must free the returned bitmask.
 */
static struct bitmask *
__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr)
{
	int invert = 0, relative = 0;
	int conf_nodes = numa_num_configured_nodes();
	char *end;
	struct bitmask *mask;

	mask = numa_allocate_nodemask();
	if (!mask)
		return NULL;

	if (s[0] == 0) {
		copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask);
		return mask; /* return freeable mask */
	}
	if (*s == '!') {
		invert = 1;
		s++;
	}
	if (*s == '+') {
		relative++;
		s++;
	}
	do {
		unsigned long arg;
		int i;
		if (isalpha(*s)) {
			int n;
			if (!strcmp(s,"all")) {
				copy_bitmask_to_bitmask(allowed_nodes_ptr,
					mask);
				s += 4;
				break;
			}
			n = resolve_affinity(s, mask);
			if (n != NO_IO_AFFINITY) {
				if (n < 0)
					goto err;
				s += strlen(s) + 1;
				break;
			}
		}
		arg = get_nr(s, &end, allowed_nodes_ptr, relative);
		if (end == s) {
			numa_warn(W_nodeparse, "unparseable node description `%s'\n", s);
			goto err;
		}
		if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) {
			numa_warn(W_nodeparse, "node argument %ld is out of range\n", arg);
			goto err;
		}
		i = arg;
		numa_bitmask_setbit(mask, i);
		s = end;
		if (*s == '-') {
			char *end2;
			unsigned long arg2;
			arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative);
			if (end2 == s) {
				numa_warn(W_nodeparse, "missing node argument %s\n", s);
				goto err;
			}
			if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) {
				numa_warn(W_nodeparse, "node argument %ld out of range\n", arg2);
				goto err;
			}
			while (arg <= arg2) {
				i = arg;
				if (numa_bitmask_isbitset(allowed_nodes_ptr, i))
					numa_bitmask_setbit(mask, i);
				arg++;
			}
			s = end2;
		}
	} while (*s++ == ',');
	if (s[-1] != '\0')
		goto err;
	if (invert) {
		int i;
		for (i = 0; i < conf_nodes; i++) {
			if (numa_bitmask_isbitset(mask, i))
				numa_bitmask_clearbit(mask, i);
			else
				numa_bitmask_setbit(mask, i);
		}
	}
	return mask;

err:
	numa_bitmask_free(mask);
	return NULL;
}

/*
 * numa_parse_nodestring() is called to create a bitmask from nodes available
 * for this task.
 */
struct bitmask * numa_parse_nodestring(const char *s)
{
	return __numa_parse_nodestring(s, numa_all_nodes_ptr);
}
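
/*
 * Illustrative sketch (not part of libnuma.c): the grammar accepted
 * above.  "1,3,5-7" sets bits 1, 3, 5, 6 and 7; "+0-1" picks the first
 * two allowed nodes; "!0" selects every configured node except 0; "all"
 * copies the whole allowed set.  Node 0 is assumed to exist here.
 */
#include <numa.h>
#include <stdio.h>

static void example_parse_nodes(void)
{
	struct bitmask *bmp = numa_parse_nodestring("0");

	if (!bmp) {
		fprintf(stderr, "bad node string\n");
		return;
	}
	printf("node 0 selected: %d\n", numa_bitmask_isbitset(bmp, 0));
	numa_bitmask_free(bmp);	/* caller owns the returned mask */
}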

/*
 * numa_parse_nodestring_all() is called to create a bitmask from all nodes
 * available.
 */
struct bitmask * numa_parse_nodestring_all(const char *s)
{
	return __numa_parse_nodestring(s, numa_possible_nodes_ptr);
}

/*
 * __numa_parse_cpustring() is called to create a bitmask, given
 * an ascii string such as 25 or 12-15 or 1,3,5-7 or +6-10.
 * (the + indicates that the numbers are cpuset-relative)
 *
 * The cpus may be specified as absolute, or relative to the current cpuset.
 * The list of available cpus for this task is in the map pointed to by
 * "allowed_cpus_ptr", which may represent all cpus or the cpus in the
 * current cpuset.
 *
 * The caller must free the returned bitmask.
 */
static struct bitmask *
__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr)
{
	int invert = 0, relative = 0;
	int conf_cpus = numa_num_configured_cpus();
	char *end;
	struct bitmask *mask;
	int i;

	mask = numa_allocate_cpumask();
	if (!mask)
		return NULL;

	if (s[0] == 0)
		return mask;
	if (*s == '!') {
		invert = 1;
		s++;
	}
	if (*s == '+') {
		relative++;
		s++;
	}
	do {
		unsigned long arg;

		if (!strcmp(s,"all")) {
			copy_bitmask_to_bitmask(allowed_cpus_ptr, mask);
			s += 4;
			break;
		}
		arg = get_nr(s, &end, allowed_cpus_ptr, relative);
		if (end == s) {
			numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s);
			goto err;
		}
		if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) {
			numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s);
			goto err;
		}
		i = arg;
		numa_bitmask_setbit(mask, i);
		s = end;
		if (*s == '-') {
			char *end2;
			unsigned long arg2;
			arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative);
			if (end2 == s) {
				numa_warn(W_cpuparse, "missing cpu argument %s\n", s);
				goto err;
			}
			if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) {
				numa_warn(W_cpuparse, "cpu argument %s out of range\n", s);
				goto err;
			}
			while (arg <= arg2) {
				i = arg;
				if (numa_bitmask_isbitset(allowed_cpus_ptr, i))
					numa_bitmask_setbit(mask, i);
				arg++;
			}
			s = end2;
		}
	} while (*s++ == ',');
	if (s[-1] != '\0')
		goto err;
	if (invert) {
		for (i = 0; i < conf_cpus; i++) {
			if (numa_bitmask_isbitset(mask, i))
				numa_bitmask_clearbit(mask, i);
			else
				numa_bitmask_setbit(mask, i);
		}
	}
	return mask;

err:
	numa_bitmask_free(mask);
	return NULL;
}

/*
 * numa_parse_cpustring() is called to create a bitmask from cpus available
 * for this task.
 */
struct bitmask * numa_parse_cpustring(const char *s)
{
	return __numa_parse_cpustring(s, numa_all_cpus_ptr);
}
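
/*
 * Illustrative sketch (not part of libnuma.c): applying a parsed cpu
 * string as the calling thread's affinity, roughly what numactl's
 * --physcpubind option does with these helpers.  The cpu numbers in the
 * string must be valid on the machine running the example.
 */
#include <numa.h>

static int example_pin_cpus(const char *cpustr)	/* e.g. "0-3" or "!0" */
{
	struct bitmask *cpus = numa_parse_cpustring(cpustr);
	int rc;

	if (!cpus)
		return -1;
	rc = numa_sched_setaffinity(0, cpus);	/* pid 0: calling thread */
	numa_bitmask_free(cpus);
	return rc;
}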

/*
 * numa_parse_cpustring_all() is called to create a bitmask from all cpus
 * available.
 */
struct bitmask * numa_parse_cpustring_all(const char *s)
{
	return __numa_parse_cpustring(s, numa_possible_cpus_ptr);
}

int numa_has_home_node(void)
{
	void *mem;
	static int has_home_node = -1;
	int page_size = numa_pagesize();
	struct bitmask *tmp = numa_get_mems_allowed();

	if (has_home_node >= 0)
		goto out;

	has_home_node = 0;
	/* Detect whether home_node is supported */
	mem = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem != MAP_FAILED) {
		dombind(mem, page_size, MPOL_BIND, tmp);
		if (set_mempolicy_home_node(mem, page_size, numa_find_first(tmp), 0) == 0)
			has_home_node = 1;
		munmap(mem, page_size);
	}

out:
	numa_bitmask_free(tmp);
	return has_home_node;
}

int numa_set_mempolicy_home_node(void *start, unsigned long len, int home_node, int flags)
{
	if (set_mempolicy_home_node(start, len, home_node, flags)) {
		numa_error("set_mempolicy_home_node");
		return -1;
	}

	return 0;
}
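
/*
 * Illustrative sketch (not part of libnuma.c): giving an MPOL_BIND
 * region a home node, guarded by the probe above.  numa_tonode_memory()
 * applies the bind policy first, since a home node only refines an
 * existing bind/preferred-many policy.  Node 0 and the anonymous
 * mapping are assumptions for the example.
 */
#include <numa.h>
#include <sys/mman.h>

static void *example_bind_with_home_node(size_t len)
{
	void *p;

	if (numa_available() < 0 || !numa_has_home_node())
		return NULL;
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return NULL;
	numa_tonode_memory(p, len, 0);			/* MPOL_BIND to node 0 */
	numa_set_mempolicy_home_node(p, len, 0, 0);	/* home node = 0 */
	return p;	/* caller unmaps with munmap(p, len) */
}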