Coverage Report

Created: 2023-09-25 07:08

/src/numactl/libnuma.c
Line | Count | Source
1
/* Simple NUMA library.
2
   Copyright (C) 2003,2004,2005,2008 Andi Kleen, SuSE Labs and
3
   Cliff Wickman, SGI.
4
5
   libnuma is free software; you can redistribute it and/or
6
   modify it under the terms of the GNU Lesser General Public
7
   License as published by the Free Software Foundation; version
8
   2.1.
9
10
   libnuma is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
   Lesser General Public License for more details.
14
15
   You should find a copy of v2.1 of the GNU Lesser General Public License
16
   somewhere on your Linux system; if not, write to the Free Software
17
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18
19
   All calls are undefined when numa_available returns an error. */
20
#define _GNU_SOURCE 1
21
#include <stdlib.h>
22
#include <stdio.h>
23
#include <unistd.h>
24
#include <string.h>
25
#include <sched.h>
26
#include <dirent.h>
27
#include <errno.h>
28
#include <stdarg.h>
29
#include <ctype.h>
30
#include <assert.h>
31
32
#include <sys/mman.h>
33
#include <limits.h>
34
35
#include "config.h"
36
#include "numa.h"
37
#include "numaif.h"
38
#include "numaint.h"
39
#include "util.h"
40
#include "affinity.h"
41
42
#define WEAK __attribute__((weak))
43
44
0
#define CPU_BUFFER_SIZE 4096     /* This limits you to 32768 CPUs */
45
46
/* these are the old (version 1) masks */
47
nodemask_t numa_no_nodes;
48
nodemask_t numa_all_nodes;
49
/* these are now the default bitmask (pointers to) (version 2) */
50
struct bitmask *numa_no_nodes_ptr = NULL;
51
struct bitmask *numa_all_nodes_ptr = NULL;
52
struct bitmask *numa_possible_nodes_ptr = NULL;
53
struct bitmask *numa_all_cpus_ptr = NULL;
54
struct bitmask *numa_possible_cpus_ptr = NULL;
55
/* I would prefer to use symbol versioning to create v1 and v2 versions
56
   of numa_no_nodes and numa_all_nodes, but the loader does not correctly
57
   handle versioning of BSS versus small data items */
58
59
struct bitmask *numa_nodes_ptr = NULL;
60
static struct bitmask *numa_memnode_ptr = NULL;
61
static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
62
static char node_cpu_mask_v1_stale = 1;
63
static struct bitmask **node_cpu_mask_v2;
64
static char node_cpu_mask_v2_stale = 1;
65
66
WEAK void numa_error(char *where);
67
68
#ifndef TLS
69
#warning "not threadsafe"
70
#define __thread
71
#endif
72
73
static __thread int bind_policy = MPOL_BIND;
74
static __thread unsigned int mbind_flags = 0;
75
static int sizes_set=0;
76
static int maxconfigurednode = -1;
77
static int maxconfiguredcpu = -1;
78
static int numprocnode = -1;
79
static int numproccpu = -1;
80
static int nodemask_sz = 0;
81
static int cpumask_sz = 0;
82
83
static int has_preferred_many = 0;
84
85
int numa_exit_on_error = 0;
86
int numa_exit_on_warn = 0;
87
static void set_sizes(void);
88
89
/*
90
 * There are two special functions, _init(void) and _fini(void), which
91
 * are called automatically by the dynamic loader whenever a library is loaded.
92
 *
93
 * The v1 library depends upon nodemask_t's of all nodes and no nodes.
94
 */
95
void __attribute__((constructor))
96
numa_init(void)
97
2
{
98
2
  int max,i;
99
100
2
  if (sizes_set)
101
0
    return;
102
103
2
  set_sizes();
104
  /* numa_all_nodes should represent existing nodes on this system */
105
2
  max = numa_num_configured_nodes();
106
4
  for (i = 0; i < max; i++)
107
2
    nodemask_set_compat((nodemask_t *)&numa_all_nodes, i);
108
2
  memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
109
110
  /* clear errno */
111
2
  errno = 0;
112
2
}
113
114
static void cleanup_node_cpu_mask_v2(void);
115
116
0
#define FREE_AND_ZERO(x) if (x) { \
117
0
    numa_bitmask_free(x); \
118
0
    x = NULL;   \
119
0
  }
120
121
void __attribute__((destructor))
122
numa_fini(void)
123
0
{
124
0
  FREE_AND_ZERO(numa_all_cpus_ptr);
125
0
  FREE_AND_ZERO(numa_possible_cpus_ptr);
126
0
  FREE_AND_ZERO(numa_all_nodes_ptr);
127
0
  FREE_AND_ZERO(numa_possible_nodes_ptr);
128
0
  FREE_AND_ZERO(numa_no_nodes_ptr);
129
0
  FREE_AND_ZERO(numa_memnode_ptr);
130
0
  FREE_AND_ZERO(numa_nodes_ptr);
131
0
  cleanup_node_cpu_mask_v2();
132
0
}
133
134
static int numa_find_first(struct bitmask *mask)
135
0
{
136
0
  int i;
137
0
  for (i = 0; i < mask->size; i++)
138
0
    if (numa_bitmask_isbitset(mask, i))
139
0
      return i;
140
0
  return -1;
141
0
}
142
143
/*
144
 * The following bitmask declarations, bitmask_*() routines, and associated
145
 * _setbit() and _getbit() routines are:
146
 * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved.
147
 * SGI publishes it under the terms of the GNU General Public License, v2,
148
 * as published by the Free Software Foundation.
149
 */
150
static unsigned int
151
_getbit(const struct bitmask *bmp, unsigned int n)
152
112k
{
153
112k
  if (n < bmp->size)
154
111k
    return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1;
155
727
  else
156
727
    return 0;
157
112k
}
158
159
static void
160
_setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
161
18.6k
{
162
18.6k
  if (n < bmp->size) {
163
18.6k
    if (v)
164
7.55k
      bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong);
165
11.1k
    else
166
11.1k
      bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong));
167
18.6k
  }
168
18.6k
}
169
170
int
171
numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
172
109k
{
173
109k
  return _getbit(bmp, i);
174
109k
}
175
176
struct bitmask *
177
numa_bitmask_setall(struct bitmask *bmp)
178
0
{
179
0
  unsigned int i;
180
0
  for (i = 0; i < bmp->size; i++)
181
0
    _setbit(bmp, i, 1);
182
0
  return bmp;
183
0
}
184
185
struct bitmask *
186
numa_bitmask_clearall(struct bitmask *bmp)
187
173
{
188
173
  unsigned int i;
189
11.2k
  for (i = 0; i < bmp->size; i++)
190
11.0k
    _setbit(bmp, i, 0);
191
173
  return bmp;
192
173
}
193
194
struct bitmask *
195
numa_bitmask_setbit(struct bitmask *bmp, unsigned int i)
196
7.55k
{
197
7.55k
  _setbit(bmp, i, 1);
198
7.55k
  return bmp;
199
7.55k
}
200
201
struct bitmask *
202
numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i)
203
35
{
204
35
  _setbit(bmp, i, 0);
205
35
  return bmp;
206
35
}
207
208
unsigned int
209
numa_bitmask_nbytes(struct bitmask *bmp)
210
2
{
211
2
  return longsperbits(bmp->size) * sizeof(unsigned long);
212
2
}
213
214
/* where n is the number of bits in the map */
215
/* This function should not exit on failure, but right now we cannot really
216
   recover from this. */
217
struct bitmask *
218
numa_bitmask_alloc(unsigned int n)
219
1.38k
{
220
1.38k
  struct bitmask *bmp;
221
222
1.38k
  if (n < 1) {
223
0
    errno = EINVAL;
224
0
    numa_error("request to allocate mask for invalid number");
225
0
    exit(1);
226
0
  }
227
1.38k
  bmp = malloc(sizeof(*bmp));
228
1.38k
  if (bmp == 0)
229
0
    goto oom;
230
1.38k
  bmp->size = n;
231
1.38k
  bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
232
1.38k
  if (bmp->maskp == 0) {
233
0
    free(bmp);
234
0
    goto oom;
235
0
  }
236
1.38k
  return bmp;
237
238
0
oom:
239
0
  numa_error("Out of memory allocating bitmask");
240
0
  exit(1);
241
1.38k
}
242
243
void
244
numa_bitmask_free(struct bitmask *bmp)
245
1.37k
{
246
1.37k
  if (bmp == 0)
247
0
    return;
248
1.37k
  free(bmp->maskp);
249
1.37k
  bmp->maskp = (unsigned long *)0xdeadcdef;  /* double free tripwire */
250
1.37k
  free(bmp);
251
1.37k
  return;
252
1.37k
}
253
254
/* True if two bitmasks are equal */
255
int
256
numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2)
257
0
{
258
0
  unsigned int i;
259
0
  for (i = 0; i < bmp1->size || i < bmp2->size; i++)
260
0
    if (_getbit(bmp1, i) != _getbit(bmp2, i))
261
0
      return 0;
262
0
  return 1;
263
0
}
264
265
/* Hamming Weight: number of set bits */
266
unsigned int numa_bitmask_weight(const struct bitmask *bmp)
267
4
{
268
4
  unsigned int i;
269
4
  unsigned int w = 0;
270
2.18k
  for (i = 0; i < bmp->size; i++)
271
2.17k
    if (_getbit(bmp, i))
272
66
      w++;
273
4
  return w;
274
4
}
275
276
/* ***** end of bitmask_ routines ***** */
277
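
A minimal usage sketch of the bitmask API defined above (numa_bitmask_alloc, numa_bitmask_setbit, numa_bitmask_weight, numa_bitmask_free); not part of the measured file, assuming <numa.h> and linking with -lnuma:

/* Illustrative sketch, not part of libnuma.c: exercise the bitmask API. */
#include <numa.h>
#include <stdio.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;  /* all other libnuma calls are undefined otherwise */

  struct bitmask *bmp = numa_bitmask_alloc(64);  /* mask 64 bits wide */
  numa_bitmask_setbit(bmp, 0);
  numa_bitmask_setbit(bmp, 3);
  printf("weight = %u\n", numa_bitmask_weight(bmp));  /* prints 2 */
  numa_bitmask_clearbit(bmp, 3);
  numa_bitmask_free(bmp);
  return 0;
}
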
278
/* The next two can be overridden by the application for different error handling */
279
WEAK void numa_error(char *where)
280
0
{
281
0
  int olde = errno;
282
0
  perror(where);
283
0
  if (numa_exit_on_error)
284
0
    exit(1);
285
0
  errno = olde;
286
0
}
287
288
WEAK void numa_warn(int num, char *fmt, ...)
289
1.06k
{
290
1.06k
  static unsigned warned;
291
1.06k
  va_list ap;
292
1.06k
  int olde = errno;
293
294
  /* Give each warning only once */
295
1.06k
  if ((1<<num) & warned)
296
1.05k
    return;
297
9
  warned |= (1<<num);
298
299
9
  va_start(ap,fmt);
300
9
  fprintf(stderr, "libnuma: Warning: ");
301
9
  vfprintf(stderr, fmt, ap);
302
9
  fputc('\n', stderr);
303
9
  va_end(ap);
304
305
9
  if (numa_exit_on_warn)
306
0
    exit(1);
307
308
9
  errno = olde;
309
9
}
310
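
numa_warn() above suppresses repeats by mapping each warning number to one bit of a static word, so each distinct warning prints at most once per process (and only for numbers below the word width). A standalone sketch of the same pattern, with hypothetical message numbers:

/* Illustrative sketch of the warn-once pattern used by numa_warn(). */
#include <stdio.h>

static void warn_once(unsigned num, const char *msg)
{
  static unsigned warned;  /* bit n set => message n already printed */
  if (num >= 32 || ((1u << num) & warned))
    return;
  warned |= 1u << num;
  fprintf(stderr, "Warning: %s\n", msg);
}

int main(void)
{
  warn_once(3, "printed the first time");
  warn_once(3, "suppressed the second time");
  return 0;
}
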
311
static void setpol(int policy, struct bitmask *bmp)
312
0
{
313
0
  if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0)
314
0
    numa_error("set_mempolicy");
315
0
}
316
317
static void getpol(int *oldpolicy, struct bitmask *bmp)
318
0
{
319
0
  if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0)
320
0
    numa_error("get_mempolicy");
321
0
}
322
323
static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp)
324
0
{
325
0
  if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0,
326
0
      mbind_flags) < 0)
327
0
    numa_error("mbind");
328
0
}
329
330
/* (undocumented) */
331
/* gives the wrong answer for hugetlbfs mappings. */
332
int numa_pagesize(void)
333
0
{
334
0
  static int pagesize;
335
0
  if (pagesize > 0)
336
0
    return pagesize;
337
0
  pagesize = getpagesize();
338
0
  return pagesize;
339
0
}
340
341
make_internal_alias(numa_pagesize);
342
343
/*
344
 * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
345
 * and the highest numbered existing node (maxconfigurednode).
346
 */
347
static void
348
set_configured_nodes(void)
349
2
{
350
2
  DIR *d;
351
2
  struct dirent *de;
352
2
  long long freep;
353
354
2
  numa_memnode_ptr = numa_allocate_nodemask();
355
2
  numa_nodes_ptr = numa_allocate_nodemask();
356
357
2
  d = opendir("/sys/devices/system/node");
358
2
  if (!d) {
359
0
    maxconfigurednode = 0;
360
2
  } else {
361
24
    while ((de = readdir(d)) != NULL) {
362
22
      int nd;
363
22
      if (strncmp(de->d_name, "node", 4))
364
20
        continue;
365
2
      nd = strtoul(de->d_name+4, NULL, 0);
366
2
      numa_bitmask_setbit(numa_nodes_ptr, nd);
367
2
      if (numa_node_size64(nd, &freep) > 0)
368
2
        numa_bitmask_setbit(numa_memnode_ptr, nd);
369
2
      if (maxconfigurednode < nd)
370
2
        maxconfigurednode = nd;
371
2
    }
372
2
    closedir(d);
373
2
  }
374
2
}
375
376
/* note: despite the name, this also accepts the hex digits a-f/A-F */
static inline int is_digit(char s)
377
574
{
378
574
  return (s >= '0' && s <= '9')
379
574
    || (s >= 'a' && s <= 'f')
380
574
    || (s >= 'A' && s <= 'F');
381
574
}
382
383
/* Is string 'pre' a prefix of string 's'? */
384
static int strprefix(const char *s, const char *pre)
385
112
{
386
112
  return strncmp(s, pre, strlen(pre)) == 0;
387
112
}
388
389
static const char *mask_size_file = "/proc/self/status";
390
static const char *nodemask_prefix = "Mems_allowed:\t";
391
/*
392
 * (do this the way Paul Jackson's libcpuset does it)
393
 * The nodemask values in /proc/self/status are in an
394
 * ascii format that uses 9 characters for each 32 bits of mask.
395
 * (this could also be used to find the cpumask size)
396
 */
397
static void
398
set_nodemask_size(void)
399
2
{
400
2
  FILE *fp;
401
2
  char *buf = NULL;
402
2
  char *tmp_buf = NULL;
403
2
  int digit_len = 0;
404
2
  size_t bufsize = 0;
405
406
2
  if ((fp = fopen(mask_size_file, "r")) == NULL)
407
0
    goto done;
408
409
114
  while (getline(&buf, &bufsize, fp) > 0) {
410
112
    if (strprefix(buf, nodemask_prefix)) {
411
2
      tmp_buf = buf;
412
2
      tmp_buf += strlen(nodemask_prefix);
413
576
      while (*tmp_buf != '\n' && *tmp_buf != '\0') {
414
574
        if (is_digit(*tmp_buf))
415
512
          digit_len++;
416
574
        tmp_buf++;
417
574
      }
418
2
      nodemask_sz = digit_len * 4;
419
2
    }
420
112
  }
421
2
  free(buf);
422
2
  fclose(fp);
423
2
done:
424
2
  if (nodemask_sz == 0) {/* fall back on error */
425
0
    int pol;
426
0
    unsigned long *mask = NULL;
427
0
    nodemask_sz = 16;
428
0
    do {
429
0
      nodemask_sz <<= 1;
430
0
      mask = realloc(mask, nodemask_sz / 8);
431
0
      if (!mask)
432
0
        return;
433
0
    } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL &&
434
0
        nodemask_sz < 4096*8);
435
0
    free(mask);
436
0
  }
437
2
}
438
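
A standalone sketch of the measurement set_nodemask_size() performs above: count the hex digits of the Mems_allowed line at 4 bits each (the "9 characters per 32 bits" format is 8 hex digits plus a comma). Not part of the measured file:

/* Illustrative sketch: measure the kernel nodemask width from /proc. */
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

int main(void)
{
  FILE *fp = fopen("/proc/self/status", "r");
  char *line = NULL;
  size_t len = 0;
  int bits = 0;

  if (!fp)
    return 1;
  while (getline(&line, &len, fp) > 0) {
    if (strncmp(line, "Mems_allowed:", 13))
      continue;
    for (char *p = line + 13; *p && *p != '\n'; p++)
      if (isxdigit((unsigned char)*p))
        bits += 4;  /* the tab and commas don't count */
  }
  free(line);
  fclose(fp);
  printf("kernel nodemask_t is %d bits\n", bits);
  return 0;
}
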
439
/*
440
 * Read a mask consisting of a sequence of hexadecimal longs separated by
441
 * commas. Order them correctly and return the number of bits set.
442
 */
443
static int
444
read_mask(char *s, struct bitmask *bmp)
445
4
{
446
4
  char *end = s;
447
4
  int tmplen = (bmp->size + bitsperint - 1) / bitsperint;
448
4
  unsigned int tmp[tmplen];
449
4
  unsigned int *start = tmp;
450
4
  unsigned int i, n = 0, m = 0;
451
452
4
  if (!s)
453
0
    return 0; /* shouldn't happen */
454
455
4
  i = strtoul(s, &end, 16);
456
457
  /* Skip leading zeros */
458
66
  while (!i && *end++ == ',') {
459
62
    i = strtoul(end, &end, 16);
460
62
  }
461
462
4
  if (!i)
463
    /* End of string. No mask */
464
0
    return -1;
465
466
4
  start[n++] = i;
467
  /* Read sequence of ints */
468
4
  while (*end++ == ',') {
469
0
    i = strtoul(end, &end, 16);
470
0
    start[n++] = i;
471
472
    /* buffer overflow */
473
0
    if (n > tmplen)
474
0
      return -1;
475
0
  }
476
477
  /*
478
   * Invert sequence of ints if necessary since the first int
479
   * is the highest and we put it first because we read it first.
480
   */
481
8
  while (n) {
482
4
    int w;
483
4
    unsigned long x = 0;
484
    /* read into long values in an endian-safe way */
485
8
    for (w = 0; n && w < bitsperlong; w += bitsperint)
486
4
      x |= ((unsigned long)start[n-- - 1] << w);
487
488
4
    bmp->maskp[m++] = x;
489
4
  }
490
  /*
491
   * Return the number of bits set
492
   */
493
4
  return numa_bitmask_weight(bmp);
494
4
}
495
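
To see why read_mask() above inverts the sequence: the kernel prints the most significant 32-bit group first, so a parser must store the groups in reverse. A standalone sketch with a hypothetical two-group mask string:

/* Illustrative sketch of the group reversal read_mask() performs. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(void)
{
  const char *s = "00000001,00000003";  /* hypothetical Cpus_allowed value */
  unsigned int grp[8];
  int n = 0;

  char *copy = strdup(s), *save = NULL;
  for (char *tok = strtok_r(copy, ",", &save);
       tok && n < 8; tok = strtok_r(NULL, ",", &save))
    grp[n++] = (unsigned int)strtoul(tok, NULL, 16);
  free(copy);

  /* grp[0] is the most significant group */
  for (int i = 0; i < n; i++)
    printf("bits %d-%d: 0x%08x\n",
           (n - 1 - i) * 32, (n - i) * 32 - 1, grp[i]);
  return 0;
}
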
496
/*
497
 * Read a process's constraints in terms of nodes and cpus from
498
 * /proc/self/status.
499
 */
500
static void
501
set_task_constraints(void)
502
2
{
503
2
  int hicpu = maxconfiguredcpu;
504
2
  int i;
505
2
  char *buffer = NULL;
506
2
  size_t buflen = 0;
507
2
  FILE *f;
508
509
2
  numa_all_cpus_ptr = numa_allocate_cpumask();
510
2
  numa_possible_cpus_ptr = numa_allocate_cpumask();
511
2
  numa_all_nodes_ptr = numa_allocate_nodemask();
512
2
  numa_possible_nodes_ptr = numa_allocate_cpumask();
513
2
  numa_no_nodes_ptr = numa_allocate_nodemask();
514
515
2
  f = fopen(mask_size_file, "r");
516
2
  if (!f) {
517
    //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file);
518
0
    return;
519
0
  }
520
521
114
  while (getline(&buffer, &buflen, f) > 0) {
522
    /* mask starts after [last] tab */
523
112
    char  *mask = strrchr(buffer,'\t') + 1;
524
525
112
    if (strncmp(buffer,"Cpus_allowed:",13) == 0)
526
2
      numproccpu = read_mask(mask, numa_all_cpus_ptr);
527
528
112
    if (strncmp(buffer,"Mems_allowed:",13) == 0) {
529
2
      numprocnode = read_mask(mask, numa_all_nodes_ptr);
530
2
    }
531
112
  }
532
2
  fclose(f);
533
2
  free(buffer);
534
535
66
  for (i = 0; i <= hicpu; i++)
536
64
    numa_bitmask_setbit(numa_possible_cpus_ptr, i);
537
4
  for (i = 0; i <= maxconfigurednode; i++)
538
2
    numa_bitmask_setbit(numa_possible_nodes_ptr, i);
539
540
  /*
541
   * Cpus_allowed in the kernel can be set to all f's,
542
   * i.e. it may be a superset of the actual available processors.
543
   * As such let's reduce numproccpu to the number of actual
544
   * available cpus.
545
   */
546
2
  if (numproccpu <= 0) {
547
0
    for (i = 0; i <= hicpu; i++)
548
0
      numa_bitmask_setbit(numa_all_cpus_ptr, i);
549
0
    numproccpu = hicpu+1;
550
0
  }
551
552
2
  if (numproccpu > hicpu+1) {
553
0
    numproccpu = hicpu+1;
554
0
    for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) {
555
0
      numa_bitmask_clearbit(numa_all_cpus_ptr, i);
556
0
    }
557
0
  }
558
559
2
  if (numprocnode <= 0) {
560
0
    for (i = 0; i <= maxconfigurednode; i++)
561
0
      numa_bitmask_setbit(numa_all_nodes_ptr, i);
562
0
    numprocnode = maxconfigurednode + 1;
563
0
  }
564
565
2
  return;
566
2
}
567
568
/*
569
 * Find the highest cpu number possible (in other words the size
570
 * of a kernel cpumask_t (in bits) - 1)
571
 */
572
static void
573
set_numa_max_cpu(void)
574
2
{
575
2
  int len = 4096;
576
2
  int n;
577
2
  int olde = errno;
578
2
  struct bitmask *buffer;
579
580
2
  do {
581
2
    buffer = numa_bitmask_alloc(len);
582
2
    n = numa_sched_getaffinity_v2_int(0, buffer);
583
    /* on success, returns size of kernel cpumask_t, in bytes */
584
2
    if (n < 0) {
585
0
      if (errno == EINVAL) {
586
0
        if (len >= 1024*1024)
587
0
          break;
588
0
        len *= 2;
589
0
        numa_bitmask_free(buffer);
590
0
        continue;
591
0
      } else {
592
0
        numa_warn(W_numcpus, "Unable to determine max cpu"
593
0
            " (sched_getaffinity: %s); guessing...",
594
0
            strerror(errno));
595
0
        n = sizeof(cpu_set_t);
596
0
        break;
597
0
      }
598
0
    }
599
2
  } while (n < 0);
600
0
  numa_bitmask_free(buffer);
601
2
  errno = olde;
602
2
  cpumask_sz = n*8;
603
2
}
604
605
/*
606
 * get the total (configured) number of cpus - both online and offline
607
 */
608
static void
609
set_configured_cpus(void)
610
2
{
611
2
  maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1;
612
2
  if (maxconfiguredcpu == -1)
613
0
    numa_error("sysconf(NPROCESSORS_CONF) failed");
614
2
}
615
616
static void
617
set_kernel_abi()
618
2
{
619
2
  int oldp;
620
2
  struct bitmask *bmp, *tmp;
621
2
  bmp = numa_allocate_nodemask();
622
2
  tmp = numa_get_mems_allowed();
623
624
2
  if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0)
625
0
    goto out;
626
627
2
  if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) {
628
0
    has_preferred_many++;
629
    /* reset the old memory policy */
630
0
    setpol(oldp, bmp);
631
0
  }
632
633
2
out:
634
2
  numa_bitmask_free(tmp);
635
2
  numa_bitmask_free(bmp);
636
2
}
637
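
set_kernel_abi() above detects MPOL_PREFERRED_MANY support by simply trying the policy and restoring the previous one. A standalone sketch of the same try-and-restore probe, assuming headers new enough to define MPOL_PREFERRED_MANY:

/* Illustrative sketch of the try-and-restore policy probe. */
#include <numa.h>
#include <numaif.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;
  struct bitmask *old = numa_allocate_nodemask();
  struct bitmask *allowed = numa_get_mems_allowed();
  int oldpol;

  if (get_mempolicy(&oldpol, old->maskp, old->size + 1, 0, 0) == 0) {
    if (set_mempolicy(MPOL_PREFERRED_MANY,
                      allowed->maskp, allowed->size) == 0) {
      printf("MPOL_PREFERRED_MANY supported\n");
      set_mempolicy(oldpol, old->maskp, old->size + 1);  /* restore */
    } else if (errno == EINVAL) {
      printf("MPOL_PREFERRED_MANY not supported\n");
    }
  }
  numa_bitmask_free(allowed);
  numa_bitmask_free(old);
  return 0;
}
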
638
/*
639
 * Initialize all the sizes.
640
 */
641
static void
642
set_sizes(void)
643
2
{
644
2
  sizes_set++;
645
2
  set_nodemask_size();  /* size of kernel nodemask_t */
646
2
  set_configured_nodes(); /* configured nodes listed in /sys */
647
2
  set_numa_max_cpu(); /* size of kernel cpumask_t */
648
2
  set_configured_cpus();  /* cpus listed in /sys/devices/system/cpu */
649
2
  set_task_constraints(); /* cpus and nodes for current task */
650
2
  set_kernel_abi(); /* is the preferred_many policy supported? */
651
2
}
652
653
int
654
numa_num_configured_nodes(void)
655
685
{
656
  /*
657
  * NOTE: this function's behavior matches the documentation (ie: it
658
  * returns a count of nodes with memory) despite the poor function
659
  * naming.  We also cannot use the similarly poorly named
660
  * numa_all_nodes_ptr as it only tracks nodes with memory from which
661
  * the calling process can allocate.  Think sparse nodes, memory-less
662
  * nodes, cpusets...
663
  */
664
685
  int memnodecount=0, i;
665
666
1.37k
  for (i=0; i <= maxconfigurednode; i++) {
667
685
    if (numa_bitmask_isbitset(numa_memnode_ptr, i))
668
685
      memnodecount++;
669
685
  }
670
685
  return memnodecount;
671
685
}
672
673
int
674
numa_num_configured_cpus(void)
675
683
{
676
677
683
  return maxconfiguredcpu+1;
678
683
}
679
680
int
681
numa_num_possible_nodes(void)
682
696
{
683
696
  return nodemask_sz;
684
696
}
685
686
int
687
numa_num_possible_cpus(void)
688
690
{
689
690
  return cpumask_sz;
690
690
}
691
692
int
693
numa_num_task_nodes(void)
694
0
{
695
0
  return numprocnode;
696
0
}
697
698
/*
699
 * for backward compatibility
700
 */
701
int
702
numa_num_thread_nodes(void)
703
0
{
704
0
  return numa_num_task_nodes();
705
0
}
706
707
int
708
numa_num_task_cpus(void)
709
0
{
710
0
  return numproccpu;
711
0
}
712
713
/*
714
 * for backward compatibility
715
 */
716
int
717
numa_num_thread_cpus(void)
718
0
{
719
0
  return numa_num_task_cpus();
720
0
}
721
722
/*
723
 * Return the number of the highest node in this running system.
724
 */
725
int
726
numa_max_node(void)
727
173
{
728
173
  return maxconfigurednode;
729
173
}
730
731
make_internal_alias(numa_max_node);
732
733
/*
734
 * Return the number of the highest possible node in a system,
735
 * which for v1 is the size of a numa.h nodemask_t (in bits) - 1,
736
 * but for v2 is the size of a kernel nodemask_t (in bits) - 1.
737
 */
738
SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1")
739
int
740
numa_max_possible_node_v1(void)
741
0
{
742
0
  return ((sizeof(nodemask_t)*8)-1);
743
0
}
744
745
SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2")
746
int
747
numa_max_possible_node_v2(void)
748
696
{
749
696
  return numa_num_possible_nodes()-1;
750
696
}
751
752
make_internal_alias(numa_max_possible_node_v1);
753
make_internal_alias(numa_max_possible_node_v2);
754
755
/*
756
 * Allocate a bitmask for cpus, of a size large enough to
757
 * match the kernel's cpumask_t.
758
 */
759
struct bitmask *
760
numa_allocate_cpumask()
761
690
{
762
690
  int ncpus = numa_num_possible_cpus();
763
764
690
  return numa_bitmask_alloc(ncpus);
765
690
}
766
767
/*
768
 * Allocate a bitmask the size of a libnuma nodemask_t
769
 */
770
static struct bitmask *
771
allocate_nodemask_v1(void)
772
0
{
773
0
  int nnodes = numa_max_possible_node_v1_int()+1;
774
775
0
  return numa_bitmask_alloc(nnodes);
776
0
}
777
778
/*
779
 * Allocate a bitmask for nodes, of a size large enough to
780
 * match the kernel's nodemask_t.
781
 */
782
struct bitmask *
783
numa_allocate_nodemask(void)
784
695
{
785
695
  struct bitmask *bmp;
786
695
  int nnodes = numa_max_possible_node_v2_int() + 1;
787
788
695
  bmp = numa_bitmask_alloc(nnodes);
789
695
  return bmp;
790
695
}
791
792
/* (cache the result?) */
793
long long numa_node_size64(int node, long long *freep)
794
2
{
795
2
  size_t len = 0;
796
2
  char *line = NULL;
797
2
  long long size = -1;
798
2
  FILE *f;
799
2
  char fn[64];
800
2
  int ok = 0;
801
2
  int required = freep ? 2 : 1;
802
803
2
  if (freep)
804
2
    *freep = -1;
805
2
  sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node);
806
2
  f = fopen(fn, "r");
807
2
  if (!f)
808
0
    return -1;
809
70
  while (getdelim(&line, &len, '\n', f) > 0) {
810
68
    char *end;
811
68
    char *s = strcasestr(line, "kB");
812
68
    if (!s)
813
6
      continue;
814
62
    --s;
815
124
    while (s > line && isspace(*s))
816
62
      --s;
817
324
    while (s > line && isdigit(*s))
818
262
      --s;
819
62
    if (strstr(line, "MemTotal")) {
820
2
      size = strtoull(s,&end,0) << 10;
821
2
      if (end == s)
822
0
        size = -1;
823
2
      else
824
2
        ok++;
825
2
    }
826
62
    if (freep && strstr(line, "MemFree")) {
827
2
      *freep = strtoull(s,&end,0) << 10;
828
2
      if (end == s)
829
0
        *freep = -1;
830
2
      else
831
2
        ok++;
832
2
    }
833
62
  }
834
2
  fclose(f);
835
2
  free(line);
836
2
  if (ok != required)
837
0
    numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok);
838
2
  return size;
839
2
}
840
841
make_internal_alias(numa_node_size64);
842
843
long numa_node_size(int node, long *freep)
844
0
{
845
0
  long long f2;
846
0
  long sz = numa_node_size64_int(node, &f2);
847
0
  if (freep)
848
0
    *freep = f2;
849
0
  return sz;
850
0
}
851
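
A usage sketch for numa_node_size64() above: walk the configured nodes and print total and free memory (nodes may be sparse, so a -1 return is skipped). Not part of the measured file:

/* Illustrative sketch: per-node memory via numa_node_size64(). */
#include <numa.h>
#include <stdio.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;
  for (int node = 0; node <= numa_max_node(); node++) {
    long long free_b;
    long long size_b = numa_node_size64(node, &free_b);
    if (size_b >= 0)
      printf("node %d: %lld MB total, %lld MB free\n",
             node, size_b >> 20, free_b >> 20);
  }
  return 0;
}
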
852
int numa_available(void)
853
0
{
854
0
  if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && errno == ENOSYS)
855
0
    return -1;
856
0
  return 0;
857
0
}
858
859
SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1")
860
void
861
numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask)
862
0
{
863
0
  struct bitmask bitmask;
864
865
0
  bitmask.size = sizeof(nodemask_t) * 8;
866
0
  bitmask.maskp = (unsigned long *)mask;
867
0
  dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
868
0
}
869
870
SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2")
871
void
872
numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp)
873
0
{
874
0
  dombind(mem, size, MPOL_INTERLEAVE, bmp);
875
0
}
876
877
void numa_tonode_memory(void *mem, size_t size, int node)
878
0
{
879
0
  struct bitmask *nodes;
880
881
0
  nodes = numa_allocate_nodemask();
882
0
  numa_bitmask_setbit(nodes, node);
883
0
  dombind(mem, size, bind_policy, nodes);
884
0
  numa_bitmask_free(nodes);
885
0
}
886
887
SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1")
888
void
889
numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask)
890
0
{
891
0
  struct bitmask bitmask;
892
893
0
  bitmask.maskp = (unsigned long *)mask;
894
0
  bitmask.size  = sizeof(nodemask_t);
895
0
  dombind(mem, size, bind_policy, &bitmask);
896
0
}
897
898
SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2")
899
void
900
numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp)
901
0
{
902
0
  dombind(mem, size, bind_policy, bmp);
903
0
}
904
905
void numa_setlocal_memory(void *mem, size_t size)
906
0
{
907
0
  dombind(mem, size, MPOL_LOCAL, NULL);
908
0
}
909
910
void numa_police_memory(void *mem, size_t size)
911
0
{
912
0
  int pagesize = numa_pagesize_int();
913
0
  unsigned long i;
914
0
  char *p = mem;
915
0
  for (i = 0; i < size; i += pagesize, p += pagesize)
916
0
    __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED);
917
918
0
}
919
920
make_internal_alias(numa_police_memory);
921
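
The atomic "p &= 0xff" in numa_police_memory() above leaves every byte's value unchanged, but the read-modify-write dirties one byte per page, so each page is faulted in and placed under the current policy without disturbing its contents. In spirit (using the GCC/Clang __atomic builtin, as the file itself does):

/* Illustrative sketch of the page-touching trick used above. */
#include <stddef.h>

static void touch_pages(char *p, size_t size, size_t pagesize)
{
  for (size_t off = 0; off < size; off += pagesize)
    __atomic_and_fetch(&p[off], 0xff, __ATOMIC_RELAXED);  /* value unchanged */
}
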
922
void *numa_alloc(size_t size)
923
0
{
924
0
  char *mem;
925
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
926
0
       0, 0);
927
0
  if (mem == (char *)-1)
928
0
    return NULL;
929
0
  numa_police_memory_int(mem, size);
930
0
  return mem;
931
0
}
932
933
void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
934
0
{
935
0
  char *mem;
936
0
  mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
937
0
  if (mem == (char *)-1)
938
0
    return NULL;
939
  /*
940
   *  The memory policy of the allocated pages is preserved by mremap(), so
941
   *  there is no need to (re)set it here. If the policy of the original
942
   *  allocation is not set, the new pages will be allocated according to the
943
   *  process' mempolicy. Trying to allocate explicitly the new pages on the
944
   *  same node as the original ones would require changing the policy of the
945
   *  newly allocated pages, which violates the numa_realloc() semantics.
946
   */
947
0
  return mem;
948
0
}
949
950
SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1")
951
void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
952
0
{
953
0
  char *mem;
954
0
  struct bitmask bitmask;
955
956
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
957
0
      0, 0);
958
0
  if (mem == (char *)-1)
959
0
    return NULL;
960
0
  bitmask.maskp = (unsigned long *)mask;
961
0
  bitmask.size  = sizeof(nodemask_t);
962
0
  dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
963
0
  return mem;
964
0
}
965
966
SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2")
967
void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp)
968
0
{
969
0
  char *mem;
970
971
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
972
0
       0, 0);
973
0
  if (mem == (char *)-1)
974
0
    return NULL;
975
0
  dombind(mem, size, MPOL_INTERLEAVE, bmp);
976
0
  return mem;
977
0
}
978
979
make_internal_alias(numa_alloc_interleaved_subset_v1);
980
make_internal_alias(numa_alloc_interleaved_subset_v2);
981
982
void *
983
numa_alloc_interleaved(size_t size)
984
0
{
985
0
  return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr);
986
0
}
987
988
/*
989
 * given a user node mask, set memory policy to use those nodes
990
 */
991
SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1")
992
void
993
numa_set_interleave_mask_v1(nodemask_t *mask)
994
0
{
995
0
  struct bitmask *bmp;
996
0
  int nnodes = numa_max_possible_node_v1_int()+1;
997
998
0
  bmp = numa_bitmask_alloc(nnodes);
999
0
  copy_nodemask_to_bitmask(mask, bmp);
1000
0
  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
1001
0
    setpol(MPOL_DEFAULT, bmp);
1002
0
  else
1003
0
    setpol(MPOL_INTERLEAVE, bmp);
1004
0
  numa_bitmask_free(bmp);
1005
0
}
1006
1007
1008
SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2")
1009
void
1010
numa_set_interleave_mask_v2(struct bitmask *bmp)
1011
0
{
1012
0
  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
1013
0
    setpol(MPOL_DEFAULT, bmp);
1014
0
  else
1015
0
    setpol(MPOL_INTERLEAVE, bmp);
1016
0
}
1017
1018
SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1")
1019
nodemask_t
1020
numa_get_interleave_mask_v1(void)
1021
0
{
1022
0
  int oldpolicy;
1023
0
  struct bitmask *bmp;
1024
0
  nodemask_t mask;
1025
1026
0
  bmp = allocate_nodemask_v1();
1027
0
  getpol(&oldpolicy, bmp);
1028
0
  if (oldpolicy == MPOL_INTERLEAVE)
1029
0
    copy_bitmask_to_nodemask(bmp, &mask);
1030
0
  else
1031
0
    copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask);
1032
0
  numa_bitmask_free(bmp);
1033
0
  return mask;
1034
0
}
1035
1036
SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2")
1037
struct bitmask *
1038
numa_get_interleave_mask_v2(void)
1039
0
{
1040
0
  int oldpolicy;
1041
0
  struct bitmask *bmp;
1042
1043
0
  bmp = numa_allocate_nodemask();
1044
0
  getpol(&oldpolicy, bmp);
1045
0
  if (oldpolicy != MPOL_INTERLEAVE)
1046
0
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
1047
0
  return bmp;
1048
0
}
1049
1050
/* (undocumented) */
1051
int numa_get_interleave_node(void)
1052
0
{
1053
0
  int nd;
1054
0
  if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0)
1055
0
    return nd;
1056
0
  return 0;
1057
0
}
1058
1059
void *numa_alloc_onnode(size_t size, int node)
1060
0
{
1061
0
  char *mem;
1062
0
  struct bitmask *bmp;
1063
1064
0
  bmp = numa_allocate_nodemask();
1065
0
  numa_bitmask_setbit(bmp, node);
1066
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1067
0
       0, 0);
1068
0
  if (mem == (char *)-1)
1069
0
    mem = NULL;
1070
0
  else
1071
0
    dombind(mem, size, bind_policy, bmp);
1072
0
  numa_bitmask_free(bmp);
1073
0
  return mem;
1074
0
}
1075
1076
void *numa_alloc_local(size_t size)
1077
0
{
1078
0
  char *mem;
1079
0
  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
1080
0
       0, 0);
1081
0
  if (mem == (char *)-1)
1082
0
    mem = NULL;
1083
0
  else
1084
0
    dombind(mem, size, MPOL_LOCAL, NULL);
1085
0
  return mem;
1086
0
}
1087
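
A usage sketch for the allocators above: place a buffer on node 0 (a hypothetical choice; the node must exist), touch it so the pages are actually placed, and release it with the matching size:

/* Illustrative sketch: node-pinned allocation with numa_alloc_onnode(). */
#include <numa.h>
#include <string.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;
  size_t sz = 4 << 20;                   /* 4 MB */
  void *buf = numa_alloc_onnode(sz, 0);  /* place on node 0 */
  if (!buf)
    return 1;
  memset(buf, 0, sz);  /* touch so pages are faulted in under the policy */
  numa_free(buf, sz);  /* size must match the allocation */
  return 0;
}
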
1088
void numa_set_bind_policy(int strict)
1089
0
{
1090
0
  if (strict)
1091
0
    bind_policy = MPOL_BIND;
1092
0
  else if (has_preferred_many)
1093
0
    bind_policy = MPOL_PREFERRED_MANY;
1094
0
  else
1095
0
    bind_policy = MPOL_PREFERRED;
1096
0
}
1097
1098
SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1")
1099
void
1100
numa_set_membind_v1(const nodemask_t *mask)
1101
0
{
1102
0
  struct bitmask bitmask;
1103
1104
0
  bitmask.maskp = (unsigned long *)mask;
1105
0
  bitmask.size  = sizeof(nodemask_t);
1106
0
  setpol(MPOL_BIND, &bitmask);
1107
0
}
1108
1109
SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2")
1110
void
1111
numa_set_membind_v2(struct bitmask *bmp)
1112
0
{
1113
0
  setpol(MPOL_BIND, bmp);
1114
0
}
1115
1116
make_internal_alias(numa_set_membind_v2);
1117
1118
void
1119
numa_set_membind_balancing(struct bitmask *bmp)
1120
0
{
1121
  /* MPOL_F_NUMA_BALANCING: ignore if unsupported */
1122
0
  if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING,
1123
0
        bmp->maskp, bmp->size + 1) < 0) {
1124
0
    if (errno == EINVAL) {
1125
0
      errno = 0;
1126
0
      numa_set_membind_v2(bmp);
1127
0
    } else
1128
0
      numa_error("set_mempolicy");
1129
0
  }
1130
0
}
1131
1132
/*
1133
 * copy a bitmask map body to a numa.h nodemask_t structure
1134
 */
1135
void
1136
copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp)
1137
0
{
1138
0
  int max, i;
1139
1140
0
  memset(nmp, 0, sizeof(nodemask_t));
1141
0
        max = (sizeof(nodemask_t)*8);
1142
0
  for (i=0; i<bmp->size; i++) {
1143
0
    if (i >= max)
1144
0
      break;
1145
0
    if (numa_bitmask_isbitset(bmp, i))
1146
0
      nodemask_set_compat((nodemask_t *)nmp, i);
1147
0
  }
1148
0
}
1149
1150
/*
1151
 * copy a bitmask map body to another bitmask body
1152
 * fill a larger destination with zeroes
1153
 */
1154
void
1155
copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto)
1156
178
{
1157
178
  int bytes;
1158
1159
178
  if (bmpfrom->size >= bmpto->size) {
1160
178
    memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size));
1161
178
  } else if (bmpfrom->size < bmpto->size) {
1162
0
    bytes = CPU_BYTES(bmpfrom->size);
1163
0
    memcpy(bmpto->maskp, bmpfrom->maskp, bytes);
1164
0
    memset(((char *)bmpto->maskp)+bytes, 0,
1165
0
          CPU_BYTES(bmpto->size)-bytes);
1166
0
  }
1167
178
}
1168
1169
/*
1170
 * copy a numa.h nodemask_t structure to a bitmask map body
1171
 */
1172
void
1173
copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp)
1174
0
{
1175
0
  int max, i;
1176
1177
0
  numa_bitmask_clearall(bmp);
1178
0
        max = (sizeof(nodemask_t)*8);
1179
0
  if (max > bmp->size)
1180
0
    max = bmp->size;
1181
0
  for (i=0; i<max; i++) {
1182
0
    if (nodemask_isset_compat(nmp, i))
1183
0
      numa_bitmask_setbit(bmp, i);
1184
0
  }
1185
0
}
1186
1187
SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1")
1188
nodemask_t
1189
numa_get_membind_v1(void)
1190
0
{
1191
0
  int oldpolicy;
1192
0
  struct bitmask *bmp;
1193
0
  nodemask_t nmp;
1194
1195
0
  bmp = allocate_nodemask_v1();
1196
0
  getpol(&oldpolicy, bmp);
1197
0
  if (oldpolicy == MPOL_BIND) {
1198
0
    copy_bitmask_to_nodemask(bmp, &nmp);
1199
0
  } else {
1200
    /* copy the body of the map to numa_all_nodes */
1201
0
    copy_bitmask_to_nodemask(bmp, &numa_all_nodes);
1202
0
    nmp = numa_all_nodes;
1203
0
  }
1204
0
  numa_bitmask_free(bmp);
1205
0
  return nmp;
1206
0
}
1207
1208
SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2")
1209
struct bitmask *
1210
numa_get_membind_v2(void)
1211
0
{
1212
0
  int oldpolicy;
1213
0
  struct bitmask *bmp;
1214
1215
0
  bmp = numa_allocate_nodemask();
1216
0
  getpol(&oldpolicy, bmp);
1217
0
  if (oldpolicy != MPOL_BIND)
1218
0
    copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp);
1219
0
  return bmp;
1220
0
}
1221
1222
//TODO:  do we need a v1 nodemask_t version?
1223
struct bitmask *numa_get_mems_allowed(void)
1224
2
{
1225
2
  struct bitmask *bmp;
1226
1227
  /*
1228
   * can change, so query on each call.
1229
   */
1230
2
  bmp = numa_allocate_nodemask();
1231
2
  if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0,
1232
2
        MPOL_F_MEMS_ALLOWED) < 0)
1233
0
    numa_error("get_mempolicy");
1234
2
  return bmp;
1235
2
}
1236
make_internal_alias(numa_get_mems_allowed);
1237
1238
void numa_free(void *mem, size_t size)
1239
0
{
1240
0
  munmap(mem, size);
1241
0
}
1242
1243
SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1")
1244
int
1245
numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus)
1246
0
{
1247
0
  int i;
1248
0
  char *p = strchr(line, '\n');
1249
0
  if (!p)
1250
0
    return -1;
1251
1252
0
  for (i = 0; p > line;i++) {
1253
0
    char *oldp, *endp;
1254
0
    oldp = p;
1255
0
    if (*p == ',')
1256
0
      --p;
1257
0
    while (p > line && *p != ',')
1258
0
      --p;
1259
    /* Eat two 32bit fields at a time to get longs */
1260
0
    if (p > line && sizeof(unsigned long) == 8) {
1261
0
      oldp--;
1262
0
      memmove(p, p+1, oldp-p+1);
1263
0
      while (p > line && *p != ',')
1264
0
        --p;
1265
0
    }
1266
0
    if (*p == ',')
1267
0
      p++;
1268
0
    if (i >= CPU_LONGS(ncpus))
1269
0
      return -1;
1270
0
    mask[i] = strtoul(p, &endp, 16);
1271
0
    if (endp != oldp)
1272
0
      return -1;
1273
0
    p--;
1274
0
  }
1275
0
  return 0;
1276
0
}
1277
1278
SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2")
1279
int
1280
numa_parse_bitmap_v2(char *line, struct bitmask *mask)
1281
1
{
1282
1
  int i, ncpus;
1283
1
  char *p = strchr(line, '\n');
1284
1
  if (!p)
1285
0
    return -1;
1286
1
  ncpus = mask->size;
1287
1288
2
  for (i = 0; p > line;i++) {
1289
1
    char *oldp, *endp;
1290
1
    oldp = p;
1291
1
    if (*p == ',')
1292
0
      --p;
1293
9
    while (p > line && *p != ',')
1294
8
      --p;
1295
    /* Eat two 32bit fields at a time to get longs */
1296
1
    if (p > line && sizeof(unsigned long) == 8) {
1297
0
      oldp--;
1298
0
      memmove(p, p+1, oldp-p+1);
1299
0
      while (p > line && *p != ',')
1300
0
        --p;
1301
0
    }
1302
1
    if (*p == ',')
1303
0
      p++;
1304
1
    if (i >= CPU_LONGS(ncpus))
1305
0
      return -1;
1306
1
    mask->maskp[i] = strtoul(p, &endp, 16);
1307
1
    if (endp != oldp)
1308
0
      return -1;
1309
1
    p--;
1310
1
  }
1311
1
  return 0;
1312
1
}
1313
1314
static void init_node_cpu_mask_v2(void)
1315
1
{
1316
1
  int nnodes = numa_max_possible_node_v2_int() + 1;
1317
1
  node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *));
1318
1
}
1319
1320
static void cleanup_node_cpu_mask_v2(void)
1321
0
{
1322
0
  if (node_cpu_mask_v2) {
1323
0
    int i;
1324
0
    int nnodes;
1325
0
    nnodes = numa_max_possible_node_v2_int() + 1;
1326
0
    for (i = 0; i < nnodes; i++) {
1327
0
      FREE_AND_ZERO(node_cpu_mask_v2[i]);
1328
0
    }
1329
0
    free(node_cpu_mask_v2);
1330
0
    node_cpu_mask_v2 = NULL;
1331
0
  }
1332
0
}
1333
1334
/* This would be better with some locking, but I don't want to make libnuma
1335
   dependent on pthreads right now. The races are relatively harmless. */
1336
SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1")
1337
int
1338
numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
1339
0
{
1340
0
  int err = 0;
1341
0
  char fn[64];
1342
0
  FILE *f;
1343
0
  char update;
1344
0
  char *line = NULL;
1345
0
  size_t len = 0;
1346
0
  struct bitmask bitmask;
1347
0
  int buflen_needed;
1348
0
  unsigned long *mask;
1349
0
  int ncpus = numa_num_possible_cpus();
1350
0
  int maxnode = numa_max_node_int();
1351
1352
0
  buflen_needed = CPU_BYTES(ncpus);
1353
0
  if ((unsigned)node > maxnode || bufferlen < buflen_needed) {
1354
0
    errno = ERANGE;
1355
0
    return -1;
1356
0
  }
1357
0
  if (bufferlen > buflen_needed)
1358
0
    memset(buffer, 0, bufferlen);
1359
0
  update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
1360
0
  if (node_cpu_mask_v1[node] && !update) {
1361
0
    memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
1362
0
    return 0;
1363
0
  }
1364
1365
0
  mask = malloc(buflen_needed);
1366
0
  if (!mask)
1367
0
    mask = (unsigned long *)buffer;
1368
0
  memset(mask, 0, buflen_needed);
1369
1370
0
  sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
1371
0
  f = fopen(fn, "r");
1372
0
  if (!f || getdelim(&line, &len, '\n', f) < 1) {
1373
0
    if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
1374
0
      numa_warn(W_nosysfs2,
1375
0
         "/sys not mounted or invalid. Assuming one node: %s",
1376
0
          strerror(errno));
1377
0
      numa_warn(W_nosysfs2,
1378
0
         "(cannot open or correctly parse %s)", fn);
1379
0
    }
1380
0
    bitmask.maskp = (unsigned long *)mask;
1381
0
    bitmask.size  = buflen_needed * 8;
1382
0
    numa_bitmask_setall(&bitmask);
1383
0
    err = -1;
1384
0
  }
1385
0
  if (f)
1386
0
    fclose(f);
1387
1388
0
  if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) {
1389
0
    numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
1390
0
    bitmask.maskp = (unsigned long *)mask;
1391
0
    bitmask.size  = buflen_needed * 8;
1392
0
    numa_bitmask_setall(&bitmask);
1393
0
    err = -1;
1394
0
  }
1395
1396
0
  free(line);
1397
0
  memcpy(buffer, mask, buflen_needed);
1398
1399
  /* slightly racy, see above */
1400
0
  if (node_cpu_mask_v1[node]) {
1401
0
    if (update) {
1402
      /*
1403
       * There may be readers on node_cpu_mask_v1[], hence it can not
1404
       * be freed.
1405
       */
1406
0
      memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
1407
0
      free(mask);
1408
0
      mask = NULL;
1409
0
    } else if (mask != buffer)
1410
0
      free(mask);
1411
0
  } else {
1412
0
    node_cpu_mask_v1[node] = mask;
1413
0
  }
1414
0
  return err;
1415
0
}
1416
1417
/*
1418
 * test whether a node has cpus
1419
 */
1420
/* This would be better with some locking, but I don't want to make libnuma
1421
   dependent on pthreads right now. The races are relatively harmless. */
1422
/*
1423
 * deliver a bitmask of cpus representing the cpus on a given node
1424
 */
1425
SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2")
1426
int
1427
numa_node_to_cpus_v2(int node, struct bitmask *buffer)
1428
173
{
1429
173
  int err = 0;
1430
173
  int nnodes = numa_max_node();
1431
173
  char fn[64], *line = NULL;
1432
173
  FILE *f;
1433
173
  char update;
1434
173
  size_t len = 0;
1435
173
  struct bitmask *mask;
1436
1437
173
  if (!node_cpu_mask_v2)
1438
1
    init_node_cpu_mask_v2();
1439
1440
173
  if (node > nnodes) {
1441
0
    errno = ERANGE;
1442
0
    return -1;
1443
0
  }
1444
173
  numa_bitmask_clearall(buffer);
1445
1446
173
  update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
1447
173
  if (node_cpu_mask_v2[node] && !update) {
1448
    /* have already constructed a mask for this node */
1449
172
    if (buffer->size < node_cpu_mask_v2[node]->size) {
1450
0
      errno = EINVAL;
1451
0
      numa_error("map size mismatch");
1452
0
      return -1;
1453
0
    }
1454
172
    copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer);
1455
172
    return 0;
1456
172
  }
1457
1458
  /* need a new mask for this node */
1459
1
  mask = numa_allocate_cpumask();
1460
1461
  /* this is a kernel cpumask_t (see node_read_cpumap()) */
1462
1
  sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
1463
1
  f = fopen(fn, "r");
1464
1
  if (!f || getdelim(&line, &len, '\n', f) < 1) {
1465
0
    if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
1466
0
      numa_warn(W_nosysfs2,
1467
0
         "/sys not mounted or invalid. Assuming one node: %s",
1468
0
          strerror(errno));
1469
0
      numa_warn(W_nosysfs2,
1470
0
         "(cannot open or correctly parse %s)", fn);
1471
0
    }
1472
0
    numa_bitmask_setall(mask);
1473
0
    err = -1;
1474
0
  }
1475
1
  if (f)
1476
1
    fclose(f);
1477
1478
1
  if (line && (numa_parse_bitmap_v2(line, mask) < 0)) {
1479
0
    numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
1480
0
    numa_bitmask_setall(mask);
1481
0
    err = -1;
1482
0
  }
1483
1484
1
  free(line);
1485
1
  copy_bitmask_to_bitmask(mask, buffer);
1486
1487
  /* slightly racy, see above */
1488
  /* save the mask we created */
1489
1
  if (node_cpu_mask_v2[node]) {
1490
0
    if (update) {
1491
0
      copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
1492
0
      numa_bitmask_free(mask);
1493
0
      mask = NULL;
1494
    /* how could this be? */
1495
0
    } else if (mask != buffer)
1496
0
      numa_bitmask_free(mask);
1497
1
  } else {
1498
    /* we don't want to cache faulty result */
1499
1
    if (!err)
1500
1
      node_cpu_mask_v2[node] = mask;
1501
0
    else
1502
0
      numa_bitmask_free(mask);
1503
1
  }
1504
1
  return err;
1505
173
}
1506
1507
make_internal_alias(numa_node_to_cpus_v1);
1508
make_internal_alias(numa_node_to_cpus_v2);
1509
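
A usage sketch for the default (v2) entry point above, exported as numa_node_to_cpus(): list the CPUs belonging to every node. Not part of the measured file:

/* Illustrative sketch: list each node's CPUs via numa_node_to_cpus(). */
#include <numa.h>
#include <stdio.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;
  struct bitmask *cpus = numa_allocate_cpumask();
  for (int node = 0; node <= numa_max_node(); node++) {
    if (numa_node_to_cpus(node, cpus) < 0)
      continue;  /* the node may not exist */
    printf("node %d:", node);
    for (unsigned long cpu = 0; cpu < cpus->size; cpu++)
      if (numa_bitmask_isbitset(cpus, cpu))
        printf(" %lu", cpu);
    printf("\n");
  }
  numa_bitmask_free(cpus);
  return 0;
}
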
1510
void numa_node_to_cpu_update(void)
1511
0
{
1512
0
  __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED);
1513
0
  __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED);
1514
0
}
1515
1516
/* report the node of the specified cpu */
1517
int numa_node_of_cpu(int cpu)
1518
0
{
1519
0
  struct bitmask *bmp;
1520
0
  int ncpus, nnodes, node, ret;
1521
1522
0
  ncpus = numa_num_possible_cpus();
1523
0
  if (cpu > ncpus){
1524
0
    errno = EINVAL;
1525
0
    return -1;
1526
0
  }
1527
0
  bmp = numa_bitmask_alloc(ncpus);
1528
0
  nnodes = numa_max_node();
1529
0
  for (node = 0; node <= nnodes; node++){
1530
0
    if (numa_node_to_cpus_v2_int(node, bmp) < 0) {
1531
      /* It's possible for the node to not exist */
1532
0
      continue;
1533
0
    }
1534
0
    if (numa_bitmask_isbitset(bmp, cpu)){
1535
0
      ret = node;
1536
0
      goto end;
1537
0
    }
1538
0
  }
1539
0
  ret = -1;
1540
0
  errno = EINVAL;
1541
0
end:
1542
0
  numa_bitmask_free(bmp);
1543
0
  return ret;
1544
0
}
1545
1546
SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1")
1547
int
1548
numa_run_on_node_mask_v1(const nodemask_t *mask)
1549
0
{
1550
0
  int ncpus = numa_num_possible_cpus();
1551
0
  int i, k, err;
1552
0
  unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)];
1553
0
  memset(cpus, 0, CPU_BYTES(ncpus));
1554
0
  for (i = 0; i < NUMA_NUM_NODES; i++) {
1555
0
    if (mask->n[i / BITS_PER_LONG] == 0)
1556
0
      continue;
1557
0
    if (nodemask_isset_compat(mask, i)) {
1558
0
      if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) {
1559
0
        numa_warn(W_noderunmask,
1560
0
            "Cannot read node cpumask from sysfs");
1561
0
        continue;
1562
0
      }
1563
0
      for (k = 0; k < CPU_LONGS(ncpus); k++)
1564
0
        cpus[k] |= nodecpus[k];
1565
0
    }
1566
0
  }
1567
0
  err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus);
1568
1569
  /* The sched_setaffinity API is broken because it expects
1570
     the user to guess the kernel cpuset size. Do this in a
1571
     brute force way. */
1572
0
  if (err < 0 && errno == EINVAL) {
1573
0
    int savederrno = errno;
1574
0
    char *bigbuf;
1575
0
    static int size = -1;
1576
0
    if (size == -1)
1577
0
      size = CPU_BYTES(ncpus) * 2;
1578
0
    bigbuf = malloc(CPU_BUFFER_SIZE);
1579
0
    if (!bigbuf) {
1580
0
      errno = ENOMEM;
1581
0
      return -1;
1582
0
    }
1583
0
    errno = savederrno;
1584
0
    while (size <= CPU_BUFFER_SIZE) {
1585
0
      memcpy(bigbuf, cpus, CPU_BYTES(ncpus));
1586
0
      memset(bigbuf + CPU_BYTES(ncpus), 0,
1587
0
             CPU_BUFFER_SIZE - CPU_BYTES(ncpus));
1588
0
      err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf);
1589
0
      if (err == 0 || errno != EINVAL)
1590
0
        break;
1591
0
      size *= 2;
1592
0
    }
1593
0
    savederrno = errno;
1594
0
    free(bigbuf);
1595
0
    errno = savederrno;
1596
0
  }
1597
0
  return err;
1598
0
}
1599
1600
/*
1601
 * Given a node mask (size of a kernel nodemask_t) (probably populated by
1602
 * a user argument list) set up a map of cpus (map "cpus") on those nodes.
1603
 * Then set affinity to those cpus.
1604
 */
1605
SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2")
1606
int
1607
numa_run_on_node_mask_v2(struct bitmask *bmp)
1608
0
{
1609
0
  int ncpus, i, k, err;
1610
0
  struct bitmask *cpus, *nodecpus;
1611
1612
0
  cpus = numa_allocate_cpumask();
1613
0
  ncpus = cpus->size;
1614
0
  nodecpus = numa_allocate_cpumask();
1615
1616
0
  for (i = 0; i < bmp->size; i++) {
1617
0
    if (bmp->maskp[i / BITS_PER_LONG] == 0)
1618
0
      continue;
1619
0
    if (numa_bitmask_isbitset(bmp, i)) {
1620
      /*
1621
       * numa_all_nodes_ptr is cpuset aware; use only
1622
       * these nodes
1623
       */
1624
0
      if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
1625
0
        numa_warn(W_noderunmask,
1626
0
          "node %d not allowed", i);
1627
0
        continue;
1628
0
      }
1629
0
      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1630
0
        numa_warn(W_noderunmask,
1631
0
          "Cannot read node cpumask from sysfs");
1632
0
        continue;
1633
0
      }
1634
0
      for (k = 0; k < CPU_LONGS(ncpus); k++)
1635
0
        cpus->maskp[k] |= nodecpus->maskp[k];
1636
0
    }
1637
0
  }
1638
0
  err = numa_sched_setaffinity_v2_int(0, cpus);
1639
1640
0
  numa_bitmask_free(cpus);
1641
0
  numa_bitmask_free(nodecpus);
1642
1643
  /* used to have to consider that this could fail - it shouldn't now */
1644
0
  if (err < 0) {
1645
0
    numa_error("numa_sched_setaffinity_v2_int() failed");
1646
0
  }
1647
1648
0
  return err;
1649
0
}
1650
1651
make_internal_alias(numa_run_on_node_mask_v2);
1652
1653
/*
1654
 * Given a node mask (size of a kernel nodemask_t) (probably populated by
1655
 * a user argument list) set up a map of cpus (map "cpus") on those nodes
1656
 * without any cpuset awareness. Then set affinity to those cpus.
1657
 */
1658
int
1659
numa_run_on_node_mask_all(struct bitmask *bmp)
1660
0
{
1661
0
  int ncpus, i, k, err;
1662
0
  struct bitmask *cpus, *nodecpus;
1663
1664
0
  cpus = numa_allocate_cpumask();
1665
0
  ncpus = cpus->size;
1666
0
  nodecpus = numa_allocate_cpumask();
1667
1668
0
  for (i = 0; i < bmp->size; i++) {
1669
0
    if (bmp->maskp[i / BITS_PER_LONG] == 0)
1670
0
      continue;
1671
0
    if (numa_bitmask_isbitset(bmp, i)) {
1672
0
      if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) {
1673
0
        numa_warn(W_noderunmask,
1674
0
          "node %d not allowed", i);
1675
0
        continue;
1676
0
      }
1677
0
      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1678
0
        numa_warn(W_noderunmask,
1679
0
          "Cannot read node cpumask from sysfs");
1680
0
        continue;
1681
0
      }
1682
0
      for (k = 0; k < CPU_LONGS(ncpus); k++)
1683
0
        cpus->maskp[k] |= nodecpus->maskp[k];
1684
0
    }
1685
0
  }
1686
0
  err = numa_sched_setaffinity_v2_int(0, cpus);
1687
1688
0
  numa_bitmask_free(cpus);
1689
0
  numa_bitmask_free(nodecpus);
1690
1691
  /* with the wider "possible nodes" set, this can now easily fail */
1692
0
  if (err < 0) {
1693
0
    numa_error("numa_sched_setaffinity_v2_int() failed");
1694
0
  }
1695
1696
0
  return err;
1697
0
}
1698
1699
SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1")
1700
nodemask_t
1701
numa_get_run_node_mask_v1(void)
1702
0
{
1703
0
  int ncpus = numa_num_configured_cpus();
1704
0
  int i, k;
1705
0
  int max = numa_max_node_int();
1706
0
  struct bitmask *bmp, *cpus, *nodecpus;
1707
0
  nodemask_t nmp;
1708
1709
0
  cpus = numa_allocate_cpumask();
1710
0
  if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
1711
0
    nmp = numa_no_nodes;
1712
0
    goto free_cpus;
1713
0
  }
1714
1715
0
  nodecpus = numa_allocate_cpumask();
1716
0
  bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */
1717
0
  for (i = 0; i <= max; i++) {
1718
0
    if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1719
      /* It's possible for the node to not exist */
1720
0
      continue;
1721
0
    }
1722
0
    for (k = 0; k < CPU_LONGS(ncpus); k++) {
1723
0
      if (nodecpus->maskp[k] & cpus->maskp[k])
1724
0
        numa_bitmask_setbit(bmp, i);
1725
0
    }
1726
0
  }
1727
0
  copy_bitmask_to_nodemask(bmp, &nmp);
1728
0
  numa_bitmask_free(bmp);
1729
0
  numa_bitmask_free(nodecpus);
1730
0
free_cpus:
1731
0
  numa_bitmask_free(cpus);
1732
0
  return nmp;
1733
0
}
1734
1735
SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2")
1736
struct bitmask *
1737
numa_get_run_node_mask_v2(void)
1738
0
{
1739
0
  int i, k;
1740
0
  int ncpus = numa_num_configured_cpus();
1741
0
  int max = numa_max_node_int();
1742
0
  struct bitmask *bmp, *cpus, *nodecpus;
1743
1744
0
  bmp = numa_allocate_cpumask();
1745
0
  cpus = numa_allocate_cpumask();
1746
0
  if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
1747
0
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
1748
0
    goto free_cpus;
1749
0
  }
1750
1751
0
  nodecpus = numa_allocate_cpumask();
1752
0
  for (i = 0; i <= max; i++) {
1753
    /*
1754
     * numa_all_nodes_ptr is cpuset aware; show only
1755
     * these nodes
1756
     */
1757
0
    if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
1758
0
      continue;
1759
0
    }
1760
0
    if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
1761
      /* It's possible for the node to not exist */
1762
0
      continue;
1763
0
    }
1764
0
    for (k = 0; k < CPU_LONGS(ncpus); k++) {
1765
0
      if (nodecpus->maskp[k] & cpus->maskp[k])
1766
0
        numa_bitmask_setbit(bmp, i);
1767
0
    }
1768
0
  }
1769
0
  numa_bitmask_free(nodecpus);
1770
0
free_cpus:
1771
0
  numa_bitmask_free(cpus);
1772
0
  return bmp;
1773
0
}
1774
1775
int
1776
numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes)
1777
0
{
1778
0
  int numa_num_nodes = numa_num_possible_nodes();
1779
1780
0
  return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp,
1781
0
              tonodes->maskp);
1782
0
}
1783
1784
int numa_move_pages(int pid, unsigned long count,
1785
  void **pages, const int *nodes, int *status, int flags)
1786
0
{
1787
0
  return move_pages(pid, count, pages, nodes, status, flags);
1788
0
}
1789
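
A usage sketch for numa_move_pages() above: with nodes == NULL the underlying move_pages(2) call queries rather than moves, returning each page's node (or a negative errno) in status[]. A hypothetical single-page example:

/* Illustrative sketch: query a page's node with numa_move_pages(). */
#include <numa.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;
  long pagesz = sysconf(_SC_PAGESIZE);
  char *buf = aligned_alloc(pagesz, pagesz);
  if (!buf)
    return 1;
  buf[0] = 1;  /* fault the page in first */

  void *pages[1] = { buf };
  int status[1];
  /* pid 0 = current process; NULL nodes + 0 flags = pure query */
  if (numa_move_pages(0, 1, pages, NULL, status, 0) == 0)
    printf("page is on node %d\n", status[0]);
  free(buf);
  return 0;
}
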
1790
int numa_run_on_node(int node)
1791
0
{
1792
0
  int numa_num_nodes = numa_num_possible_nodes();
1793
0
  int ret = -1;
1794
0
  struct bitmask *cpus;
1795
1796
0
  if (node >= numa_num_nodes){
1797
0
    errno = EINVAL;
1798
0
    goto out;
1799
0
  }
1800
1801
0
  cpus = numa_allocate_cpumask();
1802
1803
0
  if (node == -1)
1804
0
    numa_bitmask_setall(cpus);
1805
0
  else if (numa_node_to_cpus_v2_int(node, cpus) < 0){
1806
0
    numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs");
1807
0
    goto free;
1808
0
  }
1809
1810
0
  ret = numa_sched_setaffinity_v2_int(0, cpus);
1811
0
free:
1812
0
  numa_bitmask_free(cpus);
1813
0
out:
1814
0
  return ret;
1815
0
}
1816
1817
static struct bitmask *__numa_preferred(void)
1818
0
{
1819
0
  int policy;
1820
0
  struct bitmask *bmp;
1821
1822
0
  bmp = numa_allocate_nodemask();
1823
  /* could read the current CPU from /proc/self/status. Probably
1824
     not worth it. */
1825
0
  numa_bitmask_clearall(bmp);
1826
0
  getpol(&policy, bmp);
1827
1828
0
  if (policy != MPOL_PREFERRED &&
1829
0
      policy != MPOL_PREFERRED_MANY &&
1830
0
      policy != MPOL_BIND)
1831
0
    return bmp;
1832
1833
0
  if (numa_bitmask_weight(bmp) > 1)
1834
0
    numa_error(__FILE__);
1835
1836
0
  return bmp;
1837
0
}
1838
1839
int numa_preferred(void)
1840
0
{
1841
0
  int first_node = 0;
1842
0
  struct bitmask *bmp;
1843
1844
0
  bmp = __numa_preferred();
1845
0
  first_node = numa_find_first(bmp);
1846
0
  numa_bitmask_free(bmp);
1847
  
1848
0
  return first_node;
1849
0
}
1850
1851
static void __numa_set_preferred(struct bitmask *bmp)
1852
0
{
1853
0
  int nodes = numa_bitmask_weight(bmp);
1854
0
  if (nodes > 1)
1855
0
    numa_error(__FILE__);
1856
0
  setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp);
1857
0
}
1858
1859
void numa_set_preferred(int node)
1860
0
{
1861
0
  struct bitmask *bmp = numa_allocate_nodemask();
1862
0
  numa_bitmask_setbit(bmp, node);
1863
0
  __numa_set_preferred(bmp);
1864
0
  numa_bitmask_free(bmp);
1865
0
}
1866
1867
int numa_has_preferred_many(void)
1868
0
{
1869
0
  return has_preferred_many;
1870
0
}
1871
1872
void numa_set_preferred_many(struct bitmask *bitmask)
1873
0
{
1874
0
  int first_node = 0;
1875
1876
0
  if (!has_preferred_many) {
1877
0
    numa_warn(W_nodeparse,
1878
0
      "Unable to handle MANY preferred nodes. Falling back to first node\n");
1879
0
    first_node = numa_find_first(bitmask);
1880
0
    numa_set_preferred(first_node);
1881
0
    return;
1882
0
  }
1883
0
  setpol(MPOL_PREFERRED_MANY, bitmask);
1884
0
}
1885
1886
struct bitmask *numa_preferred_many()
1887
0
{
1888
0
  return __numa_preferred();
1889
0
}
1890
1891
void numa_set_localalloc(void)
1892
0
{
1893
0
  setpol(MPOL_LOCAL, numa_no_nodes_ptr);
1894
0
}
1895
1896
SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1")
1897
void numa_bind_v1(const nodemask_t *nodemask)
1898
0
{
1899
0
  struct bitmask bitmask;
1900
1901
0
  bitmask.maskp = (unsigned long *)nodemask;
1902
0
  bitmask.size  = sizeof(nodemask_t);
1903
0
  numa_run_on_node_mask_v2_int(&bitmask);
1904
0
  numa_set_membind_v2_int(&bitmask);
1905
0
}
1906
1907
SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2")
1908
void numa_bind_v2(struct bitmask *bmp)
1909
0
{
1910
0
  numa_run_on_node_mask_v2_int(bmp);
1911
0
  numa_set_membind_v2_int(bmp);
1912
0
}
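The public numa_bind() entry point (resolving to numa_bind_v2 here) pins both scheduling and memory policy to a node set in one call. A minimal sketch binding to node 0:

/* Sketch: restrict this thread's CPUs and memory (MPOL_BIND) to
   node 0, an illustrative target. */
#include <numa.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;

  struct bitmask *nodes = numa_allocate_nodemask();
  numa_bitmask_setbit(nodes, 0);

  numa_bind(nodes);                /* CPU affinity + MPOL_BIND memory */

  numa_bitmask_free(nodes);
  return 0;
}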
1913
1914
void numa_set_strict(int flag)
1915
0
{
1916
0
  if (flag)
1917
0
    mbind_flags |= MPOL_MF_STRICT;
1918
0
  else
1919
0
    mbind_flags &= ~MPOL_MF_STRICT;
1920
0
}
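numa_set_strict() only toggles MPOL_MF_STRICT in the flags handed to later mbind() calls: with strict set, a placement that cannot be honored is reported through numa_error() instead of being silently ignored. A sketch:

/* Sketch: request strict placement for subsequent libnuma allocations,
   then switch back to best-effort. Node 0 is illustrative. */
#include <numa.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;

  numa_set_strict(1);              /* OR in MPOL_MF_STRICT */
  void *p = numa_alloc_onnode(1 << 20, 0);
  if (p)
    numa_free(p, 1 << 20);

  numa_set_strict(0);              /* best-effort again */
  return 0;
}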
1921
1922
/*
1923
 * Extract a node or processor number from the given string.
1924
 * Allow a relative node / processor specification within the allowed
1925
 * set if "relative" is nonzero
1926
 */
1927
static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative)
1928
8.76k
{
1929
8.76k
  long i, nr;
1930
1931
8.76k
  if (!relative)
1932
5.47k
    return strtoul(s, end, 0);
1933
1934
3.28k
  nr = strtoul(s, end, 0);
1935
3.28k
  if (s == *end)
1936
5
    return nr;
1937
  /* Find the nth set bit */
1938
101k
  for (i = 0; nr >= 0 && i <= bmp->size; i++)
1939
98.4k
    if (numa_bitmask_isbitset(bmp, i))
1940
4.77k
      nr--;
1941
3.27k
  return i-1;
1942
3.28k
}
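For relative (+N) specifications, get_nr() resolves N to the N-th set bit of the allowed mask. The same lookup reduced to a plain word, where nth_set_bit() is a hypothetical stand-in for the loop above, not a libnuma function:

/* Sketch: return the index of the n-th set bit (counting from 0),
   or -1 if fewer than n+1 bits are set. */
#include <stdio.h>

static int nth_set_bit(unsigned long mask, int n)
{
  for (unsigned i = 0; i < 8 * sizeof mask; i++)
    if ((mask & (1UL << i)) && n-- == 0)
      return (int)i;
  return -1;
}

int main(void)
{
  unsigned long allowed = 0xAA;    /* allowed set {1,3,5,7} */
  printf("+2 -> %d\n", nth_set_bit(allowed, 2));   /* prints 5 */
  return 0;
}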
1943
1944
/*
1945
 * __numa_parse_nodestring() is called to create a node mask, given
1946
 * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
1947
 * (the + indicates that the numbers are nodeset-relative)
1948
 *
1949
 * The nodes may be specified as absolute, or relative to the current nodeset.
1950
 * The list of available nodes is in a map pointed to by "allowed_nodes_ptr",
1951
 * which may represent all nodes or the nodes in the current nodeset.
1952
 *
1953
 * The caller must free the returned bitmask.
1954
 */
1955
static struct bitmask *
1956
__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr)
1957
683
{
1958
683
  int invert = 0, relative = 0;
1959
683
  int conf_nodes = numa_num_configured_nodes();
1960
683
  char *end;
1961
683
  struct bitmask *mask;
1962
1963
683
  mask = numa_allocate_nodemask();
1964
1965
683
  if (s[0] == 0){
1966
1
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask);
1967
1
    return mask; /* return freeable mask */
1968
1
  }
1969
682
  if (*s == '!') {
1970
3
    invert = 1;
1971
3
    s++;
1972
3
  }
1973
682
  if (*s == '+') {
1974
276
    relative++;
1975
276
    s++;
1976
276
  }
1977
2.49k
  do {
1978
2.49k
    unsigned long arg;
1979
2.49k
    int i;
1980
2.49k
    if (isalpha(*s)) {
1981
142
      int n;
1982
142
      if (!strcmp(s,"all")) {
1983
2
        copy_bitmask_to_bitmask(allowed_nodes_ptr,
1984
2
              mask);
1985
2
        s+=4;
1986
2
        break;
1987
2
      }
1988
140
      n = resolve_affinity(s, mask);
1989
140
      if (n != NO_IO_AFFINITY) {
1990
60
        if (n < 0)
1991
60
          goto err;
1992
0
        s += strlen(s) + 1;
1993
0
        break;
1994
60
      }
1995
140
    }
1996
2.43k
    arg = get_nr(s, &end, allowed_nodes_ptr, relative);
1997
2.43k
    if (end == s) {
1998
115
      numa_warn(W_nodeparse, "unparseable node description `%s'\n", s);
1999
115
      goto err;
2000
115
    }
2001
2.32k
    if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) {
2002
229
      numa_warn(W_nodeparse, "node argument %d is out of range\n", arg);
2003
229
      goto err;
2004
229
    }
2005
2.09k
    i = arg;
2006
2.09k
    numa_bitmask_setbit(mask, i);
2007
2.09k
    s = end;
2008
2.09k
    if (*s == '-') {
2009
1.13k
      char *end2;
2010
1.13k
      unsigned long arg2;
2011
1.13k
      arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative);
2012
1.13k
      if (end2 == s) {
2013
9
        numa_warn(W_nodeparse, "missing node argument %s\n", s);
2014
9
        goto err;
2015
9
      }
2016
1.12k
      if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) {
2017
159
        numa_warn(W_nodeparse, "node argument %d out of range\n", arg2);
2018
159
        goto err;
2019
159
      }
2020
1.67k
      while (arg <= arg2) {
2021
714
        i = arg;
2022
714
        if (numa_bitmask_isbitset(allowed_nodes_ptr,i))
2023
714
          numa_bitmask_setbit(mask, i);
2024
714
        arg++;
2025
714
      }
2026
962
      s = end2;
2027
962
    }
2028
2.09k
  } while (*s++ == ',');
2029
110
  if (s[-1] != '\0')
2030
13
    goto err;
2031
97
  if (invert) {
2032
2
    int i;
2033
4
    for (i = 0; i < conf_nodes; i++) {
2034
2
      if (numa_bitmask_isbitset(mask, i))
2035
2
        numa_bitmask_clearbit(mask, i);
2036
0
      else
2037
0
        numa_bitmask_setbit(mask, i);
2038
2
    }
2039
2
  }
2040
97
  return mask;
2041
2042
585
err:
2043
585
  numa_bitmask_free(mask);
2044
585
  return NULL;
2045
110
}
2046
2047
/*
2048
 * numa_parse_nodestring() is called to create a bitmask from nodes available
2049
 * for this task.
2050
 */
2051
2052
struct bitmask * numa_parse_nodestring(const char *s)
2053
683
{
2054
683
  return __numa_parse_nodestring(s, numa_all_nodes_ptr);
2055
683
}
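A sketch feeding the syntaxes documented above through numa_parse_nodestring(); how many of them succeed depends on the nodes of the running system, and a NULL return signals a parse or range error:

/* Sketch: parse a few node strings and report the weight of each
   resulting mask; the caller must free every mask. */
#include <numa.h>
#include <stdio.h>

int main(void)
{
  const char *specs[] = { "0", "0-1", "1,3,5-7", "+0", "!0", "all" };

  if (numa_available() < 0)
    return 1;

  for (unsigned i = 0; i < sizeof(specs) / sizeof(specs[0]); i++) {
    struct bitmask *m = numa_parse_nodestring(specs[i]);
    if (!m) {
      printf("%-8s -> invalid here\n", specs[i]);
      continue;
    }
    printf("%-8s -> %u node(s)\n", specs[i], numa_bitmask_weight(m));
    numa_bitmask_free(m);
  }
  return 0;
}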
2056
2057
/*
2058
 * numa_parse_nodestring_all() is called to create a bitmask from all nodes
2059
 * available.
2060
 */
2061
2062
struct bitmask * numa_parse_nodestring_all(const char *s)
2063
0
{
2064
0
  return __numa_parse_nodestring(s, numa_possible_nodes_ptr);
2065
0
}
2066
2067
/*
2068
 * __numa_parse_cpustring() is called to create a bitmask, given
2069
 * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2070
 * (the + indicates that the numbers are cpuset-relative)
2071
 *
2072
 * The cpus may be specified as absolute, or relative to the current cpuset.
2073
 * The list of available cpus for this task is in the map pointed to by
2074
 * "allowed_cpus_ptr", which may represent all cpus or the cpus in the
2075
 * current cpuset.
2076
 *
2077
 * The caller must free the returned bitmask.
2078
 */
2079
static struct bitmask *
2080
__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr)
2081
683
{
2082
683
  int invert = 0, relative=0;
2083
683
  int conf_cpus = numa_num_configured_cpus();
2084
683
  char *end;
2085
683
  struct bitmask *mask;
2086
683
  int i;
2087
2088
683
  mask = numa_allocate_cpumask();
2089
2090
683
  if (s[0] == 0)
2091
1
    return mask;
2092
682
  if (*s == '!') {
2093
3
    invert = 1;
2094
3
    s++;
2095
3
  }
2096
682
  if (*s == '+') {
2097
276
    relative++;
2098
276
    s++;
2099
276
  }
2100
3.46k
  do {
2101
3.46k
    unsigned long arg;
2102
2103
3.46k
    if (!strcmp(s,"all")) {
2104
2
      copy_bitmask_to_bitmask(allowed_cpus_ptr, mask);
2105
2
      s+=4;
2106
2
      break;
2107
2
    }
2108
3.46k
    arg = get_nr(s, &end, allowed_cpus_ptr, relative);
2109
3.46k
    if (end == s) {
2110
178
      numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s);
2111
178
      goto err;
2112
178
    }
2113
3.28k
    if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) {
2114
151
      numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s);
2115
151
      goto err;
2116
151
    }
2117
3.13k
    i = arg;
2118
3.13k
    numa_bitmask_setbit(mask, i);
2119
3.13k
    s = end;
2120
3.13k
    if (*s == '-') {
2121
1.73k
      char *end2;
2122
1.73k
      unsigned long arg2;
2123
1.73k
      arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative);
2124
1.73k
      if (end2 == s) {
2125
12
        numa_warn(W_cpuparse, "missing cpu argument %s\n", s);
2126
12
        goto err;
2127
12
      }
2128
1.72k
      if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) {
2129
150
        numa_warn(W_cpuparse, "cpu argument %s out of range\n", s);
2130
150
        goto err;
2131
150
      }
2132
3.09k
      while (arg <= arg2) {
2133
1.52k
        i = arg;
2134
1.52k
        if (numa_bitmask_isbitset(allowed_cpus_ptr, i))
2135
1.52k
          numa_bitmask_setbit(mask, i);
2136
1.52k
        arg++;
2137
1.52k
      }
2138
1.57k
      s = end2;
2139
1.57k
    }
2140
3.13k
  } while (*s++ == ',');
2141
191
  if (s[-1] != '\0')
2142
19
    goto err;
2143
172
  if (invert) {
2144
66
    for (i = 0; i < conf_cpus; i++) {
2145
64
      if (numa_bitmask_isbitset(mask, i))
2146
33
        numa_bitmask_clearbit(mask, i);
2147
31
      else
2148
31
        numa_bitmask_setbit(mask, i);
2149
64
    }
2150
2
  }
2151
172
  return mask;
2152
2153
510
err:
2154
510
  numa_bitmask_free(mask);
2155
510
  return NULL;
2156
191
}
2157
2158
/*
2159
 * numa_parse_cpustring() is called to create a bitmask from cpus available
2160
 * for this task.
2161
 */
2162
2163
struct bitmask * numa_parse_cpustring(const char *s)
2164
683
{
2165
683
  return __numa_parse_cpustring(s, numa_all_cpus_ptr);
2166
683
}
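A closing sketch pairing numa_parse_cpustring() with numa_sched_setaffinity() to apply a parsed cpu list to the calling thread (pid 0); the string "0" is illustrative:

/* Sketch: bind the caller to the cpus named in a cpu string. */
#include <numa.h>
#include <stdio.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;

  struct bitmask *cpus = numa_parse_cpustring("0");
  if (!cpus) {
    fprintf(stderr, "bad cpu string\n");
    return 1;
  }
  if (numa_sched_setaffinity(0, cpus) < 0)
    perror("numa_sched_setaffinity");

  numa_bitmask_free(cpus);
  return 0;
}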
2167
2168
/*
2169
 * numa_parse_cpustring_all() is called to create a bitmask from all cpus
2170
 * available.
2171
 */
2172
2173
struct bitmask * numa_parse_cpustring_all(const char *s)
2174
0
{
2175
0
  return __numa_parse_cpustring(s, numa_possible_cpus_ptr);
2176
0
}