Coverage Report

Created: 2025-02-14 06:29

/src/numactl/libnuma.c
Count|Source  (execution count per line; a blank count means the line is not executable; source line numbers omitted)
     |/* Simple NUMA library.
     |   Copyright (C) 2003,2004,2005,2008 Andi Kleen, SuSE Labs and
     |   Cliff Wickman, SGI.
     |
     |   libnuma is free software; you can redistribute it and/or
     |   modify it under the terms of the GNU Lesser General Public
     |   License as published by the Free Software Foundation; version
     |   2.1.
     |
     |   libnuma is distributed in the hope that it will be useful,
     |   but WITHOUT ANY WARRANTY; without even the implied warranty of
     |   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     |   Lesser General Public License for more details.
     |
     |   You should find a copy of v2.1 of the GNU Lesser General Public License
     |   somewhere on your Linux system; if not, write to the Free Software
     |   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
     |
     |   All calls are undefined when numa_available returns an error. */
     |#define _GNU_SOURCE 1
     |#include <stdlib.h>
     |#include <stdio.h>
     |#include <unistd.h>
     |#include <string.h>
     |#include <sched.h>
     |#include <dirent.h>
     |#include <errno.h>
     |#include <stdarg.h>
     |#include <ctype.h>
     |#include <assert.h>
     |
     |#include <sys/mman.h>
     |#include <limits.h>
     |
     |#include "config.h"
     |#include "numa.h"
     |#include "numaif.h"
     |#include "numaint.h"
     |#include "util.h"
     |#include "affinity.h"
     |
     |#define WEAK __attribute__((weak))
     |
    0|#define CPU_BUFFER_SIZE 4096     /* This limits you to 32768 CPUs */
     |
     |/* these are the old (version 1) masks */
     |nodemask_t numa_no_nodes;
     |nodemask_t numa_all_nodes;
     |/* these are now the default bitmask (pointers to) (version 2) */
     |struct bitmask *numa_no_nodes_ptr = NULL;
     |struct bitmask *numa_all_nodes_ptr = NULL;
     |struct bitmask *numa_possible_nodes_ptr = NULL;
     |struct bitmask *numa_all_cpus_ptr = NULL;
     |struct bitmask *numa_possible_cpus_ptr = NULL;
     |/* I would prefer to use symbol versioning to create v1 and v2 versions
     |   of numa_no_nodes and numa_all_nodes, but the loader does not correctly
     |   handle versioning of BSS versus small data items */
     |
     |struct bitmask *numa_nodes_ptr = NULL;
     |static struct bitmask *numa_memnode_ptr = NULL;
     |static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
     |static char node_cpu_mask_v1_stale = 1;
     |static struct bitmask **node_cpu_mask_v2;
     |static char node_cpu_mask_v2_stale = 1;
     |
     |WEAK void numa_error(char *where);
     |
     |#ifndef TLS
     |#warning "not threadsafe"
     |#define __thread
     |#endif
     |
     |static __thread int bind_policy = MPOL_BIND;
     |static __thread unsigned int mbind_flags = 0;
     |static int sizes_set=0;
     |static int maxconfigurednode = -1;
     |static int maxconfiguredcpu = -1;
     |static int numprocnode = -1;
     |static int numproccpu = -1;
     |static int nodemask_sz = 0;
     |static int cpumask_sz = 0;
     |
     |static int has_preferred_many = -1;
     |
     |int numa_exit_on_error = 0;
     |int numa_exit_on_warn = 0;
     |static void set_sizes(void);
     |
     |/*
     | * There are two special functions, _init(void) and _fini(void), which
     | * are called automatically by the dynamic loader whenever a library is loaded.
     | *
     | * The v1 library depends upon nodemask_t's of all nodes and no nodes.
     | */
     |void __attribute__((constructor))
     |numa_init(void)
    2|{
    2|  int max,i;
     |
    2|  if (sizes_set)
    0|    return;
     |
    2|  set_sizes();
     |  /* numa_all_nodes should represent existing nodes on this system */
    2|  max = numa_num_configured_nodes();
    4|  for (i = 0; i < max; i++)
    2|    nodemask_set_compat((nodemask_t *)&numa_all_nodes, i);
    2|  memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
     |
     |  /* clear errno */
    2|  errno = 0;
    2|}
     |
     |static void cleanup_node_cpu_mask_v2(void);
     |
    0|#define FREE_AND_ZERO(x) if (x) { \
    0|    numa_bitmask_free(x); \
    0|    x = NULL;   \
    0|  }
     |
     |void __attribute__((destructor))
     |numa_fini(void)
    0|{
    0|  FREE_AND_ZERO(numa_all_cpus_ptr);
    0|  FREE_AND_ZERO(numa_possible_cpus_ptr);
    0|  FREE_AND_ZERO(numa_all_nodes_ptr);
    0|  FREE_AND_ZERO(numa_possible_nodes_ptr);
    0|  FREE_AND_ZERO(numa_no_nodes_ptr);
    0|  FREE_AND_ZERO(numa_memnode_ptr);
    0|  FREE_AND_ZERO(numa_nodes_ptr);
    0|  cleanup_node_cpu_mask_v2();
    0|}
     |
     |static int numa_find_first(struct bitmask *mask)
    0|{
    0|  int i;
    0|  for (i = 0; i < mask->size; i++)
    0|    if (numa_bitmask_isbitset(mask, i))
    0|      return i;
    0|  return -1;
    0|}
     |
     |/*
     | * The following bitmask declarations, bitmask_*() routines, and associated
     | * _setbit() and _getbit() routines are:
     | * Copyright (c) 2004-2007 Silicon Graphics, Inc. (SGI) All rights reserved.
     | * SGI publishes it under the terms of the Library GNU General Public License,
     | * v2, as published by the Free Software Foundation.
     | */
     |static unsigned int
     |_getbit(const struct bitmask *bmp, unsigned int n)
 113k|{
 113k|  if (n < bmp->size)
 113k|    return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1;
  705|  else
  705|    return 0;
 113k|}
     |
     |static void
     |_setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
21.0k|{
21.0k|  if (n < bmp->size) {
21.0k|    if (v)
9.42k|      bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong);
11.5k|    else
11.5k|      bmp->maskp[n/bitsperlong] &= ~(1UL << (n % bitsperlong));
21.0k|  }
21.0k|}
     |
     |int
     |numa_bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
 111k|{
 111k|  return _getbit(bmp, i);
 111k|}
     |
     |struct bitmask *
     |numa_bitmask_setall(struct bitmask *bmp)
    0|{
    0|  unsigned int i;
    0|  for (i = 0; i < bmp->size; i++)
    0|    _setbit(bmp, i, 1);
    0|  return bmp;
    0|}
     |
     |struct bitmask *
     |numa_bitmask_clearall(struct bitmask *bmp)
  181|{
  181|  unsigned int i;
11.7k|  for (i = 0; i < bmp->size; i++)
11.5k|    _setbit(bmp, i, 0);
  181|  return bmp;
  181|}
     |
     |struct bitmask *
     |numa_bitmask_setbit(struct bitmask *bmp, unsigned int i)
9.42k|{
9.42k|  _setbit(bmp, i, 1);
9.42k|  return bmp;
9.42k|}
     |
     |struct bitmask *
     |numa_bitmask_clearbit(struct bitmask *bmp, unsigned int i)
    4|{
    4|  _setbit(bmp, i, 0);
    4|  return bmp;
    4|}
     |
     |unsigned int
     |numa_bitmask_nbytes(struct bitmask *bmp)
    2|{
    2|  return longsperbits(bmp->size) * sizeof(unsigned long);
    2|}
     |
     |/* where n is the number of bits in the map */
     |/* This function should not exit on failure, but right now we cannot really
     |   recover from this. */
     |struct bitmask *
     |numa_bitmask_alloc(unsigned int n)
1.36k|{
1.36k|  struct bitmask *bmp;
     |
1.36k|  if (n < 1) {
    0|    errno = EINVAL;
    0|    numa_error("request to allocate mask for invalid number");
    0|    return NULL;
    0|  }
1.36k|  bmp = malloc(sizeof(*bmp));
1.36k|  if (bmp == 0)
    0|    goto oom;
1.36k|  bmp->size = n;
1.36k|  bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
1.36k|  if (bmp->maskp == 0) {
    0|    free(bmp);
    0|    goto oom;
    0|  }
1.36k|  return bmp;
     |
    0|oom:
    0|  numa_error("Out of memory allocating bitmask");
    0|  exit(1);
1.36k|}
     |
     |void
     |numa_bitmask_free(struct bitmask *bmp)
1.35k|{
1.35k|  if (bmp == 0)
    0|    return;
1.35k|  free(bmp->maskp);
1.35k|  bmp->maskp = (unsigned long *)0xdeadcdef;  /* double free tripwire */
1.35k|  free(bmp);
1.35k|  return;
1.35k|}
     |
     |/* True if two bitmasks are equal */
     |int
     |numa_bitmask_equal(const struct bitmask *bmp1, const struct bitmask *bmp2)
    0|{
    0|  unsigned int i;
    0|  for (i = 0; i < bmp1->size || i < bmp2->size; i++)
    0|    if (_getbit(bmp1, i) != _getbit(bmp2, i))
    0|      return 0;
    0|  return 1;
    0|}
     |
     |/* Hamming Weight: number of set bits */
     |unsigned int numa_bitmask_weight(const struct bitmask *bmp)
    4|{
    4|  unsigned int i;
    4|  unsigned int w = 0;
2.18k|  for (i = 0; i < bmp->size; i++)
2.17k|    if (_getbit(bmp, i))
   66|      w++;
    4|  return w;
    4|}
     |
     |/* *****end of bitmask_ routines ************ */
     |
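
The bitmask routines above are the allocation and bit-manipulation core of the library. A minimal standalone sketch of their intended use through the public numa.h API (the 64-bit size and the bit positions are arbitrary example values):

#include <numa.h>
#include <stdio.h>

int main(void)
{
    struct bitmask *bmp = numa_bitmask_alloc(64);   /* exits on OOM, see above */

    numa_bitmask_setbit(bmp, 0);
    numa_bitmask_setbit(bmp, 3);
    printf("bit 3 set: %d, weight: %u\n",
           numa_bitmask_isbitset(bmp, 3), numa_bitmask_weight(bmp));
    numa_bitmask_clearall(bmp);
    numa_bitmask_free(bmp);
    return 0;
}
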
     |/* Next two can be overwritten by the application for different error handling */
     |WEAK void numa_error(char *where)
    0|{
    0|  int olde = errno;
    0|  perror(where);
    0|  if (numa_exit_on_error)
    0|    exit(1);
    0|  errno = olde;
    0|}
     |
     |WEAK void numa_warn(int num, char *fmt, ...)
1.04k|{
1.04k|  static unsigned warned;
1.04k|  va_list ap;
1.04k|  int olde = errno;
     |
     |  /* Give each warning only once */
1.04k|  if ((1<<num) & warned)
1.03k|    return;
    9|  warned |= (1<<num);
     |
    9|  va_start(ap,fmt);
    9|  fprintf(stderr, "libnuma: Warning: ");
    9|  vfprintf(stderr, fmt, ap);
    9|  fputc('\n', stderr);
    9|  va_end(ap);
     |
    9|  if (numa_exit_on_warn)
    0|    exit(1);
     |
    9|  errno = olde;
    9|}
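
Because both handlers are declared WEAK, a program linking against libnuma can supply its own strong definitions and take over diagnostics, as the comment above notes. A standalone sketch (the "myapp:" prefix and routing to stderr are just an example policy):

#include <numa.h>
#include <stdarg.h>
#include <stdio.h>

void numa_error(char *where)
{
    fprintf(stderr, "myapp: libnuma error in %s\n", where);
    /* deliberately no exit(); the failing libnuma call still returns an error */
}

void numa_warn(int num, char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    fprintf(stderr, "myapp: libnuma warning %d: ", num);
    vfprintf(stderr, fmt, ap);
    fputc('\n', stderr);
    va_end(ap);
}
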
     |
     |static void setpol(int policy, struct bitmask *bmp)
    0|{
    0|  if (set_mempolicy(policy, bmp->maskp, bmp->size + 1) < 0)
    0|    numa_error("set_mempolicy");
    0|}
     |
     |static void getpol(int *oldpolicy, struct bitmask *bmp)
    0|{
    0|  if (get_mempolicy(oldpolicy, bmp->maskp, bmp->size + 1, 0, 0) < 0)
    0|    numa_error("get_mempolicy");
    0|}
     |
     |static void dombind(void *mem, size_t size, int pol, struct bitmask *bmp)
    0|{
    0|  if (mbind(mem, size, pol, bmp ? bmp->maskp : NULL, bmp ? bmp->size + 1 : 0,
    0|      mbind_flags) < 0)
    0|    numa_error("mbind");
    0|}
     |
     |/* (undocumented) */
     |/* gives the wrong answer for hugetlbfs mappings. */
     |int numa_pagesize(void)
    0|{
    0|  static int pagesize;
    0|  if (pagesize > 0)
    0|    return pagesize;
    0|  pagesize = getpagesize();
    0|  return pagesize;
    0|}
     |
     |make_internal_alias(numa_pagesize);
     |
     |/*
     | * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
     | * and the highest numbered existing node (maxconfigurednode).
     | */
     |static void
     |set_configured_nodes(void)
    2|{
    2|  DIR *d;
    2|  struct dirent *de;
     |
    2|  numa_memnode_ptr = numa_allocate_nodemask();
    2|  numa_nodes_ptr = numa_allocate_nodemask();
    2|  if (!numa_memnode_ptr || !numa_nodes_ptr)
    0|    return;
     |
    2|  d = opendir("/sys/devices/system/node");
    2|  if (!d) {
    0|    maxconfigurednode = 0;
    2|  } else {
   24|    while ((de = readdir(d)) != NULL) {
   22|      int nd;
   22|      if (strncmp(de->d_name, "node", 4))
   20|        continue;
    2|      nd = strtoul(de->d_name+4, NULL, 0);
    2|      numa_bitmask_setbit(numa_nodes_ptr, nd);
    2|      numa_bitmask_setbit(numa_memnode_ptr, nd);
    2|      if (maxconfigurednode < nd)
    2|        maxconfigurednode = nd;
    2|    }
    2|    closedir(d);
    2|  }
    2|}
     |
     |static inline int is_digit(char s)
  574|{
  574|  return (s >= '0' && s <= '9')
  574|    || (s >= 'a' && s <= 'f')
  574|    || (s >= 'A' && s <= 'F');
  574|}
     |
     |/* Is string 'pre' a prefix of string 's'? */
     |static int strprefix(const char *s, const char *pre)
  112|{
  112|  return strncmp(s, pre, strlen(pre)) == 0;
  112|}
     |
     |static const char *mask_size_file = "/proc/self/status";
     |static const char *nodemask_prefix = "Mems_allowed:\t";
     |/*
     | * (do this the way Paul Jackson's libcpuset does it)
     | * The nodemask values in /proc/self/status are in an
     | * ascii format that uses 9 characters for each 32 bits of mask.
     | * (this could also be used to find the cpumask size)
     | */
     |static void
     |set_nodemask_size(void)
    2|{
    2|  FILE *fp;
    2|  char *buf = NULL;
    2|  char *tmp_buf = NULL;
    2|  int digit_len = 0;
    2|  size_t bufsize = 0;
     |
    2|  if ((fp = fopen(mask_size_file, "r")) == NULL)
    0|    goto done;
     |
  114|  while (getline(&buf, &bufsize, fp) > 0) {
  112|    if (strprefix(buf, nodemask_prefix)) {
    2|      tmp_buf = buf;
    2|      tmp_buf += strlen(nodemask_prefix);
  576|      while (*tmp_buf != '\n' && *tmp_buf != '\0') {
  574|        if (is_digit(*tmp_buf))
  512|          digit_len++;
  574|        tmp_buf++;
  574|      }
    2|      nodemask_sz = digit_len * 4;
    2|    }
  112|  }
    2|  free(buf);
    2|  fclose(fp);
    2|done:
    2|  if (nodemask_sz == 0) { /* fall back on error */
    0|    int pol;
    0|    unsigned long *mask = NULL;
    0|    nodemask_sz = 16;
    0|    do {
    0|      nodemask_sz <<= 1;
    0|      mask = realloc(mask, nodemask_sz / 8 + sizeof(unsigned long));
    0|      if (!mask)
    0|        return;
    0|    } while (get_mempolicy(&pol, mask, nodemask_sz + 1, 0, 0) < 0 && errno == EINVAL &&
    0|        nodemask_sz < 4096*8);
    0|    free(mask);
    0|  }
    2|}
     |
     |/*
     | * Read a mask consisting of a sequence of hexadecimal longs separated by
     | * commas. Order them correctly and return the number of bits set.
     | */
     |static int
     |read_mask(char *s, struct bitmask *bmp)
    4|{
    4|  char *end = s;
    4|  int tmplen = (bmp->size + bitsperint - 1) / bitsperint;
    4|  unsigned int tmp[tmplen];
    4|  unsigned int *start = tmp;
    4|  unsigned int i, n = 0, m = 0;
     |
    4|  if (!s)
    0|    return 0; /* shouldn't happen */
     |
    4|  i = strtoul(s, &end, 16);
     |
     |  /* Skip leading zeros */
   66|  while (!i && *end++ == ',') {
   62|    i = strtoul(end, &end, 16);
   62|  }
     |
    4|  if (!i)
     |    /* End of string. No mask */
    0|    return -1;
     |
    4|  start[n++] = i;
     |  /* Read sequence of ints */
    4|  while (*end++ == ',') {
    0|    i = strtoul(end, &end, 16);
    0|    start[n++] = i;
     |
     |    /* buffer overflow */
    0|    if (n > tmplen)
    0|      return -1;
    0|  }
     |
     |  /*
     |   * Invert sequence of ints if necessary since the first int
     |   * is the highest and we put it first because we read it first.
     |   */
    8|  while (n) {
    4|    int w;
    4|    unsigned long x = 0;
     |    /* read into long values in an endian-safe way */
    8|    for (w = 0; n && w < bitsperlong; w += bitsperint)
    4|      x |= ((unsigned long)start[n-- - 1] << w);
     |
    4|    bmp->maskp[m++] = x;
    4|  }
     |  /*
     |   * Return the number of bits set
     |   */
    4|  return numa_bitmask_weight(bmp);
    4|}
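
A worked example of the format read_mask() handles (illustrative, not from the report): given the /proc/self/status line "Cpus_allowed:   00000000,0000000f", the first strtoul() yields 0, so the all-zero group is consumed by the "skip leading zeros" loop; the next group parses as 0xf and becomes start[0]. The inversion loop then packs the groups into unsigned longs with the last-read (least significant) group in the low bits, leaving maskp[0] = 0xf, i.e. CPUs 0-3 set, and numa_bitmask_weight() returns 4.
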
     |
     |/*
     | * Read a process's constraints in terms of nodes and cpus from
     | * /proc/self/status.
     | */
     |static void
     |set_task_constraints(void)
    2|{
    2|  int hicpu = maxconfiguredcpu;
    2|  int i;
    2|  char *buffer = NULL;
    2|  size_t buflen = 0;
    2|  FILE *f;
     |
    2|  numa_all_cpus_ptr = numa_allocate_cpumask();
    2|  numa_possible_cpus_ptr = numa_allocate_cpumask();
    2|  numa_all_nodes_ptr = numa_allocate_nodemask();
    2|  numa_possible_nodes_ptr = numa_allocate_cpumask();
    2|  numa_no_nodes_ptr = numa_allocate_nodemask();
     |
     |  // a partial leak shouldn't happen because it's transient
    2|  if (!numa_all_cpus_ptr || !numa_possible_cpus_ptr ||
    2|    !numa_all_nodes_ptr ||
    2|    !numa_possible_nodes_ptr ||
    2|    !numa_no_nodes_ptr)
    0|    return;
     |
    2|  f = fopen(mask_size_file, "r");
    2|  if (!f) {
     |    //numa_warn(W_cpumap, "Cannot parse %s", mask_size_file);
    0|    return;
    0|  }
     |
  114|  while (getline(&buffer, &buflen, f) > 0) {
     |    /* mask starts after [last] tab */
  112|    char *mask = strrchr(buffer,'\t') + 1;
     |
  112|    if (strncmp(buffer,"Cpus_allowed:",13) == 0)
    2|      numproccpu = read_mask(mask, numa_all_cpus_ptr);
     |
  112|    if (strncmp(buffer,"Mems_allowed:",13) == 0) {
    2|      numprocnode = read_mask(mask, numa_all_nodes_ptr);
    2|    }
  112|  }
    2|  fclose(f);
    2|  free(buffer);
     |
   66|  for (i = 0; i <= hicpu; i++)
   64|    numa_bitmask_setbit(numa_possible_cpus_ptr, i);
    4|  for (i = 0; i <= maxconfigurednode; i++)
    2|    numa_bitmask_setbit(numa_possible_nodes_ptr, i);
     |
     |  /*
     |   * Cpus_allowed in the kernel can be defined to all f's
     |   * i.e. it may be a superset of the actual available processors.
     |   * As such let's reduce numproccpu to the number of actual
     |   * available cpus.
     |   */
    2|  if (numproccpu <= 0) {
    0|    for (i = 0; i <= hicpu; i++)
    0|      numa_bitmask_setbit(numa_all_cpus_ptr, i);
    0|    numproccpu = hicpu+1;
    0|  }
     |
    2|  if (numproccpu > hicpu+1) {
    0|    numproccpu = hicpu+1;
    0|    for (i=hicpu+1; i<numa_all_cpus_ptr->size; i++) {
    0|      numa_bitmask_clearbit(numa_all_cpus_ptr, i);
    0|    }
    0|  }
     |
    2|  if (numprocnode <= 0) {
    0|    for (i = 0; i <= maxconfigurednode; i++)
    0|      numa_bitmask_setbit(numa_all_nodes_ptr, i);
    0|    numprocnode = maxconfigurednode + 1;
    0|  }
     |
    2|  return;
    2|}
     |
     |/*
     | * Find the highest cpu number possible (in other words the size
     | * of a kernel cpumask_t (in bits) - 1)
     | */
     |static void
     |set_numa_max_cpu(void)
    2|{
    2|  int len = 4096;
    2|  int n;
    2|  int olde = errno;
    2|  struct bitmask *buffer;
     |
    2|  do {
    2|    buffer = numa_bitmask_alloc(len);
    2|    if (!buffer)
    0|      return;
    2|    n = numa_sched_getaffinity_v2_int(0, buffer);
     |    /* on success, returns size of kernel cpumask_t, in bytes */
    2|    if (n < 0) {
    0|      if (errno == EINVAL) {
    0|        if (len >= 1024*1024)
    0|          break;
    0|        len *= 2;
    0|        numa_bitmask_free(buffer);
    0|        continue;
    0|      } else {
    0|        numa_warn(W_numcpus, "Unable to determine max cpu"
    0|            " (sched_getaffinity: %s); guessing...",
    0|            strerror(errno));
    0|        n = sizeof(cpu_set_t);
    0|        break;
    0|      }
    0|    }
    2|  } while (n < 0);
    2|  numa_bitmask_free(buffer);
    2|  errno = olde;
    2|  cpumask_sz = n*8;
    2|}
     |
     |/*
     | * get the total (configured) number of cpus - both online and offline
     | */
     |static void
     |set_configured_cpus(void)
    2|{
    2|  maxconfiguredcpu = sysconf(_SC_NPROCESSORS_CONF) - 1;
    2|  if (maxconfiguredcpu == -1)
    0|    numa_error("sysconf(NPROCESSORS_CONF) failed");
    2|}
     |
     |static void
     |set_preferred_many(void)
    0|{
    0|  int oldp;
    0|  struct bitmask *bmp, *tmp;
    0|  int old_errno;
     |
    0|  if (has_preferred_many >= 0)
    0|    return;
     |
    0|  old_errno = errno;
     |
    0|  has_preferred_many = 0;
     |
    0|  bmp = numa_allocate_nodemask();
    0|  tmp = numa_get_mems_allowed();
    0|  if (!tmp || !bmp)
    0|    goto out;
     |
    0|  if (get_mempolicy(&oldp, bmp->maskp, bmp->size + 1, 0, 0) < 0)
    0|    goto out;
     |
    0|  if (set_mempolicy(MPOL_PREFERRED_MANY, tmp->maskp, tmp->size) == 0) {
    0|    has_preferred_many = 1;
     |    /* reset the old memory policy ignoring error */
    0|    (void)set_mempolicy(oldp, bmp->maskp, bmp->size+1);
    0|  }
     |
    0|out:
    0|  numa_bitmask_free(tmp);
    0|  numa_bitmask_free(bmp);
    0|  errno = old_errno;
    0|}
     |
     |/*
     | * Initialize all the sizes.
     | */
     |static void
     |set_sizes(void)
    2|{
    2|  sizes_set++;
    2|  set_nodemask_size();    /* size of kernel nodemask_t */
    2|  set_configured_nodes(); /* configured nodes listed in /sys */
    2|  set_numa_max_cpu();     /* size of kernel cpumask_t */
    2|  set_configured_cpus();  /* cpus listed in /sys/devices/system/cpu */
    2|  set_task_constraints(); /* cpus and nodes for current task */
    2|}
     |
     |int
     |numa_num_configured_nodes(void)
  677|{
     |  /*
     |   * NOTE: this function's behavior matches the documentation (i.e., it
     |   * returns a count of nodes with memory) despite the poor function
     |   * naming.  We also cannot use the similarly poorly named
     |   * numa_all_nodes_ptr as it only tracks nodes with memory from which
     |   * the calling process can allocate.  Think sparse nodes, memory-less
     |   * nodes, cpusets...
     |   */
  677|  int memnodecount=0, i;
     |
1.35k|  for (i=0; i <= maxconfigurednode; i++) {
  677|    if (numa_bitmask_isbitset(numa_memnode_ptr, i))
  677|      memnodecount++;
  677|  }
  677|  return memnodecount;
  677|}
     |
     |int
     |numa_num_configured_cpus(void)
  675|{
     |
  675|  return maxconfiguredcpu+1;
  675|}
     |
     |int
     |numa_num_possible_nodes(void)
  684|{
  684|  return nodemask_sz;
  684|}
     |
     |int
     |numa_num_possible_cpus(void)
  682|{
  682|  return cpumask_sz;
  682|}
     |
     |int
     |numa_num_task_nodes(void)
    0|{
    0|  return numprocnode;
    0|}
     |
     |/*
     | * for backward compatibility
     | */
     |int
     |numa_num_thread_nodes(void)
    0|{
    0|  return numa_num_task_nodes();
    0|}
     |
     |int
     |numa_num_task_cpus(void)
    0|{
    0|  return numproccpu;
    0|}
     |
     |/*
     | * for backward compatibility
     | */
     |int
     |numa_num_thread_cpus(void)
    0|{
    0|  return numa_num_task_cpus();
    0|}
     |
     |/*
     | * Return the number of the highest node in this running system.
     | */
     |int
     |numa_max_node(void)
  181|{
  181|  return maxconfigurednode;
  181|}
     |
     |make_internal_alias(numa_max_node);
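
A standalone sketch of the counting functions above, which simply report the values cached by set_sizes(); the library constructor has already run before main() begins (the numa_available() guard that should precede any libnuma use appears with that function further below):

#include <numa.h>
#include <stdio.h>

int main(void)
{
    printf("configured nodes: %d (max node id %d)\n",
           numa_num_configured_nodes(), numa_max_node());
    printf("configured cpus:  %d\n", numa_num_configured_cpus());
    printf("possible nodes: %d, possible cpus: %d\n",
           numa_num_possible_nodes(), numa_num_possible_cpus());
    return 0;
}
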
     |
     |/*
     | * Return the number of the highest possible node in a system,
     | * which for v1 is the size of a numa.h nodemask_t (in bits) - 1,
     | * but for v2 is the size of a kernel nodemask_t (in bits) - 1.
     | */
     |SYMVER("numa_max_possible_node_v1", "numa_max_possible_node@libnuma_1.1")
     |int
     |numa_max_possible_node_v1(void)
    0|{
    0|  return ((sizeof(nodemask_t)*8)-1);
    0|}
     |
     |SYMVER("numa_max_possible_node_v2", "numa_max_possible_node@@libnuma_1.2")
     |int
     |numa_max_possible_node_v2(void)
  684|{
  684|  return numa_num_possible_nodes()-1;
  684|}
     |
     |make_internal_alias(numa_max_possible_node_v1);
     |make_internal_alias(numa_max_possible_node_v2);
     |
     |/*
     | * Allocate a bitmask for cpus, of a size large enough to
     | * match the kernel's cpumask_t.
     | */
     |struct bitmask *
     |numa_allocate_cpumask()
  682|{
  682|  int ncpus = numa_num_possible_cpus();
     |
  682|  return numa_bitmask_alloc(ncpus);
  682|}
     |
     |/*
     | * Allocate a bitmask the size of a libnuma nodemask_t
     | */
     |static struct bitmask *
     |allocate_nodemask_v1(void)
    0|{
    0|  int nnodes = numa_max_possible_node_v1_int()+1;
     |
    0|  return numa_bitmask_alloc(nnodes);
    0|}
     |
     |/*
     | * Allocate a bitmask for nodes, of a size large enough to
     | * match the kernel's nodemask_t.
     | */
     |struct bitmask *
     |numa_allocate_nodemask(void)
  683|{
  683|  struct bitmask *bmp;
  683|  int nnodes = numa_max_possible_node_v2_int() + 1;
     |
  683|  bmp = numa_bitmask_alloc(nnodes);
  683|  return bmp;
  683|}
     |
     |/* (cache the result?) */
     |long long numa_node_size64(int node, long long *freep)
    0|{
    0|  size_t len = 0;
    0|  char *line = NULL;
    0|  long long size = -1;
    0|  FILE *f;
    0|  char fn[64];
    0|  int ok = 0;
    0|  int required = freep ? 2 : 1;
     |
    0|  if (freep)
    0|    *freep = 0;
    0|  sprintf(fn,"/sys/devices/system/node/node%d/meminfo", node);
    0|  f = fopen(fn, "r");
    0|  if (!f)
    0|    return -1;
    0|  while (getdelim(&line, &len, '\n', f) > 0) {
    0|    char *end;
    0|    char *s = strcasestr(line, "kB");
    0|    if (!s)
    0|      continue;
    0|    --s;
    0|    while (s > line && isspace(*s))
    0|      --s;
    0|    while (s > line && isdigit(*s))
    0|      --s;
    0|    if (strstr(line, "MemTotal")) {
    0|      size = strtoull(s,&end,0) << 10;
    0|      if (end == s)
    0|        size = -1;
    0|      else
    0|        ok++;
    0|    }
    0|    if (freep && strstr(line, "MemFree")) {
    0|      *freep = strtoull(s,&end,0) << 10;
    0|      if (end == s)
    0|        *freep = -1;
    0|      else
    0|        ok++;
    0|    }
    0|  }
    0|  fclose(f);
    0|  free(line);
    0|  if (ok != required)
    0|    numa_warn(W_badmeminfo, "Cannot parse sysfs meminfo (%d)", ok);
    0|  return size;
    0|}
     |
     |make_internal_alias(numa_node_size64);
     |
     |long numa_node_size(int node, long *freep)
    0|{
    0|  long long f2 = 0;
    0|  long sz = numa_node_size64_int(node, &f2);
    0|  if (freep)
    0|    *freep = f2;
    0|  return sz;
    0|}
     |
     |int numa_available(void)
    0|{
    0|  if (get_mempolicy(NULL, NULL, 0, 0, 0) < 0 && (errno == ENOSYS || errno == EPERM))
    0|    return -1;
    0|  return 0;
    0|}
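
The file header states that all calls are undefined when numa_available() returns an error, so this check belongs at the top of any program that uses the library; a minimal sketch:

#include <numa.h>
#include <stdio.h>

int main(void)
{
    if (numa_available() < 0) {
        fprintf(stderr, "NUMA is not supported on this system\n");
        return 1;
    }
    /* the rest of the API is safe to use from here on */
    printf("highest node: %d\n", numa_max_node());
    return 0;
}
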
     |
     |SYMVER("numa_interleave_memory_v1", "numa_interleave_memory@libnuma_1.1")
     |void
     |numa_interleave_memory_v1(void *mem, size_t size, const nodemask_t *mask)
    0|{
    0|  struct bitmask bitmask;
     |
    0|  bitmask.size = sizeof(nodemask_t) * 8;
    0|  bitmask.maskp = (unsigned long *)mask;
    0|  dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
    0|}
     |
     |SYMVER("numa_interleave_memory_v2", "numa_interleave_memory@@libnuma_1.2")
     |void
     |numa_interleave_memory_v2(void *mem, size_t size, struct bitmask *bmp)
    0|{
    0|  dombind(mem, size, MPOL_INTERLEAVE, bmp);
    0|}
     |
     |void
     |numa_weighted_interleave_memory(void *mem, size_t size, struct bitmask *bmp)
    0|{
    0|  dombind(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp);
    0|}
     |
     |void numa_tonode_memory(void *mem, size_t size, int node)
    0|{
    0|  struct bitmask *nodes;
     |
    0|  nodes = numa_allocate_nodemask();
    0|  if (!nodes)
    0|    return;
    0|  numa_bitmask_setbit(nodes, node);
    0|  dombind(mem, size, bind_policy, nodes);
    0|  numa_bitmask_free(nodes);
    0|}
     |
     |SYMVER("numa_tonodemask_memory_v1", "numa_tonodemask_memory@libnuma_1.1")
     |void
     |numa_tonodemask_memory_v1(void *mem, size_t size, const nodemask_t *mask)
    0|{
    0|  struct bitmask bitmask;
     |
    0|  bitmask.maskp = (unsigned long *)mask;
    0|  bitmask.size  = sizeof(nodemask_t);
    0|  dombind(mem, size, bind_policy, &bitmask);
    0|}
     |
     |SYMVER("numa_tonodemask_memory_v2", "numa_tonodemask_memory@@libnuma_1.2")
     |void
     |numa_tonodemask_memory_v2(void *mem, size_t size, struct bitmask *bmp)
    0|{
    0|  dombind(mem, size, bind_policy, bmp);
    0|}
     |
     |void numa_setlocal_memory(void *mem, size_t size)
    0|{
    0|  dombind(mem, size, MPOL_LOCAL, NULL);
    0|}
     |
     |void numa_police_memory(void *mem, size_t size)
    0|{
    0|  int pagesize = numa_pagesize_int();
    0|  unsigned long i;
    0|  char *p = mem;
    0|  for (i = 0; i < size; i += pagesize, p += pagesize)
    0|    __atomic_and_fetch(p, 0xff, __ATOMIC_RELAXED);
     |
    0|}
     |
     |make_internal_alias(numa_police_memory);
     |
     |void *numa_alloc(size_t size)
    0|{
    0|  char *mem;
    0|  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
    0|       0, 0);
    0|  if (mem == (char *)-1)
    0|    return NULL;
    0|  numa_police_memory_int(mem, size);
    0|  return mem;
    0|}
     |
     |void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
    0|{
    0|  char *mem;
    0|  mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
    0|  if (mem == (char *)-1)
    0|    return NULL;
     |  /*
     |   *  The memory policy of the allocated pages is preserved by mremap(), so
     |   *  there is no need to (re)set it here. If the policy of the original
     |   *  allocation is not set, the new pages will be allocated according to the
     |   *  process' mempolicy. Trying to allocate explicitly the new pages on the
     |   *  same node as the original ones would require changing the policy of the
     |   *  newly allocated pages, which violates the numa_realloc() semantics.
     |   */
    0|  return mem;
    0|}
     |
     |SYMVER("numa_alloc_interleaved_subset_v1", "numa_alloc_interleaved_subset@libnuma_1.1")
     |void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
    0|{
    0|  char *mem;
    0|  struct bitmask bitmask;
     |
    0|  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
    0|      0, 0);
    0|  if (mem == (char *)-1)
    0|    return NULL;
    0|  bitmask.maskp = (unsigned long *)mask;
    0|  bitmask.size  = sizeof(nodemask_t);
    0|  dombind(mem, size, MPOL_INTERLEAVE, &bitmask);
    0|  return mem;
    0|}
     |
     |SYMVER("numa_alloc_interleaved_subset_v2", "numa_alloc_interleaved_subset@@libnuma_1.2")
     |void *numa_alloc_interleaved_subset_v2(size_t size, struct bitmask *bmp)
    0|{
    0|  char *mem;
     |
    0|  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
    0|       0, 0);
    0|  if (mem == (char *)-1)
    0|    return NULL;
    0|  dombind(mem, size, MPOL_INTERLEAVE, bmp);
    0|  return mem;
    0|}
     |
     |make_internal_alias(numa_alloc_interleaved_subset_v1);
     |make_internal_alias(numa_alloc_interleaved_subset_v2);
     |
     |void *
     |numa_alloc_interleaved(size_t size)
    0|{
    0|  return numa_alloc_interleaved_subset_v2_int(size, numa_all_nodes_ptr);
    0|}
     |
     |void *
     |numa_alloc_weighted_interleaved_subset(size_t size, struct bitmask *bmp)
    0|{
    0|  char *mem;
     |
    0|  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
    0|       0, 0);
    0|  if (mem == (char *)-1)
    0|    return NULL;
    0|  dombind(mem, size, MPOL_WEIGHTED_INTERLEAVE, bmp);
    0|  return mem;
    0|}
     |
     |void *
     |numa_alloc_weighted_interleaved(size_t size)
    0|{
    0|  return numa_alloc_weighted_interleaved_subset(size, numa_all_nodes_ptr);
    0|}
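
A standalone sketch of the interleaved allocators above: spread a region across all allowed nodes, touch it so pages are actually placed, and release it with numa_free() (defined further below; the 16 MiB size is an arbitrary example):

#include <numa.h>
#include <string.h>

int main(void)
{
    size_t sz = 16UL << 20;
    void *p;

    if (numa_available() < 0)
        return 1;
    p = numa_alloc_interleaved(sz);
    if (!p)
        return 1;
    memset(p, 0, sz);   /* fault the pages in; placement happens on first touch */
    numa_free(p, sz);
    return 0;
}
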
     |
     |/*
     | * given a user node mask, set memory policy to use those nodes
     | */
     |SYMVER("numa_set_interleave_mask_v1", "numa_set_interleave_mask@libnuma_1.1")
     |void
     |numa_set_interleave_mask_v1(nodemask_t *mask)
    0|{
    0|  struct bitmask *bmp;
    0|  int nnodes = numa_max_possible_node_v1_int()+1;
     |
    0|  bmp = numa_bitmask_alloc(nnodes);
    0|  copy_nodemask_to_bitmask(mask, bmp);
    0|  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
    0|    setpol(MPOL_DEFAULT, bmp);
    0|  else
    0|    setpol(MPOL_INTERLEAVE, bmp);
    0|  numa_bitmask_free(bmp);
    0|}
     |
     |
     |SYMVER("numa_set_interleave_mask_v2", "numa_set_interleave_mask@@libnuma_1.2")
     |void
     |numa_set_interleave_mask_v2(struct bitmask *bmp)
    0|{
    0|  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
    0|    setpol(MPOL_DEFAULT, bmp);
    0|  else
    0|    setpol(MPOL_INTERLEAVE, bmp);
    0|}
     |
     |void
     |numa_set_weighted_interleave_mask(struct bitmask *bmp)
    0|{
    0|  if (numa_bitmask_equal(bmp, numa_no_nodes_ptr))
    0|    setpol(MPOL_DEFAULT, bmp);
    0|  else
    0|    setpol(MPOL_WEIGHTED_INTERLEAVE, bmp);
    0|}
     |
     |SYMVER("numa_get_interleave_mask_v1", "numa_get_interleave_mask@libnuma_1.1")
     |nodemask_t
     |numa_get_interleave_mask_v1(void)
    0|{
    0|  int oldpolicy = 0;
    0|  struct bitmask *bmp;
    0|  nodemask_t mask;
     |
    0|  bmp = allocate_nodemask_v1();
    0|  if (!bmp)
    0|    return numa_no_nodes;
    0|  getpol(&oldpolicy, bmp);
    0|  if (oldpolicy == MPOL_INTERLEAVE)
    0|    copy_bitmask_to_nodemask(bmp, &mask);
    0|  else
    0|    copy_bitmask_to_nodemask(numa_no_nodes_ptr, &mask);
    0|  numa_bitmask_free(bmp);
    0|  return mask;
    0|}
     |
     |SYMVER("numa_get_interleave_mask_v2", "numa_get_interleave_mask@@libnuma_1.2")
     |struct bitmask *
     |numa_get_interleave_mask_v2(void)
    0|{
    0|  int oldpolicy = 0;
    0|  struct bitmask *bmp;
     |
    0|  bmp = numa_allocate_nodemask();
    0|  if (!bmp)
    0|    return NULL;
    0|  getpol(&oldpolicy, bmp);
    0|  if (oldpolicy != MPOL_INTERLEAVE)
    0|    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
    0|  return bmp;
    0|}
     |
     |struct bitmask *
     |numa_get_weighted_interleave_mask(void)
    0|{
    0|  int oldpolicy = 0;
    0|  struct bitmask *bmp;
     |
    0|  bmp = numa_allocate_nodemask();
    0|  if (!bmp)
    0|    return NULL;
    0|  getpol(&oldpolicy, bmp);
    0|  if (oldpolicy != MPOL_WEIGHTED_INTERLEAVE)
    0|    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
    0|  return bmp;
    0|}
     |
     |/* (undocumented) */
     |int numa_get_interleave_node(void)
    0|{
    0|  int nd;
    0|  if (get_mempolicy(&nd, NULL, 0, 0, MPOL_F_NODE) == 0)
    0|    return nd;
    0|  return 0;
    0|}
     |
     |void *numa_alloc_onnode(size_t size, int node)
    0|{
    0|  char *mem;
    0|  struct bitmask *bmp;
     |
    0|  bmp = numa_allocate_nodemask();
    0|  if (!bmp)
    0|    return NULL;
    0|  numa_bitmask_setbit(bmp, node);
    0|  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
    0|       0, 0);
    0|  if (mem == (char *)-1)
    0|    mem = NULL;
    0|  else
    0|    dombind(mem, size, bind_policy, bmp);
    0|  numa_bitmask_free(bmp);
    0|  return mem;
    0|}
     |
     |void *numa_alloc_local(size_t size)
    0|{
    0|  char *mem;
    0|  mem = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
    0|       0, 0);
    0|  if (mem == (char *)-1)
    0|    mem = NULL;
    0|  else
    0|    dombind(mem, size, MPOL_LOCAL, NULL);
    0|  return mem;
    0|}
     |
     |void numa_set_bind_policy(int strict)
    0|{
    0|  set_preferred_many();
    0|  if (strict)
    0|    bind_policy = MPOL_BIND;
    0|  else if (has_preferred_many)
    0|    bind_policy = MPOL_PREFERRED_MANY;
    0|  else
    0|    bind_policy = MPOL_PREFERRED;
    0|}
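
A standalone sketch of node-targeted allocation with the policy knob above: numa_set_bind_policy(1) selects strict MPOL_BIND for subsequent numa_alloc_onnode()/numa_tonode_memory() calls (node 0 and the 16-page size are arbitrary example values):

#include <numa.h>

int main(void)
{
    size_t sz;
    void *p;

    if (numa_available() < 0)
        return 1;
    sz = 16 * numa_pagesize();
    numa_set_bind_policy(1);        /* strict: restrict allocations to the node */
    p = numa_alloc_onnode(sz, 0);
    if (p)
        numa_free(p, sz);
    return 0;
}
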
     |
     |SYMVER("numa_set_membind_v1", "numa_set_membind@libnuma_1.1")
     |void
     |numa_set_membind_v1(const nodemask_t *mask)
    0|{
    0|  struct bitmask bitmask;
     |
    0|  bitmask.maskp = (unsigned long *)mask;
    0|  bitmask.size  = sizeof(nodemask_t);
    0|  setpol(MPOL_BIND, &bitmask);
    0|}
     |
     |SYMVER("numa_set_membind_v2", "numa_set_membind@@libnuma_1.2")
     |void
     |numa_set_membind_v2(struct bitmask *bmp)
    0|{
    0|  setpol(MPOL_BIND, bmp);
    0|}
     |
     |make_internal_alias(numa_set_membind_v2);
     |
     |void
     |numa_set_membind_balancing(struct bitmask *bmp)
    0|{
     |  /* MPOL_F_NUMA_BALANCING: ignore if unsupported */
    0|  if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING,
    0|        bmp->maskp, bmp->size + 1) < 0) {
    0|    if (errno == EINVAL) {
    0|      errno = 0;
    0|      numa_set_membind_v2(bmp);
    0|    } else
    0|      numa_error("set_mempolicy");
    0|  }
    0|}
     |
     |/*
     | * copy a bitmask map body to a numa.h nodemask_t structure
     | */
     |void
     |copy_bitmask_to_nodemask(struct bitmask *bmp, nodemask_t *nmp)
    0|{
    0|  int max, i;
     |
    0|  memset(nmp, 0, sizeof(nodemask_t));
    0|  max = (sizeof(nodemask_t)*8);
    0|  for (i=0; i<bmp->size; i++) {
    0|    if (i >= max)
    0|      break;
    0|    if (numa_bitmask_isbitset(bmp, i))
    0|      nodemask_set_compat((nodemask_t *)nmp, i);
    0|  }
    0|}
     |
     |/*
     | * copy a bitmask map body to another bitmask body
     | * fill a larger destination with zeroes
     | */
     |void
     |copy_bitmask_to_bitmask(struct bitmask *bmpfrom, struct bitmask *bmpto)
  184|{
  184|  int bytes;
     |
  184|  if (bmpfrom->size >= bmpto->size) {
  184|    memcpy(bmpto->maskp, bmpfrom->maskp, CPU_BYTES(bmpto->size));
  184|  } else if (bmpfrom->size < bmpto->size) {
    0|    bytes = CPU_BYTES(bmpfrom->size);
    0|    memcpy(bmpto->maskp, bmpfrom->maskp, bytes);
    0|    memset(((char *)bmpto->maskp)+bytes, 0,
    0|          CPU_BYTES(bmpto->size)-bytes);
    0|  }
  184|}
     |
     |/*
     | * copy a numa.h nodemask_t structure to a bitmask map body
     | */
     |void
     |copy_nodemask_to_bitmask(nodemask_t *nmp, struct bitmask *bmp)
    0|{
    0|  int max, i;
     |
    0|  numa_bitmask_clearall(bmp);
    0|  max = (sizeof(nodemask_t)*8);
    0|  if (max > bmp->size)
    0|    max = bmp->size;
    0|  for (i=0; i<max; i++) {
    0|    if (nodemask_isset_compat(nmp, i))
    0|      numa_bitmask_setbit(bmp, i);
    0|  }
    0|}
     |
     |SYMVER("numa_get_membind_v1", "numa_get_membind@libnuma_1.1")
     |nodemask_t
     |numa_get_membind_v1(void)
    0|{
    0|  int oldpolicy = 0;
    0|  struct bitmask *bmp;
    0|  nodemask_t nmp;
     |
    0|  bmp = allocate_nodemask_v1();
    0|  if (!bmp)
    0|    return numa_no_nodes;
    0|  getpol(&oldpolicy, bmp);
    0|  if (oldpolicy == MPOL_BIND) {
    0|    copy_bitmask_to_nodemask(bmp, &nmp);
    0|  } else {
     |    /* copy the body of the map to numa_all_nodes */
    0|    copy_bitmask_to_nodemask(bmp, &numa_all_nodes);
    0|    nmp = numa_all_nodes;
    0|  }
    0|  numa_bitmask_free(bmp);
    0|  return nmp;
    0|}
     |
     |SYMVER("numa_get_membind_v2", "numa_get_membind@@libnuma_1.2")
     |struct bitmask *
     |numa_get_membind_v2(void)
    0|{
    0|  int oldpolicy = 0;
    0|  struct bitmask *bmp = NULL;
     |
    0|  bmp = numa_allocate_nodemask();
    0|  if (!bmp)
    0|    return NULL;
    0|  getpol(&oldpolicy, bmp);
    0|  if (oldpolicy != MPOL_BIND)
    0|    copy_bitmask_to_bitmask(numa_all_nodes_ptr, bmp);
    0|  return bmp;
    0|}
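
A standalone sketch of the membind interface above: bind all future allocations of the task to node 0 via the public numa_set_membind(), then read the policy back with numa_get_membind() (node 0 is an arbitrary example):

#include <numa.h>
#include <stdio.h>

int main(void)
{
    struct bitmask *want, *got;

    if (numa_available() < 0)
        return 1;
    want = numa_allocate_nodemask();
    if (!want)
        return 1;
    numa_bitmask_setbit(want, 0);
    numa_set_membind(want);
    got = numa_get_membind();
    printf("bound to node 0: %d\n", numa_bitmask_isbitset(got, 0));
    numa_bitmask_free(got);
    numa_bitmask_free(want);
    return 0;
}
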
     |
     |//TODO: do we need a v1 nodemask_t version?
     |struct bitmask *numa_get_mems_allowed(void)
    0|{
    0|  struct bitmask *bmp;
     |
     |  /*
     |   * can change, so query on each call.
     |   */
    0|  bmp = numa_allocate_nodemask();
    0|  if (!bmp)
    0|    return NULL;
    0|  if (get_mempolicy(NULL, bmp->maskp, bmp->size + 1, 0,
    0|        MPOL_F_MEMS_ALLOWED) < 0)
    0|    numa_error("get_mempolicy");
    0|  return bmp;
    0|}
     |make_internal_alias(numa_get_mems_allowed);
     |
     |void numa_free(void *mem, size_t size)
    0|{
    0|  munmap(mem, size);
    0|}
     |
     |SYMVER("numa_parse_bitmap_v1", "numa_parse_bitmap@libnuma_1.1")
     |int
     |numa_parse_bitmap_v1(char *line, unsigned long *mask, int ncpus)
    0|{
    0|  int i;
    0|  char *p = strchr(line, '\n');
    0|  if (!p)
    0|    return -1;
     |
    0|  for (i = 0; p > line; i++) {
    0|    char *oldp, *endp;
    0|    oldp = p;
    0|    if (*p == ',')
    0|      --p;
    0|    while (p > line && *p != ',')
    0|      --p;
     |    /* Eat two 32bit fields at a time to get longs */
    0|    if (p > line && sizeof(unsigned long) == 8) {
    0|      oldp--;
    0|      memmove(p, p+1, oldp-p+1);
    0|      while (p > line && *p != ',')
    0|        --p;
    0|    }
    0|    if (*p == ',')
    0|      p++;
    0|    if (i >= CPU_LONGS(ncpus))
    0|      return -1;
    0|    mask[i] = strtoul(p, &endp, 16);
    0|    if (endp != oldp)
    0|      return -1;
    0|    p--;
    0|  }
    0|  return 0;
    0|}
     |
     |SYMVER("numa_parse_bitmap_v2", "numa_parse_bitmap@@libnuma_1.2")
     |int
     |numa_parse_bitmap_v2(char *line, struct bitmask *mask)
    1|{
    1|  int i, ncpus;
    1|  char *p = strchr(line, '\n');
    1|  if (!p)
    0|    return -1;
    1|  ncpus = mask->size;
     |
    2|  for (i = 0; p > line; i++) {
    1|    char *oldp, *endp;
    1|    oldp = p;
    1|    if (*p == ',')
    0|      --p;
    9|    while (p > line && *p != ',')
    8|      --p;
     |    /* Eat two 32bit fields at a time to get longs */
    1|    if (p > line && sizeof(unsigned long) == 8) {
    0|      oldp--;
    0|      memmove(p, p+1, oldp-p+1);
    0|      while (p > line && *p != ',')
    0|        --p;
    0|    }
    1|    if (*p == ',')
    0|      p++;
    1|    if (i >= CPU_LONGS(ncpus))
    0|      return -1;
    1|    mask->maskp[i] = strtoul(p, &endp, 16);
    1|    if (endp != oldp)
    0|      return -1;
    1|    p--;
    1|  }
    1|  return 0;
    1|}
     |
     |static void init_node_cpu_mask_v2(void)
    1|{
    1|  int nnodes = numa_max_possible_node_v2_int() + 1;
    1|  node_cpu_mask_v2 = calloc (nnodes, sizeof(struct bitmask *));
    1|}
     |
     |static void cleanup_node_cpu_mask_v2(void)
    0|{
    0|  if (node_cpu_mask_v2) {
    0|    int i;
    0|    int nnodes;
    0|    nnodes = numa_max_possible_node_v2_int() + 1;
    0|    for (i = 0; i < nnodes; i++) {
    0|      FREE_AND_ZERO(node_cpu_mask_v2[i]);
    0|    }
    0|    free(node_cpu_mask_v2);
    0|    node_cpu_mask_v2 = NULL;
    0|  }
    0|}
     |
     |/* This would be better with some locking, but I don't want to make libnuma
     |   dependent on pthreads right now. The races are relatively harmless. */
     |SYMVER("numa_node_to_cpus_v1", "numa_node_to_cpus@libnuma_1.1")
     |int
     |numa_node_to_cpus_v1(int node, unsigned long *buffer, int bufferlen)
    0|{
    0|  int err = 0;
    0|  char fn[64];
    0|  FILE *f;
    0|  char update;
    0|  char *line = NULL;
    0|  size_t len = 0;
    0|  struct bitmask bitmask;
    0|  int buflen_needed;
    0|  unsigned long *mask;
    0|  int ncpus = numa_num_possible_cpus();
    0|  int maxnode = numa_max_node_int();
     |
    0|  buflen_needed = CPU_BYTES(ncpus);
    0|  if ((unsigned)node > maxnode || bufferlen < buflen_needed) {
    0|    errno = ERANGE;
    0|    return -1;
    0|  }
    0|  if (bufferlen > buflen_needed)
    0|    memset(buffer, 0, bufferlen);
    0|  update = __atomic_fetch_and(&node_cpu_mask_v1_stale, 0, __ATOMIC_RELAXED);
    0|  if (node_cpu_mask_v1[node] && !update) {
    0|    memcpy(buffer, node_cpu_mask_v1[node], buflen_needed);
    0|    return 0;
    0|  }
     |
    0|  mask = malloc(buflen_needed);
    0|  if (!mask)
    0|    mask = (unsigned long *)buffer;
    0|  memset(mask, 0, buflen_needed);
     |
    0|  sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
    0|  f = fopen(fn, "r");
    0|  if (!f || getdelim(&line, &len, '\n', f) < 1) {
    0|    if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
    0|      numa_warn(W_nosysfs2,
    0|         "/sys not mounted or invalid. Assuming one node: %s",
    0|          strerror(errno));
    0|      numa_warn(W_nosysfs2,
    0|         "(cannot open or correctly parse %s)", fn);
    0|    }
    0|    bitmask.maskp = (unsigned long *)mask;
    0|    bitmask.size  = buflen_needed * 8;
    0|    numa_bitmask_setall(&bitmask);
    0|    err = -1;
    0|  }
    0|  if (f)
    0|    fclose(f);
     |
    0|  if (line && (numa_parse_bitmap_v1(line, mask, ncpus) < 0)) {
    0|    numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
    0|    bitmask.maskp = (unsigned long *)mask;
    0|    bitmask.size  = buflen_needed * 8;
    0|    numa_bitmask_setall(&bitmask);
    0|    err = -1;
    0|  }
     |
    0|  free(line);
    0|  memcpy(buffer, mask, buflen_needed);
     |
     |  /* slightly racy, see above */
    0|  if (node_cpu_mask_v1[node]) {
    0|    if (update) {
     |      /*
     |       * There may be readers on node_cpu_mask_v1[], hence it can not
     |       * be freed.
     |       */
    0|      memcpy(node_cpu_mask_v1[node], mask, buflen_needed);
    0|      free(mask);
    0|      mask = NULL;
    0|    } else if (mask != buffer)
    0|      free(mask);
    0|  } else {
    0|    node_cpu_mask_v1[node] = mask;
    0|  }
    0|  return err;
    0|}
     |
     |/*
     | * test whether a node has cpus
     | */
     |/* This would be better with some locking, but I don't want to make libnuma
     |   dependent on pthreads right now. The races are relatively harmless. */
     |/*
     | * deliver a bitmask of cpus representing the cpus on a given node
     | */
     |SYMVER("numa_node_to_cpus_v2", "numa_node_to_cpus@@libnuma_1.2")
     |int
     |numa_node_to_cpus_v2(int node, struct bitmask *buffer)
  181|{
  181|  int err = 0;
  181|  int nnodes = numa_max_node();
  181|  char fn[64], *line = NULL;
  181|  FILE *f;
  181|  char update;
  181|  size_t len = 0;
  181|  struct bitmask *mask;
     |
  181|  if (!node_cpu_mask_v2)
    1|    init_node_cpu_mask_v2();
     |
  181|  if (node > nnodes) {
    0|    errno = ERANGE;
    0|    return -1;
    0|  }
  181|  numa_bitmask_clearall(buffer);
     |
  181|  update = __atomic_fetch_and(&node_cpu_mask_v2_stale, 0, __ATOMIC_RELAXED);
  181|  if (node_cpu_mask_v2[node] && !update) {
     |    /* have already constructed a mask for this node */
  180|    if (buffer->size < node_cpu_mask_v2[node]->size) {
    0|      errno = EINVAL;
    0|      numa_error("map size mismatch");
    0|      return -1;
    0|    }
  180|    copy_bitmask_to_bitmask(node_cpu_mask_v2[node], buffer);
  180|    return 0;
  180|  }
     |
     |  /* need a new mask for this node */
    1|  mask = numa_allocate_cpumask();
    1|  if (!mask)
    0|    return -1;
     |
     |  /* this is a kernel cpumask_t (see node_read_cpumap()) */
    1|  sprintf(fn, "/sys/devices/system/node/node%d/cpumap", node);
    1|  f = fopen(fn, "r");
    1|  if (!f || getdelim(&line, &len, '\n', f) < 1) {
    0|    if (numa_bitmask_isbitset(numa_nodes_ptr, node)) {
    0|      numa_warn(W_nosysfs2,
    0|         "/sys not mounted or invalid. Assuming one node: %s",
    0|          strerror(errno));
    0|      numa_warn(W_nosysfs2,
    0|         "(cannot open or correctly parse %s)", fn);
    0|    }
    0|    numa_bitmask_setall(mask);
    0|    err = -1;
    0|  }
    1|  if (f)
    1|    fclose(f);
     |
    1|  if (line && (numa_parse_bitmap_v2(line, mask) < 0)) {
    0|    numa_warn(W_cpumap, "Cannot parse cpumap. Assuming one node");
    0|    numa_bitmask_setall(mask);
    0|    err = -1;
    0|  }
     |
    1|  free(line);
    1|  copy_bitmask_to_bitmask(mask, buffer);
     |
     |  /* slightly racy, see above */
     |  /* save the mask we created */
    1|  if (node_cpu_mask_v2[node]) {
    0|    if (update) {
    0|      copy_bitmask_to_bitmask(mask, node_cpu_mask_v2[node]);
    0|      numa_bitmask_free(mask);
    0|      mask = NULL;
     |    /* how could this be? */
    0|    } else if (mask != buffer)
    0|      numa_bitmask_free(mask);
    1|  } else {
     |    /* we don't want to cache a faulty result */
    1|    if (!err)
    1|      node_cpu_mask_v2[node] = mask;
    0|    else
    0|      numa_bitmask_free(mask);
    1|  }
    1|  return err;
    1|}
     |
     |make_internal_alias(numa_node_to_cpus_v1);
     |make_internal_alias(numa_node_to_cpus_v2);
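
A standalone sketch of numa_node_to_cpus() (the public name of the v2 routine above), printing the CPUs belonging to each configured node:

#include <numa.h>
#include <stdio.h>

int main(void)
{
    struct bitmask *cpus;
    int node, cpu;

    if (numa_available() < 0)
        return 1;
    cpus = numa_allocate_cpumask();
    for (node = 0; node <= numa_max_node(); node++) {
        if (numa_node_to_cpus(node, cpus) < 0)
            continue;   /* the node may not exist */
        printf("node %d:", node);
        for (cpu = 0; cpu < numa_num_configured_cpus(); cpu++)
            if (numa_bitmask_isbitset(cpus, cpu))
                printf(" %d", cpu);
        printf("\n");
    }
    numa_bitmask_free(cpus);
    return 0;
}
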
     |
     |void numa_node_to_cpu_update(void)
    0|{
    0|  __atomic_store_n(&node_cpu_mask_v1_stale, 1, __ATOMIC_RELAXED);
    0|  __atomic_store_n(&node_cpu_mask_v2_stale, 1, __ATOMIC_RELAXED);
    0|}
     |
     |/* report the node of the specified cpu */
     |int numa_node_of_cpu(int cpu)
    0|{
    0|  struct bitmask *bmp;
    0|  int ncpus, nnodes, node, ret;
     |
    0|  ncpus = numa_num_possible_cpus();
    0|  if (cpu > ncpus){
    0|    errno = EINVAL;
    0|    return -1;
    0|  }
    0|  bmp = numa_bitmask_alloc(ncpus);
    0|  nnodes = numa_max_node();
    0|  for (node = 0; node <= nnodes; node++){
    0|    if (numa_node_to_cpus_v2_int(node, bmp) < 0) {
     |      /* It's possible for the node to not exist */
    0|      continue;
    0|    }
    0|    if (numa_bitmask_isbitset(bmp, cpu)){
    0|      ret = node;
    0|      goto end;
    0|    }
    0|  }
    0|  ret = -1;
    0|  errno = EINVAL;
    0|end:
    0|  numa_bitmask_free(bmp);
    0|  return ret;
    0|}
     |
     |SYMVER("numa_run_on_node_mask_v1", "numa_run_on_node_mask@libnuma_1.1")
     |int
     |numa_run_on_node_mask_v1(const nodemask_t *mask)
    0|{
    0|  int ncpus = numa_num_possible_cpus();
    0|  int i, k, err;
    0|  unsigned long cpus[CPU_LONGS(ncpus)], nodecpus[CPU_LONGS(ncpus)];
    0|  memset(cpus, 0, CPU_BYTES(ncpus));
    0|  for (i = 0; i < NUMA_NUM_NODES; i++) {
    0|    if (mask->n[i / BITS_PER_LONG] == 0)
    0|      continue;
    0|    if (nodemask_isset_compat(mask, i)) {
    0|      if (numa_node_to_cpus_v1_int(i, nodecpus, CPU_BYTES(ncpus)) < 0) {
    0|        numa_warn(W_noderunmask,
    0|            "Cannot read node cpumask from sysfs");
    0|        continue;
    0|      }
    0|      for (k = 0; k < CPU_LONGS(ncpus); k++)
    0|        cpus[k] |= nodecpus[k];
    0|    }
    0|  }
    0|  err = numa_sched_setaffinity_v1(0, CPU_BYTES(ncpus), cpus);
     |
     |  /* The sched_setaffinity API is broken because it expects
     |     the user to guess the kernel cpuset size. Do this in a
     |     brute force way. */
    0|  if (err < 0 && errno == EINVAL) {
    0|    int savederrno = errno;
    0|    char *bigbuf;
    0|    static int size = -1;
    0|    if (size == -1)
    0|      size = CPU_BYTES(ncpus) * 2;
    0|    bigbuf = malloc(CPU_BUFFER_SIZE);
    0|    if (!bigbuf) {
    0|      errno = ENOMEM;
    0|      return -1;
    0|    }
    0|    errno = savederrno;
    0|    while (size <= CPU_BUFFER_SIZE) {
    0|      memcpy(bigbuf, cpus, CPU_BYTES(ncpus));
    0|      memset(bigbuf + CPU_BYTES(ncpus), 0,
    0|             CPU_BUFFER_SIZE - CPU_BYTES(ncpus));
    0|      err = numa_sched_setaffinity_v1_int(0, size, (unsigned long *)bigbuf);
    0|      if (err == 0 || errno != EINVAL)
    0|        break;
    0|      size *= 2;
    0|    }
    0|    savederrno = errno;
    0|    free(bigbuf);
    0|    errno = savederrno;
    0|  }
    0|  return err;
    0|}
     |
     |/*
     | * Given a node mask (size of a kernel nodemask_t) (probably populated by
     | * a user argument list) set up a map of cpus (map "cpus") on those nodes.
     | * Then set affinity to those cpus.
     | */
     |SYMVER("numa_run_on_node_mask_v2", "numa_run_on_node_mask@@libnuma_1.2")
     |int
     |numa_run_on_node_mask_v2(struct bitmask *bmp)
    0|{
    0|  int ncpus, i, k, err;
    0|  struct bitmask *cpus, *nodecpus;
     |
    0|  cpus = numa_allocate_cpumask();
    0|  ncpus = cpus->size;
    0|  nodecpus = numa_allocate_cpumask();
    0|  if (!cpus || !nodecpus)
    0|    return -1;
     |
    0|  for (i = 0; i < bmp->size; i++) {
    0|    if (bmp->maskp[i / BITS_PER_LONG] == 0)
    0|      continue;
    0|    if (numa_bitmask_isbitset(bmp, i)) {
     |      /*
     |       * numa_all_nodes_ptr is cpuset aware; use only
     |       * these nodes
     |       */
    0|      if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
    0|        numa_warn(W_noderunmask,
    0|          "node %d not allowed", i);
    0|        continue;
    0|      }
    0|      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
    0|        numa_warn(W_noderunmask,
    0|          "Cannot read node cpumask from sysfs");
    0|        continue;
    0|      }
    0|      for (k = 0; k < CPU_LONGS(ncpus); k++)
    0|        cpus->maskp[k] |= nodecpus->maskp[k];
    0|    }
    0|  }
    0|  err = numa_sched_setaffinity_v2_int(0, cpus);
     |
    0|  numa_bitmask_free(cpus);
    0|  numa_bitmask_free(nodecpus);
     |
     |  /* used to have to consider that this could fail - it shouldn't now */
    0|  if (err < 0) {
    0|    numa_error("numa_sched_setaffinity_v2_int() failed");
    0|  }
     |
    0|  return err;
    0|}
     |
     |make_internal_alias(numa_run_on_node_mask_v2);
     |
     |/*
     | * Given a node mask (size of a kernel nodemask_t) (probably populated by
     | * a user argument list) set up a map of cpus (map "cpus") on those nodes
     | * without any cpuset awareness. Then set affinity to those cpus.
     | */
     |int
     |numa_run_on_node_mask_all(struct bitmask *bmp)
    0|{
    0|  int ncpus, i, k, err;
    0|  struct bitmask *cpus, *nodecpus;
     |
    0|  cpus = numa_allocate_cpumask();
    0|  ncpus = cpus->size;
    0|  nodecpus = numa_allocate_cpumask();
    0|  if (!cpus || !nodecpus)
    0|    return -1;
     |
    0|  for (i = 0; i < bmp->size; i++) {
    0|    if (bmp->maskp[i / BITS_PER_LONG] == 0)
    0|      continue;
    0|    if (numa_bitmask_isbitset(bmp, i)) {
    0|      if (!numa_bitmask_isbitset(numa_possible_nodes_ptr, i)) {
    0|        numa_warn(W_noderunmask,
    0|          "node %d not allowed", i);
    0|        continue;
    0|      }
    0|      if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
    0|        numa_warn(W_noderunmask,
    0|          "Cannot read node cpumask from sysfs");
    0|        continue;
    0|      }
    0|      for (k = 0; k < CPU_LONGS(ncpus); k++)
    0|        cpus->maskp[k] |= nodecpus->maskp[k];
    0|    }
    0|  }
    0|  err = numa_sched_setaffinity_v2_int(0, cpus);
     |
    0|  numa_bitmask_free(cpus);
    0|  numa_bitmask_free(nodecpus);
     |
     |  /* With possible nodes freedom it can happen easily now */
    0|  if (err < 0) {
    0|    numa_error("numa_sched_setaffinity_v2_int() failed");
    0|  }
     |
    0|  return err;
    0|}
     |
     |SYMVER("numa_get_run_node_mask_v1", "numa_get_run_node_mask@libnuma_1.1")
     |nodemask_t
     |numa_get_run_node_mask_v1(void)
    0|{
    0|  int ncpus = numa_num_configured_cpus();
    0|  int i, k;
    0|  int max = numa_max_node_int();
    0|  struct bitmask *bmp, *cpus, *nodecpus;
    0|  nodemask_t nmp;
     |
    0|  cpus = numa_allocate_cpumask();
    0|  if (!cpus)
    0|    return numa_no_nodes;
    0|  if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
    0|    nmp = numa_no_nodes;
    0|    goto free_cpus;
    0|  }
     |
    0|  nodecpus = numa_allocate_cpumask();
    0|  if (!nodecpus) {
    0|    nmp = numa_no_nodes;
    0|    goto free_cpus;
    0|  }
     |
    0|  bmp = allocate_nodemask_v1(); /* the size of a nodemask_t */
    0|  if (!bmp) {
    0|    nmp = numa_no_nodes;
    0|    goto free_cpus2;
    0|  }
     |
    0|  for (i = 0; i <= max; i++) {
    0|    if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
     |      /* It's possible for the node to not exist */
    0|      continue;
    0|    }
    0|    for (k = 0; k < CPU_LONGS(ncpus); k++) {
    0|      if (nodecpus->maskp[k] & cpus->maskp[k])
    0|        numa_bitmask_setbit(bmp, i);
    0|    }
    0|  }
    0|  copy_bitmask_to_nodemask(bmp, &nmp);
    0|  numa_bitmask_free(bmp);
    0|free_cpus2:
    0|  numa_bitmask_free(nodecpus);
    0|free_cpus:
    0|  numa_bitmask_free(cpus);
    0|  return nmp;
    0|}
     |
     |SYMVER("numa_get_run_node_mask_v2", "numa_get_run_node_mask@@libnuma_1.2")
     |struct bitmask *
     |numa_get_run_node_mask_v2(void)
    0|{
    0|  int i, k;
    0|  int ncpus = numa_num_configured_cpus();
    0|  int max = numa_max_node_int();
    0|  struct bitmask *bmp, *cpus, *nodecpus;
     |
    0|  bmp = numa_allocate_cpumask();
    0|  cpus = numa_allocate_cpumask();
    0|  if (!bmp || !cpus)
    0|    return NULL;
    0|  if (numa_sched_getaffinity_v2_int(0, cpus) < 0){
    0|    copy_bitmask_to_bitmask(numa_no_nodes_ptr, bmp);
    0|    goto free_cpus;
    0|  }
     |
    0|  nodecpus = numa_allocate_cpumask();
    0|  for (i = 0; i <= max; i++) {
     |    /*
     |     * numa_all_nodes_ptr is cpuset aware; show only
     |     * these nodes
     |     */
    0|    if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i)) {
    0|      continue;
    0|    }
    0|    if (numa_node_to_cpus_v2_int(i, nodecpus) < 0) {
     |      /* It's possible for the node to not exist */
    0|      continue;
    0|    }
    0|    for (k = 0; k < CPU_LONGS(ncpus); k++) {
    0|      if (nodecpus->maskp[k] & cpus->maskp[k])
    0|        numa_bitmask_setbit(bmp, i);
    0|    }
    0|  }
    0|  numa_bitmask_free(nodecpus);
    0|free_cpus:
    0|  numa_bitmask_free(cpus);
    0|  return bmp;
    0|}
     |
     |int
     |numa_migrate_pages(int pid, struct bitmask *fromnodes, struct bitmask *tonodes)
    0|{
    0|  int numa_num_nodes = numa_num_possible_nodes();
     |
    0|  return migrate_pages(pid, numa_num_nodes + 1, fromnodes->maskp,
    0|              tonodes->maskp);
    0|}
     |
     |int numa_move_pages(int pid, unsigned long count,
     |  void **pages, const int *nodes, int *status, int flags)
    0|{
    0|  return move_pages(pid, count, pages, nodes, status, flags);
    0|}
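
A standalone sketch of numa_move_pages() above: move one page of the calling process (pid 0) to node 0 and read back its status, which on success is the node the page now resides on (a negative errno per page otherwise). The page is touched first because an unfaulted page has no location to report:

#include <numa.h>
#include <numaif.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
    void *pages[1];
    int nodes[1] = { 0 };
    int status[1];
    size_t sz;

    if (numa_available() < 0)
        return 1;
    sz = numa_pagesize();
    pages[0] = numa_alloc_local(sz);
    if (!pages[0])
        return 1;
    memset(pages[0], 1, sz);    /* fault the page in first */
    if (numa_move_pages(0, 1, pages, nodes, status, MPOL_MF_MOVE) == 0)
        printf("page now on node %d\n", status[0]);
    numa_free(pages[0], sz);
    return 0;
}
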
     |
     |int numa_run_on_node(int node)
    0|{
    0|  int numa_num_nodes = numa_num_possible_nodes();
    0|  int ret = -1;
    0|  struct bitmask *cpus;
     |
    0|  if (node >= numa_num_nodes){
    0|    errno = EINVAL;
    0|    goto out;
    0|  }
     |
    0|  cpus = numa_allocate_cpumask();
    0|  if (!cpus)
    0|    return -1;
     |
    0|  if (node == -1)
    0|    numa_bitmask_setall(cpus);
    0|  else if (numa_node_to_cpus_v2_int(node, cpus) < 0){
    0|    numa_warn(W_noderunmask, "Cannot read node cpumask from sysfs");
    0|    goto free;
    0|  }
     |
    0|  ret = numa_sched_setaffinity_v2_int(0, cpus);
    0|free:
    0|  numa_bitmask_free(cpus);
    0|out:
    0|  return ret;
    0|}
1923
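A usage sketch for numa_run_on_node() (not from the sources): bind the calling task to node 0's CPUs and allocate from the same node; node -1 restores the ability to run everywhere:

#include <numa.h>
#include <stdio.h>

int main(void)
{
  void *mem;

  if (numa_available() < 0)
    return 1;
  /* pin this thread to node 0's CPUs; node -1 would allow all CPUs */
  if (numa_run_on_node(0) < 0) {
    perror("numa_run_on_node");
    return 1;
  }
  /* keep memory next to the CPUs we now run on */
  mem = numa_alloc_onnode(1 << 20, 0);
  if (mem)
    numa_free(mem, 1 << 20);
  return 0;
}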
1924
static struct bitmask *__numa_preferred(void)
1925
0
{
1926
0
  int policy = 0;
1927
0
  struct bitmask *bmp;
1928
1929
0
  bmp = numa_allocate_nodemask();
1930
0
  if (!bmp)
1931
0
    return NULL;
1932
  /* could read the current CPU from /proc/self/status. Probably
1933
     not worth it. */
1934
0
  numa_bitmask_clearall(bmp);
1935
0
  getpol(&policy, bmp);
1936
1937
0
  if (policy != MPOL_PREFERRED &&
1938
0
      policy != MPOL_PREFERRED_MANY &&
1939
0
      policy != MPOL_BIND)
1940
0
    return bmp;
1941
1942
0
  if (policy == MPOL_PREFERRED && numa_bitmask_weight(bmp) > 1) {
1943
0
    errno = EINVAL;
1944
0
    numa_error(__FILE__);
1945
0
  }
1946
1947
0
  return bmp;
1948
0
}
1949
1950
int numa_preferred_err(void)
1951
0
{
1952
0
  int first_node = 0;
1953
0
  struct bitmask *bmp;
1954
1955
0
  bmp = __numa_preferred();
1956
0
  first_node = numa_find_first(bmp);
1957
0
  numa_bitmask_free(bmp);
1958
  
1959
0
  return first_node;
1960
0
}
1961
1962
int numa_preferred(void)
1963
0
{
1964
0
  int first_node = 0;
1965
1966
0
  first_node = numa_preferred_err();
1967
0
  first_node = first_node >= 0 ? first_node : 0;
1968
1969
0
  return first_node;
1970
0
}
1971
1972
static void __numa_set_preferred(struct bitmask *bmp)
1973
0
{
1974
0
  int nodes = numa_bitmask_weight(bmp);
1975
0
  if (nodes > 1) {
1976
0
    errno = EINVAL;
1977
0
    numa_error(__FILE__);
1978
0
  }
1979
1980
0
  setpol(nodes ? MPOL_PREFERRED : MPOL_LOCAL, bmp);
1981
0
}
1982
1983
void numa_set_preferred(int node)
1984
0
{
1985
0
  struct bitmask *bmp = numa_allocate_nodemask();
1986
0
  if (!bmp)
1987
0
    return;
1988
0
  numa_bitmask_setbit(bmp, node);
1989
0
  __numa_set_preferred(bmp);
1990
0
  numa_bitmask_free(bmp);
1991
0
}
1992
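A sketch combining numa_preferred() and numa_set_preferred() (not from the sources; numa_set_localalloc(), defined below, undoes the preference):

#include <numa.h>
#include <stdio.h>

int main(void)
{
  if (numa_available() < 0)
    return 1;
  printf("preferred node before: %d\n", numa_preferred());
  numa_set_preferred(numa_max_node());   /* prefer the highest node */
  printf("preferred node after:  %d\n", numa_preferred());
  numa_set_localalloc();                 /* back to local allocation */
  return 0;
}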
1993
int numa_has_preferred_many(void)
1994
0
{
1995
0
  set_preferred_many();
1996
0
  return has_preferred_many;
1997
0
}
1998
1999
void numa_set_preferred_many(struct bitmask *bitmask)
2000
0
{
2001
0
  int first_node = 0;
2002
2003
0
  set_preferred_many();
2004
0
  if (!has_preferred_many) {
2005
0
    numa_warn(W_nodeparse,
2006
0
      "Unable to handle MANY preferred nodes. Falling back to first node\n");
2007
0
    first_node = numa_find_first(bitmask);
2008
0
    numa_set_preferred(first_node);
2009
0
    return;
2010
0
  }
2011
0
  setpol(MPOL_PREFERRED_MANY, bitmask);
2012
0
}
2013
2014
struct bitmask *numa_preferred_many(void)
2015
0
{
2016
0
  return __numa_preferred();
2017
0
}
2018
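A hedged sketch of the MPOL_PREFERRED_MANY interface (requires Linux 5.15+; as the code above shows, the library itself warns and falls back to a single preferred node on older kernels):

#include <numa.h>

int main(void)
{
  struct bitmask *nodes;

  if (numa_available() < 0 || numa_max_node() < 1)
    return 1;
  nodes = numa_allocate_nodemask();
  if (!nodes)
    return 1;
  numa_bitmask_setbit(nodes, 0);
  numa_bitmask_setbit(nodes, 1);
  if (numa_has_preferred_many())
    numa_set_preferred_many(nodes);  /* MPOL_PREFERRED_MANY */
  else
    numa_set_preferred(0);           /* single-node fallback */
  numa_bitmask_free(nodes);
  return 0;
}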
2019
void numa_set_localalloc(void)
2020
0
{
2021
0
  setpol(MPOL_LOCAL, numa_no_nodes_ptr);
2022
0
}
2023
2024
SYMVER("numa_bind_v1", "numa_bind@libnuma_1.1")
2025
void numa_bind_v1(const nodemask_t *nodemask)
2026
0
{
2027
0
  struct bitmask bitmask;
2028
2029
0
  bitmask.maskp = (unsigned long *)nodemask;
2030
0
  bitmask.size  = sizeof(nodemask_t);
2031
0
  numa_run_on_node_mask_v2_int(&bitmask);
2032
0
  numa_set_membind_v2_int(&bitmask);
2033
0
}
2034
2035
SYMVER("numa_bind_v2", "numa_bind@@libnuma_1.2")
2036
void numa_bind_v2(struct bitmask *bmp)
2037
0
{
2038
0
  numa_run_on_node_mask_v2_int(bmp);
2039
0
  numa_set_membind_v2_int(bmp);
2040
0
}
2041
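A usage sketch for numa_bind() (not from the sources): restrict both scheduling and memory allocation to the nodes named in a nodestring. numa_set_strict(), defined next, makes later node-specific allocations fail rather than silently fall back:

#include <numa.h>

int main(void)
{
  struct bitmask *nodes;

  if (numa_available() < 0)
    return 1;
  nodes = numa_parse_nodestring("0");
  if (!nodes)
    return 1;
  numa_set_strict(1);  /* later numa_alloc_onnode() etc. fail hard */
  numa_bind(nodes);    /* CPU affinity + memory policy in one call */
  numa_bitmask_free(nodes);
  /* ... allocate and compute here ... */
  return 0;
}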
2042
void numa_set_strict(int flag)
2043
0
{
2044
0
  if (flag)
2045
0
    mbind_flags |= MPOL_MF_STRICT;
2046
0
  else
2047
0
    mbind_flags &= ~MPOL_MF_STRICT;
2048
0
}
2049
2050
/*
2051
 * Extract a node or processor number from the given string.
2052
 * Allow a relative node / processor specification within the allowed
2053
 * set if "relative" is nonzero.
2054
 */
2055
static unsigned long get_nr(const char *s, char **end, struct bitmask *bmp, int relative)
2056
10.4k
{
2057
10.4k
  long i, nr;
2058
2059
10.4k
  if (!relative)
2060
6.20k
    return strtoul(s, end, 0);
2061
2062
4.27k
  nr = strtoul(s, end, 0);
2063
4.27k
  if (s == *end)
2064
8
    return nr;
2065
  /* Find the nth set bit */
2066
101k
  for (i = 0; nr >= 0 && i <= bmp->size; i++)
2067
97.4k
    if (numa_bitmask_isbitset(bmp, i))
2068
5.81k
      nr--;
2069
4.26k
  return i-1;
2070
4.27k
}
2071
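A worked illustration of the relative form (a sketch; nth_set_bit is a hypothetical helper mirroring get_nr()'s scan, not a libnuma function): with allowed bits {0,2,4}, the relative spec +1 resolves to the second set bit, i.e. node 2.

#include <numa.h>
#include <stdio.h>

/* nth_set_bit: hypothetical helper; returns the index of the nr-th
   (0-based) set bit of bmp, or -1 if there is none */
static int nth_set_bit(struct bitmask *bmp, long nr)
{
  unsigned int i;

  for (i = 0; i < bmp->size; i++)
    if (numa_bitmask_isbitset(bmp, i) && nr-- == 0)
      return (int)i;
  return -1;
}

int main(void)
{
  struct bitmask *allowed = numa_bitmask_alloc(8);

  if (!allowed)
    return 1;
  numa_bitmask_setbit(allowed, 0);
  numa_bitmask_setbit(allowed, 2);
  numa_bitmask_setbit(allowed, 4);
  printf("+1 -> node %d\n", nth_set_bit(allowed, 1));  /* prints 2 */
  numa_bitmask_free(allowed);
  return 0;
}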
2072
/*
2073
 * __numa_parse_nodestring() is called to create a node mask, given
2074
 * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2075
 * (the + indicates that the numbers are nodeset-relative)
2076
 *
2077
 * The nodes may be specified as absolute, or relative to the current nodeset.
2078
 * The list of available nodes is in a map pointed to by "allowed_nodes_ptr",
2079
 * which may represent all nodes or the nodes in the current nodeset.
2080
 *
2081
 * The caller must free the returned bitmask.
2082
 */
2083
static struct bitmask *
2084
__numa_parse_nodestring(const char *s, struct bitmask *allowed_nodes_ptr)
2085
675
{
2086
675
  int invert = 0, relative = 0;
2087
675
  int conf_nodes = numa_num_configured_nodes();
2088
675
  char *end;
2089
675
  struct bitmask *mask;
2090
2091
675
  mask = numa_allocate_nodemask();
2092
675
  if (!mask)
2093
0
    return NULL;
2094
2095
675
  if (s[0] == 0){
2096
1
    copy_bitmask_to_bitmask(numa_no_nodes_ptr, mask);
2097
1
    return mask; /* return freeable mask */
2098
1
  }
2099
674
  if (*s == '!') {
2100
3
    invert = 1;
2101
3
    s++;
2102
3
  }
2103
674
  if (*s == '+') {
2104
274
    relative++;
2105
274
    s++;
2106
274
  }
2107
2.80k
  do {
2108
2.80k
    unsigned long arg;
2109
2.80k
    int i;
2110
2.80k
    if (isalpha(*s)) {
2111
141
      int n;
2112
141
      if (!strcmp(s,"all")) {
2113
1
        copy_bitmask_to_bitmask(allowed_nodes_ptr,
2114
1
              mask);
2115
1
        s+=4;
2116
1
        break;
2117
1
      }
2118
140
      n = resolve_affinity(s, mask);
2119
140
      if (n != NO_IO_AFFINITY) {
2120
57
        if (n < 0)
2121
57
          goto err;
2122
0
        s += strlen(s) + 1;
2123
0
        break;
2124
57
      }
2125
140
    }
2126
2.74k
    arg = get_nr(s, &end, allowed_nodes_ptr, relative);
2127
2.74k
    if (end == s) {
2128
116
      numa_warn(W_nodeparse, "unparseable node description `%s'\n", s);
2129
116
      goto err;
2130
116
    }
2131
2.62k
    if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg)) {
2132
201
      numa_warn(W_nodeparse, "node argument %ld is out of range\n", arg);
2133
201
      goto err;
2134
201
    }
2135
2.42k
    i = arg;
2136
2.42k
    numa_bitmask_setbit(mask, i);
2137
2.42k
    s = end;
2138
2.42k
    if (*s == '-') {
2139
1.47k
      char *end2;
2140
1.47k
      unsigned long arg2;
2141
1.47k
      arg2 = get_nr(++s, &end2, allowed_nodes_ptr, relative);
2142
1.47k
      if (end2 == s) {
2143
9
        numa_warn(W_nodeparse, "missing node argument %s\n", s);
2144
9
        goto err;
2145
9
      }
2146
1.46k
      if (!numa_bitmask_isbitset(allowed_nodes_ptr, arg2)) {
2147
178
        numa_warn(W_nodeparse, "node argument %ld out of range\n", arg2);
2148
178
        goto err;
2149
178
      }
2150
2.33k
      while (arg <= arg2) {
2151
1.04k
        i = arg;
2152
1.04k
        if (numa_bitmask_isbitset(allowed_nodes_ptr,i))
2153
1.04k
          numa_bitmask_setbit(mask, i);
2154
1.04k
        arg++;
2155
1.04k
      }
2156
1.29k
      s = end2;
2157
1.29k
    }
2158
2.42k
  } while (*s++ == ',');
2159
113
  if (s[-1] != '\0')
2160
15
    goto err;
2161
98
  if (invert) {
2162
1
    int i;
2163
2
    for (i = 0; i < conf_nodes; i++) {
2164
1
      if (numa_bitmask_isbitset(mask, i))
2165
1
        numa_bitmask_clearbit(mask, i);
2166
0
      else
2167
0
        numa_bitmask_setbit(mask, i);
2168
1
    }
2169
1
  }
2170
98
  return mask;
2171
2172
576
err:
2173
576
  numa_bitmask_free(mask);
2174
576
  return NULL;
2175
113
}
2176
2177
/*
2178
 * numa_parse_nodestring() is called to create a bitmask from nodes available
2179
 * for this task.
2180
 */
2181
2182
struct bitmask * numa_parse_nodestring(const char *s)
2183
675
{
2184
675
  return __numa_parse_nodestring(s, numa_all_nodes_ptr);
2185
675
}
2186
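A usage sketch (not from the sources): parse a nodestring such as "0" (forms like 1,3,5-7, +0-1, or !0 follow the grammar described above) and inspect the returned mask:

#include <numa.h>
#include <stdio.h>

int main(void)
{
  struct bitmask *mask;
  int i;

  if (numa_available() < 0)
    return 1;
  mask = numa_parse_nodestring("0");  /* NULL on a parse error */
  if (!mask) {
    fprintf(stderr, "bad node string\n");
    return 1;
  }
  for (i = 0; i <= numa_max_node(); i++)
    if (numa_bitmask_isbitset(mask, i))
      printf("node %d selected\n", i);
  numa_bitmask_free(mask);
  return 0;
}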
2187
/*
2188
 * numa_parse_nodestring_all() is called to create a bitmask from all nodes
2189
 * available.
2190
 */
2191
2192
struct bitmask * numa_parse_nodestring_all(const char *s)
2193
0
{
2194
0
  return __numa_parse_nodestring(s, numa_possible_nodes_ptr);
2195
0
}
2196
2197
/*
2198
 * __numa_parse_cpustring() is called to create a bitmask, given
2199
 * an ASCII string such as 25 or 12-15 or 1,3,5-7 or +6-10.
2200
 * (the + indicates that the numbers are cpuset-relative)
2201
 *
2202
 * The cpus may be specified as absolute, or relative to the current cpuset.
2203
 * The list of available cpus for this task is in the map pointed to by
2204
 * "allowed_cpus_ptr", which may represent all cpus or the cpus in the
2205
 * current cpuset.
2206
 *
2207
 * The caller must free the returned bitmask.
2208
 */
2209
static struct bitmask *
2210
__numa_parse_cpustring(const char *s, struct bitmask *allowed_cpus_ptr)
2211
675
{
2212
675
  int invert = 0, relative=0;
2213
675
  int conf_cpus = numa_num_configured_cpus();
2214
675
  char *end;
2215
675
  struct bitmask *mask;
2216
675
  int i;
2217
2218
675
  mask = numa_allocate_cpumask();
2219
675
  if (!mask)
2220
0
    return NULL;
2221
2222
675
  if (s[0] == 0)
2223
1
    return mask;
2224
674
  if (*s == '!') {
2225
3
    invert = 1;
2226
3
    s++;
2227
3
  }
2228
674
  if (*s == '+') {
2229
274
    relative++;
2230
274
    s++;
2231
274
  }
2232
3.96k
  do {
2233
3.96k
    unsigned long arg;
2234
2235
3.96k
    if (!strcmp(s,"all")) {
2236
1
      copy_bitmask_to_bitmask(allowed_cpus_ptr, mask);
2237
1
      s+=4;
2238
1
      break;
2239
1
    }
2240
3.96k
    arg = get_nr(s, &end, allowed_cpus_ptr, relative);
2241
3.96k
    if (end == s) {
2242
182
      numa_warn(W_cpuparse, "unparseable cpu description `%s'\n", s);
2243
182
      goto err;
2244
182
    }
2245
3.78k
    if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg)) {
2246
132
      numa_warn(W_cpuparse, "cpu argument %s is out of range\n", s);
2247
132
      goto err;
2248
132
    }
2249
3.65k
    i = arg;
2250
3.65k
    numa_bitmask_setbit(mask, i);
2251
3.65k
    s = end;
2252
3.65k
    if (*s == '-') {
2253
2.28k
      char *end2;
2254
2.28k
      unsigned long arg2;
2255
2.28k
      arg2 = get_nr(++s, &end2, allowed_cpus_ptr, relative);
2256
2.28k
      if (end2 == s) {
2257
13
        numa_warn(W_cpuparse, "missing cpu argument %s\n", s);
2258
13
        goto err;
2259
13
      }
2260
2.27k
      if (!numa_bitmask_isbitset(allowed_cpus_ptr, arg2)) {
2261
152
        numa_warn(W_cpuparse, "cpu argument %s out of range\n", s);
2262
152
        goto err;
2263
152
      }
2264
4.29k
      while (arg <= arg2) {
2265
2.16k
        i = arg;
2266
2.16k
        if (numa_bitmask_isbitset(allowed_cpus_ptr, i))
2267
2.16k
          numa_bitmask_setbit(mask, i);
2268
2.16k
        arg++;
2269
2.16k
      }
2270
2.12k
      s = end2;
2271
2.12k
    }
2272
3.65k
  } while (*s++ == ',');
2273
195
  if (s[-1] != '\0')
2274
15
    goto err;
2275
180
  if (invert) {
2276
66
    for (i = 0; i < conf_cpus; i++) {
2277
64
      if (numa_bitmask_isbitset(mask, i))
2278
3
        numa_bitmask_clearbit(mask, i);
2279
61
      else
2280
61
        numa_bitmask_setbit(mask, i);
2281
64
    }
2282
2
  }
2283
180
  return mask;
2284
2285
494
err:
2286
494
  numa_bitmask_free(mask);
2287
494
  return NULL;
2288
195
}
2289
2290
/*
2291
 * numa_parse_cpustring() is called to create a bitmask from cpus available
2292
 * for this task.
2293
 */
2294
2295
struct bitmask * numa_parse_cpustring(const char *s)
2296
675
{
2297
675
  return __numa_parse_cpustring(s, numa_all_cpus_ptr);
2298
675
}
2299
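The matching cpustring sketch (not from the sources), feeding the parsed mask straight to numa_sched_setaffinity():

#include <numa.h>
#include <stdio.h>

int main(void)
{
  struct bitmask *cpus;

  if (numa_available() < 0)
    return 1;
  cpus = numa_parse_cpustring("0");  /* "!0" would mean all but cpu 0 */
  if (!cpus)
    return 1;
  if (numa_sched_setaffinity(0, cpus) < 0)  /* 0 == calling task */
    perror("numa_sched_setaffinity");
  numa_bitmask_free(cpus);
  return 0;
}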
2300
/*
2301
 * numa_parse_cpustring_all() is called to create a bitmask from all cpus
2302
 * available.
2303
 */
2304
2305
struct bitmask * numa_parse_cpustring_all(const char *s)
2306
0
{
2307
0
  return __numa_parse_cpustring(s, numa_possible_cpus_ptr);
2308
0
}
2309
2310
int numa_has_home_node(void)
2311
0
{
2312
0
  void *mem;
2313
0
  static int has_home_node = -1;
2314
0
  int page_size = numa_pagesize();
2315
0
  struct bitmask *tmp = numa_get_mems_allowed();
2316
2317
0
  if (has_home_node >= 0)
2318
0
    goto out;
2319
2320
0
  has_home_node = 0;
2321
  /* Detect whether home_node is supported */
2322
0
  mem = mmap(0, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
2323
0
  if (mem != MAP_FAILED) {
2324
0
    dombind(mem, page_size, MPOL_BIND, tmp);
2325
0
    if (set_mempolicy_home_node(mem, page_size, numa_find_first(tmp), 0) == 0)
2326
0
      has_home_node = 1;
2327
0
    munmap(mem, page_size);
2328
0
  }
2329
2330
0
out:
2331
0
  numa_bitmask_free(tmp);
2332
0
  return has_home_node;
2333
0
}
2334
2335
int numa_set_mempolicy_home_node(void *start, unsigned long len, int home_node, int flags)
2336
0
{
2337
0
  if (set_mempolicy_home_node(start, len, home_node, flags)) {
2338
0
    numa_error("set_mempolicy_home_node");
2339
0
    return -1;
2340
0
  }
2341
2342
0
  return 0;
2343
0
}
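A final hedged sketch for the home-node interface (requires Linux 5.17+, and a range that already carries an MPOL_BIND or MPOL_PREFERRED_MANY policy; numa_alloc_onnode() provides the former):

#include <numa.h>
#include <string.h>

int main(void)
{
  size_t len = 1 << 20;
  void *mem;

  if (numa_available() < 0 || !numa_has_home_node())
    return 1;
  mem = numa_alloc_onnode(len, 0);  /* range gets an MPOL_BIND policy */
  if (!mem)
    return 1;
  /* make node 0 the home node used for fallback decisions */
  if (numa_set_mempolicy_home_node(mem, len, 0, 0) == 0)
    memset(mem, 0, len);            /* first touch after setting home */
  numa_free(mem, len);
  return 0;
}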