Coverage Report

Created: 2025-08-26 06:27

/src/numactl/affinity.c
Line
Count
Source (jump to first uncovered line)
1
/* Support for specifying IO affinity by various means.
2
   Copyright 2010 Intel Corporation
3
   Author: Andi Kleen
4
5
   libnuma is free software; you can redistribute it and/or
6
   modify it under the terms of the GNU Lesser General Public
7
   License as published by the Free Software Foundation; version
8
   2.1.
9
10
   libnuma is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
   Lesser General Public License for more details.
14
15
   You should find a copy of v2.1 of the GNU Lesser General Public License
16
   somewhere on your Linux system; if not, write to the Free Software
17
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
18
19
/* Notebook:
20
   - Separate real errors from no NUMA with fallback
21
   - Infiniband
22
   - FCoE?
23
   - Support for other special IO devices
24
   - Specifying cpu subsets inside the IO node?
25
   - Handle multiple IO nodes (needs kernel changes)
26
   - Better support for multi-path IO?
27
 */
28
#define _GNU_SOURCE 1
29
#include <string.h>
30
#include <errno.h>
31
#include <sys/stat.h>
32
#include <netdb.h>
33
#include <unistd.h>
34
#include <stdio.h>
35
#include <stdlib.h>
36
#include <sys/socket.h>
37
#include <sys/ioctl.h>
38
#include <net/if.h>
39
#include <dirent.h>
40
#include <linux/rtnetlink.h>
41
#include <linux/netlink.h>
42
#include <sys/types.h>
43
#include <sys/sysmacros.h>
44
#include <ctype.h>
45
#include <assert.h>
46
#include <regex.h>
47
#include <sys/sysmacros.h>
48
#include "numa.h"
49
#include "numaint.h"
50
#include "sysfs.h"
51
#include "affinity.h"
52
#include "rtnetlink.h"
53
54
/* Return 1 if S contains a '/' or a '.', 0 otherwise.
   Used to reject device names before they are spliced into a sysfs path. */
static int badchar(const char *s)
{
	return strpbrk(s, "/.") != NULL;
}
60
61
static int node_parse_failure(int ret, char *cls, const char *dev)
62
51
{
63
51
  if (!cls)
64
8
    cls = "";
65
51
  if (ret == -2)
66
2
    numa_warn(W_node_parse1,
67
2
        "Kernel does not know node mask for%s%s device `%s'",
68
2
        *cls ? " " : "", cls, dev);
69
49
  else
70
49
    numa_warn(W_node_parse2,
71
49
        "Cannot read node mask for %s device `%s'",
72
49
        cls, dev);
73
51
  return -1;
74
51
}
75
76
/* Generic sysfs class lookup */
77
static int
78
affinity_class(struct bitmask *mask, char *cls, const char *dev)
79
45
{
80
45
  int ret;
81
45
  while (isspace(*dev))
82
194
    dev++;
83
45
  if (badchar(dev)) {
84
1
    numa_warn(W_badchar, "Illegal characters in `%s' specification",
85
1
        dev);
86
1
    return -1;
87
1
  }
88
89
  /* Somewhat hackish: extract device from symlink path.
90
     Better would be a direct backlink. This knows slightly too
91
     much about the actual sysfs layout. */
92
44
  char path[1024];
93
44
  char *fn = NULL;
94
44
  if (asprintf(&fn, "/sys/class/%s/%s", cls, dev) > 0 &&
95
44
      readlink(fn, path, sizeof path) > 0) {
96
4
    regex_t re;
97
4
    regmatch_t match[2];
98
4
    char *p;
99
100
4
    regcomp(&re, "(/devices/pci[0-9a-fA-F:/]+\\.[0-9]+)/",
101
4
      REG_EXTENDED);
102
4
    ret = regexec(&re, path, 2, match, 0);
103
4
    regfree(&re);
104
4
    if (ret == 0) {
105
1
      free(fn);
106
1
      assert(match[0].rm_so > 0);
107
1
      assert(match[0].rm_eo > 0);
108
1
      path[match[1].rm_eo + 1] = 0;
109
1
      p = path + match[0].rm_so;
110
1
      ret = sysfs_node_read(mask, "/sys/%s/numa_node", p);
111
1
      if (ret < 0)
112
1
        return node_parse_failure(ret, NULL, p);
113
0
      return ret;
114
1
    }
115
4
  }
116
43
  free(fn);
117
118
43
  ret = sysfs_node_read(mask, "/sys/class/%s/%s/device/numa_node",
119
43
            cls, dev);
120
43
  if (ret < 0)
121
43
    return node_parse_failure(ret, cls, dev);
122
0
  return 0;
123
43
}
124
125
/* Turn file (or device node) into class name */
126
static int affinity_file(struct bitmask *mask, char *cls, const char *file)
127
3
{
128
3
  struct stat st;
129
3
  DIR *dir;
130
3
  int n;
131
3
  unsigned maj = 0, min = 0;
132
3
  dev_t d;
133
3
  struct dirent *dep;
134
135
3
  cls = "block";
136
3
  char fn[sizeof("/sys/class/") + strlen(cls)];
137
3
  if (stat(file, &st) < 0) {
138
1
    numa_warn(W_blockdev1, "Cannot stat file %s", file);
139
1
    return -1;
140
1
  }
141
2
  d = st.st_dev;
142
2
  if (S_ISCHR(st.st_mode)) {
143
    /* Better choice than misc? Most likely misc will not work
144
       anyways unless the kernel is fixed. */
145
0
    cls = "misc";
146
0
    d = st.st_rdev;
147
2
  } else if (S_ISBLK(st.st_mode))
148
0
    d = st.st_rdev;
149
150
2
  sprintf(fn, "/sys/class/%s", cls);
151
2
  dir = opendir(fn);
152
2
  if (!dir) {
153
0
    numa_warn(W_blockdev2, "Cannot enumerate %s devices in sysfs",
154
0
        cls);
155
0
    return -1;
156
0
  }
157
25
  while ((dep = readdir(dir)) != NULL) {
158
24
    char *name = dep->d_name;
159
24
    int ret;
160
161
24
    if (*name == '.')
162
4
      continue;
163
20
    char *dev;
164
20
    char fn2[sizeof("/sys/class/block//dev") + strlen(name)];
165
166
20
    n = -1;
167
20
    if (sprintf(fn2, "/sys/class/block/%s/dev", name) < 0)
168
0
      break;
169
20
    dev = sysfs_read(fn2);
170
20
    if (dev) {
171
20
      n = sscanf(dev, "%u:%u", &maj, &min);
172
20
      free(dev);
173
20
    }
174
20
    if (n != 2) {
175
0
      numa_warn(W_blockdev3, "Cannot parse sysfs device %s",
176
0
          name);
177
0
      continue;
178
0
    }
179
180
20
    if (major(d) != maj || minor(d) != min)
181
19
      continue;
182
183
1
    ret = affinity_class(mask, "block", name);
184
1
    closedir(dir);
185
1
    return ret;
186
20
  }
187
1
  closedir(dir);
188
1
  numa_warn(W_blockdev5, "Cannot find block device %x:%x in sysfs for `%s'",
189
1
      maj, min, file);
190
1
  return -1;
191
2
}
192
193
/* Look up interface of route using rtnetlink. */
194
static int find_route(struct sockaddr *dst, int *iifp)
195
2
{
196
2
  struct rtattr *rta;
197
2
  const int hdrlen = NLMSG_LENGTH(sizeof(struct rtmsg));
198
2
  struct {
199
2
    struct nlmsghdr msg;
200
2
    struct rtmsg rt;
201
2
    char buf[256];
202
2
  } req = {
203
2
    .msg = {
204
2
      .nlmsg_len = hdrlen,
205
2
      .nlmsg_type = RTM_GETROUTE,
206
2
      .nlmsg_flags = NLM_F_REQUEST,
207
2
    },
208
2
    .rt = {
209
2
      .rtm_family = dst->sa_family,
210
2
    },
211
2
  };
212
2
  struct sockaddr_nl adr = {
213
2
    .nl_family = AF_NETLINK,
214
2
  };
215
216
2
  if (rta_put_address(&req.msg, RTA_DST, dst) < 0) {
217
0
    numa_warn(W_netlink1, "Cannot handle network family %x",
218
0
        dst->sa_family);
219
0
    return -1;
220
0
  }
221
222
2
  if (rtnetlink_request(&req.msg, sizeof req, &adr) < 0) {
223
0
    numa_warn(W_netlink2, "Cannot request rtnetlink route: %s",
224
0
        strerror(errno));
225
0
    return -1;
226
0
  }
227
228
  /* Fish the interface out of the netlink soup. */
229
2
  rta = NULL;
230
6
  while ((rta = rta_get(&req.msg, rta, hdrlen)) != NULL) {
231
6
    if (rta->rta_type == RTA_OIF) {
232
2
      memcpy(iifp, RTA_DATA(rta), sizeof(int));
233
2
      return 0;
234
2
    }
235
6
  }
236
237
0
  numa_warn(W_netlink3, "rtnetlink query did not return interface");
238
0
  return -1;
239
2
}
240
241
static int iif_to_name(int iif, struct ifreq *ifr)
242
2
{
243
2
  int n;
244
2
  int sk = socket(PF_INET, SOCK_DGRAM, 0);
245
2
  if (sk < 0)
246
0
    return -1;
247
2
  ifr->ifr_ifindex = iif;
248
2
  n = ioctl(sk, SIOCGIFNAME, ifr);
249
2
  close(sk);
250
2
  return n;
251
2
}
252
253
/* Resolve an IP address to the nodes of a network device.
   This generally only attempts to handle simple cases:
   no multi-path, no bonding etc. In these cases only
   the first interface or none is chosen.

   ID is resolved with getaddrinfo(); the first returned address is
   routed with find_route(), the resulting interface index is turned
   into a name, and that name is looked up in sysfs class "net".
   CLS is unused.  Returns -1 on any failure before the class lookup,
   otherwise the affinity_class() result. */
static int affinity_ip(struct bitmask *mask, char *cls, const char *id)
{
	struct addrinfo *ai;
	struct ifreq ifr;
	int iif;
	int have_name = 0;
	int err = getaddrinfo(id, NULL, NULL, &ai);

	if (err != 0) {
		numa_warn(W_net1, "Cannot resolve %s: %s",
			  id, gai_strerror(err));
		return -1;
	}

	/* find_route() emits its own warnings on failure. */
	if (find_route(&ai->ai_addr[0], &iif) >= 0) {
		if (iif_to_name(iif, &ifr) < 0)
			numa_warn(W_net2, "Cannot resolve network interface %d", iif);
		else
			have_name = 1;
	}

	/* The address list is no longer needed on either path. */
	freeaddrinfo(ai);

	if (!have_name)
		return -1;
	return affinity_class(mask, "net", ifr.ifr_name);
}
285
286
/* Look up affinity for a PCI device */
287
static int affinity_pci(struct bitmask *mask, char *cls, const char *id)
288
10
{
289
10
  unsigned seg, bus, dev, func;
290
10
  int n, ret;
291
292
  /* Func is optional. */
293
10
  if ((n = sscanf(id, "%x:%x:%x.%x",&seg,&bus,&dev,&func)) == 4 || n == 3) {
294
3
    if (n == 3)
295
2
      func = 0;
296
3
  }
297
  /* Segment is optional too */
298
7
  else if ((n = sscanf(id, "%x:%x.%x",&bus,&dev,&func)) == 3 || n == 2) {
299
4
    seg = 0;
300
4
    if (n == 2)
301
3
      func = 0;
302
4
  } else {
303
3
    numa_warn(W_pci1, "Cannot parse PCI device `%s'", id);
304
3
    return -1;
305
3
  }
306
7
  ret = sysfs_node_read(mask,
307
7
      "/sys/devices/pci%04x:%02x/%04x:%02x:%02x.%x/numa_node",
308
7
            seg, bus, seg, bus, dev, func);
309
7
  if (ret < 0)
310
7
    return node_parse_failure(ret, cls, id);
311
0
  return 0;
312
7
}
313
314
static struct handler {
315
  char first;
316
  char *name;
317
  char *cls;
318
  int (*handler)(struct bitmask *mask, char *cls, const char *desc);
319
} handlers[] = {
320
  { 'n', "netdev:", "net",   affinity_class },
321
  { 'i', "ip:",     NULL,    affinity_ip    },
322
  { 'f', "file:",   NULL,    affinity_file  },
323
  { 'b', "block:",  "block", affinity_class },
324
  { 'p', "pci:",    NULL,    affinity_pci   },
325
  {}
326
};
327
328
hidden int resolve_affinity(const char *id, struct bitmask *mask)
329
141
{
330
141
  struct handler *h;
331
332
721
  for (h = &handlers[0]; h->first; h++) {
333
639
    int len;
334
639
    if (id[0] != h->first)
335
525
      continue;
336
114
    len = strlen(h->name);
337
114
    if (!strncmp(id, h->name, len)) {
338
59
      int ret = h->handler(mask, h->cls, id + len);
339
59
      if (ret == -2) {
340
0
        numa_warn(W_nonode, "Kernel does not know node for %s\n",
341
0
            id + len);
342
0
      }
343
59
      return ret;
344
59
    }
345
114
  }
346
82
  return NO_IO_AFFINITY;
347
141
}