Total coverage: 220659 (12%) of 1891412
27 22 27 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 // SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-mul_1.c - MPI helper functions * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" #include "longlong.h" mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, mpi_limb_t s2_limb) { mpi_limb_t cy_limb; mpi_size_t j; mpi_limb_t prod_high, prod_low; /* The loop counter and index J goes from -S1_SIZE to -1. This way * the loop becomes faster. */ j = -s1_size; /* Offset the base pointers to compensate for the negative indices. */ s1_ptr -= j; res_ptr -= j; cy_limb = 0; do { umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb); prod_low += cy_limb; cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high; res_ptr[j] = prod_low; } while (++j); return cy_limb; }
5 4 1 1 3 1 3 1 3 1 3 1 4 2 2 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 // SPDX-License-Identifier: GPL-2.0-or-later /* * inode.c - basic inode and dentry operations. * * Based on sysfs: * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel * * configfs Copyright (C) 2005 Oracle. All rights reserved. * * Please see Documentation/filesystems/configfs.rst for more * information. 
*/ #undef DEBUG #include <linux/pagemap.h> #include <linux/namei.h> #include <linux/backing-dev.h> #include <linux/capability.h> #include <linux/sched.h> #include <linux/lockdep.h> #include <linux/slab.h> #include <linux/configfs.h> #include "configfs_internal.h" #ifdef CONFIG_LOCKDEP static struct lock_class_key default_group_class[MAX_LOCK_DEPTH]; #endif static const struct inode_operations configfs_inode_operations ={ .setattr = configfs_setattr, }; int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { struct inode * inode = d_inode(dentry); struct configfs_dirent * sd = dentry->d_fsdata; struct iattr * sd_iattr; unsigned int ia_valid = iattr->ia_valid; int error; if (!sd) return -EINVAL; sd_iattr = sd->s_iattr; if (!sd_iattr) { /* setting attributes for the first time, allocate now */ sd_iattr = kzalloc(sizeof(struct iattr), GFP_KERNEL); if (!sd_iattr) return -ENOMEM; /* assign default attributes */ sd_iattr->ia_mode = sd->s_mode; sd_iattr->ia_uid = GLOBAL_ROOT_UID; sd_iattr->ia_gid = GLOBAL_ROOT_GID; sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = current_time(inode); sd->s_iattr = sd_iattr; } /* attributes were changed atleast once in past */ error = simple_setattr(idmap, dentry, iattr); if (error) return error; if (ia_valid & ATTR_UID) sd_iattr->ia_uid = iattr->ia_uid; if (ia_valid & ATTR_GID) sd_iattr->ia_gid = iattr->ia_gid; if (ia_valid & ATTR_ATIME) sd_iattr->ia_atime = iattr->ia_atime; if (ia_valid & ATTR_MTIME) sd_iattr->ia_mtime = iattr->ia_mtime; if (ia_valid & ATTR_CTIME) sd_iattr->ia_ctime = iattr->ia_ctime; if (ia_valid & ATTR_MODE) { umode_t mode = iattr->ia_mode; if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) mode &= ~S_ISGID; sd_iattr->ia_mode = sd->s_mode = mode; } return error; } static inline void set_default_inode_attr(struct inode * inode, umode_t mode) { inode->i_mode = mode; simple_inode_init_ts(inode); } static inline void set_inode_attr(struct inode * inode, struct iattr * 
iattr) { inode->i_mode = iattr->ia_mode; inode->i_uid = iattr->ia_uid; inode->i_gid = iattr->ia_gid; inode_set_atime_to_ts(inode, iattr->ia_atime); inode_set_mtime_to_ts(inode, iattr->ia_mtime); inode_set_ctime_to_ts(inode, iattr->ia_ctime); } struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd, struct super_block *s) { struct inode * inode = new_inode(s); if (inode) { inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &ram_aops; inode->i_op = &configfs_inode_operations; if (sd->s_iattr) { /* sysfs_dirent has non-default attributes * get them for the new inode from persistent copy * in sysfs_dirent */ set_inode_attr(inode, sd->s_iattr); } else set_default_inode_attr(inode, mode); } return inode; } #ifdef CONFIG_LOCKDEP static void configfs_set_inode_lock_class(struct configfs_dirent *sd, struct inode *inode) { int depth = sd->s_depth; if (depth > 0) { if (depth <= ARRAY_SIZE(default_group_class)) { lockdep_set_class(&inode->i_rwsem, &default_group_class[depth - 1]); } else { /* * In practice the maximum level of locking depth is * already reached. Just inform about possible reasons. 
*/ pr_info("Too many levels of inodes for the locking correctness validator.\n"); pr_info("Spurious warnings may appear.\n"); } } } #else /* CONFIG_LOCKDEP */ static void configfs_set_inode_lock_class(struct configfs_dirent *sd, struct inode *inode) { } #endif /* CONFIG_LOCKDEP */ struct inode *configfs_create(struct dentry *dentry, umode_t mode) { struct inode *inode = NULL; struct configfs_dirent *sd; struct inode *p_inode; if (!dentry) return ERR_PTR(-ENOENT); if (d_really_is_positive(dentry)) return ERR_PTR(-EEXIST); sd = dentry->d_fsdata; inode = configfs_new_inode(mode, sd, dentry->d_sb); if (!inode) return ERR_PTR(-ENOMEM); p_inode = d_inode(dentry->d_parent); inode_set_mtime_to_ts(p_inode, inode_set_ctime_current(p_inode)); configfs_set_inode_lock_class(sd, inode); return inode; } /* * Get the name for corresponding element represented by the given configfs_dirent */ const unsigned char * configfs_get_name(struct configfs_dirent *sd) { struct configfs_attribute *attr; BUG_ON(!sd || !sd->s_element); /* These always have a dentry, so use that */ if (sd->s_type & (CONFIGFS_DIR | CONFIGFS_ITEM_LINK)) return sd->s_dentry->d_name.name; if (sd->s_type & (CONFIGFS_ITEM_ATTR | CONFIGFS_ITEM_BIN_ATTR)) { attr = sd->s_element; return attr->ca_name; } return NULL; } /* * Unhashes the dentry corresponding to given configfs_dirent * Called with parent inode's i_mutex held. */ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) { struct dentry * dentry = sd->s_dentry; if (dentry) { spin_lock(&dentry->d_lock); if (simple_positive(dentry)) { dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(d_inode(parent), dentry); } else spin_unlock(&dentry->d_lock); } }
2 2 1 1 2 2 1 1 2 1 1 2 7 1 2 46 37 37 6 5 1 1 1 2 2 1 1 1 2 1 2 1 1 2 1 1 1 2 1 1 1 1 1 1 1 1 3 5 6 1 11 6 1 2 2 1 1 1 1 2 1 29 19 1 1 7 1 27 11 1 1 2 4 2 1 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 // SPDX-License-Identifier: GPL-2.0-or-later /* * Ioctl handler * 
Linux ethernet bridge * * Authors: * Lennert Buytenhek <buytenh@gnu.org> */ #include <linux/capability.h> #include <linux/compat.h> #include <linux/kernel.h> #include <linux/if_bridge.h> #include <linux/netdevice.h> #include <linux/slab.h> #include <linux/times.h> #include <net/net_namespace.h> #include <linux/uaccess.h> #include "br_private.h" static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; rcu_read_lock(); for_each_netdev_rcu(net, dev) { if (i >= num) break; if (netif_is_bridge_master(dev)) indices[i++] = dev->ifindex; } rcu_read_unlock(); return i; } /* called with RTNL */ static void get_port_ifindices(struct net_bridge *br, int *ifindices, int num) { struct net_bridge_port *p; list_for_each_entry(p, &br->port_list, list) { if (p->port_no < num) ifindices[p->port_no] = p->dev->ifindex; } } /* * Format up to a page worth of forwarding table entries * userbuf -- where to copy result * maxnum -- maximum number of entries desired * (limited to a page for sanity) * offset -- number of records to skip */ static int get_fdb_entries(struct net_bridge *br, void __user *userbuf, unsigned long maxnum, unsigned long offset) { int num; void *buf; size_t size; /* Clamp size to PAGE_SIZE, test maxnum to avoid overflow */ if (maxnum > PAGE_SIZE/sizeof(struct __fdb_entry)) maxnum = PAGE_SIZE/sizeof(struct __fdb_entry); size = maxnum * sizeof(struct __fdb_entry); buf = kmalloc(size, GFP_USER); if (!buf) return -ENOMEM; num = br_fdb_fillbuf(br, buf, maxnum, offset); if (num > 0) { if (copy_to_user(userbuf, buf, array_size(num, sizeof(struct __fdb_entry)))) num = -EFAULT; } kfree(buf); return num; } /* called with RTNL */ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) { struct net *net = dev_net(br->dev); struct net_device *dev; int ret; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; dev = __dev_get_by_index(net, ifindex); if (dev == NULL) return -EINVAL; if (isadd) ret = br_add_if(br, 
dev, NULL); else ret = br_del_if(br, dev); return ret; } #define BR_UARGS_MAX 4 static int br_dev_read_uargs(unsigned long *args, size_t nr_args, void __user **argp, void __user *data) { int ret; if (nr_args < 2 || nr_args > BR_UARGS_MAX) return -EINVAL; if (in_compat_syscall()) { unsigned int cargs[BR_UARGS_MAX]; int i; ret = copy_from_user(cargs, data, nr_args * sizeof(*cargs)); if (ret) goto fault; for (i = 0; i < nr_args; ++i) args[i] = cargs[i]; *argp = compat_ptr(args[1]); } else { ret = copy_from_user(args, data, nr_args * sizeof(*args)); if (ret) goto fault; *argp = (void __user *)args[1]; } return 0; fault: return -EFAULT; } /* * Legacy ioctl's through SIOCDEVPRIVATE * This interface is deprecated because it was too difficult * to do the translation for 32/64bit ioctl compatibility. */ int br_dev_siocdevprivate(struct net_device *dev, struct ifreq *rq, void __user *data, int cmd) { struct net_bridge *br = netdev_priv(dev); struct net_bridge_port *p = NULL; unsigned long args[4]; void __user *argp; int ret; ret = br_dev_read_uargs(args, ARRAY_SIZE(args), &argp, data); if (ret) return ret; switch (args[0]) { case BRCTL_ADD_IF: case BRCTL_DEL_IF: return add_del_if(br, args[1], args[0] == BRCTL_ADD_IF); case BRCTL_GET_BRIDGE_INFO: { struct __bridge_info b; memset(&b, 0, sizeof(struct __bridge_info)); rcu_read_lock(); memcpy(&b.designated_root, &br->designated_root, 8); memcpy(&b.bridge_id, &br->bridge_id, 8); b.root_path_cost = br->root_path_cost; b.max_age = jiffies_to_clock_t(br->max_age); b.hello_time = jiffies_to_clock_t(br->hello_time); b.forward_delay = br->forward_delay; b.bridge_max_age = br->bridge_max_age; b.bridge_hello_time = br->bridge_hello_time; b.bridge_forward_delay = jiffies_to_clock_t(br->bridge_forward_delay); b.topology_change = br->topology_change; b.topology_change_detected = br->topology_change_detected; b.root_port = br->root_port; b.stp_enabled = (br->stp_enabled != BR_NO_STP); b.ageing_time = jiffies_to_clock_t(br->ageing_time); 
b.hello_timer_value = br_timer_value(&br->hello_timer); b.tcn_timer_value = br_timer_value(&br->tcn_timer); b.topology_change_timer_value = br_timer_value(&br->topology_change_timer); b.gc_timer_value = br_timer_value(&br->gc_work.timer); rcu_read_unlock(); if (copy_to_user((void __user *)args[1], &b, sizeof(b))) return -EFAULT; return 0; } case BRCTL_GET_PORT_LIST: { int num, *indices; num = args[2]; if (num < 0) return -EINVAL; if (num == 0) num = 256; if (num > BR_MAX_PORTS) num = BR_MAX_PORTS; indices = kcalloc(num, sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; get_port_ifindices(br, indices, num); if (copy_to_user(argp, indices, array_size(num, sizeof(int)))) num = -EFAULT; kfree(indices); return num; } case BRCTL_SET_BRIDGE_FORWARD_DELAY: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = br_set_forward_delay(br, args[1]); break; case BRCTL_SET_BRIDGE_HELLO_TIME: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = br_set_hello_time(br, args[1]); break; case BRCTL_SET_BRIDGE_MAX_AGE: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = br_set_max_age(br, args[1]); break; case BRCTL_SET_AGEING_TIME: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = br_set_ageing_time(br, args[1]); break; case BRCTL_GET_PORT_INFO: { struct __port_info p; struct net_bridge_port *pt; rcu_read_lock(); if ((pt = br_get_port(br, args[2])) == NULL) { rcu_read_unlock(); return -EINVAL; } memset(&p, 0, sizeof(struct __port_info)); memcpy(&p.designated_root, &pt->designated_root, 8); memcpy(&p.designated_bridge, &pt->designated_bridge, 8); p.port_id = pt->port_id; p.designated_port = pt->designated_port; p.path_cost = pt->path_cost; p.designated_cost = pt->designated_cost; p.state = pt->state; p.top_change_ack = pt->topology_change_ack; p.config_pending = pt->config_pending; p.message_age_timer_value = br_timer_value(&pt->message_age_timer); p.forward_delay_timer_value = 
br_timer_value(&pt->forward_delay_timer); p.hold_timer_value = br_timer_value(&pt->hold_timer); rcu_read_unlock(); if (copy_to_user(argp, &p, sizeof(p))) return -EFAULT; return 0; } case BRCTL_SET_BRIDGE_STP_STATE: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = br_stp_set_enabled(br, args[1], NULL); break; case BRCTL_SET_BRIDGE_PRIORITY: if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; br_stp_set_bridge_priority(br, args[1]); ret = 0; break; case BRCTL_SET_PORT_PRIORITY: { if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; spin_lock_bh(&br->lock); if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else ret = br_stp_set_port_priority(p, args[2]); spin_unlock_bh(&br->lock); break; } case BRCTL_SET_PATH_COST: { if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN)) return -EPERM; spin_lock_bh(&br->lock); if ((p = br_get_port(br, args[1])) == NULL) ret = -EINVAL; else ret = br_stp_set_path_cost(p, args[2]); spin_unlock_bh(&br->lock); break; } case BRCTL_GET_FDB_ENTRIES: return get_fdb_entries(br, argp, args[2], args[3]); default: ret = -EOPNOTSUPP; } if (!ret) { if (p) br_ifinfo_notify(RTM_NEWLINK, NULL, p); else netdev_state_change(br->dev); } return ret; } static int old_deviceless(struct net *net, void __user *data) { unsigned long args[3]; void __user *argp; int ret; ret = br_dev_read_uargs(args, ARRAY_SIZE(args), &argp, data); if (ret) return ret; switch (args[0]) { case BRCTL_GET_VERSION: return BRCTL_VERSION; case BRCTL_GET_BRIDGES: { int *indices; int ret = 0; if (args[2] >= 2048) return -ENOMEM; indices = kcalloc(args[2], sizeof(int), GFP_KERNEL); if (indices == NULL) return -ENOMEM; args[2] = get_bridge_ifindices(net, indices, args[2]); ret = copy_to_user(argp, indices, array_size(args[2], sizeof(int))) ? 
-EFAULT : args[2]; kfree(indices); return ret; } case BRCTL_ADD_BRIDGE: case BRCTL_DEL_BRIDGE: { char buf[IFNAMSIZ]; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (copy_from_user(buf, argp, IFNAMSIZ)) return -EFAULT; buf[IFNAMSIZ-1] = 0; if (args[0] == BRCTL_ADD_BRIDGE) return br_add_bridge(net, buf); return br_del_bridge(net, buf); } } return -EOPNOTSUPP; } int br_ioctl_stub(struct net *net, unsigned int cmd, void __user *uarg) { int ret = -EOPNOTSUPP; struct ifreq ifr; if (cmd == SIOCBRADDIF || cmd == SIOCBRDELIF) { void __user *data; char *colon; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (get_user_ifreq(&ifr, &data, uarg)) return -EFAULT; ifr.ifr_name[IFNAMSIZ - 1] = 0; colon = strchr(ifr.ifr_name, ':'); if (colon) *colon = 0; } rtnl_lock(); switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: ret = old_deviceless(net, uarg); break; case SIOCBRADDBR: case SIOCBRDELBR: { char buf[IFNAMSIZ]; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { ret = -EPERM; break; } if (copy_from_user(buf, uarg, IFNAMSIZ)) { ret = -EFAULT; break; } buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) ret = br_add_bridge(net, buf); else ret = br_del_bridge(net, buf); } break; case SIOCBRADDIF: case SIOCBRDELIF: { struct net_device *dev; dev = __dev_get_by_name(net, ifr.ifr_name); if (!dev || !netif_device_present(dev)) { ret = -ENODEV; break; } if (!netif_is_bridge_master(dev)) { ret = -EOPNOTSUPP; break; } ret = add_del_if(netdev_priv(dev), ifr.ifr_ifindex, cmd == SIOCBRADDIF); } break; } rtnl_unlock(); return ret; }
1 3 131 757 1215 4 57 460 1093 4394 3997 4458 8877 9556 306 216 4 6404 22 43 22 1 58 9 55 5 5 7 27 56 138 224 1 2 337 10 7 12654 1 10 4 337 7 10 10 5 10 18 8 63 16 1032 88 16 131 33 1 2882 70 1303 70 5 19 22 87 21 79 26 13 9 197 6 13 14 3 33 3 3 64 10 22 37 9 60 70 72 58 758 1886 298 62 39 13 36 478 34 446 460 459 28 44 419 2 419 34 12251 156 156 11141 5 5 8 24 2 114 1 3 3 709 17 7607 7729 7757 5897 5964 109 5 149 6008 311 293 6870 604 114 486 11 6 6 1163 106 1107 825 525 7663 474 988 55 16 18 11 7594 1 150 259 214 3 37 502 409 49 243 24 1094 137 1076 70 36 2 211 1 2 124 12 137 286 882 2323 9673 11 490 672 4 653 1 2 22 2 1 1 2 7 4 104 3 37 28 109 4 5 9 194 118 80 2 195 154 24 153 1 77 17 24 24 24 197 5 36 7105 34 20 222 78 233 6895 1 216 16 16 1 35 544 2 3 545 5 1 546 2 331 275 372 753 15 4 75 260 19 3 2 87 6 12 136 75 60 15 6 1 69 5 12474 40 9778 45 66 68 173 8 7282 22 195 40 18 444 9798 67 66 1 114 7180 15 1 3 28 41 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 
1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 
1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 
2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 
2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 
2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 
3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 
3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 
4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 
4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 
4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 
5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Definitions for the 'struct sk_buff' memory handlers. * * Authors: * Alan Cox, <gw4pts@gw4pts.ampr.org> * Florian La Roche, <rzsfl@rz.uni-sb.de> */ #ifndef _LINUX_SKBUFF_H #define _LINUX_SKBUFF_H #include <linux/kernel.h> #include <linux/compiler.h> #include <linux/time.h> #include <linux/bug.h> #include <linux/bvec.h> #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> #include <linux/refcount.h> #include <linux/atomic.h> #include <asm/types.h> #include <linux/spinlock.h> #include <net/checksum.h> #include <linux/rcupdate.h> #include <linux/dma-mapping.h> #include <linux/netdev_features.h> #include <net/flow_dissector.h> #include <linux/in6.h> #include <linux/if_packet.h> #include <linux/llist.h> #include <linux/page_frag_cache.h> #include <net/flow.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <linux/netfilter/nf_conntrack_common.h> #endif #include <net/net_debug.h> #include <net/dropreason-core.h> #include <net/netmem.h> /** * DOC: skb checksums * * The interface for checksum offload between the stack and networking drivers * is as follows... * * IP checksum related features * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Drivers advertise checksum offload capabilities in the features of a device. * From the stack's point of view these are capabilities offered by the driver. * A driver typically only advertises features that it is capable of offloading * to its device. * * .. flat-table:: Checksum related device features * :widths: 1 10 * * * - %NETIF_F_HW_CSUM * - The driver (or its device) is able to compute one * IP (one's complement) checksum for any combination * of protocols or protocol layering. 
The checksum is * computed and set in a packet per the CHECKSUM_PARTIAL * interface (see below). * * * - %NETIF_F_IP_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv4. These are specifically * unencapsulated packets of the form IPv4|TCP or * IPv4|UDP where the Protocol field in the IPv4 header * is TCP or UDP. The IPv4 header may contain IP options. * This feature cannot be set in features for a device * with NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). * * * - %NETIF_F_IPV6_CSUM * - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with * NETIF_F_HW_CSUM also set. This feature is being * DEPRECATED (see below). * * * - %NETIF_F_RXCSUM * - Driver (device) performs receive checksum offload. * This flag is only used to disable the RX checksum * feature for a device. The stack will accept receive * checksum indication in packets received on a device * regardless of whether NETIF_F_RXCSUM is set. * * Checksumming of received packets by device * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * Indication of checksum verification is set in &sk_buff.ip_summed. * Possible values are: * * - %CHECKSUM_NONE * * Device did not checksum this packet e.g. due to lack of capabilities. * The packet contains full (though not verified) checksum in packet but * not in skb->csum. Thus, skb->csum is undefined in this case. * * - %CHECKSUM_UNNECESSARY * * The hardware you're dealing with doesn't calculate the full checksum * (as in %CHECKSUM_COMPLETE), but it does parse headers and verify checksums * for specific protocols. For such packets it will set %CHECKSUM_UNNECESSARY * if their checksums are okay. 
&sk_buff.csum is still undefined in this case * though. A driver or device must never modify the checksum field in the * packet even if checksum is verified. * * %CHECKSUM_UNNECESSARY is applicable to following protocols: * * - TCP: IPv6 and IPv4. * - UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a * zero UDP checksum for either IPv4 or IPv6, the networking stack * may perform further validation in this case. * - GRE: only if the checksum is present in the header. * - SCTP: indicates the CRC in SCTP header has been validated. * - FCOE: indicates the CRC in FC frame has been validated. * * &sk_buff.csum_level indicates the number of consecutive checksums found in * the packet minus one that have been verified as %CHECKSUM_UNNECESSARY. * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet * and a device is able to verify the checksums for UDP (possibly zero), * GRE (checksum flag is set) and TCP, &sk_buff.csum_level would be set to * two. If the device were only able to verify the UDP checksum and not * GRE, either because it doesn't support GRE checksum or because GRE * checksum is bad, skb->csum_level would be set to zero (TCP checksum is * not considered in this case). * * - %CHECKSUM_COMPLETE * * This is the most generic way. The device supplied checksum of the _whole_ * packet as seen by netif_rx() and fills in &sk_buff.csum. This means the * hardware doesn't need to parse L3/L4 headers to implement this. * * Notes: * * - Even if device supports only some protocols, but is able to produce * skb->csum, it MUST use CHECKSUM_COMPLETE, not CHECKSUM_UNNECESSARY. * - CHECKSUM_COMPLETE is not applicable to SCTP and FCoE protocols. * * - %CHECKSUM_PARTIAL * * A checksum is set up to be offloaded to a device as described in the * output description for CHECKSUM_PARTIAL. 
This may occur on a packet * received directly from another Linux OS, e.g., a virtualized Linux kernel * on the same host, or it may be set in the input path in GRO or remote * checksum offload. For the purposes of checksum verification, the checksum * referred to by skb->csum_start + skb->csum_offset and any preceding * checksums in the packet are considered verified. Any checksums in the * packet that are after the checksum being offloaded are not considered to * be verified. * * Checksumming on transmit for non-GSO * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * The stack requests checksum offload in the &sk_buff.ip_summed for a packet. * Values are: * * - %CHECKSUM_PARTIAL * * The driver is required to checksum the packet as seen by hard_start_xmit() * from &sk_buff.csum_start up to the end, and to record/write the checksum at * offset &sk_buff.csum_start + &sk_buff.csum_offset. * A driver may verify that the * csum_start and csum_offset values are valid values given the length and * offset of the packet, but it should not attempt to validate that the * checksum refers to a legitimate transport layer checksum -- it is the * purview of the stack to validate that csum_start and csum_offset are set * correctly. * * When the stack requests checksum offload for a packet, the driver MUST * ensure that the checksum is set correctly. A driver can either offload the * checksum calculation to the device, or call skb_checksum_help (in the case * that the device does not support offload for a particular checksum). * * %NETIF_F_IP_CSUM and %NETIF_F_IPV6_CSUM are being deprecated in favor of * %NETIF_F_HW_CSUM. New devices should use %NETIF_F_HW_CSUM to indicate * checksum offload capability. 
* skb_csum_hwoffload_help() can be called to resolve %CHECKSUM_PARTIAL based * on network device checksumming capabilities: if a packet does not match * them, skb_checksum_help() or skb_crc32c_help() (depending on the value of * &sk_buff.csum_not_inet, see :ref:`crc`) * is called to resolve the checksum. * * - %CHECKSUM_NONE * * The skb was already checksummed by the protocol, or a checksum is not * required. * * - %CHECKSUM_UNNECESSARY * * This has the same meaning as CHECKSUM_NONE for checksum offload on * output. * * - %CHECKSUM_COMPLETE * * Not used in checksum output. If a driver observes a packet with this value * set in skbuff, it should treat the packet as if %CHECKSUM_NONE were set. * * .. _crc: * * Non-IP checksum (CRC) offloads * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * .. flat-table:: * :widths: 1 10 * * * - %NETIF_F_SCTP_CRC * - This feature indicates that a device is capable of * offloading the SCTP CRC in a packet. To perform this offload the stack * will set csum_start and csum_offset accordingly, set ip_summed to * %CHECKSUM_PARTIAL and set csum_not_inet to 1, to provide an indication * in the skbuff that the %CHECKSUM_PARTIAL refers to CRC32c. * A driver that supports both IP checksum offload and SCTP CRC32c offload * must verify which offload is configured for a packet by testing the * value of &sk_buff.csum_not_inet; skb_crc32c_csum_help() is provided to * resolve %CHECKSUM_PARTIAL on skbs where csum_not_inet is set to 1. * * * - %NETIF_F_FCOE_CRC * - This feature indicates that a device is capable of offloading the FCOE * CRC in a packet. To perform this offload the stack will set ip_summed * to %CHECKSUM_PARTIAL and set csum_start and csum_offset * accordingly. Note that there is no indication in the skbuff that the * %CHECKSUM_PARTIAL refers to an FCOE checksum, so a driver that supports * both IP checksum offload and FCOE CRC offload must verify which offload * is configured for a packet, presumably by inspecting packet headers. 
* * Checksumming on output with GSO * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * In the case of a GSO packet (skb_is_gso() is true), checksum offload * is implied by the SKB_GSO_* flags in gso_type. Most obviously, if the * gso_type is %SKB_GSO_TCPV4 or %SKB_GSO_TCPV6, TCP checksum offload as * part of the GSO operation is implied. If a checksum is being offloaded * with GSO then ip_summed is %CHECKSUM_PARTIAL, and both csum_start and * csum_offset are set to refer to the outermost checksum being offloaded * (two offloaded checksums are possible with UDP encapsulation). */ /* Don't change this without changing skb_csum_unnecessary! */ #define CHECKSUM_NONE 0 #define CHECKSUM_UNNECESSARY 1 #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 /* Maximum value in skb->csum_level */ #define SKB_MAX_CSUM_LEVEL 3 #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) /* For X bytes available in skb->head, what is the minimal * allocation needed, knowing struct skb_shared_info needs * to be aligned. 
*/ #define SKB_HEAD_ALIGN(X) (SKB_DATA_ALIGN(X) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define SKB_MAX_ORDER(X, ORDER) \ SKB_WITH_OVERHEAD((PAGE_SIZE << (ORDER)) - (X)) #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) /* return minimum truesize of one skb containing X bytes of data */ #define SKB_TRUESIZE(X) ((X) + \ SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) struct net_device; struct scatterlist; struct pipe_inode_info; struct iov_iter; struct napi_struct; struct bpf_prog; union bpf_attr; struct skb_ext; struct ts_config; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) struct nf_bridge_info { enum { BRNF_PROTO_UNCHANGED, BRNF_PROTO_8021Q, BRNF_PROTO_PPPOE } orig_proto:8; u8 pkt_otherhost:1; u8 in_prerouting:1; u8 bridged_dnat:1; u8 sabotage_in_done:1; __u16 frag_max_size; int physinif; /* always valid & non-NULL from FORWARD on, for physdev match */ struct net_device *physoutdev; union { /* prerouting: detect dnat in orig/reply direction */ __be32 ipv4_daddr; struct in6_addr ipv6_daddr; /* after prerouting + nat detected: store original source * mac since neigh resolution overwrites it, only used while * skb is out in neigh layer. */ char neigh_header[8]; }; }; #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) /* Chain in tc_skb_ext will be used to share the tc chain with * ovs recirc_id. It will be set to the current chain by tc * and read by ovs to recirc_id. */ struct tc_skb_ext { union { u64 act_miss_cookie; __u32 chain; }; __u16 mru; __u16 zone; u8 post_ct:1; u8 post_ct_snat:1; u8 post_ct_dnat:1; u8 act_miss:1; /* Set if act_miss_cookie is used */ u8 l2_miss:1; /* Set by bridge upon FDB or MDB miss */ }; #endif struct sk_buff_head { /* These two members must be first to match sk_buff. 
*/ struct_group_tagged(sk_buff_list, list, struct sk_buff *next; struct sk_buff *prev; ); __u32 qlen; spinlock_t lock; }; struct sk_buff; #ifndef CONFIG_MAX_SKB_FRAGS # define CONFIG_MAX_SKB_FRAGS 17 #endif #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to * segment using its current segmentation instead. */ #define GSO_BY_FRAGS 0xFFFF typedef struct skb_frag { netmem_ref netmem; unsigned int len; unsigned int offset; } skb_frag_t; /** * skb_frag_size() - Returns the size of a skb fragment * @frag: skb fragment */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { return frag->len; } /** * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment * @size: size of fragment */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { frag->len = size; } /** * skb_frag_size_add() - Increments the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { frag->len += delta; } /** * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to subtract */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { frag->len -= delta; } /** * skb_frag_must_loop - Test if %p is a high memory page * @p: fragment's page */ static inline bool skb_frag_must_loop(struct page *p) { #if defined(CONFIG_HIGHMEM) if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || PageHighMem(p)) return true; #endif return false; } /** * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on * @f_off: offset from start of f->netmem * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, * non-zero only on first page. * @p_len: (temp var) length in current page, * < PAGE_SIZE only on first and last page. 
* @copied: (temp var) length so far, excluding current p_len. * * A fragment can hold a compound page, in which case per-page * operations, notably kmap_atomic, must be called for each * regular page. */ #define skb_frag_foreach_page(f, f_off, f_len, p, p_off, p_len, copied) \ for (p = skb_frag_page(f) + ((f_off) >> PAGE_SHIFT), \ p_off = (f_off) & (PAGE_SIZE - 1), \ p_len = skb_frag_must_loop(p) ? \ min_t(u32, f_len, PAGE_SIZE - p_off) : f_len, \ copied = 0; \ copied < f_len; \ copied += p_len, p++, p_off = 0, \ p_len = min_t(u32, f_len - copied, PAGE_SIZE)) \ /** * struct skb_shared_hwtstamps - hardware time stamps * @hwtstamp: hardware time stamp transformed into duration * since arbitrary point in time * @netdev_data: address/cookie of network device driver used as * reference to actual hardware time stamp * * Software time stamps generated by ktime_get_real() are stored in * skb->tstamp. * * hwtstamps can only be compared against other hwtstamps from * the same device. * * This structure is attached to packets as part of the * &skb_shared_info. Use skb_hwtstamps() to get a pointer. 
*/ struct skb_shared_hwtstamps { union { ktime_t hwtstamp; void *netdev_data; }; }; /* Definitions for tx_flags in struct skb_shared_info */ enum { /* generate hardware time stamp */ SKBTX_HW_TSTAMP_NOBPF = 1 << 0, /* generate software time stamp when queueing packet to NIC */ SKBTX_SW_TSTAMP = 1 << 1, /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, /* generate software time stamp on packet tx completion */ SKBTX_COMPLETION_TSTAMP = 1 << 3, /* determine hardware time stamp based on time or cycles */ SKBTX_HW_TSTAMP_NETDEV = 1 << 5, /* generate software time stamp when entering packet scheduling */ SKBTX_SCHED_TSTAMP = 1 << 6, /* used for bpf extension when a bpf program is loaded */ SKBTX_BPF = 1 << 7, }; #define SKBTX_HW_TSTAMP (SKBTX_HW_TSTAMP_NOBPF | SKBTX_BPF) #define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \ SKBTX_SCHED_TSTAMP | \ SKBTX_BPF | \ SKBTX_COMPLETION_TSTAMP) #define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | \ SKBTX_ANY_SW_TSTAMP) /* Definitions for flags in struct skb_shared_info */ enum { /* use zcopy routines */ SKBFL_ZEROCOPY_ENABLE = BIT(0), /* This indicates at least one fragment might be overwritten * (as in vmsplice(), sendfile() ...) * If we need to compute a TX checksum, we'll need to copy * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), /* segment contains only zerocopy data and should not be * charged to the kernel memory. 
*/ SKBFL_PURE_ZEROCOPY = BIT(2), SKBFL_DONT_ORPHAN = BIT(3), /* page references are managed by the ubuf_info, so it's safe to * use frags only up until ubuf_info is released */ SKBFL_MANAGED_FRAG_REFS = BIT(4), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) #define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY | \ SKBFL_DONT_ORPHAN | SKBFL_MANAGED_FRAG_REFS) struct ubuf_info_ops { void (*complete)(struct sk_buff *, struct ubuf_info *, bool zerocopy_success); /* has to be compatible with skb_zcopy_set() */ int (*link_skb)(struct sk_buff *skb, struct ubuf_info *uarg); }; /* * The callback notifies userspace to release buffers when skb DMA is done in * lower device, the skb last reference should be 0 when calling this. * The zerocopy_success argument is true if zero copy transmit occurred, * false on data copy or out of memory error caused by data copy attempt. * The ctx field is used to track device context. * The desc field is used to track userspace buffer index. */ struct ubuf_info { const struct ubuf_info_ops *ops; refcount_t refcnt; u8 flags; }; struct ubuf_info_msgzc { struct ubuf_info ubuf; union { struct { unsigned long desc; void *ctx; }; struct { u32 id; u16 len; u16 zerocopy:1; u32 bytelen; }; }; struct mmpin { struct user_struct *user; unsigned int num_pg; } mmp; }; #define skb_uarg(SKB) ((struct ubuf_info *)(skb_shinfo(SKB)->destructor_arg)) #define uarg_to_msgzc(ubuf_ptr) container_of((ubuf_ptr), struct ubuf_info_msgzc, \ ubuf) int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); /* Preserve some data across TX submission and completion. * * Note, this state is stored in the driver. Extending the layout * might need some special care. */ struct xsk_tx_metadata_compl { __u64 *tx_timestamp; }; /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. 
*/ struct skb_shared_info { __u8 flags; __u8 meta_len; __u8 nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; union { struct skb_shared_hwtstamps hwtstamps; struct xsk_tx_metadata_compl xsk_meta; }; unsigned int gso_type; u32 tskey; /* * Warning : all fields before dataref are cleared in __alloc_skb() */ atomic_t dataref; union { struct { u32 xdp_frags_size; u32 xdp_frags_truesize; }; /* * Intermediate layers must ensure that destructor_arg * remains valid until skb destructor. */ void *destructor_arg; }; /* must be last field, see pskb_expand_head() */ skb_frag_t frags[MAX_SKB_FRAGS]; }; /** * DOC: dataref and headerless skbs * * Transport layers send out clones of payload skbs they hold for * retransmissions. To allow lower layers of the stack to prepend their headers * we split &skb_shared_info.dataref into two halves. * The lower 16 bits count the overall number of references. * The higher 16 bits indicate how many of the references are payload-only. * skb_header_cloned() checks if skb is allowed to add / write the headers. * * The creator of the skb (e.g. TCP) marks its skb as &sk_buff.nohdr * (via __skb_header_release()). Any clone created from marked skb will get * &sk_buff.hdr_len populated with the available headroom. * If there's the only clone in existence it's able to modify the headroom * at will. The sequence of calls inside the transport layer is:: * * <alloc skb> * skb_reserve() * __skb_header_release() * skb_clone() * // send the clone down the stack * * This is not a very generic construct and it depends on the transport layers * doing the right thing. In practice there's usually only one payload-only skb. * Having multiple payload-only skbs with different lengths of hdr_len is not * possible. The payload-only skbs should never leave their owner. 
*/
/* dataref is split at bit SKB_DATAREF_SHIFT; SKB_DATAREF_MASK selects the
 * lower half.
 */
#define SKB_DATAREF_SHIFT 16
#define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)

/* Values compared against &sk_buff.fclone. */
enum {
	SKB_FCLONE_UNAVAILABLE,	/* skb has no fclone (from head_cache) */
	SKB_FCLONE_ORIG,	/* orig skb (from fclone_cache) */
	SKB_FCLONE_CLONE,	/* companion fclone skb (from fclone_cache) */
};

/* One bit per SKB_GSO_* value.
 * NOTE(review): presumably the bits of &skb_shared_info.gso_type - confirm.
 */
enum {
	SKB_GSO_TCPV4 = 1 << 0,

	/* This indicates the skb is from an untrusted source. */
	SKB_GSO_DODGY = 1 << 1,

	/* This indicates the tcp segment has CWR set. */
	SKB_GSO_TCP_ECN = 1 << 2,

	SKB_GSO_TCP_FIXEDID = 1 << 3,

	SKB_GSO_TCPV6 = 1 << 4,

	SKB_GSO_FCOE = 1 << 5,

	SKB_GSO_GRE = 1 << 6,

	SKB_GSO_GRE_CSUM = 1 << 7,

	SKB_GSO_IPXIP4 = 1 << 8,

	SKB_GSO_IPXIP6 = 1 << 9,

	SKB_GSO_UDP_TUNNEL = 1 << 10,

	SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,

	SKB_GSO_PARTIAL = 1 << 12,

	SKB_GSO_TUNNEL_REMCSUM = 1 << 13,

	SKB_GSO_SCTP = 1 << 14,

	SKB_GSO_ESP = 1 << 15,

	SKB_GSO_UDP = 1 << 16,

	SKB_GSO_UDP_L4 = 1 << 17,

	SKB_GSO_FRAGLIST = 1 << 18,

	SKB_GSO_TCP_ACCECN = 1 << 19,
};

/* When BITS_PER_LONG is above 32, sk_buff_data_t is an unsigned int
 * (skb_end_pointer() then adds it to skb->head); otherwise it is a
 * plain byte pointer.
 */
#if BITS_PER_LONG > 32
#define NET_SKBUFF_DATA_USES_OFFSET 1
#endif

#ifdef NET_SKBUFF_DATA_USES_OFFSET
typedef unsigned int sk_buff_data_t;
#else
typedef unsigned char *sk_buff_data_t;
#endif

/* Values of &sk_buff.tstamp_type; __SKB_CLOCK_MAX aliases the last one. */
enum skb_tstamp_type {
	SKB_CLOCK_REALTIME,
	SKB_CLOCK_MONOTONIC,
	SKB_CLOCK_TAI,
	__SKB_CLOCK_MAX = SKB_CLOCK_TAI,
};

/**
 * DOC: Basic sk_buff geometry
 *
 * struct sk_buff itself is a metadata structure and does not hold any packet
 * data. All the data is held in associated buffers.
 *
 * &sk_buff.head points to the main "head" buffer. The head buffer is divided
 * into two parts:
 *
 *  - data buffer, containing headers and sometimes payload;
 *    this is the part of the skb operated on by the common helpers
 *    such as skb_put() or skb_pull();
 *  - shared info (struct skb_shared_info) which holds an array of pointers
 *    to read-only data in the (page, offset, length) format.
 *
 * Optionally &skb_shared_info.frag_list may point to another skb.
* * Basic diagram may look like this:: * * --------------- * | sk_buff | * --------------- * ,--------------------------- + head * / ,----------------- + data * / / ,----------- + tail * | | | , + end * | | | | * v v v v * ----------------------------------------------- * | headroom | data | tailroom | skb_shared_info | * ----------------------------------------------- * + [page frag] * + [page frag] * + [page frag] * + [page frag] --------- * + frag_list --> | sk_buff | * --------- * */ /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @tstamp: Time we arrived/left * @skb_mstamp_ns: (aka @tstamp) earliest departure time; start point * for retransmit timer * @rbnode: RB tree node, alternative to next/prev for netem/tcp * @list: queue head * @ll_node: anchor in an llist (eg socket defer_list) * @sk: Socket we are owned by * @dev: Device we arrived on/are leaving by * @dev_scratch: (aka @dev) alternate use of @dev when @dev would be %NULL * @cb: Control buffer. Free for use by every layer. 
Put private vars here * @_skb_refdst: destination entry (with norefcount bit) * @len: Length of actual data * @data_len: Data length * @mac_len: Length of link layer header * @hdr_len: writable header length of cloned skb * @csum: Checksum (must include start/offset pair) * @csum_start: Offset from skb->head where checksumming should start * @csum_offset: Offset from csum_start where checksum should be stored * @priority: Packet queueing priority * @ignore_df: allow local fragmentation * @cloned: Head may be cloned (check refcnt to be sure) * @ip_summed: Driver fed us an IP checksum * @nohdr: Payload reference only, must not modify header * @pkt_type: Packet class * @fclone: skbuff clone status * @ipvs_property: skbuff is owned by ipvs * @inner_protocol_type: whether the inner protocol is * ENCAP_TYPE_ETHER or ENCAP_TYPE_IPPROTO * @remcsum_offload: remote checksum offload is enabled * @offload_fwd_mark: Packet was L2-forwarded in hardware * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware * @tc_skip_classify: do not classify packet. set by IFB device * @tc_at_ingress: used within tc_classify to distinguish in/egress * @redirected: packet was redirected by packet classifier * @from_ingress: packet was redirected from the ingress path * @nf_skip_egress: packet shall skip nf egress - see netfilter_netdev.h * @peeked: this packet has been seen already, so stats have been * done for it, don't do them again * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @skb_iif: ifindex of device we arrived on * @tc_index: Traffic control index * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices * @head_frag: skb was allocated from page fragments, * not allocated by kmalloc() or vmalloc(). 
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves * @pp_recycle: mark the packet for recycling instead of freeing (implies * page_pool support on driver) * @active_extensions: active extensions (skb_ext_id types) * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport * ports. * @sw_hash: indicates hash was computed in software stack * @wifi_acked_valid: wifi_acked was set * @wifi_acked: whether frame was acked on wifi or not * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @encapsulation: indicates the inner headers in the skbuff are valid * @encap_hdr_csum: software checksum is needed * @csum_valid: checksum is already valid * @csum_not_inet: use CRC32c to resolve CHECKSUM_PARTIAL * @csum_complete_sw: checksum was completed by software * @csum_level: indicates the number of consecutive checksums found in * the packet minus one that have been verified as * CHECKSUM_UNNECESSARY (max 3) * @unreadable: indicates that at least 1 of the fragments in this skb is * unreadable. * @dst_pending_confirm: need to confirm neighbour * @decrypted: Decrypted SKB * @slow_gro: state present at GRO time, slower prepare step required * @tstamp_type: clock base of skb->tstamp, one of the * enum skb_tstamp_type values. * @napi_id: id of the NAPI struct this skb came from * @sender_cpu: (aka @napi_id) source CPU in XPS * @alloc_cpu: CPU which did the skb allocation.
*	@secmark: security marking
 *	@mark: Generic packet mark
 *	@reserved_tailroom: (aka @mark) number of bytes of free space available
 *		at the tail of an sk_buff
 *	@vlan_all: vlan fields (proto & tci)
 *	@vlan_proto: vlan encapsulation protocol
 *	@vlan_tci: vlan tag control information
 *	@inner_protocol: Protocol (encapsulation)
 *	@inner_ipproto: (aka @inner_protocol) stores ipproto when
 *		skb->inner_protocol_type == ENCAP_TYPE_IPPROTO;
 *	@inner_transport_header: Inner transport layer header (encapsulation)
 *	@inner_network_header: Network layer header (encapsulation)
 *	@inner_mac_header: Link layer header (encapsulation)
 *	@transport_header: Transport layer header
 *	@network_header: Network layer header
 *	@mac_header: Link layer header
 *	@kcov_handle: KCOV remote handle for remote coverage collection
 *	@tail: Tail pointer
 *	@end: End pointer
 *	@head: Head of buffer
 *	@data: Data head pointer
 *	@truesize: Buffer size
 *	@users: User count - see {datagram,tcp}.c
 *	@extensions: allocated extensions, valid if active_extensions is nonzero
 */

struct sk_buff {
	union {
		struct {
			/* These two members must be first to match sk_buff_head. */
			struct sk_buff		*next;
			struct sk_buff		*prev;

			union {
				struct net_device	*dev;
				/* Some protocols might use this space to store information,
				 * while device pointer would be NULL.
				 * UDP receive path is one user.
				 */
				unsigned long		dev_scratch;
			};
		};
		struct rb_node		rbnode; /* used in netem, ip4 defrag, and tcp stack */
		struct list_head	list;
		struct llist_node	ll_node;
	};

	struct sock		*sk;

	union {
		ktime_t		tstamp;
		u64		skb_mstamp_ns; /* earliest departure time */
	};
	/*
	 * This is the control buffer. It is free to use for every
	 * layer. Please put your private variables there. If you
	 * want to keep them across layers you have to do a skb_clone()
	 * first. This is owned by whoever has the skb queued ATM.
	 */
	char			cb[48] __aligned(8);

	union {
		struct {
			unsigned long	_skb_refdst;
			void		(*destructor)(struct sk_buff *skb);
		};
		struct list_head	tcp_tsorted_anchor;
#ifdef CONFIG_NET_SOCK_MSG
		unsigned long		_sk_redir;
#endif
	};

#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	unsigned long		 _nfct;
#endif
	unsigned int		len,
				data_len;
	__u16			mac_len,
				hdr_len;

	/* Following fields are _not_ copied in __copy_skb_header()
	 * Note that queue_mapping is here mostly to fill a hole.
	 */
	__u16			queue_mapping;

/* if you move cloned around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define CLONED_MASK	(1 << 7)
#else
#define CLONED_MASK	1
#endif
#define CLONED_OFFSET		offsetof(struct sk_buff, __cloned_offset)

	/* private: */
	/* zero-length marker: CLONED_OFFSET is its offsetof() */
	__u8			__cloned_offset[0];
	/* public: */
	__u8			cloned:1,
				nohdr:1,
				fclone:2,
				peeked:1,
				head_frag:1,
				pfmemalloc:1,
				pp_recycle:1; /* page_pool recycle indicator */
#ifdef CONFIG_SKB_EXTENSIONS
	__u8			active_extensions;
#endif

	/* Fields enclosed in headers group are copied
	 * using a single memcpy() in __copy_skb_header()
	 */
	struct_group(headers,

	/* private: */
	/* zero-length marker: PKT_TYPE_OFFSET is its offsetof() */
	__u8			__pkt_type_offset[0];
	/* public: */
	__u8			pkt_type:3; /* see PKT_TYPE_MAX */
	__u8			ignore_df:1;
	__u8			dst_pending_confirm:1;
	__u8			ip_summed:2;
	__u8			ooo_okay:1;

	/* private: */
	/* zero-length marker: SKB_BF_MONO_TC_OFFSET is its offsetof() */
	__u8			__mono_tc_offset[0];
	/* public: */
	__u8			tstamp_type:2;	/* See skb_tstamp_type */
#ifdef CONFIG_NET_XGRESS
	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
	__u8			tc_skip_classify:1;
#endif
	__u8			remcsum_offload:1;
	__u8			csum_complete_sw:1;
	__u8			csum_level:2;
	__u8			inner_protocol_type:1;

	__u8			l4_hash:1;
	__u8			sw_hash:1;
#ifdef CONFIG_WIRELESS
	__u8			wifi_acked_valid:1;
	__u8			wifi_acked:1;
#endif
	__u8			no_fcs:1;
	/* Indicates the inner headers are valid in the skbuff. */
	__u8			encapsulation:1;
	__u8			encap_hdr_csum:1;
	__u8			csum_valid:1;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
	__u8			ndisc_nodetype:2;
#endif

#if IS_ENABLED(CONFIG_IP_VS)
	__u8			ipvs_property:1;
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
	__u8			nf_trace:1;
#endif
#ifdef CONFIG_NET_SWITCHDEV
	__u8			offload_fwd_mark:1;
	__u8			offload_l3_fwd_mark:1;
#endif
	__u8			redirected:1;
#ifdef CONFIG_NET_REDIRECT
	__u8			from_ingress:1;
#endif
#ifdef CONFIG_NETFILTER_SKIP_EGRESS
	__u8			nf_skip_egress:1;
#endif
#ifdef CONFIG_SKB_DECRYPTED
	__u8			decrypted:1;
#endif
	__u8			slow_gro:1;
#if IS_ENABLED(CONFIG_IP_SCTP)
	__u8			csum_not_inet:1;
#endif
	__u8			unreadable:1;
#if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS)
	__u16			tc_index;	/* traffic control index */
#endif

	u16			alloc_cpu;

	union {
		__wsum		csum;
		struct {
			__u16	csum_start;
			__u16	csum_offset;
		};
	};
	__u32			priority;
	int			skb_iif;
	__u32			hash;
	union {
		u32		vlan_all;
		struct {
			__be16	vlan_proto;
			__u16	vlan_tci;
		};
	};
#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS)
	union {
		unsigned int	napi_id;
		unsigned int	sender_cpu;
	};
#endif
#ifdef CONFIG_NETWORK_SECMARK
	__u32		secmark;
#endif

	union {
		__u32		mark;
		__u32		reserved_tailroom;
	};

	union {
		__be16		inner_protocol;
		__u8		inner_ipproto;
	};

	__u16			inner_transport_header;
	__u16			inner_network_header;
	__u16			inner_mac_header;

	__be16			protocol;
	__u16			transport_header;
	__u16			network_header;
	__u16			mac_header;

#ifdef CONFIG_KCOV
	u64			kcov_handle;
#endif

	); /* end headers group */

	/* These elements must be at the end, see alloc_skb() for details.  */
	sk_buff_data_t		tail;
	sk_buff_data_t		end;
	unsigned char		*head,
				*data;
	unsigned int		truesize;
	refcount_t		users;

#ifdef CONFIG_SKB_EXTENSIONS
	/* only usable after checking ->active_extensions != 0 */
	struct skb_ext		*extensions;
#endif
};

/* if you move pkt_type around you also must adapt those constants */
#ifdef __BIG_ENDIAN_BITFIELD
#define PKT_TYPE_MAX	(7 << 5)
#else
#define PKT_TYPE_MAX	7
#endif
#define PKT_TYPE_OFFSET		offsetof(struct sk_buff, __pkt_type_offset)

/* if you move tc_at_ingress or tstamp_type
 * around, you also must adapt these constants.
 */
/* NOTE(review): SKB_TSTAMP_TYPE_RSHIFT is only defined in the
 * big-endian branch below.
 */
#ifdef __BIG_ENDIAN_BITFIELD
#define SKB_TSTAMP_TYPE_MASK		(3 << 6)
#define SKB_TSTAMP_TYPE_RSHIFT		(6)
#define TC_AT_INGRESS_MASK		(1 << 5)
#else
#define SKB_TSTAMP_TYPE_MASK		(3)
#define TC_AT_INGRESS_MASK		(1 << 2)
#endif
#define SKB_BF_MONO_TC_OFFSET		offsetof(struct sk_buff, __mono_tc_offset)

#ifdef __KERNEL__
/*
 *	Handling routines are only of interest to the kernel
 */

/* Bits for the flags argument of __alloc_skb(); alloc_skb_fclone()
 * passes SKB_ALLOC_FCLONE there.
 */
#define SKB_ALLOC_FCLONE	0x01
#define SKB_ALLOC_RX		0x02
#define SKB_ALLOC_NAPI		0x04

/**
 * skb_pfmemalloc - Test if the skb was allocated from PFMEMALLOC reserves
 * @skb: buffer
 *
 * Returns: true when skb->pfmemalloc is set.
 */
static inline bool skb_pfmemalloc(const struct sk_buff *skb)
{
	return unlikely(skb->pfmemalloc);
}

/*
 * skb might have a dst pointer attached, refcounted or not.
 * _skb_refdst low order bit is set if refcount was _not_ taken
 */
#define SKB_DST_NOREF	1UL
/* skb_dst() ANDs _skb_refdst with this mask to drop the NOREF bit. */
#define SKB_DST_PTRMASK	~(SKB_DST_NOREF)

/**
 * skb_dst - returns skb dst_entry
 * @skb: buffer
 *
 * Returns: skb dst_entry, regardless of reference taken or not.
*/ static inline struct dst_entry *skb_dst(const struct sk_buff *skb) { /* If refdst was not refcounted, check we still are in a * rcu_read_lock section */ WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) && !rcu_read_lock_held() && !rcu_read_lock_bh_held()); return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK); } /** * skb_dst_set - sets skb dst * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was taken on dst and should * be released by skb_dst_drop() */ static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst) { skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst; } /** * skb_dst_set_noref - sets skb dst, hopefully, without taking reference * @skb: buffer * @dst: dst entry * * Sets skb dst, assuming a reference was not taken on dst. * If dst entry is cached, we do not take reference and dst_release * will be avoided by refdst_drop. If dst entry is not cached, we take * reference, so that last dst_release can destroy the dst immediately. */ static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst) { WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); skb->slow_gro |= !!dst; skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF; } /** * skb_dst_is_noref - Test if skb dst isn't refcounted * @skb: buffer */ static inline bool skb_dst_is_noref(const struct sk_buff *skb) { return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb); } /* For mangling skb->pkt_type from user space side from applications * such as nft, tc, etc, we only allow a conservative subset of * possible pkt_types to be set. 
*/ static inline bool skb_pkt_type_ok(u32 ptype) { return ptype <= PACKET_OTHERHOST; } /** * skb_napi_id - Returns the skb's NAPI id * @skb: buffer */ static inline unsigned int skb_napi_id(const struct sk_buff *skb) { #ifdef CONFIG_NET_RX_BUSY_POLL return skb->napi_id; #else return 0; #endif } static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) { #ifdef CONFIG_WIRELESS return skb->wifi_acked_valid; #else return 0; #endif } /** * skb_unref - decrement the skb's reference count * @skb: buffer * * Returns: true if we can free the skb. */ static inline bool skb_unref(struct sk_buff *skb) { if (unlikely(!skb)) return false; if (!IS_ENABLED(CONFIG_DEBUG_NET) && likely(refcount_read(&skb->users) == 1)) smp_rmb(); else if (likely(!refcount_dec_and_test(&skb->users))) return false; return true; } static inline bool skb_data_unref(const struct sk_buff *skb, struct skb_shared_info *shinfo) { int bias; if (!skb->cloned) return true; bias = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1; if (atomic_read(&shinfo->dataref) == bias) smp_rmb(); else if (atomic_sub_return(bias, &shinfo->dataref)) return false; return true; } void __fix_address sk_skb_reason_drop(struct sock *sk, struct sk_buff *skb, enum skb_drop_reason reason); static inline void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { sk_skb_reason_drop(NULL, skb, reason); } /** * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason * @skb: buffer to free */ static inline void kfree_skb(struct sk_buff *skb) { kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); } void skb_release_head_state(struct sk_buff *skb); void kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); static inline void kfree_skb_list(struct sk_buff *segs) { kfree_skb_list_reason(segs, SKB_DROP_REASON_NOT_SPECIFIED); } #ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff 
*skb); #else static inline void consume_skb(struct sk_buff *skb) { return kfree_skb(skb); } #endif void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); void kfree_skb_partial(struct sk_buff *skb, bool head_stolen); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize); struct sk_buff *__alloc_skb(unsigned int size, gfp_t priority, int flags, int node); struct sk_buff *__build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb(void *data, unsigned int frag_size); struct sk_buff *build_skb_around(struct sk_buff *skb, void *data, unsigned int frag_size); void skb_attempt_defer_free(struct sk_buff *skb); u32 napi_skb_cache_get_bulk(void **skbs, u32 n); struct sk_buff *napi_build_skb(void *data, unsigned int frag_size); struct sk_buff *slab_build_skb(void *data); /** * alloc_skb - allocate a network buffer * @size: size to allocate * @priority: allocation mask * * This function is a convenient wrapper around __alloc_skb(). */ static inline struct sk_buff *alloc_skb(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, 0, NUMA_NO_NODE); } struct sk_buff *alloc_skb_with_frags(unsigned long header_len, unsigned long data_len, int max_page_order, int *errcode, gfp_t gfp_mask); struct sk_buff *alloc_skb_for_msg(struct sk_buff *first); /* Layout of fast clones : [skb1][skb2][fclone_ref] */ struct sk_buff_fclones { struct sk_buff skb1; struct sk_buff skb2; refcount_t fclone_ref; }; /** * skb_fclone_busy - check if fclone is busy * @sk: socket * @skb: buffer * * Returns: true if skb is a fast clone, and its clone is not freed. * Some drivers call skb_orphan() in their ndo_start_xmit(), * so we also check that didn't happen. 
*/ static inline bool skb_fclone_busy(const struct sock *sk, const struct sk_buff *skb) { const struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && READ_ONCE(fclones->skb2.sk) == sk; } /** * alloc_skb_fclone - allocate a network buffer from fclone cache * @size: size to allocate * @priority: allocation mask * * This function is a convenient wrapper around __alloc_skb(). */ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, gfp_t priority) { return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE); } struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); void skb_headers_offset_update(struct sk_buff *skb, int off); int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask); struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); void skb_copy_header(struct sk_buff *new, const struct sk_buff *old); struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority); struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, gfp_t gfp_mask, bool fclone); static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask) { return __pskb_copy_fclone(skb, headroom, gfp_mask, false); } int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask); struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom); struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); int __must_check skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int __must_check skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len); int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error); /** 
*	skb_pad			-	zero pad the tail of an skb
 *	@skb: buffer to pad
 *	@pad: space to pad
 *
 *	Ensure that a buffer is followed by a padding area that is zero
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
 *	May return error in out of memory cases. The skb is freed on error.
 */
static inline int skb_pad(struct sk_buff *skb, int pad)
{
	/* third argument is __skb_pad()'s free_on_error */
	return __skb_pad(skb, pad, true);
}
/* dev_kfree_skb() is an alias for consume_skb(). */
#define dev_kfree_skb(a)	consume_skb(a)

int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
			 int offset, size_t size, size_t max_frags);

/* State object handed to skb_prepare_seq_read(), skb_seq_read(),
 * skb_abort_seq_read() and skb_copy_seq_read().
 */
struct skb_seq_state {
	__u32		lower_offset;
	__u32		upper_offset;
	__u32		frag_idx;
	__u32		stepped_offset;
	struct sk_buff	*root_skb;
	struct sk_buff	*cur_skb;
	__u8		*frag_data;
	__u32		frag_off;
};

void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
			  unsigned int to, struct skb_seq_state *st);
unsigned int skb_seq_read(unsigned int consumed, const u8 **data,
			  struct skb_seq_state *st);
void skb_abort_seq_read(struct skb_seq_state *st);
int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len);

unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
			   unsigned int to, struct ts_config *config);

/*
 * Packet hash types specify the type of hash in skb_set_hash.
 *
 * Hash types refer to the protocol layer addresses which are used to
 * construct a packet's hash. The hashes are used to differentiate or identify
 * flows of the protocol layer for the hash type. Hash types are either
 * layer-2 (L2), layer-3 (L3), or layer-4 (L4).
 *
 * Properties of hashes:
 *
 * 1) Two packets in different flows have different hash values
 * 2) Two packets in the same flow should have the same hash value
 *
 * A hash at a higher layer is considered to be more specific. A driver should
 * set the most specific hash possible.
 *
 * A driver cannot indicate a more specific hash than the layer at which a hash
 * was computed. For instance an L3 hash cannot be set as an L4 hash.
* * A driver may indicate a hash level which is less specific than the * actual layer the hash was computed on. For instance, a hash computed * at L4 may be considered an L3 hash. This should only be done if the * driver can't unambiguously determine that the HW computed the hash at * the higher layer. Note that the "should" in the second property above * permits this. */ enum pkt_hash_types { PKT_HASH_TYPE_NONE, /* Undefined type */ PKT_HASH_TYPE_L2, /* Input: src_MAC, dest_MAC */ PKT_HASH_TYPE_L3, /* Input: src_IP, dst_IP */ PKT_HASH_TYPE_L4, /* Input: src_IP, dst_IP, src_port, dst_port */ }; static inline void skb_clear_hash(struct sk_buff *skb) { skb->hash = 0; skb->sw_hash = 0; skb->l4_hash = 0; } static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb) { if (!skb->l4_hash) skb_clear_hash(skb); } static inline void __skb_set_hash(struct sk_buff *skb, __u32 hash, bool is_sw, bool is_l4) { skb->l4_hash = is_l4; skb->sw_hash = is_sw; skb->hash = hash; } static inline void skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type) { /* Used by drivers to set hash from HW */ __skb_set_hash(skb, hash, false, type == PKT_HASH_TYPE_L4); } static inline void __skb_set_sw_hash(struct sk_buff *skb, __u32 hash, bool is_l4) { __skb_set_hash(skb, hash, true, is_l4); } u32 __skb_get_hash_symmetric_net(const struct net *net, const struct sk_buff *skb); static inline u32 __skb_get_hash_symmetric(const struct sk_buff *skb) { return __skb_get_hash_symmetric_net(NULL, skb); } void __skb_get_hash_net(const struct net *net, struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, const void *data, int hlen_proto); void skb_flow_dissector_init(struct flow_dissector *flow_dissector, const struct flow_dissector_key *key, unsigned int key_count); struct 
bpf_flow_dissector; u32 bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen, unsigned int flags); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags); static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { return __skb_flow_dissect(NULL, skb, flow_dissector, target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, struct flow_keys *flow, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, flow, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, struct flow_keys_basic *flow, const void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); /* Gets a skb connection tracking info, ctinfo map should be a * map of mapsize to translate enum ip_conntrack_info states * to user states. 
*/ void skb_flow_dissect_ct(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, u16 *ctinfo_map, size_t mapsize, bool post_ct, u16 zone); void skb_flow_dissect_tunnel_info(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); void skb_flow_dissect_hash(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container); static inline __u32 skb_get_hash_net(const struct net *net, struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) __skb_get_hash_net(net, skb); return skb->hash; } static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) __skb_get_hash_net(NULL, skb); return skb->hash; } static inline __u32 skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) { if (!skb->l4_hash && !skb->sw_hash) { struct flow_keys keys; __u32 hash = __get_hash_from_flowi6(fl6, &keys); __skb_set_sw_hash(skb, hash, flow_keys_have_l4(&keys)); } return skb->hash; } __u32 skb_get_hash_perturb(const struct sk_buff *skb, const siphash_key_t *perturb); static inline __u32 skb_get_hash_raw(const struct sk_buff *skb) { return skb->hash; } static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from) { to->hash = from->hash; to->sw_hash = from->sw_hash; to->l4_hash = from->l4_hash; }; static inline int skb_cmp_decrypted(const struct sk_buff *skb1, const struct sk_buff *skb2) { #ifdef CONFIG_SKB_DECRYPTED return skb2->decrypted - skb1->decrypted; #else return 0; #endif } static inline bool skb_is_decrypted(const struct sk_buff *skb) { #ifdef CONFIG_SKB_DECRYPTED return skb->decrypted; #else return false; #endif } static inline void skb_copy_decrypted(struct sk_buff *to, const struct sk_buff *from) { #ifdef CONFIG_SKB_DECRYPTED to->decrypted = from->decrypted; #endif } #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->head + skb->end; } static inline 
unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end; } static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) { skb->end = offset; } #else static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { return skb->end; } static inline unsigned int skb_end_offset(const struct sk_buff *skb) { return skb->end - skb->head; } static inline void skb_set_end_offset(struct sk_buff *skb, unsigned int offset) { skb->end = skb->head + offset; } #endif extern const struct ubuf_info_ops msg_zerocopy_ubuf_ops; struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg, bool devmem); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref); struct net_devmem_dmabuf_binding; int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, struct sk_buff *skb, struct iov_iter *from, size_t length, struct net_devmem_dmabuf_binding *binding); int zerocopy_fill_skb_from_iter(struct sk_buff *skb, struct iov_iter *from, size_t length); static inline int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len) { return __zerocopy_sg_from_iter(msg, skb->sk, skb, &msg->msg_iter, len, NULL); } int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, struct msghdr *msg, int len, struct ubuf_info *uarg, struct net_devmem_dmabuf_binding *binding); /* Internal */ #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB))) static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) { return &skb_shinfo(skb)->hwtstamps; } static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) { bool is_zcopy = skb && skb_shinfo(skb)->flags & SKBFL_ZEROCOPY_ENABLE; return is_zcopy ? 
skb_uarg(skb) : NULL; } static inline bool skb_zcopy_pure(const struct sk_buff *skb) { return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; } static inline bool skb_zcopy_managed(const struct sk_buff *skb) { return skb_shinfo(skb)->flags & SKBFL_MANAGED_FRAG_REFS; } static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, const struct sk_buff *skb2) { return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); } static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); } static inline void skb_zcopy_init(struct sk_buff *skb, struct ubuf_info *uarg) { skb_shinfo(skb)->destructor_arg = uarg; skb_shinfo(skb)->flags |= uarg->flags; } static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg, bool *have_ref) { if (skb && uarg && !skb_zcopy(skb)) { if (unlikely(have_ref && *have_ref)) *have_ref = false; else net_zcopy_get(uarg); skb_zcopy_init(skb, uarg); } } static inline void skb_zcopy_set_nouarg(struct sk_buff *skb, void *val) { skb_shinfo(skb)->destructor_arg = (void *)((uintptr_t) val | 0x1UL); skb_shinfo(skb)->flags |= SKBFL_ZEROCOPY_FRAG; } static inline bool skb_zcopy_is_nouarg(struct sk_buff *skb) { return (uintptr_t) skb_shinfo(skb)->destructor_arg & 0x1UL; } static inline void *skb_zcopy_get_nouarg(struct sk_buff *skb) { return (void *)((uintptr_t) skb_shinfo(skb)->destructor_arg & ~0x1UL); } static inline void net_zcopy_put(struct ubuf_info *uarg) { if (uarg) uarg->ops->complete(NULL, uarg, true); } static inline void net_zcopy_put_abort(struct ubuf_info *uarg, bool have_uref) { if (uarg) { if (uarg->ops == &msg_zerocopy_ubuf_ops) msg_zerocopy_put_abort(uarg, have_uref); else if (have_uref) net_zcopy_put(uarg); } } /* Release a reference on a zerocopy structure */ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) { struct ubuf_info *uarg = skb_zcopy(skb); if (uarg) { if (!skb_zcopy_is_nouarg(skb)) uarg->ops->complete(skb, uarg, zerocopy_success); skb_shinfo(skb)->flags &= 
~SKBFL_ALL_ZEROCOPY; } } void __skb_zcopy_downgrade_managed(struct sk_buff *skb); static inline void skb_zcopy_downgrade_managed(struct sk_buff *skb) { if (unlikely(skb_zcopy_managed(skb))) __skb_zcopy_downgrade_managed(skb); } /* Return true if frags in this skb are readable by the host. */ static inline bool skb_frags_readable(const struct sk_buff *skb) { return !skb->unreadable; } static inline void skb_mark_not_on_list(struct sk_buff *skb) { skb->next = NULL; } static inline void skb_poison_list(struct sk_buff *skb) { #ifdef CONFIG_DEBUG_NET skb->next = SKB_LIST_POISON_NEXT; #endif } /* Iterate through singly-linked GSO fragments of an skb. */ #define skb_list_walk_safe(first, skb, next_skb) \ for ((skb) = (first), (next_skb) = (skb) ? (skb)->next : NULL; (skb); \ (skb) = (next_skb), (next_skb) = (skb) ? (skb)->next : NULL) static inline void skb_list_del_init(struct sk_buff *skb) { __list_del_entry(&skb->list); skb_mark_not_on_list(skb); } /** * skb_queue_empty - check if a queue is empty * @list: queue head * * Returns true if the queue is empty, false otherwise. */ static inline int skb_queue_empty(const struct sk_buff_head *list) { return list->next == (const struct sk_buff *) list; } /** * skb_queue_empty_lockless - check if a queue is empty * @list: queue head * * Returns true if the queue is empty, false otherwise. * This variant can be used in lockless contexts. */ static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list) { return READ_ONCE(list->next) == (const struct sk_buff *) list; } /** * skb_queue_is_last - check if skb is the last entry in the queue * @list: queue head * @skb: buffer * * Returns true if @skb is the last buffer on the list. 
*/
static inline bool skb_queue_is_last(const struct sk_buff_head *list,
				     const struct sk_buff *skb)
{
	return skb->next == (const struct sk_buff *) list;
}

/**
 *	skb_queue_is_first - check if skb is the first entry in the queue
 *	@list: queue head
 *	@skb: buffer
 *
 *	Returns true if @skb is the first buffer on the list.
 */
static inline bool skb_queue_is_first(const struct sk_buff_head *list,
				      const struct sk_buff *skb)
{
	return skb->prev == (const struct sk_buff *) list;
}

/**
 *	skb_queue_next - return the next packet in the queue
 *	@list: queue head
 *	@skb: current buffer
 *
 *	Return the next packet in @list after @skb.  It is only valid to
 *	call this if skb_queue_is_last() evaluates to false.
 */
static inline struct sk_buff *skb_queue_next(const struct sk_buff_head *list,
					     const struct sk_buff *skb)
{
	/* This BUG_ON may seem severe, but if we just return then we
	 * are going to dereference garbage.
	 */
	BUG_ON(skb_queue_is_last(list, skb));
	return skb->next;
}

/**
 *	skb_queue_prev - return the prev packet in the queue
 *	@list: queue head
 *	@skb: current buffer
 *
 *	Return the prev packet in @list before @skb.  It is only valid to
 *	call this if skb_queue_is_first() evaluates to false.
 */
static inline struct sk_buff *skb_queue_prev(const struct sk_buff_head *list,
					     const struct sk_buff *skb)
{
	/* This BUG_ON may seem severe, but if we just return then we
	 * are going to dereference garbage.
	 */
	BUG_ON(skb_queue_is_first(list, skb));
	return skb->prev;
}

/**
 *	skb_get - reference buffer
 *	@skb: buffer to reference
 *
 *	Makes another reference to a socket buffer and returns a pointer
 *	to the buffer.
 */
static inline struct sk_buff *skb_get(struct sk_buff *skb)
{
	refcount_inc(&skb->users);
	return skb;
}

/*
 * If users == 1, we are the only owner and can avoid redundant atomic changes.
 */

/**
 *	skb_cloned - is the buffer a clone
 *	@skb: buffer to check
 *
 *	Returns true if the buffer was generated with skb_clone() and is
 *	one of multiple shared copies of the buffer.
Cloned buffers are
 *	shared data so must not be written to under normal circumstances.
 */
static inline int skb_cloned(const struct sk_buff *skb)
{
	return skb->cloned &&
	       (atomic_read(&skb_shinfo(skb)->dataref) & SKB_DATAREF_MASK) != 1;
}

/* If @skb is cloned, give it a private head via pskb_expand_head();
 * returns that call's result, or 0 when nothing had to be done.
 */
static inline int skb_unclone(struct sk_buff *skb, gfp_t pri)
{
	might_sleep_if(gfpflags_allow_blocking(pri));

	if (skb_cloned(skb))
		return pskb_expand_head(skb, 0, 0, pri);

	return 0;
}

/* This variant of skb_unclone() makes sure skb->truesize
 * and skb_end_offset() are not changed, whenever a new skb->head is needed.
 *
 * Indeed there is no guarantee that ksize(kmalloc(X)) == ksize(kmalloc(X))
 * when various debugging features are in place.
 */
int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri);
static inline int skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
{
	might_sleep_if(gfpflags_allow_blocking(pri));

	if (skb_cloned(skb))
		return __skb_unclone_keeptruesize(skb, pri);
	return 0;
}

/**
 *	skb_header_cloned - is the header a clone
 *	@skb: buffer to check
 *
 *	Returns true if modifying the header part of the buffer requires
 *	the data to be copied.
 */
static inline int skb_header_cloned(const struct sk_buff *skb)
{
	int dataref;

	if (!skb->cloned)
		return 0;

	dataref = atomic_read(&skb_shinfo(skb)->dataref);
	/* Low (masked) part of dataref minus the part above SKB_DATAREF_SHIFT. */
	dataref = (dataref & SKB_DATAREF_MASK) - (dataref >> SKB_DATAREF_SHIFT);
	return dataref != 1;
}

/* Like skb_unclone(), but keyed on skb_header_cloned(). */
static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri)
{
	might_sleep_if(gfpflags_allow_blocking(pri));

	if (skb_header_cloned(skb))
		return pskb_expand_head(skb, 0, 0, pri);

	return 0;
}

/**
 * __skb_header_release() - allow clones to use the headroom
 * @skb: buffer to operate on
 *
 * See "DOC: dataref and headerless skbs".
 */
static inline void __skb_header_release(struct sk_buff *skb)
{
	skb->nohdr = 1;
	atomic_set(&skb_shinfo(skb)->dataref, 1 + (1 << SKB_DATAREF_SHIFT));
}


/**
 *	skb_shared - is the buffer shared
 *	@skb: buffer to check
 *
 *	Returns true if more than one person has a reference to this
 *	buffer.
*/
static inline int skb_shared(const struct sk_buff *skb)
{
	const unsigned int users = refcount_read(&skb->users);

	return users != 1;
}

/**
 *	skb_share_check - hand back an skb the caller owns exclusively
 *	@skb: buffer to check
 *	@pri: priority for memory allocation
 *
 *	An unshared buffer is returned untouched.  A shared one is cloned:
 *	the reference held on the old copy is dropped and the fresh clone,
 *	which carries a single reference, is returned instead.  Callers in
 *	interrupt context or holding spinlocks must pass GFP_ATOMIC as @pri.
 *
 *	NULL is returned on a memory allocation failure.
 */
static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri)
{
	struct sk_buff *clone;

	might_sleep_if(gfpflags_allow_blocking(pri));

	/* Sole owner: nothing to do. */
	if (!skb_shared(skb))
		return skb;

	clone = skb_clone(skb, pri);

	/* Either way our reference on the original goes away. */
	if (unlikely(!clone))
		kfree_skb(skb);
	else
		consume_skb(skb);

	return clone;
}

/*
 *	Copy shared buffers into a new sk_buff. We effectively do COW on
 *	packets to handle cases where we have a local reader and forward
 *	and a couple of other messy ones. The normal one is tcpdumping
 *	a packet that's being forwarded.
 */

/**
 *	skb_unshare - make a copy of a shared buffer
 *	@skb: buffer to check
 *	@pri: priority for memory allocation
 *
 *	If the socket buffer is a clone then this function creates a new
 *	copy of the data, drops a reference count on the old copy and returns
 *	the new copy with the reference count at 1. If the buffer is not a clone
 *	the original buffer is returned. When called with a spinlock held or
 *	from interrupt state @pri must be %GFP_ATOMIC
 *
 *	%NULL is returned on a memory allocation failure.
*/ static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); /* Free our shared copy */ if (likely(nskb)) consume_skb(skb); else kfree_skb(skb); skb = nskb; } return skb; } /** * skb_peek - peek at the head of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ * be careful with this one. A peek leaves the buffer on the * list and someone else may run off with it. You must hold * the appropriate locks or have a private queue to do this. * * Returns %NULL for an empty list or a pointer to the head element. * The reference count is not incremented and the reference is therefore * volatile. Use with caution. */ static inline struct sk_buff *skb_peek(const struct sk_buff_head *list_) { struct sk_buff *skb = list_->next; if (skb == (struct sk_buff *)list_) skb = NULL; return skb; } /** * __skb_peek - peek at the head of a non-empty &sk_buff_head * @list_: list to peek at * * Like skb_peek(), but the caller knows that the list is not empty. */ static inline struct sk_buff *__skb_peek(const struct sk_buff_head *list_) { return list_->next; } /** * skb_peek_next - peek skb following the given one from a queue * @skb: skb to start from * @list_: list to peek at * * Returns %NULL when the end of the list is met or a pointer to the * next element. The reference count is not incremented and the * reference is therefore volatile. Use with caution. */ static inline struct sk_buff *skb_peek_next(struct sk_buff *skb, const struct sk_buff_head *list_) { struct sk_buff *next = skb->next; if (next == (struct sk_buff *)list_) next = NULL; return next; } /** * skb_peek_tail - peek at the tail of an &sk_buff_head * @list_: list to peek at * * Peek an &sk_buff. Unlike most other operations you _MUST_ * be careful with this one. A peek leaves the buffer on the * list and someone else may run off with it. 
You must hold
 *	the appropriate locks or have a private queue to do this.
 *
 *	Returns %NULL for an empty list or a pointer to the tail element.
 *	The reference count is not incremented and the reference is therefore
 *	volatile. Use with caution.
 */
static inline struct sk_buff *skb_peek_tail(const struct sk_buff_head *list_)
{
	struct sk_buff *skb = READ_ONCE(list_->prev);

	if (skb == (struct sk_buff *)list_)
		skb = NULL;
	return skb;

}

/**
 *	skb_queue_len	- get queue length
 *	@list_: list to measure
 *
 *	Return the length of an &sk_buff queue.
 */
static inline __u32 skb_queue_len(const struct sk_buff_head *list_)
{
	return list_->qlen;
}

/**
 *	skb_queue_len_lockless	- get queue length
 *	@list_: list to measure
 *
 *	Return the length of an &sk_buff queue.
 *	This variant can be used in lockless contexts.
 */
static inline __u32 skb_queue_len_lockless(const struct sk_buff_head *list_)
{
	return READ_ONCE(list_->qlen);
}

/**
 *	__skb_queue_head_init - initialize non-spinlock portions of sk_buff_head
 *	@list: queue to initialize
 *
 *	This initializes only the list and queue length aspects of
 *	an sk_buff_head object.  This allows to initialize the list
 *	aspects of an sk_buff_head without reinitializing things like
 *	the spinlock.  It can also be used for on-stack sk_buff_head
 *	objects where the spinlock is known to not be used.
 */
static inline void __skb_queue_head_init(struct sk_buff_head *list)
{
	/* An empty queue has both links pointing back at the head itself. */
	list->prev = list->next = (struct sk_buff *)list;
	list->qlen = 0;
}

/*
 * This function creates a split out lock class for each invocation;
 * this is needed for now since a whole lot of users of the skb-queue
 * infrastructure in drivers have different locking usage (in hardirq)
 * than the networking core (in softirq only). In the long run either the
 * network layer or drivers should need annotation to consolidate the
 * main types of usage into 3 classes.
*/
static inline void skb_queue_head_init(struct sk_buff_head *list)
{
	spin_lock_init(&list->lock);
	__skb_queue_head_init(list);
}

/* Like skb_queue_head_init(), then assign lockdep class @class to the lock. */
static inline void skb_queue_head_init_class(struct sk_buff_head *list,
		struct lock_class_key *class)
{
	skb_queue_head_init(list);
	lockdep_set_class(&list->lock, class);
}

/*
 *	Insert an sk_buff on a list.
 *
 *	The "__skb_xxxx()" functions are the non-atomic ones that
 *	can only be called with interrupts disabled.
 */
static inline void __skb_insert(struct sk_buff *newsk,
				struct sk_buff *prev, struct sk_buff *next,
				struct sk_buff_head *list)
{
	/* See skb_queue_empty_lockless() and skb_peek_tail()
	 * for the opposite READ_ONCE()
	 */
	WRITE_ONCE(newsk->next, next);
	WRITE_ONCE(newsk->prev, prev);
	WRITE_ONCE(((struct sk_buff_list *)next)->prev, newsk);
	WRITE_ONCE(((struct sk_buff_list *)prev)->next, newsk);
	WRITE_ONCE(list->qlen, list->qlen + 1);
}

/* Link every buffer of @list between @prev and @next.
 * Queue lengths are not touched here; the callers below adjust them.
 */
static inline void __skb_queue_splice(const struct sk_buff_head *list,
				      struct sk_buff *prev,
				      struct sk_buff *next)
{
	struct sk_buff *first = list->next;
	struct sk_buff *last = list->prev;

	WRITE_ONCE(first->prev, prev);
	WRITE_ONCE(prev->next, first);

	WRITE_ONCE(last->next, next);
	WRITE_ONCE(next->prev, last);
}

/**
 *	skb_queue_splice - join two skb lists, this is designed for stacks
 *	@list: the new list to add
 *	@head: the place to add it in the first list
 */
static inline void skb_queue_splice(const struct sk_buff_head *list,
				    struct sk_buff_head *head)
{
	if (!skb_queue_empty(list)) {
		__skb_queue_splice(list, (struct sk_buff *) head, head->next);
		head->qlen += list->qlen;
	}
}

/**
 *	skb_queue_splice_init - join two skb lists and reinitialise the emptied list
 *	@list: the new list to add
 *	@head: the place to add it in the first list
 *
 *	The list at @list is reinitialised
 */
static inline void skb_queue_splice_init(struct sk_buff_head *list,
					 struct sk_buff_head *head)
{
	if (!skb_queue_empty(list)) {
		__skb_queue_splice(list, (struct sk_buff *) head, head->next);
		head->qlen += list->qlen;
		__skb_queue_head_init(list);
	}
}

/**
 *	skb_queue_splice_tail - join two skb lists, each list being a queue
 *	@list: the new list to add
 *	@head: the place to add it in the first list
 */
static inline void skb_queue_splice_tail(const struct sk_buff_head *list,
					 struct sk_buff_head *head)
{
	if (!skb_queue_empty(list)) {
		__skb_queue_splice(list, head->prev, (struct sk_buff *) head);
		head->qlen += list->qlen;
	}
}

/**
 *	skb_queue_splice_tail_init - join two skb lists and reinitialise the emptied list
 *	@list: the new list to add
 *	@head: the place to add it in the first list
 *
 *	Each of the lists is a queue.
 *	The list at @list is reinitialised
 */
static inline void skb_queue_splice_tail_init(struct sk_buff_head *list,
					      struct sk_buff_head *head)
{
	if (!skb_queue_empty(list)) {
		__skb_queue_splice(list, head->prev, (struct sk_buff *) head);
		head->qlen += list->qlen;
		__skb_queue_head_init(list);
	}
}

/**
 *	__skb_queue_after - queue a buffer after a given buffer
 *	@list: list to use
 *	@prev: place after this buffer
 *	@newsk: buffer to queue
 *
 *	Queue a buffer in the middle of a list. This function takes no locks
 *	and you must therefore hold required locks before calling it.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
static inline void __skb_queue_after(struct sk_buff_head *list,
				     struct sk_buff *prev,
				     struct sk_buff *newsk)
{
	__skb_insert(newsk, prev, ((struct sk_buff_list *)prev)->next, list);
}

void skb_append(struct sk_buff *old, struct sk_buff *newsk,
		struct sk_buff_head *list);

/* Insert @newsk immediately before @next on @list. */
static inline void __skb_queue_before(struct sk_buff_head *list,
				      struct sk_buff *next,
				      struct sk_buff *newsk)
{
	__skb_insert(newsk, ((struct sk_buff_list *)next)->prev, next, list);
}

/**
 *	__skb_queue_head - queue a buffer at the list head
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the start of a list. This function takes no locks
 *	and you must therefore hold required locks before calling it.
 *
 *	A buffer cannot be placed on two lists at the same time.
*/
static inline void __skb_queue_head(struct sk_buff_head *list,
				    struct sk_buff *newsk)
{
	/* The head itself acts as the "previous" element of the first buffer. */
	__skb_queue_after(list, (struct sk_buff *)list, newsk);
}
void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);

/**
 *	__skb_queue_tail - queue a buffer at the list tail
 *	@list: list to use
 *	@newsk: buffer to queue
 *
 *	Queue a buffer at the end of a list. This function takes no locks
 *	and you must therefore hold required locks before calling it.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
static inline void __skb_queue_tail(struct sk_buff_head *list,
				   struct sk_buff *newsk)
{
	/* The head itself acts as the "next" element of the last buffer. */
	__skb_queue_before(list, (struct sk_buff *)list, newsk);
}
void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk);

/*
 * remove sk_buff from list. _Must_ be called atomically, and with
 * the list known..
 */
void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list);
static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
{
	struct sk_buff *next, *prev;

	WRITE_ONCE(list->qlen, list->qlen - 1);
	next	   = skb->next;
	prev	   = skb->prev;
	/* Clear the removed buffer's links before re-linking its neighbours. */
	skb->next  = skb->prev = NULL;
	WRITE_ONCE(next->prev, prev);
	WRITE_ONCE(prev->next, next);
}

/**
 *	__skb_dequeue - remove from the head of the queue
 *	@list: list to dequeue from
 *
 *	Remove the head of the list. This function does not take any locks
 *	so must be used with appropriate locks held only. The head item is
 *	returned or %NULL if the list is empty.
 */
static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list)
{
	struct sk_buff *skb = skb_peek(list);
	if (skb)
		__skb_unlink(skb, list);
	return skb;
}
struct sk_buff *skb_dequeue(struct sk_buff_head *list);

/**
 *	__skb_dequeue_tail - remove from the tail of the queue
 *	@list: list to dequeue from
 *
 *	Remove the tail of the list. This function does not take any locks
 *	so must be used with appropriate locks held only. The tail item is
 *	returned or %NULL if the list is empty.
*/
static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list)
{
	struct sk_buff *skb = skb_peek_tail(list);
	if (skb)
		__skb_unlink(skb, list);
	return skb;
}
struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list);


/* True when data_len is non-zero. */
static inline bool skb_is_nonlinear(const struct sk_buff *skb)
{
	return skb->data_len;
}

/* len minus data_len. */
static inline unsigned int skb_headlen(const struct sk_buff *skb)
{
	return skb->len - skb->data_len;
}

/* Sum of skb_frag_size() over all nr_frags fragments. */
static inline unsigned int __skb_pagelen(const struct sk_buff *skb)
{
	unsigned int i, len = 0;

	/* Counts down; the (int) cast ends the loop once i wraps below zero. */
	for (i = skb_shinfo(skb)->nr_frags - 1; (int)i >= 0; i--)
		len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
	return len;
}

/* skb_headlen() plus __skb_pagelen(). */
static inline unsigned int skb_pagelen(const struct sk_buff *skb)
{
	return skb_headlen(skb) + __skb_pagelen(skb);
}

/* Set netmem, offset and size of @frag. */
static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
					     netmem_ref netmem, int off,
					     int size)
{
	frag->netmem = netmem;
	frag->offset = off;
	skb_frag_size_set(frag, size);
}

/* Page flavour of skb_frag_fill_netmem_desc(). */
static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
					   struct page *page,
					   int off, int size)
{
	skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
}

/* Fill fragment @i of @shinfo; touches nothing outside that fragment. */
static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
						int i, netmem_ref netmem,
						int off, int size)
{
	skb_frag_t *frag = &shinfo->frags[i];

	skb_frag_fill_netmem_desc(frag, netmem, off, size);
}

/* Page flavour of __skb_fill_netmem_desc_noacc(). */
static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
					      int i, struct page *page,
					      int off, int size)
{
	__skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
				     size);
}

/**
 * skb_len_add - adds a number to len fields of skb
 * @skb: buffer to add len to
 * @delta: number of bytes to add
 */
static inline void skb_len_add(struct sk_buff *skb, int delta)
{
	skb->len += delta;
	skb->data_len += delta;
	skb->truesize += delta;
}

/**
 * __skb_fill_netmem_desc - initialise a fragment in an skb
 * @skb: buffer containing fragment to be initialised
 * @i: fragment index to initialise
 * @netmem: the netmem to use for this fragment
 * @off: the offset to
the data within @netmem
 * @size: the length of the data
 *
 * Initialises the @i'th fragment of @skb to point to @size bytes at
 * offset @off within @netmem.
 *
 * Does not take any additional reference on the fragment.
 */
static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
					  netmem_ref netmem, int off, int size)
{
	struct page *page;

	__skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);

	/* A net_iov has no struct page: mark the skb unreadable and stop. */
	if (netmem_is_net_iov(netmem)) {
		skb->unreadable = true;
		return;
	}

	page = netmem_to_page(netmem);

	/* Propagate page pfmemalloc to the skb if we can. The problem is
	 * that not all callers have unique ownership of the page but rely
	 * on page_is_pfmemalloc doing the right thing(tm).
	 */
	page = compound_head(page);
	if (page_is_pfmemalloc(page))
		skb->pfmemalloc = true;
}

/* Page flavour of __skb_fill_netmem_desc(). */
static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
					struct page *page, int off, int size)
{
	__skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
}

/* As __skb_fill_netmem_desc(), and additionally sets nr_frags to @i + 1. */
static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
					netmem_ref netmem, int off, int size)
{
	__skb_fill_netmem_desc(skb, i, netmem, off, size);
	skb_shinfo(skb)->nr_frags = i + 1;
}

/**
 * skb_fill_page_desc - initialise a paged fragment in an skb
 * @skb: buffer containing fragment to be initialised
 * @i: paged fragment index to initialise
 * @page: the page to use for this fragment
 * @off: the offset to the data with @page
 * @size: the length of the data
 *
 * As per __skb_fill_page_desc() -- initialises the @i'th fragment of
 * @skb to point to @size bytes at offset @off within @page. In
 * addition updates @skb such that @i is the last fragment.
 *
 * Does not take any additional reference on the fragment.
*/
static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
				      struct page *page, int off, int size)
{
	skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
}

/**
 * skb_fill_page_desc_noacc - initialise a paged fragment in an skb
 * @skb: buffer containing fragment to be initialised
 * @i: paged fragment index to initialise
 * @page: the page to use for this fragment
 * @off: the offset to the data with @page
 * @size: the length of the data
 *
 * Variant of skb_fill_page_desc() which does not deal with
 * pfmemalloc, if page is not owned by us.
 */
static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
					    struct page *page, int off,
					    int size)
{
	struct skb_shared_info *shinfo = skb_shinfo(skb);

	__skb_fill_page_desc_noacc(shinfo, i, page, off, size);
	shinfo->nr_frags = i + 1;
}

void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
			    int off, int size, unsigned int truesize);

/* Page flavour of skb_add_rx_frag_netmem(). */
static inline void skb_add_rx_frag(struct sk_buff *skb, int i,
				   struct page *page, int off, int size,
				   unsigned int truesize)
{
	skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
			       truesize);
}

void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
			  unsigned int truesize);

#define SKB_LINEAR_ASSERT(skb)  BUG_ON(skb_is_nonlinear(skb))

/* skb->tail is an offset from skb->head in the first variant and a pointer
 * in the second; these accessors hide the difference.
 */
#ifdef NET_SKBUFF_DATA_USES_OFFSET
static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
{
	return skb->head + skb->tail;
}

static inline void skb_reset_tail_pointer(struct sk_buff *skb)
{
	skb->tail = skb->data - skb->head;
}

static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
{
	skb_reset_tail_pointer(skb);
	skb->tail += offset;
}

#else /* NET_SKBUFF_DATA_USES_OFFSET */
static inline unsigned char *skb_tail_pointer(const struct sk_buff *skb)
{
	return skb->tail;
}

static inline void skb_reset_tail_pointer(struct sk_buff *skb)
{
	skb->tail = skb->data;
}

static inline void skb_set_tail_pointer(struct sk_buff *skb, const int offset)
{
	skb->tail = skb->data + offset;
}

#endif /* NET_SKBUFF_DATA_USES_OFFSET */

/* With CONFIG_DEBUG_NET: warn once, and dump the skb, when len is zero. */
static inline void skb_assert_len(struct sk_buff *skb)
{
#ifdef CONFIG_DEBUG_NET
	if (WARN_ONCE(!skb->len, "%s\n", __func__))
		DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
#endif /* CONFIG_DEBUG_NET */
}

/* Empty stub unless CONFIG_FAIL_SKB_REALLOC is defined. */
#if defined(CONFIG_FAIL_SKB_REALLOC)
void skb_might_realloc(struct sk_buff *skb);
#else
static inline void skb_might_realloc(struct sk_buff *skb) {}
#endif

/*
 *	Add data to an sk_buff
 */
void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
void *skb_put(struct sk_buff *skb, unsigned int len);

/* Advance tail and len by @len; returns the tail pointer from before the
 * advance. The skb must be linear (SKB_LINEAR_ASSERT).
 */
static inline void *__skb_put(struct sk_buff *skb, unsigned int len)
{
	void *tmp = skb_tail_pointer(skb);
	SKB_LINEAR_ASSERT(skb);
	skb->tail += len;
	skb->len  += len;
	return tmp;
}

/* __skb_put() followed by zeroing the new area. */
static inline void *__skb_put_zero(struct sk_buff *skb, unsigned int len)
{
	void *tmp = __skb_put(skb, len);

	memset(tmp, 0, len);
	return tmp;
}

/* __skb_put() followed by copying @len bytes from @data into the new area. */
static inline void *__skb_put_data(struct sk_buff *skb, const void *data,
				   unsigned int len)
{
	void *tmp = __skb_put(skb, len);

	memcpy(tmp, data, len);
	return tmp;
}

/* Append the single byte @val via __skb_put(). */
static inline void __skb_put_u8(struct sk_buff *skb, u8 val)
{
	*(u8 *)__skb_put(skb, 1) = val;
}

/* skb_put() followed by zeroing the new area. */
static inline void *skb_put_zero(struct sk_buff *skb, unsigned int len)
{
	void *tmp = skb_put(skb, len);

	memset(tmp, 0, len);

	return tmp;
}

/* skb_put() followed by copying @len bytes from @data into the new area. */
static inline void *skb_put_data(struct sk_buff *skb, const void *data,
				 unsigned int len)
{
	void *tmp = skb_put(skb, len);

	memcpy(tmp, data, len);

	return tmp;
}

/* Append the single byte @val via skb_put(). */
static inline void skb_put_u8(struct sk_buff *skb, u8 val)
{
	*(u8 *)skb_put(skb, 1) = val;
}

void *skb_push(struct sk_buff *skb, unsigned int len);

/* Move data back by @len and grow len; returns the new data pointer. */
static inline void *__skb_push(struct sk_buff *skb, unsigned int len)
{
	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);

	skb->data -= len;
	skb->len  += len;
	return skb->data;
}

void *skb_pull(struct sk_buff *skb, unsigned int len);

/* Shrink len by @len and advance data; returns the new data pointer.
 * Hits BUG() if len would drop below data_len.
 */
static inline void *__skb_pull(struct sk_buff *skb, unsigned int len)
{
	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);

	skb->len -= len;
	if (unlikely(skb->len < skb->data_len)) {
#if defined(CONFIG_DEBUG_NET)
		/* Restore len so the dump shows the pre-pull state. */
		skb->len += len;
		pr_err("__skb_pull(len=%u)\n", len);
		skb_dump(KERN_ERR, skb, false);
#endif
		BUG();
	}
	return skb->data += len;
}

/* Returns NULL when @len exceeds skb->len, else the result of __skb_pull(). */
static inline void *skb_pull_inline(struct sk_buff *skb, unsigned int len)
{
	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
}

void *skb_pull_data(struct sk_buff *skb, size_t len);

void *__pskb_pull_tail(struct sk_buff *skb, int delta);

/* Make sure the first @len bytes are in the head area.
 * Returns SKB_NOT_DROPPED_YET on success, SKB_DROP_REASON_PKT_TOO_SMALL when
 * @len exceeds skb->len, or SKB_DROP_REASON_NOMEM when __pskb_pull_tail()
 * fails.
 */
static inline enum skb_drop_reason
pskb_may_pull_reason(struct sk_buff *skb, unsigned int len)
{
	DEBUG_NET_WARN_ON_ONCE(len > INT_MAX);
	skb_might_realloc(skb);

	if (likely(len <= skb_headlen(skb)))
		return SKB_NOT_DROPPED_YET;

	if (unlikely(len > skb->len))
		return SKB_DROP_REASON_PKT_TOO_SMALL;

	if (unlikely(!__pskb_pull_tail(skb, len - skb_headlen(skb))))
		return SKB_DROP_REASON_NOMEM;

	return SKB_NOT_DROPPED_YET;
}

/* Boolean form of pskb_may_pull_reason(). */
static inline bool pskb_may_pull(struct sk_buff *skb, unsigned int len)
{
	return pskb_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET;
}

/* pskb_may_pull() followed by the pull itself; NULL on failure. */
static inline void *pskb_pull(struct sk_buff *skb, unsigned int len)
{
	if (!pskb_may_pull(skb, len))
		return NULL;

	skb->len -= len;
	return skb->data += len;
}

void skb_condense(struct sk_buff *skb);

/**
 *	skb_headroom - bytes at buffer head
 *	@skb: buffer to check
 *
 *	Return the number of bytes of free space at the head of an &sk_buff.
 */
static inline unsigned int skb_headroom(const struct sk_buff *skb)
{
	return skb->data - skb->head;
}

/**
 *	skb_tailroom - bytes at buffer end
 *	@skb: buffer to check
 *
 *	Return the number of bytes of free space at the tail of an sk_buff
 */
static inline int skb_tailroom(const struct sk_buff *skb)
{
	return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail;
}

/**
 *	skb_availroom - bytes at buffer end
 *	@skb: buffer to check
 *
 *	Return the number of bytes of free space at the tail of an sk_buff
 *	allocated by sk_stream_alloc()
 */
static inline int skb_availroom(const struct sk_buff *skb)
{
	if (skb_is_nonlinear(skb))
		return 0;

	return skb->end - skb->tail - skb->reserved_tailroom;
}

/**
 *	skb_reserve - adjust headroom
 *	@skb: buffer to alter
 *	@len: bytes to move
 *
 *	Increase the headroom of an empty &sk_buff by reducing the tail
 *	room. This is only allowed for an empty buffer.
 */
static inline void skb_reserve(struct sk_buff *skb, int len)
{
	skb->data += len;
	skb->tail += len;
}

/**
 *	skb_tailroom_reserve - adjust reserved_tailroom
 *	@skb: buffer to alter
 *	@mtu: maximum amount of headlen permitted
 *	@needed_tailroom: minimum amount of reserved_tailroom
 *
 *	Set reserved_tailroom so that headlen can be as large as possible but
 *	not larger than mtu and tailroom cannot be smaller than
 *	needed_tailroom.
 *	The required headroom should already have been reserved before using
 *	this function.
*/
static inline void skb_tailroom_reserve(struct sk_buff *skb, unsigned int mtu,
					unsigned int needed_tailroom)
{
	SKB_LINEAR_ASSERT(skb);
	if (mtu < skb_tailroom(skb) - needed_tailroom)
		/* use at most mtu */
		skb->reserved_tailroom = skb_tailroom(skb) - mtu;
	else
		/* use up to all available space */
		skb->reserved_tailroom = needed_tailroom;
}

/* Values stored in skb->inner_protocol_type by the two setters below. */
#define ENCAP_TYPE_ETHER	0
#define ENCAP_TYPE_IPPROTO	1

static inline void skb_set_inner_protocol(struct sk_buff *skb,
					  __be16 protocol)
{
	skb->inner_protocol = protocol;
	skb->inner_protocol_type = ENCAP_TYPE_ETHER;
}

static inline void skb_set_inner_ipproto(struct sk_buff *skb,
					 __u8 ipproto)
{
	skb->inner_ipproto = ipproto;
	skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
}

/* Copy the mac/network/transport header offsets into their inner_* fields. */
static inline void skb_reset_inner_headers(struct sk_buff *skb)
{
	skb->inner_mac_header = skb->mac_header;
	skb->inner_network_header = skb->network_header;
	skb->inner_transport_header = skb->transport_header;
}

/* An all-ones mac_header is the "not set" marker. */
static inline int skb_mac_header_was_set(const struct sk_buff *skb)
{
	return skb->mac_header != (typeof(skb->mac_header))~0U;
}

/* Set mac_len to network_header - mac_header; if the mac header was never
 * set, mac_len becomes 0 (with a debug warning).
 */
static inline void skb_reset_mac_len(struct sk_buff *skb)
{
	if (!skb_mac_header_was_set(skb)) {
		DEBUG_NET_WARN_ON_ONCE(1);
		skb->mac_len = 0;
	} else {
		skb->mac_len = skb->network_header - skb->mac_header;
	}
}

/*
 * Header accessors. Each *_header field is an offset that the getters add
 * to skb->head; the reset helpers store the current data - head offset and
 * warn (debug builds) if it does not fit the field; the set helpers reset
 * and then add @offset.
 */
static inline unsigned char *skb_inner_transport_header(const struct sk_buff
							*skb)
{
	return skb->head + skb->inner_transport_header;
}

static inline int skb_inner_transport_offset(const struct sk_buff *skb)
{
	return skb_inner_transport_header(skb) - skb->data;
}

static inline void skb_reset_inner_transport_header(struct sk_buff *skb)
{
	long offset = skb->data - skb->head;

	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_transport_header))offset);
	skb->inner_transport_header = offset;
}

static inline void skb_set_inner_transport_header(struct sk_buff *skb,
						   const int offset)
{
	skb_reset_inner_transport_header(skb);
	skb->inner_transport_header += offset;
}

static inline unsigned char *skb_inner_network_header(const struct sk_buff *skb)
{
	return skb->head + skb->inner_network_header;
}

static inline void skb_reset_inner_network_header(struct sk_buff *skb)
{
	long offset = skb->data - skb->head;

	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_network_header))offset);
	skb->inner_network_header = offset;
}

static inline void skb_set_inner_network_header(struct sk_buff *skb,
						const int offset)
{
	skb_reset_inner_network_header(skb);
	skb->inner_network_header += offset;
}

/* Treats any non-zero inner_network_header as "set". */
static inline bool skb_inner_network_header_was_set(const struct sk_buff *skb)
{
	return skb->inner_network_header > 0;
}

static inline unsigned char *skb_inner_mac_header(const struct sk_buff *skb)
{
	return skb->head + skb->inner_mac_header;
}

static inline void skb_reset_inner_mac_header(struct sk_buff *skb)
{
	long offset = skb->data - skb->head;

	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->inner_mac_header))offset);
	skb->inner_mac_header = offset;
}

static inline void skb_set_inner_mac_header(struct sk_buff *skb,
					    const int offset)
{
	skb_reset_inner_mac_header(skb);
	skb->inner_mac_header += offset;
}

/* An all-ones transport_header is the "not set" marker. */
static inline bool skb_transport_header_was_set(const struct sk_buff *skb)
{
	return skb->transport_header != (typeof(skb->transport_header))~0U;
}

static inline unsigned char *skb_transport_header(const struct sk_buff *skb)
{
	DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb));
	return skb->head + skb->transport_header;
}

static inline void skb_reset_transport_header(struct sk_buff *skb)
{
	long offset = skb->data - skb->head;

	DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->transport_header))offset);
	skb->transport_header = offset;
}

/**
 * skb_reset_transport_header_careful - conditionally reset transport header
 * @skb: buffer to alter
 *
 * Hardened version of skb_reset_transport_header().
 *
 * Returns: true if the operation was a success.
*/ static inline bool __must_check skb_reset_transport_header_careful(struct sk_buff *skb) { long offset = skb->data - skb->head; if (unlikely(offset != (typeof(skb->transport_header))offset)) return false; if (unlikely(offset == (typeof(skb->transport_header))~0U)) return false; skb->transport_header = offset; return true; } static inline void skb_set_transport_header(struct sk_buff *skb, const int offset) { skb_reset_transport_header(skb); skb->transport_header += offset; } static inline unsigned char *skb_network_header(const struct sk_buff *skb) { return skb->head + skb->network_header; } static inline void skb_reset_network_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->network_header))offset); skb->network_header = offset; } static inline void skb_set_network_header(struct sk_buff *skb, const int offset) { skb_reset_network_header(skb); skb->network_header += offset; } static inline unsigned char *skb_mac_header(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->head + skb->mac_header; } static inline int skb_mac_offset(const struct sk_buff *skb) { return skb_mac_header(skb) - skb->data; } static inline u32 skb_mac_header_len(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_mac_header_was_set(skb)); return skb->network_header - skb->mac_header; } static inline void skb_unset_mac_header(struct sk_buff *skb) { skb->mac_header = (typeof(skb->mac_header))~0U; } static inline void skb_reset_mac_header(struct sk_buff *skb) { long offset = skb->data - skb->head; DEBUG_NET_WARN_ON_ONCE(offset != (typeof(skb->mac_header))offset); skb->mac_header = offset; } static inline void skb_set_mac_header(struct sk_buff *skb, const int offset) { skb_reset_mac_header(skb); skb->mac_header += offset; } static inline void skb_pop_mac_header(struct sk_buff *skb) { skb->mac_header = skb->network_header; } static inline void skb_probe_transport_header(struct sk_buff 
*skb) { struct flow_keys_basic keys; if (skb_transport_header_was_set(skb)) return; if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } static inline void skb_mac_header_rebuild(struct sk_buff *skb) { if (skb_mac_header_was_set(skb)) { const unsigned char *old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -skb->mac_len); memmove(skb_mac_header(skb), old_mac, skb->mac_len); } } /* Move the full mac header up to current network_header. * Leaves skb->data pointing at offset skb->mac_len into the mac_header. * Must be provided the complete mac header length. */ static inline void skb_mac_header_rebuild_full(struct sk_buff *skb, u32 full_mac_len) { if (skb_mac_header_was_set(skb)) { const unsigned char *old_mac = skb_mac_header(skb); skb_set_mac_header(skb, -full_mac_len); memmove(skb_mac_header(skb), old_mac, full_mac_len); __skb_push(skb, full_mac_len - skb->mac_len); } } static inline int skb_checksum_start_offset(const struct sk_buff *skb) { return skb->csum_start - skb_headroom(skb); } static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) { return skb->head + skb->csum_start; } static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; } static inline u32 skb_network_header_len(const struct sk_buff *skb) { DEBUG_NET_WARN_ON_ONCE(!skb_transport_header_was_set(skb)); return skb->transport_header - skb->network_header; } static inline u32 skb_inner_network_header_len(const struct sk_buff *skb) { return skb->inner_transport_header - skb->inner_network_header; } static inline int skb_network_offset(const struct sk_buff *skb) { return skb_network_header(skb) - skb->data; } static inline int skb_inner_network_offset(const struct sk_buff *skb) { return skb_inner_network_header(skb) - skb->data; } static inline enum skb_drop_reason pskb_network_may_pull_reason(struct sk_buff *skb, unsigned int len) { return 
pskb_may_pull_reason(skb, skb_network_offset(skb) + len); } static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) { return pskb_network_may_pull_reason(skb, len) == SKB_NOT_DROPPED_YET; } /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the * hardware handles it or large if we have to take an exception and fix it * in software. * * Since an ethernet header is 14 bytes network drivers often end up with * the IP header at an unaligned offset. The IP header can be aligned by * shifting the start of the packet by 2 bytes. Drivers should do this * with: * * skb_reserve(skb, NET_IP_ALIGN); * * The downside to this alignment of the IP header is that the DMA is now * unaligned. On some architectures the cost of an unaligned DMA is high * and this cost outweighs the gains made by aligning the IP header. * * Since this trade off varies between architectures, we allow NET_IP_ALIGN * to be overridden. */ #ifndef NET_IP_ALIGN #define NET_IP_ALIGN 2 #endif /* * The networking layer reserves some headroom in skb data (via * dev_alloc_skb). This is used to avoid having to reallocate skb data when * the header has to grow. In the default case, if the header has to grow * 32 bytes or less we avoid the reallocation. * * Unfortunately this headroom changes the DMA alignment of the resulting * network packet. As for NET_IP_ALIGN, this unaligned DMA is expensive * on some architectures. An architecture can override this value, * perhaps setting it to a cacheline in size (since that will maintain * cacheline alignment of the DMA). It must be a power of 2. * * Various parts of the networking layer expect at least 32 bytes of * headroom, you should not reduce this. * * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) * to reduce average number of cache lines per packet. 
* get_rps_cpu() for example only access one 64 bytes aligned block : * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD #define NET_SKB_PAD max(32, L1_CACHE_BYTES) #endif int ___pskb_trim(struct sk_buff *skb, unsigned int len); static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { if (WARN_ON(skb_is_nonlinear(skb))) return; skb->len = len; skb_set_tail_pointer(skb, len); } static inline void __skb_trim(struct sk_buff *skb, unsigned int len) { __skb_set_length(skb, len); } void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) { if (skb->data_len) return ___pskb_trim(skb, len); __skb_trim(skb, len); return 0; } static inline int pskb_trim(struct sk_buff *skb, unsigned int len) { skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; } /** * pskb_trim_unique - remove end from a paged unique (not cloned) buffer * @skb: buffer to alter * @len: new length * * This is identical to pskb_trim except that the caller knows that * the skb is not cloned so we should never get an error due to out- * of-memory. */ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) { int err = pskb_trim(skb, len); BUG_ON(err); } static inline int __skb_grow(struct sk_buff *skb, unsigned int len) { unsigned int diff = len - skb->len; if (skb_tailroom(skb) < diff) { int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), GFP_ATOMIC); if (ret) return ret; } __skb_set_length(skb, len); return 0; } /** * skb_orphan - orphan a buffer * @skb: buffer to orphan * * If a buffer currently has an owner then we call the owner's * destructor function and make the @skb unowned. The buffer continues * to exist but is no longer charged to its former owner. 
*/
static inline void skb_orphan(struct sk_buff *skb)
{
	/* Without a destructor there must be no owning socket either. */
	if (!skb->destructor) {
		BUG_ON(skb->sk);
		return;
	}

	skb->destructor(skb);
	skb->destructor = NULL;
	skb->sk = NULL;
}

/**
 *	skb_orphan_frags - orphan the frags contained in a buffer
 *	@skb: buffer to orphan frags from
 *	@gfp_mask: allocation mask for replacement pages
 *
 *	For each frag in the SKB which needs a destructor (i.e. has an
 *	owner) create a copy of that frag and release the original
 *	page by calling the destructor.
 *
 *	Nothing is done (and 0 is returned) for non-zerocopy skbs and for
 *	skbs flagged SKBFL_DONT_ORPHAN; otherwise the result of
 *	skb_copy_ubufs() is returned.
 */
static inline int skb_orphan_frags(struct sk_buff *skb, gfp_t gfp_mask)
{
	if (unlikely(skb_zcopy(skb)) &&
	    !(skb_shinfo(skb)->flags & SKBFL_DONT_ORPHAN))
		return skb_copy_ubufs(skb, gfp_mask);

	return 0;
}

/* Frags must be orphaned, even if refcounted, if skb might loop to rx path */
static inline int skb_orphan_frags_rx(struct sk_buff *skb, gfp_t gfp_mask)
{
	return unlikely(skb_zcopy(skb)) ? skb_copy_ubufs(skb, gfp_mask) : 0;
}

/**
 *	__skb_queue_purge_reason - empty a list
 *	@list: list to empty
 *	@reason: drop reason
 *
 *	Delete all buffers on an &sk_buff list. Each buffer is removed from
 *	the list and one reference dropped. This function does not take the
 *	list lock and the caller must hold the relevant locks to use it.
*/ static inline void __skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason) { struct sk_buff *skb; while ((skb = __skb_dequeue(list)) != NULL) kfree_skb_reason(skb, reason); } static inline void __skb_queue_purge(struct sk_buff_head *list) { __skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } void skb_queue_purge_reason(struct sk_buff_head *list, enum skb_drop_reason reason); static inline void skb_queue_purge(struct sk_buff_head *list) { skb_queue_purge_reason(list, SKB_DROP_REASON_QUEUE_PURGE); } unsigned int skb_rbtree_purge(struct rb_root *root); void skb_errqueue_purge(struct sk_buff_head *list); void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); /** * netdev_alloc_frag - allocate a page fragment * @fragsz: fragment size * * Allocates a frag from a page for receive buffer. * Uses GFP_ATOMIC allocations. */ static inline void *netdev_alloc_frag(unsigned int fragsz) { return __netdev_alloc_frag_align(fragsz, ~0u); } static inline void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __netdev_alloc_frag_align(fragsz, -align); } struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, gfp_t gfp_mask); /** * netdev_alloc_skb - allocate an skbuff for rx on a specific device * @dev: network device to receive on * @length: length to allocate * * Allocate a new &sk_buff and assign it a usage count of one. The * buffer has unspecified headroom built in. Users should allocate * the headroom they think they need without accounting for the * built in space. The built in space is used for optimisations. * * %NULL is returned if there is no free memory. Although this function * allocates memory it can be called from an interrupt. 
*/ static inline struct sk_buff *netdev_alloc_skb(struct net_device *dev, unsigned int length) { return __netdev_alloc_skb(dev, length, GFP_ATOMIC); } /* legacy helper around __netdev_alloc_skb() */ static inline struct sk_buff *__dev_alloc_skb(unsigned int length, gfp_t gfp_mask) { return __netdev_alloc_skb(NULL, length, gfp_mask); } /* legacy helper around netdev_alloc_skb() */ static inline struct sk_buff *dev_alloc_skb(unsigned int length) { return netdev_alloc_skb(NULL, length); } static inline struct sk_buff *__netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length, gfp_t gfp) { struct sk_buff *skb = __netdev_alloc_skb(dev, length + NET_IP_ALIGN, gfp); if (NET_IP_ALIGN && skb) skb_reserve(skb, NET_IP_ALIGN); return skb; } static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev, unsigned int length) { return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC); } static inline void skb_free_frag(void *addr) { page_frag_free(addr); } void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask); static inline void *napi_alloc_frag(unsigned int fragsz) { return __napi_alloc_frag_align(fragsz, ~0u); } static inline void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align) { WARN_ON_ONCE(!is_power_of_2(align)); return __napi_alloc_frag_align(fragsz, -align); } struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int length); void napi_consume_skb(struct sk_buff *skb, int budget); void napi_skb_free_stolen_head(struct sk_buff *skb); void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason); /** * __dev_alloc_pages - allocate page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * @order: size of the allocation * * Allocate a new page. * * %NULL is returned if there is no free memory. */ static inline struct page *__dev_alloc_pages_noprof(gfp_t gfp_mask, unsigned int order) { /* This piece of code contains several assumptions. * 1. 
This is for device Rx, therefore a cold page is preferred. * 2. The expectation is the user wants a compound page. * 3. If requesting a order 0 page it will not be compound * due to the check to see if order has a value in prep_new_page * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to * code in gfp_to_alloc_flags that should be enforcing this. */ gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; return alloc_pages_node_noprof(NUMA_NO_NODE, gfp_mask, order); } #define __dev_alloc_pages(...) alloc_hooks(__dev_alloc_pages_noprof(__VA_ARGS__)) /* * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). */ #define dev_alloc_pages(_order) __dev_alloc_pages(GFP_ATOMIC | __GFP_NOWARN, _order) /** * __dev_alloc_page - allocate a page for network Rx * @gfp_mask: allocation priority. Set __GFP_NOMEMALLOC if not for network Rx * * Allocate a new page. * * %NULL is returned if there is no free memory. */ static inline struct page *__dev_alloc_page_noprof(gfp_t gfp_mask) { return __dev_alloc_pages_noprof(gfp_mask, 0); } #define __dev_alloc_page(...) alloc_hooks(__dev_alloc_page_noprof(__VA_ARGS__)) /* * This specialized allocator has to be a macro for its allocations to be * accounted separately (to have a separate alloc_tag). */ #define dev_alloc_page() dev_alloc_pages(0) /** * dev_page_is_reusable - check whether a page can be reused for network Rx * @page: the page to test * * A page shouldn't be considered for reusing/recycling if it was allocated * under memory pressure or at a distant memory node. * * Returns: false if this page should be returned to page allocator, true * otherwise. 
*/
static inline bool dev_page_is_reusable(const struct page *page)
{
	/* Reusable only if the page sits on the local memory node and is
	 * not a pfmemalloc page; hinted as the common case.
	 */
	return likely(page_to_nid(page) == numa_mem_id() &&
		      !page_is_pfmemalloc(page));
}

/**
 *	skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page
 *	@page: The page that was allocated from skb_alloc_page
 *	@skb: The skb that may need pfmemalloc set
 *
 *	Sets @skb->pfmemalloc when @page is a pfmemalloc page. The flag is
 *	only ever set here, never cleared.
 */
static inline void skb_propagate_pfmemalloc(const struct page *page,
					    struct sk_buff *skb)
{
	if (page_is_pfmemalloc(page))
		skb->pfmemalloc = true;
}

/**
 * skb_frag_off() - Returns the offset of a skb fragment
 * @frag: the paged fragment
 *
 * Return: the value of @frag->offset.
 */
static inline unsigned int skb_frag_off(const skb_frag_t *frag)
{
	return frag->offset;
}

/**
 * skb_frag_off_add() - Increments the offset of a skb fragment by @delta
 * @frag: skb fragment
 * @delta: value to add (signed, so it may also move the offset back)
 */
static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
{
	frag->offset += delta;
}

/**
 * skb_frag_off_set() - Sets the offset of a skb fragment
 * @frag: skb fragment
 * @offset: offset of fragment
 */
static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
{
	frag->offset = offset;
}

/**
 * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment
 * @fragto: skb fragment where offset is set
 * @fragfrom: skb fragment offset is copied from
 *
 * Only the offset is copied; no other field of @fragto is touched.
 */
static inline void skb_frag_off_copy(skb_frag_t *fragto,
				     const skb_frag_t *fragfrom)
{
	fragto->offset = fragfrom->offset;
}

/* Return: true if the skb_frag contains a net_iov. */
static inline bool skb_frag_is_net_iov(const skb_frag_t *frag)
{
	return netmem_is_net_iov(frag->netmem);
}

/**
 * skb_frag_net_iov - retrieve the net_iov referred to by fragment
 * @frag: the fragment
 *
 * Return: the &struct net_iov associated with @frag. Returns NULL if this
 * frag has no associated net_iov.
*/ static inline struct net_iov *skb_frag_net_iov(const skb_frag_t *frag) { if (!skb_frag_is_net_iov(frag)) return NULL; return netmem_to_net_iov(frag->netmem); } /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * * Return: the &struct page associated with @frag. Returns NULL if this frag * has no associated page. */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { if (skb_frag_is_net_iov(frag)) return NULL; return netmem_to_page(frag->netmem); } /** * skb_frag_netmem - retrieve the netmem referred to by a fragment * @frag: the fragment * * Return: the &netmem_ref associated with @frag. */ static inline netmem_ref skb_frag_netmem(const skb_frag_t *frag) { return frag->netmem; } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, unsigned int headroom); int skb_cow_data_for_xdp(struct page_pool *pool, struct sk_buff **pskb, const struct bpf_prog *prog); /** * skb_frag_address - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * * Returns: the address of the data within @frag. The page must already * be mapped. */ static inline void *skb_frag_address(const skb_frag_t *frag) { if (!skb_frag_page(frag)) return NULL; return page_address(skb_frag_page(frag)) + skb_frag_off(frag); } /** * skb_frag_address_safe - gets the address of the data contained in a paged fragment * @frag: the paged fragment buffer * * Returns: the address of the data within @frag. Checks that the page * is mapped and returns %NULL otherwise. 
*/
static inline void *skb_frag_address_safe(const skb_frag_t *frag)
{
	struct page *page = skb_frag_page(frag);
	void *ptr;

	/* No page behind this frag (skb_frag_page() returned NULL). */
	if (!page)
		return NULL;

	ptr = page_address(page);
	/* page_address() gave no address for this page. */
	if (unlikely(!ptr))
		return NULL;

	return ptr + skb_frag_off(frag);
}

/**
 * skb_frag_page_copy() - sets the page in a fragment from another fragment
 * @fragto: skb fragment where page is set
 * @fragfrom: skb fragment page is copied from
 *
 * Copies the netmem reference only; offset and size are left alone.
 */
static inline void skb_frag_page_copy(skb_frag_t *fragto,
				      const skb_frag_t *fragfrom)
{
	fragto->netmem = fragfrom->netmem;
}

bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);

/**
 * __skb_frag_dma_map - maps a paged fragment via the DMA API
 * @dev: the device to map the fragment to
 * @frag: the paged fragment to map
 * @offset: the offset within the fragment (starting at the
 *          fragment's own offset)
 * @size: the number of bytes to map
 * @dir: the direction of the mapping (an &enum dma_data_direction value,
 *       e.g. %DMA_TO_DEVICE)
 *
 * Maps the page associated with @frag to @dev.
 *
 * For a net_iov frag no mapping call is made: the address is computed
 * from the net_iov's stored dma_addr plus @offset and the frag offset.
 *
 * Return: the DMA address of the requested range.
 */
static inline dma_addr_t __skb_frag_dma_map(struct device *dev,
					    const skb_frag_t *frag,
					    size_t offset, size_t size,
					    enum dma_data_direction dir)
{
	if (skb_frag_is_net_iov(frag)) {
		return netmem_to_net_iov(frag->netmem)->dma_addr + offset +
		       frag->offset;
	}
	return dma_map_page(dev, skb_frag_page(frag),
			    skb_frag_off(frag) + offset, size, dir);
}

/*
 * skb_frag_dma_map(dev, frag[, offset[, size[, dir]]])
 *
 * Dispatches on the number of optional arguments to one of the
 * _skb_frag_dma_mapN() helpers below:
 *   0 extra args: whole fragment, DMA_TO_DEVICE
 *   1 extra arg:  from @offset to the end of the fragment, DMA_TO_DEVICE
 *   2 extra args: explicit @offset and @size, DMA_TO_DEVICE
 *   3 extra args: explicit @offset, @size and @dir
 */
#define skb_frag_dma_map(dev, frag, ...)				\
	CONCATENATE(_skb_frag_dma_map,					\
		    COUNT_ARGS(__VA_ARGS__))(dev, frag, ##__VA_ARGS__)

/* @uf and @uo are unique local names, so @frag and @offset are each
 * evaluated exactly once.
 */
#define __skb_frag_dma_map1(dev, frag, offset, uf, uo) ({		\
	const skb_frag_t *uf = (frag);					\
	size_t uo = (offset);						\
									\
	__skb_frag_dma_map(dev, uf, uo, skb_frag_size(uf) - uo,		\
			   DMA_TO_DEVICE);				\
})
#define _skb_frag_dma_map1(dev, frag, offset)				\
	__skb_frag_dma_map1(dev, frag, offset, __UNIQUE_ID(frag_),	\
			    __UNIQUE_ID(offset_))
#define _skb_frag_dma_map0(dev, frag)					\
	_skb_frag_dma_map1(dev, frag, 0)
#define _skb_frag_dma_map2(dev, frag, offset, size)			\
	__skb_frag_dma_map(dev, frag, offset, size, DMA_TO_DEVICE)
#define _skb_frag_dma_map3(dev, frag, offset, size, dir)		\
	__skb_frag_dma_map(dev, frag, offset, size, dir)

/* __pskb_copy() called with the skb's current headroom. */
static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
					gfp_t gfp_mask)
{
	return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
}


/* __pskb_copy_fclone() called with the skb's current headroom and its
 * last argument set to true.
 */
static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb,
						  gfp_t gfp_mask)
{
	return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true);
}


/**
 *	skb_clone_writable - is the header of a clone writable
 *	@skb: buffer to check
 *	@len: length up to which to write
 *
 *	Returns true if modifying the header part of the cloned buffer
 *	does not requires the data to be copied.
*/ static inline int skb_clone_writable(const struct sk_buff *skb, unsigned int len) { return !skb_header_cloned(skb) && skb_headroom(skb) + len <= skb->hdr_len; } static inline int skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { int delta = 0; if (headroom > skb_headroom(skb)) delta = headroom - skb_headroom(skb); if (delta || cloned) return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, GFP_ATOMIC); return 0; } /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow * @headroom: needed headroom * * If the skb passed lacks sufficient headroom or its data part * is shared, data is reallocated. If reallocation fails, an error * is returned and original skb is not changed. * * The result is skb with writable area skb->head...skb->tail * and at least @headroom of space at head. */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_cloned(skb)); } /** * skb_cow_head - skb_cow but only making the head writable * @skb: buffer to cow * @headroom: needed headroom * * This function is identical to skb_cow except that we replace the * skb_cloned check by skb_header_cloned. It should be used when * you only need to push on some header and do not need to modify * the data. */ static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) { return __skb_cow(skb, headroom, skb_header_cloned(skb)); } /** * skb_padto - pad an skbuff up to a minimal size * @skb: buffer to pad * @len: minimal length * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. 
*/
static inline int skb_padto(struct sk_buff *skb, unsigned int len)
{
	unsigned int cur = skb->len;

	if (unlikely(cur < len))
		return skb_pad(skb, len - cur);

	return 0;
}

/**
 *	__skb_put_padto - increase size and pad an skbuff up to a minimal size
 *	@skb: buffer to pad
 *	@len: minimal length
 *	@free_on_error: free buffer on error
 *
 *	If the buffer is shorter than @len, pad it with __skb_pad() and
 *	extend it by the missing bytes; a buffer that is already long
 *	enough is left untouched.
 *
 *	Returns zero on success and -ENOMEM if padding failed. The skb is
 *	freed on error if @free_on_error is true.
 */
static inline int __must_check __skb_put_padto(struct sk_buff *skb,
					       unsigned int len,
					       bool free_on_error)
{
	unsigned int cur = skb->len;
	unsigned int pad;

	if (likely(cur >= len))
		return 0;

	pad = len - cur;
	if (__skb_pad(skb, pad, free_on_error))
		return -ENOMEM;

	__skb_put(skb, pad);
	return 0;
}

/**
 *	skb_put_padto - increase size and pad an skbuff up to a minimal size
 *	@skb: buffer to pad
 *	@len: minimal length
 *
 *	Pads up a buffer to ensure the trailing bytes exist and are
 *	blanked. If the buffer already contains sufficient data it
 *	is untouched. Otherwise it is extended. Returns zero on
 *	success. The skb is freed on error.
*/
static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len)
{
	/* Always ask __skb_put_padto() to free the skb on failure. */
	return __skb_put_padto(skb, len, true);
}

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
	__must_check;

/* Return true when (@netmem, @off) starts exactly where frag @i - 1
 * ends, i.e. same netmem and @off equal to that frag's offset plus its
 * size. Always false for zerocopy skbs and when @i is 0.
 */
static inline bool skb_can_coalesce_netmem(struct sk_buff *skb, int i,
					   netmem_ref netmem, int off)
{
	if (skb_zcopy(skb))
		return false;
	if (i) {
		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];

		return netmem == skb_frag_netmem(frag) &&
		       off == skb_frag_off(frag) + skb_frag_size(frag);
	}
	return false;
}

/* Page flavour of skb_can_coalesce_netmem(). */
static inline bool skb_can_coalesce(struct sk_buff *skb, int i,
				    const struct page *page, int off)
{
	return skb_can_coalesce_netmem(skb, i, page_to_netmem(page), off);
}

/* Pull all skb->data_len bytes of non-linear data into the linear area.
 * Returns 0 on success, -ENOMEM when __pskb_pull_tail() fails.
 */
static inline int __skb_linearize(struct sk_buff *skb)
{
	return __pskb_pull_tail(skb, skb->data_len) ? 0 : -ENOMEM;
}

/**
 *	skb_linearize - convert paged skb to linear one
 *	@skb: buffer to linearize
 *
 *	If there is no free memory -ENOMEM is returned, otherwise zero
 *	is returned and the old skb data released.
 *	An skb that is already linear is left untouched and zero is
 *	returned.
 */
static inline int skb_linearize(struct sk_buff *skb)
{
	return skb_is_nonlinear(skb) ? __skb_linearize(skb) : 0;
}

/**
 * skb_has_shared_frag - can any frag be overwritten
 * @skb: buffer to test
 *
 * Return: true if the skb has at least one frag that might be modified
 * by an external entity (as in vmsplice()/sendfile())
 */
static inline bool skb_has_shared_frag(const struct sk_buff *skb)
{
	return skb_is_nonlinear(skb) &&
	       skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG;
}

/**
 *	skb_linearize_cow - make sure skb is linear and writable
 *	@skb: buffer to process
 *
 *	If there is no free memory -ENOMEM is returned, otherwise zero
 *	is returned and the old skb data released.
 */
static inline int skb_linearize_cow(struct sk_buff *skb)
{
	/* "||" binds tighter than "?:": linearize when the skb is either
	 * nonlinear or cloned.
	 */
	return skb_is_nonlinear(skb) || skb_cloned(skb) ?
	       __skb_linearize(skb) : 0;
}

/* Worker for checksum fix-up after a pull, with an explicit block
 * offset @off: for CHECKSUM_COMPLETE, subtract the checksum of the
 * @len bytes at @start from skb->csum; for CHECKSUM_PARTIAL whose
 * checksum start offset is now negative, fall back to CHECKSUM_NONE.
 */
static __always_inline void
__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
		     unsigned int off)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_block_sub(skb->csum,
					   csum_partial(start, len, 0), off);
	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
		 skb_checksum_start_offset(skb) < 0)
		skb->ip_summed = CHECKSUM_NONE;
}

/**
 *	skb_postpull_rcsum - update checksum for received skb after pull
 *	@skb: buffer to update
 *	@start: start of data before pull
 *	@len: length of data pulled
 *
 *	After doing a pull on a received packet, you need to call this to
 *	update the CHECKSUM_COMPLETE checksum, or set ip_summed to
 *	CHECKSUM_NONE so that it can be recomputed from scratch.
 */
static inline void skb_postpull_rcsum(struct sk_buff *skb,
				      const void *start, unsigned int len)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		/* Fold the pulled bytes into the negated checksum, then
		 * negate the result again.
		 */
		skb->csum = wsum_negate(csum_partial(start, len,
						     wsum_negate(skb->csum)));
	else if (skb->ip_summed == CHECKSUM_PARTIAL &&
		 skb_checksum_start_offset(skb) < 0)
		skb->ip_summed = CHECKSUM_NONE;
}

/* Worker for checksum fix-up after a push, with an explicit block
 * offset @off: for CHECKSUM_COMPLETE, add the checksum of the @len
 * bytes at @start to skb->csum. Other ip_summed values are left alone.
 */
static __always_inline void
__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len,
		     unsigned int off)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->csum = csum_block_add(skb->csum,
					   csum_partial(start, len, 0), off);
}

/**
 *	skb_postpush_rcsum - update checksum for received skb after push
 *	@skb: buffer to update
 *	@start: start of data after push
 *	@len: length of data pushed
 *
 *	After doing a push on a received packet, you need to call this to
 *	update the CHECKSUM_COMPLETE checksum.
*/
static inline void skb_postpush_rcsum(struct sk_buff *skb,
				      const void *start, unsigned int len)
{
	/* Block offset 0: the pushed bytes are passed as starting at
	 * offset zero.
	 */
	__skb_postpush_rcsum(skb, start, len, 0);
}

void *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);

/**
 *	skb_push_rcsum - push skb and update receive checksum
 *	@skb: buffer to update
 *	@len: length of data pushed
 *
 *	This function performs an skb_push on the packet and updates
 *	the CHECKSUM_COMPLETE checksum.  It should be used on
 *	receive path processing instead of skb_push unless you know
 *	that the checksum difference is zero (e.g., a valid IP header)
 *	or you are setting ip_summed to CHECKSUM_NONE.
 *
 *	Return: skb->data as it stands after the push.
 */
static inline void *skb_push_rcsum(struct sk_buff *skb, unsigned int len)
{
	skb_push(skb, len);
	/* skb->data is read after the push, so the checksum covers the
	 * newly exposed @len bytes.
	 */
	skb_postpush_rcsum(skb, skb->data, len);
	return skb->data;
}

int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len);
/**
 *	pskb_trim_rcsum - trim received skb and update checksum
 *	@skb: buffer to trim
 *	@len: new length
 *
 *	This is exactly the same as pskb_trim except that it ensures the
 *	checksum of received packets are still valid after the operation.
 *	It can change skb pointers.
*/ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { skb_might_realloc(skb); if (likely(len >= skb->len)) return 0; return pskb_trim_rcsum_slow(skb, len); } static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; __skb_trim(skb, len); return 0; } static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; return __skb_grow(skb, len); } #define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) #define skb_rb_first(root) rb_to_skb(rb_first(root)) #define skb_rb_last(root) rb_to_skb(rb_last(root)) #define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) #define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_queue_walk_safe(queue, skb, tmp) \ for (skb = (queue)->next, tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) #define skb_queue_walk_from(queue, skb) \ for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) #define skb_rbtree_walk(skb, root) \ for (skb = skb_rb_first(root); skb != NULL; \ skb = skb_rb_next(skb)) #define skb_rbtree_walk_from(skb) \ for (; skb != NULL; \ skb = skb_rb_next(skb)) #define skb_rbtree_walk_from_safe(skb, tmp) \ for (; tmp = skb ? 
skb_rb_next(skb) : NULL, (skb != NULL); \ skb = tmp) #define skb_queue_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->next) #define skb_queue_reverse_walk(queue, skb) \ for (skb = (queue)->prev; \ skb != (struct sk_buff *)(queue); \ skb = skb->prev) #define skb_queue_reverse_walk_safe(queue, skb, tmp) \ for (skb = (queue)->prev, tmp = skb->prev; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->prev) #define skb_queue_reverse_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->prev; \ skb != (struct sk_buff *)(queue); \ skb = tmp, tmp = skb->prev) static inline bool skb_has_frag_list(const struct sk_buff *skb) { return skb_shinfo(skb)->frag_list != NULL; } static inline void skb_frag_list_init(struct sk_buff *skb) { skb_shinfo(skb)->frag_list = NULL; } #define skb_walk_frags(skb, iter) \ for (iter = skb_shinfo(skb)->frag_list; iter; iter = iter->next) int __skb_wait_for_more_packets(struct sock *sk, struct sk_buff_head *queue, int *err, long *timeo_p, const struct sk_buff *skb); struct sk_buff *__skb_try_recv_from_queue(struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_try_recv_datagram(struct sock *sk, struct sk_buff_head *queue, unsigned int flags, int *off, int *err, struct sk_buff **last); struct sk_buff *__skb_recv_datagram(struct sock *sk, struct sk_buff_head *sk_queue, unsigned int flags, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags, int *err); __poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, struct msghdr *msg, int size) { return skb_copy_datagram_iter(from, offset, &msg->msg_iter, size); } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int 
hlen, struct msghdr *msg); int skb_copy_and_crc32c_datagram_iter(const struct sk_buff *skb, int offset, struct iov_iter *to, int len, u32 *crcp); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); void skb_free_datagram(struct sock *sk, struct sk_buff *skb); int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags); int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len); int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len); __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, u8 *to, int len); int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, struct pipe_inode_info *pipe, unsigned int len, unsigned int flags); int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, int len); int skb_send_sock_locked_with_flags(struct sock *sk, struct sk_buff *skb, int offset, int len, int flags); int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); unsigned int skb_zerocopy_headlen(const struct sk_buff *from); int skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen); void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen); void skb_scrub_packet(struct sk_buff *skb, bool xnet); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features, unsigned int offset); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len); int skb_ensure_writable_head_tail(struct sk_buff *skb, struct net_device *dev); int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff 
*skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); int skb_eth_pop(struct sk_buff *skb); int skb_eth_push(struct sk_buff *skb, const unsigned char *dst, const unsigned char *src); int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto, int mac_len, bool ethernet); int skb_mpls_pop(struct sk_buff *skb, __be16 next_proto, int mac_len, bool ethernet); int skb_mpls_update_lse(struct sk_buff *skb, __be32 mpls_lse); int skb_mpls_dec_ttl(struct sk_buff *skb); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, gfp_t gfp); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { return copy_from_iter_full(data, len, &msg->msg_iter) ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) { return copy_to_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); u32 skb_crc32c(const struct sk_buff *skb, int offset, int len, u32 crc); static inline void * __must_check __skb_header_pointer(const struct sk_buff *skb, int offset, int len, const void *data, int hlen, void *buffer) { if (likely(hlen - offset >= len)) return (void *)data + offset; if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL; return buffer; } static inline void * __must_check skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { return __skb_header_pointer(skb, offset, len, skb->data, skb_headlen(skb), buffer); } static inline void * __must_check skb_pointer_if_linear(const struct sk_buff *skb, int offset, int len) { if (likely(skb_headlen(skb) - offset >= len)) return skb->data + offset; return NULL; } /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. * @skb: socket buffer to check * @features: net device features * * Returns true if either: * 1. 
skb has frag_list and the device doesn't support FRAGLIST, or * 2. skb is fragmented and the device does not support SG. */ static inline bool skb_needs_linearize(struct sk_buff *skb, netdev_features_t features) { return skb_is_nonlinear(skb) && ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) || (skb_shinfo(skb)->nr_frags && !(features & NETIF_F_SG))); } static inline void skb_copy_from_linear_data(const struct sk_buff *skb, void *to, const unsigned int len) { memcpy(to, skb->data, len); } static inline void skb_copy_from_linear_data_offset(const struct sk_buff *skb, const int offset, void *to, const unsigned int len) { memcpy(to, skb->data + offset, len); } static inline void skb_copy_to_linear_data(struct sk_buff *skb, const void *from, const unsigned int len) { memcpy(skb->data, from, len); } static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb, const int offset, const void *from, const unsigned int len) { memcpy(skb->data + offset, from, len); } void skb_init(void); static inline ktime_t skb_get_ktime(const struct sk_buff *skb) { return skb->tstamp; } /** * skb_get_timestamp - get timestamp from a skb * @skb: skb to get stamp from * @stamp: pointer to struct __kernel_old_timeval to store stamp in * * Timestamps are stored in the skb as offsets to a base timestamp. * This function converts the offset back to a struct timeval and stores * it in stamp. 
*/ static inline void skb_get_timestamp(const struct sk_buff *skb, struct __kernel_old_timeval *stamp) { *stamp = ns_to_kernel_old_timeval(skb->tstamp); } static inline void skb_get_new_timestamp(const struct sk_buff *skb, struct __kernel_sock_timeval *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_usec = ts.tv_nsec / 1000; } static inline void skb_get_timestampns(const struct sk_buff *skb, struct __kernel_old_timespec *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_nsec = ts.tv_nsec; } static inline void skb_get_new_timestampns(const struct sk_buff *skb, struct __kernel_timespec *stamp) { struct timespec64 ts = ktime_to_timespec64(skb->tstamp); stamp->tv_sec = ts.tv_sec; stamp->tv_nsec = ts.tv_nsec; } static inline void __net_timestamp(struct sk_buff *skb) { skb->tstamp = ktime_get_real(); skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline ktime_t net_timedelta(ktime_t t) { return ktime_sub(ktime_get_real(), t); } static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt, u8 tstamp_type) { skb->tstamp = kt; if (kt) skb->tstamp_type = tstamp_type; else skb->tstamp_type = SKB_CLOCK_REALTIME; } static inline void skb_set_delivery_type_by_clockid(struct sk_buff *skb, ktime_t kt, clockid_t clockid) { u8 tstamp_type = SKB_CLOCK_REALTIME; switch (clockid) { case CLOCK_REALTIME: break; case CLOCK_MONOTONIC: tstamp_type = SKB_CLOCK_MONOTONIC; break; case CLOCK_TAI: tstamp_type = SKB_CLOCK_TAI; break; default: WARN_ON_ONCE(1); kt = 0; } skb_set_delivery_time(skb, kt, tstamp_type); } DECLARE_STATIC_KEY_FALSE(netstamp_needed_key); /* It is used in the ingress path to clear the delivery_time. * If needed, set the skb->tstamp to the (rcv) timestamp. 
*/
static inline void skb_clear_delivery_time(struct sk_buff *skb)
{
	/* Only act when a non-zero clock type is recorded. */
	if (skb->tstamp_type) {
		skb->tstamp_type = SKB_CLOCK_REALTIME;
		/* Take a fresh stamp only if someone asked for timestamps. */
		if (static_branch_unlikely(&netstamp_needed_key))
			skb->tstamp = ktime_get_real();
		else
			skb->tstamp = 0;
	}
}

/* Zero skb->tstamp, unless a non-zero tstamp_type is set, in which case
 * the timestamp is left untouched.
 */
static inline void skb_clear_tstamp(struct sk_buff *skb)
{
	if (skb->tstamp_type)
		return;

	skb->tstamp = 0;
}

/* Return skb->tstamp, or 0 when a non-zero tstamp_type is set. */
static inline ktime_t skb_tstamp(const struct sk_buff *skb)
{
	if (skb->tstamp_type)
		return 0;

	return skb->tstamp;
}

/* Return the stored timestamp when it is non-zero and its clock type is
 * not SKB_CLOCK_MONOTONIC. Otherwise return the current real time if
 * netstamp_needed_key is enabled or @cond is true, else 0.
 */
static inline ktime_t skb_tstamp_cond(const struct sk_buff *skb, bool cond)
{
	if (skb->tstamp_type != SKB_CLOCK_MONOTONIC && skb->tstamp)
		return skb->tstamp;

	if (static_branch_unlikely(&netstamp_needed_key) || cond)
		return ktime_get_real();

	return 0;
}

/* Return the meta_len value stored in the shared info of @skb. */
static inline u8 skb_metadata_len(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->meta_len;
}

/* The metadata area ends where the MAC header of @skb starts. */
static inline void *skb_metadata_end(const struct sk_buff *skb)
{
	return skb_mac_header(skb);
}

/* Compare the @meta_len bytes that precede skb_metadata_end() in both
 * skbs. Returns true when they differ.
 *
 * On 64-bit builds with CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, the
 * lengths 4, 8, 12, ..., 32 are compared with word loads: each step moves
 * both cursors backwards and ORs the XOR of the two words into @diffs.
 * Every other configuration or length falls back to memcmp().
 */
static inline bool __skb_metadata_differs(const struct sk_buff *skb_a,
					  const struct sk_buff *skb_b,
					  u8 meta_len)
{
	const void *a = skb_metadata_end(skb_a);
	const void *b = skb_metadata_end(skb_b);
	u64 diffs = 0;

	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
	    BITS_PER_LONG != 64)
		goto slow;

	/* Using more efficient variant than plain call to memcmp(). */
	switch (meta_len) {
/* __it() steps cursor x back by one u<op> word and yields the new value;
 * __it_diff() XORs the words both cursors then point at.
 */
#define __it(x, op) (x -= sizeof(u##op))
#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
	/* Multiples of 8: 64-bit words only. */
	case 32: diffs |= __it_diff(a, b, 64);
		fallthrough;
	case 24: diffs |= __it_diff(a, b, 64);
		fallthrough;
	case 16: diffs |= __it_diff(a, b, 64);
		fallthrough;
	case  8: diffs |= __it_diff(a, b, 64);
		break;
	/* 8n + 4: 64-bit words followed by one final 32-bit word. */
	case 28: diffs |= __it_diff(a, b, 64);
		fallthrough;
	case 20: diffs |= __it_diff(a, b, 64);
		fallthrough;
	case 12: diffs |= __it_diff(a, b, 64);
		fallthrough;
	case  4: diffs |= __it_diff(a, b, 32);
		break;
	default:
slow:
		/* a and b still point at the end of the metadata here. */
		return memcmp(a - meta_len, b - meta_len, meta_len);
	}
	return diffs;
}

/* Return true when the metadata of the two skbs differs: false if both
 * lengths are zero, true if the lengths differ, otherwise the result of
 * the byte comparison.
 */
static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
					const struct sk_buff *skb_b)
{
	u8 len_a = skb_metadata_len(skb_a);
	u8 len_b = skb_metadata_len(skb_b);

	if (!(len_a | len_b))
		return false;

	return len_a != len_b ?
	       true : __skb_metadata_differs(skb_a, skb_b, len_a);
}

/* Record @meta_len in the shared info of @skb. */
static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len)
{
	skb_shinfo(skb)->meta_len = meta_len;
}

/* Set the metadata length of @skb to zero. */
static inline void skb_metadata_clear(struct sk_buff *skb)
{
	skb_metadata_set(skb, 0);
}

struct sk_buff *skb_clone_sk(struct sk_buff *skb);

#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING

void skb_clone_tx_timestamp(struct sk_buff *skb);
bool skb_defer_rx_timestamp(struct sk_buff *skb);

#else /* CONFIG_NETWORK_PHY_TIMESTAMPING */

/* Without PHY timestamping support this is a no-op. */
static inline void skb_clone_tx_timestamp(struct sk_buff *skb)
{
}

/* Without PHY timestamping support nothing is ever deferred. */
static inline bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
	return false;
}

#endif /* !CONFIG_NETWORK_PHY_TIMESTAMPING */

/**
 * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps
 *
 * PHY drivers may accept clones of transmitted packets for
 * timestamping via their phy_driver.txtstamp method. These drivers
 * must call this function to return the skb back to the stack with a
 * timestamp.
*
 * @skb: clone of the original outgoing packet
 * @hwtstamps: hardware time stamps
 *
 */
void skb_complete_tx_timestamp(struct sk_buff *skb,
			       struct skb_shared_hwtstamps *hwtstamps);

/* NOTE(review): presumably the worker behind skb_tstamp_tx(), taking in
 * addition an @ack_skb, an explicit socket @sk and a timestamp type
 * @tstype. Its definition is outside this header, so confirm the exact
 * contract there.
 */
void __skb_tstamp_tx(struct sk_buff *orig_skb, const struct sk_buff *ack_skb,
		     struct skb_shared_hwtstamps *hwtstamps,
		     struct sock *sk, int tstype);

/**
 * skb_tstamp_tx - queue clone of skb with send time stamps
 * @orig_skb:	the original outgoing packet
 * @hwtstamps:	hardware time stamps, may be NULL if not available
 *
 * If the skb has a socket associated, then this function clones the
 * skb (thus sharing the actual data and optional structures), stores
 * the optional hardware time stamping information (if non NULL) or
 * generates a software time stamp (otherwise), then queues the clone
 * to the error queue of the socket.  Errors are silently ignored.
 */
void skb_tstamp_tx(struct sk_buff *orig_skb,
		   struct skb_shared_hwtstamps *hwtstamps);

/**
 * skb_tx_timestamp() - Driver hook for transmit timestamping
 *
 * Ethernet MAC Drivers should call this function in their hard_xmit()
 * function immediately before giving the sk_buff to the MAC hardware.
 *
 * Specifically, one should make absolutely sure that this function is
 * called before TX completion of this packet can trigger.  Otherwise
 * the packet could potentially already be freed.
 *
 * @skb: A socket buffer.
*/ static inline void skb_tx_timestamp(struct sk_buff *skb) { skb_clone_tx_timestamp(skb); if (skb_shinfo(skb)->tx_flags & (SKBTX_SW_TSTAMP | SKBTX_BPF)) skb_tstamp_tx(skb, NULL); } /** * skb_complete_wifi_ack - deliver skb with wifi status * * @skb: the original outgoing packet * @acked: ack status * */ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked); __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len); __sum16 __skb_checksum_complete(struct sk_buff *skb); static inline int skb_csum_unnecessary(const struct sk_buff *skb) { return ((skb->ip_summed == CHECKSUM_UNNECESSARY) || skb->csum_valid || (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_start_offset(skb) >= 0)); } /** * skb_checksum_complete - Calculate checksum of an entire packet * @skb: packet to process * * This function calculates the checksum over the entire packet plus * the value of skb->csum. The latter can be used to supply the * checksum of a pseudo header as used by TCP/UDP. It returns the * checksum. * * For protocols that contain complete checksums such as ICMP/TCP/UDP, * this function can be used to verify that checksum on received * packets. In that case the function should return zero if the * checksum is correct. In particular, this function will return zero * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the * hardware has already verified the correctness of the checksum. */ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) { return skb_csum_unnecessary(skb) ? 
0 : __skb_checksum_complete(skb); } static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level == 0) skb->ip_summed = CHECKSUM_NONE; else skb->csum_level--; } } static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { if (skb->csum_level < SKB_MAX_CSUM_LEVEL) skb->csum_level++; } else if (skb->ip_summed == CHECKSUM_NONE) { skb->ip_summed = CHECKSUM_UNNECESSARY; skb->csum_level = 0; } } static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_UNNECESSARY) { skb->ip_summed = CHECKSUM_NONE; skb->csum_level = 0; } } /* Check if we need to perform checksum complete validation. * * Returns: true if checksum complete is needed, false otherwise * (either checksum is unnecessary or zero checksum is allowed). */ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, bool zero_okay, __sum16 check) { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; __skb_decr_checksum_unnecessary(skb); return false; } return true; } /* For small packets <= CHECKSUM_BREAK perform checksum complete directly * in checksum_init. */ #define CHECKSUM_BREAK 76 /* Unset checksum-complete * * Unset checksum complete can be done when packet is being modified * (uncompressed for instance) and checksum-complete value is * invalidated. */ static inline void skb_checksum_complete_unset(struct sk_buff *skb) { if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; } /* Validate (init) checksum based on checksum complete. * * Return values: * 0: checksum is validated or try to in skb_checksum_complete. 
In the latter
 *	case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo
 *	checksum is stored in skb->csum for use in __skb_checksum_complete
 *   non-zero: value of invalid checksum
 *
 */
static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
						       bool complete,
						       __wsum psum)
{
	__sum16 folded;

	/* A CHECKSUM_COMPLETE value that folds to zero together with the
	 * pseudo header sum is already a successful verification.
	 */
	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_fold(csum_add(psum, skb->csum))) {
		skb->csum_valid = 1;
		return 0;
	}

	skb->csum = psum;

	/* Defer the full computation unless the caller demands it or the
	 * packet is small.
	 */
	if (!complete && skb->len > CHECKSUM_BREAK)
		return 0;

	folded = __skb_checksum_complete(skb);
	skb->csum_valid = !folded;
	return folded;
}

/* Pseudo-header callback for protocols without one: always 0. */
static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
{
	return 0;
}

/* Perform checksum validate (init). Note that this is a macro since we only
 * want to calculate the pseudo header which is an input function if necessary.
 * First we try to validate without any computation (checksum unnecessary) and
 * then calculate based on checksum complete calling the function to compute
 * pseudo header.
*
 * Return values:
 *   0: checksum is validated or try to in skb_checksum_complete
 *   non-zero: value of invalid checksum
 */
#define __skb_checksum_validate(skb, proto, complete,			\
				zero_okay, check, compute_pseudo)	\
({									\
	__sum16 __ret = 0;						\
	skb->csum_valid = 0;						\
	if (__skb_checksum_validate_needed(skb, zero_okay, check))	\
		__ret = __skb_checksum_validate_complete(skb,		\
				complete, compute_pseudo(skb, proto));	\
	__ret;								\
})

/* Convenience wrappers around __skb_checksum_validate(). They differ only
 * in the fixed arguments they pass:
 *   "init" variants pass complete = false, "validate" variants pass true;
 *   "zero_check" variants pass zero_okay = true together with @check.
 */
#define skb_checksum_init(skb, proto, compute_pseudo)			\
	__skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)

#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo)	\
	__skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)

#define skb_checksum_validate(skb, proto, compute_pseudo)		\
	__skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)

#define skb_checksum_validate_zero_check(skb, proto, check,		\
					 compute_pseudo)		\
	__skb_checksum_validate(skb, proto, true, true, check, compute_pseudo)

/* Full validation with proto 0 and the null pseudo-header callback. */
#define skb_checksum_simple_validate(skb)				\
	__skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)

/* True when @skb is CHECKSUM_NONE but flagged csum_valid, i.e. eligible
 * for conversion by __skb_checksum_convert().
 */
static inline bool __skb_checksum_convert_check(struct sk_buff *skb)
{
	return (skb->ip_summed == CHECKSUM_NONE && skb->csum_valid);
}

/* Switch @skb to CHECKSUM_COMPLETE, storing the complement of @pseudo
 * as its checksum.
 */
static inline void __skb_checksum_convert(struct sk_buff *skb, __wsum pseudo)
{
	skb->csum = ~pseudo;
	skb->ip_summed = CHECKSUM_COMPLETE;
}

/* Convert to CHECKSUM_COMPLETE when eligible. compute_pseudo(skb, proto)
 * is only evaluated if the conversion actually happens.
 */
#define skb_checksum_try_convert(skb, proto, compute_pseudo)	\
do {									\
	if (__skb_checksum_convert_check(skb))				\
		__skb_checksum_convert(skb, compute_pseudo(skb, proto)); \
} while (0)

/* Set up @skb as CHECKSUM_PARTIAL: csum_start becomes the offset of
 * (@ptr + @start) from skb->head and csum_offset becomes @offset - @start.
 */
static inline void skb_remcsum_adjust_partial(struct sk_buff *skb, void *ptr,
					      u16 start, u16 offset)
{
	skb->ip_summed = CHECKSUM_PARTIAL;
	skb->csum_start = ((unsigned char *)ptr + start) - skb->head;
	skb->csum_offset = offset - start;
}

/* Update skbuf and packet to reflect the remote checksum offload operation.
 * When called, ptr indicates the starting point for skb->csum when
 * ip_summed is CHECKSUM_COMPLETE.
If we need create checksum complete * here, skb_postpull_rcsum is done so skb->csum start is ptr. */ static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, int start, int offset, bool nopartial) { __wsum delta; if (!nopartial) { skb_remcsum_adjust_partial(skb, ptr, start, offset); return; } if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { __skb_checksum_complete(skb); skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); } delta = remcsum_adjust(ptr, skb->csum, start, offset); /* Adjust skb->csum since we changed the packet */ skb->csum = csum_add(skb->csum, delta); } static inline struct nf_conntrack *skb_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return (void *)(skb->_nfct & NFCT_PTRMASK); #else return NULL; #endif } static inline unsigned long skb_get_nfct(const struct sk_buff *skb) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) return skb->_nfct; #else return 0UL; #endif } static inline void skb_set_nfct(struct sk_buff *skb, unsigned long nfct) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) skb->slow_gro |= !!nfct; skb->_nfct = nfct; #endif } #ifdef CONFIG_SKB_EXTENSIONS enum skb_ext_id { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) SKB_EXT_BRIDGE_NF, #endif #ifdef CONFIG_XFRM SKB_EXT_SEC_PATH, #endif #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) TC_SKB_EXT, #endif #if IS_ENABLED(CONFIG_MPTCP) SKB_EXT_MPTCP, #endif #if IS_ENABLED(CONFIG_MCTP_FLOWS) SKB_EXT_MCTP, #endif SKB_EXT_NUM, /* must be last */ }; /** * struct skb_ext - sk_buff extensions * @refcnt: 1 on allocation, deallocated on 0 * @offset: offset to add to @data to obtain extension address * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units * @data: start of extension data, variable sized * * Note: offsets/lengths are stored in chunks of 8 bytes, this allows * to use 'u8' types while allowing up to 2kb worth of extension data. 
*/
struct skb_ext {
	refcount_t refcnt;
	u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */
	u8 chunks;		/* same */
	char data[] __aligned(8);
};

struct skb_ext *__skb_ext_alloc(gfp_t flags);
void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
		    struct skb_ext *ext);
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);

/* Hand the extension area of @skb to __skb_ext_put(), if any extension
 * is active.
 */
static inline void skb_ext_put(struct sk_buff *skb)
{
	if (skb->active_extensions)
		__skb_ext_put(skb->extensions);
}

/* Make @dst share the extensions of @src: copy the active bitmap and, if
 * it is non-zero, take a reference and copy the pointer. Whatever @dst
 * held before is not released here.
 */
static inline void __skb_ext_copy(struct sk_buff *dst,
				  const struct sk_buff *src)
{
	dst->active_extensions = src->active_extensions;

	if (src->active_extensions) {
		struct skb_ext *ext = src->extensions;

		refcount_inc(&ext->refcnt);
		dst->extensions = ext;
	}
}

/* Like __skb_ext_copy(), but first puts the extensions @dst holds. */
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src)
{
	skb_ext_put(dst);
	__skb_ext_copy(dst, src);
}

/* True when @ext has a non-zero offset recorded for extension @i. */
static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i)
{
	return !!ext->offset[i];
}

/* True when bit @id is set in the active extension bitmap of @skb. */
static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id)
{
	return skb->active_extensions & (1 << id);
}

/* Delete extension @id from @skb if it is active. */
static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
{
	if (skb_ext_exist(skb, id))
		__skb_ext_del(skb, id);
}

/* Return the address of extension @id of @skb, or NULL if not active. */
static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
{
	if (skb_ext_exist(skb, id)) {
		struct skb_ext *ext = skb->extensions;

		/* Offsets are kept in 8-byte chunks, hence the shift. */
		return (void *)ext + (ext->offset[id] << 3);
	}

	return NULL;
}

/* Put the extensions of @skb, if any, and clear its active bitmap. */
static inline void skb_ext_reset(struct sk_buff *skb)
{
	if (unlikely(skb->active_extensions)) {
		__skb_ext_put(skb->extensions);
		skb->active_extensions = 0;
	}
}

/* Non-zero active bitmap means @skb carries at least one extension. */
static inline bool skb_has_extensions(struct sk_buff *skb)
{
	return unlikely(skb->active_extensions);
}
#else
/* Without CONFIG_SKB_EXTENSIONS all helpers collapse to no-ops. */
static inline void skb_ext_put(struct sk_buff *skb) {}
static inline void skb_ext_reset(struct sk_buff *skb) {}
static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
static inline bool skb_has_extensions(struct sk_buff *skb) { return false; }
#endif /* CONFIG_SKB_EXTENSIONS */

/* Put the conntrack reference held by @skb and clear the field. */
static inline void nf_reset_ct(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(skb_nfct(skb));
	skb->_nfct = 0;
#endif
}

/* Clear the netfilter trace flag of @skb. */
static inline void nf_reset_trace(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
	skb->nf_trace = 0;
#endif
}

/* Clear the ipvs_property flag of @skb. */
static inline void ipvs_reset(struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IP_VS)
	skb->ipvs_property = 0;
#endif
}

/* Note: This doesn't put any conntrack info in dst. */
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
			     bool copy)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	dst->_nfct = src->_nfct;
	nf_conntrack_get(skb_nfct(src));
#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || IS_ENABLED(CONFIG_NF_TABLES)
	/* The trace flag is only propagated on request. */
	if (copy)
		dst->nf_trace = src->nf_trace;
#endif
}

/* Replace the netfilter state of @dst with that of @src: put the
 * conntrack reference @dst holds, copy slow_gro, then __nf_copy().
 */
static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
	nf_conntrack_put(skb_nfct(dst));
#endif
	dst->slow_gro = src->slow_gro;
	__nf_copy(dst, src, true);
}

#ifdef CONFIG_NETWORK_SECMARK
/* Copy the security mark from @from to @to. */
static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
{
	to->secmark = from->secmark;
}

/* Reset the security mark of @skb to 0. */
static inline void skb_init_secmark(struct sk_buff *skb)
{
	skb->secmark = 0;
}
#else
/* Without CONFIG_NETWORK_SECMARK both helpers are no-ops. */
static inline void skb_copy_secmark(struct sk_buff *to, const struct sk_buff *from)
{ }

static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif

/* Non-zero when @skb has the SEC_PATH extension; 0 without CONFIG_XFRM. */
static inline int secpath_exists(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
	return skb_ext_exist(skb, SKB_EXT_SEC_PATH);
#else
	return 0;
#endif
}

/* True when @skb has no destructor, no secpath, no conntrack, no
 * _skb_refdst and no frag_list attached.
 */
static inline bool skb_irq_freeable(const struct sk_buff *skb)
{
	return !skb->destructor &&
		!secpath_exists(skb) &&
		!skb_nfct(skb) &&
		!skb->_skb_refdst &&
		!skb_has_frag_list(skb);
}

/* Plain setter for skb->queue_mapping. */
static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
{
	skb->queue_mapping = queue_mapping;
}

/* Plain getter for skb->queue_mapping. */
static inline u16 skb_get_queue_mapping(const struct sk_buff *skb)
{
	return skb->queue_mapping;
}

/* Copy queue_mapping from @from to @to. */
static inline void skb_copy_queue_mapping(struct sk_buff *to, const struct sk_buff *from)
{
	to->queue_mapping = from->queue_mapping;
}

/* RX queues are stored off by one so that 0 means "not recorded"
 * (see skb_rx_queue_recorded()).
 */
static inline void skb_record_rx_queue(struct sk_buff *skb, u16 rx_queue)
{
	skb->queue_mapping = rx_queue + 1;
}

/* Undo the +1 applied by skb_record_rx_queue(). */
static inline u16 skb_get_rx_queue(const struct sk_buff *skb)
{
	return skb->queue_mapping - 1;
}

/* True when queue_mapping is non-zero. */
static inline bool skb_rx_queue_recorded(const struct sk_buff *skb)
{
	return skb->queue_mapping != 0;
}

/* Plain setter for skb->dst_pending_confirm. */
static inline void skb_set_dst_pending_confirm(struct sk_buff *skb, u32 val)
{
	skb->dst_pending_confirm = val;
}

/* True when skb->dst_pending_confirm is non-zero. */
static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
{
	return skb->dst_pending_confirm != 0;
}

/* Return the SEC_PATH extension of @skb, or NULL if absent or if
 * CONFIG_XFRM is not set.
 */
static inline struct sec_path *skb_sec_path(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
	return skb_ext_find(skb, SKB_EXT_SEC_PATH);
#else
	return NULL;
#endif
}

/* True when the shared info of @skb has a non-zero gso_size. */
static inline bool skb_is_gso(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_size;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_v6(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
}

/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
{
	return skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
}

/* Zero gso_size, gso_segs and gso_type in the shared info of @skb. */
static inline void skb_gso_reset(struct sk_buff *skb)
{
	skb_shinfo(skb)->gso_size = 0;
	skb_shinfo(skb)->gso_segs = 0;
	skb_shinfo(skb)->gso_type = 0;
}

/* Add @increment to gso_size; warns once and does nothing when gso_size
 * is the GSO_BY_FRAGS marker.
 */
static inline void skb_increase_gso_size(struct skb_shared_info *shinfo,
					 u16 increment)
{
	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
		return;
	shinfo->gso_size += increment;
}

/* Subtract @decrement from gso_size; warns once and does nothing when
 * gso_size is the GSO_BY_FRAGS marker.
 */
static inline void skb_decrease_gso_size(struct skb_shared_info *shinfo,
					 u16 decrement)
{
	if (WARN_ON_ONCE(shinfo->gso_size == GSO_BY_FRAGS))
		return;
	shinfo->gso_size -= decrement;
}

void __skb_warn_lro_forwarding(const struct sk_buff *skb);

/* Warn about and report (return true for) an skb that looks like it was
 * produced by LRO: non-linear, gso_size set, gso_type zero.
 */
static inline bool skb_warn_if_lro(const struct sk_buff *skb)
{
	/* LRO sets gso_size but not gso_type, whereas if GSO is really
	 * wanted then gso_type will be set. */
	const struct skb_shared_info *shinfo = skb_shinfo(skb);

	if (skb_is_nonlinear(skb) && shinfo->gso_size != 0 &&
	    unlikely(shinfo->gso_type == 0)) {
		__skb_warn_lro_forwarding(skb);
		return true;
	}
	return false;
}

/* Downgrade CHECKSUM_COMPLETE to CHECKSUM_NONE on @skb. */
static inline void skb_forward_csum(struct sk_buff *skb)
{
	/* Unfortunately we don't support this one.  Any brave souls? */
	if (skb->ip_summed == CHECKSUM_COMPLETE)
		skb->ip_summed = CHECKSUM_NONE;
}

/**
 * skb_checksum_none_assert - make sure skb ip_summed is CHECKSUM_NONE
 * @skb: skb to check
 *
 * fresh skbs have their ip_summed set to CHECKSUM_NONE.
 * Instead of forcing ip_summed to CHECKSUM_NONE, we can
 * use this helper, to document places where we make this assertion.
 */
static inline void skb_checksum_none_assert(const struct sk_buff *skb)
{
	DEBUG_NET_WARN_ON_ONCE(skb->ip_summed != CHECKSUM_NONE);
}

bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);

int skb_checksum_setup(struct sk_buff *skb, bool recalculate);
struct sk_buff *skb_checksum_trimmed(struct sk_buff *skb,
				     unsigned int transport_len,
				     __sum16(*skb_chkf)(struct sk_buff *skb));

/**
 * skb_head_is_locked - Determine if the skb->head is locked down
 * @skb: skb to check
 *
 * The head on skbs build around a head frag can be removed if they are
 * not cloned.  This function returns true if the skb head is locked down
 * due to either being allocated via kmalloc, or by being a clone with
 * multiple references to the head.
*/ static inline bool skb_head_is_locked(const struct sk_buff *skb) { return !skb->head_frag || skb_cloned(skb); } /* Local Checksum Offload. * Compute outer checksum based on the assumption that the * inner checksum will be offloaded later. * See Documentation/networking/checksum-offloads.rst for * explanation of how this works. * Fill in outer checksum adjustment (e.g. with sum of outer * pseudo-header) before calling. * Also ensure that inner checksum is in linear data area. */ static inline __wsum lco_csum(struct sk_buff *skb) { unsigned char *csum_start = skb_checksum_start(skb); unsigned char *l4_hdr = skb_transport_header(skb); __wsum partial; /* Start with complement of inner checksum adjustment */ partial = ~csum_unfold(*(__force __sum16 *)(csum_start + skb->csum_offset)); /* Add in checksum of our headers (incl. outer checksum * adjustment filled in by caller) and return result. */ return csum_partial(l4_hdr, csum_start - l4_hdr, partial); } static inline bool skb_is_redirected(const struct sk_buff *skb) { return skb->redirected; } static inline void skb_set_redirected(struct sk_buff *skb, bool from_ingress) { skb->redirected = 1; #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; if (skb->from_ingress) skb_clear_tstamp(skb); #endif } static inline void skb_reset_redirect(struct sk_buff *skb) { skb->redirected = 0; } static inline void skb_set_redirected_noclear(struct sk_buff *skb, bool from_ingress) { skb->redirected = 1; #ifdef CONFIG_NET_REDIRECT skb->from_ingress = from_ingress; #endif } static inline bool skb_csum_is_sctp(struct sk_buff *skb) { #if IS_ENABLED(CONFIG_IP_SCTP) return skb->csum_not_inet; #else return 0; #endif } static inline void skb_reset_csum_not_inet(struct sk_buff *skb) { skb->ip_summed = CHECKSUM_NONE; #if IS_ENABLED(CONFIG_IP_SCTP) skb->csum_not_inet = 0; #endif } static inline void skb_set_kcov_handle(struct sk_buff *skb, const u64 kcov_handle) { #ifdef CONFIG_KCOV skb->kcov_handle = kcov_handle; #endif } static 
inline u64 skb_get_kcov_handle(struct sk_buff *skb) { #ifdef CONFIG_KCOV return skb->kcov_handle; #else return 0; #endif } static inline void skb_mark_for_recycle(struct sk_buff *skb) { #ifdef CONFIG_PAGE_POOL skb->pp_recycle = 1; #endif } ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, ssize_t maxsize); #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */
1 43 44 44 9 6 6 223 220 187 2 3 4 3 4 13 1 7 1 5 8 1 1 1 1 4 3 2 2 1 333 2 2 102 2 100 100 100 99 107 5 101 91 15 331 328 333 330 8 89 90 86 43 3 5 12 2 2 6 3 3 3 3 6 2 3 1 2 3 4 4 4 6 2 1 1 2 3 143 4 30 109 3 5 2 4 4 4 4 5 2 2 1 1 1 1 1 16 2 1 7 6 2 1 1 10 2 10 10 4 6 5 2 3 1 1 7 3 1 3 2 3 1 2 3 680 853 683 851 96 97 3 221 6 1 218 12 1 2 1 1 3 4 1 2 1 4 2 2 3 1 1 1 1 2 7 170 24 4 1 3 1 1 6 6 5 3 3 3 4 1 1 1 2 2 5 12 12 3 1 14 1 19 1 1 1 5 1 3 3 2 1 1 6 1 1 1 1 1 1 4 4 1 1 1 1 3 1 3 6 4 1 1 2 1 1 5 3 5 1 1 1 1 1 170 181 4 4 2 1 1 2 1 119 123 1 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 3 3 2 1 1 4 1 1 1 1 1 1 4 1 1 1 2 1 1 1 4 3 1 4 5 1 1 1 1 1 1 1 1 6 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 1 61 6 69 422 205 232 423 311 134 422 218 134 424 231 202 329 333 331 327 2 331 330 329 330 316 19 329 96 95 94 1 77 329 389 39 354 392 1 390 550 637 425 424 425 422 203 230 111 198 310 321 150 173 6 317 310 10 13 310 312 1395 255 800 369 368 372 367 367 809 1050 182 182 1404 1400 1400 1403 1403 6983 7002 18 18 1 1 2 152 149 152 72 72 2 71 17 31 1 32 666 6 666 5 5 114 423 427 423 115 114 15 11 4 3 2 1 1043 1040 1031 4 22 7 5 15 4 1032 31 7 7 1 4 1 1 2 1 1 1 6 2 1 2 1 4 1 2 26 14 3 2 19 145 165 221 174 10 99 75 182 180 181 122 121 122 528 531 456 462 159 19 20 33 33 34 33 279 282 287 287 286 284 288 329 326 328 325 289 113 87 1 1 10 24 4 1 1 1 1 1 2 8 6 4 369 369 369 277 112 368 214 218 216 203 38 217 216 9288 9331 9280 9289 6581 2845 9275 9297 425 421 2 426 420 7 428 425 16 369 369 4 370 53 62 1 52 9 327 165 262 255 232 122 8 8 1 426 390 36 424 392 37 11402 11377 120 11388 11285 449 9740 1820 11383 11389 144 145 2 5 5 5 1 1 1 2 3 3 9 11 6 3 2 1 4 975 39 17 23 1665 149 17 135 151 150 57 2 3 3 1 1 15 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 15 4 6 4 2 77 34 126 96 12 17 84 13 29 111 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 
578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 
1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 
1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 
1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 
2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 
2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 
3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 
3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 
3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 
4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Generic socket support routines. Memory allocators, socket lock/release * handler for protocols to use and generic option handler. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> * Alan Cox, <A.Cox@swansea.ac.uk> * * Fixes: * Alan Cox : Numerous verify_area() problems * Alan Cox : Connecting on a connecting socket * now returns an error for tcp. * Alan Cox : sock->protocol is set correctly. * and is not sometimes left as 0. * Alan Cox : connect handles icmp errors on a * connect properly. 
Unfortunately there * is a restart syscall nasty there. I * can't match BSD without hacking the C * library. Ideas urgently sought! * Alan Cox : Disallow bind() to addresses that are * not ours - especially broadcast ones!! * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost) * Alan Cox : sock_wfree/sock_rfree don't destroy sockets, * instead they leave that for the DESTROY timer. * Alan Cox : Clean up error flag in accept * Alan Cox : TCP ack handling is buggy, the DESTROY timer * was buggy. Put a remove_sock() in the handler * for memory when we hit 0. Also altered the timer * code. The ACK stuff can wait and needs major * TCP layer surgery. * Alan Cox : Fixed TCP ack bug, removed remove sock * and fixed timer/inet_bh race. * Alan Cox : Added zapped flag for TCP * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing. * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so... * Rick Sladkey : Relaxed UDP rules for matching packets. * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support * Pauline Middelink : identd support * Alan Cox : Fixed connect() taking signals I think. * Alan Cox : SO_LINGER supported * Alan Cox : Error reporting fixes * Anonymous : inet_create tidied up (sk->reuse setting) * Alan Cox : inet sockets don't set sk->type! * Alan Cox : Split socket option code * Alan Cox : Callbacks * Alan Cox : Nagle flag for Charles & Johannes stuff * Alex : Removed restriction on inet fioctl * Alan Cox : Splitting INET from NET core * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt() * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code * Alan Cox : Split IP from generic code * Alan Cox : New kfree_skbmem() * Alan Cox : Make SO_DEBUG superuser only. 
* Alan Cox : Allow anyone to clear SO_DEBUG * (compatibility fix) * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput. * Alan Cox : Allocator for a socket is settable. * Alan Cox : SO_ERROR includes soft errors. * Alan Cox : Allow NULL arguments on some SO_ opts * Alan Cox : Generic socket allocation to make hooks * easier (suggested by Craig Metz). * Michael Pall : SO_ERROR returns positive errno again * Steve Whitehouse: Added default destructor to free * protocol private data. * Steve Whitehouse: Added various other default routines * common to several socket families. * Chris Evans : Call suser() check last on F_SETOWN * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER. * Andi Kleen : Add sock_kmalloc()/sock_kfree_s() * Andi Kleen : Fix write_space callback * Chris Evans : Security fixes - signedness again * Arnaldo C. Melo : cleanups, use skb_queue_purge * * To Fix: */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/unaligned.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/errqueue.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/interrupt.h> #include <linux/poll.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/init.h> #include <linux/highmem.h> #include <linux/user_namespace.h> #include <linux/static_key.h> #include <linux/memcontrol.h> #include <linux/prefetch.h> #include <linux/compat.h> #include <linux/mroute.h> #include <linux/mroute6.h> #include <linux/icmpv6.h> #include <linux/uaccess.h> #include <linux/netdevice.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <linux/skbuff_ref.h> #include <net/net_namespace.h> 
#include <net/request_sock.h> #include <net/sock.h> #include <net/proto_memory.h> #include <linux/net_tstamp.h> #include <net/xfrm.h> #include <linux/ipsec.h> #include <net/cls_cgroup.h> #include <net/netprio_cgroup.h> #include <linux/sock_diag.h> #include <linux/filter.h> #include <net/sock_reuseport.h> #include <net/bpf_sk_storage.h> #include <trace/events/sock.h> #include <net/tcp.h> #include <net/busy_poll.h> #include <net/phonet/phonet.h> #include <linux/ethtool.h> #include <uapi/linux/pidfd.h> #include "dev.h" static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); static void sock_def_write_space_wfree(struct sock *sk); static void sock_def_write_space(struct sock *sk); /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through * @user_ns: The user namespace of the capability to use * @cap: The capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in the user * namespace @user_ns. */ bool sk_ns_capable(const struct sock *sk, struct user_namespace *user_ns, int cap) { return file_ns_capable(sk->sk_socket->file, user_ns, cap) && ns_capable(user_ns, cap); } EXPORT_SYMBOL(sk_ns_capable); /** * sk_capable - Socket global capability test * @sk: Socket to use a capability on or through * @cap: The global capability to use * * Test to see if the opener of the socket had when the socket was * created and the current process has the capability @cap in all user * namespaces. */ bool sk_capable(const struct sock *sk, int cap) { return sk_ns_capable(sk, &init_user_ns, cap); } EXPORT_SYMBOL(sk_capable); /** * sk_net_capable - Network namespace socket capability test * @sk: Socket to use a capability on or through * @cap: The capability to use * * Test to see if the opener of the socket had when the socket was created * and the current process has the capability @cap over the network namespace * the socket is a member of. 
*/ bool sk_net_capable(const struct sock *sk, int cap) { return sk_ns_capable(sk, sock_net(sk)->user_ns, cap); } EXPORT_SYMBOL(sk_net_capable); /* * Each address family might have different locking rules, so we have * one slock key per address family and separate keys for internal and * userspace sockets. */ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_kern_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; static struct lock_class_key af_family_kern_slock_keys[AF_MAX]; /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket * locks is fast): */ #define _sock_locks(x) \ x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \ x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \ x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \ x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \ x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \ x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \ x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \ x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \ x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \ x "27" , x "28" , x "AF_CAN" , \ x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \ x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \ x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \ x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \ x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \ x "AF_MCTP" , \ x "AF_MAX" static const char *const af_family_key_strings[AF_MAX+1] = { _sock_locks("sk_lock-") }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { _sock_locks("slock-") }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { _sock_locks("clock-") }; static const char *const af_family_kern_key_strings[AF_MAX+1] = { _sock_locks("k-sk_lock-") }; static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = { _sock_locks("k-slock-") }; static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = { _sock_locks("k-clock-") }; 
static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
	_sock_locks("rlock-")
};
static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
	_sock_locks("wlock-")
};
static const char *const af_family_elock_key_strings[AF_MAX+1] = {
	_sock_locks("elock-")
};

/*
 * sk_callback_lock and sk queues locking rules are per-address-family,
 * so split the lock classes by using a per-AF key:
 */
static struct lock_class_key af_callback_keys[AF_MAX];
static struct lock_class_key af_rlock_keys[AF_MAX];
static struct lock_class_key af_wlock_keys[AF_MAX];
static struct lock_class_key af_elock_keys[AF_MAX];
static struct lock_class_key af_kern_callback_keys[AF_MAX];

/* Run time adjustable parameters. */
__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
EXPORT_SYMBOL(sysctl_wmem_max);
__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
EXPORT_SYMBOL(sysctl_rmem_max);
/* The defaults start out equal to the corresponding maximums. */
__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;

/* Incremented by sk_set_memalloc() and decremented by sk_clear_memalloc(). */
DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
EXPORT_SYMBOL_GPL(memalloc_socks_key);

/**
 * sk_set_memalloc - sets %SOCK_MEMALLOC
 * @sk: socket to set it on
 *
 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
 * It's the responsibility of the admin to adjust min_free_kbytes
 * to meet the requirements
 *
 * Also ORs __GFP_MEMALLOC into sk->sk_allocation and bumps
 * memalloc_socks_key.
 */
void sk_set_memalloc(struct sock *sk)
{
	sock_set_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation |= __GFP_MEMALLOC;
	static_branch_inc(&memalloc_socks_key);
}
EXPORT_SYMBOL_GPL(sk_set_memalloc);

/*
 * sk_clear_memalloc - undo sk_set_memalloc() on @sk: clear the flag, drop
 * __GFP_MEMALLOC from sk->sk_allocation, decrement memalloc_socks_key and
 * then reclaim socket memory.
 */
void sk_clear_memalloc(struct sock *sk)
{
	sock_reset_flag(sk, SOCK_MEMALLOC);
	sk->sk_allocation &= ~__GFP_MEMALLOC;
	static_branch_dec(&memalloc_socks_key);

	/*
	 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
	 * progress of swapping. SOCK_MEMALLOC may be cleared while
	 * it has rmem allocations due to the last swapfile being deactivated
	 * but there is a risk that the socket is unusable due to exceeding
	 * the rmem limits. Reclaim the reserves and obey rmem limits again.
*/
	sk_mem_reclaim(sk);
}
EXPORT_SYMBOL_GPL(sk_clear_memalloc);

/*
 * __sk_backlog_rcv - invoke sk->sk_backlog_rcv for @skb on a
 * SOCK_MEMALLOC socket.
 *
 * The handler call is bracketed by memalloc_noreclaim_save() and
 * memalloc_noreclaim_restore(). Returns whatever the handler returns.
 * A socket without SOCK_MEMALLOC triggers BUG_ON().
 */
int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
	int ret;
	unsigned int noreclaim_flag;

	/* these should have been dropped before queueing */
	BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));

	noreclaim_flag = memalloc_noreclaim_save();
	ret = INDIRECT_CALL_INET(sk->sk_backlog_rcv,
				 tcp_v6_do_rcv,
				 tcp_v4_do_rcv,
				 sk, skb);
	memalloc_noreclaim_restore(noreclaim_flag);

	return ret;
}
EXPORT_SYMBOL(__sk_backlog_rcv);

/*
 * sk_error_report - call the socket's sk_error_report callback, then emit
 * the inet_sk_error_report tracepoint for AF_INET and AF_INET6 sockets.
 */
void sk_error_report(struct sock *sk)
{
	sk->sk_error_report(sk);

	switch (sk->sk_family) {
	case AF_INET:
		fallthrough;
	case AF_INET6:
		trace_inet_sk_error_report(sk);
		break;
	default:
		break;
	}
}
EXPORT_SYMBOL(sk_error_report);

/*
 * sock_get_timeout - convert a timeout in jiffies into a timeval stored
 * at @optval.
 * @timeo: timeout in jiffies; MAX_SCHEDULE_TIMEOUT is reported as 0s/0us
 * @optval: destination buffer, written directly (not via a user copy)
 * @old_timeval: select one of the legacy timeval layouts
 *
 * Three layouts are possible: old_timeval32 (legacy option in a compat
 * syscall without 64-bit time), __kernel_old_timeval (legacy option
 * otherwise) or __kernel_sock_timeval. Returns the size of the structure
 * that was written.
 */
int sock_get_timeout(long timeo, void *optval, bool old_timeval)
{
	struct __kernel_sock_timeval tv;

	if (timeo == MAX_SCHEDULE_TIMEOUT) {
		tv.tv_sec = 0;
		tv.tv_usec = 0;
	} else {
		tv.tv_sec = timeo / HZ;
		/* Sub-second remainder of the jiffies count, in microseconds. */
		tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
	}

	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
		*(struct old_timeval32 *)optval = tv32;
		return sizeof(tv32);
	}

	if (old_timeval) {
		struct __kernel_old_timeval old_tv;
		old_tv.tv_sec = tv.tv_sec;
		old_tv.tv_usec = tv.tv_usec;
		*(struct __kernel_old_timeval *)optval = old_tv;
		return sizeof(old_tv);
	}

	*(struct __kernel_sock_timeval *)optval = tv;
	return sizeof(tv);
}
EXPORT_SYMBOL(sock_get_timeout);

/*
 * sock_copy_user_timeval - read a timeval from @optval into @tv.
 *
 * The expected source layout is chosen with the same conditions that
 * sock_get_timeout() uses for its output. Returns 0 on success, -EINVAL
 * when @optlen is smaller than the selected layout and -EFAULT when the
 * copy fails.
 */
int sock_copy_user_timeval(struct __kernel_sock_timeval *tv,
			   sockptr_t optval, int optlen, bool old_timeval)
{
	if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
		struct old_timeval32 tv32;

		if (optlen < sizeof(tv32))
			return -EINVAL;

		if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
			return -EFAULT;
		tv->tv_sec = tv32.tv_sec;
		tv->tv_usec = tv32.tv_usec;
	} else if (old_timeval) {
		struct __kernel_old_timeval old_tv;

		if (optlen < sizeof(old_tv))
			return -EINVAL;
		if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
			return -EFAULT;
		tv->tv_sec = old_tv.tv_sec;
tv->tv_usec = old_tv.tv_usec;
	} else {
		if (optlen < sizeof(*tv))
			return -EINVAL;
		if (copy_from_sockptr(tv, optval, sizeof(*tv)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL(sock_copy_user_timeval);

/*
 * sock_set_timeout - parse a timeval from @optval and store the resulting
 * timeout, in jiffies, into *@timeo_p.
 *
 * Returns a copy error from sock_copy_user_timeval(), -EDOM when tv_usec
 * is outside [0, USEC_PER_SEC), otherwise 0. A negative tv_sec stores a
 * timeout of 0 and logs a message (at most ten times, rate limited). An
 * all-zero timeval, or a tv_sec too large for the jiffies conversion,
 * stores MAX_SCHEDULE_TIMEOUT.
 */
static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
			    bool old_timeval)
{
	struct __kernel_sock_timeval tv;
	int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
	long val;

	if (err)
		return err;

	if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
		return -EDOM;

	if (tv.tv_sec < 0) {
		static int warned __read_mostly;

		WRITE_ONCE(*timeo_p, 0);
		if (warned < 10 && net_ratelimit()) {
			warned++;
			pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
				__func__, current->comm, task_pid_nr(current));
		}
		return 0;
	}
	val = MAX_SCHEDULE_TIMEOUT;
	if ((tv.tv_sec || tv.tv_usec) &&
	    (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
		/* Microseconds are rounded up to whole jiffies. */
		val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
						    USEC_PER_SEC / HZ);
	WRITE_ONCE(*timeo_p, val);
	return 0;
}

/*
 * A priority in [TC_PRIO_BESTEFFORT, TC_PRIO_INTERACTIVE] is always
 * allowed; any other value needs CAP_NET_RAW or CAP_NET_ADMIN in the user
 * namespace of the socket's network namespace (via sockopt_ns_capable()).
 */
static bool sk_set_prio_allowed(const struct sock *sk, int val)
{
	return ((val >= TC_PRIO_BESTEFFORT && val <= TC_PRIO_INTERACTIVE) ||
		sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
		sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN));
}

/* Returns false for AF_UNSPEC and AF_UNIX sockets, true for every other family. */
static bool sock_needs_netstamp(const struct sock *sk)
{
	switch (sk->sk_family) {
	case AF_UNSPEC:
	case AF_UNIX:
		return false;
	default:
		return true;
	}
}

/*
 * Clear the bits in @flags from sk->sk_flags. net_disable_timestamp() is
 * called only if at least one of them was set, the family needs netstamp,
 * and no SK_FLAGS_TIMESTAMP bit remains set afterwards.
 */
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
	if (sk->sk_flags & flags) {
		sk->sk_flags &= ~flags;
		if (sock_needs_netstamp(sk) &&
		    !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
			net_disable_timestamp();
	}
}

/*
 * __sock_queue_rcv_skb - append @skb to sk->sk_receive_queue.
 *
 * Returns 0 on success. Returns -ENOMEM when sk_rmem_alloc has reached
 * sk_rcvbuf, or -ENOBUFS when sk_rmem_schedule() refuses the charge; both
 * failures increment sk->sk_drops. The skb is not freed here on failure.
 */
int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
	unsigned long flags;
	struct sk_buff_head *list = &sk->sk_receive_queue;

	if (atomic_read(&sk->sk_rmem_alloc) >= READ_ONCE(sk->sk_rcvbuf)) {
		atomic_inc(&sk->sk_drops);
		trace_sock_rcvqueue_full(sk, skb);
		return -ENOMEM;
	}

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		atomic_inc(&sk->sk_drops);
		return -ENOBUFS;
	}

	skb->dev = NULL;
skb_set_owner_r(skb, sk);

	/* we escape from rcu protected region, make sure we dont leak
	 * a norefcounted dst
	 */
	skb_dst_force(skb);

	/* Drop count is stamped and the skb queued under the queue lock. */
	spin_lock_irqsave(&list->lock, flags);
	sock_skb_set_dropcount(sk, skb);
	__skb_queue_tail(list, skb);
	spin_unlock_irqrestore(&list->lock, flags);

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_data_ready(sk);
	return 0;
}
EXPORT_SYMBOL(__sock_queue_rcv_skb);

/*
 * sock_queue_rcv_skb_reason - run the socket filter on @skb, then queue it
 * with __sock_queue_rcv_skb().
 * @reason: optional; when non-NULL receives the drop reason
 *
 * Returns 0 on success or the error from the filter / queueing step.
 * -ENOMEM maps to SKB_DROP_REASON_SOCKET_RCVBUFF and -ENOBUFS to
 * SKB_DROP_REASON_PROTO_MEM; any other queueing result, including
 * success, reports SKB_NOT_DROPPED_YET.
 */
int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb,
			      enum skb_drop_reason *reason)
{
	enum skb_drop_reason drop_reason;
	int err;

	/* On filter failure drop_reason is whatever sk_filter_reason() stored. */
	err = sk_filter_reason(sk, skb, &drop_reason);
	if (err)
		goto out;

	err = __sock_queue_rcv_skb(sk, skb);
	switch (err) {
	case -ENOMEM:
		drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
		break;
	case -ENOBUFS:
		drop_reason = SKB_DROP_REASON_PROTO_MEM;
		break;
	default:
		drop_reason = SKB_NOT_DROPPED_YET;
		break;
	}
out:
	if (reason)
		*reason = drop_reason;
	return err;
}
EXPORT_SYMBOL(sock_queue_rcv_skb_reason);

/*
 * __sk_receive_skb - filter @skb and hand it to the socket.
 * @nested: non-zero selects bh_lock_sock_nested() instead of bh_lock_sock()
 * @trim_cap: passed through to sk_filter_trim_cap()
 * @refcounted: when true, sock_put(@sk) is called before returning
 *
 * If the socket is not owned by a user context the skb is processed
 * immediately through sk_backlog_rcv(); otherwise it is added to the
 * backlog. Dropped skbs are released with sk_skb_reason_drop(). Returns
 * NET_RX_SUCCESS unless sk_backlog_rcv() returns something else.
 */
int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
		     const int nested, unsigned int trim_cap, bool refcounted)
{
	enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
	int rc = NET_RX_SUCCESS;
	int err;

	if (sk_filter_trim_cap(sk, skb, trim_cap, &reason))
		goto discard_and_relse;

	skb->dev = NULL;

	if (sk_rcvqueues_full(sk, READ_ONCE(sk->sk_rcvbuf))) {
		atomic_inc(&sk->sk_drops);
		reason = SKB_DROP_REASON_SOCKET_RCVBUFF;
		goto discard_and_relse;
	}
	if (nested)
		bh_lock_sock_nested(sk);
	else
		bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		/*
		 * trylock + unlock semantics:
		 */
		mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);

		rc = sk_backlog_rcv(sk, skb);

		mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
	} else if ((err = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))) {
		/* Backlog rejected the skb: unlock, classify the error, drop. */
		bh_unlock_sock(sk);
		if (err == -ENOMEM)
			reason = SKB_DROP_REASON_PFMEMALLOC;
		if (err == -ENOBUFS)
			reason = SKB_DROP_REASON_SOCKET_BACKLOG;
		atomic_inc(&sk->sk_drops);
		goto discard_and_relse;
	}

	bh_unlock_sock(sk);
out:
	if (refcounted)
		sock_put(sk);
	return rc;
discard_and_relse:
sk_skb_reason_drop(sk, skb, reason);
	goto out;
}
EXPORT_SYMBOL(__sk_receive_skb);

INDIRECT_CALLABLE_DECLARE(struct dst_entry *ip6_dst_check(struct dst_entry *,
							  u32));
INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
							   u32));
/*
 * __sk_dst_check - validate the socket's cached route.
 *
 * Fetches the dst with __sk_dst_get(). If it is marked obsolete and its
 * ops->check() callback returns NULL for @cookie, the cache is torn down
 * (tx queue mapping cleared, sk_dst_pending_confirm reset, sk_dst_cache
 * set to NULL, dst released) and NULL is returned. Otherwise the dst is
 * returned.
 */
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = __sk_dst_get(sk);

	if (dst && READ_ONCE(dst->obsolete) &&
	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
			       dst, cookie) == NULL) {
		sk_tx_queue_clear(sk);
		WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
		RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(__sk_dst_check);

/*
 * sk_dst_check - same validity test as __sk_dst_check(), but the dst is
 * obtained with sk_dst_get() and a stale entry is dropped through
 * sk_dst_reset() followed by dst_release(). Returns the dst, or NULL if
 * there was none or it failed the check.
 */
struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
{
	struct dst_entry *dst = sk_dst_get(sk);

	if (dst && READ_ONCE(dst->obsolete) &&
	    INDIRECT_CALL_INET(dst->ops->check, ip6_dst_check, ipv4_dst_check,
			       dst, cookie) == NULL) {
		sk_dst_reset(sk);
		dst_release(dst);
		return NULL;
	}

	return dst;
}
EXPORT_SYMBOL(sk_dst_check);

/*
 * sock_bindtoindex_locked - bind @sk to interface index @ifindex.
 *
 * Without CONFIG_NETDEVICES this returns -ENOPROTOOPT. Otherwise returns
 * -EPERM when the socket is already bound and the caller lacks
 * CAP_NET_RAW in the netns's user namespace, -EINVAL for a negative
 * index, and 0 on success.
 * NOTE(review): the name suggests the caller holds the socket lock; the
 * visible callers do take it, but confirm for any new caller.
 */
static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);

	/* Sorry... */
	ret = -EPERM;
	if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
		goto out;

	ret = -EINVAL;
	if (ifindex < 0)
		goto out;

	/* Paired with all READ_ONCE() done locklessly.
*/
	WRITE_ONCE(sk->sk_bound_dev_if, ifindex);

	/* Let the protocol rehash the socket if it provides a hook for it. */
	if (sk->sk_prot->rehash)
		sk->sk_prot->rehash(sk);
	sk_dst_reset(sk);

	ret = 0;

out:
#endif

	return ret;
}

/*
 * sock_bindtoindex - bind @sk to @ifindex, taking the socket lock around
 * sock_bindtoindex_locked() only when @lock_sk is true. Returns that
 * helper's result.
 */
int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
{
	int ret;

	if (lock_sk)
		lock_sock(sk);
	ret = sock_bindtoindex_locked(sk, ifindex);
	if (lock_sk)
		release_sock(sk);

	return ret;
}
EXPORT_SYMBOL(sock_bindtoindex);

/*
 * sock_setbindtodevice - SO_BINDTODEVICE handler: bind @sk to the device
 * named in @optval.
 *
 * Returns -ENOPROTOOPT without CONFIG_NETDEVICES, -EINVAL for a negative
 * @optlen, -EFAULT if the name cannot be copied, -ENODEV if no device of
 * that name exists in the socket's netns, otherwise the result of
 * sock_bindtoindex_locked(). An empty name binds to index 0.
 */
static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];
	int index;

	ret = -EINVAL;
	if (optlen < 0)
		goto out;

	/* Bind this socket to a particular device like "eth0",
	 * as specified in the passed interface name. If the
	 * name is "" or the option length is zero the socket
	 * is not bound.
	 */
	if (optlen > IFNAMSIZ - 1)
		optlen = IFNAMSIZ - 1;
	/* Zero-fill first so the (possibly truncated) copy stays NUL-terminated. */
	memset(devname, 0, sizeof(devname));

	ret = -EFAULT;
	if (copy_from_sockptr(devname, optval, optlen))
		goto out;

	index = 0;
	if (devname[0] != '\0') {
		struct net_device *dev;

		/* Only the ifindex is kept once the RCU section ends. */
		rcu_read_lock();
		dev = dev_get_by_name_rcu(net, devname);
		if (dev)
			index = dev->ifindex;
		rcu_read_unlock();
		ret = -ENODEV;
		if (!dev)
			goto out;
	}

	sockopt_lock_sock(sk);
	ret = sock_bindtoindex_locked(sk, index);
	sockopt_release_sock(sk);
out:
#endif

	return ret;
}

/*
 * sock_getbindtodevice - SO_BINDTODEVICE read handler: copy the name of
 * the bound device to @optval and its length (including the NUL) to
 * @optlen.
 *
 * An unbound socket reports length 0 and copies no name. Returns
 * -ENOPROTOOPT without CONFIG_NETDEVICES, -EINVAL when @len is below
 * IFNAMSIZ, the netdev_get_name() error if the lookup fails, -EFAULT on
 * copy failure, 0 on success.
 */
static int sock_getbindtodevice(struct sock *sk, sockptr_t optval,
				sockptr_t optlen, int len)
{
	int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
	int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
	struct net *net = sock_net(sk);
	char devname[IFNAMSIZ];

	if (bound_dev_if == 0) {
		len = 0;
		goto zero;
	}

	ret = -EINVAL;
	if (len < IFNAMSIZ)
		goto out;

	ret = netdev_get_name(net, devname, bound_dev_if);
	if (ret)
		goto out;

	len = strlen(devname) + 1;

	ret = -EFAULT;
	if (copy_to_sockptr(optval, devname, len))
		goto out;

zero:
	ret = -EFAULT;
	if (copy_to_sockptr(optlen, &len, sizeof(int)))
		goto out;

	ret = 0;

out:
#endif

	return ret;
}

/*
 * sk_mc_loop - multicast loop test for @sk.
 *
 * Returns false while dev_recursion_level() is non-zero and true for a
 * NULL socket; otherwise the MC_LOOP / MC6_LOOP bit for AF_INET /
 * AF_INET6. Any other family warns once and returns true.
 */
bool sk_mc_loop(const struct sock *sk)
{
	if (dev_recursion_level())
		return false;
	if (!sk)
		return
true;
	/* IPV6_ADDRFORM can change sk->sk_family under us. */
	switch (READ_ONCE(sk->sk_family)) {
	case AF_INET:
		return inet_test_bit(MC_LOOP, sk);
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		return inet6_test_bit(MC6_LOOP, sk);
#endif
	}
	WARN_ON_ONCE(1);
	return true;
}
EXPORT_SYMBOL(sk_mc_loop);

/* Set sk->sk_reuse to SK_CAN_REUSE under the socket lock. */
void sock_set_reuseaddr(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_reuse = SK_CAN_REUSE;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_reuseaddr);

/* Set sk->sk_reuseport to true under the socket lock. */
void sock_set_reuseport(struct sock *sk)
{
	lock_sock(sk);
	sk->sk_reuseport = true;
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_reuseport);

/* Enable SOCK_LINGER with a linger time of zero, under the socket lock. */
void sock_no_linger(struct sock *sk)
{
	lock_sock(sk);
	WRITE_ONCE(sk->sk_lingertime, 0);
	sock_set_flag(sk, SOCK_LINGER);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_no_linger);

/* Store @priority in sk->sk_priority; no socket lock is taken here. */
void sock_set_priority(struct sock *sk, u32 priority)
{
	WRITE_ONCE(sk->sk_priority, priority);
}
EXPORT_SYMBOL(sock_set_priority);

/*
 * Set the send timeout to @secs seconds (stored in jiffies). A value of
 * zero, or one too large for the jiffies conversion, stores
 * MAX_SCHEDULE_TIMEOUT instead.
 */
void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
		WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
	else
		WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
}
EXPORT_SYMBOL(sock_set_sndtimeo);

/*
 * Apply one SO_TIMESTAMP* setting.
 * @val: enable/disable; stored as SOCK_RCVTSTAMP
 * @new: stored as SOCK_TSTAMP_NEW, only when @val is true
 * @ns:  SOCK_RCVTSTAMPNS is set to (@val && @ns)
 *
 * When enabling, SOCK_TIMESTAMP is also turned on through
 * sock_enable_timestamp(). Disabling leaves SOCK_TSTAMP_NEW untouched.
 */
static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
{
	sock_valbool_flag(sk, SOCK_RCVTSTAMP, val);
	sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, val && ns);
	if (val)  {
		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
	}
}

/*
 * Map @optname onto the (new, ns) arguments of __sock_set_timestamps():
 * *_NEW options pass new=true, SO_TIMESTAMPNS_* options pass ns=true.
 * Any other @optname is ignored.
 */
void sock_set_timestamp(struct sock *sk, int optname, bool valbool)
{
	switch (optname) {
	case SO_TIMESTAMP_OLD:
		__sock_set_timestamps(sk, valbool, false, false);
		break;
	case SO_TIMESTAMP_NEW:
		__sock_set_timestamps(sk, valbool, true, false);
		break;
	case SO_TIMESTAMPNS_OLD:
		__sock_set_timestamps(sk, valbool, false, true);
		break;
	case SO_TIMESTAMPNS_NEW:
		__sock_set_timestamps(sk, valbool, true, true);
		break;
	}
}

/*
 * sock_timestamping_bind_phc - record @phc_index in sk->sk_bind_phc if it
 * is one of the PHC vclock indexes reported by ethtool_get_phc_vclocks()
 * for the device the socket is bound to.
 *
 * Returns -EOPNOTSUPP when the socket has no bound device (or it cannot
 * be found), -EINVAL when @phc_index is not in the list, 0 on success.
 */
static int sock_timestamping_bind_phc(struct sock *sk, int phc_index)
{
	struct net *net = sock_net(sk);
	struct net_device *dev = NULL;
	bool match = false;
	int *vclock_index;
	int i, num;

	if
(sk->sk_bound_dev_if)
		dev = dev_get_by_index(net, sk->sk_bound_dev_if);

	if (!dev) {
		pr_err("%s: sock not bind to device\n", __func__);
		return -EOPNOTSUPP;
	}

	/* The device reference is dropped as soon as the list is fetched. */
	num = ethtool_get_phc_vclocks(dev, &vclock_index);
	dev_put(dev);

	for (i = 0; i < num; i++) {
		if (*(vclock_index + i) == phc_index) {
			match = true;
			break;
		}
	}

	/* vclock_index is only freed when entries were reported. */
	if (num > 0)
		kfree(vclock_index);

	if (!match)
		return -EINVAL;

	WRITE_ONCE(sk->sk_bind_phc, phc_index);

	return 0;
}

/*
 * sock_set_timestamping - validate and apply SO_TIMESTAMPING flags.
 * @optname: SO_TIMESTAMPING_OLD or SO_TIMESTAMPING_NEW; decides the
 *           SOCK_TSTAMP_NEW flag
 * @timestamping: requested flags plus the PHC index for BIND_PHC
 *
 * Returns -EINVAL for flags outside SOF_TIMESTAMPING_MASK, OPT_ID_TCP
 * without OPT_ID, OPT_STATS without OPT_TSONLY, or first-time OPT_ID on a
 * TCP socket in CLOSE/LISTEN state. Returns the error of
 * sock_timestamping_bind_phc() when BIND_PHC is requested and fails.
 * Returns 0 on success.
 */
int sock_set_timestamping(struct sock *sk, int optname,
			  struct so_timestamping timestamping)
{
	int val = timestamping.flags;
	int ret;

	if (val & ~SOF_TIMESTAMPING_MASK)
		return -EINVAL;

	if (val & SOF_TIMESTAMPING_OPT_ID_TCP &&
	    !(val & SOF_TIMESTAMPING_OPT_ID))
		return -EINVAL;

	/* (Re)initialise sk_tskey only when OPT_ID is being newly enabled. */
	if (val & SOF_TIMESTAMPING_OPT_ID &&
	    !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
		if (sk_is_tcp(sk)) {
			if ((1 << sk->sk_state) &
			    (TCPF_CLOSE | TCPF_LISTEN))
				return -EINVAL;
			if (val & SOF_TIMESTAMPING_OPT_ID_TCP)
				atomic_set(&sk->sk_tskey, tcp_sk(sk)->write_seq);
			else
				atomic_set(&sk->sk_tskey, tcp_sk(sk)->snd_una);
		} else {
			atomic_set(&sk->sk_tskey, 0);
		}
	}

	if (val & SOF_TIMESTAMPING_OPT_STATS &&
	    !(val & SOF_TIMESTAMPING_OPT_TSONLY))
		return -EINVAL;

	if (val & SOF_TIMESTAMPING_BIND_PHC) {
		ret = sock_timestamping_bind_phc(sk, timestamping.bind_phc);
		if (ret)
			return ret;
	}

	WRITE_ONCE(sk->sk_tsflags, val);
	sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
	sock_valbool_flag(sk, SOCK_TIMESTAMPING_ANY, !!(val & TSFLAGS_ANY));

	if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
		sock_enable_timestamp(sk,
				      SOCK_TIMESTAMPING_RX_SOFTWARE);
	else
		sock_disable_timestamp(sk,
				       (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
	return 0;
}

#if defined(CONFIG_CGROUP_BPF)
/*
 * bpf_skops_tx_timestamping - build a bpf_sock_ops_kern for @op on a full
 * socket, attach @skb to it, and run the cgroup sock_ops programs.
 * Only the part of the structure before its 'temp' member is zeroed.
 */
void bpf_skops_tx_timestamping(struct sock *sk, struct sk_buff *skb, int op)
{
	struct bpf_sock_ops_kern sock_ops;

	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
	sock_ops.op = op;
	sock_ops.is_fullsock = 1;
	sock_ops.sk = sk;
	bpf_skops_init_skb(&sock_ops, skb, 0);
	__cgroup_bpf_run_filter_sock_ops(sk, &sock_ops,
CGROUP_SOCK_OPS);
}
#endif

/*
 * Enable keepalive on @sk under the socket lock: call the protocol's
 * keepalive hook when it has one, then set SOCK_KEEPOPEN.
 */
void sock_set_keepalive(struct sock *sk)
{
	lock_sock(sk);
	if (sk->sk_prot->keepalive)
		sk->sk_prot->keepalive(sk, true);
	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_keepalive);

/*
 * Set the receive buffer size from the user-supplied @val: the stored
 * value is max(2 * val, SOCK_MIN_RCVBUF) with @val first capped at
 * INT_MAX / 2. Also sets SOCK_RCVBUF_LOCK in sk_userlocks. Does not take
 * the socket lock itself.
 */
static void __sock_set_rcvbuf(struct sock *sk, int val)
{
	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
	 * as a negative value.
	 */
	val = min_t(int, val, INT_MAX / 2);
	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;

	/* We double it on the way in to account for "struct sk_buff" etc.
	 * overhead.   Applications assume that the SO_RCVBUF setting they make
	 * will allow that much actual data to be received on that socket.
	 *
	 * Applications are unaware that "struct sk_buff" and other overheads
	 * allocate from the receive buffer during socket buffer allocation.
	 *
	 * And after considering the possible alternatives, returning the value
	 * we actually used in getsockopt is the most desirable behavior.
	 */
	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
}

/* Locked wrapper around __sock_set_rcvbuf(). */
void sock_set_rcvbuf(struct sock *sk, int val)
{
	lock_sock(sk);
	__sock_set_rcvbuf(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_rcvbuf);

/*
 * Update sk->sk_mark and reset the cached dst, but only when the mark
 * actually changes.
 */
static void __sock_set_mark(struct sock *sk, u32 val)
{
	if (val != sk->sk_mark) {
		WRITE_ONCE(sk->sk_mark, val);
		sk_dst_reset(sk);
	}
}

/* Locked wrapper around __sock_set_mark(). */
void sock_set_mark(struct sock *sk, u32 val)
{
	lock_sock(sk);
	__sock_set_mark(sk, val);
	release_sock(sk);
}
EXPORT_SYMBOL(sock_set_mark);

/*
 * Give back @bytes of reserved memory: the amount is rounded down to a
 * page multiple, subtracted from sk->sk_reserved_mem, and then socket
 * memory is reclaimed. Warns if asked to release more than is reserved.
 */
static void sock_release_reserved_memory(struct sock *sk, int bytes)
{
	/* Round down bytes to multiple of pages */
	bytes = round_down(bytes, PAGE_SIZE);

	WARN_ON(bytes > sk->sk_reserved_mem);
	WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
	sk_mem_reclaim(sk);
}

/*
 * sock_reserve_memory - pre-charge @bytes (rounded to pages by
 * sk_mem_pages()) to the socket's memcg and to its forward allocation.
 *
 * Returns -EOPNOTSUPP when memcg socket accounting is unavailable for this
 * socket, 0 for a zero-byte request or on success, and -ENOMEM when the
 * memcg charge fails or the protocol's allocation would exceed
 * sk_prot_mem_limits(sk, 1) (in which case the charges are rolled back).
 */
static int sock_reserve_memory(struct sock *sk, int bytes)
{
	long allocated;
	bool charged;
	int pages;

	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg || !sk_has_account(sk))
		return -EOPNOTSUPP;

	if (!bytes)
		return 0;

	pages = sk_mem_pages(bytes);

	/* pre-charge to memcg */
	charged =
mem_cgroup_charge_skmem(sk->sk_memcg, pages,
					  GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!charged)
		return -ENOMEM;

	/* pre-charge to forward_alloc */
	sk_memory_allocated_add(sk, pages);
	allocated = sk_memory_allocated(sk);
	/* If the system goes into memory pressure with this
	 * precharge, give up and return error.
	 */
	if (allocated > sk_prot_mem_limits(sk, 1)) {
		/* Undo both charges made above. */
		sk_memory_allocated_sub(sk, pages);
		mem_cgroup_uncharge_skmem(sk->sk_memcg, pages);
		return -ENOMEM;
	}
	sk_forward_alloc_add(sk, pages << PAGE_SHIFT);

	WRITE_ONCE(sk->sk_reserved_mem,
		   sk->sk_reserved_mem + (pages << PAGE_SHIFT));

	return 0;
}

#ifdef CONFIG_PAGE_POOL

/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
 * in 1 syscall. The limit exists to limit the amount of memory the kernel
 * allocates to copy these tokens, and to prevent looping over the frags for
 * too long.
 */
#define MAX_DONTNEED_TOKENS 128
#define MAX_DONTNEED_FRAGS 1024

/*
 * sock_devmem_dontneed - SO_DEVMEM_DONTNEED handler: release the frags
 * named by an array of struct dmabuf_token copied from @optval.
 *
 * Returns -EBADF for non-TCP sockets, -EINVAL when @optlen is not a
 * multiple of the token size or exceeds MAX_DONTNEED_TOKENS tokens,
 * -ENOMEM / -EFAULT on allocation / copy failure; otherwise the number of
 * frags erased from sk->sk_user_frags. Processing stops once
 * MAX_DONTNEED_FRAGS frag ids have been visited.
 */
static noinline_for_stack int
sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
{
	unsigned int num_tokens, i, j, k, netmem_num = 0;
	struct dmabuf_token *tokens;
	int ret = 0, num_frags = 0;
	netmem_ref netmems[16];

	if (!sk_is_tcp(sk))
		return -EBADF;

	if (optlen % sizeof(*tokens) ||
	    optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
		return -EINVAL;

	num_tokens = optlen / sizeof(*tokens);
	tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
	if (!tokens)
		return -ENOMEM;

	if (copy_from_sockptr(tokens, optval, optlen)) {
		kvfree(tokens);
		return -EFAULT;
	}

	xa_lock_bh(&sk->sk_user_frags);
	for (i = 0; i < num_tokens; i++) {
		for (j = 0; j < tokens[i].token_count; j++) {
			if (++num_frags > MAX_DONTNEED_FRAGS)
				goto frag_limit_reached;

			netmem_ref netmem = (__force netmem_ref)__xa_erase(
				&sk->sk_user_frags, tokens[i].token_start + j);

			/* Ids that are absent (or not net_iov backed) are skipped. */
			if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
				continue;

			/*
			 * Erased entries are batched in netmems[]; when the
			 * batch fills, the xarray lock is dropped while the
			 * pages are put.
			 */
			netmems[netmem_num++] = netmem;
			if (netmem_num == ARRAY_SIZE(netmems)) {
				xa_unlock_bh(&sk->sk_user_frags);
				for (k = 0; k < netmem_num; k++)
WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
				netmem_num = 0;
				xa_lock_bh(&sk->sk_user_frags);
			}
			ret++;
		}
	}

frag_limit_reached:
	xa_unlock_bh(&sk->sk_user_frags);
	/* Put whatever is left in the final, partially filled batch. */
	for (k = 0; k < netmem_num; k++)
		WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));

	kvfree(tokens);
	return ret;
}
#endif

/* Take the socket lock unless running inside a bpf context (see below). */
void sockopt_lock_sock(struct sock *sk)
{
	/* When current->bpf_ctx is set, the setsockopt is called from
	 * a bpf prog.  bpf has ensured the sk lock has been
	 * acquired before calling setsockopt().
	 */
	if (has_current_bpf_ctx())
		return;

	lock_sock(sk);
}
EXPORT_SYMBOL(sockopt_lock_sock);

/* Counterpart of sockopt_lock_sock(): a no-op inside a bpf context. */
void sockopt_release_sock(struct sock *sk)
{
	if (has_current_bpf_ctx())
		return;

	release_sock(sk);
}
EXPORT_SYMBOL(sockopt_release_sock);

/* ns_capable() check that is always satisfied inside a bpf context. */
bool sockopt_ns_capable(struct user_namespace *ns, int cap)
{
	return has_current_bpf_ctx() || ns_capable(ns, cap);
}
EXPORT_SYMBOL(sockopt_ns_capable);

/* capable() check that is always satisfied inside a bpf context. */
bool sockopt_capable(int cap)
{
	return has_current_bpf_ctx() || capable(cap);
}
EXPORT_SYMBOL(sockopt_capable);

/*
 * Accept only CLOCK_REALTIME, CLOCK_MONOTONIC and CLOCK_TAI.
 * Returns 0 for those, -EINVAL for anything else.
 */
static int sockopt_validate_clockid(__kernel_clockid_t value)
{
	switch (value) {
	case CLOCK_REALTIME:
	case CLOCK_MONOTONIC:
	case CLOCK_TAI:
		return 0;
	}
	return -EINVAL;
}

/*
 *	This is meant for all protocols to use and covers goings on
 *	at the socket level. Everything here is generic.
 *
 *	SO_BINDTODEVICE is dispatched before any length check; every other
 *	option needs at least sizeof(int) bytes, which are copied into
 *	'val' ('valbool' is its boolean form). Returns 0 or a negative
 *	errno.
 */

int sk_setsockopt(struct sock *sk, int level, int optname,
		  sockptr_t optval, unsigned int optlen)
{
	struct so_timestamping timestamping;
	struct socket *sock = sk->sk_socket;
	struct sock_txtime sk_txtime;
	int val;
	int valbool;
	struct linger ling;
	int ret = 0;

	/*
	 *	Options without arguments
	 */

	if (optname == SO_BINDTODEVICE)
		return sock_setbindtodevice(sk, optval, optlen);

	if (optlen < sizeof(int))
		return -EINVAL;

	if (copy_from_sockptr(&val, optval, sizeof(val)))
		return -EFAULT;

	valbool = val ? 1 : 0;

	/* handle options which do not require locking the socket.
*/
	switch (optname) {
	case SO_PRIORITY:
		if (sk_set_prio_allowed(sk, val)) {
			sock_set_priority(sk, val);
			return 0;
		}
		return -EPERM;
	/* These four options cannot be set here. */
	case SO_TYPE:
	case SO_PROTOCOL:
	case SO_DOMAIN:
	case SO_ERROR:
		return -ENOPROTOOPT;
#ifdef CONFIG_NET_RX_BUSY_POLL
	case SO_BUSY_POLL:
		if (val < 0)
			return -EINVAL;
		WRITE_ONCE(sk->sk_ll_usec, val);
		return 0;
	case SO_PREFER_BUSY_POLL:
		/* Enabling needs CAP_NET_ADMIN; disabling does not. */
		if (valbool && !sockopt_capable(CAP_NET_ADMIN))
			return -EPERM;
		WRITE_ONCE(sk->sk_prefer_busy_poll, valbool);
		return 0;
	case SO_BUSY_POLL_BUDGET:
		/* Raising the budget above its current value needs CAP_NET_ADMIN. */
		if (val > READ_ONCE(sk->sk_busy_poll_budget) &&
		    !sockopt_capable(CAP_NET_ADMIN))
			return -EPERM;
		if (val < 0 || val > U16_MAX)
			return -EINVAL;
		WRITE_ONCE(sk->sk_busy_poll_budget, val);
		return 0;
#endif
	case SO_MAX_PACING_RATE:
		{
		/* A 32-bit all-ones value is widened to an all-ones unsigned long. */
		unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
		unsigned long pacing_rate;

		/*
		 * Where unsigned long is wider than int and the caller passed
		 * enough bytes, re-read the value at full width.
		 */
		if (sizeof(ulval) != sizeof(val) &&
		    optlen >= sizeof(ulval) &&
		    copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
			return -EFAULT;
		}
		if (ulval != ~0UL)
			cmpxchg(&sk->sk_pacing_status,
				SK_PACING_NONE,
				SK_PACING_NEEDED);
		/* Pairs with READ_ONCE() from sk_getsockopt() */
		WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
		/* Clamp the current pacing rate to the new maximum. */
		pacing_rate = READ_ONCE(sk->sk_pacing_rate);
		if (ulval < pacing_rate)
			WRITE_ONCE(sk->sk_pacing_rate, ulval);
		return 0;
		}
	case SO_TXREHASH:
		if (!sk_is_tcp(sk))
			return -EOPNOTSUPP;
		if (val < -1 || val > 1)
			return -EINVAL;
		/* The "default" value is replaced by the per-netns sysctl. */
		if ((u8)val == SOCK_TXREHASH_DEFAULT)
			val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
		/* Paired with READ_ONCE() in tcp_rtx_synack()
		 * and sk_getsockopt().
*/
		WRITE_ONCE(sk->sk_txrehash, (u8)val);
		return 0;
	case SO_PEEK_OFF:
		{
		int (*set_peek_off)(struct sock *sk, int val);

		/* Supported only when the socket's ops provide the hook. */
		set_peek_off = READ_ONCE(sock->ops)->set_peek_off;
		if (set_peek_off)
			ret = set_peek_off(sk, val);
		else
			ret = -EOPNOTSUPP;
		return ret;
		}
#ifdef CONFIG_PAGE_POOL
	case SO_DEVMEM_DONTNEED:
		return sock_devmem_dontneed(sk, optval, optlen);
#endif
	case SO_SNDTIMEO_OLD:
	case SO_SNDTIMEO_NEW:
		return sock_set_timeout(&sk->sk_sndtimeo, optval,
					optlen, optname == SO_SNDTIMEO_OLD);
	case SO_RCVTIMEO_OLD:
	case SO_RCVTIMEO_NEW:
		return sock_set_timeout(&sk->sk_rcvtimeo, optval,
					optlen, optname == SO_RCVTIMEO_OLD);
	}

	/*
	 * Everything below runs with the socket locked (unless in a bpf
	 * context); cases set 'ret' and break so the lock is released at
	 * the end of the function.
	 */
	sockopt_lock_sock(sk);

	switch (optname) {
	case SO_DEBUG:
		if (val && !sockopt_capable(CAP_NET_ADMIN))
			ret = -EACCES;
		else
			sock_valbool_flag(sk, SOCK_DBG, valbool);
		break;
	case SO_REUSEADDR:
		sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
		break;
	case SO_REUSEPORT:
		/* Can only be enabled on inet sockets. */
		if (valbool && !sk_is_inet(sk))
			ret = -EOPNOTSUPP;
		else
			sk->sk_reuseport = valbool;
		break;
	case SO_DONTROUTE:
		sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
		sk_dst_reset(sk);
		break;
	case SO_BROADCAST:
		sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
		break;
	case SO_SNDBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
set_sndbuf:
		/* Ensure val * 2 fits into an int, to prevent max_t()
		 * from treating it as a negative value.
		 */
		val = min_t(int, val, INT_MAX / 2);
		sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
		WRITE_ONCE(sk->sk_sndbuf,
			   max_t(int, val * 2, SOCK_MIN_SNDBUF));
		/* Wake up sending tasks if we upped the value. */
		sk->sk_write_space(sk);
		break;

	case SO_SNDBUFFORCE:
		/* Privileged variant: skips the sysctl_wmem_max clamp. */
		if (!sockopt_capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		/* No negative values (to prevent underflow, as val will be
		 * multiplied by 2).
*/
		if (val < 0)
			val = 0;
		goto set_sndbuf;

	case SO_RCVBUF:
		/* Don't error on this BSD doesn't and if you think
		 * about it this is right. Otherwise apps have to
		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
		 * are treated in BSD as hints
		 */
		__sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
		break;

	case SO_RCVBUFFORCE:
		/* Privileged variant: skips the sysctl_rmem_max clamp. */
		if (!sockopt_capable(CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		/* No negative values (to prevent underflow, as val will be
		 * multiplied by 2).
		 */
		__sock_set_rcvbuf(sk, max(val, 0));
		break;

	case SO_KEEPALIVE:
		if (sk->sk_prot->keepalive)
			sk->sk_prot->keepalive(sk, valbool);
		sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
		break;

	case SO_OOBINLINE:
		sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
		break;

	case SO_NO_CHECK:
		sk->sk_no_check_tx = valbool;
		break;

	case SO_LINGER:
		if (optlen < sizeof(ling)) {
			ret = -EINVAL;	/* 1003.1g */
			break;
		}
		if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
			ret = -EFAULT;
			break;
		}
		if (!ling.l_onoff) {
			sock_reset_flag(sk, SOCK_LINGER);
		} else {
			unsigned long t_sec = ling.l_linger;

			/* Linger time is stored in jiffies, capped at MAX_SCHEDULE_TIMEOUT. */
			if (t_sec >= MAX_SCHEDULE_TIMEOUT / HZ)
				WRITE_ONCE(sk->sk_lingertime, MAX_SCHEDULE_TIMEOUT);
			else
				WRITE_ONCE(sk->sk_lingertime, t_sec * HZ);
			sock_set_flag(sk, SOCK_LINGER);
		}
		break;

	/* Accepted but has no effect. */
	case SO_BSDCOMPAT:
		break;

	case SO_TIMESTAMP_OLD:
	case SO_TIMESTAMP_NEW:
	case SO_TIMESTAMPNS_OLD:
	case SO_TIMESTAMPNS_NEW:
		sock_set_timestamp(sk, optname, valbool);
		break;

	case SO_TIMESTAMPING_NEW:
	case SO_TIMESTAMPING_OLD:
		/*
		 * Either the full struct so_timestamping is supplied, or only
		 * an int of flags (the remaining fields are then zeroed).
		 */
		if (optlen == sizeof(timestamping)) {
			if (copy_from_sockptr(&timestamping, optval,
					      sizeof(timestamping))) {
				ret = -EFAULT;
				break;
			}
		} else {
			memset(&timestamping, 0, sizeof(timestamping));
			timestamping.flags = val;
		}
		ret = sock_set_timestamping(sk, optname, timestamping);
		break;

	case SO_RCVLOWAT:
		{
		int (*set_rcvlowat)(struct sock *sk, int val) = NULL;

		/* Negative values are treated as INT_MAX. */
		if (val < 0)
			val = INT_MAX;
		if (sock)
			set_rcvlowat = READ_ONCE(sock->ops)->set_rcvlowat;
		/* Without a protocol hook, store val directly (0 becomes 1). */
		if (set_rcvlowat)
			ret = set_rcvlowat(sk, val);
		else
			WRITE_ONCE(sk->sk_rcvlowat, val ?
: 1);
		break;
		}
	case SO_ATTACH_FILTER: {
		struct sock_fprog fprog;

		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
		if (!ret)
			ret = sk_attach_filter(&fprog, sk);
		break;
	}
	case SO_ATTACH_BPF:
		/* The argument must be exactly a u32 (passed to sk_attach_bpf() as ufd). */
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_attach_bpf(ufd, sk);
		}
		break;

	case SO_ATTACH_REUSEPORT_CBPF: {
		struct sock_fprog fprog;

		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
		if (!ret)
			ret = sk_reuseport_attach_filter(&fprog, sk);
		break;
	}
	case SO_ATTACH_REUSEPORT_EBPF:
		/* Same argument rule as SO_ATTACH_BPF: exactly a u32. */
		ret = -EINVAL;
		if (optlen == sizeof(u32)) {
			u32 ufd;

			ret = -EFAULT;
			if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
				break;

			ret = sk_reuseport_attach_bpf(ufd, sk);
		}
		break;

	case SO_DETACH_REUSEPORT_BPF:
		ret = reuseport_detach_prog(sk);
		break;

	case SO_DETACH_FILTER:
		ret = sk_detach_filter(sk);
		break;

	case SO_LOCK_FILTER:
		/* Once locked, the filter lock cannot be cleared again. */
		if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
			ret = -EPERM;
		else
			sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
		break;

	case SO_MARK:
		/* Needs CAP_NET_RAW or CAP_NET_ADMIN in the netns's user namespace. */
		if (!sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		__sock_set_mark(sk, val);
		break;
	case SO_RCVMARK:
		sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
		break;

	case SO_RCVPRIORITY:
		sock_valbool_flag(sk, SOCK_RCVPRIORITY, valbool);
		break;

	case SO_RXQ_OVFL:
		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
		break;

	case SO_WIFI_STATUS:
		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
		break;

	case SO_NOFCS:
		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
		break;

	case SO_SELECT_ERR_QUEUE:
		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
		break;

	case SO_PASSCRED:
		if (sk_may_scm_recv(sk))
			sk->sk_scm_credentials = valbool;
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_PASSSEC:
		if (IS_ENABLED(CONFIG_SECURITY_NETWORK) && sk_may_scm_recv(sk))
			sk->sk_scm_security = valbool;
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_PASSPIDFD:
		/* Only supported on sockets for which sk_is_unix() is true. */
		if (sk_is_unix(sk))
			sk->sk_scm_pidfd = valbool;
		else
			ret =
-EOPNOTSUPP;
		break;

	case SO_PASSRIGHTS:
		/* Only supported on sockets for which sk_is_unix() is true. */
		if (sk_is_unix(sk))
			sk->sk_scm_rights = valbool;
		else
			ret = -EOPNOTSUPP;
		break;

	case SO_INCOMING_CPU:
		reuseport_update_incoming_cpu(sk, val);
		break;

	case SO_CNX_ADVICE:
		/* Only the value 1 has an effect; anything else is ignored. */
		if (val == 1)
			dst_negative_advice(sk);
		break;

	case SO_ZEROCOPY:
		/*
		 * Allowed for TCP and UDP datagram sockets over
		 * PF_INET/PF_INET6, and for PF_RDS sockets.
		 */
		if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
			if (!(sk_is_tcp(sk) ||
			      (sk->sk_type == SOCK_DGRAM &&
			       sk->sk_protocol == IPPROTO_UDP)))
				ret = -EOPNOTSUPP;
		} else if (sk->sk_family != PF_RDS) {
			ret = -EOPNOTSUPP;
		}
		if (!ret) {
			if (val < 0 || val > 1)
				ret = -EINVAL;
			else
				sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
		}
		break;

	case SO_TXTIME:
		/* Requires exactly a struct sock_txtime with only known flags. */
		if (optlen != sizeof(struct sock_txtime)) {
			ret = -EINVAL;
			break;
		} else if (copy_from_sockptr(&sk_txtime, optval,
			   sizeof(struct sock_txtime))) {
			ret = -EFAULT;
			break;
		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
			ret = -EINVAL;
			break;
		}
		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
		 * scheduler has enough safe guards.
		 */
		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
		    !sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
			ret = -EPERM;
			break;
		}

		ret = sockopt_validate_clockid(sk_txtime.clockid);
		if (ret)
			break;

		sock_valbool_flag(sk, SOCK_TXTIME, true);
		sk->sk_clockid = sk_txtime.clockid;
		sk->sk_txtime_deadline_mode =
			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
		sk->sk_txtime_report_errors =
			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
		break;

	case SO_BINDTOIFINDEX:
		ret = sock_bindtoindex_locked(sk, val);
		break;

	case SO_BUF_LOCK:
		if (val & ~SOCK_BUF_LOCK_MASK) {
			ret = -EINVAL;
			break;
		}
		/* Replace only the SOCK_BUF_LOCK_MASK bits of sk_userlocks. */
		sk->sk_userlocks = val | (sk->sk_userlocks &
					  ~SOCK_BUF_LOCK_MASK);
		break;

	case SO_RESERVE_MEM:
	{
		int delta;

		if (val < 0) {
			ret = -EINVAL;
			break;
		}

		/* val is the new target; release or reserve the difference. */
		delta = val - sk->sk_reserved_mem;
		if (delta < 0)
			sock_release_reserved_memory(sk, -delta);
		else
			ret = sock_reserve_memory(sk, delta);
		break;
	}

	default:
		ret = -ENOPROTOOPT;
		break;
	}
	sockopt_release_sock(sk);
	return ret;
}

/* Thin wrapper: forwards to sk_setsockopt() with the socket's struct sock. */
int sock_setsockopt(struct socket *sock, int level, int optname,
		    sockptr_t
optval, unsigned int optlen)
{
	return sk_setsockopt(sock->sk, level, optname,
			     optval, optlen);
}
EXPORT_SYMBOL(sock_setsockopt);

/*
 * Return sk->sk_peer_cred after passing it through get_cred() while
 * sk_peer_lock is held.
 * NOTE(review): the caller presumably has to drop the reference with
 * put_cred(); no caller is visible in this part of the file.
 */
static const struct cred *sk_get_peer_cred(struct sock *sk)
{
	const struct cred *cred;

	spin_lock(&sk->sk_peer_lock);
	cred = get_cred(sk->sk_peer_cred);
	spin_unlock(&sk->sk_peer_lock);

	return cred;
}

/*
 * Fill @ucred from @pid and @cred. The pid comes from pid_vnr(); uid and
 * gid are the effective ids translated into the current user namespace
 * with the *_munged helpers, or -1 when @cred is NULL.
 */
static void cred_to_ucred(struct pid *pid, const struct cred *cred,
			  struct ucred *ucred)
{
	ucred->pid = pid_vnr(pid);
	ucred->uid = ucred->gid = -1;
	if (cred) {
		struct user_namespace *current_ns = current_user_ns();

		ucred->uid = from_kuid_munged(current_ns, cred->euid);
		ucred->gid = from_kgid_munged(current_ns, cred->egid);
	}
}

/*
 * Copy every gid in @src to @dst as a packed gid_t array, translating
 * each into the current user namespace first. Returns 0, or -EFAULT as
 * soon as one copy fails.
 */
static int groups_to_user(sockptr_t dst, const struct group_info *src)
{
	struct user_namespace *user_ns = current_user_ns();
	int i;

	for (i = 0; i < src->ngroups; i++) {
		gid_t gid = from_kgid_munged(user_ns, src->gid[i]);

		if (copy_to_sockptr_offset(dst, i * sizeof(gid), &gid, sizeof(gid)))
			return -EFAULT;
	}

	return 0;
}

int sk_getsockopt(struct sock *sk, int level, int optname,
		  sockptr_t optval, sockptr_t optlen)
{
	struct socket *sock = sk->sk_socket;

	union {
		int val;
		u64 val64;
		unsigned long ulval;
		struct linger ling;
		struct old_timeval32 tm32;
		struct __kernel_old_timeval tm;
		struct  __kernel_sock_timeval stm;
		struct sock_txtime txtime;
		struct so_timestamping timestamping;
	} v;

	int lv = sizeof(int);
	int len;

	if (copy_from_sockptr(&len, optlen, sizeof(int)))
		return -EFAULT;
	if (len < 0)
		return -EINVAL;

	memset(&v, 0, sizeof(v));

	switch (optname) {
	case SO_DEBUG:
		v.val = sock_flag(sk, SOCK_DBG);
		break;

	case SO_DONTROUTE:
		v.val = sock_flag(sk, SOCK_LOCALROUTE);
		break;

	case SO_BROADCAST:
		v.val = sock_flag(sk, SOCK_BROADCAST);
		break;

	case SO_SNDBUF:
		v.val = READ_ONCE(sk->sk_sndbuf);
		break;

	case SO_RCVBUF:
		v.val = READ_ONCE(sk->sk_rcvbuf);
		break;

	case SO_REUSEADDR:
		v.val = sk->sk_reuse;
		break;

	case SO_REUSEPORT:
		v.val = sk->sk_reuseport;
		break;

	case SO_KEEPALIVE:
		v.val = sock_flag(sk, SOCK_KEEPOPEN);
		break;

	case SO_TYPE:
		v.val =
sk->sk_type; break; case SO_PROTOCOL: v.val = sk->sk_protocol; break; case SO_DOMAIN: v.val = sk->sk_family; break; case SO_ERROR: v.val = -sock_error(sk); if (v.val == 0) v.val = xchg(&sk->sk_err_soft, 0); break; case SO_OOBINLINE: v.val = sock_flag(sk, SOCK_URGINLINE); break; case SO_NO_CHECK: v.val = sk->sk_no_check_tx; break; case SO_PRIORITY: v.val = READ_ONCE(sk->sk_priority); break; case SO_LINGER: lv = sizeof(v.ling); v.ling.l_onoff = sock_flag(sk, SOCK_LINGER); v.ling.l_linger = READ_ONCE(sk->sk_lingertime) / HZ; break; case SO_BSDCOMPAT: break; case SO_TIMESTAMP_OLD: v.val = sock_flag(sk, SOCK_RCVTSTAMP) && !sock_flag(sk, SOCK_TSTAMP_NEW) && !sock_flag(sk, SOCK_RCVTSTAMPNS); break; case SO_TIMESTAMPNS_OLD: v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMP_NEW: v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMPNS_NEW: v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW); break; case SO_TIMESTAMPING_OLD: case SO_TIMESTAMPING_NEW: lv = sizeof(v.timestamping); /* For the later-added case SO_TIMESTAMPING_NEW: Be strict about only * returning the flags when they were set through the same option. * Don't change the beviour for the old case SO_TIMESTAMPING_OLD. 
*/ if (optname == SO_TIMESTAMPING_OLD || sock_flag(sk, SOCK_TSTAMP_NEW)) { v.timestamping.flags = READ_ONCE(sk->sk_tsflags); v.timestamping.bind_phc = READ_ONCE(sk->sk_bind_phc); } break; case SO_RCVTIMEO_OLD: case SO_RCVTIMEO_NEW: lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v, SO_RCVTIMEO_OLD == optname); break; case SO_SNDTIMEO_OLD: case SO_SNDTIMEO_NEW: lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v, SO_SNDTIMEO_OLD == optname); break; case SO_RCVLOWAT: v.val = READ_ONCE(sk->sk_rcvlowat); break; case SO_SNDLOWAT: v.val = 1; break; case SO_PASSCRED: if (!sk_may_scm_recv(sk)) return -EOPNOTSUPP; v.val = sk->sk_scm_credentials; break; case SO_PASSPIDFD: if (!sk_is_unix(sk)) return -EOPNOTSUPP; v.val = sk->sk_scm_pidfd; break; case SO_PASSRIGHTS: if (!sk_is_unix(sk)) return -EOPNOTSUPP; v.val = sk->sk_scm_rights; break; case SO_PEERCRED: { struct ucred peercred; if (len > sizeof(peercred)) len = sizeof(peercred); spin_lock(&sk->sk_peer_lock); cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred); spin_unlock(&sk->sk_peer_lock); if (copy_to_sockptr(optval, &peercred, len)) return -EFAULT; goto lenout; } case SO_PEERPIDFD: { struct pid *peer_pid; struct file *pidfd_file = NULL; unsigned int flags = 0; int pidfd; if (len > sizeof(pidfd)) len = sizeof(pidfd); spin_lock(&sk->sk_peer_lock); peer_pid = get_pid(sk->sk_peer_pid); spin_unlock(&sk->sk_peer_lock); if (!peer_pid) return -ENODATA; /* The use of PIDFD_STALE requires stashing of struct pid * on pidfs with pidfs_register_pid() and only AF_UNIX * were prepared for this. 
*/ if (sk->sk_family == AF_UNIX) flags = PIDFD_STALE; pidfd = pidfd_prepare(peer_pid, flags, &pidfd_file); put_pid(peer_pid); if (pidfd < 0) return pidfd; if (copy_to_sockptr(optval, &pidfd, len) || copy_to_sockptr(optlen, &len, sizeof(int))) { put_unused_fd(pidfd); fput(pidfd_file); return -EFAULT; } fd_install(pidfd, pidfd_file); return 0; } case SO_PEERGROUPS: { const struct cred *cred; int ret, n; cred = sk_get_peer_cred(sk); if (!cred) return -ENODATA; n = cred->group_info->ngroups; if (len < n * sizeof(gid_t)) { len = n * sizeof(gid_t); put_cred(cred); return copy_to_sockptr(optlen, &len, sizeof(int)) ? -EFAULT : -ERANGE; } len = n * sizeof(gid_t); ret = groups_to_user(optval, cred->group_info); put_cred(cred); if (ret) return ret; goto lenout; } case SO_PEERNAME: { struct sockaddr_storage address; lv = READ_ONCE(sock->ops)->getname(sock, (struct sockaddr *)&address, 2); if (lv < 0) return -ENOTCONN; if (lv < len) return -EINVAL; if (copy_to_sockptr(optval, &address, len)) return -EFAULT; goto lenout; } /* Dubious BSD thing... Probably nobody even uses it, but * the UNIX standard wants it for whatever reason... 
-DaveM */ case SO_ACCEPTCONN: v.val = sk->sk_state == TCP_LISTEN; break; case SO_PASSSEC: if (!IS_ENABLED(CONFIG_SECURITY_NETWORK) || !sk_may_scm_recv(sk)) return -EOPNOTSUPP; v.val = sk->sk_scm_security; break; case SO_PEERSEC: return security_socket_getpeersec_stream(sock, optval, optlen, len); case SO_MARK: v.val = READ_ONCE(sk->sk_mark); break; case SO_RCVMARK: v.val = sock_flag(sk, SOCK_RCVMARK); break; case SO_RCVPRIORITY: v.val = sock_flag(sk, SOCK_RCVPRIORITY); break; case SO_RXQ_OVFL: v.val = sock_flag(sk, SOCK_RXQ_OVFL); break; case SO_WIFI_STATUS: v.val = sock_flag(sk, SOCK_WIFI_STATUS); break; case SO_PEEK_OFF: if (!READ_ONCE(sock->ops)->set_peek_off) return -EOPNOTSUPP; v.val = READ_ONCE(sk->sk_peek_off); break; case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; case SO_BINDTODEVICE: return sock_getbindtodevice(sk, optval, optlen, len); case SO_GET_FILTER: len = sk_get_filter(sk, optval, len); if (len < 0) return len; goto lenout; case SO_LOCK_FILTER: v.val = sock_flag(sk, SOCK_FILTER_LOCKED); break; case SO_BPF_EXTENSIONS: v.val = bpf_tell_extensions(); break; case SO_SELECT_ERR_QUEUE: v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE); break; #ifdef CONFIG_NET_RX_BUSY_POLL case SO_BUSY_POLL: v.val = READ_ONCE(sk->sk_ll_usec); break; case SO_PREFER_BUSY_POLL: v.val = READ_ONCE(sk->sk_prefer_busy_poll); break; #endif case SO_MAX_PACING_RATE: /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */ if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) { lv = sizeof(v.ulval); v.ulval = READ_ONCE(sk->sk_max_pacing_rate); } else { /* 32bit version */ v.val = min_t(unsigned long, ~0U, READ_ONCE(sk->sk_max_pacing_rate)); } break; case SO_INCOMING_CPU: v.val = READ_ONCE(sk->sk_incoming_cpu); break; case SO_MEMINFO: { u32 meminfo[SK_MEMINFO_VARS]; sk_get_meminfo(sk, meminfo); len = min_t(unsigned int, len, sizeof(meminfo)); if (copy_to_sockptr(optval, &meminfo, len)) return -EFAULT; goto lenout; } #ifdef CONFIG_NET_RX_BUSY_POLL case 
SO_INCOMING_NAPI_ID: v.val = READ_ONCE(sk->sk_napi_id); /* aggregate non-NAPI IDs down to 0 */ if (!napi_id_valid(v.val)) v.val = 0; break; #endif case SO_COOKIE: lv = sizeof(u64); if (len < lv) return -EINVAL; v.val64 = sock_gen_cookie(sk); break; case SO_ZEROCOPY: v.val = sock_flag(sk, SOCK_ZEROCOPY); break; case SO_TXTIME: lv = sizeof(v.txtime); v.txtime.clockid = sk->sk_clockid; v.txtime.flags |= sk->sk_txtime_deadline_mode ? SOF_TXTIME_DEADLINE_MODE : 0; v.txtime.flags |= sk->sk_txtime_report_errors ? SOF_TXTIME_REPORT_ERRORS : 0; break; case SO_BINDTOIFINDEX: v.val = READ_ONCE(sk->sk_bound_dev_if); break; case SO_NETNS_COOKIE: lv = sizeof(u64); if (len != lv) return -EINVAL; v.val64 = sock_net(sk)->net_cookie; break; case SO_BUF_LOCK: v.val = sk->sk_userlocks & SOCK_BUF_LOCK_MASK; break; case SO_RESERVE_MEM: v.val = READ_ONCE(sk->sk_reserved_mem); break; case SO_TXREHASH: if (!sk_is_tcp(sk)) return -EOPNOTSUPP; /* Paired with WRITE_ONCE() in sk_setsockopt() */ v.val = READ_ONCE(sk->sk_txrehash); break; default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). */ return -ENOPROTOOPT; } if (len > lv) len = lv; if (copy_to_sockptr(optval, &v, len)) return -EFAULT; lenout: if (copy_to_sockptr(optlen, &len, sizeof(int))) return -EFAULT; return 0; } /* * Initialize an sk_lock. * * (We also register the sk_lock with the lock validator.) 
*/
static inline void sock_lock_init(struct sock *sk)
{
	sk_owner_clear(sk);

	/* Kernel sockets and other sockets use separate per-family
	 * key and name tables.
	 */
	if (sk->sk_kern_sock)
		sock_lock_init_class_and_name(
			sk,
			af_family_kern_slock_key_strings[sk->sk_family],
			af_family_kern_slock_keys + sk->sk_family,
			af_family_kern_key_strings[sk->sk_family],
			af_family_kern_keys + sk->sk_family);
	else
		sock_lock_init_class_and_name(
			sk,
			af_family_slock_key_strings[sk->sk_family],
			af_family_slock_keys + sk->sk_family,
			af_family_key_strings[sk->sk_family],
			af_family_keys + sk->sk_family);
}

/*
 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
 * even temporarily, because of RCU lookups. sk_node should also be left as is.
 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end
 */
static void sock_copy(struct sock *nsk, const struct sock *osk)
{
	const struct proto *prot = READ_ONCE(osk->sk_prot);
#ifdef CONFIG_SECURITY_NETWORK
	/* Remember nsk's own security pointer; the copy below overwrites it. */
	void *sptr = nsk->sk_security;
#endif

	/* If we move sk_tx_queue_mapping out of the private section,
	 * we must check if sk_tx_queue_clear() is called after
	 * sock_copy() in sk_clone_lock().
	 */
	BUILD_BUG_ON(offsetof(struct sock, sk_tx_queue_mapping) <
		     offsetof(struct sock, sk_dontcopy_begin) ||
		     offsetof(struct sock, sk_tx_queue_mapping) >=
		     offsetof(struct sock, sk_dontcopy_end));

	/* Copy everything before the "don't copy" window ... */
	memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));

	/* ... and everything after it, up to the protocol's object size. */
	unsafe_memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
		      prot->obj_size - offsetof(struct sock, sk_dontcopy_end),
		      /* alloc is larger than struct, see sk_prot_alloc() */);

#ifdef CONFIG_SECURITY_NETWORK
	nsk->sk_security = sptr;
	security_sk_clone(osk, nsk);
#endif
}

/* Allocate the memory for one sock of protocol @prot.
 *
 * Uses prot->slab when the protocol has one, otherwise kmalloc() of
 * prot->obj_size. On the slab path __GFP_ZERO is masked out of the
 * allocation flags and sk_prot_clear_nulls() is called instead when
 * want_init_on_alloc(priority) says so. After a successful allocation
 * the security blob is allocated and a reference on prot->owner is
 * taken; if either step fails everything is undone and NULL is returned.
 */
static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
		int family)
{
	struct sock *sk;
	struct kmem_cache *slab;

	slab = prot->slab;
	if (slab != NULL) {
		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
		if (!sk)
			return sk;
		if (want_init_on_alloc(priority))
			sk_prot_clear_nulls(sk, prot->obj_size);
	} else
		sk = kmalloc(prot->obj_size, priority);

	if (sk != NULL) {
		if (security_sk_alloc(sk, family, priority))
			goto out_free;

		if (!try_module_get(prot->owner))
			goto out_free_sec;
	}

	return sk;

out_free_sec:
	security_sk_free(sk);
out_free:
	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	return NULL;
}

/* Release a sock obtained from sk_prot_alloc().
 *
 * prot->owner and prot->slab are read before the sock memory is freed;
 * the module reference is dropped last.
 */
static void sk_prot_free(struct proto *prot, struct sock *sk)
{
	struct kmem_cache *slab;
	struct module *owner;

	owner = prot->owner;
	slab = prot->slab;

	cgroup_sk_free(&sk->sk_cgrp_data);
	mem_cgroup_sk_free(sk);
	security_sk_free(sk);

	sk_owner_put(sk);

	if (slab != NULL)
		kmem_cache_free(slab, sk);
	else
		kfree(sk);
	module_put(owner);
}

/**
 *	sk_alloc - All socket objects are allocated here
 *	@net: the applicable net namespace
 *	@family: protocol family
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *	@prot: struct proto associated with this new sock instance
 *	@kern: is this to be a kernel socket?
*/
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
		      struct proto *prot, int kern)
{
	struct sock *sk;

	/* The sock is requested zeroed; only non-zero fields are set below. */
	sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
	if (sk) {
		sk->sk_family = family;
		/*
		 * See comment in struct sock definition to understand
		 * why we need sk_prot_creator -acme
		 */
		sk->sk_prot = sk->sk_prot_creator = prot;
		sk->sk_kern_sock = kern;
		sock_lock_init(sk);
		/* Only non-kernel sockets hold a tracked netns reference;
		 * kernel sockets take a passive reference instead.
		 */
		sk->sk_net_refcnt = kern ? 0 : 1;
		if (likely(sk->sk_net_refcnt)) {
			get_net_track(net, &sk->ns_tracker, priority);
			sock_inuse_add(net, 1);
		} else {
			net_passive_inc(net);
			__netns_tracker_alloc(net, &sk->ns_tracker,
					      false, priority);
		}

		sock_net_set(sk, net);
		refcount_set(&sk->sk_wmem_alloc, 1);

		mem_cgroup_sk_alloc(sk);
		cgroup_sk_alloc(&sk->sk_cgrp_data);
		sock_update_classid(&sk->sk_cgrp_data);
		sock_update_netprioidx(&sk->sk_cgrp_data);
		sk_tx_queue_clear(sk);
	}

	return sk;
}
EXPORT_SYMBOL(sk_alloc);

/* Sockets having SOCK_RCU_FREE will call this function after one RCU
 * grace period. This is the case for UDP sockets and TCP listeners.
 *
 * Runs the protocol destructor, releases the filter, timestamping,
 * page fragment, peer credentials/pid and netns references, then frees
 * the sock through sk_prot_free().
 */
static void __sk_destruct(struct rcu_head *head)
{
	struct sock *sk = container_of(head, struct sock, sk_rcu);
	struct net *net = sock_net(sk);
	struct sk_filter *filter;

	if (sk->sk_destruct)
		sk->sk_destruct(sk);

	filter = rcu_dereference_check(sk->sk_filter,
				       refcount_read(&sk->sk_wmem_alloc) == 0);
	if (filter) {
		sk_filter_uncharge(sk, filter);
		RCU_INIT_POINTER(sk->sk_filter, NULL);
	}

	sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);

#ifdef CONFIG_BPF_SYSCALL
	bpf_sk_storage_free(sk);
#endif

	/* Option memory should have been fully returned by now. */
	if (atomic_read(&sk->sk_omem_alloc))
		pr_debug("%s: optmem leakage (%d bytes) detected\n",
			 __func__, atomic_read(&sk->sk_omem_alloc));

	if (sk->sk_frag.page) {
		put_page(sk->sk_frag.page);
		sk->sk_frag.page = NULL;
	}

	/* We do not need to acquire sk->sk_peer_lock, we are the last user. */
	put_cred(sk->sk_peer_cred);
	put_pid(sk->sk_peer_pid);

	/* Mirror of the netns accounting done in sk_alloc(). */
	if (likely(sk->sk_net_refcnt)) {
		put_net_track(net, &sk->ns_tracker);
	} else {
		__netns_tracker_free(net, &sk->ns_tracker, false);
		net_passive_dec(net);
	}
	sk_prot_free(sk->sk_prot_creator, sk);
}

/* Convert a socket that does not hold a netns refcount
 * (sk_net_refcnt == 0) into one that does: drop the passive reference
 * and tracker, then take a tracked reference and account the socket as
 * in use. Warns once if the socket already had sk_net_refcnt set.
 */
void sk_net_refcnt_upgrade(struct sock *sk)
{
	struct net *net = sock_net(sk);

	WARN_ON_ONCE(sk->sk_net_refcnt);
	__netns_tracker_free(net, &sk->ns_tracker, false);
	net_passive_dec(net);
	sk->sk_net_refcnt = 1;
	get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
	sock_inuse_add(net, 1);
}
EXPORT_SYMBOL_GPL(sk_net_refcnt_upgrade);

/* Destroy @sk, deferring __sk_destruct() through call_rcu() when the
 * socket has SOCK_RCU_FREE set or had a reuseport callback attached
 * (which is detached here first); otherwise destruct immediately.
 */
void sk_destruct(struct sock *sk)
{
	bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);

	if (rcu_access_pointer(sk->sk_reuseport_cb)) {
		reuseport_detach_sock(sk);
		use_call_rcu = true;
	}

	if (use_call_rcu)
		call_rcu(&sk->sk_rcu, __sk_destruct);
	else
		__sk_destruct(&sk->sk_rcu);
}

/* Final free step once sk_wmem_alloc dropped to zero: update the in-use
 * counter for refcounted sockets, then either hand the socket to
 * sock_diag_broadcast_destroy() (when destroy listeners exist) or
 * destruct it directly.
 */
static void __sk_free(struct sock *sk)
{
	if (likely(sk->sk_net_refcnt))
		sock_inuse_add(sock_net(sk), -1);

	if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
		sock_diag_broadcast_destroy(sk);
	else
		sk_destruct(sk);
}

/* Drop the base sk_wmem_alloc reference of @sk and free it if that was
 * the last one.
 */
void sk_free(struct sock *sk)
{
	/*
	 * We subtract one from sk_wmem_alloc and can know if
	 * some packets are still in some tx queue.
	 * If not null, sock_wfree() will call __sk_free(sk) later
	 */
	if (refcount_dec_and_test(&sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sk_free);

/* Initialize the receive, write and error queues and the callback lock
 * of @sk, and assign each of their locks a per-family lockdep class.
 * The callback lock uses a separate class table for kernel sockets.
 */
static void sk_init_common(struct sock *sk)
{
	skb_queue_head_init(&sk->sk_receive_queue);
	skb_queue_head_init(&sk->sk_write_queue);
	skb_queue_head_init(&sk->sk_error_queue);

	rwlock_init(&sk->sk_callback_lock);
	lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
			af_rlock_keys + sk->sk_family,
			af_family_rlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_write_queue.lock,
			af_wlock_keys + sk->sk_family,
			af_family_wlock_key_strings[sk->sk_family]);
	lockdep_set_class_and_name(&sk->sk_error_queue.lock,
			af_elock_keys + sk->sk_family,
			af_family_elock_key_strings[sk->sk_family]);
	if (sk->sk_kern_sock)
		lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_kern_callback_keys + sk->sk_family,
			af_family_kern_clock_key_strings[sk->sk_family]);
	else
		lockdep_set_class_and_name(&sk->sk_callback_lock,
			af_callback_keys + sk->sk_family,
			af_family_clock_key_strings[sk->sk_family]);
}

/**
 *	sk_clone_lock - clone a socket, and lock its clone
 *	@sk: the socket to clone
 *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
 *
 *	Caller must unlock socket even in error path (bh_unlock_sock(newsk))
 *
 *	Return: the new socket, locked with bh_lock_sock(), or NULL.
 *
 *	NOTE(review): the 'free' path below already calls bh_unlock_sock()
 *	and sk_free() on the clone before returning NULL, so the sentence
 *	about the error path above looks stale - confirm against callers.
 */
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
{
	struct proto *prot = READ_ONCE(sk->sk_prot);
	struct sk_filter *filter;
	bool is_charged = true;
	struct sock *newsk;

	newsk = sk_prot_alloc(prot, priority, sk->sk_family);
	if (!newsk)
		goto out;

	sock_copy(newsk, sk);

	newsk->sk_prot_creator = prot;

	/* SANITY */
	if (likely(newsk->sk_net_refcnt)) {
		get_net_track(sock_net(newsk), &newsk->ns_tracker, priority);
		sock_inuse_add(sock_net(newsk), 1);
	} else {
		/* Kernel sockets are not elevating the struct net refcount.
		 * Instead, use a tracker to more easily detect if a layer
		 * is not properly dismantling its kernel sockets at netns
		 * destroy time.
		 */
		net_passive_inc(sock_net(newsk));
		__netns_tracker_alloc(sock_net(newsk), &newsk->ns_tracker,
				      false, priority);
	}
	sk_node_init(&newsk->sk_node);
	sock_lock_init(newsk);
	bh_lock_sock(newsk);
	/* Reset per-socket state that must not be inherited from the parent. */
	newsk->sk_backlog.head	= newsk->sk_backlog.tail = NULL;
	newsk->sk_backlog.len = 0;

	atomic_set(&newsk->sk_rmem_alloc, 0);

	/* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
	refcount_set(&newsk->sk_wmem_alloc, 1);

	atomic_set(&newsk->sk_omem_alloc, 0);
	sk_init_common(newsk);

	newsk->sk_dst_cache	= NULL;
	newsk->sk_dst_pending_confirm = 0;
	newsk->sk_wmem_queued	= 0;
	newsk->sk_forward_alloc = 0;
	newsk->sk_reserved_mem  = 0;
	atomic_set(&newsk->sk_drops, 0);
	newsk->sk_send_head	= NULL;
	newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
	atomic_set(&newsk->sk_zckey, 0);

	sock_reset_flag(newsk, SOCK_DONE);

	/* sk->sk_memcg will be populated at accept() time */
	newsk->sk_memcg = NULL;

	cgroup_sk_clone(&newsk->sk_cgrp_data);

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter != NULL)
		/* though it's an empty new sock, the charging may fail
		 * if sysctl_optmem_max was changed between creation of
		 * original socket and cloning
		 */
		is_charged = sk_filter_charge(newsk, filter);
	RCU_INIT_POINTER(newsk->sk_filter, filter);
	rcu_read_unlock();

	if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
		/* We need to make sure that we don't uncharge the new
		 * socket if we couldn't charge it in the first place
		 * as otherwise we uncharge the parent's filter.
		 */
		if (!is_charged)
			RCU_INIT_POINTER(newsk->sk_filter, NULL);

		goto free;
	}

	RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);

	if (bpf_sk_storage_clone(sk, newsk))
		goto free;

	/* Clear sk_user_data if parent had the pointer tagged
	 * as not suitable for copying when cloning.
	 */
	if (sk_user_data_is_nocopy(newsk))
		newsk->sk_user_data = NULL;

	newsk->sk_err	   = 0;
	newsk->sk_err_soft = 0;
	newsk->sk_priority = 0;
	newsk->sk_incoming_cpu = raw_smp_processor_id();

	/* Before updating sk_refcnt, we must commit prior changes to memory
	 * (Documentation/RCU/rculist_nulls.rst for details)
	 */
	smp_wmb();
	refcount_set(&newsk->sk_refcnt, 2);

	sk_set_socket(newsk, NULL);
	sk_tx_queue_clear(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, NULL);

	if (newsk->sk_prot->sockets_allocated)
		sk_sockets_allocated_inc(newsk);

	if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
		net_enable_timestamp();
out:
	return newsk;
free:
	/* It is still raw copy of parent, so invalidate
	 * destructor and make plain sk_free()
	 */
	newsk->sk_destruct = NULL;
	bh_unlock_sock(newsk);
	sk_free(newsk);
	newsk = NULL;
	goto out;
}
EXPORT_SYMBOL_GPL(sk_clone_lock);

/* Compute the GSO size limit for @sk on the device behind @dst.
 *
 * Picks the device's gso_max_size for IPv6 sockets whose receive address
 * is not v4-mapped, and gso_ipv4_max_size otherwise. Non-TCP sockets are
 * capped at GSO_LEGACY_MAX_SIZE. The result is reduced by
 * MAX_TCP_HEADER + 1.
 */
static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
{
	bool is_ipv6 = false;
	u32 max_size;

#if IS_ENABLED(CONFIG_IPV6)
	is_ipv6 = (sk->sk_family == AF_INET6 &&
		   !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
	/* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
	max_size = is_ipv6 ? READ_ONCE(dst_dev(dst)->gso_max_size) :
			READ_ONCE(dst_dev(dst)->gso_ipv4_max_size);
	if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
		max_size = GSO_LEGACY_MAX_SIZE;

	return max_size - (MAX_TCP_HEADER + 1);
}

/* Derive sk->sk_route_caps, sk_gso_max_size and sk_gso_max_segs from the
 * device behind @dst, then install @dst on the socket with sk_dst_set().
 */
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
	u32 max_segs = 1;

	sk->sk_route_caps = dst_dev(dst)->features;
	if (sk_is_tcp(sk)) {
		struct inet_connection_sock *icsk = inet_csk(sk);

		sk->sk_route_caps |= NETIF_F_GSO;
		icsk->icsk_ack.dst_quick_ack = dst_metric(dst, RTAX_QUICKACK);
	}
	if (sk->sk_route_caps & NETIF_F_GSO)
		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
	if (unlikely(sk->sk_gso_disabled))
		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
	if (sk_can_gso(sk)) {
		/* A dst with header_len that fails xfrm_dst_offload_ok()
		 * gets all GSO capabilities stripped.
		 */
		if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
		} else {
			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
			sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
			/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
			max_segs = max_t(u32, READ_ONCE(dst_dev(dst)->gso_max_segs), 1);
		}
	}
	sk->sk_gso_max_segs = max_segs;
	sk_dst_set(sk, dst);
}
EXPORT_SYMBOL_GPL(sk_setup_caps);

/*
 *	Simple resource managers for sockets.
 */


/*
 * Write buffer destructor automatically called from kfree_skb.
*/
void sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;
	unsigned int len = skb->truesize;
	bool free;

	if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
		/* RCU-freed sockets using the default write_space callback
		 * release the whole truesize first and notify writers
		 * inside an RCU read-side section.
		 */
		if (sock_flag(sk, SOCK_RCU_FREE) &&
		    sk->sk_write_space == sock_def_write_space) {
			rcu_read_lock();
			free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
			sock_def_write_space_wfree(sk);
			rcu_read_unlock();
			if (unlikely(free))
				__sk_free(sk);
			return;
		}

		/*
		 * Keep a reference on sk_wmem_alloc, this will be released
		 * after sk_write_space() call
		 */
		WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
		sk->sk_write_space(sk);
		len = 1;
	}
	/*
	 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
	 * could not do because of in-flight packets
	 */
	if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
		__sk_free(sk);
}
EXPORT_SYMBOL(sock_wfree);

/* This variant of sock_wfree() is used by TCP,
 * since it sets SOCK_USE_WRITE_QUEUE.
 *
 * It only returns skb->truesize to sk_wmem_alloc and frees the socket
 * when the count reaches zero; no write_space callback is invoked.
 */
void __sock_wfree(struct sk_buff *skb)
{
	struct sock *sk = skb->sk;

	if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
		__sk_free(sk);
}

/* Make @sk the owner of @skb for write accounting.
 *
 * Any previous owner is dropped with skb_orphan(). With CONFIG_INET,
 * sockets that are not full sockets are delegated to
 * skb_set_owner_edemux(). Otherwise the skb gets sock_wfree() as its
 * destructor and its truesize is charged to sk->sk_wmem_alloc.
 */
void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
{
	skb_orphan(skb);
#ifdef CONFIG_INET
	if (unlikely(!sk_fullsock(sk)))
		return skb_set_owner_edemux(skb, sk);
#endif
	skb->sk = sk;
	skb->destructor = sock_wfree;
	skb_set_hash_from_sk(skb, sk);
	/*
	 * We used to take a refcount on sk, but following operation
	 * is enough to guarantee sk_free() won't free this sock until
	 * all in-flight packets are completed
	 */
	refcount_add(skb->truesize, &sk->sk_wmem_alloc);
}
EXPORT_SYMBOL(skb_set_owner_w);

/* Tell whether @skb may be partially orphaned: it must not be a
 * decrypted skb and its destructor must be sock_wfree (or tcp_wfree
 * when CONFIG_INET is enabled).
 */
static bool can_skb_orphan_partial(const struct sk_buff *skb)
{
	/* Drivers depend on in-order delivery for crypto offload,
	 * partial orphan breaks out-of-order-OK logic.
	 */
	if (skb_is_decrypted(skb))
		return false;

	return (skb->destructor == sock_wfree ||
		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
}

/* This helper is used by netem, as it can hold packets in its
 * delay queue.
We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. * But we also want to keep skb->sk set because some packet schedulers * rely on it (sch_fq for example). */ void skb_orphan_partial(struct sk_buff *skb) { if (skb_is_tcp_pure_ack(skb)) return; if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) return; skb_orphan(skb); } EXPORT_SYMBOL(skb_orphan_partial); /* * Read buffer destructor automatically called from kfree_skb. */ void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; unsigned int len = skb->truesize; atomic_sub(len, &sk->sk_rmem_alloc); sk_mem_uncharge(sk, len); } EXPORT_SYMBOL(sock_rfree); /* * Buffer destructor for skbs that are not used directly in read or write * path, e.g. for error handler skbs. Automatically called from kfree_skb. */ void sock_efree(struct sk_buff *skb) { sock_put(skb->sk); } EXPORT_SYMBOL(sock_efree); /* Buffer destructor for prefetch/receive path where reference count may * not be held, e.g. for listen sockets. */ #ifdef CONFIG_INET void sock_pfree(struct sk_buff *skb) { struct sock *sk = skb->sk; if (!sk_is_refcounted(sk)) return; if (sk->sk_state == TCP_NEW_SYN_RECV && inet_reqsk(sk)->syncookie) { inet_reqsk(sk)->rsk_listener = NULL; reqsk_free(inet_reqsk(sk)); return; } sock_gen_put(sk); } EXPORT_SYMBOL(sock_pfree); #endif /* CONFIG_INET */ unsigned long __sock_i_ino(struct sock *sk) { unsigned long ino; read_lock(&sk->sk_callback_lock); ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0; read_unlock(&sk->sk_callback_lock); return ino; } EXPORT_SYMBOL(__sock_i_ino); unsigned long sock_i_ino(struct sock *sk) { unsigned long ino; local_bh_disable(); ino = __sock_i_ino(sk); local_bh_enable(); return ino; } EXPORT_SYMBOL(sock_i_ino); /* * Allocate a skb from the socket's send buffer. 
*/ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, gfp_t priority) { if (force || refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) { struct sk_buff *skb = alloc_skb(size, priority); if (skb) { skb_set_owner_w(skb, sk); return skb; } } return NULL; } EXPORT_SYMBOL(sock_wmalloc); static void sock_ofree(struct sk_buff *skb) { struct sock *sk = skb->sk; atomic_sub(skb->truesize, &sk->sk_omem_alloc); } struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, gfp_t priority) { struct sk_buff *skb; /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > READ_ONCE(sock_net(sk)->core.sysctl_optmem_max)) return NULL; skb = alloc_skb(size, priority); if (!skb) return NULL; atomic_add(skb->truesize, &sk->sk_omem_alloc); skb->sk = sk; skb->destructor = sock_ofree; return skb; } /* * Allocate a memory block from the socket's option memory buffer. */ void *sock_kmalloc(struct sock *sk, int size, gfp_t priority) { int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max); if ((unsigned int)size <= optmem_max && atomic_read(&sk->sk_omem_alloc) + size < optmem_max) { void *mem; /* First do the add, to avoid the race if kmalloc * might sleep. */ atomic_add(size, &sk->sk_omem_alloc); mem = kmalloc(size, priority); if (mem) return mem; atomic_sub(size, &sk->sk_omem_alloc); } return NULL; } EXPORT_SYMBOL(sock_kmalloc); /* * Duplicate the input "src" memory block using the socket's * option memory buffer. */ void *sock_kmemdup(struct sock *sk, const void *src, int size, gfp_t priority) { void *mem; mem = sock_kmalloc(sk, size, priority); if (mem) memcpy(mem, src, size); return mem; } EXPORT_SYMBOL(sock_kmemdup); /* Free an option memory block. Note, we actually want the inline * here as this allows gcc to detect the nullify and fold away the * condition entirely. 
*/
static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
				  const bool nullify)
{
	/* A NULL block is a caller bug: warn once and do not touch
	 * the accounting.
	 */
	if (WARN_ON_ONCE(!mem))
		return;
	if (nullify)
		kfree_sensitive(mem);
	else
		kfree(mem);
	atomic_sub(size, &sk->sk_omem_alloc);
}

/* Free an option memory block of @size bytes and uncharge it from @sk. */
void sock_kfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, false);
}
EXPORT_SYMBOL(sock_kfree_s);

/* Same as sock_kfree_s(), but the block is released through
 * kfree_sensitive().
 */
void sock_kzfree_s(struct sock *sk, void *mem, int size)
{
	__sock_kfree_s(sk, mem, size, true);
}
EXPORT_SYMBOL(sock_kzfree_s);

/* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
   I think, these locks should be removed for datagram sockets.
 *
 * Sleeps interruptibly until write memory drops below sk_sndbuf, the
 * send side is shut down, an error is pending, a signal arrives or the
 * timeout expires. Returns the remaining timeout.
 */
static long sock_wait_for_wmem(struct sock *sk, long timeo)
{
	DEFINE_WAIT(wait);

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	for (;;) {
		if (!timeo)
			break;
		if (signal_pending(current))
			break;
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
		/* Conditions are re-checked after queueing on the waitqueue. */
		if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
			break;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			break;
		if (READ_ONCE(sk->sk_err))
			break;
		timeo = schedule_timeout(timeo);
	}
	finish_wait(sk_sleep(sk), &wait);
	return timeo;
}


/*
 *	Generic send/receive buffer handlers
 */

/* Allocate a send skb for @sk with @header_len linear bytes and
 * @data_len bytes in fragments (see alloc_skb_with_frags()), waiting for
 * write memory if needed.
 *
 * The wait honours the send timeout obtained from sock_sndtimeo(sk,
 * @noblock). On failure NULL is returned and *@errcode holds the reason:
 * the pending socket error, -EPIPE after a send shutdown, -EAGAIN when
 * the timeout is exhausted, or the value of sock_intr_errno() when
 * interrupted by a signal. Allocation failures are reported by
 * alloc_skb_with_frags() through @errcode.
 */
struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
				     unsigned long data_len, int noblock,
				     int *errcode, int max_page_order)
{
	struct sk_buff *skb;
	long timeo;
	int err;

	timeo = sock_sndtimeo(sk, noblock);
	for (;;) {
		err = sock_error(sk);
		if (err != 0)
			goto failure;

		err = -EPIPE;
		if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
			goto failure;

		/* Enough room in the send buffer: go allocate. */
		if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
			break;

		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		err = -EAGAIN;
		if (!timeo)
			goto failure;
		if (signal_pending(current))
			goto interrupted;
		timeo = sock_wait_for_wmem(sk, timeo);
	}
	skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
				   errcode, sk->sk_allocation);
	if (skb)
		skb_set_owner_w(skb, sk);
	return skb;

interrupted:
	err = sock_intr_errno(timeo);
failure:
	*errcode = err;
	return NULL;
}
EXPORT_SYMBOL(sock_alloc_send_pskb);

/* Parse one SOL_SOCKET control message @cmsg into @sockc.
 *
 * Each recognised type validates cmsg_len (and, where applicable,
 * permissions or socket state) before storing its value. SCM_RIGHTS and
 * SCM_CREDENTIALS are accepted and ignored here.
 *
 * Returns 0 on success, -EINVAL for malformed or unknown messages and
 * -EPERM when the caller lacks the required privilege.
 */
int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
		     struct sockcm_cookie *sockc)
{
	u32 tsflags;

	BUILD_BUG_ON(SOF_TIMESTAMPING_LAST == (1 << 31));

	switch (cmsg->cmsg_type) {
	case SO_MARK:
		/* Needs CAP_NET_RAW or CAP_NET_ADMIN in the netns owner. */
		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->mark = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SO_TIMESTAMPING_OLD:
	case SO_TIMESTAMPING_NEW:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;

		tsflags = *(u32 *)CMSG_DATA(cmsg);
		/* Only TX record flags may be supplied per message. */
		if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
			return -EINVAL;

		sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
		sockc->tsflags |= tsflags;
		break;
	case SCM_TXTIME:
		if (!sock_flag(sk, SOCK_TXTIME))
			return -EINVAL;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
			return -EINVAL;
		sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
		break;
	case SCM_TS_OPT_ID:
		if (sk_is_tcp(sk))
			return -EINVAL;
		tsflags = READ_ONCE(sk->sk_tsflags);
		if (!(tsflags & SOF_TIMESTAMPING_OPT_ID))
			return -EINVAL;
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->ts_opt_id = *(u32 *)CMSG_DATA(cmsg);
		sockc->tsflags |= SOCKCM_FLAG_TS_OPT_ID;
		break;
	/* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
	case SCM_RIGHTS:
	case SCM_CREDENTIALS:
		break;
	case SO_PRIORITY:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		if (!sk_set_prio_allowed(sk, *(u32 *)CMSG_DATA(cmsg)))
			return -EPERM;
		sockc->priority = *(u32 *)CMSG_DATA(cmsg);
		break;
	case SCM_DEVMEM_DMABUF:
		if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
			return -EINVAL;
		sockc->dmabuf_id = *(u32 *)CMSG_DATA(cmsg);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL(__sock_cmsg_send);

/* Walk all control messages of @msg and feed the SOL_SOCKET ones to
 * __sock_cmsg_send(). Messages at other levels are skipped.
 * Returns 0, -EINVAL for a malformed header, or the first error from
 * __sock_cmsg_send().
 */
int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
		   struct sockcm_cookie *sockc)
{
	struct cmsghdr *cmsg;
	int ret;

	for_each_cmsghdr(cmsg, msg) {
		if (!CMSG_OK(msg, cmsg))
			return -EINVAL;
		if (cmsg->cmsg_level != SOL_SOCKET)
			continue;
		ret = __sock_cmsg_send(sk, cmsg, sockc);
		if (ret)
			return ret;
	}
	return 0;
}
EXPORT_SYMBOL(sock_cmsg_send);

/* Invoke the protocol's enter_memory_pressure hook, if it has one. */
static void sk_enter_memory_pressure(struct sock *sk)
{
	if (!sk->sk_prot->enter_memory_pressure)
		return;

	sk->sk_prot->enter_memory_pressure(sk);
}

/* Leave memory pressure: call the protocol's leave_memory_pressure hook
 * when present, otherwise clear the protocol's memory_pressure word if
 * it exists and is currently set.
 */
static void sk_leave_memory_pressure(struct sock *sk)
{
	if (sk->sk_prot->leave_memory_pressure) {
		INDIRECT_CALL_INET_1(sk->sk_prot->leave_memory_pressure,
				     tcp_leave_memory_pressure, sk);
	} else {
		unsigned long *memory_pressure = sk->sk_prot->memory_pressure;

		/* Only write when the flag is set. */
		if (memory_pressure && READ_ONCE(*memory_pressure))
			WRITE_ONCE(*memory_pressure, 0);
	}
}

DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);

/**
 * skb_page_frag_refill - check that a page_frag contains enough room
 * @sz: minimum size of the fragment we want to get
 * @pfrag: pointer to page_frag
 * @gfp: priority for memory allocation
 *
 * Note: While this allocator tries to use high order pages, there is
 * no guarantee that allocations succeed. Therefore, @sz MUST be
 * less or equal than PAGE_SIZE.
*/
bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
{
	if (pfrag->page) {
		/* Sole owner of the page: reuse it from the start. */
		if (page_ref_count(pfrag->page) == 1) {
			pfrag->offset = 0;
			return true;
		}
		/* Still enough room left in the current page. */
		if (pfrag->offset + sz <= pfrag->size)
			return true;
		put_page(pfrag->page);
	}

	pfrag->offset = 0;
	if (SKB_FRAG_PAGE_ORDER &&
	    !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
		/* Avoid direct reclaim but allow kswapd to wake */
		pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
					  __GFP_COMP | __GFP_NOWARN |
					  __GFP_NORETRY,
					  SKB_FRAG_PAGE_ORDER);
		if (likely(pfrag->page)) {
			pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
			return true;
		}
	}
	/* Fall back to a single page with the caller's gfp flags. */
	pfrag->page = alloc_page(gfp);
	if (likely(pfrag->page)) {
		pfrag->size = PAGE_SIZE;
		return true;
	}
	return false;
}
EXPORT_SYMBOL(skb_page_frag_refill);

/* Socket flavour of skb_page_frag_refill(): asks for at least 32 bytes
 * using sk->sk_allocation. On failure the socket enters memory pressure
 * and its send buffer is moderated; false is returned.
 */
bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
{
	if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
		return true;

	sk_enter_memory_pressure(sk);
	sk_stream_moderate_sndbuf(sk);
	return false;
}
EXPORT_SYMBOL(sk_page_frag_refill);

/* Sleep uninterruptibly until the socket is no longer owned by a user.
 * Per the annotations, sk_lock.slock is held on entry and on return; it
 * is dropped around each schedule().
 */
void __lock_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
					TASK_UNINTERRUPTIBLE);
		spin_unlock_bh(&sk->sk_lock.slock);
		schedule();
		spin_lock_bh(&sk->sk_lock.slock);
		if (!sock_owned_by_user(sk))
			break;
	}
	finish_wait(&sk->sk_lock.wq, &wait);
}

/* Process every skb queued on the socket backlog.
 * Per the annotations, sk_lock.slock is held on entry and on return; it
 * is dropped while each detached batch is fed to sk_backlog_rcv().
 */
void __release_sock(struct sock *sk)
	__releases(&sk->sk_lock.slock)
	__acquires(&sk->sk_lock.slock)
{
	struct sk_buff *skb, *next;

	while ((skb = sk->sk_backlog.head) != NULL) {
		/* Detach the whole list before dropping the lock. */
		sk->sk_backlog.head = sk->sk_backlog.tail = NULL;

		spin_unlock_bh(&sk->sk_lock.slock);

		do {
			next = skb->next;
			prefetch(next);
			DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb));
			skb_mark_not_on_list(skb);
			sk_backlog_rcv(sk, skb);

			cond_resched();

			skb = next;
		} while (skb != NULL);

		spin_lock_bh(&sk->sk_lock.slock);
	}

	/*
	 * Doing the zeroing here guarantee we can not loop forever
	 * while a wild producer attempts to flood us.
	 */
	sk->sk_backlog.len = 0;
}

/* Take sk_lock.slock, drain the backlog with __release_sock() and run
 * the protocol's release_cb, if any, before dropping the lock again.
 */
void __sk_flush_backlog(struct sock *sk)
{
	spin_lock_bh(&sk->sk_lock.slock);
	__release_sock(sk);

	if (sk->sk_prot->release_cb)
		INDIRECT_CALL_INET_1(sk->sk_prot->release_cb,
				     tcp_release_cb, sk);

	spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL_GPL(__sk_flush_backlog);

/**
 * sk_wait_data - wait for data to arrive at sk_receive_queue
 * @sk:    sock to wait on
 * @timeo: for how long
 * @skb:   last skb seen on sk_receive_queue
 *
 * Now socket state including sk->sk_err is changed only under lock,
 * hence we may omit checks after joining wait queue.
 * We check receive queue before schedule() only as optimization;
 * it is very likely that release_sock() added new data.
 *
 * Return: the result of sk_wait_event() for the condition "the tail of
 * sk_receive_queue differs from @skb".
 */
int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	int rc;

	add_wait_queue(sk_sleep(sk), &wait);
	/* SOCKWQ_ASYNC_WAITDATA is set only for the duration of the wait. */
	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
	remove_wait_queue(sk_sleep(sk), &wait);
	return rc;
}
EXPORT_SYMBOL(sk_wait_data);

/**
 *	__sk_mem_raise_allocated - increase memory_allocated
 *	@sk: socket
 *	@size: memory size to allocate
 *	@amt: pages to allocate
 *	@kind: allocation type
 *
 *	Similar to __sk_mem_schedule(), but does not update sk_forward_alloc.
 *
 *	Unlike the globally shared limits among the sockets under same protocol,
 *	consuming the budget of a memcg won't have direct effect on other ones.
 *	So be optimistic about memcg's tolerance, and leave the callers to decide
 *	whether or not to raise allocated through sk_under_memory_pressure() or
 *	its variants.
 */
int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
	struct mem_cgroup *memcg = mem_cgroup_sockets_enabled ?
sk->sk_memcg : NULL; struct proto *prot = sk->sk_prot; bool charged = true; long allocated; sk_memory_allocated_add(sk, amt); allocated = sk_memory_allocated(sk); if (memcg) { charged = mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge()); if (!charged) goto suppress_allocation; } /* Under limit. */ if (allocated <= sk_prot_mem_limits(sk, 0)) { sk_leave_memory_pressure(sk); return 1; } /* Under pressure. */ if (allocated > sk_prot_mem_limits(sk, 1)) sk_enter_memory_pressure(sk); /* Over hard limit. */ if (allocated > sk_prot_mem_limits(sk, 2)) goto suppress_allocation; /* Guarantee minimum buffer size under pressure (either global * or memcg) to make sure features described in RFC 7323 (TCP * Extensions for High Performance) work properly. * * This rule does NOT stand when exceeds global or memcg's hard * limit, or else a DoS attack can be taken place by spawning * lots of sockets whose usage are under minimum buffer size. */ if (kind == SK_MEM_RECV) { if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; } else { /* SK_MEM_SEND */ int wmem0 = sk_get_wmem0(sk, prot); if (sk->sk_type == SOCK_STREAM) { if (sk->sk_wmem_queued < wmem0) return 1; } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { return 1; } } if (sk_has_memory_pressure(sk)) { u64 alloc; /* The following 'average' heuristic is within the * scope of global accounting, so it only makes * sense for global memory pressure. */ if (!sk_under_global_memory_pressure(sk)) return 1; /* Try to be fair among all the sockets under global * pressure by allowing the ones that below average * usage to raise. */ alloc = sk_sockets_allocated_read_positive(sk); if (sk_prot_mem_limits(sk, 2) > alloc * sk_mem_pages(sk->sk_wmem_queued + atomic_read(&sk->sk_rmem_alloc) + sk->sk_forward_alloc)) return 1; } suppress_allocation: if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) { sk_stream_moderate_sndbuf(sk); /* Fail only if socket is _under_ its sndbuf. 
* In this case we cannot block, so that we have to fail. */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ if (memcg && !charged) { mem_cgroup_charge_skmem(memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; } } if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) trace_sock_exceed_buf_limit(sk, prot, allocated, kind); sk_memory_allocated_sub(sk, amt); if (memcg && charged) mem_cgroup_uncharge_skmem(memcg, amt); return 0; } /** * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated * @sk: socket * @size: memory size to allocate * @kind: allocation type * * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means * rmem allocation. This function assumes that protocols which have * memory_pressure use sk_wmem_queued as write buffer accounting. */ int __sk_mem_schedule(struct sock *sk, int size, int kind) { int ret, amt = sk_mem_pages(size); sk_forward_alloc_add(sk, amt << PAGE_SHIFT); ret = __sk_mem_raise_allocated(sk, size, amt, kind); if (!ret) sk_forward_alloc_add(sk, -(amt << PAGE_SHIFT)); return ret; } EXPORT_SYMBOL(__sk_mem_schedule); /** * __sk_mem_reduce_allocated - reclaim memory_allocated * @sk: socket * @amount: number of quanta * * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc */ void __sk_mem_reduce_allocated(struct sock *sk, int amount) { sk_memory_allocated_sub(sk, amount); if (mem_cgroup_sockets_enabled && sk->sk_memcg) mem_cgroup_uncharge_skmem(sk->sk_memcg, amount); if (sk_under_global_memory_pressure(sk) && (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0))) sk_leave_memory_pressure(sk); } /** * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated * @sk: socket * @amount: number of bytes (rounded down to a PAGE_SIZE multiple) */ void __sk_mem_reclaim(struct sock *sk, int amount) { amount >>= PAGE_SHIFT; sk_forward_alloc_add(sk, -(amount << PAGE_SHIFT)); __sk_mem_reduce_allocated(sk, amount); } EXPORT_SYMBOL(__sk_mem_reclaim); int 
sk_set_peek_off(struct sock *sk, int val) { WRITE_ONCE(sk->sk_peek_off, val); return 0; } EXPORT_SYMBOL_GPL(sk_set_peek_off); /* * Set of default routines for initialising struct proto_ops when * the protocol does not support a particular function. In certain * cases where it makes no sense for a protocol to have a "do nothing" * function, some default processing is provided. */ int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_bind); int sock_no_connect(struct socket *sock, struct sockaddr *saddr, int len, int flags) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_connect); int sock_no_socketpair(struct socket *sock1, struct socket *sock2) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_socketpair); int sock_no_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_accept); int sock_no_getname(struct socket *sock, struct sockaddr *saddr, int peer) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_getname); int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_ioctl); int sock_no_listen(struct socket *sock, int backlog) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_listen); int sock_no_shutdown(struct socket *sock, int how) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_shutdown); int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg); int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_sendmsg_locked); int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { return -EOPNOTSUPP; } EXPORT_SYMBOL(sock_no_recvmsg); int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma) { /* Mirror missing mmap method error code */ return -ENODEV; } EXPORT_SYMBOL(sock_no_mmap); /* * When a file is 
received (via SCM_RIGHTS, etc), we must bump the * various sock-based usage counts. */ void __receive_sock(struct file *file) { struct socket *sock; sock = sock_from_file(file); if (sock) { sock_update_netprioidx(&sock->sk->sk_cgrp_data); sock_update_classid(&sock->sk->sk_cgrp_data); } } /* * Default Socket Callbacks */ static void sock_def_wakeup(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_all(&wq->wait); rcu_read_unlock(); } static void sock_def_error_report(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_poll(&wq->wait, EPOLLERR); sk_wake_async_rcu(sk, SOCK_WAKE_IO, POLL_ERR); rcu_read_unlock(); } void sock_def_readable(struct sock *sk) { struct socket_wq *wq; trace_sk_data_ready(sk); rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI | EPOLLRDNORM | EPOLLRDBAND); sk_wake_async_rcu(sk, SOCK_WAKE_WAITD, POLL_IN); rcu_read_unlock(); } static void sock_def_write_space(struct sock *sk) { struct socket_wq *wq; rcu_read_lock(); /* Do not wake up a writer until he can make "significant" * progress. --DaveM */ if (sock_writeable(sk)) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } /* An optimised version of sock_def_write_space(), should only be called * for SOCK_RCU_FREE sockets under RCU read section and after putting * ->sk_wmem_alloc. */ static void sock_def_write_space_wfree(struct sock *sk) { /* Do not wake up a writer until he can make "significant" * progress. 
--DaveM */ if (sock_writeable(sk)) { struct socket_wq *wq = rcu_dereference(sk->sk_wq); /* rely on refcount_sub from sock_wfree() */ smp_mb__after_atomic(); if (wq && waitqueue_active(&wq->wait)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); /* Should agree with poll, otherwise some programs break */ sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } } static void sock_def_destruct(struct sock *sk) { } void sk_send_sigurg(struct sock *sk) { if (sk->sk_socket && sk->sk_socket->file) if (send_sigurg(sk->sk_socket->file)) sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } EXPORT_SYMBOL(sk_send_sigurg); void sk_reset_timer(struct sock *sk, struct timer_list* timer, unsigned long expires) { if (!mod_timer(timer, expires)) sock_hold(sk); } EXPORT_SYMBOL(sk_reset_timer); void sk_stop_timer(struct sock *sk, struct timer_list* timer) { if (timer_delete(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer); void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer) { if (timer_delete_sync(timer)) __sock_put(sk); } EXPORT_SYMBOL(sk_stop_timer_sync); void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid) { sk_init_common(sk); sk->sk_send_head = NULL; timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default); sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default); sk->sk_state = TCP_CLOSE; sk->sk_use_task_frag = true; sk_set_socket(sk, sock); sock_set_flag(sk, SOCK_ZAPPED); if (sock) { sk->sk_type = sock->type; RCU_INIT_POINTER(sk->sk_wq, &sock->wq); sock->sk = sk; } else { RCU_INIT_POINTER(sk->sk_wq, NULL); } sk->sk_uid = uid; sk->sk_state_change = sock_def_wakeup; sk->sk_data_ready = sock_def_readable; sk->sk_write_space = sock_def_write_space; sk->sk_error_report = sock_def_error_report; sk->sk_destruct = sock_def_destruct; sk->sk_frag.page = NULL; sk->sk_frag.offset = 0; sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; sk->sk_peer_cred = NULL; 
spin_lock_init(&sk->sk_peer_lock); sk->sk_write_pending = 0; sk->sk_rcvlowat = 1; sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; #if BITS_PER_LONG==32 seqlock_init(&sk->sk_stamp_seq); #endif atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read); #endif sk->sk_max_pacing_rate = ~0UL; sk->sk_pacing_rate = ~0UL; WRITE_ONCE(sk->sk_pacing_shift, 10); sk->sk_incoming_cpu = -1; sk_rx_queue_clear(sk); /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.rst for details) */ smp_wmb(); refcount_set(&sk->sk_refcnt, 1); atomic_set(&sk->sk_drops, 0); } EXPORT_SYMBOL(sock_init_data_uid); void sock_init_data(struct socket *sock, struct sock *sk) { kuid_t uid = sock ? SOCK_INODE(sock)->i_uid : make_kuid(sock_net(sk)->user_ns, 0); sock_init_data_uid(sock, sk, uid); } EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { /* The sk_lock has mutex_lock() semantics here. */ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sock_owned_by_user_nocheck(sk)) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_backlog.tail) __release_sock(sk); if (sk->sk_prot->release_cb) INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, tcp_release_cb, sk); sock_release_ownership(sk); if (waitqueue_active(&sk->sk_lock.wq)) wake_up(&sk->sk_lock.wq); spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (!sock_owned_by_user_nocheck(sk)) { /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held. 
* * The 'mutex' is not contended and holding * sock::sk_lock.slock prevents all other lockers to * proceed so the corresponding unlock_sock_fast() can * avoid the slow path of release_sock() completely and * just release slock. * * From a semantical POV this is equivalent to 'acquiring' * the 'mutex', hence the corresponding lockdep * mutex_release() has to happen in the fast path of * unlock_sock_fast(). */ return false; } __lock_sock(sk); sk->sk_lock.owned = 1; __acquire(&sk->sk_lock.slock); spin_unlock_bh(&sk->sk_lock.slock); return true; } EXPORT_SYMBOL(__lock_sock_fast); int sock_gettstamp(struct socket *sock, void __user *userstamp, bool timeval, bool time32) { struct sock *sk = sock->sk; struct timespec64 ts; sock_enable_timestamp(sk, SOCK_TIMESTAMP); ts = ktime_to_timespec64(sock_read_timestamp(sk)); if (ts.tv_sec == -1) return -ENOENT; if (ts.tv_sec == 0) { ktime_t kt = ktime_get_real(); sock_write_timestamp(sk, kt); ts = ktime_to_timespec64(kt); } if (timeval) ts.tv_nsec /= 1000; #ifdef CONFIG_COMPAT_32BIT_TIME if (time32) return put_old_timespec32(&ts, userstamp); #endif #ifdef CONFIG_SPARC64 /* beware of padding in sparc64 timeval */ if (timeval && !in_compat_syscall()) { struct __kernel_old_timeval __user tv = { .tv_sec = ts.tv_sec, .tv_usec = ts.tv_nsec, }; if (copy_to_user(userstamp, &tv, sizeof(tv))) return -EFAULT; return 0; } #endif return put_timespec64(&ts, userstamp); } EXPORT_SYMBOL(sock_gettstamp); void sock_enable_timestamp(struct sock *sk, enum sock_flags flag) { if (!sock_flag(sk, flag)) { unsigned long previous_flags = sk->sk_flags; sock_set_flag(sk, flag); /* * we just set one of the two flags which require net * time stamping, but time stamping might have been on * already because of the other one */ if (sock_needs_netstamp(sk) && !(previous_flags & SK_FLAGS_TIMESTAMP)) net_enable_timestamp(); } } int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level, int type) { struct sock_exterr_skb *serr; struct sk_buff 
*skb; int copied, err; err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (skb == NULL) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; sock_recv_timestamp(msg, sk, skb); serr = SKB_EXT_ERR(skb); put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee); msg->msg_flags |= MSG_ERRQUEUE; err = copied; out_free_skb: kfree_skb(skb); out: return err; } EXPORT_SYMBOL(sock_recv_errqueue); /* * Get a socket option on an socket. * * FIX: POSIX 1003.1g is very ambiguous here. It states that * asynchronous errors should be reported by getsockopt. We assume * this means if you specify SO_ERROR (otherwise what is the point of it). */ int sock_common_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int addr_len = 0; int err; err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } EXPORT_SYMBOL(sock_common_recvmsg); /* * Set socket options on an inet socket. */ int sock_common_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); void sk_common_release(struct sock *sk) { if (sk->sk_prot->destroy) sk->sk_prot->destroy(sk); /* * Observation: when sk_common_release is called, processes have * no access to socket. But net still has. * Step one, detach it from networking: * * A. Remove from hash tables. 
*/ sk->sk_prot->unhash(sk); /* * In this point socket cannot receive new packets, but it is possible * that some packets are in flight because some CPU runs receiver and * did hash table lookup before we unhashed socket. They will achieve * receive queue and will be purged by socket destructor. * * Also we still have packets pending on receive queue and probably, * our own packets waiting in device queues. sock_destroy will drain * receive queue, but transmitted packets will delay socket destruction * until the last reference will be released. */ sock_orphan(sk); xfrm_sk_free_policy(sk); sock_put(sk); } EXPORT_SYMBOL(sk_common_release); void sk_get_meminfo(const struct sock *sk, u32 *mem) { memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS); mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk); mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf); mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk); mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf); mem[SK_MEMINFO_FWD_ALLOC] = READ_ONCE(sk->sk_forward_alloc); mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued); mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc); mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len); mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops); } #ifdef CONFIG_PROC_FS static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); int sock_prot_inuse_get(struct net *net, struct proto *prot) { int cpu, idx = prot->inuse_idx; int res = 0; for_each_possible_cpu(cpu) res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx]; return res >= 0 ? 
res : 0; } EXPORT_SYMBOL_GPL(sock_prot_inuse_get); int sock_inuse_get(struct net *net) { int cpu, res = 0; for_each_possible_cpu(cpu) res += per_cpu_ptr(net->core.prot_inuse, cpu)->all; return res; } EXPORT_SYMBOL_GPL(sock_inuse_get); static int __net_init sock_inuse_init_net(struct net *net) { net->core.prot_inuse = alloc_percpu(struct prot_inuse); if (net->core.prot_inuse == NULL) return -ENOMEM; return 0; } static void __net_exit sock_inuse_exit_net(struct net *net) { free_percpu(net->core.prot_inuse); } static struct pernet_operations net_inuse_ops = { .init = sock_inuse_init_net, .exit = sock_inuse_exit_net, }; static __init int net_inuse_init(void) { if (register_pernet_subsys(&net_inuse_ops)) panic("Cannot initialize net inuse counters"); return 0; } core_initcall(net_inuse_init); static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR)) { pr_err("PROTO_INUSE_NR exhausted\n"); return -ENOSPC; } set_bit(prot->inuse_idx, proto_inuse_idx); return 0; } static void release_proto_idx(struct proto *prot) { if (prot->inuse_idx != PROTO_INUSE_NR) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else static inline int assign_proto_idx(struct proto *prot) { return 0; } static inline void release_proto_idx(struct proto *prot) { } #endif static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot) { if (!twsk_prot) return; kfree(twsk_prot->twsk_slab_name); twsk_prot->twsk_slab_name = NULL; kmem_cache_destroy(twsk_prot->twsk_slab); twsk_prot->twsk_slab = NULL; } static int tw_prot_init(const struct proto *prot) { struct timewait_sock_ops *twsk_prot = prot->twsk_prot; if (!twsk_prot) return 0; twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name); if (!twsk_prot->twsk_slab_name) return -ENOMEM; twsk_prot->twsk_slab = kmem_cache_create(twsk_prot->twsk_slab_name, twsk_prot->twsk_obj_size, 0, SLAB_ACCOUNT | prot->slab_flags, NULL); if 
(!twsk_prot->twsk_slab) { pr_crit("%s: Can't create timewait sock SLAB cache!\n", prot->name); return -ENOMEM; } return 0; } static void req_prot_cleanup(struct request_sock_ops *rsk_prot) { if (!rsk_prot) return; kfree(rsk_prot->slab_name); rsk_prot->slab_name = NULL; kmem_cache_destroy(rsk_prot->slab); rsk_prot->slab = NULL; } static int req_prot_init(const struct proto *prot) { struct request_sock_ops *rsk_prot = prot->rsk_prot; if (!rsk_prot) return 0; rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s", prot->name); if (!rsk_prot->slab_name) return -ENOMEM; rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name, rsk_prot->obj_size, 0, SLAB_ACCOUNT | prot->slab_flags, NULL); if (!rsk_prot->slab) { pr_crit("%s: Can't create request sock SLAB cache!\n", prot->name); return -ENOMEM; } return 0; } int proto_register(struct proto *prot, int alloc_slab) { int ret = -ENOBUFS; if (prot->memory_allocated && !prot->sysctl_mem) { pr_err("%s: missing sysctl_mem\n", prot->name); return -EINVAL; } if (prot->memory_allocated && !prot->per_cpu_fw_alloc) { pr_err("%s: missing per_cpu_fw_alloc\n", prot->name); return -EINVAL; } if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT | prot->slab_flags, prot->useroffset, prot->usersize, NULL); if (prot->slab == NULL) { pr_crit("%s: Can't create sock SLAB cache!\n", prot->name); goto out; } if (req_prot_init(prot)) goto out_free_request_sock_slab; if (tw_prot_init(prot)) goto out_free_timewait_sock_slab; } mutex_lock(&proto_list_mutex); ret = assign_proto_idx(prot); if (ret) { mutex_unlock(&proto_list_mutex); goto out_free_timewait_sock_slab; } list_add(&prot->node, &proto_list); mutex_unlock(&proto_list_mutex); return ret; out_free_timewait_sock_slab: if (alloc_slab) tw_prot_cleanup(prot->twsk_prot); out_free_request_sock_slab: if (alloc_slab) { req_prot_cleanup(prot->rsk_prot); kmem_cache_destroy(prot->slab); prot->slab = NULL; } out: return ret; } 
EXPORT_SYMBOL(proto_register);

/* Undo proto_register(): release the in-use index, unlink @prot from
 * proto_list and destroy its slab caches.
 */
void proto_unregister(struct proto *prot)
{
	mutex_lock(&proto_list_mutex);
	release_proto_idx(prot);
	list_del(&prot->node);
	mutex_unlock(&proto_list_mutex);

	kmem_cache_destroy(prot->slab);
	prot->slab = NULL;

	req_prot_cleanup(prot->rsk_prot);
	tw_prot_cleanup(prot->twsk_prot);
}
EXPORT_SYMBOL(proto_unregister);

/* Request the sock_diag module for @family (and @protocol when non-zero).
 *
 * Returns -ENOENT without requesting anything when @protocol is 0 and the
 * family is not registered, or (CONFIG_INET) when an AF_INET protocol other
 * than IPPROTO_RAW and below MAX_INET_PROTOS has no inet_protos entry;
 * otherwise returns the result of request_module().
 */
int sock_load_diag_module(int family, int protocol)
{
	if (!protocol) {
		if (!sock_is_registered(family))
			return -ENOENT;

		return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				      NETLINK_SOCK_DIAG, family);
	}

#ifdef CONFIG_INET
	if (family == AF_INET &&
	    protocol != IPPROTO_RAW &&
	    protocol < MAX_INET_PROTOS &&
	    !rcu_access_pointer(inet_protos[protocol]))
		return -ENOENT;
#endif

	return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
			      NETLINK_SOCK_DIAG, family, protocol);
}
EXPORT_SYMBOL(sock_load_diag_module);

#ifdef CONFIG_PROC_FS
/* seq_file iteration over proto_list; proto_list_mutex is taken in
 * ->start and released in ->stop.
 */
static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(proto_list_mutex)
{
	mutex_lock(&proto_list_mutex);
	return seq_list_start_head(&proto_list, *pos);
}

static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	return seq_list_next(v, &proto_list, pos);
}

static void proto_seq_stop(struct seq_file *seq, void *v)
	__releases(proto_list_mutex)
{
	mutex_unlock(&proto_list_mutex);
}

/* 'y' when @method is non-NULL, 'n' otherwise. */
static char proto_method_implemented(const void *method)
{
	return method == NULL ? 'n' : 'y';
}

/* Allocated memory of @proto, or -1 when it does not track it. */
static long sock_prot_memory_allocated(struct proto *proto)
{
	return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
}

/* "yes"/"no" for the protocol's memory pressure state, or "NI" when the
 * protocol has no memory_pressure pointer.
 */
static const char *sock_prot_memory_pressure(struct proto *proto)
{
	return proto->memory_pressure != NULL ?
	proto_memory_pressure(proto) ? "yes" : "no" : "NI";
}

/* Emit one row for @proto: its counters followed by a y/n column per
 * method, in the order announced by the header row in proto_seq_show().
 */
static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
{

	seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
		   proto->name,
		   proto->obj_size,
		   sock_prot_inuse_get(seq_file_net(seq), proto),
		   sock_prot_memory_allocated(proto),
		   sock_prot_memory_pressure(proto),
		   proto->max_header,
		   proto->slab == NULL ? "no" : "yes",
		   module_name(proto->owner),
		   proto_method_implemented(proto->close),
		   proto_method_implemented(proto->connect),
		   proto_method_implemented(proto->disconnect),
		   proto_method_implemented(proto->accept),
		   proto_method_implemented(proto->ioctl),
		   proto_method_implemented(proto->init),
		   proto_method_implemented(proto->destroy),
		   proto_method_implemented(proto->shutdown),
		   proto_method_implemented(proto->setsockopt),
		   proto_method_implemented(proto->getsockopt),
		   proto_method_implemented(proto->sendmsg),
		   proto_method_implemented(proto->recvmsg),
		   proto_method_implemented(proto->bind),
		   proto_method_implemented(proto->backlog_rcv),
		   proto_method_implemented(proto->hash),
		   proto_method_implemented(proto->unhash),
		   proto_method_implemented(proto->get_port),
		   proto_method_implemented(proto->enter_memory_pressure));
}

/* The list head itself stands for the header row; every other element is
 * a registered protocol.
 */
static int proto_seq_show(struct seq_file *seq, void *v)
{
	if (v == &proto_list)
		seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
			   "protocol",
			   "size",
			   "sockets",
			   "memory",
			   "press",
			   "maxhdr",
			   "slab",
			   "module",
			   "cl co di ac io in de sh ss gs se re bi br ha uh gp em\n");
	else
		proto_seq_printf(seq, list_entry(v, struct proto, node));
	return 0;
}

static const struct seq_operations proto_seq_ops = {
	.start = proto_seq_start,
	.next = proto_seq_next,
	.stop = proto_seq_stop,
	.show = proto_seq_show,
};

/* Create the per-netns "protocols" proc entry (mode 0444). */
static __net_init int proto_init_net(struct net *net)
{
	if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
			sizeof(struct seq_net_private)))
		return -ENOMEM;

	return 0;
}

static __net_exit void proto_exit_net(struct net *net)
{
	remove_proc_entry("protocols", net->proc_net);
}


static __net_initdata struct pernet_operations proto_net_ops = {
	.init = proto_init_net,
	.exit = proto_exit_net,
};

static int __init proto_init(void)
{
	return register_pernet_subsys(&proto_net_ops);
}

subsys_initcall(proto_init);

#endif /* PROC_FS */

#ifdef CONFIG_NET_RX_BUSY_POLL
/* Busy-loop end condition for the socket passed as @p: true once the
 * receive queue (or, for UDP sockets, the reader queue) is non-empty,
 * otherwise the result of sk_busy_loop_timeout().
 */
bool sk_busy_loop_end(void *p, unsigned long start_time)
{
	struct sock *sk = p;

	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		return true;

	if (sk_is_udp(sk) &&
	    !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
		return true;

	return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */

/* Forward to the protocol's bind_add hook; -EOPNOTSUPP without one. */
int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
{
	if (!sk->sk_prot->bind_add)
		return -EOPNOTSUPP;
	return sk->sk_prot->bind_add(sk, addr, addr_len);
}
EXPORT_SYMBOL(sock_bind_add);

/* Copy 'size' bytes from userspace and return `size` back to userspace */
int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
		     void __user *arg, void *karg, size_t size)
{
	int ret;

	if (copy_from_user(karg, arg, size))
		return -EFAULT;

	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
	/* Nothing is copied back when the protocol handler fails. */
	if (ret)
		return ret;

	if (copy_to_user(arg, karg, size))
		return -EFAULT;

	return 0;
}
EXPORT_SYMBOL(sock_ioctl_inout);

/* This is the most common ioctl prep function, where the result (4 bytes) is
 * copied back to userspace if the ioctl() returns successfully. No input is
 * copied from userspace as input argument.
 */
static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
{
	int ret, karg = 0;

	ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
	if (ret)
		return ret;

	return put_user(karg, (int __user *)arg);
}

/* A wrapper around sock ioctls, which copies the data from userspace
 * (depending on the protocol/ioctl), and copies back the result to userspace.
 * The main motivation for this function is to pass kernel memory to the
 * protocol ioctl callbacks, instead of userspace memory.
*/
int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	/* A positive value means "not handled by a special-case helper". */
	int rc = 1;

	if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
		rc = ipmr_sk_ioctl(sk, cmd, arg);
	else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
		rc = ip6mr_sk_ioctl(sk, cmd, arg);
	else if (sk_is_phonet(sk))
		rc = phonet_sk_ioctl(sk, cmd, arg);

	/* If ioctl was processed, returns its value */
	if (rc <= 0)
		return rc;

	/* Otherwise call the default handler */
	return sock_ioctl_out(sk, cmd, arg);
}
EXPORT_SYMBOL(sk_ioctl);

/* Assert, for each listed struct sock member, that it belongs to the named
 * cacheline group. The function itself only returns 0; every check is a
 * CACHELINE_ASSERT_GROUP_MEMBER() invocation, each member listed once.
 */
static int __init sock_struct_check(void)
{
	/* sock_write_rx */
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_drops);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_peek_off);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_error_queue);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_receive_queue);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rx, sk_backlog);

	/* sock_read_rx */
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_ifindex);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rx_dst_cookie);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvbuf);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_filter);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_wq);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_data_ready);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvtimeo);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rx, sk_rcvlowat);

	/* sock_read_rxtx */
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_err);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_socket);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_rxtx, sk_memcg);

	/* sock_write_rxtx */
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_lock);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_reserved_mem);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_forward_alloc);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_rxtx, sk_tsflags);

	/* sock_write_tx */
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_omem_alloc);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_sndbuf);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_queued);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_wmem_alloc);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tsq_flags);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_send_head);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_queue);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_write_pending);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_dst_pending_confirm);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_status);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_frag);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_timer);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_pacing_rate);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_zckey);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_write_tx, sk_tskey);

	/* sock_read_tx */
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_max_pacing_rate);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_sndtimeo);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_priority);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_mark);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_dst_cache);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_route_caps);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_type);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_size);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_allocation);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_txhash);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_gso_max_segs);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_pacing_shift);
	CACHELINE_ASSERT_GROUP_MEMBER(struct sock, sock_read_tx, sk_use_task_frag);
	return 0;
}

core_initcall(sock_struct_check);
32 31 2 29 29 136 138 30 31 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 // SPDX-License-Identifier: GPL-2.0-only #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> #include <linux/rhashtable.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/ip6_route.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> static DEFINE_MUTEX(flowtable_lock); static LIST_HEAD(flowtables); static void flow_offload_fill_dir(struct flow_offload *flow, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple 
*ft = &flow->tuplehash[dir].tuple; struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple; ft->dir = dir; switch (ctt->src.l3num) { case NFPROTO_IPV4: ft->src_v4 = ctt->src.u3.in; ft->dst_v4 = ctt->dst.u3.in; break; case NFPROTO_IPV6: ft->src_v6 = ctt->src.u3.in6; ft->dst_v6 = ctt->dst.u3.in6; break; } ft->l3proto = ctt->src.l3num; ft->l4proto = ctt->dst.protonum; switch (ctt->dst.protonum) { case IPPROTO_TCP: case IPPROTO_UDP: ft->src_port = ctt->src.u.tcp.port; ft->dst_port = ctt->dst.u.tcp.port; break; } } struct flow_offload *flow_offload_alloc(struct nf_conn *ct) { struct flow_offload *flow; if (unlikely(nf_ct_is_dying(ct))) return NULL; flow = kzalloc(sizeof(*flow), GFP_ATOMIC); if (!flow) return NULL; refcount_inc(&ct->ct_general.use); flow->ct = ct; flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY); if (ct->status & IPS_SRC_NAT) __set_bit(NF_FLOW_SNAT, &flow->flags); if (ct->status & IPS_DST_NAT) __set_bit(NF_FLOW_DNAT, &flow->flags); return flow; } EXPORT_SYMBOL_GPL(flow_offload_alloc); static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple) { if (flow_tuple->l3proto == NFPROTO_IPV6) return rt6_get_cookie(dst_rt6_info(flow_tuple->dst_cache)); return 0; } static struct dst_entry *nft_route_dst_fetch(struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct dst_entry *dst = route->tuple[dir].dst; route->tuple[dir].dst = NULL; return dst; } static int flow_offload_fill_route(struct flow_offload *flow, struct nf_flow_route *route, enum flow_offload_tuple_dir dir) { struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple; struct dst_entry *dst = nft_route_dst_fetch(route, dir); int i, j = 0; switch (flow_tuple->l3proto) { case NFPROTO_IPV4: flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true); break; case NFPROTO_IPV6: flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true); break; } flow_tuple->iifidx = route->tuple[dir].in.ifindex; for (i = 
route->tuple[dir].in.num_encaps - 1; i >= 0; i--) { flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id; flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto; if (route->tuple[dir].in.ingress_vlans & BIT(i)) flow_tuple->in_vlan_ingress |= BIT(j); j++; } flow_tuple->encap_num = route->tuple[dir].in.num_encaps; switch (route->tuple[dir].xmit_type) { case FLOW_OFFLOAD_XMIT_DIRECT: memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest, ETH_ALEN); memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source, ETH_ALEN); flow_tuple->out.ifidx = route->tuple[dir].out.ifindex; flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex; dst_release(dst); break; case FLOW_OFFLOAD_XMIT_XFRM: case FLOW_OFFLOAD_XMIT_NEIGH: flow_tuple->dst_cache = dst; flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple); break; default: WARN_ON_ONCE(1); break; } flow_tuple->xmit_type = route->tuple[dir].xmit_type; return 0; } static void nft_flow_dst_release(struct flow_offload *flow, enum flow_offload_tuple_dir dir) { if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH || flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM) dst_release(flow->tuplehash[dir].tuple.dst_cache); } void flow_offload_route_init(struct flow_offload *flow, struct nf_flow_route *route) { flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL); flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY); flow->type = NF_FLOW_OFFLOAD_ROUTE; } EXPORT_SYMBOL_GPL(flow_offload_route_init); static inline bool nf_flow_has_expired(const struct flow_offload *flow) { return nf_flow_timeout_delta(flow->timeout) <= 0; } static void flow_offload_fixup_tcp(struct nf_conn *ct, u8 tcp_state) { struct ip_ct_tcp *tcp = &ct->proto.tcp; spin_lock_bh(&ct->lock); if (tcp->state != tcp_state) tcp->state = tcp_state; /* syn packet triggers the TCP reopen case from conntrack. 
*/ if (tcp->state == TCP_CONNTRACK_CLOSE) ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; /* Conntrack state is outdated due to offload bypass. * Clear IP_CT_TCP_FLAG_MAXACK_SET, otherwise conntracks * TCP reset validation will fail. */ tcp->seen[0].td_maxwin = 0; tcp->seen[0].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET; tcp->seen[1].td_maxwin = 0; tcp->seen[1].flags &= ~IP_CT_TCP_FLAG_MAXACK_SET; spin_unlock_bh(&ct->lock); } static void flow_offload_fixup_ct(struct flow_offload *flow) { struct nf_conn *ct = flow->ct; struct net *net = nf_ct_net(ct); int l4num = nf_ct_protonum(ct); bool expired, closing = false; u32 offload_timeout = 0; s32 timeout; if (l4num == IPPROTO_TCP) { const struct nf_tcp_net *tn = nf_tcp_pernet(net); u8 tcp_state; /* Enter CLOSE state if fin/rst packet has been seen, this * allows TCP reopen from conntrack. Otherwise, pick up from * the last seen TCP state. */ closing = test_bit(NF_FLOW_CLOSING, &flow->flags); if (closing) { flow_offload_fixup_tcp(ct, TCP_CONNTRACK_CLOSE); timeout = READ_ONCE(tn->timeouts[TCP_CONNTRACK_CLOSE]); expired = false; } else { tcp_state = READ_ONCE(ct->proto.tcp.state); flow_offload_fixup_tcp(ct, tcp_state); timeout = READ_ONCE(tn->timeouts[tcp_state]); expired = nf_flow_has_expired(flow); } offload_timeout = READ_ONCE(tn->offload_timeout); } else if (l4num == IPPROTO_UDP) { const struct nf_udp_net *tn = nf_udp_pernet(net); enum udp_conntrack state = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ? 
UDP_CT_REPLIED : UDP_CT_UNREPLIED; timeout = READ_ONCE(tn->timeouts[state]); expired = nf_flow_has_expired(flow); offload_timeout = READ_ONCE(tn->offload_timeout); } else { return; } if (expired) timeout -= offload_timeout; if (timeout < 0) timeout = 0; if (closing || nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout) nf_ct_refresh(ct, timeout); } static void flow_offload_route_release(struct flow_offload *flow) { nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL); nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY); } void flow_offload_free(struct flow_offload *flow) { switch (flow->type) { case NF_FLOW_OFFLOAD_ROUTE: flow_offload_route_release(flow); break; default: break; } nf_ct_put(flow->ct); kfree_rcu(flow, rcu_head); } EXPORT_SYMBOL_GPL(flow_offload_free); static u32 flow_offload_hash(const void *data, u32 len, u32 seed) { const struct flow_offload_tuple *tuple = data; return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed); } static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed) { const struct flow_offload_tuple_rhash *tuplehash = data; return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed); } static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct flow_offload_tuple *tuple = arg->key; const struct flow_offload_tuple_rhash *x = ptr; if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash))) return 1; return 0; } static const struct rhashtable_params nf_flow_offload_rhash_params = { .head_offset = offsetof(struct flow_offload_tuple_rhash, node), .hashfn = flow_offload_hash, .obj_hashfn = flow_offload_hash_obj, .obj_cmpfn = flow_offload_hash_cmp, .automatic_shrinking = true, }; unsigned long flow_offload_get_timeout(struct flow_offload *flow) { unsigned long timeout = NF_FLOW_TIMEOUT; struct net *net = nf_ct_net(flow->ct); int l4num = nf_ct_protonum(flow->ct); if (l4num == IPPROTO_TCP) { struct nf_tcp_net *tn = 
nf_tcp_pernet(net); timeout = tn->offload_timeout; } else if (l4num == IPPROTO_UDP) { struct nf_udp_net *tn = nf_udp_pernet(net); timeout = tn->offload_timeout; } return timeout; } int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow) { int err; flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, nf_flow_offload_rhash_params); if (err < 0) return err; err = rhashtable_insert_fast(&flow_table->rhashtable, &flow->tuplehash[1].node, nf_flow_offload_rhash_params); if (err < 0) { rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[0].node, nf_flow_offload_rhash_params); return err; } nf_ct_refresh(flow->ct, NF_CT_DAY); if (nf_flowtable_hw_offload(flow_table)) { __set_bit(NF_FLOW_HW, &flow->flags); nf_flow_offload_add(flow_table, flow); } return 0; } EXPORT_SYMBOL_GPL(flow_offload_add); void flow_offload_refresh(struct nf_flowtable *flow_table, struct flow_offload *flow, bool force) { u32 timeout; timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow); if (force || timeout - READ_ONCE(flow->timeout) > HZ) WRITE_ONCE(flow->timeout, timeout); else return; if (likely(!nf_flowtable_hw_offload(flow_table)) || test_bit(NF_FLOW_CLOSING, &flow->flags)) return; nf_flow_offload_add(flow_table, flow); } EXPORT_SYMBOL_GPL(flow_offload_refresh); static void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow) { rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node, nf_flow_offload_rhash_params); rhashtable_remove_fast(&flow_table->rhashtable, &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node, nf_flow_offload_rhash_params); flow_offload_free(flow); } void flow_offload_teardown(struct flow_offload *flow) { clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status); if (!test_and_set_bit(NF_FLOW_TEARDOWN, &flow->flags)) flow_offload_fixup_ct(flow); } 
EXPORT_SYMBOL_GPL(flow_offload_teardown); struct flow_offload_tuple_rhash * flow_offload_lookup(struct nf_flowtable *flow_table, struct flow_offload_tuple *tuple) { struct flow_offload_tuple_rhash *tuplehash; struct flow_offload *flow; int dir; tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple, nf_flow_offload_rhash_params); if (!tuplehash) return NULL; dir = tuplehash->tuple.dir; flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]); if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) return NULL; if (unlikely(nf_ct_is_dying(flow->ct))) return NULL; return tuplehash; } EXPORT_SYMBOL_GPL(flow_offload_lookup); static int nf_flow_table_iterate(struct nf_flowtable *flow_table, void (*iter)(struct nf_flowtable *flowtable, struct flow_offload *flow, void *data), void *data) { struct flow_offload_tuple_rhash *tuplehash; struct rhashtable_iter hti; struct flow_offload *flow; int err = 0; rhashtable_walk_enter(&flow_table->rhashtable, &hti); rhashtable_walk_start(&hti); while ((tuplehash = rhashtable_walk_next(&hti))) { if (IS_ERR(tuplehash)) { if (PTR_ERR(tuplehash) != -EAGAIN) { err = PTR_ERR(tuplehash); break; } continue; } if (tuplehash->tuple.dir) continue; flow = container_of(tuplehash, struct flow_offload, tuplehash[0]); iter(flow_table, flow, data); } rhashtable_walk_stop(&hti); rhashtable_walk_exit(&hti); return err; } static bool nf_flow_custom_gc(struct nf_flowtable *flow_table, const struct flow_offload *flow) { return flow_table->type->gc && flow_table->type->gc(flow); } /** * nf_flow_table_tcp_timeout() - new timeout of offloaded tcp entry * @ct: Flowtable offloaded tcp ct * * Return: number of seconds when ct entry should expire. 
*/ static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct) { u8 state = READ_ONCE(ct->proto.tcp.state); switch (state) { case TCP_CONNTRACK_SYN_SENT: case TCP_CONNTRACK_SYN_RECV: return 0; case TCP_CONNTRACK_ESTABLISHED: return NF_CT_DAY; case TCP_CONNTRACK_FIN_WAIT: case TCP_CONNTRACK_CLOSE_WAIT: case TCP_CONNTRACK_LAST_ACK: case TCP_CONNTRACK_TIME_WAIT: return 5 * 60 * HZ; case TCP_CONNTRACK_CLOSE: return 0; } return 0; } /** * nf_flow_table_extend_ct_timeout() - Extend ct timeout of offloaded conntrack entry * @ct: Flowtable offloaded ct * * Datapath lookups in the conntrack table will evict nf_conn entries * if they have expired. * * Once nf_conn entries have been offloaded, nf_conntrack might not see any * packets anymore. Thus ct->timeout is no longer refreshed and ct can * be evicted. * * To avoid the need for an additional check on the offload bit for every * packet processed via nf_conntrack_in(), set an arbitrary timeout large * enough not to ever expire, this save us a check for the IPS_OFFLOAD_BIT * from the packet path via nf_ct_is_expired(). */ static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct) { static const u32 min_timeout = 5 * 60 * HZ; u32 expires = nf_ct_expires(ct); /* normal case: large enough timeout, nothing to do. */ if (likely(expires >= min_timeout)) return; /* must check offload bit after this, we do not hold any locks. * flowtable and ct entries could have been removed on another CPU. */ if (!refcount_inc_not_zero(&ct->ct_general.use)) return; /* load ct->status after refcount increase */ smp_acquire__after_ctrl_dep(); if (nf_ct_is_confirmed(ct) && test_bit(IPS_OFFLOAD_BIT, &ct->status)) { u8 l4proto = nf_ct_protonum(ct); u32 new_timeout = true; switch (l4proto) { case IPPROTO_UDP: new_timeout = NF_CT_DAY; break; case IPPROTO_TCP: new_timeout = nf_flow_table_tcp_timeout(ct); break; default: WARN_ON_ONCE(1); break; } /* Update to ct->timeout from nf_conntrack happens * without holding ct->lock. 
* * Use cmpxchg to ensure timeout extension doesn't * happen when we race with conntrack datapath. * * The inverse -- datapath updating ->timeout right * after this -- is fine, datapath is authoritative. */ if (new_timeout) { new_timeout += nfct_time_stamp; cmpxchg(&ct->timeout, expires, new_timeout); } } nf_ct_put(ct); } static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { bool teardown = test_bit(NF_FLOW_TEARDOWN, &flow->flags); if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || nf_flow_custom_gc(flow_table, flow)) { flow_offload_teardown(flow); teardown = true; } else if (!teardown) { nf_flow_table_extend_ct_timeout(flow->ct); } if (teardown) { if (test_bit(NF_FLOW_HW, &flow->flags)) { if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) nf_flow_offload_del(flow_table, flow); else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags)) flow_offload_del(flow_table, flow); } else { flow_offload_del(flow_table, flow); } } else if (test_bit(NF_FLOW_CLOSING, &flow->flags) && test_bit(NF_FLOW_HW, &flow->flags) && !test_bit(NF_FLOW_HW_DYING, &flow->flags)) { nf_flow_offload_del(flow_table, flow); } else if (test_bit(NF_FLOW_HW, &flow->flags)) { nf_flow_offload_stats(flow_table, flow); } } void nf_flow_table_gc_run(struct nf_flowtable *flow_table) { nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, NULL); } static void nf_flow_offload_work_gc(struct work_struct *work) { struct nf_flowtable *flow_table; flow_table = container_of(work, struct nf_flowtable, gc_work.work); nf_flow_table_gc_run(flow_table); queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); } static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 new_port) { struct tcphdr *tcph; tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); } static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, __be16 port, __be16 
new_port) { struct udphdr *udph; udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, new_port, false); if (!udph->check) udph->check = CSUM_MANGLED_0; } } static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, u8 protocol, __be16 port, __be16 new_port) { switch (protocol) { case IPPROTO_TCP: nf_flow_nat_port_tcp(skb, thoff, port, new_port); break; case IPPROTO_UDP: nf_flow_nat_port_udp(skb, thoff, port, new_port); break; } } void nf_flow_snat_port(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port; hdr = (void *)(skb_network_header(skb) + thoff); switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = hdr->source; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port; hdr->source = new_port; break; case FLOW_OFFLOAD_DIR_REPLY: port = hdr->dest; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; hdr->dest = new_port; break; } nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_snat_port); void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port; hdr = (void *)(skb_network_header(skb) + thoff); switch (dir) { case FLOW_OFFLOAD_DIR_ORIGINAL: port = hdr->dest; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port; hdr->dest = new_port; break; case FLOW_OFFLOAD_DIR_REPLY: port = hdr->source; new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; hdr->source = new_port; break; } nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_dnat_port); int nf_flow_table_init(struct nf_flowtable *flowtable) { int err; INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); 
flow_block_init(&flowtable->flow_block); init_rwsem(&flowtable->flow_block_lock); err = rhashtable_init(&flowtable->rhashtable, &nf_flow_offload_rhash_params); if (err < 0) return err; queue_delayed_work(system_power_efficient_wq, &flowtable->gc_work, HZ); mutex_lock(&flowtable_lock); list_add(&flowtable->list, &flowtables); mutex_unlock(&flowtable_lock); return 0; } EXPORT_SYMBOL_GPL(nf_flow_table_init); static void nf_flow_table_do_cleanup(struct nf_flowtable *flow_table, struct flow_offload *flow, void *data) { struct net_device *dev = data; if (!dev) { flow_offload_teardown(flow); return; } if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) && (flow->tuplehash[0].tuple.iifidx == dev->ifindex || flow->tuplehash[1].tuple.iifidx == dev->ifindex)) flow_offload_teardown(flow); } void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable, struct net_device *dev) { nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev); flush_delayed_work(&flowtable->gc_work); nf_flow_table_offload_flush(flowtable); } void nf_flow_table_cleanup(struct net_device *dev) { struct nf_flowtable *flowtable; mutex_lock(&flowtable_lock); list_for_each_entry(flowtable, &flowtables, list) nf_flow_table_gc_cleanup(flowtable, dev); mutex_unlock(&flowtable_lock); } EXPORT_SYMBOL_GPL(nf_flow_table_cleanup); void nf_flow_table_free(struct nf_flowtable *flow_table) { mutex_lock(&flowtable_lock); list_del(&flow_table->list); mutex_unlock(&flowtable_lock); cancel_delayed_work_sync(&flow_table->gc_work); nf_flow_table_offload_flush(flow_table); /* ... no more pending work after this stage ... */ nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL); nf_flow_table_gc_run(flow_table); nf_flow_table_offload_flush_cleanup(flow_table); rhashtable_destroy(&flow_table->rhashtable); } EXPORT_SYMBOL_GPL(nf_flow_table_free); static int nf_flow_table_init_net(struct net *net) { net->ft.stat = alloc_percpu(struct nf_flow_table_stat); return net->ft.stat ? 
0 : -ENOMEM; } static void nf_flow_table_fini_net(struct net *net) { free_percpu(net->ft.stat); } static int nf_flow_table_pernet_init(struct net *net) { int ret; ret = nf_flow_table_init_net(net); if (ret < 0) return ret; ret = nf_flow_table_init_proc(net); if (ret < 0) goto out_proc; return 0; out_proc: nf_flow_table_fini_net(net); return ret; } static void nf_flow_table_pernet_exit(struct list_head *net_exit_list) { struct net *net; list_for_each_entry(net, net_exit_list, exit_list) { nf_flow_table_fini_proc(net); nf_flow_table_fini_net(net); } } static struct pernet_operations nf_flow_table_net_ops = { .init = nf_flow_table_pernet_init, .exit_batch = nf_flow_table_pernet_exit, }; static int __init nf_flow_table_module_init(void) { int ret; ret = register_pernet_subsys(&nf_flow_table_net_ops); if (ret < 0) return ret; ret = nf_flow_table_offload_init(); if (ret) goto out_offload; ret = nf_flow_register_bpf(); if (ret) goto out_bpf; return 0; out_bpf: nf_flow_table_offload_exit(); out_offload: unregister_pernet_subsys(&nf_flow_table_net_ops); return ret; } static void __exit nf_flow_table_module_exit(void) { nf_flow_table_offload_exit(); unregister_pernet_subsys(&nf_flow_table_net_ops); } module_init(nf_flow_table_module_init); module_exit(nf_flow_table_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); MODULE_DESCRIPTION("Netfilter flow table module");
85 87 85 17 16 17 2 1 7 12 15 1 14 14 8 4 4 2 14 14 1 13 10 10 16 16 15 1 1 16 16 15 1 16 15 1 59 70 16 59 22 22 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. 
* * Generic INET6 transport hashtables * * Authors: Lotsa people, from code originally in tcp, generalised here * by Arnaldo Carvalho de Melo <acme@mandriva.com> */ #include <linux/module.h> #include <linux/random.h> #include <net/addrconf.h> #include <net/hotdata.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> #include <net/inet6_hashtables.h> #include <net/secure_seq.h> #include <net/ip.h> #include <net/sock_reuseport.h> #include <net/tcp.h> u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { u32 lhash, fhash; net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret)); lhash = (__force u32)laddr->s6_addr32[3]; fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); return lport + __inet6_ehashfn(lhash, 0, fhash, fport, inet6_ehash_secret + net_hash_mix(net)); } EXPORT_SYMBOL_GPL(inet6_ehashfn); /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM * * The sockhash lock must be held as a reader here. */ struct sock *__inet6_lookup_established(const struct net *net, struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, const int sdif) { struct sock *sk; const struct hlist_nulls_node *node; const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ unsigned int hash = inet6_ehashfn(net, daddr, hnum, saddr, sport); unsigned int slot = hash & hashinfo->ehash_mask; struct inet_ehash_bucket *head = &hashinfo->ehash[slot]; begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; if (!inet6_match(net, sk, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!inet6_match(net, sk, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } goto found; } if (get_nulls_value(node) != slot) goto begin; out: sk = NULL; found: return sk; } EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, const struct net *net, const unsigned short hnum, const struct in6_addr *daddr, const int dif, const int sdif) { int score = -1; if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum && sk->sk_family == PF_INET6) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return -1; if (!inet_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) return -1; score = sk->sk_bound_dev_if ? 2 : 1; if (READ_ONCE(sk->sk_incoming_cpu) == raw_smp_processor_id()) score++; } return score; } /** * inet6_lookup_reuseport() - execute reuseport logic on AF_INET6 socket if necessary. * @net: network namespace. * @sk: AF_INET6 socket, must be in TCP_LISTEN state for TCP or TCP_CLOSE for UDP. * @skb: context for a potential SK_REUSEPORT program. * @doff: header offset. * @saddr: source address. * @sport: source port. * @daddr: destination address. * @hnum: destination port in host byte order. * @ehashfn: hash function used to generate the fallback hash. * * Return: NULL if sk doesn't have SO_REUSEPORT set, otherwise a pointer to * the selected sock or an error. 
*/ struct sock *inet6_lookup_reuseport(const struct net *net, struct sock *sk, struct sk_buff *skb, int doff, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, inet6_ehashfn_t *ehashfn) { struct sock *reuse_sk = NULL; u32 phash; if (sk->sk_reuseport) { phash = INDIRECT_CALL_INET(ehashfn, udp6_ehashfn, inet6_ehashfn, net, daddr, hnum, saddr, sport); reuse_sk = reuseport_select_sock(sk, phash, skb, doff); } return reuse_sk; } EXPORT_SYMBOL_GPL(inet6_lookup_reuseport); /* called with rcu_read_lock() */ static struct sock *inet6_lhash2_lookup(const struct net *net, struct inet_listen_hashbucket *ilb2, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { struct sock *sk, *result = NULL; struct hlist_nulls_node *node; int score, hiscore = 0; sk_nulls_for_each_rcu(sk, node, &ilb2->nulls_head) { score = compute_score(sk, net, hnum, daddr, dif, sdif); if (score > hiscore) { result = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, inet6_ehashfn); if (result) return result; result = sk; hiscore = score; } } return result; } struct sock *inet6_lookup_run_sk_lookup(const struct net *net, int protocol, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, const int dif, inet6_ehashfn_t *ehashfn) { struct sock *sk, *reuse_sk; bool no_reuseport; no_reuseport = bpf_sk_lookup_run_v6(net, protocol, saddr, sport, daddr, hnum, dif, &sk); if (no_reuseport || IS_ERR_OR_NULL(sk)) return sk; reuse_sk = inet6_lookup_reuseport(net, sk, skb, doff, saddr, sport, daddr, hnum, ehashfn); if (reuse_sk) sk = reuse_sk; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup_run_sk_lookup); struct sock *inet6_lookup_listener(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, 
const struct in6_addr *daddr, const unsigned short hnum, const int dif, const int sdif) { struct inet_listen_hashbucket *ilb2; struct sock *result = NULL; unsigned int hash2; /* Lookup redirect from BPF */ if (static_branch_unlikely(&bpf_sk_lookup_enabled) && hashinfo == net->ipv4.tcp_death_row.hashinfo) { result = inet6_lookup_run_sk_lookup(net, IPPROTO_TCP, skb, doff, saddr, sport, daddr, hnum, dif, inet6_ehashfn); if (result) goto done; } hash2 = ipv6_portaddr_hash(net, daddr, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, daddr, hnum, dif, sdif); if (result) goto done; /* Lookup lhash2 with in6addr_any */ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum); ilb2 = inet_lhash2_bucket(hashinfo, hash2); result = inet6_lhash2_lookup(net, ilb2, skb, doff, saddr, sport, &in6addr_any, hnum, dif, sdif); done: if (IS_ERR(result)) return NULL; return result; } EXPORT_SYMBOL_GPL(inet6_lookup_listener); struct sock *inet6_lookup(const struct net *net, struct inet_hashinfo *hashinfo, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, const __be16 dport, const int dif) { struct sock *sk; bool refcounted; sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; } EXPORT_SYMBOL_GPL(inet6_lookup); static int __inet6_check_established(struct inet_timewait_death_row *death_row, struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp, bool rcu_lookup, u32 hash) { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; struct net *net = sock_net(sk); const int sdif = l3mdev_master_ifindex_by_index(net, dif); const __portpair ports = 
INET_COMBINED_PORTS(inet->inet_dport, lport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct inet_timewait_sock *tw = NULL; const struct hlist_nulls_node *node; struct sock *sk2; spinlock_t *lock; if (rcu_lookup) { sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash || !inet6_match(net, sk2, saddr, daddr, ports, dif, sdif)) continue; if (sk2->sk_state == TCP_TIME_WAIT) break; return -EADDRNOTAVAIL; } return 0; } lock = inet_ehash_lockp(hinfo, hash); spin_lock(lock); sk_nulls_for_each(sk2, node, &head->chain) { if (sk2->sk_hash != hash) continue; if (likely(inet6_match(net, sk2, saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (sk->sk_protocol == IPPROTO_TCP && tcp_twsk_unique(sk, sk2, twp)) break; } goto not_unique; } } /* Must record num and sport now. Otherwise we will see * in hash table socket with a funny identity. */ inet->inet_num = lport; inet->inet_sport = htons(lport); sk->sk_hash = hash; WARN_ON(!sk_unhashed(sk)); __sk_nulls_add_node_rcu(sk, &head->chain); if (tw) { sk_nulls_del_node_init_rcu((struct sock *)tw); __NET_INC_STATS(net, LINUX_MIB_TIMEWAITRECYCLED); } spin_unlock(lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); if (twp) { *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... 
*/ inet_twsk_deschedule_put(tw); } return 0; not_unique: spin_unlock(lock); return -EADDRNOTAVAIL; } static u64 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_dport); } int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const struct inet_sock *inet = inet_sk(sk); const struct net *net = sock_net(sk); u64 port_offset = 0; u32 hash_port0; if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport); return __inet_hash_connect(death_row, sk, port_offset, hash_port0, __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); int inet6_hash(struct sock *sk) { int err = 0; if (sk->sk_state != TCP_CLOSE) err = __inet_hash(sk, NULL); return err; } EXPORT_SYMBOL_GPL(inet6_hash);
1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 // SPDX-License-Identifier: GPL-2.0-only #include <linux/etherdevice.h> #include <linux/if_tap.h> #include <linux/if_vlan.h> #include <linux/interrupt.h> #include <linux/nsproxy.h> #include <linux/compat.h> #include <linux/if_tun.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/cache.h> #include <linux/sched/signal.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/fs.h> #include <linux/uio.h> #include <net/gso.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> #include <net/xdp.h> #include 
<linux/virtio_net.h>
#include <linux/skb_array.h>

#include "tun_vnet.h"

/* IFF_* bits masked out of the TUNSETIFF check and advertised by TUNGETFEATURES. */
#define TAP_IFFEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)

/*
 * Protocol descriptor passed to sk_alloc() in tap_open(); obj_size is set so
 * that the allocated object is a whole struct tap_queue.
 */
static struct proto tap_proto = {
	.name = "tap",
	.owner = THIS_MODULE,
	.obj_size = sizeof(struct tap_queue),
};

/* Upper bound used for minor allocation and for the chrdev region size. */
#define TAP_NUM_DEVS (1U << MINORBITS)

/* All registered majors; tap_get_major() walks this list under RCU. */
static LIST_HEAD(major_list);

/* Per-major bookkeeping: the table that maps minor numbers to tap devices. */
struct major_info {
	struct rcu_head rcu;
	dev_t major;			/* value compared in tap_get_major() */
	struct idr minor_idr;		/* minor number -> struct tap_dev */
	spinlock_t minor_lock;		/* held around minor_idr accesses */
	const char *device_name;
	struct list_head next;		/* link in major_list */
};

/* Linear copy length used on the zerocopy path when hdr_len is zero. */
#define GOODCOPY_LEN 128

/* Defined near the end of the file; declared early for tap_open(). */
static const struct proto_ops tap_socket_ops;

/* Receive offloads switched on/off as a group by set_offload(). */
#define RX_OFFLOADS (NETIF_F_GRO | NETIF_F_LRO)
/* Baseline feature set assumed for every frame in tap_handle_frame(). */
#define TAP_FEATURES (NETIF_F_GSO | NETIF_F_SG | NETIF_F_FRAGLIST)

/* Return the tap device stored in dev->rx_handler_data (RCU dereference). */
static struct tap_dev *tap_dev_get_rcu(const struct net_device *dev)
{
	return rcu_dereference(dev->rx_handler_data);
}

/*
 * RCU usage:
 * The tap_queue and the macvlan_dev are loosely coupled, the
 * pointers from one to the other can only be read while rcu_read_lock
 * or rtnl is held.
 *
 * Both the file and the macvlan_dev hold a reference on the tap_queue
 * through sock_hold(&q->sk). When the macvlan_dev goes away first,
 * q->vlan becomes inaccessible. When the files gets closed,
 * tap_get_queue() fails.
 *
 * There may still be references to the struct sock inside of the
 * queue from outbound SKBs, but these never reference back to the
 * file or the dev. The data structure is freed through __sk_free
 * when both our references and any pending SKBs are gone.
*/ static int tap_enable_queue(struct tap_dev *tap, struct file *file, struct tap_queue *q) { int err = -EINVAL; ASSERT_RTNL(); if (q->enabled) goto out; err = 0; rcu_assign_pointer(tap->taps[tap->numvtaps], q); q->queue_index = tap->numvtaps; q->enabled = true; tap->numvtaps++; out: return err; } /* Requires RTNL */ static int tap_set_queue(struct tap_dev *tap, struct file *file, struct tap_queue *q) { if (tap->numqueues == MAX_TAP_QUEUES) return -EBUSY; rcu_assign_pointer(q->tap, tap); rcu_assign_pointer(tap->taps[tap->numvtaps], q); sock_hold(&q->sk); q->file = file; q->queue_index = tap->numvtaps; q->enabled = true; file->private_data = q; list_add_tail(&q->next, &tap->queue_list); tap->numvtaps++; tap->numqueues++; return 0; } static int tap_disable_queue(struct tap_queue *q) { struct tap_dev *tap; struct tap_queue *nq; ASSERT_RTNL(); if (!q->enabled) return -EINVAL; tap = rtnl_dereference(q->tap); if (tap) { int index = q->queue_index; BUG_ON(index >= tap->numvtaps); nq = rtnl_dereference(tap->taps[tap->numvtaps - 1]); nq->queue_index = index; rcu_assign_pointer(tap->taps[index], nq); RCU_INIT_POINTER(tap->taps[tap->numvtaps - 1], NULL); q->enabled = false; tap->numvtaps--; } return 0; } /* * The file owning the queue got closed, give up both * the reference that the files holds as well as the * one from the macvlan_dev if that still exists. * * Using the spinlock makes sure that we don't get * to the queue again after destroying it. */ static void tap_put_queue(struct tap_queue *q) { struct tap_dev *tap; rtnl_lock(); tap = rtnl_dereference(q->tap); if (tap) { if (q->enabled) BUG_ON(tap_disable_queue(q)); tap->numqueues--; RCU_INIT_POINTER(q->tap, NULL); sock_put(&q->sk); list_del_init(&q->next); } rtnl_unlock(); synchronize_rcu(); sock_put(&q->sk); } /* * Select a queue based on the rxq of the device on which this packet * arrived. If the incoming device is not mq, calculate a flow hash * to select a queue. If all fails, find the first available queue. 
* Cache vlan->numvtaps since it can become zero during the execution * of this function. */ static struct tap_queue *tap_get_queue(struct tap_dev *tap, struct sk_buff *skb) { struct tap_queue *queue = NULL; /* Access to taps array is protected by rcu, but access to numvtaps * isn't. Below we use it to lookup a queue, but treat it as a hint * and validate that the result isn't NULL - in case we are * racing against queue removal. */ int numvtaps = READ_ONCE(tap->numvtaps); __u32 rxq; if (!numvtaps) goto out; if (numvtaps == 1) goto single; /* Check if we can use flow to select a queue */ rxq = skb_get_hash(skb); if (rxq) { queue = rcu_dereference(tap->taps[rxq % numvtaps]); goto out; } if (likely(skb_rx_queue_recorded(skb))) { rxq = skb_get_rx_queue(skb); while (unlikely(rxq >= numvtaps)) rxq -= numvtaps; queue = rcu_dereference(tap->taps[rxq]); goto out; } single: queue = rcu_dereference(tap->taps[0]); out: return queue; } /* * The net_device is going away, give up the reference * that it holds on all queues and safely set the pointer * from the queues to NULL. 
*/ void tap_del_queues(struct tap_dev *tap) { struct tap_queue *q, *tmp; ASSERT_RTNL(); list_for_each_entry_safe(q, tmp, &tap->queue_list, next) { list_del_init(&q->next); RCU_INIT_POINTER(q->tap, NULL); if (q->enabled) tap->numvtaps--; tap->numqueues--; sock_put(&q->sk); } BUG_ON(tap->numvtaps); BUG_ON(tap->numqueues); /* guarantee that any future tap_set_queue will fail */ tap->numvtaps = MAX_TAP_QUEUES; } EXPORT_SYMBOL_GPL(tap_del_queues); rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct tap_dev *tap; struct tap_queue *q; netdev_features_t features = TAP_FEATURES; enum skb_drop_reason drop_reason; tap = tap_dev_get_rcu(dev); if (!tap) return RX_HANDLER_PASS; q = tap_get_queue(tap, skb); if (!q) return RX_HANDLER_PASS; skb_push(skb, ETH_HLEN); /* Apply the forward feature mask so that we perform segmentation * according to users wishes. This only works if VNET_HDR is * enabled. */ if (q->flags & IFF_VNET_HDR) features |= tap->tap_features; if (netif_needs_gso(skb, features)) { struct sk_buff *segs = __skb_gso_segment(skb, features, false); struct sk_buff *next; if (IS_ERR(segs)) { drop_reason = SKB_DROP_REASON_SKB_GSO_SEG; goto drop; } if (!segs) { if (ptr_ring_produce(&q->ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; } goto wake_up; } consume_skb(skb); skb_list_walk_safe(segs, skb, next) { skb_mark_not_on_list(skb); if (ptr_ring_produce(&q->ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; kfree_skb_reason(skb, drop_reason); kfree_skb_list_reason(next, drop_reason); break; } } } else { /* If we receive a partial checksum and the tap side * doesn't support checksum offload, compute the checksum. * Note: it doesn't matter which checksum feature to * check, we either support them all or none. 
*/ if (skb->ip_summed == CHECKSUM_PARTIAL && !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) { drop_reason = SKB_DROP_REASON_SKB_CSUM; goto drop; } if (ptr_ring_produce(&q->ring, skb)) { drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; } } wake_up: wake_up_interruptible_poll(sk_sleep(&q->sk), EPOLLIN | EPOLLRDNORM | EPOLLRDBAND); return RX_HANDLER_CONSUMED; drop: /* Count errors/drops only here, thus don't care about args. */ if (tap->count_rx_dropped) tap->count_rx_dropped(tap); kfree_skb_reason(skb, drop_reason); return RX_HANDLER_CONSUMED; } EXPORT_SYMBOL_GPL(tap_handle_frame); static struct major_info *tap_get_major(int major) { struct major_info *tap_major; list_for_each_entry_rcu(tap_major, &major_list, next) { if (tap_major->major == major) return tap_major; } return NULL; } int tap_get_minor(dev_t major, struct tap_dev *tap) { int retval = -ENOMEM; struct major_info *tap_major; rcu_read_lock(); tap_major = tap_get_major(MAJOR(major)); if (!tap_major) { retval = -EINVAL; goto unlock; } spin_lock(&tap_major->minor_lock); retval = idr_alloc(&tap_major->minor_idr, tap, 1, TAP_NUM_DEVS, GFP_ATOMIC); if (retval >= 0) { tap->minor = retval; } else if (retval == -ENOSPC) { netdev_err(tap->dev, "Too many tap devices\n"); retval = -EINVAL; } spin_unlock(&tap_major->minor_lock); unlock: rcu_read_unlock(); return retval < 0 ? 
retval : 0; } EXPORT_SYMBOL_GPL(tap_get_minor); void tap_free_minor(dev_t major, struct tap_dev *tap) { struct major_info *tap_major; rcu_read_lock(); tap_major = tap_get_major(MAJOR(major)); if (!tap_major) { goto unlock; } spin_lock(&tap_major->minor_lock); if (tap->minor) { idr_remove(&tap_major->minor_idr, tap->minor); tap->minor = 0; } spin_unlock(&tap_major->minor_lock); unlock: rcu_read_unlock(); } EXPORT_SYMBOL_GPL(tap_free_minor); static struct tap_dev *dev_get_by_tap_file(int major, int minor) { struct net_device *dev = NULL; struct tap_dev *tap; struct major_info *tap_major; rcu_read_lock(); tap_major = tap_get_major(major); if (!tap_major) { tap = NULL; goto unlock; } spin_lock(&tap_major->minor_lock); tap = idr_find(&tap_major->minor_idr, minor); if (tap) { dev = tap->dev; dev_hold(dev); } spin_unlock(&tap_major->minor_lock); unlock: rcu_read_unlock(); return tap; } static void tap_sock_write_space(struct sock *sk) { wait_queue_head_t *wqueue; if (!sock_writeable(sk) || !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) return; wqueue = sk_sleep(sk); if (wqueue && waitqueue_active(wqueue)) wake_up_interruptible_poll(wqueue, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); } static void tap_sock_destruct(struct sock *sk) { struct tap_queue *q = container_of(sk, struct tap_queue, sk); ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb); } static int tap_open(struct inode *inode, struct file *file) { struct net *net = current->nsproxy->net_ns; struct tap_dev *tap; struct tap_queue *q; int err = -ENODEV; rtnl_lock(); tap = dev_get_by_tap_file(imajor(inode), iminor(inode)); if (!tap) goto err; err = -ENOMEM; q = (struct tap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tap_proto, 0); if (!q) goto err; if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) { sk_free(&q->sk); goto err; } init_waitqueue_head(&q->sock.wq.wait); q->sock.type = SOCK_RAW; q->sock.state = SS_CONNECTED; q->sock.file = file; q->sock.ops = &tap_socket_ops; 
sock_init_data_uid(&q->sock, &q->sk, current_fsuid()); q->sk.sk_write_space = tap_sock_write_space; q->sk.sk_destruct = tap_sock_destruct; q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); /* * so far only KVM virtio_net uses tap, enable zero copy between * guest kernel and host kernel when lower device supports zerocopy * * The macvlan supports zerocopy iff the lower device supports zero * copy so we don't have to look at the lower device directly. */ if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG)) sock_set_flag(&q->sk, SOCK_ZEROCOPY); err = tap_set_queue(tap, file, q); if (err) { /* tap_sock_destruct() will take care of freeing ptr_ring */ goto err_put; } /* tap groks IOCB_NOWAIT just fine, mark it as such */ file->f_mode |= FMODE_NOWAIT; dev_put(tap->dev); rtnl_unlock(); return err; err_put: sock_put(&q->sk); err: if (tap) dev_put(tap->dev); rtnl_unlock(); return err; } static int tap_release(struct inode *inode, struct file *file) { struct tap_queue *q = file->private_data; tap_put_queue(q); return 0; } static __poll_t tap_poll(struct file *file, poll_table *wait) { struct tap_queue *q = file->private_data; __poll_t mask = EPOLLERR; if (!q) goto out; mask = 0; poll_wait(file, &q->sock.wq.wait, wait); if (!ptr_ring_empty(&q->ring)) mask |= EPOLLIN | EPOLLRDNORM; if (sock_writeable(&q->sk) || (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock.flags) && sock_writeable(&q->sk))) mask |= EPOLLOUT | EPOLLWRNORM; out: return mask; } static inline struct sk_buff *tap_alloc_skb(struct sock *sk, size_t prepad, size_t len, size_t linear, int noblock, int *err) { struct sk_buff *skb; /* Under a page? Don't bother with paged skb. 
*/ if (prepad + len < PAGE_SIZE || !linear) linear = len; if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, err, PAGE_ALLOC_COSTLY_ORDER); if (!skb) return NULL; skb_reserve(skb, prepad); skb_put(skb, linear); skb->data_len = len - linear; skb->len += len - linear; return skb; } /* Neighbour code has some assumptions on HH_DATA_MOD alignment */ #define TAP_RESERVE HH_DATA_OFF(ETH_HLEN) /* Get packet from user space buffer */ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, struct iov_iter *from, int noblock) { int good_linear = SKB_MAX_HEAD(TAP_RESERVE); struct sk_buff *skb; struct tap_dev *tap; unsigned long total_len = iov_iter_count(from); unsigned long len = total_len; int err; struct virtio_net_hdr vnet_hdr = { 0 }; int vnet_hdr_len = 0; int hdr_len = 0; int copylen = 0; int depth; bool zerocopy = false; size_t linear; enum skb_drop_reason drop_reason; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); hdr_len = tun_vnet_hdr_get(vnet_hdr_len, q->flags, from, &vnet_hdr); if (hdr_len < 0) { err = hdr_len; goto err; } len -= vnet_hdr_len; } err = -EINVAL; if (unlikely(len < ETH_HLEN)) goto err; if (msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) { struct iov_iter i; copylen = clamp(hdr_len ?: GOODCOPY_LEN, ETH_HLEN, good_linear); linear = copylen; i = *from; iov_iter_advance(&i, copylen); if (iov_iter_npages(&i, INT_MAX) <= MAX_SKB_FRAGS) zerocopy = true; } if (!zerocopy) { copylen = len; linear = clamp(hdr_len, ETH_HLEN, good_linear); } skb = tap_alloc_skb(&q->sk, TAP_RESERVE, copylen, linear, noblock, &err); if (!skb) goto err; if (zerocopy) err = zerocopy_sg_from_iter(skb, from); else err = skb_copy_datagram_from_iter(skb, 0, from, len); if (err) { drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto err_kfree; } skb_set_network_header(skb, ETH_HLEN); 
skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; rcu_read_lock(); tap = rcu_dereference(q->tap); if (!tap) { kfree_skb(skb); rcu_read_unlock(); return total_len; } skb->dev = tap->dev; if (vnet_hdr_len) { err = tun_vnet_hdr_to_skb(q->flags, skb, &vnet_hdr); if (err) { rcu_read_unlock(); drop_reason = SKB_DROP_REASON_DEV_HDR; goto err_kfree; } } skb_probe_transport_header(skb); /* Move network header to the right position for VLAN tagged packets */ if (eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); /* copy skb_ubuf_info for callback when skb has no error */ if (zerocopy) { skb_zcopy_init(skb, msg_control); } else if (msg_control) { struct ubuf_info *uarg = msg_control; uarg->ops->complete(NULL, uarg, false); } dev_queue_xmit(skb); rcu_read_unlock(); return total_len; err_kfree: kfree_skb_reason(skb, drop_reason); err: rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap && tap->count_tx_dropped) tap->count_tx_dropped(tap); rcu_read_unlock(); return err; } static ssize_t tap_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; int noblock = 0; if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) noblock = 1; return tap_get_user(q, NULL, from, noblock); } /* Put packet to the user space buffer */ static ssize_t tap_put_user(struct tap_queue *q, const struct sk_buff *skb, struct iov_iter *iter) { int ret; int vnet_hdr_len = 0; int vlan_offset = 0; int total; if (q->flags & IFF_VNET_HDR) { struct virtio_net_hdr vnet_hdr; vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr); if (ret) return ret; ret = tun_vnet_hdr_put(vnet_hdr_len, iter, &vnet_hdr); if (ret) return ret; } total = vnet_hdr_len; total += skb->len; if (skb_vlan_tag_present(skb)) { struct { __be16 h_vlan_proto; __be16 h_vlan_TCI; } veth; veth.h_vlan_proto = 
skb->vlan_proto; veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); total += VLAN_HLEN; ret = skb_copy_datagram_iter(skb, 0, iter, vlan_offset); if (ret || !iov_iter_count(iter)) goto done; ret = copy_to_iter(&veth, sizeof(veth), iter); if (ret != sizeof(veth) || !iov_iter_count(iter)) goto done; } ret = skb_copy_datagram_iter(skb, vlan_offset, iter, skb->len - vlan_offset); done: return ret ? ret : total; } static ssize_t tap_do_read(struct tap_queue *q, struct iov_iter *to, int noblock, struct sk_buff *skb) { DEFINE_WAIT(wait); ssize_t ret = 0; if (!iov_iter_count(to)) { kfree_skb(skb); return 0; } if (skb) goto put; while (1) { if (!noblock) prepare_to_wait(sk_sleep(&q->sk), &wait, TASK_INTERRUPTIBLE); /* Read frames from the queue */ skb = ptr_ring_consume(&q->ring); if (skb) break; if (noblock) { ret = -EAGAIN; break; } if (signal_pending(current)) { ret = -ERESTARTSYS; break; } /* Nothing to read, let's sleep */ schedule(); } if (!noblock) finish_wait(sk_sleep(&q->sk), &wait); put: if (skb) { ret = tap_put_user(q, skb, to); if (unlikely(ret < 0)) kfree_skb(skb); else consume_skb(skb); } return ret; } static ssize_t tap_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct tap_queue *q = file->private_data; ssize_t len = iov_iter_count(to), ret; int noblock = 0; if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) noblock = 1; ret = tap_do_read(q, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; return ret; } static struct tap_dev *tap_get_tap_dev(struct tap_queue *q) { struct tap_dev *tap; ASSERT_RTNL(); tap = rtnl_dereference(q->tap); if (tap) dev_hold(tap->dev); return tap; } static void tap_put_tap_dev(struct tap_dev *tap) { dev_put(tap->dev); } static int tap_ioctl_set_queue(struct file *file, unsigned int flags) { struct tap_queue *q = file->private_data; struct tap_dev *tap; int ret; tap = tap_get_tap_dev(q); 
if (!tap) return -EINVAL; if (flags & IFF_ATTACH_QUEUE) ret = tap_enable_queue(tap, file, q); else if (flags & IFF_DETACH_QUEUE) ret = tap_disable_queue(q); else ret = -EINVAL; tap_put_tap_dev(tap); return ret; } static int set_offload(struct tap_queue *q, unsigned long arg) { struct tap_dev *tap; netdev_features_t features; netdev_features_t feature_mask = 0; tap = rtnl_dereference(q->tap); if (!tap) return -ENOLINK; features = tap->dev->features; if (arg & TUN_F_CSUM) { feature_mask = NETIF_F_HW_CSUM; if (arg & (TUN_F_TSO4 | TUN_F_TSO6)) { if (arg & TUN_F_TSO_ECN) feature_mask |= NETIF_F_TSO_ECN; if (arg & TUN_F_TSO4) feature_mask |= NETIF_F_TSO; if (arg & TUN_F_TSO6) feature_mask |= NETIF_F_TSO6; } /* TODO: for now USO4 and USO6 should work simultaneously */ if ((arg & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) features |= NETIF_F_GSO_UDP_L4; } /* tun/tap driver inverts the usage for TSO offloads, where * setting the TSO bit means that the userspace wants to * accept TSO frames and turning it off means that user space * does not support TSO. * For tap, we have to invert it to mean the same thing. * When user space turns off TSO, we turn off GSO/LRO so that * user-space will not receive TSO frames. */ if (feature_mask & (NETIF_F_TSO | NETIF_F_TSO6) || (feature_mask & (TUN_F_USO4 | TUN_F_USO6)) == (TUN_F_USO4 | TUN_F_USO6)) features |= RX_OFFLOADS; else features &= ~RX_OFFLOADS; /* tap_features are the same as features on tun/tap and * reflect user expectations. 
*/ tap->tap_features = feature_mask; if (tap->update_features) tap->update_features(tap, features); return 0; } /* * provide compatibility with generic tun/tap interface */ static long tap_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct tap_queue *q = file->private_data; struct tap_dev *tap; void __user *argp = (void __user *)arg; struct ifreq __user *ifr = argp; unsigned int __user *up = argp; unsigned short u; int __user *sp = argp; struct sockaddr_storage ss; int s; int ret; switch (cmd) { case TUNSETIFF: /* ignore the name, just look at flags */ if (get_user(u, &ifr->ifr_flags)) return -EFAULT; ret = 0; if ((u & ~TAP_IFFEATURES) != (IFF_NO_PI | IFF_TAP)) ret = -EINVAL; else q->flags = (q->flags & ~TAP_IFFEATURES) | u; return ret; case TUNGETIFF: rtnl_lock(); tap = tap_get_tap_dev(q); if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = 0; u = q->flags; if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || put_user(u, &ifr->ifr_flags)) ret = -EFAULT; tap_put_tap_dev(tap); rtnl_unlock(); return ret; case TUNSETQUEUE: if (get_user(u, &ifr->ifr_flags)) return -EFAULT; rtnl_lock(); ret = tap_ioctl_set_queue(file, u); rtnl_unlock(); return ret; case TUNGETFEATURES: if (put_user(IFF_TAP | IFF_NO_PI | TAP_IFFEATURES, up)) return -EFAULT; return 0; case TUNSETSNDBUF: if (get_user(s, sp)) return -EFAULT; if (s <= 0) return -EINVAL; q->sk.sk_sndbuf = s; return 0; case TUNSETOFFLOAD: /* let the user check for future flags */ if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | TUN_F_TSO_ECN | TUN_F_UFO | TUN_F_USO4 | TUN_F_USO6)) return -EINVAL; rtnl_lock(); ret = set_offload(q, arg); rtnl_unlock(); return ret; case SIOCGIFHWADDR: rtnl_lock(); tap = tap_get_tap_dev(q); if (!tap) { rtnl_unlock(); return -ENOLINK; } ret = 0; netif_get_mac_address((struct sockaddr *)&ss, dev_net(tap->dev), tap->dev->name); if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || copy_to_user(&ifr->ifr_hwaddr, &ss, sizeof(ifr->ifr_hwaddr))) ret = -EFAULT; 
tap_put_tap_dev(tap); rtnl_unlock(); return ret; case SIOCSIFHWADDR: if (copy_from_user(&ss, &ifr->ifr_hwaddr, sizeof(ifr->ifr_hwaddr))) return -EFAULT; rtnl_lock(); tap = tap_get_tap_dev(q); if (!tap) { rtnl_unlock(); return -ENOLINK; } if (tap->dev->addr_len > sizeof(ifr->ifr_hwaddr)) ret = -EINVAL; else ret = dev_set_mac_address_user(tap->dev, &ss, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; default: return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp); } } static const struct file_operations tap_fops = { .owner = THIS_MODULE, .open = tap_open, .release = tap_release, .read_iter = tap_read_iter, .write_iter = tap_write_iter, .poll = tap_poll, .unlocked_ioctl = tap_ioctl, .compat_ioctl = compat_ptr_ioctl, }; static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp) { struct virtio_net_hdr *gso = xdp->data_hard_start; int buflen = xdp->frame_sz; int vnet_hdr_len = 0; struct tap_dev *tap; struct sk_buff *skb; int err, depth; if (unlikely(xdp->data_end - xdp->data < ETH_HLEN)) { err = -EINVAL; goto err; } if (q->flags & IFF_VNET_HDR) vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); skb = build_skb(xdp->data_hard_start, buflen); if (!skb) { err = -ENOMEM; goto err; } skb_reserve(skb, xdp->data - xdp->data_hard_start); skb_put(skb, xdp->data_end - xdp->data); skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth_hdr(skb)->h_proto; if (vnet_hdr_len) { err = tun_vnet_hdr_to_skb(q->flags, skb, gso); if (err) goto err_kfree; } /* Move network header to the right position for VLAN tagged packets */ if (eth_type_vlan(skb->protocol) && vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) skb_set_network_header(skb, depth); rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap) { skb->dev = tap->dev; skb_probe_transport_header(skb); dev_queue_xmit(skb); } else { kfree_skb(skb); } rcu_read_unlock(); return 0; err_kfree: kfree_skb(skb); err: rcu_read_lock(); tap = rcu_dereference(q->tap); if (tap && 
tap->count_tx_dropped) tap->count_tx_dropped(tap); rcu_read_unlock(); return err; } static int tap_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); struct tun_msg_ctl *ctl = m->msg_control; struct xdp_buff *xdp; int i; if (m->msg_controllen == sizeof(struct tun_msg_ctl) && ctl && ctl->type == TUN_MSG_PTR) { for (i = 0; i < ctl->num; i++) { xdp = &((struct xdp_buff *)ctl->ptr)[i]; tap_get_user_xdp(q, xdp); } return 0; } return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter, m->msg_flags & MSG_DONTWAIT); } static int tap_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len, int flags) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); struct sk_buff *skb = m->msg_control; int ret; if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) { kfree_skb(skb); return -EINVAL; } ret = tap_do_read(q, &m->msg_iter, flags & MSG_DONTWAIT, skb); if (ret > total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; } return ret; } static int tap_peek_len(struct socket *sock) { struct tap_queue *q = container_of(sock, struct tap_queue, sock); return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag); } /* Ops structure to mimic raw sockets with tun */ static const struct proto_ops tap_socket_ops = { .sendmsg = tap_sendmsg, .recvmsg = tap_recvmsg, .peek_len = tap_peek_len, }; /* Get an underlying socket object from tun file. Returns error unless file is * attached to a device. The returned object works like a packet socket, it * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for * holding a reference to the file for as long as the socket is in use. 
*/ struct socket *tap_get_socket(struct file *file) { struct tap_queue *q; if (file->f_op != &tap_fops) return ERR_PTR(-EINVAL); q = file->private_data; if (!q) return ERR_PTR(-EBADFD); return &q->sock; } EXPORT_SYMBOL_GPL(tap_get_socket); struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q; if (file->f_op != &tap_fops) return ERR_PTR(-EINVAL); q = file->private_data; if (!q) return ERR_PTR(-EBADFD); return &q->ring; } EXPORT_SYMBOL_GPL(tap_get_ptr_ring); int tap_queue_resize(struct tap_dev *tap) { struct net_device *dev = tap->dev; struct tap_queue *q; struct ptr_ring **rings; int n = tap->numqueues; int ret, i = 0; rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); if (!rings) return -ENOMEM; list_for_each_entry(q, &tap->queue_list, next) rings[i++] = &q->ring; ret = ptr_ring_resize_multiple_bh(rings, n, dev->tx_queue_len, GFP_KERNEL, __skb_array_destroy_skb); kfree(rings); return ret; } EXPORT_SYMBOL_GPL(tap_queue_resize); static int tap_list_add(dev_t major, const char *device_name) { struct major_info *tap_major; tap_major = kzalloc(sizeof(*tap_major), GFP_ATOMIC); if (!tap_major) return -ENOMEM; tap_major->major = MAJOR(major); idr_init(&tap_major->minor_idr); spin_lock_init(&tap_major->minor_lock); tap_major->device_name = device_name; list_add_tail_rcu(&tap_major->next, &major_list); return 0; } int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major, const char *device_name, struct module *module) { int err; err = alloc_chrdev_region(tap_major, 0, TAP_NUM_DEVS, device_name); if (err) goto out1; cdev_init(tap_cdev, &tap_fops); tap_cdev->owner = module; err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS); if (err) goto out2; err = tap_list_add(*tap_major, device_name); if (err) goto out3; return 0; out3: cdev_del(tap_cdev); out2: unregister_chrdev_region(*tap_major, TAP_NUM_DEVS); out1: return err; } EXPORT_SYMBOL_GPL(tap_create_cdev); void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev) { struct major_info *tap_major, *tmp; 
cdev_del(tap_cdev); unregister_chrdev_region(major, TAP_NUM_DEVS); list_for_each_entry_safe(tap_major, tmp, &major_list, next) { if (tap_major->major == MAJOR(major)) { idr_destroy(&tap_major->minor_idr); list_del_rcu(&tap_major->next); kfree_rcu(tap_major, rcu); } } } EXPORT_SYMBOL_GPL(tap_destroy_cdev); MODULE_DESCRIPTION("Common library for drivers implementing the TAP interface"); MODULE_AUTHOR("Arnd Bergmann <arnd@arndb.de>"); MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>"); MODULE_LICENSE("GPL"); MODULE_IMPORT_NS("NETDEV_INTERNAL");
2 1 1 1 2 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650
// SPDX-License-Identifier: GPL-2.0
/*
 * ESSIV skcipher and aead template for block encryption
 *
 * This template encapsulates the ESSIV IV generation algorithm used by
 * dm-crypt and fscrypt, which converts the initial vector for the skcipher
 * used for block encryption, by encrypting it using the hash of the
 * skcipher key as encryption key. Usually, the input IV is a 64-bit sector
 * number in LE representation zero-padded to the size of the IV, but this
 * is not assumed by this driver.
 *
 * The typical use of this template is to instantiate the skcipher
 * 'essiv(cbc(aes),sha256)', which is the only instantiation used by
 * fscrypt, and the most relevant one for dm-crypt. However, dm-crypt
 * also permits ESSIV to be used in combination with the authenc template,
 * e.g., 'essiv(authenc(hmac(sha256),cbc(aes)),sha256)', in which case
 * we need to instantiate an aead that accepts the same special key format
 * as the authenc template, and deals with the way the encrypted IV is
 * embedded into the AAD area of the aead request. This means the AEAD
 * flavor produced by this template is tightly coupled to the way dm-crypt
 * happens to use it.
 *
 * Copyright (c) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * Heavily based on:
 * adiantum length-preserving encryption mode
 *
 * Copyright 2018 Google LLC
 */

#include <crypto/authenc.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/cipher.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>

#include "internal.h"

/*
 * Per-instance context: the spawn of the wrapped skcipher or aead, plus the
 * names essiv_init_tfm() uses to allocate the ESSIV block cipher and hash.
 */
struct essiv_instance_ctx {
	union {
		struct crypto_skcipher_spawn	skcipher_spawn;
		struct crypto_aead_spawn	aead_spawn;
	} u;
	char	essiv_cipher_name[CRYPTO_MAX_ALG_NAME];
	char	shash_driver_name[CRYPTO_MAX_ALG_NAME];
};

/*
 * Per-transform context.  @ivoffset is only set by essiv_aead_init_tfm() and
 * is the byte offset, inside the aead request context, of the IV copy used
 * by essiv_aead_crypt().
 */
struct essiv_tfm_ctx {
	union {
		struct crypto_skcipher	*skcipher;
		struct crypto_aead	*aead;
	} u;
	struct crypto_cipher		*essiv_cipher;
	struct crypto_shash		*hash;
	int				ivoffset;
};

/*
 * Per-request context for the aead flavor.  aead_req must stay the last
 * member (checked by the BUILD_BUG_ON in essiv_aead_init_tfm()) because the
 * inner request context and the IV copy are placed after it.
 */
struct essiv_aead_request_ctx {
	struct scatterlist		sg[4];
	u8				*assoc;
	struct aead_request		aead_req;
};

/*
 * Set @key on the wrapped skcipher, then key the ESSIV cipher with the
 * digest of @key (crypto_shash_digestsize() bytes).  The request flags of
 * @tfm are propagated to both inner transforms first.
 * Returns 0 or the first error reported by an inner setkey/digest call.
 */
static int essiv_skcipher_setkey(struct crypto_skcipher *tfm,
				 const u8 *key, unsigned int keylen)
{
	struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
	u8 salt[HASH_MAX_DIGESTSIZE];
	int err;

	crypto_skcipher_clear_flags(tctx->u.skcipher, CRYPTO_TFM_REQ_MASK);
	crypto_skcipher_set_flags(tctx->u.skcipher,
				  crypto_skcipher_get_flags(tfm) &
				  CRYPTO_TFM_REQ_MASK);
	err = crypto_skcipher_setkey(tctx->u.skcipher, key, keylen);
	if (err)
		return err;

	err = crypto_shash_tfm_digest(tctx->hash, key, keylen, salt);
	if (err)
		return err;

	crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK);
	crypto_cipher_set_flags(tctx->essiv_cipher,
				crypto_skcipher_get_flags(tfm) &
				CRYPTO_TFM_REQ_MASK);
	return crypto_cipher_setkey(tctx->essiv_cipher, salt,
				    crypto_shash_digestsize(tctx->hash));
}

/*
 * Set the authenc-format @key on the wrapped aead, then key the ESSIV cipher
 * with the hash of the extracted encryption key followed by the
 * authentication key.  Returns -EINVAL when the key blob cannot be split by
 * crypto_authenc_extractkeys(), otherwise 0 or the first inner error.
 */
static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key,
			     unsigned int keylen)
{
	struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);
	SHASH_DESC_ON_STACK(desc, tctx->hash);
	struct crypto_authenc_keys keys;
	u8 salt[HASH_MAX_DIGESTSIZE];
	int err;

	crypto_aead_clear_flags(tctx->u.aead, CRYPTO_TFM_REQ_MASK);
	crypto_aead_set_flags(tctx->u.aead, crypto_aead_get_flags(tfm) &
					    CRYPTO_TFM_REQ_MASK);
	err = crypto_aead_setkey(tctx->u.aead, key, keylen);
	if (err)
		return err;

	if (crypto_authenc_extractkeys(&keys, key, keylen) != 0)
		return -EINVAL;

	desc->tfm = tctx->hash;
	/* The ?: chain stops at the first non-zero (error) result. */
	err = crypto_shash_init(desc) ?:
	      crypto_shash_update(desc, keys.enckey, keys.enckeylen) ?:
	      crypto_shash_finup(desc, keys.authkey, keys.authkeylen, salt);
	if (err)
		return err;

	crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK);
	crypto_cipher_set_flags(tctx->essiv_cipher, crypto_aead_get_flags(tfm) &
						    CRYPTO_TFM_REQ_MASK);
	return crypto_cipher_setkey(tctx->essiv_cipher, salt,
				    crypto_shash_digestsize(tctx->hash));
}

/* Forward the authentication tag size to the wrapped aead. */
static int essiv_aead_setauthsize(struct crypto_aead *tfm,
				  unsigned int authsize)
{
	struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);

	return crypto_aead_setauthsize(tctx->u.aead, authsize);
}

/* Completion callback of the inner skcipher request: complete the outer one. */
static void essiv_skcipher_done(void *data, int err)
{
	struct skcipher_request *req = data;

	skcipher_request_complete(req, err);
}

/*
 * Encrypt req->iv in place with the ESSIV cipher, then run the wrapped
 * skcipher over the same src/dst/cryptlen using that converted IV.  The
 * inner request lives in the outer request's context area.
 */
static int essiv_skcipher_crypt(struct skcipher_request *req, bool enc)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
	struct skcipher_request *subreq = skcipher_request_ctx(req);

	crypto_cipher_encrypt_one(tctx->essiv_cipher, req->iv, req->iv);

	skcipher_request_set_tfm(subreq, tctx->u.skcipher);
	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
				   req->iv);
	skcipher_request_set_callback(subreq, skcipher_request_flags(req),
				      essiv_skcipher_done, req);

	return enc ? crypto_skcipher_encrypt(subreq) :
		     crypto_skcipher_decrypt(subreq);
}

static int essiv_skcipher_encrypt(struct skcipher_request *req)
{
	return essiv_skcipher_crypt(req, true);
}

static int essiv_skcipher_decrypt(struct skcipher_request *req)
{
	return essiv_skcipher_crypt(req, false);
}

/*
 * Completion callback of the inner aead request.  The bounce buffer for the
 * associated data is freed unless @err is -EINPROGRESS; the outer request is
 * completed with @err in either case.
 */
static void essiv_aead_done(void *data, int err)
{
	struct aead_request *req = data;
	struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);

	if (err == -EINPROGRESS)
		goto out;

	kfree(rctx->assoc);

out:
	aead_request_complete(req, err);
}

/*
 * Encrypt req->iv in place with the ESSIV cipher, make the converted IV
 * visible at the end of the AAD region, and run the wrapped aead.
 *
 * Returns the result of the inner encrypt/decrypt call, -EINVAL when the
 * out-of-place encryption path finds assoclen shorter than the IV or the
 * source scatterlist too short, or -ENOMEM when the AAD bounce buffer cannot
 * be allocated.
 */
static int essiv_aead_crypt(struct aead_request *req, bool enc)
{
	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
	const struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);
	struct essiv_aead_request_ctx *rctx = aead_request_ctx(req);
	struct aead_request *subreq = &rctx->aead_req;
	struct scatterlist *src = req->src;
	int err;

	crypto_cipher_encrypt_one(tctx->essiv_cipher, req->iv, req->iv);

	/*
	 * dm-crypt embeds the sector number and the IV in the AAD region, so
	 * we have to copy the converted IV into the right scatterlist before
	 * we pass it on.
	 */
	rctx->assoc = NULL;
	if (req->src == req->dst || !enc) {
		/* Write the converted IV into the last ivsize bytes of the AAD in dst. */
		scatterwalk_map_and_copy(req->iv, req->dst,
					 req->assoclen - crypto_aead_ivsize(tfm),
					 crypto_aead_ivsize(tfm), 1);
	} else {
		/* IV copy lives past the inner request, at the offset set up at init. */
		u8 *iv = (u8 *)aead_request_ctx(req) + tctx->ivoffset;
		int ivsize = crypto_aead_ivsize(tfm);
		int ssize = req->assoclen - ivsize;
		struct scatterlist *sg;
		int nents;

		if (ssize < 0)
			return -EINVAL;

		nents = sg_nents_for_len(req->src, ssize);
		if (nents < 0)
			return -EINVAL;

		memcpy(iv, req->iv, ivsize);
		sg_init_table(rctx->sg, 4);

		if (unlikely(nents > 1)) {
			/*
			 * This is a case that rarely occurs in practice, but
			 * for correctness, we have to deal with it nonetheless.
			 */
			rctx->assoc = kmalloc(ssize, GFP_ATOMIC);
			if (!rctx->assoc)
				return -ENOMEM;

			scatterwalk_map_and_copy(rctx->assoc, req->src, 0,
						 ssize, 0);
			sg_set_buf(rctx->sg, rctx->assoc, ssize);
		} else {
			sg_set_page(rctx->sg, sg_page(req->src), ssize,
				    req->src->offset);
		}

		/* sg[0] = leading AAD, sg[1] = converted IV, sg[2..] = rest of src. */
		sg_set_buf(rctx->sg + 1, iv, ivsize);
		sg = scatterwalk_ffwd(rctx->sg + 2, req->src, req->assoclen);
		if (sg != rctx->sg + 2)
			sg_chain(rctx->sg, 3, sg);

		src = rctx->sg;
	}

	aead_request_set_tfm(subreq, tctx->u.aead);
	aead_request_set_ad(subreq, req->assoclen);
	aead_request_set_callback(subreq, aead_request_flags(req),
				  essiv_aead_done, req);
	aead_request_set_crypt(subreq, src, req->dst, req->cryptlen, req->iv);

	err = enc ? crypto_aead_encrypt(subreq) :
		    crypto_aead_decrypt(subreq);

	/* For -EINPROGRESS / -EBUSY the buffer is left to essiv_aead_done(). */
	if (rctx->assoc && err != -EINPROGRESS && err != -EBUSY)
		kfree(rctx->assoc);
	return err;
}

static int essiv_aead_encrypt(struct aead_request *req)
{
	return essiv_aead_crypt(req, true);
}

static int essiv_aead_decrypt(struct aead_request *req)
{
	return essiv_aead_crypt(req, false);
}

/*
 * Allocate the ESSIV block cipher and the hash named in @ictx and store them
 * in @tctx.  On failure nothing is left allocated and @tctx is not modified.
 */
static int essiv_init_tfm(struct essiv_instance_ctx *ictx,
			  struct essiv_tfm_ctx *tctx)
{
	struct crypto_cipher *essiv_cipher;
	struct crypto_shash *hash;
	int err;

	essiv_cipher = crypto_alloc_cipher(ictx->essiv_cipher_name, 0, 0);
	if (IS_ERR(essiv_cipher))
		return PTR_ERR(essiv_cipher);

	hash = crypto_alloc_shash(ictx->shash_driver_name, 0, 0);
	if (IS_ERR(hash)) {
		err = PTR_ERR(hash);
		goto err_free_essiv_cipher;
	}

	tctx->essiv_cipher = essiv_cipher;
	tctx->hash = hash;

	return 0;

err_free_essiv_cipher:
	crypto_free_cipher(essiv_cipher);
	return err;
}

/*
 * Instantiate the wrapped skcipher, size the request context so the inner
 * request fits, and allocate the ESSIV cipher and hash.
 */
static int essiv_skcipher_init_tfm(struct crypto_skcipher *tfm)
{
	struct skcipher_instance *inst = skcipher_alg_instance(tfm);
	struct essiv_instance_ctx *ictx = skcipher_instance_ctx(inst);
	struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
	struct crypto_skcipher *skcipher;
	int err;

	skcipher = crypto_spawn_skcipher(&ictx->u.skcipher_spawn);
	if (IS_ERR(skcipher))
		return PTR_ERR(skcipher);

	crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
					 crypto_skcipher_reqsize(skcipher));

	err = essiv_init_tfm(ictx, tctx);
	if (err) {
		crypto_free_skcipher(skcipher);
		return err;
	}

	tctx->u.skcipher = skcipher;
	return 0;
}

/*
 * Instantiate the wrapped aead and lay out the request context as
 * [essiv_aead_request_ctx incl. inner request][inner request ctx][IV copy],
 * recording the IV copy's offset in tctx->ivoffset.
 */
static int essiv_aead_init_tfm(struct crypto_aead *tfm)
{
	struct aead_instance *inst = aead_alg_instance(tfm);
	struct essiv_instance_ctx *ictx = aead_instance_ctx(inst);
	struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);
	struct crypto_aead *aead;
	unsigned int subreq_size;
	int err;

	/* aead_req must be the final member so the inner ctx can follow it. */
	BUILD_BUG_ON(offsetofend(struct essiv_aead_request_ctx, aead_req) !=
		     sizeof(struct essiv_aead_request_ctx));

	aead = crypto_spawn_aead(&ictx->u.aead_spawn);
	if (IS_ERR(aead))
		return PTR_ERR(aead);

	subreq_size = sizeof_field(struct essiv_aead_request_ctx, aead_req) +
		      crypto_aead_reqsize(aead);

	tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) +
			 subreq_size;
	crypto_aead_set_reqsize(tfm, tctx->ivoffset + crypto_aead_ivsize(aead));

	err = essiv_init_tfm(ictx, tctx);
	if (err) {
		crypto_free_aead(aead);
		return err;
	}

	tctx->u.aead = aead;
	return 0;
}

/* Release everything essiv_skcipher_init_tfm() allocated. */
static void essiv_skcipher_exit_tfm(struct crypto_skcipher *tfm)
{
	struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);

	crypto_free_skcipher(tctx->u.skcipher);
	crypto_free_cipher(tctx->essiv_cipher);
	crypto_free_shash(tctx->hash);
}

/* Release everything essiv_aead_init_tfm() allocated. */
static void essiv_aead_exit_tfm(struct crypto_aead *tfm)
{
	struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm);

	crypto_free_aead(tctx->u.aead);
	crypto_free_cipher(tctx->essiv_cipher);
	crypto_free_shash(tctx->hash);
}

/* Drop the skcipher spawn and free the instance. */
static void essiv_skcipher_free_instance(struct skcipher_instance *inst)
{
	struct essiv_instance_ctx *ictx = skcipher_instance_ctx(inst);

	crypto_drop_skcipher(&ictx->u.skcipher_spawn);
	kfree(inst);
}

/* Drop the aead spawn and free the instance. */
static void essiv_aead_free_instance(struct aead_instance *inst)
{
	struct essiv_instance_ctx *ictx = aead_instance_ctx(inst);

	crypto_drop_aead(&ictx->u.aead_spawn);
	kfree(inst);
}

/*
 * Extract the innermost parenthesised name from @cra_name into
 * @essiv_cipher_name, e.g. "aes" from "cbc(aes)".  Returns false when there
 * is no '(' / matching ')' or the name would not fit in CRYPTO_MAX_ALG_NAME.
 */
static bool parse_cipher_name(char *essiv_cipher_name, const char *cra_name)
{
	const char *p, *q;
	int len;

	/* find the last opening parens */
	p = strrchr(cra_name, '(');
	if (!p++)
		return false;

	/* find the first closing parens in the tail of the string */
	q = strchr(p, ')');
	if (!q)
		return false;

	len = q - p;
	if (len >= CRYPTO_MAX_ALG_NAME)
		return false;

	/* len + 1 leaves room for the terminating NUL written by strscpy(). */
	strscpy(essiv_cipher_name, p, len + 1);
	return true;
}

/*
 * Check that the ESSIV cipher can be keyed with the hash output and matches
 * the IV size: the digest size must lie within the cipher's key size range,
 * @ivsize must equal the cipher block size, and the hash must be unkeyed.
 */
static bool essiv_supported_algorithms(const char *essiv_cipher_name,
				       struct shash_alg *hash_alg,
				       int ivsize)
{
	struct crypto_alg *alg;
	bool ret = false;

	alg = crypto_alg_mod_lookup(essiv_cipher_name,
				    CRYPTO_ALG_TYPE_CIPHER,
				    CRYPTO_ALG_TYPE_MASK);
	if (IS_ERR(alg))
		return false;

	if (hash_alg->digestsize < alg->cra_cipher.cia_min_keysize ||
	    hash_alg->digestsize > alg->cra_cipher.cia_max_keysize)
		goto out;

	if (ivsize != alg->cra_blocksize)
		goto out;

	if (crypto_shash_alg_needs_key(hash_alg))
		goto out;

	ret = true;

out:
	crypto_mod_put(alg);
	return ret;
}

/*
 * Template ->create() hook: build and register an essiv(<cipher>,<hash>)
 * instance of either the skcipher or the aead flavor, depending on the
 * requested algorithm type.
 *
 * Error unwinding is layered: out_free_hash drops the hash module reference,
 * out_drop_skcipher drops the grabbed spawn, out_free_inst frees the
 * instance allocation.
 */
static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb)
{
	struct skcipher_alg_common *skcipher_alg = NULL;
	struct crypto_attr_type *algt;
	const char *inner_cipher_name;
	const char *shash_name;
	struct skcipher_instance *skcipher_inst = NULL;
	struct aead_instance *aead_inst = NULL;
	struct crypto_instance *inst;
	struct crypto_alg *base, *block_base;
	struct essiv_instance_ctx *ictx;
	struct aead_alg *aead_alg = NULL;
	struct crypto_alg *_hash_alg;
	struct shash_alg *hash_alg;
	int ivsize;
	u32 type;
	u32 mask;
	int err;

	algt = crypto_get_attr_type(tb);
	if (IS_ERR(algt))
		return PTR_ERR(algt);

	inner_cipher_name = crypto_attr_alg_name(tb[1]);
	if (IS_ERR(inner_cipher_name))
		return PTR_ERR(inner_cipher_name);

	shash_name = crypto_attr_alg_name(tb[2]);
	if (IS_ERR(shash_name))
		return PTR_ERR(shash_name);

	type = algt->type & algt->mask;
	mask = crypto_algt_inherited_mask(algt);

	switch (type) {
	case CRYPTO_ALG_TYPE_LSKCIPHER:
		skcipher_inst = kzalloc(sizeof(*skcipher_inst) +
					sizeof(*ictx), GFP_KERNEL);
		if (!skcipher_inst)
			return -ENOMEM;
		inst = skcipher_crypto_instance(skcipher_inst);
		base = &skcipher_inst->alg.base;
		ictx = crypto_instance_ctx(inst);

		/* Symmetric cipher, e.g., "cbc(aes)" */
		err = crypto_grab_skcipher(&ictx->u.skcipher_spawn, inst,
					   inner_cipher_name, 0, mask);
		if (err)
			goto out_free_inst;
		skcipher_alg = crypto_spawn_skcipher_alg_common(
			&ictx->u.skcipher_spawn);
		block_base = &skcipher_alg->base;
		ivsize = skcipher_alg->ivsize;
		break;

	case CRYPTO_ALG_TYPE_AEAD:
		aead_inst = kzalloc(sizeof(*aead_inst) +
				    sizeof(*ictx), GFP_KERNEL);
		if (!aead_inst)
			return -ENOMEM;
		inst = aead_crypto_instance(aead_inst);
		base = &aead_inst->alg.base;
		ictx = crypto_instance_ctx(inst);

		/* AEAD cipher, e.g., "authenc(hmac(sha256),cbc(aes))" */
		err = crypto_grab_aead(&ictx->u.aead_spawn, inst,
				       inner_cipher_name, 0, mask);
		if (err)
			goto out_free_inst;
		aead_alg = crypto_spawn_aead_alg(&ictx->u.aead_spawn);
		block_base = &aead_alg->base;
		if (!strstarts(block_base->cra_name, "authenc(")) {
			pr_warn("Only authenc() type AEADs are supported by ESSIV\n");
			err = -EINVAL;
			goto out_drop_skcipher;
		}
		ivsize = aead_alg->ivsize;
		break;

	default:
		return -EINVAL;
	}

	if (!parse_cipher_name(ictx->essiv_cipher_name, block_base->cra_name)) {
		pr_warn("Failed to parse ESSIV cipher name from skcipher cra_name\n");
		err = -EINVAL;
		goto out_drop_skcipher;
	}

	/* Synchronous hash, e.g., "sha256" */
	_hash_alg = crypto_alg_mod_lookup(shash_name,
					  CRYPTO_ALG_TYPE_SHASH,
					  CRYPTO_ALG_TYPE_MASK | mask);
	if (IS_ERR(_hash_alg)) {
		err = PTR_ERR(_hash_alg);
		goto out_drop_skcipher;
	}
	hash_alg = __crypto_shash_alg(_hash_alg);

	/* Check the set of algorithms */
	if (!essiv_supported_algorithms(ictx->essiv_cipher_name, hash_alg,
					ivsize)) {
		pr_warn("Unsupported essiv instantiation: essiv(%s,%s)\n",
			block_base->cra_name, hash_alg->base.cra_name);
		err = -EINVAL;
		goto out_free_hash;
	}

	/* record the driver name so we can instantiate this exact algo later */
	strscpy(ictx->shash_driver_name, hash_alg->base.cra_driver_name);

	/* Instance fields */

	/* Preset the error for the two name-length checks below. */
	err = -ENAMETOOLONG;
	if (snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME,
		     "essiv(%s,%s)", block_base->cra_name,
		     hash_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME)
		goto out_free_hash;
	if (snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME,
		     "essiv(%s,%s)", block_base->cra_driver_name,
		     hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME)
		goto out_free_hash;

	/*
	 * hash_alg wasn't gotten via crypto_grab*(), so we need to inherit its
	 * flags manually.
	 */
	base->cra_flags        |= (hash_alg->base.cra_flags &
				   CRYPTO_ALG_INHERITED_FLAGS);
	base->cra_blocksize	= block_base->cra_blocksize;
	base->cra_ctxsize	= sizeof(struct essiv_tfm_ctx);
	base->cra_alignmask	= block_base->cra_alignmask;
	base->cra_priority	= block_base->cra_priority;

	if (type == CRYPTO_ALG_TYPE_LSKCIPHER) {
		skcipher_inst->alg.setkey	= essiv_skcipher_setkey;
		skcipher_inst->alg.encrypt	= essiv_skcipher_encrypt;
		skcipher_inst->alg.decrypt	= essiv_skcipher_decrypt;
		skcipher_inst->alg.init		= essiv_skcipher_init_tfm;
		skcipher_inst->alg.exit		= essiv_skcipher_exit_tfm;

		skcipher_inst->alg.min_keysize	= skcipher_alg->min_keysize;
		skcipher_inst->alg.max_keysize	= skcipher_alg->max_keysize;
		skcipher_inst->alg.ivsize	= ivsize;
		skcipher_inst->alg.chunksize	= skcipher_alg->chunksize;

		skcipher_inst->free		= essiv_skcipher_free_instance;

		err = skcipher_register_instance(tmpl, skcipher_inst);
	} else {
		aead_inst->alg.setkey		= essiv_aead_setkey;
		aead_inst->alg.setauthsize	= essiv_aead_setauthsize;
		aead_inst->alg.encrypt		= essiv_aead_encrypt;
		aead_inst->alg.decrypt		= essiv_aead_decrypt;
		aead_inst->alg.init		= essiv_aead_init_tfm;
		aead_inst->alg.exit		= essiv_aead_exit_tfm;

		aead_inst->alg.ivsize		= ivsize;
		aead_inst->alg.maxauthsize	= crypto_aead_alg_maxauthsize(aead_alg);
		aead_inst->alg.chunksize	= crypto_aead_alg_chunksize(aead_alg);

		aead_inst->free			= essiv_aead_free_instance;

		err = aead_register_instance(tmpl, aead_inst);
	}

	if (err)
		goto out_free_hash;

	/* The lookup reference is dropped on success as well. */
	crypto_mod_put(_hash_alg);
	return 0;

out_free_hash:
	crypto_mod_put(_hash_alg);
out_drop_skcipher:
	if (type == CRYPTO_ALG_TYPE_LSKCIPHER)
		crypto_drop_skcipher(&ictx->u.skcipher_spawn);
	else
		crypto_drop_aead(&ictx->u.aead_spawn);
out_free_inst:
	/* Only one of the two is non-NULL; kfree() is called on both. */
	kfree(skcipher_inst);
	kfree(aead_inst);
	return err;
}

/* essiv(cipher_name, shash_name) */
static struct crypto_template essiv_tmpl = {
	.name	= "essiv",
	.create	= essiv_create,
	.module	= THIS_MODULE,
};

/* Register the "essiv" template on module load. */
static int __init essiv_module_init(void)
{
	return crypto_register_template(&essiv_tmpl);
}

/* Unregister the "essiv" template on module unload. */
static void __exit essiv_module_exit(void)
{
	crypto_unregister_template(&essiv_tmpl);
}

module_init(essiv_module_init);
module_exit(essiv_module_exit);

MODULE_DESCRIPTION("ESSIV skcipher/aead wrapper for block encryption");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("essiv");
MODULE_IMPORT_NS("CRYPTO_INTERNAL");
4 42 11 11 41 42 50 51 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 // SPDX-License-Identifier: GPL-2.0-only /* * Aug 8, 2011 Bob Pearson with help from Joakim Tjernlund and George Spelvin * cleaned up code to current version of sparse and added the slicing-by-8 * algorithm to the closely similar existing slicing-by-4 algorithm. * * Oct 15, 2000 Matt Domsch <Matt_Domsch@dell.com> * Nicer crc32 functions/docs submitted by linux@horizon.com. Thanks! * Code was from the public domain, copyright abandoned. Code was * subsequently included in the kernel, thus was re-licensed under the * GNU GPL v2. * * Oct 12, 2000 Matt Domsch <Matt_Domsch@dell.com> * Same crc32 function was used in 5 other places in the kernel. * I made one version, and deleted the others. * There are various incantations of crc32(). Some use a seed of 0 or ~0. * Some xor at the end with ~0. The generic crc32() function takes * seed as an argument, and doesn't xor at the end. Then individual * users can do whatever they need. * drivers/net/smc9194.c uses seed ~0, doesn't xor with ~0. * fs/jffs2 uses seed 0, doesn't xor with ~0. * fs/partitions/efi.c uses seed ~0, xor's with ~0. 
*/ /* see: Documentation/staging/crc32.rst for a description of algorithms */ #include <linux/crc32.h> #include <linux/export.h> #include <linux/module.h> #include <linux/types.h> #include "crc32table.h" static inline u32 __maybe_unused crc32_le_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++]; return crc; } static inline u32 __maybe_unused crc32_be_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++]; return crc; } static inline u32 __maybe_unused crc32c_base(u32 crc, const u8 *p, size_t len) { while (len--) crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++]; return crc; } #ifdef CONFIG_CRC32_ARCH #include "crc32.h" /* $(SRCARCH)/crc32.h */ u32 crc32_optimizations(void) { return crc32_optimizations_arch(); } EXPORT_SYMBOL(crc32_optimizations); #else #define crc32_le_arch crc32_le_base #define crc32_be_arch crc32_be_base #define crc32c_arch crc32c_base #endif u32 crc32_le(u32 crc, const void *p, size_t len) { return crc32_le_arch(crc, p, len); } EXPORT_SYMBOL(crc32_le); u32 crc32_be(u32 crc, const void *p, size_t len) { return crc32_be_arch(crc, p, len); } EXPORT_SYMBOL(crc32_be); u32 crc32c(u32 crc, const void *p, size_t len) { return crc32c_arch(crc, p, len); } EXPORT_SYMBOL(crc32c); #ifdef crc32_mod_init_arch static int __init crc32_mod_init(void) { crc32_mod_init_arch(); return 0; } subsys_initcall(crc32_mod_init); static void __exit crc32_mod_exit(void) { } module_exit(crc32_mod_exit); #endif MODULE_DESCRIPTION("CRC32 library functions"); MODULE_LICENSE("GPL");
105 99 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2011 IBM Corporation * * Author: * Mimi Zohar <zohar@us.ibm.com> */ #include <linux/xattr.h> #include <linux/evm.h> int posix_xattr_acl(const char *xattr) { int xattr_len = strlen(xattr); if ((strlen(XATTR_NAME_POSIX_ACL_ACCESS) == xattr_len) && (strncmp(XATTR_NAME_POSIX_ACL_ACCESS, xattr, xattr_len) == 0)) return 1; if ((strlen(XATTR_NAME_POSIX_ACL_DEFAULT) == xattr_len) && (strncmp(XATTR_NAME_POSIX_ACL_DEFAULT, xattr, xattr_len) == 0)) return 1; return 0; }
151 214 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef __SOUND_CORE_H #define __SOUND_CORE_H /* * Main header file for the ALSA driver * Copyright (c) 1994-2001 by Jaroslav Kysela <perex@perex.cz> */ #include <linux/device.h> #include <linux/sched.h> /* wake_up() */ #include <linux/mutex.h> /* 
struct mutex */
#include <linux/rwsem.h>		/* struct rw_semaphore */
#include <linux/pm.h>			/* pm_message_t */
#include <linux/stringify.h>
#include <linux/printk.h>
#include <linux/xarray.h>

/* number of supported soundcards */
#ifdef CONFIG_SND_DYNAMIC_MINORS
#define SNDRV_CARDS CONFIG_SND_MAX_CARDS
#else
#define SNDRV_CARDS 8		/* don't change - minor numbers */
#endif

#define CONFIG_SND_MAJOR	116	/* standard configuration */

/* forward declarations */
struct pci_dev;
struct module;
struct completion;

/* device allocation stuff */

/* type of the object used in snd_device_*()
 * this also defines the calling order
 */
enum snd_device_type {
	SNDRV_DEV_LOWLEVEL,
	SNDRV_DEV_INFO,
	SNDRV_DEV_BUS,
	SNDRV_DEV_CODEC,
	SNDRV_DEV_PCM,
	SNDRV_DEV_COMPRESS,
	SNDRV_DEV_RAWMIDI,
	SNDRV_DEV_TIMER,
	SNDRV_DEV_SEQUENCER,
	SNDRV_DEV_HWDEP,
	SNDRV_DEV_JACK,
	SNDRV_DEV_CONTROL,	/* NOTE: this must be the last one */
};

/* state of a snd_device, stored in snd_device.state */
enum snd_device_state {
	SNDRV_DEV_BUILD,
	SNDRV_DEV_REGISTERED,
	SNDRV_DEV_DISCONNECTED,
};

struct snd_device;

/* callbacks attached to a snd_device; each receives the device itself */
struct snd_device_ops {
	int (*dev_free)(struct snd_device *dev);
	int (*dev_register)(struct snd_device *dev);
	int (*dev_disconnect)(struct snd_device *dev);
};

struct snd_device {
	struct list_head list;		/* list of registered devices */
	struct snd_card *card;		/* card which holds this device */
	enum snd_device_state state;	/* state of the device */
	enum snd_device_type type;	/* device type */
	void *device_data;		/* device structure */
	const struct snd_device_ops *ops;	/* operations */
};

/* convert a list node embedded as snd_device.list back to its snd_device */
#define snd_device(n) list_entry(n, struct snd_device, list)

/* main structure for soundcard */

struct snd_card {
	int number;			/* number of soundcard (index to
								snd_cards) */

	char id[16];			/* id string of this card */
	char driver[16];		/* driver name */
	char shortname[32];		/* short name of this soundcard */
	char longname[80];		/* name of this soundcard */
	char irq_descr[32];		/* Interrupt description */
	char mixername[80];		/* mixer name */
	char components[128];		/* card components delimited with
								space */
	struct module *module;		/* top-level module */

	void *private_data;		/* private data for soundcard */
	void (*private_free) (struct snd_card *card); /* callback for freeing of
								private data */
	struct list_head devices;	/* devices */

	struct device *ctl_dev;		/* control device */
	unsigned int last_numid;	/* last used numeric ID */
	struct rw_semaphore controls_rwsem;	/* controls lock (list and values) */
	rwlock_t controls_rwlock;	/* lock for lookup and ctl_files list */
	int controls_count;		/* count of all controls */
	size_t user_ctl_alloc_size;	// current memory allocation by user controls.
	struct list_head controls;	/* all controls for this card */
	struct list_head ctl_files;	/* active control files */
#ifdef CONFIG_SND_CTL_FAST_LOOKUP
	struct xarray ctl_numids;	/* hash table for numids */
	struct xarray ctl_hash;		/* hash table for ctl id matching */
	bool ctl_hash_collision;	/* ctl_hash collision seen? */
#endif

	struct snd_info_entry *proc_root;	/* root for soundcard specific files */
	struct proc_dir_entry *proc_root_link;	/* number link to real id */

	struct list_head files_list;	/* all files associated to this card */
	struct snd_shutdown_f_ops *s_f_ops; /* file operations in the shutdown
								state */
	spinlock_t files_lock;		/* lock the files for this card */
	int shutdown;			/* this card is going down */
	struct completion *release_completion;
	struct device *dev;		/* device assigned to this card */
	struct device card_dev;		/* cardX object for sysfs */
	const struct attribute_group *dev_groups[4]; /* assigned sysfs attr */
	bool registered;		/* card_dev is registered? */
	bool managed;			/* managed via devres */
	bool releasing;			/* during card free process */
	int sync_irq;			/* assigned irq, used for PCM sync */
	wait_queue_head_t remove_sleep;

	size_t total_pcm_alloc_bytes;	/* total amount of allocated buffers */
	struct mutex memory_mutex;	/* protection for the above */
#ifdef CONFIG_SND_DEBUG
	struct dentry *debugfs_root;    /* debugfs root for card */
#endif

#ifdef CONFIG_PM
	unsigned int power_state;	/* power state */
	atomic_t power_ref;
	wait_queue_head_t power_sleep;
	wait_queue_head_t power_ref_sleep;
#endif

#if IS_ENABLED(CONFIG_SND_MIXER_OSS)
	struct snd_mixer_oss *mixer_oss;
	int mixer_oss_change_count;
#endif
};

/* convert the embedded card_dev device back to its snd_card */
#define dev_to_snd_card(p)	container_of(p, struct snd_card, card_dev)

#ifdef CONFIG_PM
/* read the card's current power_state with READ_ONCE() */
static inline unsigned int snd_power_get_state(struct snd_card *card)
{
	return READ_ONCE(card->power_state);
}

/* store a new power_state with WRITE_ONCE() and wake waiters on power_sleep */
static inline void snd_power_change_state(struct snd_card *card, unsigned int state)
{
	WRITE_ONCE(card->power_state, state);
	wake_up(&card->power_sleep);
}

/**
 * snd_power_ref - Take the reference count for power control
 * @card: sound card object
 *
 * The power_ref reference of the card is used for managing to block
 * the snd_power_sync_ref() operation.  This function increments the reference.
 * The counterpart snd_power_unref() has to be called appropriately later.
 */
static inline void snd_power_ref(struct snd_card *card)
{
	atomic_inc(&card->power_ref);
}

/**
 * snd_power_unref - Release the reference count for power control
 * @card: sound card object
 */
static inline void snd_power_unref(struct snd_card *card)
{
	/* wake power_ref_sleep waiters only when the count drops to zero */
	if (atomic_dec_and_test(&card->power_ref))
		wake_up(&card->power_ref_sleep);
}

/**
 * snd_power_sync_ref - wait until the card power_ref is freed
 * @card: sound card object
 *
 * This function is used to synchronize with the pending power_ref being
 * released.
*/ static inline void snd_power_sync_ref(struct snd_card *card) { wait_event(card->power_ref_sleep, !atomic_read(&card->power_ref)); } /* init.c */ int snd_power_wait(struct snd_card *card); int snd_power_ref_and_wait(struct snd_card *card); #else /* ! CONFIG_PM */ static inline int snd_power_wait(struct snd_card *card) { return 0; } static inline void snd_power_ref(struct snd_card *card) {} static inline void snd_power_unref(struct snd_card *card) {} static inline int snd_power_ref_and_wait(struct snd_card *card) { return 0; } static inline void snd_power_sync_ref(struct snd_card *card) {} #define snd_power_get_state(card) ({ (void)(card); SNDRV_CTL_POWER_D0; }) #define snd_power_change_state(card, state) do { (void)(card); } while (0) #endif /* CONFIG_PM */ struct snd_minor { int type; /* SNDRV_DEVICE_TYPE_XXX */ int card; /* card number */ int device; /* device number */ const struct file_operations *f_ops; /* file operations */ void *private_data; /* private data for f_ops->open */ struct device *dev; /* device for sysfs */ struct snd_card *card_ptr; /* assigned card instance */ }; /* return a device pointer linked to each sound device as a parent */ static inline struct device *snd_card_get_device_link(struct snd_card *card) { return card ? 
&card->card_dev : NULL; } /* sound.c */ extern int snd_major; extern int snd_ecards_limit; extern const struct class sound_class; #ifdef CONFIG_SND_DEBUG extern struct dentry *sound_debugfs_root; #endif void snd_request_card(int card); int snd_device_alloc(struct device **dev_p, struct snd_card *card); int snd_register_device(int type, struct snd_card *card, int dev, const struct file_operations *f_ops, void *private_data, struct device *device); int snd_unregister_device(struct device *dev); void *snd_lookup_minor_data(unsigned int minor, int type); #ifdef CONFIG_SND_OSSEMUL int snd_register_oss_device(int type, struct snd_card *card, int dev, const struct file_operations *f_ops, void *private_data); int snd_unregister_oss_device(int type, struct snd_card *card, int dev); void *snd_lookup_oss_minor_data(unsigned int minor, int type); #endif int snd_minor_info_init(void); /* sound_oss.c */ #ifdef CONFIG_SND_OSSEMUL int snd_minor_info_oss_init(void); #else static inline int snd_minor_info_oss_init(void) { return 0; } #endif /* memory.c */ int copy_to_user_fromio(void __user *dst, const volatile void __iomem *src, size_t count); int copy_from_user_toio(volatile void __iomem *dst, const void __user *src, size_t count); /* init.c */ int snd_card_locked(int card); #if IS_ENABLED(CONFIG_SND_MIXER_OSS) #define SND_MIXER_OSS_NOTIFY_REGISTER 0 #define SND_MIXER_OSS_NOTIFY_DISCONNECT 1 #define SND_MIXER_OSS_NOTIFY_FREE 2 extern int (*snd_mixer_oss_notify_callback)(struct snd_card *card, int cmd); #endif int snd_card_new(struct device *parent, int idx, const char *xid, struct module *module, int extra_size, struct snd_card **card_ret); int snd_devm_card_new(struct device *parent, int idx, const char *xid, struct module *module, size_t extra_size, struct snd_card **card_ret); void snd_card_disconnect(struct snd_card *card); void snd_card_disconnect_sync(struct snd_card *card); void snd_card_free(struct snd_card *card); void snd_card_free_when_closed(struct snd_card *card); int 
snd_card_free_on_error(struct device *dev, int ret); void snd_card_set_id(struct snd_card *card, const char *id); int snd_card_register(struct snd_card *card); int snd_card_info_init(void); int snd_card_add_dev_attr(struct snd_card *card, const struct attribute_group *group); int snd_component_add(struct snd_card *card, const char *component); int snd_card_file_add(struct snd_card *card, struct file *file); int snd_card_file_remove(struct snd_card *card, struct file *file); struct snd_card *snd_card_ref(int card); /** * snd_card_unref - Unreference the card object * @card: the card object to unreference * * Call this function for the card object that was obtained via snd_card_ref() * or snd_lookup_minor_data(). */ static inline void snd_card_unref(struct snd_card *card) { put_device(&card->card_dev); } #define snd_card_set_dev(card, devptr) ((card)->dev = (devptr)) /* device.c */ int snd_device_new(struct snd_card *card, enum snd_device_type type, void *device_data, const struct snd_device_ops *ops); int snd_device_register(struct snd_card *card, void *device_data); int snd_device_register_all(struct snd_card *card); void snd_device_disconnect(struct snd_card *card, void *device_data); void snd_device_disconnect_all(struct snd_card *card); void snd_device_free(struct snd_card *card, void *device_data); void snd_device_free_all(struct snd_card *card); /* isadma.c */ #ifdef CONFIG_ISA_DMA_API #define DMA_MODE_NO_ENABLE 0x0100 void snd_dma_program(unsigned long dma, unsigned long addr, unsigned int size, unsigned short mode); void snd_dma_disable(unsigned long dma); unsigned int snd_dma_pointer(unsigned long dma, unsigned int size); int snd_devm_request_dma(struct device *dev, int dma, const char *name); #endif /* misc.c */ struct resource; void release_and_free_resource(struct resource *res); /* --- */ #ifdef CONFIG_SND_DEBUG /** * snd_BUG - give a BUG warning message and stack trace * * Calls WARN() if CONFIG_SND_DEBUG is set. 
* Ignored when CONFIG_SND_DEBUG is not set. */ #define snd_BUG() WARN(1, "BUG?\n") /** * snd_BUG_ON - debugging check macro * @cond: condition to evaluate * * Has the same behavior as WARN_ON when CONFIG_SND_DEBUG is set, * otherwise just evaluates the conditional and returns the value. */ #define snd_BUG_ON(cond) WARN_ON((cond)) #else /* !CONFIG_SND_DEBUG */ #define snd_BUG() do { } while (0) #define snd_BUG_ON(condition) ({ \ int __ret_warn_on = !!(condition); \ unlikely(__ret_warn_on); \ }) #endif /* CONFIG_SND_DEBUG */ #define SNDRV_OSS_VERSION ((3<<16)|(8<<8)|(1<<4)|(0)) /* 3.8.1a */ /* for easier backward-porting */ #if IS_ENABLED(CONFIG_GAMEPORT) #define gameport_set_dev_parent(gp,xdev) ((gp)->dev.parent = (xdev)) #define gameport_set_port_data(gp,r) ((gp)->port_data = (r)) #define gameport_get_port_data(gp) (gp)->port_data #endif /* PCI quirk list helper */ struct snd_pci_quirk { unsigned short subvendor; /* PCI subvendor ID */ unsigned short subdevice; /* PCI subdevice ID */ unsigned short subdevice_mask; /* bitmask to match */ int value; /* value */ #ifdef CONFIG_SND_DEBUG_VERBOSE const char *name; /* name of the device (optional) */ #endif }; #define _SND_PCI_QUIRK_ID_MASK(vend, mask, dev) \ .subvendor = (vend), .subdevice = (dev), .subdevice_mask = (mask) #define _SND_PCI_QUIRK_ID(vend, dev) \ _SND_PCI_QUIRK_ID_MASK(vend, 0xffff, dev) #define SND_PCI_QUIRK_ID(vend,dev) {_SND_PCI_QUIRK_ID(vend, dev)} #ifdef CONFIG_SND_DEBUG_VERBOSE #define SND_PCI_QUIRK(vend,dev,xname,val) \ {_SND_PCI_QUIRK_ID(vend, dev), .value = (val), .name = (xname)} #define SND_PCI_QUIRK_VENDOR(vend, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, 0, 0), .value = (val), .name = (xname)} #define SND_PCI_QUIRK_MASK(vend, mask, dev, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, mask, dev), \ .value = (val), .name = (xname)} #define snd_pci_quirk_name(q) ((q)->name) #else #define SND_PCI_QUIRK(vend,dev,xname,val) \ {_SND_PCI_QUIRK_ID(vend, dev), .value = (val)} #define 
SND_PCI_QUIRK_MASK(vend, mask, dev, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, mask, dev), .value = (val)} #define SND_PCI_QUIRK_VENDOR(vend, xname, val) \ {_SND_PCI_QUIRK_ID_MASK(vend, 0, 0), .value = (val)} #define snd_pci_quirk_name(q) "" #endif #ifdef CONFIG_PCI const struct snd_pci_quirk * snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list); const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list); #else static inline const struct snd_pci_quirk * snd_pci_quirk_lookup(struct pci_dev *pci, const struct snd_pci_quirk *list) { return NULL; } static inline const struct snd_pci_quirk * snd_pci_quirk_lookup_id(u16 vendor, u16 device, const struct snd_pci_quirk *list) { return NULL; } #endif /* async signal helpers */ struct snd_fasync; int snd_fasync_helper(int fd, struct file *file, int on, struct snd_fasync **fasyncp); void snd_kill_fasync(struct snd_fasync *fasync, int signal, int poll); void snd_fasync_free(struct snd_fasync *fasync); #endif /* __SOUND_CORE_H */
9 9 1 1 13 3 1 6 2 1 12 12 12 7 12 8 3 7 7 7 14 3 5 2 9 9 8 9 3 1 4 4 4 4 1 1 3 3 12 8 1 1 1 1 9 4 4 12 1 12 12 12 11 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 
498 499 // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool.h> #include <linux/phy.h> #include "netlink.h" #include "common.h" struct strset_info { bool per_dev; bool free_strings; unsigned int count; const char (*strings)[ETH_GSTRING_LEN]; }; static const struct strset_info info_template[] = { [ETH_SS_TEST] = { .per_dev = true, }, [ETH_SS_STATS] = { .per_dev = true, }, [ETH_SS_PRIV_FLAGS] = { .per_dev = true, }, [ETH_SS_FEATURES] = { .per_dev = false, .count = ARRAY_SIZE(netdev_features_strings), .strings = netdev_features_strings, }, [ETH_SS_RSS_HASH_FUNCS] = { .per_dev = false, .count = ARRAY_SIZE(rss_hash_func_strings), .strings = rss_hash_func_strings, }, [ETH_SS_TUNABLES] = { .per_dev = false, .count = ARRAY_SIZE(tunable_strings), .strings = tunable_strings, }, [ETH_SS_PHY_STATS] = { .per_dev = true, }, [ETH_SS_PHY_TUNABLES] = { .per_dev = false, .count = ARRAY_SIZE(phy_tunable_strings), .strings = phy_tunable_strings, }, [ETH_SS_LINK_MODES] = { .per_dev = false, .count = __ETHTOOL_LINK_MODE_MASK_NBITS, .strings = link_mode_names, }, [ETH_SS_MSG_CLASSES] = { .per_dev = false, .count = NETIF_MSG_CLASS_COUNT, .strings = netif_msg_class_names, }, [ETH_SS_WOL_MODES] = { .per_dev = false, .count = WOL_MODE_COUNT, .strings = wol_mode_names, }, [ETH_SS_SOF_TIMESTAMPING] = { .per_dev = false, .count = __SOF_TIMESTAMPING_CNT, .strings = sof_timestamping_names, }, [ETH_SS_TS_TX_TYPES] = { .per_dev = false, .count = __HWTSTAMP_TX_CNT, .strings = ts_tx_type_names, }, [ETH_SS_TS_RX_FILTERS] = { .per_dev = false, .count = __HWTSTAMP_FILTER_CNT, .strings = ts_rx_filter_names, }, [ETH_SS_TS_FLAGS] = { .per_dev = false, .count = __HWTSTAMP_FLAG_CNT, .strings = ts_flags_names, }, [ETH_SS_UDP_TUNNEL_TYPES] = { .per_dev = false, .count = __ETHTOOL_UDP_TUNNEL_TYPE_CNT, .strings = udp_tunnel_type_names, }, [ETH_SS_STATS_STD] = { .per_dev = false, .count = __ETHTOOL_STATS_CNT, .strings = stats_std_names, }, [ETH_SS_STATS_ETH_PHY] = { .per_dev = false, .count = 
__ETHTOOL_A_STATS_ETH_PHY_CNT, .strings = stats_eth_phy_names, }, [ETH_SS_STATS_ETH_MAC] = { .per_dev = false, .count = __ETHTOOL_A_STATS_ETH_MAC_CNT, .strings = stats_eth_mac_names, }, [ETH_SS_STATS_ETH_CTRL] = { .per_dev = false, .count = __ETHTOOL_A_STATS_ETH_CTRL_CNT, .strings = stats_eth_ctrl_names, }, [ETH_SS_STATS_RMON] = { .per_dev = false, .count = __ETHTOOL_A_STATS_RMON_CNT, .strings = stats_rmon_names, }, [ETH_SS_STATS_PHY] = { .per_dev = false, .count = __ETHTOOL_A_STATS_PHY_CNT, .strings = stats_phy_names, }, }; struct strset_req_info { struct ethnl_req_info base; u32 req_ids; bool counts_only; }; #define STRSET_REQINFO(__req_base) \ container_of(__req_base, struct strset_req_info, base) struct strset_reply_data { struct ethnl_reply_data base; struct strset_info sets[ETH_SS_COUNT]; }; #define STRSET_REPDATA(__reply_base) \ container_of(__reply_base, struct strset_reply_data, base) const struct nla_policy ethnl_strset_get_policy[] = { [ETHTOOL_A_STRSET_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), [ETHTOOL_A_STRSET_STRINGSETS] = { .type = NLA_NESTED }, [ETHTOOL_A_STRSET_COUNTS_ONLY] = { .type = NLA_FLAG }, }; static const struct nla_policy get_stringset_policy[] = { [ETHTOOL_A_STRINGSET_ID] = { .type = NLA_U32 }, }; /** * strset_include() - test if a string set should be included in reply * @info: parsed client request * @data: pointer to request data structure * @id: id of string set to check (ETH_SS_* constants) */ static bool strset_include(const struct strset_req_info *info, const struct strset_reply_data *data, u32 id) { bool per_dev; BUILD_BUG_ON(ETH_SS_COUNT >= BITS_PER_BYTE * sizeof(info->req_ids)); if (info->req_ids) return info->req_ids & (1U << id); per_dev = data->sets[id].per_dev; if (!per_dev && !data->sets[id].strings) return false; return data->base.dev ? 
per_dev : !per_dev; } static int strset_get_id(const struct nlattr *nest, u32 *val, struct netlink_ext_ack *extack) { struct nlattr *tb[ARRAY_SIZE(get_stringset_policy)]; int ret; ret = nla_parse_nested(tb, ARRAY_SIZE(get_stringset_policy) - 1, nest, get_stringset_policy, extack); if (ret < 0) return ret; if (NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_STRINGSET_ID)) return -EINVAL; *val = nla_get_u32(tb[ETHTOOL_A_STRINGSET_ID]); return 0; } static const struct nla_policy strset_stringsets_policy[] = { [ETHTOOL_A_STRINGSETS_STRINGSET] = { .type = NLA_NESTED }, }; static int strset_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, struct netlink_ext_ack *extack) { struct strset_req_info *req_info = STRSET_REQINFO(req_base); struct nlattr *nest = tb[ETHTOOL_A_STRSET_STRINGSETS]; struct nlattr *attr; int rem, ret; if (!nest) return 0; ret = nla_validate_nested(nest, ARRAY_SIZE(strset_stringsets_policy) - 1, strset_stringsets_policy, extack); if (ret < 0) return ret; req_info->counts_only = tb[ETHTOOL_A_STRSET_COUNTS_ONLY]; nla_for_each_nested(attr, nest, rem) { u32 id; if (WARN_ONCE(nla_type(attr) != ETHTOOL_A_STRINGSETS_STRINGSET, "unexpected attrtype %u in ETHTOOL_A_STRSET_STRINGSETS\n", nla_type(attr))) return -EINVAL; ret = strset_get_id(attr, &id, extack); if (ret < 0) return ret; if (id >= ETH_SS_COUNT) { NL_SET_ERR_MSG_ATTR(extack, attr, "unknown string set id"); return -EOPNOTSUPP; } req_info->req_ids |= (1U << id); } return 0; } static void strset_cleanup_data(struct ethnl_reply_data *reply_base) { struct strset_reply_data *data = STRSET_REPDATA(reply_base); unsigned int i; for (i = 0; i < ETH_SS_COUNT; i++) if (data->sets[i].free_strings) { kfree(data->sets[i].strings); data->sets[i].strings = NULL; data->sets[i].free_strings = false; } } static int strset_prepare_set(struct strset_info *info, struct net_device *dev, struct phy_device *phydev, unsigned int id, bool counts_only) { const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; 
const struct ethtool_ops *ops = dev->ethtool_ops; void *strings; int count, ret; if (id == ETH_SS_PHY_STATS && phydev && !ops->get_ethtool_phy_stats && phy_ops && phy_ops->get_sset_count) ret = phy_ops->get_sset_count(phydev); else if (ops->get_sset_count && ops->get_strings) ret = ops->get_sset_count(dev, id); else ret = -EOPNOTSUPP; if (ret <= 0) { info->count = 0; return 0; } count = ret; if (!counts_only) { strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL); if (!strings) return -ENOMEM; if (id == ETH_SS_PHY_STATS && phydev && !ops->get_ethtool_phy_stats && phy_ops && phy_ops->get_strings) phy_ops->get_strings(phydev, strings); else ops->get_strings(dev, id, strings); info->strings = strings; info->free_strings = true; } info->count = count; return 0; } static int strset_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { const struct strset_req_info *req_info = STRSET_REQINFO(req_base); struct strset_reply_data *data = STRSET_REPDATA(reply_base); struct net_device *dev = reply_base->dev; struct nlattr **tb = info->attrs; struct phy_device *phydev; unsigned int i; int ret; BUILD_BUG_ON(ARRAY_SIZE(info_template) != ETH_SS_COUNT); memcpy(&data->sets, &info_template, sizeof(data->sets)); if (!dev) { for (i = 0; i < ETH_SS_COUNT; i++) { if ((req_info->req_ids & (1U << i)) && data->sets[i].per_dev) { GENL_SET_ERR_MSG(info, "requested per device strings without dev"); return -EINVAL; } } return 0; } phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS, info->extack); /* phydev can be NULL, check for errors only */ if (IS_ERR(phydev)) return PTR_ERR(phydev); ret = ethnl_ops_begin(dev); if (ret < 0) goto err_strset; for (i = 0; i < ETH_SS_COUNT; i++) { if (!strset_include(req_info, data, i) || !data->sets[i].per_dev) continue; ret = strset_prepare_set(&data->sets[i], dev, phydev, i, req_info->counts_only); if (ret < 0) goto err_ops; } ethnl_ops_complete(dev); return 0; err_ops: 
ethnl_ops_complete(dev); err_strset: strset_cleanup_data(reply_base); return ret; } /* calculate size of ETHTOOL_A_STRSET_STRINGSET nest for one string set */ static int strset_set_size(const struct strset_info *info, bool counts_only) { unsigned int len = 0; unsigned int i; if (info->count == 0) return 0; if (counts_only) return nla_total_size(2 * nla_total_size(sizeof(u32))); for (i = 0; i < info->count; i++) { const char *str = info->strings[i]; /* ETHTOOL_A_STRING_INDEX, ETHTOOL_A_STRING_VALUE, nest */ len += nla_total_size(nla_total_size(sizeof(u32)) + ethnl_strz_size(str)); } /* ETHTOOL_A_STRINGSET_ID, ETHTOOL_A_STRINGSET_COUNT */ len = 2 * nla_total_size(sizeof(u32)) + nla_total_size(len); return nla_total_size(len); } static int strset_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct strset_req_info *req_info = STRSET_REQINFO(req_base); const struct strset_reply_data *data = STRSET_REPDATA(reply_base); unsigned int i; int len = 0; int ret; len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */ for (i = 0; i < ETH_SS_COUNT; i++) { const struct strset_info *set_info = &data->sets[i]; if (!strset_include(req_info, data, i)) continue; ret = strset_set_size(set_info, req_info->counts_only); if (ret < 0) return ret; len += ret; } return len; } /* fill one string into reply */ static int strset_fill_string(struct sk_buff *skb, const struct strset_info *set_info, u32 idx) { struct nlattr *string_attr; const char *value; value = set_info->strings[idx]; string_attr = nla_nest_start(skb, ETHTOOL_A_STRINGS_STRING); if (!string_attr) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_STRING_INDEX, idx) || ethnl_put_strz(skb, ETHTOOL_A_STRING_VALUE, value)) goto nla_put_failure; nla_nest_end(skb, string_attr); return 0; nla_put_failure: nla_nest_cancel(skb, string_attr); return -EMSGSIZE; } /* fill one string set into reply */ static int strset_fill_set(struct sk_buff *skb, const struct strset_info *set_info, 
u32 id, bool counts_only) { struct nlattr *stringset_attr; struct nlattr *strings_attr; unsigned int i; if (!set_info->per_dev && !set_info->strings) return -EOPNOTSUPP; if (set_info->count == 0) return 0; stringset_attr = nla_nest_start(skb, ETHTOOL_A_STRINGSETS_STRINGSET); if (!stringset_attr) return -EMSGSIZE; if (nla_put_u32(skb, ETHTOOL_A_STRINGSET_ID, id) || nla_put_u32(skb, ETHTOOL_A_STRINGSET_COUNT, set_info->count)) goto nla_put_failure; if (!counts_only) { strings_attr = nla_nest_start(skb, ETHTOOL_A_STRINGSET_STRINGS); if (!strings_attr) goto nla_put_failure; for (i = 0; i < set_info->count; i++) { if (strset_fill_string(skb, set_info, i) < 0) goto nla_put_failure; } nla_nest_end(skb, strings_attr); } nla_nest_end(skb, stringset_attr); return 0; nla_put_failure: nla_nest_cancel(skb, stringset_attr); return -EMSGSIZE; } static int strset_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct strset_req_info *req_info = STRSET_REQINFO(req_base); const struct strset_reply_data *data = STRSET_REPDATA(reply_base); struct nlattr *nest; unsigned int i; int ret; nest = nla_nest_start(skb, ETHTOOL_A_STRSET_STRINGSETS); if (!nest) return -EMSGSIZE; for (i = 0; i < ETH_SS_COUNT; i++) { if (strset_include(req_info, data, i)) { ret = strset_fill_set(skb, &data->sets[i], i, req_info->counts_only); if (ret < 0) goto nla_put_failure; } } nla_nest_end(skb, nest); return 0; nla_put_failure: nla_nest_cancel(skb, nest); return ret; } const struct ethnl_request_ops ethnl_strset_request_ops = { .request_cmd = ETHTOOL_MSG_STRSET_GET, .reply_cmd = ETHTOOL_MSG_STRSET_GET_REPLY, .hdr_attr = ETHTOOL_A_STRSET_HEADER, .req_info_size = sizeof(struct strset_req_info), .reply_data_size = sizeof(struct strset_reply_data), .allow_nodev_do = true, .parse_request = strset_parse_request, .prepare_data = strset_prepare_data, .reply_size = strset_reply_size, .fill_reply = strset_fill_reply, .cleanup_data = 
strset_cleanup_data, };
35 2 33 42 35 7 32 22 2 2 1 1 1 1 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 /* * Copyright (c) 2006, 2017 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/in.h> #include <linux/ipv6.h> #include "rds.h" #include "loop.h" static char * const rds_trans_modules[] = { [RDS_TRANS_IB] = "rds_rdma", [RDS_TRANS_GAP] = NULL, [RDS_TRANS_TCP] = "rds_tcp", }; static struct rds_transport *transports[RDS_TRANS_COUNT]; static DECLARE_RWSEM(rds_trans_sem); void rds_trans_register(struct rds_transport *trans) { BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); down_write(&rds_trans_sem); if (transports[trans->t_type]) printk(KERN_ERR "RDS Transport type %d already registered\n", trans->t_type); else { transports[trans->t_type] = trans; printk(KERN_INFO "Registered RDS/%s transport\n", trans->t_name); } up_write(&rds_trans_sem); } EXPORT_SYMBOL_GPL(rds_trans_register); void rds_trans_unregister(struct rds_transport *trans) { down_write(&rds_trans_sem); transports[trans->t_type] = NULL; printk(KERN_INFO "Unregistered RDS/%s transport\n", trans->t_name); up_write(&rds_trans_sem); } EXPORT_SYMBOL_GPL(rds_trans_unregister); void rds_trans_put(struct rds_transport *trans) { if (trans) module_put(trans->t_owner); } struct rds_transport *rds_trans_get_preferred(struct net *net, const struct in6_addr *addr, __u32 scope_id) { struct rds_transport *ret = NULL; struct rds_transport *trans; unsigned int i; if (ipv6_addr_v4mapped(addr)) { if (*(u_int8_t *)&addr->s6_addr32[3] == IN_LOOPBACKNET) return &rds_loop_transport; } else if (ipv6_addr_loopback(addr)) { return &rds_loop_transport; } down_read(&rds_trans_sem); for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (trans && (trans->laddr_check(net, addr, scope_id) == 0) && (!trans->t_owner || try_module_get(trans->t_owner))) { ret = trans; break; } } up_read(&rds_trans_sem); return ret; } struct rds_transport *rds_trans_get(int t_type) { struct rds_transport *ret = NULL; struct rds_transport *trans; down_read(&rds_trans_sem); trans = transports[t_type]; if (!trans) { up_read(&rds_trans_sem); if 
(rds_trans_modules[t_type]) request_module(rds_trans_modules[t_type]); down_read(&rds_trans_sem); trans = transports[t_type]; } if (trans && trans->t_type == t_type && (!trans->t_owner || try_module_get(trans->t_owner))) ret = trans; up_read(&rds_trans_sem); return ret; } /* * This returns the number of stats entries in the snapshot and only * copies them using the iter if there is enough space for them. The * caller passes in the global stats so that we can size and copy while * holding the lock. */ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, unsigned int avail) { struct rds_transport *trans; unsigned int total = 0; unsigned int part; int i; rds_info_iter_unmap(iter); down_read(&rds_trans_sem); for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; part = trans->stats_info_copy(iter, avail); avail -= min(avail, part); total += part; } up_read(&rds_trans_sem); return total; }
173 170 87 87 86 4 51 13 11 16 59 17 67 2 11 5 45 2 5 56 22 6 63 65 7 14 14 52 52 53 51 2 6 14 2 4 15 10 6 2 11 7 28 18 17 1 1 7 4 21 1 3 18 6 10 19 1 1 17 12 6 4 15 12 12 12 5 3 12 12 12 9 19 1 19 9 5 4 9 9 11 1 6 6 19 17 18 51 5 50 33 50 1 51 9 7 6 3 33 33 32 32 1 2 1 2 48 1 47 48 1 48 45 2 60 1 49 16 1 16 17 3 7 16 16 7 17 10 10 10 9 10 10 9 2 9 1 9 1 1 13 2 11 1 7 1 6 2 10 4 5 3 1 2 2 9 9 10 19 18 2 12 13 5 10 10 19 17 5 17 18 19 5 1 4 44 45 3 1 4 1 1 1 22 26 7 7 4 4 4 4 4 4 1 1 1 2 24 24 18 15 8 15 1 14 5 9 9 1 8 24 1 22 24 16 16 1 7 1 10 2 10 10 3 1 1 1 1 1 8 9 1 8 8 15 15 4 3 2 3 3 1 1 2 4 15 15 23 23 10 23 2 14 14 4 5 1 2 2 1 2 14 10 4 1 2 12 7 2 5 13 1 11 2 2 3 13 39 39 39 1 53 55 53 4 1 1 1 48 38 1 1 47 1 38 1 1 37 1 36 39 38 1 38 38 1 1 3 34 1 1 29 24 1 1 13 10 3 7 2 1 3 2 1 5 10 1 2 7 6 13 13 1 22 28 28 28 1 40 43 1 43 14 30 44 1 43 2 1 1 1 1 2 1 1 2 2 2 1 1 1 1 1 1 1 1 93 1 107 106 87 8 1 3 8 17 1 5 7 4 3 5 13 2 1 2 1 1 1 1 1 45 1 11 1 1 3 3 1 2 7 12 4 2 6 5 1 1 1 2 1 2 2 1 1 5 6 3 2 9 9 9 3 1 2 22 1 8 7 7 2 1 2 1 13 1 15 27 28 10 2 16 5 22 1 2 9 9 12 3 14 5 13 5 5 1 4 1 1 2 3 3 3 2 3 3 3 3 2 3 3 2 1 2 1 1 1 1 1 3 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 14 1 1 4 4 1 4 71 1 11 156 69 102 68 67 4 4 4 2 2 2 2 4 6 6 6 6 115 1 12 109 109 91 17 77 29 27 26 1 26 3 1 2 2 3 9 3 2 2 1 1 4 3 1 1 1 1 2 152 152 150 1 117 1 1 1 2 4 1 8 1 1 1 1 1 1 1 1 2 152 105 48 152 1 1 1 4 1 3 5 5 5 5 5 2 1 4 1 4 4 1 18 18 18 16 2 18 8 18 18 2 2 2 1 2 4 2 2 2 1 6 1 6 4 4 1 2 1 1 2 2 2 2 2 2 2 2 7 7 6 7 4 6 2 6 1 7 6 1 7 5 4 3 3 3 3 3 3 1 3 3 3 3 3 3 1 3 3 3 2 2 2 7 2 3 3 3 7 7 1 2 2 2 1 1 1 1 1 3 3 3 6 6 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 
625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 
1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 
1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 
2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 
2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 
3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 
3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 
3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 
4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 
4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 
5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 
5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 5533 5534 5535 5536 5537 5538 5539 5540 5541 5542 5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553 5554 5555 5556 5557 5558 5559 5560 5561 5562 5563 5564 5565 5566 5567 5568 5569 5570 5571 5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583 5584 5585 5586 5587 5588 5589 5590 5591 5592 5593 5594 5595 5596 5597 5598 5599 5600 5601 5602 5603 5604 5605 5606 5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618 5619 5620 5621 5622 5623 5624 5625 5626 5627 5628 5629 5630 5631 5632 5633 5634 5635 5636 5637 5638 5639 5640 5641 5642 5643 5644 5645 5646 5647 5648 5649 5650 5651 5652 5653 5654 5655 5656 5657 5658 5659 5660 5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677 5678 5679 5680 5681 5682 5683 5684 5685 5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697 5698 5699 5700 5701 5702 5703 5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729 5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749 5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779 5780 5781 5782 5783 5784 5785 5786 5787 5788 5789 5790 5791 5792 5793 5794 5795 5796 5797 5798 5799 5800 5801 5802 5803 5804 5805 5806 5807 5808 5809 5810 5811 5812 5813 5814 5815 5816 5817 5818 5819 5820 5821 5822 5823 5824 5825 5826 5827 5828 5829 5830 5831 5832 5833 5834 5835 5836 5837 5838 5839 5840 5841 5842 5843 5844 5845 5846 5847 5848 5849 5850 5851 5852 5853 5854 5855 5856 5857 5858 5859 5860 5861 5862 5863 5864 5865 5866 5867 5868 5869 5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889 5890 5891 5892 5893 5894 5895 5896 5897 5898 5899 
5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914 5915 5916 5917 5918 5919 5920 5921 5922 5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937 5938 5939 5940 5941 5942 5943 5944 5945 5946 5947 5948 5949 5950 5951 5952 5953 5954 5955 5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969 5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018 6019 6020 6021 6022 6023 6024 6025 6026 6027 6028 6029 6030 6031 6032 6033 6034 6035 6036 6037 6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048 6049 6050 6051 6052 6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065 6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083 6084 6085 6086 6087 6088 6089 6090 6091 6092 6093 6094 6095 6096 6097 6098 6099 6100 6101 6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122 6123 6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162 6163 6164 6165 6166 6167 6168 6169 6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184 6185 6186 6187 6188 6189 6190 6191 6192 6193 6194 6195 6196 6197 6198 6199 6200 6201 6202 6203 6204 6205 6206 6207 6208 6209 6210 6211 6212 6213 6214 6215 6216 6217 6218 6219 6220 6221 6222 6223 6224 6225 6226 6227 6228 6229 6230 6231 6232 6233 6234 6235 6236 6237 6238 6239 6240 6241 6242 6243 6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264 6265 6266 6267 6268 6269 6270 6271 6272 6273 6274 6275 6276 6277 6278 6279 6280 6281 6282 6283 6284 6285 6286 6287 6288 6289 6290 6291 6292 6293 6294 6295 6296 6297 6298 6299 
6300 6301 6302 6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334 6335 6336 6337 6338 6339 6340 6341 6342 6343 6344 6345 6346 6347 6348 6349 6350 6351 6352 6353 6354 6355 6356 6357 6358 6359 6360 6361 6362 6363 6364 6365 6366 6367 6368 6369 6370 6371 6372 6373 6374 6375 6376 6377 6378 6379 6380 6381 6382 6383 6384 6385 6386 6387 6388 6389 6390 6391 6392 6393 6394 6395 6396 6397 6398 6399 6400 6401 6402 6403 6404 6405 6406 6407 6408 6409 6410 6411 6412 6413 6414 6415 6416 6417 6418 6419 6420 6421 6422 6423 6424 6425 6426 6427 6428 6429 6430 6431 6432 6433 6434 6435 6436 6437 6438 6439 6440 6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457 6458 6459 6460 6461 6462 6463 6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476 6477 6478 6479 6480 6481 6482 6483 6484 6485 6486 6487 6488 6489 6490 6491 6492 6493 6494 6495 6496 6497 6498 6499 6500 6501 6502 6503 6504 6505 6506 6507 6508 6509 6510 6511 6512 6513 6514 6515 6516 6517 6518 6519 6520 6521 6522 6523 6524 6525 6526 6527 6528 6529 6530 6531 6532 6533 6534 6535 6536 6537 6538 6539 6540 6541 6542 6543 6544 6545 6546 6547 6548 6549 6550 6551 6552 6553 6554 6555 6556 6557 6558 6559 6560 6561 6562 6563 6564 6565 6566 6567 6568 6569 6570 6571 6572 6573 6574 6575 6576 6577 6578 6579 6580 6581 6582 6583 6584 6585 6586 6587 6588 6589 6590 6591 6592 6593 6594 6595 6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628 6629 6630 6631 6632 6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647 6648 6649 6650 6651 6652 6653 6654 6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666 6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679 6680 6681 6682 6683 6684 6685 6686 6687 6688 6689 6690 6691 6692 6693 6694 6695 6696 6697 6698 6699 
6700 6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714 6715 6716 6717 6718 6719 6720 6721 6722 6723 6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734 6735 6736 6737 6738 6739 6740 6741 6742 6743 6744 6745 6746 6747 6748 6749 6750 6751 6752 6753 6754 6755 6756 6757 6758 6759 6760 6761 6762 6763 6764 6765 6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786 6787 6788 6789 6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801 6802 6803 6804 6805 6806 6807 6808 6809 6810 6811 6812 6813 6814 6815 6816 6817 6818 6819 6820 6821 6822 6823 6824 6825 6826 6827 6828 6829 6830 6831 6832 6833 6834 6835 6836 6837 6838 6839 6840 6841 6842 6843 6844 6845 6846 6847 6848 6849 6850 6851 6852 6853 6854 6855 6856 6857 6858 6859 6860 6861 6862 6863 6864 6865 6866 6867 6868 6869 6870 6871 6872 6873 6874 6875 6876 6877 6878 6879 6880 6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896 6897 6898 6899 6900 6901 6902 6903 6904 6905 6906 6907 6908 6909 6910 6911 6912 6913 6914 6915 6916 6917 6918 6919 6920 6921 6922 6923 6924 6925 6926 6927 6928 6929 6930 6931 6932 6933 6934 6935 6936 6937 6938 6939 6940 6941 6942 6943 6944 6945 6946 6947 6948 6949 6950 6951 6952 6953 6954 6955 6956 6957 6958 6959 6960 6961 6962 6963 6964 6965 6966 6967 6968 6969 6970 6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989 6990 6991 6992 6993 6994 6995 6996 6997 6998 6999 7000 7001 7002 7003 7004 7005 7006 7007 7008 7009 7010 7011 7012 7013 7014 7015 7016 7017 7018 7019 7020 7021 7022 7023 7024 7025 7026 7027 7028 7029 7030 7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050 7051 7052 7053 7054 7055 7056 7057 7058 7059 7060 7061 7062 7063 7064 7065 7066 7067 7068 7069 7070 7071 7072 7073 7074 7075 7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 // SPDX-License-Identifier: GPL-2.0-only /* 
binder.c * * Android IPC Subsystem * * Copyright (C) 2007-2008 Google, Inc. */ /* * Locking overview * * There are 3 main spinlocks which must be acquired in the * order shown: * * 1) proc->outer_lock : protects binder_ref * binder_proc_lock() and binder_proc_unlock() are * used to acq/rel. * 2) node->lock : protects most fields of binder_node. * binder_node_lock() and binder_node_unlock() are * used to acq/rel * 3) proc->inner_lock : protects the thread and node lists * (proc->threads, proc->waiting_threads, proc->nodes) * and all todo lists associated with the binder_proc * (proc->todo, thread->todo, proc->delivered_death and * node->async_todo), as well as thread->transaction_stack * binder_inner_proc_lock() and binder_inner_proc_unlock() * are used to acq/rel * * Any lock under procA must never be nested under any lock at the same * level or below on procB. * * Functions that require a lock held on entry indicate which lock * in the suffix of the function name: * * foo_olocked() : requires proc->outer_lock * foo_nlocked() : requires node->lock * foo_ilocked() : requires proc->inner_lock * foo_oilocked(): requires proc->outer_lock and proc->inner_lock * foo_nilocked(): requires node->lock and proc->inner_lock * ... 
*/

/* Prefix every pr_*() message emitted from this file with the module name. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/fdtable.h>
#include <linux/file.h>
#include <linux/freezer.h>
#include <linux/fs.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/nsproxy.h>
#include <linux/poll.h>
#include <linux/debugfs.h>
#include <linux/rbtree.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/seq_file.h>
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/pid_namespace.h>
#include <linux/security.h>
#include <linux/spinlock.h>
#include <linux/ratelimit.h>
#include <linux/syscalls.h>
#include <linux/task_work.h>
#include <linux/sizes.h>
#include <linux/ktime.h>

#include <kunit/visibility.h>

#include <uapi/linux/android/binder.h>

#include <linux/cacheflush.h>

#include "binder_internal.h"
#include "binder_trace.h"

/*
 * File-scope lists, each declared next to a lock.
 * NOTE(review): presumably each lock guards the list declared just above
 * it -- confirm at the use sites.
 */
static HLIST_HEAD(binder_deferred_list);
static DEFINE_MUTEX(binder_deferred_lock);

static HLIST_HEAD(binder_devices);
static DEFINE_SPINLOCK(binder_devices_lock);

static HLIST_HEAD(binder_procs);
static DEFINE_MUTEX(binder_procs_lock);

static HLIST_HEAD(binder_dead_nodes);
static DEFINE_SPINLOCK(binder_dead_nodes_lock);

static struct dentry *binder_debugfs_dir_entry_root;
static struct dentry *binder_debugfs_dir_entry_proc;
/* Counter that debug ids are drawn from via atomic_inc_return(). */
static atomic_t binder_last_id;

static int proc_show(struct seq_file *m, void *unused);
DEFINE_SHOW_ATTRIBUTE(proc);

#define FORBIDDEN_MMAP_FLAGS (VM_WRITE)

/* Bit flags tested against binder_debug_mask by binder_debug(). */
enum {
	BINDER_DEBUG_USER_ERROR = 1U << 0,
	BINDER_DEBUG_FAILED_TRANSACTION = 1U << 1,
	BINDER_DEBUG_DEAD_TRANSACTION = 1U << 2,
	BINDER_DEBUG_OPEN_CLOSE = 1U << 3,
	BINDER_DEBUG_DEAD_BINDER = 1U << 4,
	BINDER_DEBUG_DEATH_NOTIFICATION = 1U << 5,
	BINDER_DEBUG_READ_WRITE = 1U << 6,
	BINDER_DEBUG_USER_REFS = 1U << 7,
	BINDER_DEBUG_THREADS = 1U << 8,
	BINDER_DEBUG_TRANSACTION = 1U << 9,
	BINDER_DEBUG_TRANSACTION_COMPLETE = 1U << 10,
	BINDER_DEBUG_FREE_BUFFER = 1U << 11,
	BINDER_DEBUG_INTERNAL_REFS = 1U << 12,
	BINDER_DEBUG_PRIORITY_CAP = 1U << 13,
	BINDER_DEBUG_SPINLOCKS = 1U << 14,
};
/* Default mask: user errors, failed transactions and dead transactions. */
static uint32_t binder_debug_mask = BINDER_DEBUG_USER_ERROR |
	BINDER_DEBUG_FAILED_TRANSACTION | BINDER_DEBUG_DEAD_TRANSACTION;
module_param_named(debug_mask, binder_debug_mask, uint, 0644);

char *binder_devices_param = CONFIG_ANDROID_BINDER_DEVICES;
module_param_named(devices, binder_devices_param, charp, 0444);

static DECLARE_WAIT_QUEUE_HEAD(binder_user_error_wait);
static int binder_stop_on_user_error;

/*
 * Setter for the "stop_on_user_error" module parameter.
 *
 * Stores the new value through param_set_int() and, when the resulting
 * binder_stop_on_user_error is below 2, wakes anything sleeping on
 * binder_user_error_wait. Returns whatever param_set_int() returned.
 */
static int binder_set_stop_on_user_error(const char *val,
					 const struct kernel_param *kp)
{
	int ret;

	ret = param_set_int(val, kp);
	if (binder_stop_on_user_error < 2)
		wake_up(&binder_user_error_wait);
	return ret;
}
module_param_call(stop_on_user_error, binder_set_stop_on_user_error,
	param_get_int, &binder_stop_on_user_error, 0644);

/*
 * Emit a rate-limited pr_info() message, but only when at least one bit of
 * @mask is set in binder_debug_mask. @format and the variadic arguments are
 * forwarded through a struct va_format and printed with "%pV".
 */
static __printf(2, 3) void binder_debug(int mask, const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	if (binder_debug_mask & mask) {
		va_start(args, format);
		vaf.va = &args;
		vaf.fmt = format;
		pr_info_ratelimited("%pV", &vaf);
		va_end(args);
	}
}

/* Shorthand for binder_debug() with BINDER_DEBUG_FAILED_TRANSACTION. */
#define binder_txn_error(x...) \
	binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, x)

/*
 * Report an error attributed to userspace.
 *
 * The message is printed (rate-limited) only when BINDER_DEBUG_USER_ERROR is
 * set in binder_debug_mask. Independently of the mask, a non-zero
 * binder_stop_on_user_error is raised to 2.
 */
static __printf(1, 2) void binder_user_error(const char *format, ...)
{
	struct va_format vaf;
	va_list args;

	if (binder_debug_mask & BINDER_DEBUG_USER_ERROR) {
		va_start(args, format);
		vaf.va = &args;
		vaf.fmt = format;
		pr_info_ratelimited("%pV", &vaf);
		va_end(args);
	}

	if (binder_stop_on_user_error)
		binder_stop_on_user_error = 2;
}

/* Fill the id, command and param fields of the extended-error object @ee. */
#define binder_set_extended_error(ee, _id, _command, _param) \
	do { \
		(ee)->id = _id; \
		(ee)->command = _command; \
		(ee)->param = _param; \
	} while (0)

/*
 * Convert a pointer to the embedded "hdr" member back to the object that
 * contains it.
 */
#define to_flat_binder_object(hdr) \
	container_of(hdr, struct flat_binder_object, hdr)

#define to_binder_fd_object(hdr) container_of(hdr, struct binder_fd_object, hdr)

#define to_binder_buffer_object(hdr) \
	container_of(hdr, struct binder_buffer_object, hdr)

#define to_binder_fd_array_object(hdr) \
	container_of(hdr, struct binder_fd_array_object, hdr)

static struct binder_stats binder_stats;

/* Count one deleted object of @type in binder_stats.obj_deleted[]. */
static inline void binder_stats_deleted(enum binder_stat_types type)
{
	atomic_inc(&binder_stats.obj_deleted[type]);
}

/* Count one created object of @type in binder_stats.obj_created[]. */
static inline void binder_stats_created(enum binder_stat_types type)
{
	atomic_inc(&binder_stats.obj_created[type]);
}

/* One record in a binder_transaction_log. */
struct binder_transaction_log_entry {
	int debug_id;
	int debug_id_done;
	int call_type;
	int from_proc;
	int from_thread;
	int target_handle;
	int to_proc;
	int to_thread;
	int to_node;
	int data_size;
	int offsets_size;
	int return_error_line;
	uint32_t return_error;
	uint32_t return_error_param;
	char context_name[BINDERFS_MAX_NAME + 1];
};

/*
 * Fixed-size log of 32 entries. @cur is advanced atomically by
 * binder_transaction_log_add(); @full is set once @cur has reached the
 * array size, after which slots are reused modulo the array size.
 */
struct binder_transaction_log {
	atomic_t cur;
	bool full;
	struct binder_transaction_log_entry entry[32];
};

static struct binder_transaction_log binder_transaction_log;
static struct binder_transaction_log binder_transaction_log_failed;

/*
 * Claim the next slot of @log and return it zeroed.
 *
 * The slot index is the value returned by atomic_inc_return(&log->cur),
 * taken modulo the number of entries. log->full is set as soon as that
 * value is no longer a valid array index.
 */
static struct binder_transaction_log_entry *binder_transaction_log_add(
	struct binder_transaction_log *log)
{
	struct binder_transaction_log_entry *e;
	unsigned int cur = atomic_inc_return(&log->cur);

	if (cur >= ARRAY_SIZE(log->entry))
		log->full = true;
	e = &log->entry[cur % ARRAY_SIZE(log->entry)];
	WRITE_ONCE(e->debug_id_done, 0);
	/*
	 * Write barrier to synchronize access to e->debug_id_done.
	 * Make sure the 0 stored above is visible before the remaining
	 * fields are zeroed by memset().
	 */
	smp_wmb();
	memset(e, 0, sizeof(*e));
	return e;
}

enum binder_deferred_state {
	BINDER_DEFERRED_FLUSH = 0x01,
	BINDER_DEFERRED_RELEASE = 0x02,
};

/* Bit flags; the code in this file tests them against thread->looper. */
enum {
	BINDER_LOOPER_STATE_REGISTERED = 0x01,
	BINDER_LOOPER_STATE_ENTERED = 0x02,
	BINDER_LOOPER_STATE_EXITED = 0x04,
	BINDER_LOOPER_STATE_INVALID = 0x08,
	BINDER_LOOPER_STATE_WAITING = 0x10,
	BINDER_LOOPER_STATE_POLL = 0x20,
};

/**
 * binder_proc_lock() - Acquire outer lock for given binder_proc
 * @proc: struct binder_proc to acquire
 *
 * Acquires proc->outer_lock. Used to protect binder_ref
 * structures associated with the given proc.
 */
#define binder_proc_lock(proc) _binder_proc_lock(proc, __LINE__)
static void
_binder_proc_lock(struct binder_proc *proc, int line)
	__acquires(&proc->outer_lock)
{
	/* @line is the call site's __LINE__, logged for lock debugging. */
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	spin_lock(&proc->outer_lock);
}

/**
 * binder_proc_unlock() - Release outer lock for given binder_proc
 * @proc: struct binder_proc to release
 *
 * Release lock acquired via binder_proc_lock()
 */
#define binder_proc_unlock(proc) _binder_proc_unlock(proc, __LINE__)
static void
_binder_proc_unlock(struct binder_proc *proc, int line)
	__releases(&proc->outer_lock)
{
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	spin_unlock(&proc->outer_lock);
}

/**
 * binder_inner_proc_lock() - Acquire inner lock for given binder_proc
 * @proc: struct binder_proc to acquire
 *
 * Acquires proc->inner_lock.
Used to protect todo lists
 */
#define binder_inner_proc_lock(proc) _binder_inner_proc_lock(proc, __LINE__)
static void
_binder_inner_proc_lock(struct binder_proc *proc, int line)
	__acquires(&proc->inner_lock)
{
	/* @line is the call site's __LINE__, logged for lock debugging. */
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	spin_lock(&proc->inner_lock);
}

/**
 * binder_inner_proc_unlock() - Release inner lock for given binder_proc
 * @proc: struct binder_proc to release
 *
 * Release lock acquired via binder_inner_proc_lock()
 */
#define binder_inner_proc_unlock(proc) _binder_inner_proc_unlock(proc, __LINE__)
static void
_binder_inner_proc_unlock(struct binder_proc *proc, int line)
	__releases(&proc->inner_lock)
{
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	spin_unlock(&proc->inner_lock);
}

/**
 * binder_node_lock() - Acquire spinlock for given binder_node
 * @node: struct binder_node to acquire
 *
 * Acquires node->lock. Used to protect binder_node fields
 */
#define binder_node_lock(node) _binder_node_lock(node, __LINE__)
static void
_binder_node_lock(struct binder_node *node, int line)
	__acquires(&node->lock)
{
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	spin_lock(&node->lock);
}

/**
 * binder_node_unlock() - Release spinlock for given binder_node
 * @node: struct binder_node to release
 *
 * Release lock acquired via binder_node_lock()
 */
#define binder_node_unlock(node) _binder_node_unlock(node, __LINE__)
static void
_binder_node_unlock(struct binder_node *node, int line)
	__releases(&node->lock)
{
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	spin_unlock(&node->lock);
}

/**
 * binder_node_inner_lock() - Acquire node and inner locks
 * @node: struct binder_node to acquire
 *
 * Acquires node->lock. If node->proc is set, also acquires
 * proc->inner_lock.
Used to protect binder_node fields
 */
#define binder_node_inner_lock(node) _binder_node_inner_lock(node, __LINE__)
static void
_binder_node_inner_lock(struct binder_node *node, int line)
	__acquires(&node->lock) __acquires(&node->proc->inner_lock)
{
	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	/* node->lock is taken first, then (if present) proc->inner_lock. */
	spin_lock(&node->lock);
	if (node->proc)
		binder_inner_proc_lock(node->proc);
	else
		/* annotation for sparse */
		__acquire(&node->proc->inner_lock);
}

/**
 * binder_node_inner_unlock() - Release node and inner locks
 * @node: struct binder_node to release
 *
 * Release locks acquired via binder_node_inner_lock()
 */
#define binder_node_inner_unlock(node) _binder_node_inner_unlock(node, __LINE__)
static void
_binder_node_inner_unlock(struct binder_node *node, int line)
	__releases(&node->lock) __releases(&node->proc->inner_lock)
{
	struct binder_proc *proc = node->proc;

	binder_debug(BINDER_DEBUG_SPINLOCKS,
		     "%s: line=%d\n", __func__, line);
	/* Release in the reverse order: inner lock (if any), then node->lock. */
	if (proc)
		binder_inner_proc_unlock(proc);
	else
		/* annotation for sparse */
		__release(&node->proc->inner_lock);
	spin_unlock(&node->lock);
}

/* Return true if @list has no entries; takes no lock itself. */
static bool binder_worklist_empty_ilocked(struct list_head *list)
{
	return list_empty(list);
}

/**
 * binder_worklist_empty() - Check if no items on the work list
 * @proc:       binder_proc associated with list
 * @list:	list to check
 *
 * Takes and releases proc->inner_lock around the check.
 *
 * Return: true if there are no items on list, else false
 */
static bool binder_worklist_empty(struct binder_proc *proc,
				  struct list_head *list)
{
	bool ret;

	binder_inner_proc_lock(proc);
	ret = binder_worklist_empty_ilocked(list);
	binder_inner_proc_unlock(proc);
	return ret;
}

/**
 * binder_enqueue_work_ilocked() - Add an item to the work list
 * @work:         struct binder_work to add to list
 * @target_list:  list to add work to
 *
 * Adds the work to the specified list. Asserts that work
 * is not already on a list.
 *
 * Requires the proc->inner_lock to be held.
*/
static void
binder_enqueue_work_ilocked(struct binder_work *work,
			   struct list_head *target_list)
{
	BUG_ON(target_list == NULL);
	/* A non-NULL next pointer with a non-empty entry means "already queued". */
	BUG_ON(work->entry.next && !list_empty(&work->entry));
	list_add_tail(&work->entry, target_list);
}

/**
 * binder_enqueue_deferred_thread_work_ilocked() - Add deferred thread work
 * @thread:       thread to queue work to
 * @work:         struct binder_work to add to list
 *
 * Adds the work to the todo list of the thread. Doesn't set the process_todo
 * flag, which means that (if it wasn't already set) the thread will go to
 * sleep without handling this work when it calls read.
 *
 * Requires the proc->inner_lock to be held.
 */
static void
binder_enqueue_deferred_thread_work_ilocked(struct binder_thread *thread,
					    struct binder_work *work)
{
	/* Warn if the thread is still linked through waiting_thread_node. */
	WARN_ON(!list_empty(&thread->waiting_thread_node));
	binder_enqueue_work_ilocked(work, &thread->todo);
}

/**
 * binder_enqueue_thread_work_ilocked() - Add an item to the thread work list
 * @thread:       thread to queue work to
 * @work:         struct binder_work to add to list
 *
 * Adds the work to the todo list of the thread, and enables processing
 * of the todo queue.
 *
 * Requires the proc->inner_lock to be held.
 */
static void
binder_enqueue_thread_work_ilocked(struct binder_thread *thread,
				   struct binder_work *work)
{
	WARN_ON(!list_empty(&thread->waiting_thread_node));
	binder_enqueue_work_ilocked(work, &thread->todo);

	/* (e)poll-based threads require an explicit wakeup signal when
	 * queuing their own work; they rely on these events to consume
	 * messages without I/O block. Without it, threads risk waiting
	 * indefinitely without handling the work.
	 */
	/* Must be evaluated before process_todo is set below. */
	if (thread->looper & BINDER_LOOPER_STATE_POLL &&
	    thread->pid == current->pid && !thread->process_todo)
		wake_up_interruptible_sync(&thread->wait);

	thread->process_todo = true;
}

/**
 * binder_enqueue_thread_work() - Add an item to the thread work list
 * @thread:       thread to queue work to
 * @work:         struct binder_work to add to list
 *
 * Adds the work to the todo list of the thread, and enables processing
 * of the todo queue. Takes and releases thread->proc's inner lock.
 */
static void
binder_enqueue_thread_work(struct binder_thread *thread,
			   struct binder_work *work)
{
	binder_inner_proc_lock(thread->proc);
	binder_enqueue_thread_work_ilocked(thread, work);
	binder_inner_proc_unlock(thread->proc);
}

/* Unlink @work from its list and re-initialize its list entry. */
static void
binder_dequeue_work_ilocked(struct binder_work *work)
{
	list_del_init(&work->entry);
}

/**
 * binder_dequeue_work() - Removes an item from the work list
 * @proc:         binder_proc associated with list
 * @work:         struct binder_work to remove from list
 *
 * Removes the specified work item from whatever list it is on.
 * Can safely be called if work is not on any list.
*/ static void binder_dequeue_work(struct binder_proc *proc, struct binder_work *work) { binder_inner_proc_lock(proc); binder_dequeue_work_ilocked(work); binder_inner_proc_unlock(proc); } static struct binder_work *binder_dequeue_work_head_ilocked( struct list_head *list) { struct binder_work *w; w = list_first_entry_or_null(list, struct binder_work, entry); if (w) list_del_init(&w->entry); return w; } static void binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer); static void binder_free_thread(struct binder_thread *thread); static void binder_free_proc(struct binder_proc *proc); static void binder_inc_node_tmpref_ilocked(struct binder_node *node); static bool binder_has_work_ilocked(struct binder_thread *thread, bool do_proc_work) { return thread->process_todo || thread->looper_need_return || (do_proc_work && !binder_worklist_empty_ilocked(&thread->proc->todo)); } static bool binder_has_work(struct binder_thread *thread, bool do_proc_work) { bool has_work; binder_inner_proc_lock(thread->proc); has_work = binder_has_work_ilocked(thread, do_proc_work); binder_inner_proc_unlock(thread->proc); return has_work; } static bool binder_available_for_proc_work_ilocked(struct binder_thread *thread) { return !thread->transaction_stack && binder_worklist_empty_ilocked(&thread->todo); } static void binder_wakeup_poll_threads_ilocked(struct binder_proc *proc, bool sync) { struct rb_node *n; struct binder_thread *thread; for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) { thread = rb_entry(n, struct binder_thread, rb_node); if (thread->looper & BINDER_LOOPER_STATE_POLL && binder_available_for_proc_work_ilocked(thread)) { if (sync) wake_up_interruptible_sync(&thread->wait); else wake_up_interruptible(&thread->wait); } } } /** * binder_select_thread_ilocked() - selects a thread for doing proc work. 
* @proc:	process to select a thread from
 *
 * Note that calling this function moves the thread off the waiting_threads
 * list, so it can only be woken up by the caller of this function, or a
 * signal. Therefore, callers *should* always wake up the thread this function
 * returns.
 *
 * Return:	If there's a thread currently waiting for process work,
 *		returns that thread. Otherwise returns NULL.
 */
static struct binder_thread *
binder_select_thread_ilocked(struct binder_proc *proc)
{
	struct binder_thread *thread;

	assert_spin_locked(&proc->inner_lock);
	thread = list_first_entry_or_null(&proc->waiting_threads,
					  struct binder_thread,
					  waiting_thread_node);

	if (thread)
		list_del_init(&thread->waiting_thread_node);

	return thread;
}

/**
 * binder_wakeup_thread_ilocked() - wakes up a thread for doing proc work.
 * @proc:	process to wake up a thread in
 * @thread:	specific thread to wake-up (may be NULL)
 * @sync:	whether to do a synchronous wake-up
 *
 * This function wakes up a thread in the @proc process.
 * The caller may provide a specific thread to wake-up in
 * the @thread parameter. If @thread is NULL, this function
 * will wake up threads that have called poll().
 *
 * Note that for this function to work as expected, callers
 * should first call binder_select_thread_ilocked() to find a thread
 * to handle the work (if they don't have a thread already),
 * and pass the result into the @thread parameter.
 */
static void binder_wakeup_thread_ilocked(struct binder_proc *proc,
					 struct binder_thread *thread,
					 bool sync)
{
	assert_spin_locked(&proc->inner_lock);

	if (thread) {
		if (sync)
			wake_up_interruptible_sync(&thread->wait);
		else
			wake_up_interruptible(&thread->wait);
		return;
	}

	/* Didn't find a thread waiting for proc work; this can happen
	 * in two scenarios:
	 * 1. All threads are busy handling transactions
	 *    In that case, one of those threads should call back into
	 *    the kernel driver soon and pick up this work.
	 * 2. Threads are using the (e)poll interface, in which case
	 *    they may be blocked on the waitqueue without having been
	 *    added to waiting_threads. For this case, we just iterate
	 *    over all threads not handling transaction work, and
	 *    wake them all up. We wake all because we don't know whether
	 *    a thread that called into (e)poll is handling non-binder
	 *    work currently.
	 */
	binder_wakeup_poll_threads_ilocked(proc, sync);
}

/*
 * Wake one thread of @proc for proc work: pick a waiting thread (may be
 * NULL) and hand it to binder_wakeup_thread_ilocked() with a non-sync
 * wake-up. With no waiting thread, that call wakes the poll threads.
 */
static void binder_wakeup_proc_ilocked(struct binder_proc *proc)
{
	struct binder_thread *thread = binder_select_thread_ilocked(proc);

	binder_wakeup_thread_ilocked(proc, thread, /* sync = */false);
}

/*
 * Set the nice value of the current task.
 *
 * If can_nice() permits @nice it is applied directly. Otherwise the value
 * derived from RLIMIT_NICE is applied instead, and a user error is reported
 * when that fallback value is above MAX_NICE.
 */
static void binder_set_nice(long nice)
{
	long min_nice;

	if (can_nice(current, nice)) {
		set_user_nice(current, nice);
		return;
	}
	min_nice = rlimit_to_nice(rlimit(RLIMIT_NICE));
	binder_debug(BINDER_DEBUG_PRIORITY_CAP,
		     "%d: nice value %ld not allowed use %ld instead\n",
		      current->pid, nice, min_nice);
	set_user_nice(current, min_nice);
	if (min_nice <= MAX_NICE)
		return;
	binder_user_error("%d RLIMIT_NICE not set\n", current->pid);
}

/*
 * Look up the node whose ->ptr equals @ptr in the proc->nodes rb-tree.
 *
 * On a match a temporary reference is taken on the node before it is
 * returned. Returns NULL when no node matches. Requires proc->inner_lock.
 */
static struct binder_node *binder_get_node_ilocked(struct binder_proc *proc,
						   binder_uintptr_t ptr)
{
	struct rb_node *n = proc->nodes.rb_node;
	struct binder_node *node;

	assert_spin_locked(&proc->inner_lock);

	while (n) {
		node = rb_entry(n, struct binder_node, rb_node);

		if (ptr < node->ptr)
			n = n->rb_left;
		else if (ptr > node->ptr)
			n = n->rb_right;
		else {
			/*
			 * take an implicit weak reference
			 * to ensure node stays alive until
			 * call to binder_put_node()
			 */
			binder_inc_node_tmpref_ilocked(node);
			return node;
		}
	}
	return NULL;
}

/* Locked wrapper: binder_get_node_ilocked() under proc->inner_lock. */
static struct binder_node *binder_get_node(struct binder_proc *proc,
					   binder_uintptr_t ptr)
{
	struct binder_node *node;

	binder_inner_proc_lock(proc);
	node = binder_get_node_ilocked(proc, ptr);
	binder_inner_proc_unlock(proc);
	return node;
}

/*
 * Insert @new_node into proc->nodes, keyed by the binder pointer taken
 * from @fp (0 when @fp is NULL).
 *
 * If a node with the same ptr already exists, @new_node is left untouched
 * and the existing node is returned with a temporary reference taken.
 * Otherwise @new_node is initialized from @fp, linked into the tree and
 * returned with tmp_refs incremented. Requires proc->inner_lock.
 */
static struct binder_node *binder_init_node_ilocked(
						struct binder_proc *proc,
						struct binder_node *new_node,
						struct flat_binder_object *fp)
{
	struct rb_node **p = &proc->nodes.rb_node;
	struct rb_node *parent = NULL;
	struct binder_node *node;
	binder_uintptr_t ptr = fp ? fp->binder : 0;
	binder_uintptr_t cookie = fp ? fp->cookie : 0;
	__u32 flags = fp ? fp->flags : 0;

	assert_spin_locked(&proc->inner_lock);

	while (*p) {

		parent = *p;
		node = rb_entry(parent, struct binder_node, rb_node);

		if (ptr < node->ptr)
			p = &(*p)->rb_left;
		else if (ptr > node->ptr)
			p = &(*p)->rb_right;
		else {
			/*
			 * A matching node is already in
			 * the rb tree. Abandon the init
			 * and return it.
			 */
			binder_inc_node_tmpref_ilocked(node);
			return node;
		}
	}
	node = new_node;
	binder_stats_created(BINDER_STAT_NODE);
	node->tmp_refs++;
	rb_link_node(&node->rb_node, parent, p);
	rb_insert_color(&node->rb_node, &proc->nodes);
	node->debug_id = atomic_inc_return(&binder_last_id);
	node->proc = proc;
	node->ptr = ptr;
	node->cookie = cookie;
	node->work.type = BINDER_WORK_NODE;
	node->min_priority = flags & FLAT_BINDER_FLAG_PRIORITY_MASK;
	node->accept_fds = !!(flags & FLAT_BINDER_FLAG_ACCEPTS_FDS);
	node->txn_security_ctx = !!(flags & FLAT_BINDER_FLAG_TXN_SECURITY_CTX);
	spin_lock_init(&node->lock);
	INIT_LIST_HEAD(&node->work.entry);
	INIT_LIST_HEAD(&node->async_todo);
	binder_debug(BINDER_DEBUG_INTERNAL_REFS,
		     "%d:%d node %d u%016llx c%016llx created\n",
		     proc->pid, current->pid, node->debug_id,
		     (u64)node->ptr, (u64)node->cookie);

	return node;
}

/*
 * Allocate a node and register it with @proc via
 * binder_init_node_ilocked().
 *
 * Returns NULL if the allocation fails. If an equivalent node already
 * existed, the fresh allocation is freed and the existing node is
 * returned instead.
 */
static struct binder_node *binder_new_node(struct binder_proc *proc,
					   struct flat_binder_object *fp)
{
	struct binder_node *node;
	struct binder_node *new_node = kzalloc(sizeof(*node), GFP_KERNEL);

	if (!new_node)
		return NULL;
	binder_inner_proc_lock(proc);
	node = binder_init_node_ilocked(proc, new_node, fp);
	binder_inner_proc_unlock(proc);
	if (node != new_node)
		/*
		 * The node was already added by another thread
		 */
		kfree(new_node);

	return node;
}

/* Free @node and account for it in the deleted-node statistics. */
static void binder_free_node(struct binder_node *node)
{
	kfree(node);
	binder_stats_deleted(BINDER_STAT_NODE);
}

/*
 * Take a reference on @node.
 *
 * @strong/@internal select the counter that is incremented:
 * internal_strong_refs (strong, internal), local_strong_refs (strong,
 * not internal) or local_weak_refs (weak, not internal); a weak internal
 * increment changes no counter. When the matching has_strong_ref /
 * has_weak_ref flag is not yet set, node->work may be queued on
 * @target_list.
 *
 * Requires node->lock and, if node->proc is set, proc->inner_lock.
 *
 * Return: 0 on success, -EINVAL when @target_list is NULL but queuing the
 * node work would be required.
 */
static int binder_inc_node_nilocked(struct binder_node *node, int strong,
				    int internal,
				    struct list_head *target_list)
{
	struct binder_proc *proc = node->proc;

	assert_spin_locked(&node->lock);
	if (proc)
		assert_spin_locked(&proc->inner_lock);
	if (strong) {
		if (internal) {
			/*
			 * A first internal strong ref without a target list is
			 * only accepted for the context manager node that
			 * already has a strong ref.
			 */
			if (target_list == NULL &&
			    node->internal_strong_refs == 0 &&
			    !(node->proc &&
			      node == node->proc->context->binder_context_mgr_node &&
			      node->has_strong_ref)) {
				pr_err("invalid inc strong node for %d\n",
					node->debug_id);
				return -EINVAL;
			}
			node->internal_strong_refs++;
		} else
			node->local_strong_refs++;
		if (!node->has_strong_ref && target_list) {
			/* @target_list is expected to be a thread's todo list. */
			struct binder_thread *thread = container_of(target_list,
						    struct binder_thread, todo);
			binder_dequeue_work_ilocked(&node->work);
			BUG_ON(&thread->todo != target_list);
			binder_enqueue_deferred_thread_work_ilocked(thread,
								   &node->work);
		}
	} else {
		if (!internal)
			node->local_weak_refs++;
		if (!node->has_weak_ref && list_empty(&node->work.entry)) {
			if (target_list == NULL) {
				pr_err("invalid inc weak node for %d\n",
					node->debug_id);
				return -EINVAL;
			}
			/*
			 * Queue the node work directly on the caller-supplied
			 * list.
			 * NOTE(review): this comment used to read "See comment
			 * above", but no matching comment exists in this
			 * function -- confirm what it referred to.
			 */
			binder_enqueue_work_ilocked(&node->work, target_list);
		}
	}
	return 0;
}

/* Locked wrapper: binder_inc_node_nilocked() under node + inner locks. */
static int binder_inc_node(struct binder_node *node, int strong, int internal,
			   struct list_head *target_list)
{
	int ret;

	binder_node_inner_lock(node);
	ret = binder_inc_node_nilocked(node, strong, internal, target_list);
	binder_node_inner_unlock(node);

	return ret;
}

/*
 * Drop a reference on @node and decide whether it can be freed.
 *
 * @strong/@internal select the counter to decrement, mirroring
 * binder_inc_node_nilocked(). If references of the dropped kind remain,
 * nothing else happens. Otherwise, for a live node (node->proc set) that
 * still has has_strong_ref or has_weak_ref, the node work is queued on
 * proc->todo and a thread is woken. If no references of any kind remain,
 * the node is unlinked (from proc->nodes, or from the dead-node list when
 * node->proc is NULL).
 *
 * Requires node->lock and, if node->proc is set, proc->inner_lock.
 *
 * Return: true if the node was unlinked and must be freed by the caller,
 * else false.
 */
static bool binder_dec_node_nilocked(struct binder_node *node,
				     int strong, int internal)
{
	struct binder_proc *proc = node->proc;

	assert_spin_locked(&node->lock);
	if (proc)
		assert_spin_locked(&proc->inner_lock);
	if (strong) {
		if (internal)
			node->internal_strong_refs--;
		else
			node->local_strong_refs--;
		if (node->local_strong_refs || node->internal_strong_refs)
			return false;
	} else {
		if (!internal)
			node->local_weak_refs--;
		if (node->local_weak_refs || node->tmp_refs ||
				!hlist_empty(&node->refs))
			return false;
	}

	if (proc && (node->has_strong_ref || node->has_weak_ref)) {
		if (list_empty(&node->work.entry)) {
			binder_enqueue_work_ilocked(&node->work, &proc->todo);
			binder_wakeup_proc_ilocked(proc);
		}
	} else {
		if (hlist_empty(&node->refs) && !node->local_strong_refs &&
		    !node->local_weak_refs && !node->tmp_refs) {
			if (proc) {
				binder_dequeue_work_ilocked(&node->work);
				rb_erase(&node->rb_node, &proc->nodes);
				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
					     "refless node %d deleted\n",
					     node->debug_id);
			} else {
				BUG_ON(!list_empty(&node->work.entry));
				spin_lock(&binder_dead_nodes_lock);
				/*
				 * tmp_refs could have changed so
				 * check it again
				 */
				if (node->tmp_refs) {
					spin_unlock(&binder_dead_nodes_lock);
					return false;
				}
				hlist_del(&node->dead_node);
				spin_unlock(&binder_dead_nodes_lock);
				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
					     "dead node %d deleted\n",
					     node->debug_id);
			}
			return true;
		}
	}
	return false;
}

/*
 * Locked wrapper around binder_dec_node_nilocked(); frees the node after
 * dropping the locks when the helper reports it is no longer referenced.
 */
static void binder_dec_node(struct binder_node *node, int strong, int internal)
{
	bool free_node;

	binder_node_inner_lock(node);
	free_node = binder_dec_node_nilocked(node, strong, internal);
	binder_node_inner_unlock(node);
	if (free_node)
		binder_free_node(node);
}

/* Increment node->tmp_refs; the caller is responsible for locking. */
static void binder_inc_node_tmpref_ilocked(struct binder_node *node)
{
	/*
	 * No call to binder_inc_node() is needed since we
	 * don't need to inform userspace of any changes to
	 * tmp_refs
	 */
	node->tmp_refs++;
}

/**
 * binder_inc_node_tmpref() - take a temporary reference on node
 * @node:	node to reference
 *
 * Take reference on node to prevent the node from being freed
 * while referenced only by a local variable. The inner lock is
 * needed to serialize with the node work on the queue (which
 * isn't needed after the node is dead).
If the node is dead
 * (node->proc is NULL), use binder_dead_nodes_lock to protect
 * node->tmp_refs against dead-node-only cases where the node
 * lock cannot be acquired (eg traversing the dead node list to
 * print nodes)
 */
static void binder_inc_node_tmpref(struct binder_node *node)
{
	binder_node_lock(node);
	if (node->proc)
		binder_inner_proc_lock(node->proc);
	else
		spin_lock(&binder_dead_nodes_lock);
	binder_inc_node_tmpref_ilocked(node);
	if (node->proc)
		binder_inner_proc_unlock(node->proc);
	else
		spin_unlock(&binder_dead_nodes_lock);
	binder_node_unlock(node);
}

/**
 * binder_dec_node_tmpref() - remove a temporary reference on node
 * @node:	node to reference
 *
 * Release temporary reference on node taken via binder_inc_node_tmpref().
 * Frees the node if this was the last reference of any kind.
 */
static void binder_dec_node_tmpref(struct binder_node *node)
{
	bool free_node;

	binder_node_inner_lock(node);
	/* Dead nodes additionally need binder_dead_nodes_lock for tmp_refs. */
	if (!node->proc)
		spin_lock(&binder_dead_nodes_lock);
	else
		__acquire(&binder_dead_nodes_lock);
	node->tmp_refs--;
	BUG_ON(node->tmp_refs < 0);
	if (!node->proc)
		spin_unlock(&binder_dead_nodes_lock);
	else
		__release(&binder_dead_nodes_lock);
	/*
	 * Call binder_dec_node_nilocked() to check if all refcounts are 0
	 * and cleanup is needed. Calling with strong=0 and internal=1
	 * causes no actual reference to be released in
	 * binder_dec_node_nilocked(). If that changes, a change is needed
	 * here too.
	 */
	free_node = binder_dec_node_nilocked(node, 0, 1);
	binder_node_inner_unlock(node);
	if (free_node)
		binder_free_node(node);
}

/* Drop the temporary reference held on @node. */
static void binder_put_node(struct binder_node *node)
{
	binder_dec_node_tmpref(node);
}

/*
 * Find the ref with descriptor @desc in proc->refs_by_desc.
 *
 * Returns NULL when no ref has that descriptor, or when @need_strong_ref
 * is true and the ref's strong count is zero (a user error is reported in
 * that case).
 */
static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc,
						 u32 desc, bool need_strong_ref)
{
	struct rb_node *n = proc->refs_by_desc.rb_node;
	struct binder_ref *ref;

	while (n) {
		ref = rb_entry(n, struct binder_ref, rb_node_desc);

		if (desc < ref->data.desc) {
			n = n->rb_left;
		} else if (desc > ref->data.desc) {
			n = n->rb_right;
		} else if (need_strong_ref && !ref->data.strong) {
			binder_user_error("tried to use weak ref as strong ref\n");
			return NULL;
		} else {
			return ref;
		}
	}
	return NULL;
}

/* Find the smallest unused descriptor the "slow way" */
static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset)
{
	struct binder_ref *ref;
	struct rb_node *n;
	u32 desc;

	desc = offset;
	/* Walk refs in ascending descriptor order until a gap is found. */
	for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) {
		ref = rb_entry(n, struct binder_ref, rb_node_desc);
		if (ref->data.desc > desc)
			break;
		desc = ref->data.desc + 1;
	}

	return desc;
}

/*
 * Find an available reference descriptor ID. The proc->outer_lock might
 * be released in the process, in which case -EAGAIN is returned and the
 * @desc should be considered invalid.
 */
static int get_ref_desc_olocked(struct binder_proc *proc,
				struct binder_node *node,
				u32 *desc)
{
	struct dbitmap *dmap = &proc->dmap;
	unsigned int nbits, offset;
	unsigned long *new, bit;

	/* 0 is reserved for the context manager */
	offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1;

	if (!dbitmap_enabled(dmap)) {
		*desc = slow_desc_lookup_olocked(proc, offset);
		return 0;
	}

	if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) {
		*desc = bit;
		return 0;
	}

	/*
	 * The dbitmap is full and needs to grow. The proc->outer_lock
	 * is briefly released to allocate the new bitmap safely.
	 */
	nbits = dbitmap_grow_nbits(dmap);
	binder_proc_unlock(proc);
	new = bitmap_zalloc(nbits, GFP_KERNEL);
	binder_proc_lock(proc);
	/*
	 * NOTE(review): @new is passed on without a NULL check here;
	 * presumably dbitmap_grow() copes with a failed allocation -- confirm.
	 */
	dbitmap_grow(dmap, new, nbits);

	return -EAGAIN;
}

/**
 * binder_get_ref_for_node_olocked() - get the ref associated with given node
 * @proc:	binder_proc that owns the ref
 * @node:	binder_node of target
 * @new_ref:	newly allocated binder_ref to be initialized or %NULL
 *
 * Look up the ref for the given node and return it if it exists
 *
 * If it doesn't exist and the caller provides a newly allocated
 * ref, initialize the fields of the newly allocated ref and insert
 * into the given proc rb_trees and node refs list.
 *
 * Return:	the ref for node. It is possible that another thread
 *		allocated/initialized the ref first in which case the
 *		returned ref would be different than the passed-in
 *		new_ref. new_ref must be kfree'd by the caller in
 *		this case.
 */
static struct binder_ref *binder_get_ref_for_node_olocked(
					struct binder_proc *proc,
					struct binder_node *node,
					struct binder_ref *new_ref)
{
	struct binder_ref *ref;
	struct rb_node *parent;
	struct rb_node **p;
	u32 desc;

retry:
	/* Search refs_by_node; it is ordered by node pointer value. */
	p = &proc->refs_by_node.rb_node;
	parent = NULL;
	while (*p) {
		parent = *p;
		ref = rb_entry(parent, struct binder_ref, rb_node_node);

		if (node < ref->node)
			p = &(*p)->rb_left;
		else if (node > ref->node)
			p = &(*p)->rb_right;
		else
			return ref;
	}
	if (!new_ref)
		return NULL;

	/* might release the proc->outer_lock */
	if (get_ref_desc_olocked(proc, node, &desc) == -EAGAIN)
		goto retry;

	binder_stats_created(BINDER_STAT_REF);
	new_ref->data.debug_id = atomic_inc_return(&binder_last_id);
	new_ref->proc = proc;
	new_ref->node = node;
	rb_link_node(&new_ref->rb_node_node, parent, p);
	rb_insert_color(&new_ref->rb_node_node, &proc->refs_by_node);

	new_ref->data.desc = desc;
	/* Insert into refs_by_desc; a duplicate descriptor is a bug. */
	p = &proc->refs_by_desc.rb_node;
	while (*p) {
		parent = *p;
		ref = rb_entry(parent, struct binder_ref, rb_node_desc);

		if (new_ref->data.desc < ref->data.desc)
			p = &(*p)->rb_left;
		else if (new_ref->data.desc > ref->data.desc)
			p = &(*p)->rb_right;
		else
			BUG();
	}
	rb_link_node(&new_ref->rb_node_desc, parent, p);
	rb_insert_color(&new_ref->rb_node_desc, &proc->refs_by_desc);

	binder_node_lock(node);
	hlist_add_head(&new_ref->node_entry, &node->refs);

	binder_debug(BINDER_DEBUG_INTERNAL_REFS,
		     "%d new ref %d desc %d for node %d\n",
		      proc->pid, new_ref->data.debug_id, new_ref->data.desc,
		      node->debug_id);
	binder_node_unlock(node);
	return new_ref;
}

/*
 * Detach @ref from everything that points at it.
 *
 * Clears the ref's descriptor bit in the dbitmap (when enabled), removes
 * the ref from both per-proc rb-trees and from the node's refs list, and
 * drops the node references the ref was holding. Pending death and freeze
 * work items are dequeued. On return ref->node is NULL unless the node
 * must be freed by the caller.
 */
static void binder_cleanup_ref_olocked(struct binder_ref *ref)
{
	struct dbitmap *dmap = &ref->proc->dmap;
	bool delete_node = false;

	binder_debug(BINDER_DEBUG_INTERNAL_REFS,
		     "%d delete ref %d desc %d for node %d\n",
		      ref->proc->pid, ref->data.debug_id, ref->data.desc,
		      ref->node->debug_id);

	if (dbitmap_enabled(dmap))
		dbitmap_clear_bit(dmap, ref->data.desc);
	rb_erase(&ref->rb_node_desc, &ref->proc->refs_by_desc);
	rb_erase(&ref->rb_node_node, &ref->proc->refs_by_node);

	binder_node_inner_lock(ref->node);
	if (ref->data.strong)
		binder_dec_node_nilocked(ref->node, 1, 1);

	hlist_del(&ref->node_entry);
	delete_node = binder_dec_node_nilocked(ref->node, 0, 1);
	binder_node_inner_unlock(ref->node);
	/*
	 * Clear ref->node unless we want the caller to free the node
	 */
	if (!delete_node) {
		/*
		 * The caller uses ref->node to determine
		 * whether the node needs to be freed. Clear
		 * it since the node is still alive.
		 */
		ref->node = NULL;
	}

	if (ref->death) {
		binder_debug(BINDER_DEBUG_DEAD_BINDER,
			     "%d delete ref %d desc %d has death notification\n",
			      ref->proc->pid, ref->data.debug_id,
			      ref->data.desc);
		binder_dequeue_work(ref->proc, &ref->death->work);
		binder_stats_deleted(BINDER_STAT_DEATH);
	}

	if (ref->freeze) {
		binder_dequeue_work(ref->proc, &ref->freeze->work);
		binder_stats_deleted(BINDER_STAT_FREEZE);
	}

	binder_stats_deleted(BINDER_STAT_REF);
}

/**
 * binder_inc_ref_olocked() - increment the ref for given handle
 * @ref:         ref to be incremented
 * @strong:      if true, strong increment, else weak
 * @target_list: list to queue node work on
 *
 * Increment the ref.
@ref->proc->outer_lock must be held on entry
 *
 * Return: 0, if successful, else errno
 */
static int binder_inc_ref_olocked(struct binder_ref *ref, int strong,
				  struct list_head *target_list)
{
	int ret;

	if (strong) {
		/* the node is only touched on the 0 -> 1 transition */
		if (ref->data.strong == 0) {
			ret = binder_inc_node(ref->node, 1, 1, target_list);
			if (ret)
				return ret;
		}
		ref->data.strong++;
	} else {
		/* same rule for the weak count */
		if (ref->data.weak == 0) {
			ret = binder_inc_node(ref->node, 0, 1, target_list);
			if (ret)
				return ret;
		}
		ref->data.weak++;
	}
	return 0;
}

/**
 * binder_dec_ref_olocked() - dec the ref for given handle
 * @ref:	ref to be decremented
 * @strong:	if true, strong decrement, else weak
 *
 * Decrement the ref. A decrement of a count that is already zero is
 * reported as a user error and otherwise ignored. When both counts
 * reach zero the ref is torn down with binder_cleanup_ref_olocked().
 *
 * Return: %true if ref is cleaned up and ready to be freed.
 */
static bool binder_dec_ref_olocked(struct binder_ref *ref, int strong)
{
	if (strong) {
		if (ref->data.strong == 0) {
			binder_user_error("%d invalid dec strong, ref %d desc %d s %d w %d\n",
					  ref->proc->pid, ref->data.debug_id,
					  ref->data.desc, ref->data.strong,
					  ref->data.weak);
			return false;
		}
		ref->data.strong--;
		/* last strong count on this ref: release it on the node too */
		if (ref->data.strong == 0)
			binder_dec_node(ref->node, strong, 1);
	} else {
		if (ref->data.weak == 0) {
			binder_user_error("%d invalid dec weak, ref %d desc %d s %d w %d\n",
					  ref->proc->pid, ref->data.debug_id,
					  ref->data.desc, ref->data.strong,
					  ref->data.weak);
			return false;
		}
		/* no node call here; the cleanup path below handles the node */
		ref->data.weak--;
	}
	if (ref->data.strong == 0 && ref->data.weak == 0) {
		binder_cleanup_ref_olocked(ref);
		return true;
	}
	return false;
}

/**
 * binder_get_node_from_ref() - get the node from the given proc/desc
 * @proc:	proc containing the ref
 * @desc:	the handle associated with the ref
 * @need_strong_ref: if true, only return node if ref is strong
 * @rdata:	the id/refcount data for the ref (may be %NULL)
 *
 * Given a proc and ref handle, return the associated binder_node.
 * The proc lock is taken for the lookup and a temporary reference is
 * taken on the node before the lock is dropped; the caller releases it
 * with binder_put_node().
 *
 * Return: a binder_node or NULL if not found or not strong when strong required
 */
static struct binder_node *binder_get_node_from_ref(
		struct binder_proc *proc,
		u32 desc, bool need_strong_ref,
		struct binder_ref_data *rdata)
{
	struct binder_node *node;
	struct binder_ref *ref;

	binder_proc_lock(proc);
	ref = binder_get_ref_olocked(proc, desc, need_strong_ref);
	if (!ref)
		goto err_no_ref;
	node = ref->node;
	/*
	 * Take an implicit reference on the node to ensure
	 * it stays alive until the call to binder_put_node()
	 */
	binder_inc_node_tmpref(node);
	/* snapshot the ref data while the proc lock is still held */
	if (rdata)
		*rdata = ref->data;
	binder_proc_unlock(proc);

	return node;

err_no_ref:
	binder_proc_unlock(proc);
	return NULL;
}

/**
 * binder_free_ref() - free the binder_ref
 * @ref:	ref to free
 *
 * Free the binder_ref. Free the binder_node indicated by ref->node
 * (if non-NULL), the binder_ref_death indicated by ref->death and the
 * object indicated by ref->freeze.
 */
static void binder_free_ref(struct binder_ref *ref)
{
	if (ref->node)
		binder_free_node(ref->node);
	kfree(ref->death);
	kfree(ref->freeze);
	kfree(ref);
}

/*
 * shrink descriptor bitmap if needed
 *
 * The proc lock is dropped around the GFP_KERNEL allocation and taken
 * again for the actual shrink. The allocation result is handed to
 * dbitmap_shrink() unchecked.
 * NOTE(review): presumably dbitmap_shrink() copes with a NULL bitmap
 * and with the map having changed while unlocked -- confirm in dbitmap.h.
 */
static void try_shrink_dmap(struct binder_proc *proc)
{
	unsigned long *new;
	int nbits;

	binder_proc_lock(proc);
	nbits = dbitmap_shrink_nbits(&proc->dmap);
	binder_proc_unlock(proc);

	/* zero means there is nothing to shrink */
	if (!nbits)
		return;

	new = bitmap_zalloc(nbits, GFP_KERNEL);
	binder_proc_lock(proc);
	dbitmap_shrink(&proc->dmap, new, nbits);
	binder_proc_unlock(proc);
}

/**
 * binder_update_ref_for_handle() - inc/dec the ref for given handle
 * @proc:	proc containing the ref
 * @desc:	the handle associated with the ref
 * @increment:	true=inc reference, false=dec reference
 * @strong:	true=strong reference, false=weak reference
 * @rdata:	the id/refcount data for the ref
 *
 * Given a proc and ref handle, increment or decrement the ref
 * according to "increment" arg.
* * Return: 0 if successful, else errno */ static int binder_update_ref_for_handle(struct binder_proc *proc, uint32_t desc, bool increment, bool strong, struct binder_ref_data *rdata) { int ret = 0; struct binder_ref *ref; bool delete_ref = false; binder_proc_lock(proc); ref = binder_get_ref_olocked(proc, desc, strong); if (!ref) { ret = -EINVAL; goto err_no_ref; } if (increment) ret = binder_inc_ref_olocked(ref, strong, NULL); else delete_ref = binder_dec_ref_olocked(ref, strong); if (rdata) *rdata = ref->data; binder_proc_unlock(proc); if (delete_ref) { binder_free_ref(ref); try_shrink_dmap(proc); } return ret; err_no_ref: binder_proc_unlock(proc); return ret; } /** * binder_dec_ref_for_handle() - dec the ref for given handle * @proc: proc containing the ref * @desc: the handle associated with the ref * @strong: true=strong reference, false=weak reference * @rdata: the id/refcount data for the ref * * Just calls binder_update_ref_for_handle() to decrement the ref. * * Return: 0 if successful, else errno */ static int binder_dec_ref_for_handle(struct binder_proc *proc, uint32_t desc, bool strong, struct binder_ref_data *rdata) { return binder_update_ref_for_handle(proc, desc, false, strong, rdata); } /** * binder_inc_ref_for_node() - increment the ref for given proc/node * @proc: proc containing the ref * @node: target node * @strong: true=strong reference, false=weak reference * @target_list: worklist to use if node is incremented * @rdata: the id/refcount data for the ref * * Given a proc and node, increment the ref. 
Create the ref if it * doesn't already exist * * Return: 0 if successful, else errno */ static int binder_inc_ref_for_node(struct binder_proc *proc, struct binder_node *node, bool strong, struct list_head *target_list, struct binder_ref_data *rdata) { struct binder_ref *ref; struct binder_ref *new_ref = NULL; int ret = 0; binder_proc_lock(proc); ref = binder_get_ref_for_node_olocked(proc, node, NULL); if (!ref) { binder_proc_unlock(proc); new_ref = kzalloc(sizeof(*ref), GFP_KERNEL); if (!new_ref) return -ENOMEM; binder_proc_lock(proc); ref = binder_get_ref_for_node_olocked(proc, node, new_ref); } ret = binder_inc_ref_olocked(ref, strong, target_list); *rdata = ref->data; if (ret && ref == new_ref) { /* * Cleanup the failed reference here as the target * could now be dead and have already released its * references by now. Calling on the new reference * with strong=0 and a tmp_refs will not decrement * the node. The new_ref gets kfree'd below. */ binder_cleanup_ref_olocked(new_ref); ref = NULL; } binder_proc_unlock(proc); if (new_ref && ref != new_ref) /* * Another thread created the ref first so * free the one we allocated */ kfree(new_ref); return ret; } static void binder_pop_transaction_ilocked(struct binder_thread *target_thread, struct binder_transaction *t) { BUG_ON(!target_thread); assert_spin_locked(&target_thread->proc->inner_lock); BUG_ON(target_thread->transaction_stack != t); BUG_ON(target_thread->transaction_stack->from != target_thread); target_thread->transaction_stack = target_thread->transaction_stack->from_parent; t->from = NULL; } /** * binder_thread_dec_tmpref() - decrement thread->tmp_ref * @thread: thread to decrement * * A thread needs to be kept alive while being used to create or * handle a transaction. binder_get_txn_from() is used to safely * extract t->from from a binder_transaction and keep the thread * indicated by t->from from being freed. 
When done with that * binder_thread, this function is called to decrement the * tmp_ref and free if appropriate (thread has been released * and no transaction being processed by the driver) */ static void binder_thread_dec_tmpref(struct binder_thread *thread) { /* * atomic is used to protect the counter value while * it cannot reach zero or thread->is_dead is false */ binder_inner_proc_lock(thread->proc); atomic_dec(&thread->tmp_ref); if (thread->is_dead && !atomic_read(&thread->tmp_ref)) { binder_inner_proc_unlock(thread->proc); binder_free_thread(thread); return; } binder_inner_proc_unlock(thread->proc); } /** * binder_proc_dec_tmpref() - decrement proc->tmp_ref * @proc: proc to decrement * * A binder_proc needs to be kept alive while being used to create or * handle a transaction. proc->tmp_ref is incremented when * creating a new transaction or the binder_proc is currently in-use * by threads that are being released. When done with the binder_proc, * this function is called to decrement the counter and free the * proc if appropriate (proc has been released, all threads have * been released and not currently in-use to process a transaction). */ static void binder_proc_dec_tmpref(struct binder_proc *proc) { binder_inner_proc_lock(proc); proc->tmp_ref--; if (proc->is_dead && RB_EMPTY_ROOT(&proc->threads) && !proc->tmp_ref) { binder_inner_proc_unlock(proc); binder_free_proc(proc); return; } binder_inner_proc_unlock(proc); } /** * binder_get_txn_from() - safely extract the "from" thread in transaction * @t: binder transaction for t->from * * Atomically return the "from" thread and increment the tmp_ref * count for the thread to ensure it stays alive until * binder_thread_dec_tmpref() is called. 
*
 * Return: the value of t->from
 */
static struct binder_thread *binder_get_txn_from(
		struct binder_transaction *t)
{
	struct binder_thread *from;

	/* scoped lock: t->lock is released when the function returns */
	guard(spinlock)(&t->lock);
	from = t->from;
	if (from)
		atomic_inc(&from->tmp_ref);
	return from;
}

/**
 * binder_get_txn_from_and_acq_inner() - get t->from and acquire inner lock
 * @t:	binder transaction for t->from
 *
 * Same as binder_get_txn_from() except it also acquires the proc->inner_lock
 * to guarantee that the thread cannot be released while operating on it.
 * The caller must call binder_inner_proc_unlock() to release the inner lock
 * as well as call binder_thread_dec_tmpref() to release the reference.
 *
 * When %NULL is returned no lock is held and no reference is kept; the
 * __acquire() calls on those paths only balance the function's
 * __acquires() annotation.
 *
 * Return: the value of t->from
 */
static struct binder_thread *binder_get_txn_from_and_acq_inner(
		struct binder_transaction *t)
	__acquires(&t->from->proc->inner_lock)
{
	struct binder_thread *from;

	from = binder_get_txn_from(t);
	if (!from) {
		__acquire(&from->proc->inner_lock);
		return NULL;
	}
	binder_inner_proc_lock(from->proc);
	/* re-check under the inner lock: t->from may have been cleared */
	if (t->from) {
		BUG_ON(from != t->from);
		return from;
	}
	binder_inner_proc_unlock(from->proc);
	__acquire(&from->proc->inner_lock);
	binder_thread_dec_tmpref(from);
	return NULL;
}

/**
 * binder_free_txn_fixups() - free unprocessed fd fixups
 * @t:	binder transaction for t->from
 *
 * If the transaction is being torn down prior to being
 * processed by the target process, free all of the
 * fd fixups and fput the file structs. It is safe to
 * call this function after the fixups have been
 * processed -- in that case, the list will be empty.
 */
static void binder_free_txn_fixups(struct binder_transaction *t)
{
	struct binder_txn_fd_fixup *fixup, *tmp;

	list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) {
		fput(fixup->file);
		/* give back a target fd that was already reserved */
		if (fixup->target_fd >= 0)
			put_unused_fd(fixup->target_fd);
		list_del(&fixup->fixup_entry);
		kfree(fixup);
	}
}

/*
 * Emit the binder_txn_latency_free tracepoint for @t.
 *
 * The sender/target pids are sampled under t->lock; 0 is reported for
 * any of them whose pointer is not set.
 */
static void binder_txn_latency_free(struct binder_transaction *t)
{
	int from_proc, from_thread, to_proc, to_thread;

	spin_lock(&t->lock);
	from_proc = t->from ? t->from->proc->pid : 0;
	from_thread = t->from ? t->from->pid : 0;
	to_proc = t->to_proc ? t->to_proc->pid : 0;
	to_thread = t->to_thread ? t->to_thread->pid : 0;
	spin_unlock(&t->lock);

	trace_binder_txn_latency_free(t, from_proc, from_thread, to_proc, to_thread);
}

/*
 * Free transaction @t.
 *
 * If the transaction has a target proc, its outstanding_txns count is
 * dropped under the proc's inner lock, freeze waiters are woken when
 * the count reaches zero on a frozen proc, and the buffer's back
 * pointer to the transaction is cleared. Remaining fd fixups are
 * released before the transaction is kfree'd.
 */
static void binder_free_transaction(struct binder_transaction *t)
{
	struct binder_proc *target_proc = t->to_proc;

	if (target_proc) {
		binder_inner_proc_lock(target_proc);
		target_proc->outstanding_txns--;
		if (target_proc->outstanding_txns < 0)
			pr_warn("%s: Unexpected outstanding_txns %d\n",
				__func__, target_proc->outstanding_txns);
		if (!target_proc->outstanding_txns && target_proc->is_frozen)
			wake_up_interruptible_all(&target_proc->freeze_wait);
		if (t->buffer)
			t->buffer->transaction = NULL;
		binder_inner_proc_unlock(target_proc);
	}
	/* only gather the pids when the tracepoint is actually enabled */
	if (trace_binder_txn_latency_free_enabled())
		binder_txn_latency_free(t);
	/*
	 * If the transaction has no target_proc, then
	 * t->buffer->transaction has already been cleared.
	 */
	binder_free_txn_fixups(t);
	kfree(t);
	binder_stats_deleted(BINDER_STAT_TRANSACTION);
}

/*
 * Deliver @error_code to the sender of synchronous transaction @t.
 *
 * If the sending thread is gone, @t is freed and the walk continues
 * with t->from_parent until a live sender is found or the chain ends.
 * Every transaction visited is freed. Must not be called for one-way
 * transactions (BUG).
 */
static void binder_send_failed_reply(struct binder_transaction *t,
				     uint32_t error_code)
{
	struct binder_thread *target_thread;
	struct binder_transaction *next;

	BUG_ON(t->flags & TF_ONE_WAY);
	while (1) {
		target_thread = binder_get_txn_from_and_acq_inner(t);
		if (target_thread) {
			binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
				     "send failed reply for transaction %d to %d:%d\n",
				      t->debug_id,
				      target_thread->proc->pid,
				      target_thread->pid);

			binder_pop_transaction_ilocked(target_thread, t);
			if (target_thread->reply_error.cmd == BR_OK) {
				target_thread->reply_error.cmd = error_code;
				binder_enqueue_thread_work_ilocked(
					target_thread,
					&target_thread->reply_error.work);
				wake_up_interruptible(&target_thread->wait);
			} else {
				/*
				 * Cannot get here for normal operation, but
				 * we can if multiple synchronous transactions
				 * are sent without blocking for responses.
				 * Just ignore the 2nd error in this case.
				 */
				pr_warn("Unexpected reply error: %u\n",
					target_thread->reply_error.cmd);
			}
			/* release the lock and reference taken by the lookup */
			binder_inner_proc_unlock(target_thread->proc);
			binder_thread_dec_tmpref(target_thread);
			binder_free_transaction(t);
			return;
		}
		/* no lock was taken; this only balances the annotation */
		__release(&target_thread->proc->inner_lock);
		/* read the parent before @t is freed */
		next = t->from_parent;

		binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
			     "send failed reply for transaction %d, target dead\n",
			     t->debug_id);

		binder_free_transaction(t);
		if (next == NULL) {
			binder_debug(BINDER_DEBUG_DEAD_BINDER,
				     "reply failed, no target thread at root\n");
			return;
		}
		t = next;
		binder_debug(BINDER_DEBUG_DEAD_BINDER,
			     "reply failed, no target thread -- retry %d\n",
			      t->debug_id);
	}
}

/**
 * binder_cleanup_transaction() - cleans up undelivered transaction
 * @t:		transaction that needs to be cleaned up
 * @reason:	reason the transaction wasn't delivered
 * @error_code:	error to return to caller (if synchronous call)
 *
 * A transaction with a target node that is not one-way gets a failed
 * reply sent to its sender (which also frees it); any other
 * transaction is only logged and freed.
 */
static void binder_cleanup_transaction(struct binder_transaction *t,
				       const char *reason,
				       uint32_t error_code)
{
	if (t->buffer->target_node && !(t->flags & TF_ONE_WAY)) {
		binder_send_failed_reply(t, error_code);
	} else {
		binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
			"undelivered transaction %d, %s\n",
			t->debug_id, reason);
		binder_free_transaction(t);
	}
}

/**
 * binder_get_object() - gets object and checks for valid metadata
 * @proc:	binder_proc owning the buffer
 * @u:		sender's user pointer to base of buffer
 * @buffer:	binder_buffer that we're parsing.
 * @offset:	offset in the @buffer at which to validate an object.
 * @object:	struct binder_object to read into
 *
 * Copy the binder object at the given offset into @object. If @u is
 * provided then the copy is from the sender's buffer. If not, then
 * it is copied from the target's @buffer.
 *
 * Return:	If there's a valid metadata object at @offset, the
 *		size of that object. Otherwise, it returns zero. The object
 *		is read into the struct binder_object pointed to by @object.
*/
static size_t binder_get_object(struct binder_proc *proc,
				const void __user *u,
				struct binder_buffer *buffer,
				unsigned long offset,
				struct binder_object *object)
{
	struct binder_object_header *hdr;
	size_t avail;
	size_t want;

	/*
	 * Read at most one struct binder_object, clipped to what is left
	 * in the data area. The subtraction may wrap when @offset is out
	 * of range; the first check below rejects that case before
	 * @avail is used for a copy.
	 */
	avail = min_t(size_t, sizeof(*object), buffer->data_size - offset);
	if (offset > buffer->data_size)
		return 0;
	if (avail < sizeof(*hdr))
		return 0;
	if (!IS_ALIGNED(offset, sizeof(u32)))
		return 0;

	if (u) {
		if (copy_from_user(object, u + offset, avail))
			return 0;
	} else if (binder_alloc_copy_from_buffer(&proc->alloc, object, buffer,
						 offset, avail)) {
		return 0;
	}

	/* The header type decides how large a complete object must be. */
	hdr = &object->hdr;
	switch (hdr->type) {
	case BINDER_TYPE_BINDER:
	case BINDER_TYPE_WEAK_BINDER:
	case BINDER_TYPE_HANDLE:
	case BINDER_TYPE_WEAK_HANDLE:
		want = sizeof(struct flat_binder_object);
		break;
	case BINDER_TYPE_FD:
		want = sizeof(struct binder_fd_object);
		break;
	case BINDER_TYPE_PTR:
		want = sizeof(struct binder_buffer_object);
		break;
	case BINDER_TYPE_FDA:
		want = sizeof(struct binder_fd_array_object);
		break;
	default:
		return 0;
	}

	/* The complete object has to fit between @offset and the data end. */
	if (buffer->data_size < want)
		return 0;
	if (offset > buffer->data_size - want)
		return 0;
	return want;
}

/**
 * binder_validate_ptr() - validates binder_buffer_object in a binder_buffer.
 * @proc:	binder_proc owning the buffer
 * @b:		binder_buffer containing the object
 * @object:	struct binder_object to read into
 * @index:	index in offset array at which the binder_buffer_object is
 *		located
 * @start_offset: points to the start of the offset array
 * @object_offsetp: offset of @object read from @b
 * @num_valid:	the number of valid offsets in the offset array
 *
 * Return:	If @index is within the valid range of the offset array
 *		described by @start and @num_valid, and if there's a valid
 *		binder_buffer_object at the offset found in index @index
 *		of the offset array, that object is returned. Otherwise,
 *		%NULL is returned.
* Note that the offset found in index @index itself is not * verified; this function assumes that @num_valid elements * from @start were previously verified to have valid offsets. * If @object_offsetp is non-NULL, then the offset within * @b is written to it. */ static struct binder_buffer_object *binder_validate_ptr( struct binder_proc *proc, struct binder_buffer *b, struct binder_object *object, binder_size_t index, binder_size_t start_offset, binder_size_t *object_offsetp, binder_size_t num_valid) { size_t object_size; binder_size_t object_offset; unsigned long buffer_offset; if (index >= num_valid) return NULL; buffer_offset = start_offset + sizeof(binder_size_t) * index; if (binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, b, buffer_offset, sizeof(object_offset))) return NULL; object_size = binder_get_object(proc, NULL, b, object_offset, object); if (!object_size || object->hdr.type != BINDER_TYPE_PTR) return NULL; if (object_offsetp) *object_offsetp = object_offset; return &object->bbo; } /** * binder_validate_fixup() - validates pointer/fd fixups happen in order. * @proc: binder_proc owning the buffer * @b: transaction buffer * @objects_start_offset: offset to start of objects buffer * @buffer_obj_offset: offset to binder_buffer_object in which to fix up * @fixup_offset: start offset in @buffer to fix up * @last_obj_offset: offset to last binder_buffer_object that we fixed * @last_min_offset: minimum fixup offset in object at @last_obj_offset * * Return: %true if a fixup in buffer @buffer at offset @offset is * allowed. * * For safety reasons, we only allow fixups inside a buffer to happen * at increasing offsets; additionally, we only allow fixup on the last * buffer object that was verified, or one of its parents. 
* * Example of what is allowed: * * A * B (parent = A, offset = 0) * C (parent = A, offset = 16) * D (parent = C, offset = 0) * E (parent = A, offset = 32) // min_offset is 16 (C.parent_offset) * * Examples of what is not allowed: * * Decreasing offsets within the same parent: * A * C (parent = A, offset = 16) * B (parent = A, offset = 0) // decreasing offset within A * * Referring to a parent that wasn't the last object or any of its parents: * A * B (parent = A, offset = 0) * C (parent = A, offset = 0) * C (parent = A, offset = 16) * D (parent = B, offset = 0) // B is not A or any of A's parents */ static bool binder_validate_fixup(struct binder_proc *proc, struct binder_buffer *b, binder_size_t objects_start_offset, binder_size_t buffer_obj_offset, binder_size_t fixup_offset, binder_size_t last_obj_offset, binder_size_t last_min_offset) { if (!last_obj_offset) { /* Nothing to fix up in */ return false; } while (last_obj_offset != buffer_obj_offset) { unsigned long buffer_offset; struct binder_object last_object; struct binder_buffer_object *last_bbo; size_t object_size = binder_get_object(proc, NULL, b, last_obj_offset, &last_object); if (object_size != sizeof(*last_bbo)) return false; last_bbo = &last_object.bbo; /* * Safe to retrieve the parent of last_obj, since it * was already previously verified by the driver. */ if ((last_bbo->flags & BINDER_BUFFER_FLAG_HAS_PARENT) == 0) return false; last_min_offset = last_bbo->parent_offset + sizeof(uintptr_t); buffer_offset = objects_start_offset + sizeof(binder_size_t) * last_bbo->parent; if (binder_alloc_copy_from_buffer(&proc->alloc, &last_obj_offset, b, buffer_offset, sizeof(last_obj_offset))) return false; } return (fixup_offset >= last_min_offset); } /** * struct binder_task_work_cb - for deferred close * * @twork: callback_head for task work * @file: file to close * * Structure to pass task work to be handled after * returning from binder_ioctl() via task_work_add(). 
*/
struct binder_task_work_cb {
	struct callback_head twork;
	struct file *file;
};

/**
 * binder_do_fd_close() - task work callback finishing a deferred close
 * @twork:	callback head for task work
 *
 * It is not safe to finish closing a file during the binder_ioctl()
 * function if there is a chance that binder's own file descriptor
 * might be closed. This is to meet the requirements for using
 * fdget() (see comments for __fget_light()). Therefore
 * task_work_add() is used to schedule the final step once we have
 * returned from binder_ioctl(). This function is the callback
 * for that mechanism: it drops the file reference stored in the
 * enclosing struct binder_task_work_cb with fput() and frees the
 * structure.
 */
static void binder_do_fd_close(struct callback_head *twork)
{
	/* @twork is embedded in the binder_task_work_cb that was queued */
	struct binder_task_work_cb *twcb = container_of(twork,
			struct binder_task_work_cb, twork);

	fput(twcb->file);
	kfree(twcb);
}

/**
 * binder_deferred_fd_close() - schedule a close for the given file-descriptor
 * @fd:		file-descriptor to close
 *
 * See comments in binder_do_fd_close(). This function is used to schedule
 * a file-descriptor to be closed after returning from binder_ioctl().
*/ static void binder_deferred_fd_close(int fd) { struct binder_task_work_cb *twcb; twcb = kzalloc(sizeof(*twcb), GFP_KERNEL); if (!twcb) return; init_task_work(&twcb->twork, binder_do_fd_close); twcb->file = file_close_fd(fd); if (twcb->file) { // pin it until binder_do_fd_close(); see comments there get_file(twcb->file); filp_close(twcb->file, current->files); task_work_add(current, &twcb->twork, TWA_RESUME); } else { kfree(twcb); } } static void binder_transaction_buffer_release(struct binder_proc *proc, struct binder_thread *thread, struct binder_buffer *buffer, binder_size_t off_end_offset, bool is_failure) { int debug_id = buffer->debug_id; binder_size_t off_start_offset, buffer_offset; binder_debug(BINDER_DEBUG_TRANSACTION, "%d buffer release %d, size %zd-%zd, failed at %llx\n", proc->pid, buffer->debug_id, buffer->data_size, buffer->offsets_size, (unsigned long long)off_end_offset); if (buffer->target_node) binder_dec_node(buffer->target_node, 1, 0); off_start_offset = ALIGN(buffer->data_size, sizeof(void *)); for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; buffer_offset += sizeof(binder_size_t)) { struct binder_object_header *hdr; size_t object_size = 0; struct binder_object object; binder_size_t object_offset; if (!binder_alloc_copy_from_buffer(&proc->alloc, &object_offset, buffer, buffer_offset, sizeof(object_offset))) object_size = binder_get_object(proc, NULL, buffer, object_offset, &object); if (object_size == 0) { pr_err("transaction release %d bad object at offset %lld, size %zd\n", debug_id, (u64)object_offset, buffer->data_size); continue; } hdr = &object.hdr; switch (hdr->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { struct flat_binder_object *fp; struct binder_node *node; fp = to_flat_binder_object(hdr); node = binder_get_node(proc, fp->binder); if (node == NULL) { pr_err("transaction release %d bad node %016llx\n", debug_id, (u64)fp->binder); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " node %d 
u%016llx\n", node->debug_id, (u64)node->ptr); binder_dec_node(node, hdr->type == BINDER_TYPE_BINDER, 0); binder_put_node(node); } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { struct flat_binder_object *fp; struct binder_ref_data rdata; int ret; fp = to_flat_binder_object(hdr); ret = binder_dec_ref_for_handle(proc, fp->handle, hdr->type == BINDER_TYPE_HANDLE, &rdata); if (ret) { pr_err("transaction release %d bad handle %d, ret = %d\n", debug_id, fp->handle, ret); break; } binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d\n", rdata.debug_id, rdata.desc); } break; case BINDER_TYPE_FD: { /* * No need to close the file here since user-space * closes it for successfully delivered * transactions. For transactions that weren't * delivered, the new fd was never allocated so * there is no need to close and the fput on the * file is done when the transaction is torn * down. */ } break; case BINDER_TYPE_PTR: /* * Nothing to do here, this will get cleaned up when the * transaction buffer gets freed */ break; case BINDER_TYPE_FDA: { struct binder_fd_array_object *fda; struct binder_buffer_object *parent; struct binder_object ptr_object; binder_size_t fda_offset; size_t fd_index; binder_size_t fd_buf_size; binder_size_t num_valid; if (is_failure) { /* * The fd fixups have not been applied so no * fds need to be closed. */ continue; } num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); fda = to_binder_fd_array_object(hdr); parent = binder_validate_ptr(proc, buffer, &ptr_object, fda->parent, off_start_offset, NULL, num_valid); if (!parent) { pr_err("transaction release %d bad parent offset\n", debug_id); continue; } fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { pr_err("transaction release %d invalid number of fds (%lld)\n", debug_id, (u64)fda->num_fds); continue; } if (fd_buf_size > parent->length || fda->parent_offset > parent->length - fd_buf_size) { /* No space for all file descriptors here. 
*/ pr_err("transaction release %d not enough space for %lld fds in buffer\n", debug_id, (u64)fda->num_fds); continue; } /* * the source data for binder_buffer_object is visible * to user-space and the @buffer element is the user * pointer to the buffer_object containing the fd_array. * Convert the address to an offset relative to * the base of the transaction buffer. */ fda_offset = parent->buffer - buffer->user_data + fda->parent_offset; for (fd_index = 0; fd_index < fda->num_fds; fd_index++) { u32 fd; int err; binder_size_t offset = fda_offset + fd_index * sizeof(fd); err = binder_alloc_copy_from_buffer( &proc->alloc, &fd, buffer, offset, sizeof(fd)); WARN_ON(err); if (!err) { binder_deferred_fd_close(fd); /* * Need to make sure the thread goes * back to userspace to complete the * deferred close */ if (thread) thread->looper_need_return = true; } } } break; default: pr_err("transaction release %d bad object type %x\n", debug_id, hdr->type); break; } } } /* Clean up all the objects in the buffer */ static inline void binder_release_entire_buffer(struct binder_proc *proc, struct binder_thread *thread, struct binder_buffer *buffer, bool is_failure) { binder_size_t off_end_offset; off_end_offset = ALIGN(buffer->data_size, sizeof(void *)); off_end_offset += buffer->offsets_size; binder_transaction_buffer_release(proc, thread, buffer, off_end_offset, is_failure); } static int binder_translate_binder(struct flat_binder_object *fp, struct binder_transaction *t, struct binder_thread *thread) { struct binder_node *node; struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; struct binder_ref_data rdata; int ret = 0; node = binder_get_node(proc, fp->binder); if (!node) { node = binder_new_node(proc, fp); if (!node) return -ENOMEM; } if (fp->cookie != node->cookie) { binder_user_error("%d:%d sending u%016llx node %d, cookie mismatch %016llx != %016llx\n", proc->pid, thread->pid, (u64)fp->binder, node->debug_id, (u64)fp->cookie, 
(u64)node->cookie); ret = -EINVAL; goto done; } if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } ret = binder_inc_ref_for_node(target_proc, node, fp->hdr.type == BINDER_TYPE_BINDER, &thread->todo, &rdata); if (ret) goto done; if (fp->hdr.type == BINDER_TYPE_BINDER) fp->hdr.type = BINDER_TYPE_HANDLE; else fp->hdr.type = BINDER_TYPE_WEAK_HANDLE; fp->binder = 0; fp->handle = rdata.desc; fp->cookie = 0; trace_binder_transaction_node_to_ref(t, node, &rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " node %d u%016llx -> ref %d desc %d\n", node->debug_id, (u64)node->ptr, rdata.debug_id, rdata.desc); done: binder_put_node(node); return ret; } static int binder_translate_handle(struct flat_binder_object *fp, struct binder_transaction *t, struct binder_thread *thread) { struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; struct binder_node *node; struct binder_ref_data src_rdata; int ret = 0; node = binder_get_node_from_ref(proc, fp->handle, fp->hdr.type == BINDER_TYPE_HANDLE, &src_rdata); if (!node) { binder_user_error("%d:%d got transaction with invalid handle, %d\n", proc->pid, thread->pid, fp->handle); return -EINVAL; } if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } binder_node_lock(node); if (node->proc == target_proc) { if (fp->hdr.type == BINDER_TYPE_HANDLE) fp->hdr.type = BINDER_TYPE_BINDER; else fp->hdr.type = BINDER_TYPE_WEAK_BINDER; fp->binder = node->ptr; fp->cookie = node->cookie; if (node->proc) binder_inner_proc_lock(node->proc); else __acquire(&node->proc->inner_lock); binder_inc_node_nilocked(node, fp->hdr.type == BINDER_TYPE_BINDER, 0, NULL); if (node->proc) binder_inner_proc_unlock(node->proc); else __release(&node->proc->inner_lock); trace_binder_transaction_ref_to_node(t, node, &src_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> node %d u%016llx\n", src_rdata.debug_id, src_rdata.desc, node->debug_id, 
(u64)node->ptr); binder_node_unlock(node); } else { struct binder_ref_data dest_rdata; binder_node_unlock(node); ret = binder_inc_ref_for_node(target_proc, node, fp->hdr.type == BINDER_TYPE_HANDLE, NULL, &dest_rdata); if (ret) goto done; fp->binder = 0; fp->handle = dest_rdata.desc; fp->cookie = 0; trace_binder_transaction_ref_to_ref(t, node, &src_rdata, &dest_rdata); binder_debug(BINDER_DEBUG_TRANSACTION, " ref %d desc %d -> ref %d desc %d (node %d)\n", src_rdata.debug_id, src_rdata.desc, dest_rdata.debug_id, dest_rdata.desc, node->debug_id); } done: binder_put_node(node); return ret; } static int binder_translate_fd(u32 fd, binder_size_t fd_offset, struct binder_transaction *t, struct binder_thread *thread, struct binder_transaction *in_reply_to) { struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; struct binder_txn_fd_fixup *fixup; struct file *file; int ret = 0; bool target_allows_fd; if (in_reply_to) target_allows_fd = !!(in_reply_to->flags & TF_ACCEPT_FDS); else target_allows_fd = t->buffer->target_node->accept_fds; if (!target_allows_fd) { binder_user_error("%d:%d got %s with fd, %d, but target does not allow fds\n", proc->pid, thread->pid, in_reply_to ? "reply" : "transaction", fd); ret = -EPERM; goto err_fd_not_accepted; } file = fget(fd); if (!file) { binder_user_error("%d:%d got transaction with invalid fd, %d\n", proc->pid, thread->pid, fd); ret = -EBADF; goto err_fget; } ret = security_binder_transfer_file(proc->cred, target_proc->cred, file); if (ret < 0) { ret = -EPERM; goto err_security; } /* * Add fixup record for this transaction. The allocation * of the fd in the target needs to be done from a * target thread. 
*/ fixup = kzalloc(sizeof(*fixup), GFP_KERNEL); if (!fixup) { ret = -ENOMEM; goto err_alloc; } fixup->file = file; fixup->offset = fd_offset; fixup->target_fd = -1; trace_binder_transaction_fd_send(t, fd, fixup->offset); list_add_tail(&fixup->fixup_entry, &t->fd_fixups); return ret; err_alloc: err_security: fput(file); err_fget: err_fd_not_accepted: return ret; } /** * struct binder_ptr_fixup - data to be fixed-up in target buffer * @offset offset in target buffer to fixup * @skip_size bytes to skip in copy (fixup will be written later) * @fixup_data data to write at fixup offset * @node list node * * This is used for the pointer fixup list (pf) which is created and consumed * during binder_transaction() and is only accessed locally. No * locking is necessary. * * The list is ordered by @offset. */ struct binder_ptr_fixup { binder_size_t offset; size_t skip_size; binder_uintptr_t fixup_data; struct list_head node; }; /** * struct binder_sg_copy - scatter-gather data to be copied * @offset offset in target buffer * @sender_uaddr user address in source buffer * @length bytes to copy * @node list node * * This is used for the sg copy list (sgc) which is created and consumed * during binder_transaction() and is only accessed locally. No * locking is necessary. * * The list is ordered by @offset. */ struct binder_sg_copy { binder_size_t offset; const void __user *sender_uaddr; size_t length; struct list_head node; }; /** * binder_do_deferred_txn_copies() - copy and fixup scatter-gather data * @alloc: binder_alloc associated with @buffer * @buffer: binder buffer in target process * @sgc_head: list_head of scatter-gather copy list * @pf_head: list_head of pointer fixup list * * Processes all elements of @sgc_head, applying fixups from @pf_head * and copying the scatter-gather data from the source process' user * buffer to the target's buffer. 
It is expected that the list creation * and processing all occurs during binder_transaction() so these lists * are only accessed in local context. * * Return: 0=success, else -errno */ static int binder_do_deferred_txn_copies(struct binder_alloc *alloc, struct binder_buffer *buffer, struct list_head *sgc_head, struct list_head *pf_head) { int ret = 0; struct binder_sg_copy *sgc, *tmpsgc; struct binder_ptr_fixup *tmppf; struct binder_ptr_fixup *pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { size_t bytes_copied = 0; while (bytes_copied < sgc->length) { size_t copy_size; size_t bytes_left = sgc->length - bytes_copied; size_t offset = sgc->offset + bytes_copied; /* * We copy up to the fixup (pointed to by pf) */ copy_size = pf ? min(bytes_left, (size_t)pf->offset - offset) : bytes_left; if (!ret && copy_size) ret = binder_alloc_copy_user_to_buffer( alloc, buffer, offset, sgc->sender_uaddr + bytes_copied, copy_size); bytes_copied += copy_size; if (copy_size != bytes_left) { BUG_ON(!pf); /* we stopped at a fixup offset */ if (pf->skip_size) { /* * we are just skipping. This is for * BINDER_TYPE_FDA where the translated * fds will be fixed up when we get * to target context. */ bytes_copied += pf->skip_size; } else { /* apply the fixup indicated by pf */ if (!ret) ret = binder_alloc_copy_to_buffer( alloc, buffer, pf->offset, &pf->fixup_data, sizeof(pf->fixup_data)); bytes_copied += sizeof(pf->fixup_data); } list_del(&pf->node); kfree(pf); pf = list_first_entry_or_null(pf_head, struct binder_ptr_fixup, node); } } list_del(&sgc->node); kfree(sgc); } list_for_each_entry_safe(pf, tmppf, pf_head, node) { BUG_ON(pf->skip_size == 0); list_del(&pf->node); kfree(pf); } BUG_ON(!list_empty(sgc_head)); return ret > 0 ? 
-EINVAL : ret; } /** * binder_cleanup_deferred_txn_lists() - free specified lists * @sgc_head: list_head of scatter-gather copy list * @pf_head: list_head of pointer fixup list * * Called to clean up @sgc_head and @pf_head if there is an * error. */ static void binder_cleanup_deferred_txn_lists(struct list_head *sgc_head, struct list_head *pf_head) { struct binder_sg_copy *sgc, *tmpsgc; struct binder_ptr_fixup *pf, *tmppf; list_for_each_entry_safe(sgc, tmpsgc, sgc_head, node) { list_del(&sgc->node); kfree(sgc); } list_for_each_entry_safe(pf, tmppf, pf_head, node) { list_del(&pf->node); kfree(pf); } } /** * binder_defer_copy() - queue a scatter-gather buffer for copy * @sgc_head: list_head of scatter-gather copy list * @offset: binder buffer offset in target process * @sender_uaddr: user address in source process * @length: bytes to copy * * Specify a scatter-gather block to be copied. The actual copy must * be deferred until all the needed fixups are identified and queued. * Then the copy and fixups are done together so un-translated values * from the source are never visible in the target buffer. * * We are guaranteed that repeated calls to this function will have * monotonically increasing @offset values so the list will naturally * be ordered. * * Return: 0=success, else -errno */ static int binder_defer_copy(struct list_head *sgc_head, binder_size_t offset, const void __user *sender_uaddr, size_t length) { struct binder_sg_copy *bc = kzalloc(sizeof(*bc), GFP_KERNEL); if (!bc) return -ENOMEM; bc->offset = offset; bc->sender_uaddr = sender_uaddr; bc->length = length; INIT_LIST_HEAD(&bc->node); /* * We are guaranteed that the deferred copies are in-order * so just add to the tail. 
*/ list_add_tail(&bc->node, sgc_head); return 0; } /** * binder_add_fixup() - queue a fixup to be applied to sg copy * @pf_head: list_head of binder ptr fixup list * @offset: binder buffer offset in target process * @fixup: bytes to be copied for fixup * @skip_size: bytes to skip when copying (fixup will be applied later) * * Add the specified fixup to a list ordered by @offset. When copying * the scatter-gather buffers, the fixup will be copied instead of * data from the source buffer. For BINDER_TYPE_FDA fixups, the fixup * will be applied later (in target process context), so we just skip * the bytes specified by @skip_size. If @skip_size is 0, we copy the * value in @fixup. * * This function is called *mostly* in @offset order, but there are * exceptions. Since out-of-order inserts are relatively uncommon, * we insert the new element by searching backward from the tail of * the list. * * Return: 0=success, else -errno */ static int binder_add_fixup(struct list_head *pf_head, binder_size_t offset, binder_uintptr_t fixup, size_t skip_size) { struct binder_ptr_fixup *pf = kzalloc(sizeof(*pf), GFP_KERNEL); struct binder_ptr_fixup *tmppf; if (!pf) return -ENOMEM; pf->offset = offset; pf->fixup_data = fixup; pf->skip_size = skip_size; INIT_LIST_HEAD(&pf->node); /* Fixups are *mostly* added in-order, but there are some * exceptions. Look backwards through list for insertion point. 
*/ list_for_each_entry_reverse(tmppf, pf_head, node) { if (tmppf->offset < pf->offset) { list_add(&pf->node, &tmppf->node); return 0; } } /* * if we get here, then the new offset is the lowest so * insert at the head */ list_add(&pf->node, pf_head); return 0; } static int binder_translate_fd_array(struct list_head *pf_head, struct binder_fd_array_object *fda, const void __user *sender_ubuffer, struct binder_buffer_object *parent, struct binder_buffer_object *sender_uparent, struct binder_transaction *t, struct binder_thread *thread, struct binder_transaction *in_reply_to) { binder_size_t fdi, fd_buf_size; binder_size_t fda_offset; const void __user *sender_ufda_base; struct binder_proc *proc = thread->proc; int ret; if (fda->num_fds == 0) return 0; fd_buf_size = sizeof(u32) * fda->num_fds; if (fda->num_fds >= SIZE_MAX / sizeof(u32)) { binder_user_error("%d:%d got transaction with invalid number of fds (%lld)\n", proc->pid, thread->pid, (u64)fda->num_fds); return -EINVAL; } if (fd_buf_size > parent->length || fda->parent_offset > parent->length - fd_buf_size) { /* No space for all file descriptors here. */ binder_user_error("%d:%d not enough space to store %lld fds in buffer\n", proc->pid, thread->pid, (u64)fda->num_fds); return -EINVAL; } /* * the source data for binder_buffer_object is visible * to user-space and the @buffer element is the user * pointer to the buffer_object containing the fd_array. * Convert the address to an offset relative to * the base of the transaction buffer. 
*/ fda_offset = parent->buffer - t->buffer->user_data + fda->parent_offset; sender_ufda_base = (void __user *)(uintptr_t)sender_uparent->buffer + fda->parent_offset; if (!IS_ALIGNED((unsigned long)fda_offset, sizeof(u32)) || !IS_ALIGNED((unsigned long)sender_ufda_base, sizeof(u32))) { binder_user_error("%d:%d parent offset not aligned correctly.\n", proc->pid, thread->pid); return -EINVAL; } ret = binder_add_fixup(pf_head, fda_offset, 0, fda->num_fds * sizeof(u32)); if (ret) return ret; for (fdi = 0; fdi < fda->num_fds; fdi++) { u32 fd; binder_size_t offset = fda_offset + fdi * sizeof(fd); binder_size_t sender_uoffset = fdi * sizeof(fd); ret = copy_from_user(&fd, sender_ufda_base + sender_uoffset, sizeof(fd)); if (!ret) ret = binder_translate_fd(fd, offset, t, thread, in_reply_to); if (ret) return ret > 0 ? -EINVAL : ret; } return 0; } static int binder_fixup_parent(struct list_head *pf_head, struct binder_transaction *t, struct binder_thread *thread, struct binder_buffer_object *bp, binder_size_t off_start_offset, binder_size_t num_valid, binder_size_t last_fixup_obj_off, binder_size_t last_fixup_min_off) { struct binder_buffer_object *parent; struct binder_buffer *b = t->buffer; struct binder_proc *proc = thread->proc; struct binder_proc *target_proc = t->to_proc; struct binder_object object; binder_size_t buffer_offset; binder_size_t parent_offset; if (!(bp->flags & BINDER_BUFFER_FLAG_HAS_PARENT)) return 0; parent = binder_validate_ptr(target_proc, b, &object, bp->parent, off_start_offset, &parent_offset, num_valid); if (!parent) { binder_user_error("%d:%d got transaction with invalid parent offset or type\n", proc->pid, thread->pid); return -EINVAL; } if (!binder_validate_fixup(target_proc, b, off_start_offset, parent_offset, bp->parent_offset, last_fixup_obj_off, last_fixup_min_off)) { binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", proc->pid, thread->pid); return -EINVAL; } if (parent->length < sizeof(binder_uintptr_t) || 
bp->parent_offset > parent->length - sizeof(binder_uintptr_t)) { /* No space for a pointer here! */ binder_user_error("%d:%d got transaction with invalid parent offset\n", proc->pid, thread->pid); return -EINVAL; } buffer_offset = bp->parent_offset + parent->buffer - b->user_data; return binder_add_fixup(pf_head, buffer_offset, bp->buffer, 0); } /** * binder_can_update_transaction() - Can a txn be superseded by an updated one? * @t1: the pending async txn in the frozen process * @t2: the new async txn to supersede the outdated pending one * * Return: true if t2 can supersede t1 * false if t2 can not supersede t1 */ static bool binder_can_update_transaction(struct binder_transaction *t1, struct binder_transaction *t2) { if ((t1->flags & t2->flags & (TF_ONE_WAY | TF_UPDATE_TXN)) != (TF_ONE_WAY | TF_UPDATE_TXN) || !t1->to_proc || !t2->to_proc) return false; if (t1->to_proc->tsk == t2->to_proc->tsk && t1->code == t2->code && t1->flags == t2->flags && t1->buffer->pid == t2->buffer->pid && t1->buffer->target_node->ptr == t2->buffer->target_node->ptr && t1->buffer->target_node->cookie == t2->buffer->target_node->cookie) return true; return false; } /** * binder_find_outdated_transaction_ilocked() - Find the outdated transaction * @t: new async transaction * @target_list: list to find outdated transaction * * Return: the outdated transaction if found * NULL if no outdated transacton can be found * * Requires the proc->inner_lock to be held. 
*/
static struct binder_transaction *
binder_find_outdated_transaction_ilocked(struct binder_transaction *t,
					 struct list_head *target_list)
{
	struct binder_work *work;

	/* return the first queued transaction that @t may supersede */
	list_for_each_entry(work, target_list, entry) {
		if (work->type == BINDER_WORK_TRANSACTION) {
			struct binder_transaction *candidate =
				container_of(work, struct binder_transaction,
					     work);

			if (binder_can_update_transaction(candidate, t))
				return candidate;
		}
	}

	return NULL;
}

/** * binder_proc_transaction() - sends a transaction to a process and wakes it up * @t: transaction to send * @proc: process to send the transaction to * @thread: thread in @proc to send the transaction to (may be NULL) * * This function queues a transaction to the specified process. It will try * to find a thread in the target process to handle the transaction and * wake it up. If no thread is found, the work is queued to the proc * waitqueue. * * If the @thread parameter is not NULL, the transaction is always queued * to the waitlist of that specific thread.
* * Return: 0 if the transaction was successfully queued * BR_DEAD_REPLY if the target process or thread is dead * BR_FROZEN_REPLY if the target process or thread is frozen and * the sync transaction was rejected * BR_TRANSACTION_PENDING_FROZEN if the target process is frozen * and the async transaction was successfully queued */
static int binder_proc_transaction(struct binder_transaction *t,
				    struct binder_proc *proc,
				    struct binder_thread *thread)
{
	struct binder_node *node = t->buffer->target_node;
	bool oneway = !!(t->flags & TF_ONE_WAY);
	/* true when the node already has an async transaction in flight */
	bool pending_async = false;
	/* queued async transaction replaced by @t, freed after unlocking */
	struct binder_transaction *t_outdated = NULL;
	bool frozen = false;

	BUG_ON(!node);
	/* the node lock is taken first, the proc inner lock nests inside it */
	binder_node_lock(node);
	if (oneway) {
		/* one-way transactions must not name a specific thread */
		BUG_ON(thread);
		if (node->has_async_transaction)
			pending_async = true;
		else
			node->has_async_transaction = true;
	}

	binder_inner_proc_lock(proc);
	if (proc->is_frozen) {
		frozen = true;
		/* record which kind of transaction reached the frozen proc */
		proc->sync_recv |= !oneway;
		proc->async_recv |= oneway;
	}

	/* sync to a frozen proc, or any dead proc/thread: reject */
	if ((frozen && !oneway) || proc->is_dead ||
			(thread && thread->is_dead)) {
		binder_inner_proc_unlock(proc);
		binder_node_unlock(node);
		return frozen ? BR_FROZEN_REPLY : BR_DEAD_REPLY;
	}

	/* no thread given and nothing pending: let the proc pick one */
	if (!thread && !pending_async)
		thread = binder_select_thread_ilocked(proc);

	if (thread) {
		binder_enqueue_thread_work_ilocked(thread, &t->work);
	} else if (!pending_async) {
		binder_enqueue_work_ilocked(&t->work, &proc->todo);
	} else {
		/*
		 * An async transaction is already pending on this node, so
		 * @t goes on the node's async_todo list. With TF_UPDATE_TXN
		 * and a frozen target, a matching queued transaction is
		 * unlinked so that @t supersedes it.
		 */
		if ((t->flags & TF_UPDATE_TXN) && frozen) {
			t_outdated = binder_find_outdated_transaction_ilocked(t,
									      &node->async_todo);
			if (t_outdated) {
				binder_debug(BINDER_DEBUG_TRANSACTION,
					     "txn %d supersedes %d\n",
					     t->debug_id, t_outdated->debug_id);
				list_del_init(&t_outdated->work.entry);
				/* balanced by the increment for @t below */
				proc->outstanding_txns--;
			}
		}
		binder_enqueue_work_ilocked(&t->work, &node->async_todo);
	}

	/* work parked on async_todo does not wake anyone up */
	if (!pending_async)
		binder_wakeup_thread_ilocked(proc, thread, !oneway /* sync */);

	proc->outstanding_txns++;
	binder_inner_proc_unlock(proc);
	binder_node_unlock(node);

	/*
	 * To reduce potential contention, free the outdated transaction and
	 * buffer after releasing the locks.
	 */
	if (t_outdated) {
		struct binder_buffer *buffer = t_outdated->buffer;

		/* detach buffer and transaction from each other first */
		t_outdated->buffer = NULL;
		buffer->transaction = NULL;
		trace_binder_transaction_update_buffer_release(buffer);
		binder_release_entire_buffer(proc, NULL, buffer, false);
		binder_alloc_free_buf(&proc->alloc, buffer);
		kfree(t_outdated);
		binder_stats_deleted(BINDER_STAT_TRANSACTION);
	}

	/* the async transaction was queued, but the target is frozen */
	if (oneway && frozen)
		return BR_TRANSACTION_PENDING_FROZEN;

	return 0;
}

/** * binder_get_node_refs_for_txn() - Get required refs on node for txn * @node: struct binder_node for which to get refs * @procp: returns @node->proc if valid * @error: if no @procp then returns BR_DEAD_REPLY * * User-space normally keeps the node alive when creating a transaction * since it has a reference to the target. The local strong ref keeps it * alive if the sending process dies before the target process processes * the transaction. If the source process is malicious or has a reference * counting bug, relying on the local strong ref can fail. * * Since user-space can cause the local strong ref to go away, we also take * a tmpref on the node to ensure it survives while we are constructing * the transaction. We also need a tmpref on the proc while we are * constructing the transaction, so we take that here as well. * * Return: The target_node with refs taken or NULL if no @node->proc is NULL. * Also sets @procp if valid. If the @node->proc is NULL indicating that the * target proc has died, @error is set to BR_DEAD_REPLY.
*/ static struct binder_node *binder_get_node_refs_for_txn( struct binder_node *node, struct binder_proc **procp, uint32_t *error) { struct binder_node *target_node = NULL; binder_node_inner_lock(node); if (node->proc) { target_node = node; binder_inc_node_nilocked(node, 1, 0, NULL); binder_inc_node_tmpref_ilocked(node); node->proc->tmp_ref++; *procp = node->proc; } else *error = BR_DEAD_REPLY; binder_node_inner_unlock(node); return target_node; } static void binder_set_txn_from_error(struct binder_transaction *t, int id, uint32_t command, int32_t param) { struct binder_thread *from = binder_get_txn_from_and_acq_inner(t); if (!from) { /* annotation for sparse */ __release(&from->proc->inner_lock); return; } /* don't override existing errors */ if (from->ee.command == BR_OK) binder_set_extended_error(&from->ee, id, command, param); binder_inner_proc_unlock(from->proc); binder_thread_dec_tmpref(from); } static void binder_transaction(struct binder_proc *proc, struct binder_thread *thread, struct binder_transaction_data *tr, int reply, binder_size_t extra_buffers_size) { int ret; struct binder_transaction *t; struct binder_work *w; struct binder_work *tcomplete; binder_size_t buffer_offset = 0; binder_size_t off_start_offset, off_end_offset; binder_size_t off_min; binder_size_t sg_buf_offset, sg_buf_end_offset; binder_size_t user_offset = 0; struct binder_proc *target_proc = NULL; struct binder_thread *target_thread = NULL; struct binder_node *target_node = NULL; struct binder_transaction *in_reply_to = NULL; struct binder_transaction_log_entry *e; uint32_t return_error = 0; uint32_t return_error_param = 0; uint32_t return_error_line = 0; binder_size_t last_fixup_obj_off = 0; binder_size_t last_fixup_min_off = 0; struct binder_context *context = proc->context; int t_debug_id = atomic_inc_return(&binder_last_id); ktime_t t_start_time = ktime_get(); struct lsm_context lsmctx = { }; struct list_head sgc_head; struct list_head pf_head; const void __user *user_buffer = 
(const void __user *) (uintptr_t)tr->data.ptr.buffer; INIT_LIST_HEAD(&sgc_head); INIT_LIST_HEAD(&pf_head); e = binder_transaction_log_add(&binder_transaction_log); e->debug_id = t_debug_id; e->call_type = reply ? 2 : !!(tr->flags & TF_ONE_WAY); e->from_proc = proc->pid; e->from_thread = thread->pid; e->target_handle = tr->target.handle; e->data_size = tr->data_size; e->offsets_size = tr->offsets_size; strscpy(e->context_name, proc->context->name, BINDERFS_MAX_NAME); binder_inner_proc_lock(proc); binder_set_extended_error(&thread->ee, t_debug_id, BR_OK, 0); binder_inner_proc_unlock(proc); if (reply) { binder_inner_proc_lock(proc); in_reply_to = thread->transaction_stack; if (in_reply_to == NULL) { binder_inner_proc_unlock(proc); binder_user_error("%d:%d got reply transaction with no transaction stack\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; goto err_empty_call_stack; } if (in_reply_to->to_thread != thread) { spin_lock(&in_reply_to->lock); binder_user_error("%d:%d got reply transaction with bad transaction stack, transaction %d has target %d:%d\n", proc->pid, thread->pid, in_reply_to->debug_id, in_reply_to->to_proc ? in_reply_to->to_proc->pid : 0, in_reply_to->to_thread ? 
in_reply_to->to_thread->pid : 0); spin_unlock(&in_reply_to->lock); binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; in_reply_to = NULL; goto err_bad_call_stack; } thread->transaction_stack = in_reply_to->to_parent; binder_inner_proc_unlock(proc); binder_set_nice(in_reply_to->saved_priority); target_thread = binder_get_txn_from_and_acq_inner(in_reply_to); if (target_thread == NULL) { /* annotation for sparse */ __release(&target_thread->proc->inner_lock); binder_txn_error("%d:%d reply target not found\n", thread->pid, proc->pid); return_error = BR_DEAD_REPLY; return_error_line = __LINE__; goto err_dead_binder; } if (target_thread->transaction_stack != in_reply_to) { binder_user_error("%d:%d got reply transaction with bad target transaction stack %d, expected %d\n", proc->pid, thread->pid, target_thread->transaction_stack ? target_thread->transaction_stack->debug_id : 0, in_reply_to->debug_id); binder_inner_proc_unlock(target_thread->proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; in_reply_to = NULL; target_thread = NULL; goto err_dead_binder; } target_proc = target_thread->proc; target_proc->tmp_ref++; binder_inner_proc_unlock(target_thread->proc); } else { if (tr->target.handle) { struct binder_ref *ref; /* * There must already be a strong ref * on this node. If so, do a strong * increment on the node to ensure it * stays alive until the transaction is * done. 
*/ binder_proc_lock(proc); ref = binder_get_ref_olocked(proc, tr->target.handle, true); if (ref) { target_node = binder_get_node_refs_for_txn( ref->node, &target_proc, &return_error); } else { binder_user_error("%d:%d got transaction to invalid handle, %u\n", proc->pid, thread->pid, tr->target.handle); return_error = BR_FAILED_REPLY; } binder_proc_unlock(proc); } else { mutex_lock(&context->context_mgr_node_lock); target_node = context->binder_context_mgr_node; if (target_node) target_node = binder_get_node_refs_for_txn( target_node, &target_proc, &return_error); else return_error = BR_DEAD_REPLY; mutex_unlock(&context->context_mgr_node_lock); if (target_node && target_proc->pid == proc->pid) { binder_user_error("%d:%d got transaction to context manager from process owning it\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_invalid_target_handle; } } if (!target_node) { binder_txn_error("%d:%d cannot find target node\n", proc->pid, thread->pid); /* return_error is set above */ return_error_param = -EINVAL; return_error_line = __LINE__; goto err_dead_binder; } e->to_node = target_node->debug_id; if (WARN_ON(proc == target_proc)) { binder_txn_error("%d:%d self transactions not allowed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_invalid_target_handle; } if (security_binder_transaction(proc->cred, target_proc->cred) < 0) { binder_txn_error("%d:%d transaction credentials failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = -EPERM; return_error_line = __LINE__; goto err_invalid_target_handle; } binder_inner_proc_lock(proc); w = list_first_entry_or_null(&thread->todo, struct binder_work, entry); if (!(tr->flags & TF_ONE_WAY) && w && w->type == BINDER_WORK_TRANSACTION) { /* * Do not allow new outgoing transaction from a * thread that has a transaction at the head of * its 
todo list. Only need to check the head * because binder_select_thread_ilocked picks a * thread from proc->waiting_threads to enqueue * the transaction, and nothing is queued to the * todo list while the thread is on waiting_threads. */ binder_user_error("%d:%d new transaction not allowed when there is a transaction on thread todo\n", proc->pid, thread->pid); binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; goto err_bad_todo_list; } if (!(tr->flags & TF_ONE_WAY) && thread->transaction_stack) { struct binder_transaction *tmp; tmp = thread->transaction_stack; if (tmp->to_thread != thread) { spin_lock(&tmp->lock); binder_user_error("%d:%d got new transaction with bad transaction stack, transaction %d has target %d:%d\n", proc->pid, thread->pid, tmp->debug_id, tmp->to_proc ? tmp->to_proc->pid : 0, tmp->to_thread ? tmp->to_thread->pid : 0); spin_unlock(&tmp->lock); binder_inner_proc_unlock(proc); return_error = BR_FAILED_REPLY; return_error_param = -EPROTO; return_error_line = __LINE__; goto err_bad_call_stack; } while (tmp) { struct binder_thread *from; spin_lock(&tmp->lock); from = tmp->from; if (from && from->proc == target_proc) { atomic_inc(&from->tmp_ref); target_thread = from; spin_unlock(&tmp->lock); break; } spin_unlock(&tmp->lock); tmp = tmp->from_parent; } } binder_inner_proc_unlock(proc); } if (target_thread) e->to_thread = target_thread->pid; e->to_proc = target_proc->pid; /* TODO: reuse incoming transaction for reply */ t = kzalloc(sizeof(*t), GFP_KERNEL); if (t == NULL) { binder_txn_error("%d:%d cannot allocate transaction\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = -ENOMEM; return_error_line = __LINE__; goto err_alloc_t_failed; } INIT_LIST_HEAD(&t->fd_fixups); binder_stats_created(BINDER_STAT_TRANSACTION); spin_lock_init(&t->lock); tcomplete = kzalloc(sizeof(*tcomplete), GFP_KERNEL); if (tcomplete == NULL) { binder_txn_error("%d:%d cannot 
allocate work for transaction\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = -ENOMEM; return_error_line = __LINE__; goto err_alloc_tcomplete_failed; } binder_stats_created(BINDER_STAT_TRANSACTION_COMPLETE); t->debug_id = t_debug_id; t->start_time = t_start_time; if (reply) binder_debug(BINDER_DEBUG_TRANSACTION, "%d:%d BC_REPLY %d -> %d:%d, data size %lld-%lld-%lld\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_thread->pid, (u64)tr->data_size, (u64)tr->offsets_size, (u64)extra_buffers_size); else binder_debug(BINDER_DEBUG_TRANSACTION, "%d:%d BC_TRANSACTION %d -> %d - node %d, data size %lld-%lld-%lld\n", proc->pid, thread->pid, t->debug_id, target_proc->pid, target_node->debug_id, (u64)tr->data_size, (u64)tr->offsets_size, (u64)extra_buffers_size); if (!reply && !(tr->flags & TF_ONE_WAY)) t->from = thread; else t->from = NULL; t->from_pid = proc->pid; t->from_tid = thread->pid; t->sender_euid = task_euid(proc->tsk); t->to_proc = target_proc; t->to_thread = target_thread; t->code = tr->code; t->flags = tr->flags; t->priority = task_nice(current); if (target_node && target_node->txn_security_ctx) { u32 secid; size_t added_size; security_cred_getsecid(proc->cred, &secid); ret = security_secid_to_secctx(secid, &lsmctx); if (ret < 0) { binder_txn_error("%d:%d failed to get security context\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = ret; return_error_line = __LINE__; goto err_get_secctx_failed; } added_size = ALIGN(lsmctx.len, sizeof(u64)); extra_buffers_size += added_size; if (extra_buffers_size < added_size) { binder_txn_error("%d:%d integer overflow of extra_buffers_size\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_extra_size; } } trace_binder_transaction(reply, t, target_node); t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size, tr->offsets_size, extra_buffers_size, !reply 
&& (t->flags & TF_ONE_WAY)); if (IS_ERR(t->buffer)) { char *s; ret = PTR_ERR(t->buffer); s = (ret == -ESRCH) ? ": vma cleared, target dead or dying" : (ret == -ENOSPC) ? ": no space left" : (ret == -ENOMEM) ? ": memory allocation failed" : ""; binder_txn_error("cannot allocate buffer%s", s); return_error_param = PTR_ERR(t->buffer); return_error = return_error_param == -ESRCH ? BR_DEAD_REPLY : BR_FAILED_REPLY; return_error_line = __LINE__; t->buffer = NULL; goto err_binder_alloc_buf_failed; } if (lsmctx.context) { int err; size_t buf_offset = ALIGN(tr->data_size, sizeof(void *)) + ALIGN(tr->offsets_size, sizeof(void *)) + ALIGN(extra_buffers_size, sizeof(void *)) - ALIGN(lsmctx.len, sizeof(u64)); t->security_ctx = t->buffer->user_data + buf_offset; err = binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, buf_offset, lsmctx.context, lsmctx.len); if (err) { t->security_ctx = 0; WARN_ON(1); } security_release_secctx(&lsmctx); lsmctx.context = NULL; } t->buffer->debug_id = t->debug_id; t->buffer->transaction = t; t->buffer->target_node = target_node; t->buffer->clear_on_free = !!(t->flags & TF_CLEAR_BUF); trace_binder_transaction_alloc_buf(t->buffer); if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, ALIGN(tr->data_size, sizeof(void *)), (const void __user *) (uintptr_t)tr->data.ptr.offsets, tr->offsets_size)) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EFAULT; return_error_line = __LINE__; goto err_copy_data_failed; } if (!IS_ALIGNED(tr->offsets_size, sizeof(binder_size_t))) { binder_user_error("%d:%d got transaction with invalid offsets size, %lld\n", proc->pid, thread->pid, (u64)tr->offsets_size); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_offset; } if (!IS_ALIGNED(extra_buffers_size, sizeof(u64))) { binder_user_error("%d:%d got transaction with unaligned buffers size, 
%lld\n", proc->pid, thread->pid, (u64)extra_buffers_size); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_offset; } off_start_offset = ALIGN(tr->data_size, sizeof(void *)); buffer_offset = off_start_offset; off_end_offset = off_start_offset + tr->offsets_size; sg_buf_offset = ALIGN(off_end_offset, sizeof(void *)); sg_buf_end_offset = sg_buf_offset + extra_buffers_size - ALIGN(lsmctx.len, sizeof(u64)); off_min = 0; for (buffer_offset = off_start_offset; buffer_offset < off_end_offset; buffer_offset += sizeof(binder_size_t)) { struct binder_object_header *hdr; size_t object_size; struct binder_object object; binder_size_t object_offset; binder_size_t copy_size; if (binder_alloc_copy_from_buffer(&target_proc->alloc, &object_offset, t->buffer, buffer_offset, sizeof(object_offset))) { binder_txn_error("%d:%d copy offset from buffer failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_offset; } /* * Copy the source user buffer up to the next object * that will be processed. 
*/ copy_size = object_offset - user_offset; if (copy_size && (user_offset > object_offset || object_offset > tr->data_size || binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, user_offset, user_buffer + user_offset, copy_size))) { binder_user_error("%d:%d got transaction with invalid data ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EFAULT; return_error_line = __LINE__; goto err_copy_data_failed; } object_size = binder_get_object(target_proc, user_buffer, t->buffer, object_offset, &object); if (object_size == 0 || object_offset < off_min) { binder_user_error("%d:%d got transaction with invalid offset (%lld, min %lld max %lld) or object.\n", proc->pid, thread->pid, (u64)object_offset, (u64)off_min, (u64)t->buffer->data_size); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_offset; } /* * Set offset to the next buffer fragment to be * copied */ user_offset = object_offset + object_size; hdr = &object.hdr; off_min = object_offset + object_size; switch (hdr->type) { case BINDER_TYPE_BINDER: case BINDER_TYPE_WEAK_BINDER: { struct flat_binder_object *fp; fp = to_flat_binder_object(hdr); ret = binder_translate_binder(fp, t, thread); if (ret < 0 || binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, object_offset, fp, sizeof(*fp))) { binder_txn_error("%d:%d translate binder failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = ret; return_error_line = __LINE__; goto err_translate_failed; } } break; case BINDER_TYPE_HANDLE: case BINDER_TYPE_WEAK_HANDLE: { struct flat_binder_object *fp; fp = to_flat_binder_object(hdr); ret = binder_translate_handle(fp, t, thread); if (ret < 0 || binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, object_offset, fp, sizeof(*fp))) { binder_txn_error("%d:%d translate handle failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = ret; 
return_error_line = __LINE__; goto err_translate_failed; } } break; case BINDER_TYPE_FD: { struct binder_fd_object *fp = to_binder_fd_object(hdr); binder_size_t fd_offset = object_offset + (uintptr_t)&fp->fd - (uintptr_t)fp; int ret = binder_translate_fd(fp->fd, fd_offset, t, thread, in_reply_to); fp->pad_binder = 0; if (ret < 0 || binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, object_offset, fp, sizeof(*fp))) { binder_txn_error("%d:%d translate fd failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = ret; return_error_line = __LINE__; goto err_translate_failed; } } break; case BINDER_TYPE_FDA: { struct binder_object ptr_object; binder_size_t parent_offset; struct binder_object user_object; size_t user_parent_size; struct binder_fd_array_object *fda = to_binder_fd_array_object(hdr); size_t num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); struct binder_buffer_object *parent = binder_validate_ptr(target_proc, t->buffer, &ptr_object, fda->parent, off_start_offset, &parent_offset, num_valid); if (!parent) { binder_user_error("%d:%d got transaction with invalid parent offset or type\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_parent; } if (!binder_validate_fixup(target_proc, t->buffer, off_start_offset, parent_offset, fda->parent_offset, last_fixup_obj_off, last_fixup_min_off)) { binder_user_error("%d:%d got transaction with out-of-order buffer fixup\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_parent; } /* * We need to read the user version of the parent * object to get the original user offset */ user_parent_size = binder_get_object(proc, user_buffer, t->buffer, parent_offset, &user_object); if (user_parent_size != sizeof(user_object.bbo)) { binder_user_error("%d:%d invalid ptr object size: %zd vs %zd\n", proc->pid, 
thread->pid, user_parent_size, sizeof(user_object.bbo)); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_parent; } ret = binder_translate_fd_array(&pf_head, fda, user_buffer, parent, &user_object.bbo, t, thread, in_reply_to); if (!ret) ret = binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, object_offset, fda, sizeof(*fda)); if (ret) { binder_txn_error("%d:%d translate fd array failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = ret > 0 ? -EINVAL : ret; return_error_line = __LINE__; goto err_translate_failed; } last_fixup_obj_off = parent_offset; last_fixup_min_off = fda->parent_offset + sizeof(u32) * fda->num_fds; } break; case BINDER_TYPE_PTR: { struct binder_buffer_object *bp = to_binder_buffer_object(hdr); size_t buf_left = sg_buf_end_offset - sg_buf_offset; size_t num_valid; if (bp->length > buf_left) { binder_user_error("%d:%d got transaction with too large buffer\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_offset; } ret = binder_defer_copy(&sgc_head, sg_buf_offset, (const void __user *)(uintptr_t)bp->buffer, bp->length); if (ret) { binder_txn_error("%d:%d deferred copy failed\n", thread->pid, proc->pid); return_error = BR_FAILED_REPLY; return_error_param = ret; return_error_line = __LINE__; goto err_translate_failed; } /* Fixup buffer pointer to target proc address space */ bp->buffer = t->buffer->user_data + sg_buf_offset; sg_buf_offset += ALIGN(bp->length, sizeof(u64)); num_valid = (buffer_offset - off_start_offset) / sizeof(binder_size_t); ret = binder_fixup_parent(&pf_head, t, thread, bp, off_start_offset, num_valid, last_fixup_obj_off, last_fixup_min_off); if (ret < 0 || binder_alloc_copy_to_buffer(&target_proc->alloc, t->buffer, object_offset, bp, sizeof(*bp))) { binder_txn_error("%d:%d failed to fixup parent\n", thread->pid, proc->pid); return_error = 
BR_FAILED_REPLY; return_error_param = ret; return_error_line = __LINE__; goto err_translate_failed; } last_fixup_obj_off = object_offset; last_fixup_min_off = 0; } break; default: binder_user_error("%d:%d got transaction with invalid object type, %x\n", proc->pid, thread->pid, hdr->type); return_error = BR_FAILED_REPLY; return_error_param = -EINVAL; return_error_line = __LINE__; goto err_bad_object_type; } } /* Done processing objects, copy the rest of the buffer */ if (binder_alloc_copy_user_to_buffer( &target_proc->alloc, t->buffer, user_offset, user_buffer + user_offset, tr->data_size - user_offset)) { binder_user_error("%d:%d got transaction with invalid data ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = -EFAULT; return_error_line = __LINE__; goto err_copy_data_failed; } ret = binder_do_deferred_txn_copies(&target_proc->alloc, t->buffer, &sgc_head, &pf_head); if (ret) { binder_user_error("%d:%d got transaction with invalid offsets ptr\n", proc->pid, thread->pid); return_error = BR_FAILED_REPLY; return_error_param = ret; return_error_line = __LINE__; goto err_copy_data_failed; } if (t->buffer->oneway_spam_suspect) tcomplete->type = BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT; else tcomplete->type = BINDER_WORK_TRANSACTION_COMPLETE; t->work.type = BINDER_WORK_TRANSACTION; if (reply) { binder_enqueue_thread_work(thread, tcomplete); binder_inner_proc_lock(target_proc); if (target_thread->is_dead) { return_error = BR_DEAD_REPLY; binder_inner_proc_unlock(target_proc); goto err_dead_proc_or_thread; } BUG_ON(t->buffer->async_transaction != 0); binder_pop_transaction_ilocked(target_thread, in_reply_to); binder_enqueue_thread_work_ilocked(target_thread, &t->work); target_proc->outstanding_txns++; binder_inner_proc_unlock(target_proc); wake_up_interruptible_sync(&target_thread->wait); binder_free_transaction(in_reply_to); } else if (!(t->flags & TF_ONE_WAY)) { BUG_ON(t->buffer->async_transaction != 0); binder_inner_proc_lock(proc); 
/* * Defer the TRANSACTION_COMPLETE, so we don't return to * userspace immediately; this allows the target process to * immediately start processing this transaction, reducing * latency. We will then return the TRANSACTION_COMPLETE when * the target replies (or there is an error). */ binder_enqueue_deferred_thread_work_ilocked(thread, tcomplete); t->need_reply = 1; t->from_parent = thread->transaction_stack; thread->transaction_stack = t; binder_inner_proc_unlock(proc); return_error = binder_proc_transaction(t, target_proc, target_thread); if (return_error) { binder_inner_proc_lock(proc); binder_pop_transaction_ilocked(thread, t); binder_inner_proc_unlock(proc); goto err_dead_proc_or_thread; } } else { BUG_ON(target_node == NULL); BUG_ON(t->buffer->async_transaction != 1); return_error = binder_proc_transaction(t, target_proc, NULL); /* * Let the caller know when async transaction reaches a frozen * process and is put in a pending queue, waiting for the target * process to be unfrozen. */ if (return_error == BR_TRANSACTION_PENDING_FROZEN) tcomplete->type = BINDER_WORK_TRANSACTION_PENDING; binder_enqueue_thread_work(thread, tcomplete); if (return_error && return_error != BR_TRANSACTION_PENDING_FROZEN) goto err_dead_proc_or_thread; } if (target_thread) binder_thread_dec_tmpref(target_thread); binder_proc_dec_tmpref(target_proc); if (target_node) binder_dec_node_tmpref(target_node); /* * write barrier to synchronize with initialization * of log entry */ smp_wmb(); WRITE_ONCE(e->debug_id_done, t_debug_id); return; err_dead_proc_or_thread: binder_txn_error("%d:%d dead process or thread\n", thread->pid, proc->pid); return_error_line = __LINE__; binder_dequeue_work(proc, tcomplete); err_translate_failed: err_bad_object_type: err_bad_offset: err_bad_parent: err_copy_data_failed: binder_cleanup_deferred_txn_lists(&sgc_head, &pf_head); binder_free_txn_fixups(t); trace_binder_transaction_failed_buffer_release(t->buffer); binder_transaction_buffer_release(target_proc, NULL, 
t->buffer, buffer_offset, true); if (target_node) binder_dec_node_tmpref(target_node); target_node = NULL; t->buffer->transaction = NULL; binder_alloc_free_buf(&target_proc->alloc, t->buffer); err_binder_alloc_buf_failed: err_bad_extra_size: if (lsmctx.context) security_release_secctx(&lsmctx); err_get_secctx_failed: kfree(tcomplete); binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE); err_alloc_tcomplete_failed: if (trace_binder_txn_latency_free_enabled()) binder_txn_latency_free(t); kfree(t); binder_stats_deleted(BINDER_STAT_TRANSACTION); err_alloc_t_failed: err_bad_todo_list: err_bad_call_stack: err_empty_call_stack: err_dead_binder: err_invalid_target_handle: if (target_node) { binder_dec_node(target_node, 1, 0); binder_dec_node_tmpref(target_node); } binder_debug(BINDER_DEBUG_FAILED_TRANSACTION, "%d:%d transaction %s to %d:%d failed %d/%d/%d, code %u size %lld-%lld line %d\n", proc->pid, thread->pid, reply ? "reply" : (tr->flags & TF_ONE_WAY ? "async" : "call"), target_proc ? target_proc->pid : 0, target_thread ? 
target_thread->pid : 0, t_debug_id, return_error, return_error_param, tr->code, (u64)tr->data_size, (u64)tr->offsets_size, return_error_line); if (target_thread) binder_thread_dec_tmpref(target_thread); if (target_proc) binder_proc_dec_tmpref(target_proc); { struct binder_transaction_log_entry *fe; e->return_error = return_error; e->return_error_param = return_error_param; e->return_error_line = return_error_line; fe = binder_transaction_log_add(&binder_transaction_log_failed); *fe = *e; /* * write barrier to synchronize with initialization * of log entry */ smp_wmb(); WRITE_ONCE(e->debug_id_done, t_debug_id); WRITE_ONCE(fe->debug_id_done, t_debug_id); } BUG_ON(thread->return_error.cmd != BR_OK); if (in_reply_to) { binder_set_txn_from_error(in_reply_to, t_debug_id, return_error, return_error_param); thread->return_error.cmd = BR_TRANSACTION_COMPLETE; binder_enqueue_thread_work(thread, &thread->return_error.work); binder_send_failed_reply(in_reply_to, return_error); } else { binder_inner_proc_lock(proc); binder_set_extended_error(&thread->ee, t_debug_id, return_error, return_error_param); binder_inner_proc_unlock(proc); thread->return_error.cmd = return_error; binder_enqueue_thread_work(thread, &thread->return_error.work); } } static int binder_request_freeze_notification(struct binder_proc *proc, struct binder_thread *thread, struct binder_handle_cookie *handle_cookie) { struct binder_ref_freeze *freeze; struct binder_ref *ref; freeze = kzalloc(sizeof(*freeze), GFP_KERNEL); if (!freeze) return -ENOMEM; binder_proc_lock(proc); ref = binder_get_ref_olocked(proc, handle_cookie->handle, false); if (!ref) { binder_user_error("%d:%d BC_REQUEST_FREEZE_NOTIFICATION invalid ref %d\n", proc->pid, thread->pid, handle_cookie->handle); binder_proc_unlock(proc); kfree(freeze); return -EINVAL; } binder_node_lock(ref->node); if (ref->freeze) { binder_user_error("%d:%d BC_REQUEST_FREEZE_NOTIFICATION already set\n", proc->pid, thread->pid); binder_node_unlock(ref->node); 
binder_proc_unlock(proc); kfree(freeze); return -EINVAL; } binder_stats_created(BINDER_STAT_FREEZE); INIT_LIST_HEAD(&freeze->work.entry); freeze->cookie = handle_cookie->cookie; freeze->work.type = BINDER_WORK_FROZEN_BINDER; ref->freeze = freeze; if (ref->node->proc) { binder_inner_proc_lock(ref->node->proc); freeze->is_frozen = ref->node->proc->is_frozen; binder_inner_proc_unlock(ref->node->proc); binder_inner_proc_lock(proc); binder_enqueue_work_ilocked(&freeze->work, &proc->todo); binder_wakeup_proc_ilocked(proc); binder_inner_proc_unlock(proc); } binder_node_unlock(ref->node); binder_proc_unlock(proc); return 0; } static int binder_clear_freeze_notification(struct binder_proc *proc, struct binder_thread *thread, struct binder_handle_cookie *handle_cookie) { struct binder_ref_freeze *freeze; struct binder_ref *ref; binder_proc_lock(proc); ref = binder_get_ref_olocked(proc, handle_cookie->handle, false); if (!ref) { binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION invalid ref %d\n", proc->pid, thread->pid, handle_cookie->handle); binder_proc_unlock(proc); return -EINVAL; } binder_node_lock(ref->node); if (!ref->freeze) { binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION freeze notification not active\n", proc->pid, thread->pid); binder_node_unlock(ref->node); binder_proc_unlock(proc); return -EINVAL; } freeze = ref->freeze; binder_inner_proc_lock(proc); if (freeze->cookie != handle_cookie->cookie) { binder_user_error("%d:%d BC_CLEAR_FREEZE_NOTIFICATION freeze notification cookie mismatch %016llx != %016llx\n", proc->pid, thread->pid, (u64)freeze->cookie, (u64)handle_cookie->cookie); binder_inner_proc_unlock(proc); binder_node_unlock(ref->node); binder_proc_unlock(proc); return -EINVAL; } ref->freeze = NULL; /* * Take the existing freeze object and overwrite its work type. There are three cases here: * 1. No pending notification. In this case just add the work to the queue. * 2. A notification was sent and is pending an ack from userspace. 
Once an ack arrives, we * should resend with the new work type. * 3. A notification is pending to be sent. Since the work is already in the queue, nothing * needs to be done here. */ freeze->work.type = BINDER_WORK_CLEAR_FREEZE_NOTIFICATION; if (list_empty(&freeze->work.entry)) { binder_enqueue_work_ilocked(&freeze->work, &proc->todo); binder_wakeup_proc_ilocked(proc); } else if (freeze->sent) { freeze->resend = true; } binder_inner_proc_unlock(proc); binder_node_unlock(ref->node); binder_proc_unlock(proc); return 0; } static int binder_freeze_notification_done(struct binder_proc *proc, struct binder_thread *thread, binder_uintptr_t cookie) { struct binder_ref_freeze *freeze = NULL; struct binder_work *w; binder_inner_proc_lock(proc); list_for_each_entry(w, &proc->delivered_freeze, entry) { struct binder_ref_freeze *tmp_freeze = container_of(w, struct binder_ref_freeze, work); if (tmp_freeze->cookie == cookie) { freeze = tmp_freeze; break; } } if (!freeze) { binder_user_error("%d:%d BC_FREEZE_NOTIFICATION_DONE %016llx not found\n", proc->pid, thread->pid, (u64)cookie); binder_inner_proc_unlock(proc); return -EINVAL; } binder_dequeue_work_ilocked(&freeze->work); freeze->sent = false; if (freeze->resend) { freeze->resend = false; binder_enqueue_work_ilocked(&freeze->work, &proc->todo); binder_wakeup_proc_ilocked(proc); } binder_inner_proc_unlock(proc); return 0; } /** * binder_free_buf() - free the specified buffer * @proc: binder proc that owns buffer * @buffer: buffer to be freed * @is_failure: failed to send transaction * * If buffer for an async transaction, enqueue the next async * transaction from the node. * * Cleanup buffer and free it. 
*/ static void binder_free_buf(struct binder_proc *proc, struct binder_thread *thread, struct binder_buffer *buffer, bool is_failure) { binder_inner_proc_lock(proc); if (buffer->transaction) { buffer->transaction->buffer = NULL; buffer->transaction = NULL; } binder_inner_proc_unlock(proc); if (buffer->async_transaction && buffer->target_node) { struct binder_node *buf_node; struct binder_work *w; buf_node = buffer->target_node; binder_node_inner_lock(buf_node); BUG_ON(!buf_node->has_async_transaction); BUG_ON(buf_node->proc != proc); w = binder_dequeue_work_head_ilocked( &buf_node->async_todo); if (!w) { buf_node->has_async_transaction = false; } else { binder_enqueue_work_ilocked( w, &proc->todo); binder_wakeup_proc_ilocked(proc); } binder_node_inner_unlock(buf_node); } trace_binder_transaction_buffer_release(buffer); binder_release_entire_buffer(proc, thread, buffer, is_failure); binder_alloc_free_buf(&proc->alloc, buffer); } static int binder_thread_write(struct binder_proc *proc, struct binder_thread *thread, binder_uintptr_t binder_buffer, size_t size, binder_size_t *consumed) { uint32_t cmd; struct binder_context *context = proc->context; void __user *buffer = (void __user *)(uintptr_t)binder_buffer; void __user *ptr = buffer + *consumed; void __user *end = buffer + size; while (ptr < end && thread->return_error.cmd == BR_OK) { int ret; if (get_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); trace_binder_command(cmd); if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.bc)) { atomic_inc(&binder_stats.bc[_IOC_NR(cmd)]); atomic_inc(&proc->stats.bc[_IOC_NR(cmd)]); atomic_inc(&thread->stats.bc[_IOC_NR(cmd)]); } switch (cmd) { case BC_INCREFS: case BC_ACQUIRE: case BC_RELEASE: case BC_DECREFS: { uint32_t target; const char *debug_string; bool strong = cmd == BC_ACQUIRE || cmd == BC_RELEASE; bool increment = cmd == BC_INCREFS || cmd == BC_ACQUIRE; struct binder_ref_data rdata; if (get_user(target, (uint32_t __user *)ptr)) return -EFAULT; ptr += 
sizeof(uint32_t);
			ret = -1;
			/*
			 * An increment on handle 0 is first tried against the
			 * context manager node, under context_mgr_node_lock.
			 */
			if (increment && !target) {
				struct binder_node *ctx_mgr_node;

				mutex_lock(&context->context_mgr_node_lock);
				ctx_mgr_node = context->binder_context_mgr_node;
				if (ctx_mgr_node) {
					if (ctx_mgr_node->proc == proc) {
						binder_user_error("%d:%d context manager tried to acquire desc 0\n",
								  proc->pid, thread->pid);
						mutex_unlock(&context->context_mgr_node_lock);
						return -EINVAL;
					}
					ret = binder_inc_ref_for_node(
							proc, ctx_mgr_node,
							strong, NULL, &rdata);
				}
				mutex_unlock(&context->context_mgr_node_lock);
			}
			/* ret is still non-zero unless the path above succeeded. */
			if (ret)
				ret = binder_update_ref_for_handle(
						proc, target, increment, strong,
						&rdata);
			if (!ret && rdata.desc != target) {
				binder_user_error("%d:%d tried to acquire reference to desc %d, got %d instead\n",
					proc->pid, thread->pid,
					target, rdata.desc);
			}
			/* Pick the label used in the log messages below. */
			switch (cmd) {
			case BC_INCREFS:
				debug_string = "IncRefs";
				break;
			case BC_ACQUIRE:
				debug_string = "Acquire";
				break;
			case BC_RELEASE:
				debug_string = "Release";
				break;
			case BC_DECREFS:
			default:
				debug_string = "DecRefs";
				break;
			}
			/* A failed refcount change is logged, not returned. */
			if (ret) {
				binder_user_error("%d:%d %s %d refcount change on invalid ref %d ret %d\n",
					proc->pid, thread->pid, debug_string,
					strong, target, ret);
				break;
			}
			binder_debug(BINDER_DEBUG_USER_REFS,
				     "%d:%d %s ref %d desc %d s %d w %d\n",
				     proc->pid, thread->pid, debug_string,
				     rdata.debug_id, rdata.desc, rdata.strong,
				     rdata.weak);
			break;
		}
		/* Payload: node pointer followed by cookie. */
		case BC_INCREFS_DONE:
		case BC_ACQUIRE_DONE: {
			binder_uintptr_t node_ptr;
			binder_uintptr_t cookie;
			struct binder_node *node;
			bool free_node;

			if (get_user(node_ptr, (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);
			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);
			node = binder_get_node(proc, node_ptr);
			if (node == NULL) {
				binder_user_error("%d:%d %s u%016llx no match\n",
					proc->pid, thread->pid,
					cmd == BC_INCREFS_DONE ?
					"BC_INCREFS_DONE" :
					"BC_ACQUIRE_DONE",
					(u64)node_ptr);
				break;
			}
			/* Every exit from here on pairs with binder_put_node(). */
			if (cookie != node->cookie) {
				binder_user_error("%d:%d %s u%016llx node %d cookie mismatch %016llx != %016llx\n",
					proc->pid, thread->pid,
					cmd == BC_INCREFS_DONE ?
					"BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
					(u64)node_ptr, node->debug_id,
					(u64)cookie, (u64)node->cookie);
				binder_put_node(node);
				break;
			}
			binder_node_inner_lock(node);
			/* The matching pending flag must be set, then it is cleared. */
			if (cmd == BC_ACQUIRE_DONE) {
				if (node->pending_strong_ref == 0) {
					binder_user_error("%d:%d BC_ACQUIRE_DONE node %d has no pending acquire request\n",
						proc->pid, thread->pid,
						node->debug_id);
					binder_node_inner_unlock(node);
					binder_put_node(node);
					break;
				}
				node->pending_strong_ref = 0;
			} else {
				if (node->pending_weak_ref == 0) {
					binder_user_error("%d:%d BC_INCREFS_DONE node %d has no pending increfs request\n",
						proc->pid, thread->pid,
						node->debug_id);
					binder_node_inner_unlock(node);
					binder_put_node(node);
					break;
				}
				node->pending_weak_ref = 0;
			}
			free_node = binder_dec_node_nilocked(node,
					cmd == BC_ACQUIRE_DONE, 0);
			WARN_ON(free_node);
			binder_debug(BINDER_DEBUG_USER_REFS,
				     "%d:%d %s node %d ls %d lw %d tr %d\n",
				     proc->pid, thread->pid,
				     cmd == BC_INCREFS_DONE ? "BC_INCREFS_DONE" : "BC_ACQUIRE_DONE",
				     node->debug_id, node->local_strong_refs,
				     node->local_weak_refs, node->tmp_refs);
			binder_node_inner_unlock(node);
			binder_put_node(node);
			break;
		}
		/* These two commands are rejected outright. */
		case BC_ATTEMPT_ACQUIRE:
			pr_err("BC_ATTEMPT_ACQUIRE not supported\n");
			return -EINVAL;
		case BC_ACQUIRE_RESULT:
			pr_err("BC_ACQUIRE_RESULT not supported\n");
			return -EINVAL;

		/* Payload: user address of the buffer data. */
		case BC_FREE_BUFFER: {
			binder_uintptr_t data_ptr;
			struct binder_buffer *buffer;

			if (get_user(data_ptr, (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);

			buffer = binder_alloc_prepare_to_free(&proc->alloc,
							      data_ptr);
			/* Lookup failures are logged and the command is skipped. */
			if (IS_ERR_OR_NULL(buffer)) {
				if (PTR_ERR(buffer) == -EPERM) {
					binder_user_error(
						"%d:%d BC_FREE_BUFFER matched unreturned or currently freeing buffer at offset %lx\n",
						proc->pid, thread->pid,
						(unsigned long)data_ptr - proc->alloc.vm_start);
				} else {
					binder_user_error(
						"%d:%d BC_FREE_BUFFER no match for buffer at offset %lx\n",
						proc->pid, thread->pid,
						(unsigned long)data_ptr - proc->alloc.vm_start);
				}
				break;
			}
			binder_debug(BINDER_DEBUG_FREE_BUFFER,
				     "%d:%d BC_FREE_BUFFER at offset %lx found buffer %d for %s transaction\n",
				     proc->pid, thread->pid,
				     (unsigned long)data_ptr - proc->alloc.vm_start,
				     buffer->debug_id,
				     buffer->transaction ? "active" : "finished");
			binder_free_buf(proc, thread, buffer, false);
			break;
		}

		/* Scatter-gather variants pass tr.buffers_size as the extra size. */
		case BC_TRANSACTION_SG:
		case BC_REPLY_SG: {
			struct binder_transaction_data_sg tr;

			if (copy_from_user(&tr, ptr, sizeof(tr)))
				return -EFAULT;
			ptr += sizeof(tr);
			binder_transaction(proc, thread, &tr.transaction_data,
					   cmd == BC_REPLY_SG, tr.buffers_size);
			break;
		}
		/* Plain variants pass 0 as the extra size. */
		case BC_TRANSACTION:
		case BC_REPLY: {
			struct binder_transaction_data tr;

			if (copy_from_user(&tr, ptr, sizeof(tr)))
				return -EFAULT;
			ptr += sizeof(tr);
			binder_transaction(proc, thread, &tr,
					   cmd == BC_REPLY, 0);
			break;
		}

		/*
		 * Misuse only marks the looper state INVALID and logs;
		 * REGISTERED is set in every branch.
		 */
		case BC_REGISTER_LOOPER:
			binder_debug(BINDER_DEBUG_THREADS,
				     "%d:%d BC_REGISTER_LOOPER\n",
				     proc->pid, thread->pid);
			binder_inner_proc_lock(proc);
			if (thread->looper & BINDER_LOOPER_STATE_ENTERED) {
				thread->looper |= BINDER_LOOPER_STATE_INVALID;
				binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called after BC_ENTER_LOOPER\n",
					proc->pid, thread->pid);
			} else if (proc->requested_threads == 0) {
				thread->looper |= BINDER_LOOPER_STATE_INVALID;
				binder_user_error("%d:%d ERROR: BC_REGISTER_LOOPER called without request\n",
					proc->pid, thread->pid);
			} else {
				proc->requested_threads--;
				proc->requested_threads_started++;
			}
			thread->looper |= BINDER_LOOPER_STATE_REGISTERED;
			binder_inner_proc_unlock(proc);
			break;
		case BC_ENTER_LOOPER:
			binder_debug(BINDER_DEBUG_THREADS,
				     "%d:%d BC_ENTER_LOOPER\n",
				     proc->pid, thread->pid);
			if (thread->looper & BINDER_LOOPER_STATE_REGISTERED) {
				thread->looper |= BINDER_LOOPER_STATE_INVALID;
				binder_user_error("%d:%d ERROR: BC_ENTER_LOOPER called after BC_REGISTER_LOOPER\n",
					proc->pid, thread->pid);
			}
			thread->looper |= BINDER_LOOPER_STATE_ENTERED;
			break;
		case BC_EXIT_LOOPER:
			binder_debug(BINDER_DEBUG_THREADS,
				     "%d:%d BC_EXIT_LOOPER\n",
				     proc->pid, thread->pid);
			thread->looper |= BINDER_LOOPER_STATE_EXITED;
			break;

		/* Payload: 32-bit handle followed by cookie. */
		case BC_REQUEST_DEATH_NOTIFICATION:
		case BC_CLEAR_DEATH_NOTIFICATION: {
			uint32_t target;
			binder_uintptr_t cookie;
			struct binder_ref *ref;
			struct binder_ref_death *death = NULL;

			if (get_user(target, (uint32_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(uint32_t);
			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);
			if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
				/*
				 * Allocate memory for death notification
				 * before taking lock
				 */
				death = kzalloc(sizeof(*death), GFP_KERNEL);
				if (death == NULL) {
					/* Report BR_ERROR to the thread instead of returning. */
					WARN_ON(thread->return_error.cmd !=
						BR_OK);
					thread->return_error.cmd = BR_ERROR;
					binder_enqueue_thread_work(
						thread,
						&thread->return_error.work);
					binder_debug(
						BINDER_DEBUG_FAILED_TRANSACTION,
						"%d:%d BC_REQUEST_DEATH_NOTIFICATION failed\n",
						proc->pid, thread->pid);
					break;
				}
			}
			binder_proc_lock(proc);
			ref = binder_get_ref_olocked(proc, target, false);
			if (ref == NULL) {
				binder_user_error("%d:%d %s invalid ref %d\n",
					proc->pid, thread->pid,
					cmd == BC_REQUEST_DEATH_NOTIFICATION ?
					"BC_REQUEST_DEATH_NOTIFICATION" :
					"BC_CLEAR_DEATH_NOTIFICATION",
					target);
				binder_proc_unlock(proc);
				/* death is NULL for the CLEAR command. */
				kfree(death);
				break;
			}

			binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
				     "%d:%d %s %016llx ref %d desc %d s %d w %d for node %d\n",
				     proc->pid, thread->pid,
				     cmd == BC_REQUEST_DEATH_NOTIFICATION ?
				     "BC_REQUEST_DEATH_NOTIFICATION" :
				     "BC_CLEAR_DEATH_NOTIFICATION",
				     (u64)cookie, ref->data.debug_id,
				     ref->data.desc, ref->data.strong,
				     ref->data.weak, ref->node->debug_id);

			binder_node_lock(ref->node);
			if (cmd == BC_REQUEST_DEATH_NOTIFICATION) {
				if (ref->death) {
					binder_user_error("%d:%d BC_REQUEST_DEATH_NOTIFICATION death notification already set\n",
						proc->pid, thread->pid);
					binder_node_unlock(ref->node);
					binder_proc_unlock(proc);
					kfree(death);
					break;
				}
				binder_stats_created(BINDER_STAT_DEATH);
				INIT_LIST_HEAD(&death->work.entry);
				death->cookie = cookie;
				ref->death = death;
				/* Node has no owning proc: queue the dead-binder work now. */
				if (ref->node->proc == NULL) {
					ref->death->work.type = BINDER_WORK_DEAD_BINDER;

					binder_inner_proc_lock(proc);
					binder_enqueue_work_ilocked(
						&ref->death->work, &proc->todo);
					binder_wakeup_proc_ilocked(proc);
					binder_inner_proc_unlock(proc);
				}
			} else {
				if (ref->death == NULL) {
					binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification not active\n",
						proc->pid, thread->pid);
					binder_node_unlock(ref->node);
					binder_proc_unlock(proc);
					break;
				}
				death = ref->death;
				if (death->cookie != cookie) {
					binder_user_error("%d:%d BC_CLEAR_DEATH_NOTIFICATION death notification cookie mismatch %016llx != %016llx\n",
						proc->pid, thread->pid,
						(u64)death->cookie,
						(u64)cookie);
					binder_node_unlock(ref->node);
					binder_proc_unlock(proc);
					break;
				}
				ref->death = NULL;
				binder_inner_proc_lock(proc);
				/*
				 * Work not on any list: queue the clear now.
				 * Otherwise it must be a queued DEAD_BINDER,
				 * which is converted to DEAD_BINDER_AND_CLEAR.
				 */
				if (list_empty(&death->work.entry)) {
					death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
					if (thread->looper &
					    (BINDER_LOOPER_STATE_REGISTERED |
					     BINDER_LOOPER_STATE_ENTERED))
						binder_enqueue_thread_work_ilocked(
								thread,
								&death->work);
					else {
						binder_enqueue_work_ilocked(
								&death->work,
								&proc->todo);
						binder_wakeup_proc_ilocked(
								proc);
					}
				} else {
					BUG_ON(death->work.type != BINDER_WORK_DEAD_BINDER);
					death->work.type = BINDER_WORK_DEAD_BINDER_AND_CLEAR;
				}
				binder_inner_proc_unlock(proc);
			}
			binder_node_unlock(ref->node);
			binder_proc_unlock(proc);
		} break;
		/* Payload: cookie of a notification on proc->delivered_death. */
		case BC_DEAD_BINDER_DONE: {
			struct binder_work *w;
			binder_uintptr_t cookie;
			struct binder_ref_death *death = NULL;

			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
				return -EFAULT;

			ptr += sizeof(cookie);
			binder_inner_proc_lock(proc);
			list_for_each_entry(w, &proc->delivered_death,
					    entry) {
				struct binder_ref_death *tmp_death =
					container_of(w,
						     struct binder_ref_death,
						     work);

				if (tmp_death->cookie == cookie) {
					death = tmp_death;
					break;
				}
			}
			binder_debug(BINDER_DEBUG_DEAD_BINDER,
				     "%d:%d BC_DEAD_BINDER_DONE %016llx found %pK\n",
				     proc->pid, thread->pid, (u64)cookie,
				     death);
			if (death == NULL) {
				binder_user_error("%d:%d BC_DEAD_BINDER_DONE %016llx not found\n",
					proc->pid, thread->pid, (u64)cookie);
				binder_inner_proc_unlock(proc);
				break;
			}
			binder_dequeue_work_ilocked(&death->work);
			/* A clear requested while delivered is queued now. */
			if (death->work.type == BINDER_WORK_DEAD_BINDER_AND_CLEAR) {
				death->work.type = BINDER_WORK_CLEAR_DEATH_NOTIFICATION;
				if (thread->looper &
					(BINDER_LOOPER_STATE_REGISTERED |
					 BINDER_LOOPER_STATE_ENTERED))
					binder_enqueue_thread_work_ilocked(
						thread, &death->work);
				else {
					binder_enqueue_work_ilocked(
							&death->work,
							&proc->todo);
					binder_wakeup_proc_ilocked(proc);
				}
			}
			binder_inner_proc_unlock(proc);
		} break;

		/*
		 * Freeze-notification commands: unlike most cases above, a
		 * helper error is returned to the caller of this function.
		 */
		case BC_REQUEST_FREEZE_NOTIFICATION: {
			struct binder_handle_cookie handle_cookie;
			int error;

			if (copy_from_user(&handle_cookie, ptr, sizeof(handle_cookie)))
				return -EFAULT;
			ptr += sizeof(handle_cookie);
			error = binder_request_freeze_notification(proc, thread,
								   &handle_cookie);
			if (error)
				return error;
		} break;

		case BC_CLEAR_FREEZE_NOTIFICATION: {
			struct binder_handle_cookie handle_cookie;
			int error;

			if (copy_from_user(&handle_cookie, ptr, sizeof(handle_cookie)))
				return -EFAULT;
			ptr += sizeof(handle_cookie);
			error = binder_clear_freeze_notification(proc, thread, &handle_cookie);
			if (error)
				return error;
		} break;

		case BC_FREEZE_NOTIFICATION_DONE: {
			binder_uintptr_t cookie;
			int error;

			if (get_user(cookie, (binder_uintptr_t __user *)ptr))
				return -EFAULT;

			ptr += sizeof(cookie);
			error = binder_freeze_notification_done(proc, thread, cookie);
			if (error)
				return error;
		} break;

default: pr_err("%d:%d unknown command %u\n", proc->pid, thread->pid, cmd); return -EINVAL; } *consumed = ptr - buffer; } return 0; } static void binder_stat_br(struct binder_proc *proc, struct binder_thread *thread, uint32_t cmd) { trace_binder_return(cmd); if (_IOC_NR(cmd) < ARRAY_SIZE(binder_stats.br)) { atomic_inc(&binder_stats.br[_IOC_NR(cmd)]); atomic_inc(&proc->stats.br[_IOC_NR(cmd)]); atomic_inc(&thread->stats.br[_IOC_NR(cmd)]); } } static int binder_put_node_cmd(struct binder_proc *proc, struct binder_thread *thread, void __user **ptrp, binder_uintptr_t node_ptr, binder_uintptr_t node_cookie, int node_debug_id, uint32_t cmd, const char *cmd_name) { void __user *ptr = *ptrp; if (put_user(cmd, (uint32_t __user *)ptr)) return -EFAULT; ptr += sizeof(uint32_t); if (put_user(node_ptr, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); if (put_user(node_cookie, (binder_uintptr_t __user *)ptr)) return -EFAULT; ptr += sizeof(binder_uintptr_t); binder_stat_br(proc, thread, cmd); binder_debug(BINDER_DEBUG_USER_REFS, "%d:%d %s %d u%016llx c%016llx\n", proc->pid, thread->pid, cmd_name, node_debug_id, (u64)node_ptr, (u64)node_cookie); *ptrp = ptr; return 0; } static int binder_wait_for_work(struct binder_thread *thread, bool do_proc_work) { DEFINE_WAIT(wait); struct binder_proc *proc = thread->proc; int ret = 0; binder_inner_proc_lock(proc); for (;;) { prepare_to_wait(&thread->wait, &wait, TASK_INTERRUPTIBLE|TASK_FREEZABLE); if (binder_has_work_ilocked(thread, do_proc_work)) break; if (do_proc_work) list_add(&thread->waiting_thread_node, &proc->waiting_threads); binder_inner_proc_unlock(proc); schedule(); binder_inner_proc_lock(proc); list_del_init(&thread->waiting_thread_node); if (signal_pending(current)) { ret = -EINTR; break; } } finish_wait(&thread->wait, &wait); binder_inner_proc_unlock(proc); return ret; } /** * binder_apply_fd_fixups() - finish fd translation * @proc: binder_proc associated @t->buffer * @t: binder transaction with 
list of fd fixups
 *
 * Now that we are in the context of the transaction target
 * process, we can allocate and install fds. Process the
 * list of fds to translate and fixup the buffer with the
 * new fds first and only then install the files.
 *
 * If we fail to allocate an fd, skip the install and release
 * any fds that have already been allocated.
 */
/*
 * Return: 0 on success, -ENOMEM when get_unused_fd_flags() fails, or
 * -EINVAL when the new fd number cannot be copied into the buffer.
 * On either failure the fixup list is handed to binder_free_txn_fixups().
 */
static int binder_apply_fd_fixups(struct binder_proc *proc,
				  struct binder_transaction *t)
{
	struct binder_txn_fd_fixup *fixup, *tmp;
	int ret = 0;

	/* Pass 1: reserve an fd per fixup and patch its number into the buffer. */
	list_for_each_entry(fixup, &t->fd_fixups, fixup_entry) {
		int fd = get_unused_fd_flags(O_CLOEXEC);

		if (fd < 0) {
			binder_debug(BINDER_DEBUG_TRANSACTION,
				     "failed fd fixup txn %d fd %d\n",
				     t->debug_id, fd);
			ret = -ENOMEM;
			goto err;
		}
		binder_debug(BINDER_DEBUG_TRANSACTION,
			     "fd fixup txn %d fd %d\n",
			     t->debug_id, fd);
		trace_binder_transaction_fd_recv(t, fd, fixup->offset);
		/* Remember the fd so pass 2 (or the error path) can find it. */
		fixup->target_fd = fd;
		if (binder_alloc_copy_to_buffer(&proc->alloc, t->buffer,
						fixup->offset, &fd,
						sizeof(u32))) {
			ret = -EINVAL;
			goto err;
		}
	}
	/* Pass 2: every fd was reserved, so bind the files and free the fixups. */
	list_for_each_entry_safe(fixup, tmp, &t->fd_fixups, fixup_entry) {
		fd_install(fixup->target_fd, fixup->file);
		list_del(&fixup->fixup_entry);
		kfree(fixup);
	}

	return ret;

err:
	binder_free_txn_fixups(t);
	return ret;
}

/*
 * binder_thread_read() - fill a userspace read buffer with BR_* commands
 * @proc:          binder_proc the calling thread belongs to
 * @thread:        calling binder_thread
 * @binder_buffer: userspace address of the read buffer
 * @size:          size of the read buffer in bytes
 * @consumed:      in: bytes already present in the buffer;
 *                 out: bytes present after this call
 * @non_block:     when non-zero, return -EAGAIN instead of waiting for work
 *
 * A BR_NOOP is written first when *@consumed is 0. Work items are then
 * dequeued from thread->todo, or from proc->todo when the thread is
 * available for process work, and translated into return commands. The
 * loop ends after one transaction/reply has been delivered, after a
 * BR_DEAD_BINDER or BR_FROZEN_BINDER has been written, when no work is
 * left, or when the remaining buffer space is too small.
 *
 * Before returning 0 the first 32-bit word of the buffer may be
 * overwritten with BR_SPAWN_LOOPER.
 *
 * Return: 0 on success, -EFAULT when a copy to userspace fails, -EAGAIN
 * for a non-blocking read with no work, or the non-zero result of
 * binder_wait_for_work() / binder_put_node_cmd().
 */
static int binder_thread_read(struct binder_proc *proc,
			      struct binder_thread *thread,
			      binder_uintptr_t binder_buffer, size_t size,
			      binder_size_t *consumed, int non_block)
{
	void __user *buffer = (void __user *)(uintptr_t)binder_buffer;
	void __user *ptr = buffer + *consumed;
	void __user *end = buffer + size;

	int ret = 0;
	int wait_for_proc_work;

	if (*consumed == 0) {
		if (put_user(BR_NOOP, (uint32_t __user *)ptr))
			return -EFAULT;
		ptr += sizeof(uint32_t);
	}

retry:
	binder_inner_proc_lock(proc);
	wait_for_proc_work = binder_available_for_proc_work_ilocked(thread);
	binder_inner_proc_unlock(proc);

	thread->looper |= BINDER_LOOPER_STATE_WAITING;

	trace_binder_wait_for_work(wait_for_proc_work,
				   !!thread->transaction_stack,
				   !binder_worklist_empty(proc, &thread->todo));
	if (wait_for_proc_work) {
		if (!(thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
					BINDER_LOOPER_STATE_ENTERED))) {
			binder_user_error("%d:%d ERROR: Thread waiting for process work before calling BC_REGISTER_LOOPER or BC_ENTER_LOOPER (state %x)\n",
				proc->pid, thread->pid, thread->looper);
			wait_event_interruptible(binder_user_error_wait,
						 binder_stop_on_user_error < 2);
		}
		binder_set_nice(proc->default_priority);
	}

	if (non_block) {
		if (!binder_has_work(thread, wait_for_proc_work))
			ret = -EAGAIN;
	} else {
		ret = binder_wait_for_work(thread, wait_for_proc_work);
	}

	thread->looper &= ~BINDER_LOOPER_STATE_WAITING;

	if (ret)
		return ret;

	while (1) {
		uint32_t cmd;
		struct binder_transaction_data_secctx tr;
		struct binder_transaction_data *trd = &tr.transaction_data;
		struct binder_work *w = NULL;
		struct list_head *list = NULL;
		struct binder_transaction *t = NULL;
		struct binder_thread *t_from;
		size_t trsize = sizeof(*trd);

		/* Thread-local work takes precedence over process-wide work. */
		binder_inner_proc_lock(proc);
		if (!binder_worklist_empty_ilocked(&thread->todo))
			list = &thread->todo;
		else if (!binder_worklist_empty_ilocked(&proc->todo) &&
			   wait_for_proc_work)
			list = &proc->todo;
		else {
			binder_inner_proc_unlock(proc);

			/* no data added */
			if (ptr - buffer == 4 && !thread->looper_need_return)
				goto retry;
			break;
		}

		/*
		 * Stop when the remaining space cannot hold sizeof(tr) + 4
		 * bytes (presumably a 32-bit command word plus the largest
		 * payload).
		 */
		if (end - ptr < sizeof(tr) + 4) {
			binder_inner_proc_unlock(proc);
			break;
		}
		w = binder_dequeue_work_head_ilocked(list);
		if (binder_worklist_empty_ilocked(&thread->todo))
			thread->process_todo = false;

		/* Every case below is entered with the inner lock held and drops it. */
		switch (w->type) {
		case BINDER_WORK_TRANSACTION: {
			binder_inner_proc_unlock(proc);
			t = container_of(w, struct binder_transaction, work);
		} break;
		case BINDER_WORK_RETURN_ERROR: {
			struct binder_error *e = container_of(
					w, struct binder_error, work);

			WARN_ON(e->cmd == BR_OK);
			binder_inner_proc_unlock(proc);
			if (put_user(e->cmd, (uint32_t __user *)ptr))
				return -EFAULT;
			cmd = e->cmd;
			/* Reset the slot to BR_OK once the error has been reported. */
			e->cmd = BR_OK;
			ptr += sizeof(uint32_t);

			binder_stat_br(proc, thread, cmd);
		} break;
		case BINDER_WORK_TRANSACTION_COMPLETE:
		case BINDER_WORK_TRANSACTION_PENDING:
		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT: {
			if (proc->oneway_spam_detection_enabled &&
				   w->type == BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT)
				cmd = BR_ONEWAY_SPAM_SUSPECT;
			else if (w->type == BINDER_WORK_TRANSACTION_PENDING)
				cmd = BR_TRANSACTION_PENDING_FROZEN;
			else
				cmd = BR_TRANSACTION_COMPLETE;
			binder_inner_proc_unlock(proc);
			kfree(w);
			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
			if (put_user(cmd, (uint32_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(uint32_t);

			binder_stat_br(proc, thread, cmd);
			binder_debug(BINDER_DEBUG_TRANSACTION_COMPLETE,
				     "%d:%d BR_TRANSACTION_COMPLETE\n",
				     proc->pid, thread->pid);
		} break;
		case BINDER_WORK_NODE: {
			struct binder_node *node = container_of(w, struct binder_node, work);
			int strong, weak;
			/*
			 * Snapshot the node fields now: the node may be freed
			 * below before the commands are written.
			 */
			binder_uintptr_t node_ptr = node->ptr;
			binder_uintptr_t node_cookie = node->cookie;
			int node_debug_id = node->debug_id;
			int has_weak_ref;
			int has_strong_ref;
			void __user *orig_ptr = ptr;

			BUG_ON(proc != node->proc);
			strong = node->internal_strong_refs ||
					node->local_strong_refs;
			weak = !hlist_empty(&node->refs) ||
					node->local_weak_refs ||
					node->tmp_refs || strong;
			has_strong_ref = node->has_strong_ref;
			has_weak_ref = node->has_weak_ref;

			if (weak && !has_weak_ref) {
				node->has_weak_ref = 1;
				node->pending_weak_ref = 1;
				node->local_weak_refs++;
			}
			if (strong && !has_strong_ref) {
				node->has_strong_ref = 1;
				node->pending_strong_ref = 1;
				node->local_strong_refs++;
			}
			if (!strong && has_strong_ref)
				node->has_strong_ref = 0;
			if (!weak && has_weak_ref)
				node->has_weak_ref = 0;
			if (!weak && !strong) {
				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
					     "%d:%d node %d u%016llx c%016llx deleted\n",
					     proc->pid, thread->pid,
					     node_debug_id,
					     (u64)node_ptr,
					     (u64)node_cookie);
				rb_erase(&node->rb_node, &proc->nodes);
				binder_inner_proc_unlock(proc);
				binder_node_lock(node);
				/*
				 * Acquire the node lock before freeing the
				 * node to serialize with other threads that
				 * may have been holding the node lock while
				 * decrementing this node (avoids race where
				 * this thread frees while the other thread
				 * is unlocking the node after the final
				 * decrement)
				 */
				binder_node_unlock(node);
				binder_free_node(node);
			} else
				binder_inner_proc_unlock(proc);

			/*
			 * Emit one command per state change; the first
			 * failure suppresses the remaining commands.
			 */
			if (weak && !has_weak_ref)
				ret = binder_put_node_cmd(
						proc, thread, &ptr, node_ptr,
						node_cookie, node_debug_id,
						BR_INCREFS, "BR_INCREFS");
			if (!ret && strong && !has_strong_ref)
				ret = binder_put_node_cmd(
						proc, thread, &ptr, node_ptr,
						node_cookie, node_debug_id,
						BR_ACQUIRE, "BR_ACQUIRE");
			if (!ret && !strong && has_strong_ref)
				ret = binder_put_node_cmd(
						proc, thread, &ptr, node_ptr,
						node_cookie, node_debug_id,
						BR_RELEASE, "BR_RELEASE");
			if (!ret && !weak && has_weak_ref)
				ret = binder_put_node_cmd(
						proc, thread, &ptr, node_ptr,
						node_cookie, node_debug_id,
						BR_DECREFS, "BR_DECREFS");
			if (orig_ptr == ptr)
				binder_debug(BINDER_DEBUG_INTERNAL_REFS,
					     "%d:%d node %d u%016llx c%016llx state unchanged\n",
					     proc->pid, thread->pid,
					     node_debug_id,
					     (u64)node_ptr,
					     (u64)node_cookie);
			if (ret)
				return ret;
		} break;
		case BINDER_WORK_DEAD_BINDER:
		case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
		case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
			struct binder_ref_death *death;
			uint32_t cmd;
			binder_uintptr_t cookie;

			death = container_of(w, struct binder_ref_death, work);
			if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION)
				cmd = BR_CLEAR_DEATH_NOTIFICATION_DONE;
			else
				cmd = BR_DEAD_BINDER;
			/* Copy the cookie out: death may be freed just below. */
			cookie = death->cookie;

			binder_debug(BINDER_DEBUG_DEATH_NOTIFICATION,
				     "%d:%d %s %016llx\n",
				      proc->pid, thread->pid,
				      cmd == BR_DEAD_BINDER ?
				      "BR_DEAD_BINDER" :
				      "BR_CLEAR_DEATH_NOTIFICATION_DONE",
				      (u64)cookie);
			if (w->type == BINDER_WORK_CLEAR_DEATH_NOTIFICATION) {
				binder_inner_proc_unlock(proc);
				kfree(death);
				binder_stats_deleted(BINDER_STAT_DEATH);
			} else {
				binder_enqueue_work_ilocked(
						w, &proc->delivered_death);
				binder_inner_proc_unlock(proc);
			}
			if (put_user(cmd, (uint32_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(uint32_t);
			if (put_user(cookie,
				     (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);
			binder_stat_br(proc, thread, cmd);
			if (cmd == BR_DEAD_BINDER)
				goto done; /* DEAD_BINDER notifications can cause transactions */
		} break;

		case BINDER_WORK_FROZEN_BINDER: {
			struct binder_ref_freeze *freeze;
			struct binder_frozen_state_info info;

			/* Zero the whole struct before it is copied to userspace. */
			memset(&info, 0, sizeof(info));
			freeze = container_of(w, struct binder_ref_freeze, work);
			info.is_frozen = freeze->is_frozen;
			info.cookie = freeze->cookie;
			freeze->sent = true;
			binder_enqueue_work_ilocked(w, &proc->delivered_freeze);
			binder_inner_proc_unlock(proc);

			if (put_user(BR_FROZEN_BINDER, (uint32_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(uint32_t);
			if (copy_to_user(ptr, &info, sizeof(info)))
				return -EFAULT;
			ptr += sizeof(info);
			binder_stat_br(proc, thread, BR_FROZEN_BINDER);
			goto done; /* BR_FROZEN_BINDER notifications can cause transactions */
		} break;

		case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: {
			struct binder_ref_freeze *freeze =
			    container_of(w, struct binder_ref_freeze, work);
			binder_uintptr_t cookie = freeze->cookie;

			binder_inner_proc_unlock(proc);
			kfree(freeze);
			binder_stats_deleted(BINDER_STAT_FREEZE);
			if (put_user(BR_CLEAR_FREEZE_NOTIFICATION_DONE, (uint32_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(uint32_t);
			if (put_user(cookie, (binder_uintptr_t __user *)ptr))
				return -EFAULT;
			ptr += sizeof(binder_uintptr_t);
			binder_stat_br(proc, thread, BR_CLEAR_FREEZE_NOTIFICATION_DONE);
		} break;

		default:
			binder_inner_proc_unlock(proc);
			pr_err("%d:%d: bad work type %d\n",
			       proc->pid, thread->pid, w->type);
			break;
		}

		/* Only BINDER_WORK_TRANSACTION sets t; other work loops for more. */
		if (!t)
			continue;

		BUG_ON(t->buffer == NULL);
		if (t->buffer->target_node) {
			struct binder_node *target_node = t->buffer->target_node;

			trd->target.ptr = target_node->ptr;
			trd->cookie =  target_node->cookie;
			t->saved_priority = task_nice(current);
			if (t->priority < target_node->min_priority &&
			    !(t->flags & TF_ONE_WAY))
				binder_set_nice(t->priority);
			else if (!(t->flags & TF_ONE_WAY) ||
				 t->saved_priority > target_node->min_priority)
				binder_set_nice(target_node->min_priority);
			cmd = BR_TRANSACTION;
		} else {
			/* No target node: this work item is delivered as a reply. */
			trd->target.ptr = 0;
			trd->cookie = 0;
			cmd = BR_REPLY;
		}
		trd->code = t->code;
		trd->flags = t->flags;
		trd->sender_euid = from_kuid(current_user_ns(), t->sender_euid);

		t_from = binder_get_txn_from(t);
		if (t_from) {
			struct task_struct *sender = t_from->proc->tsk;

			trd->sender_pid =
				task_tgid_nr_ns(sender,
						task_active_pid_ns(current));
		} else {
			trd->sender_pid = 0;
		}

		ret = binder_apply_fd_fixups(proc, t);
		if (ret) {
			/*
			 * Save what is needed for cleanup and logging before
			 * t is passed to binder_cleanup_transaction().
			 */
			struct binder_buffer *buffer = t->buffer;
			bool oneway = !!(t->flags & TF_ONE_WAY);
			int tid = t->debug_id;

			if (t_from)
				binder_thread_dec_tmpref(t_from);
			buffer->transaction = NULL;
			binder_cleanup_transaction(t, "fd fixups failed",
						   BR_FAILED_REPLY);
			binder_free_buf(proc, thread, buffer, true);
			binder_debug(BINDER_DEBUG_FAILED_TRANSACTION,
				     "%d:%d %stransaction %d fd fixups failed %d/%d, line %d\n",
				     proc->pid, thread->pid,
				     oneway ? "async " :
					(cmd == BR_REPLY ? "reply " : ""),
				     tid, BR_FAILED_REPLY, ret, __LINE__);
			/* A failed reply is reported to the reader; other kinds are skipped. */
			if (cmd == BR_REPLY) {
				cmd = BR_FAILED_REPLY;
				if (put_user(cmd, (uint32_t __user *)ptr))
					return -EFAULT;
				ptr += sizeof(uint32_t);
				binder_stat_br(proc, thread, cmd);
				break;
			}
			continue;
		}
		trd->data_size = t->buffer->data_size;
		trd->offsets_size = t->buffer->offsets_size;
		trd->data.ptr.buffer = t->buffer->user_data;
		trd->data.ptr.offsets = trd->data.ptr.buffer +
					ALIGN(t->buffer->data_size,
					    sizeof(void *));

		tr.secctx = t->security_ctx;
		/* With a security context the larger secctx struct is copied out. */
		if (t->security_ctx) {
			cmd = BR_TRANSACTION_SEC_CTX;
			trsize = sizeof(tr);
		}
		if (put_user(cmd, (uint32_t __user *)ptr)) {
			if (t_from)
				binder_thread_dec_tmpref(t_from);

			binder_cleanup_transaction(t, "put_user failed",
						   BR_FAILED_REPLY);

			return -EFAULT;
		}
		ptr += sizeof(uint32_t);
		if (copy_to_user(ptr, &tr, trsize)) {
			if (t_from)
				binder_thread_dec_tmpref(t_from);

			binder_cleanup_transaction(t, "copy_to_user failed",
						   BR_FAILED_REPLY);

			return -EFAULT;
		}
		ptr += trsize;

		trace_binder_transaction_received(t);
		binder_stat_br(proc, thread, cmd);
		binder_debug(BINDER_DEBUG_TRANSACTION,
			     "%d:%d %s %d %d:%d, cmd %u size %zd-%zd\n",
			     proc->pid, thread->pid,
			     (cmd == BR_TRANSACTION) ? "BR_TRANSACTION" :
				(cmd == BR_TRANSACTION_SEC_CTX) ?
				     "BR_TRANSACTION_SEC_CTX" : "BR_REPLY",
			     t->debug_id, t_from ? t_from->proc->pid : 0,
			     t_from ? t_from->pid : 0, cmd,
			     t->buffer->data_size, t->buffer->offsets_size);

		if (t_from)
			binder_thread_dec_tmpref(t_from);
		t->buffer->allow_user_free = 1;
		if (cmd != BR_REPLY && !(t->flags & TF_ONE_WAY)) {
			/* Push t onto this thread's transaction stack. */
			binder_inner_proc_lock(thread->proc);
			t->to_parent = thread->transaction_stack;
			t->to_thread = thread;
			thread->transaction_stack = t;
			binder_inner_proc_unlock(thread->proc);
		} else {
			binder_free_transaction(t);
		}
		break;
	}

done:

	*consumed = ptr - buffer;
	binder_inner_proc_lock(proc);
	if (proc->requested_threads == 0 &&
	    list_empty(&thread->proc->waiting_threads) &&
	    proc->requested_threads_started < proc->max_threads &&
	    (thread->looper & (BINDER_LOOPER_STATE_REGISTERED |
	     BINDER_LOOPER_STATE_ENTERED)) /* the user-space code fails to */
	     /*spawn a new thread if we leave this out */) {
		proc->requested_threads++;
		binder_inner_proc_unlock(proc);
		binder_debug(BINDER_DEBUG_THREADS,
			     "%d:%d BR_SPAWN_LOOPER\n",
			     proc->pid, thread->pid);
		/* Written at the start of the buffer, replacing its first word. */
		if (put_user(BR_SPAWN_LOOPER, (uint32_t __user *)buffer))
			return -EFAULT;
		binder_stat_br(proc, thread, BR_SPAWN_LOOPER);
	} else
		binder_inner_proc_unlock(proc);
	return 0;
}

/*
 * binder_release_work() - dispose of every work item queued on @list
 * @proc: binder_proc whose inner lock protects @list
 * @list: work list to drain
 *
 * Items are dequeued one at a time under the inner lock and handled by
 * type after the lock is dropped. Transactions are cleaned up with
 * BR_DEAD_REPLY. Completion-type, death-clear and freeze-clear items
 * are freed. Return-error and node items are only dequeued. Any other
 * type is reported with pr_err() and not freed.
 */
static void binder_release_work(struct binder_proc *proc,
				struct list_head *list)
{
	struct binder_work *w;
	enum binder_work_type wtype;

	while (1) {
		binder_inner_proc_lock(proc);
		w = binder_dequeue_work_head_ilocked(list);
		/* Read the type while still holding the lock. */
		wtype = w ? w->type : 0;
		binder_inner_proc_unlock(proc);
		if (!w)
			return;

		switch (wtype) {
		case BINDER_WORK_TRANSACTION: {
			struct binder_transaction *t;

			t = container_of(w, struct binder_transaction, work);

			binder_cleanup_transaction(t, "process died.",
						   BR_DEAD_REPLY);
		} break;
		case BINDER_WORK_RETURN_ERROR: {
			struct binder_error *e = container_of(
					w, struct binder_error, work);

			binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
				"undelivered TRANSACTION_ERROR: %u\n",
				e->cmd);
		} break;
		case BINDER_WORK_TRANSACTION_PENDING:
		case BINDER_WORK_TRANSACTION_ONEWAY_SPAM_SUSPECT:
		case BINDER_WORK_TRANSACTION_COMPLETE: {
			binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
				"undelivered TRANSACTION_COMPLETE\n");
			kfree(w);
			binder_stats_deleted(BINDER_STAT_TRANSACTION_COMPLETE);
		} break;
		case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
		case BINDER_WORK_CLEAR_DEATH_NOTIFICATION: {
			struct binder_ref_death *death;

			death = container_of(w, struct binder_ref_death, work);
			binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
				"undelivered death notification, %016llx\n",
				(u64)death->cookie);
			kfree(death);
			binder_stats_deleted(BINDER_STAT_DEATH);
		} break;
		case BINDER_WORK_NODE:
			break;
		case BINDER_WORK_CLEAR_FREEZE_NOTIFICATION: {
			struct binder_ref_freeze *freeze;

			freeze = container_of(w, struct binder_ref_freeze, work);
			binder_debug(BINDER_DEBUG_DEAD_TRANSACTION,
				     "undelivered freeze notification, %016llx\n",
				     (u64)freeze->cookie);
			kfree(freeze);
			binder_stats_deleted(BINDER_STAT_FREEZE);
		} break;
		default:
			pr_err("unexpected work type, %d, not freed\n",
			       wtype);
			break;
		}
	}

}

/*
 * binder_get_thread_ilocked() - find or insert the binder_thread for current
 * @proc:       binder_proc whose thread tree is searched
 * @new_thread: zeroed binder_thread to insert when no match exists,
 *              or NULL for a pure lookup
 *
 * The proc->threads rbtree is keyed by pid and searched for current->pid.
 * binder_get_thread() calls this with proc's inner lock held.
 *
 * Return: the existing thread, @new_thread after initialising and
 * inserting it, or NULL when nothing matched and @new_thread is NULL.
 */
static struct binder_thread *binder_get_thread_ilocked(
		struct binder_proc *proc, struct binder_thread *new_thread)
{
	struct binder_thread *thread = NULL;
	struct rb_node *parent = NULL;
	struct rb_node **p = &proc->threads.rb_node;

	while (*p) {
		parent = *p;
		thread = rb_entry(parent, struct binder_thread, rb_node);

		if (current->pid < thread->pid)
			p = &(*p)->rb_left;
		else if (current->pid > thread->pid)
			p = &(*p)->rb_right;
		else
			return thread;
	}
	if (!new_thread)
		return NULL;
	/* Not found: initialise new_thread and link it where the search ended. */
	thread = new_thread;
	binder_stats_created(BINDER_STAT_THREAD);
	thread->proc = proc;
	thread->pid = current->pid;
	atomic_set(&thread->tmp_ref, 0);
	init_waitqueue_head(&thread->wait);
	INIT_LIST_HEAD(&thread->todo);
	rb_link_node(&thread->rb_node, parent, p);
	rb_insert_color(&thread->rb_node, &proc->threads);
	thread->looper_need_return = true;
	thread->return_error.work.type = BINDER_WORK_RETURN_ERROR;
	thread->return_error.cmd = BR_OK;
	thread->reply_error.work.type = BINDER_WORK_RETURN_ERROR;
	thread->reply_error.cmd = BR_OK;
	thread->ee.command = BR_OK;
	INIT_LIST_HEAD(&new_thread->waiting_thread_node);
	return thread;
}

/*
 * binder_get_thread() - get the binder_thread for current, creating it if needed
 * @proc: binder_proc the thread belongs to
 *
 * A lookup is done first under the inner lock. On a miss a zeroed
 * binder_thread is allocated with the lock dropped and the
 * lookup-or-insert is repeated; if a different thread is returned by
 * that second call, the unused allocation is freed.
 *
 * Return: the binder_thread, or NULL when the allocation fails.
 */
static struct binder_thread *binder_get_thread(struct binder_proc *proc)
{
	struct binder_thread *thread;
	struct binder_thread *new_thread;

	binder_inner_proc_lock(proc);
	thread = binder_get_thread_ilocked(proc, NULL);
	binder_inner_proc_unlock(proc);
	if (!thread) {
		new_thread = kzalloc(sizeof(*thread), GFP_KERNEL);
		if (new_thread == NULL)
			return NULL;
		binder_inner_proc_lock(proc);
		thread = binder_get_thread_ilocked(proc, new_thread);
		binder_inner_proc_unlock(proc);
		if (thread != new_thread)
			kfree(new_thread);
	}
	return thread;
}

/*
 * binder_free_proc() - free a binder_proc and drop the references it holds
 * @proc: binder_proc to free; its todo and delivered_death lists must be empty
 *
 * Drops the device reference (removing and freeing the device and its
 * context name when it was the last one), hands the allocator to
 * binder_alloc_deferred_release(), releases the task and cred
 * references, frees the dbitmap and finally frees @proc.
 */
static void binder_free_proc(struct binder_proc *proc)
{
	struct binder_device *device;

	BUG_ON(!list_empty(&proc->todo));
	BUG_ON(!list_empty(&proc->delivered_death));
	if (proc->outstanding_txns)
		pr_warn("%s: Unexpected outstanding_txns %d\n",
			__func__, proc->outstanding_txns);
	device = container_of(proc->context, struct binder_device, context);
	if (refcount_dec_and_test(&device->ref)) {
		binder_remove_device(device);
		kfree(proc->context->name);
		kfree(device);
	}
	binder_alloc_deferred_release(&proc->alloc);
	put_task_struct(proc->tsk);
	put_cred(proc->cred);
	binder_stats_deleted(BINDER_STAT_PROC);
	dbitmap_free(&proc->dmap);
	kfree(proc);
}

/*
 * binder_free_thread() - free a binder_thread
 * @thread: thread to free; its todo list must be empty
 *
 * Also drops a temporary reference on thread->proc via
 * binder_proc_dec_tmpref().
 */
static void binder_free_thread(struct binder_thread *thread)
{
	BUG_ON(!list_empty(&thread->todo));
	binder_stats_deleted(BINDER_STAT_THREAD);
	binder_proc_dec_tmpref(thread->proc);
	kfree(thread);
}
static int binder_thread_release(struct binder_proc *proc, struct binder_thread *thread) { struct binder_transaction *t; struct binder_transaction *send_reply = NULL; int active_transactions = 0; struct binder_transaction *last_t = NULL; binder_inner_proc_lock(thread->proc); /* * take a ref on the proc so it survives * after we remove this thread from proc->threads. * The corresponding dec is when we actually * free the thread in binder_free_thread() */ proc->tmp_ref++; /* * take a ref on this thread to ensure it * survives while we are releasing it */ atomic_inc(&thread->tmp_ref); rb_erase(&thread->rb_node, &proc->threads); t = thread->transaction_stack; if (t) { spin_lock(&t->lock); if (t->to_thread == thread) send_reply = t; } else { __acquire(&t->lock); } thread->is_dead = true; while (t) { last_t = t; active_transactions++; binder_debug(BINDER_DEBUG_DEAD_TRANSACTION, "release %d:%d transaction %d %s, still active\n", proc->pid, thread->pid, t->debug_id, (t->to_thread == thread) ? "in" : "out"); if (t->to_thread == thread) { thread->proc->outstanding_txns--; t->to_proc = NULL; t->to_thread = NULL; if (t->buffer) { t->buffer->transaction = NULL; t->buffer = NULL; } t = t->to_parent; } else if (t->from == thread) { t->from = NULL; t = t->from_parent; } else BUG(); spin_unlock(&last_t->lock); if (t) spin_lock(&t->lock); else __acquire(&t->lock); } /* annotation for sparse, lock not acquired in last iteration above */ __release(&t->lock); /* * If this thread used poll, make sure we remove the waitqueue from any * poll data structures holding it. */ if (thread->looper & BINDER_LOOPER_STATE_POLL) wake_up_pollfree(&thread->wait); binder_inner_proc_unlock(thread->proc); /* * This is needed to avoid races between wake_up_pollfree() above and * someone else removing the last entry from the queue for other reasons * (e.g. ep_remove_wait_queue() being called due to an epoll file * descriptor being closed). 
Such other users hold an RCU read lock, so * we can be sure they're done after we call synchronize_rcu(). */ if (thread->looper & BINDER_LOOPER_STATE_POLL) synchronize_rcu(); if (send_reply) binder_send_failed_reply(send_reply, BR_DEAD_REPLY); binder_release_work(proc, &thread->todo); binder_thread_dec_tmpref(thread); return active_transactions; } static __poll_t binder_poll(struct file *filp, struct poll_table_struct *wait) { struct binder_proc *proc = filp->private_data; struct binder_thread *thread = NULL; bool wait_for_proc_work; thread = binder_get_thread(proc); if (!thread) return EPOLLERR; binder_inner_proc_lock(thread->proc); thread->looper |= BINDER_LOOPER_STATE_POLL; wait_for_proc_work = binder_available_for_proc_work_ilocked(thread); binder_inner_proc_unlock(thread->proc); poll_wait(filp, &thread->wait, wait); if (binder_has_work(thread, wait_for_proc_work)) return EPOLLIN; return 0; } static int binder_ioctl_write_read(struct file *filp, unsigned long arg, struct binder_thread *thread) { int ret = 0; struct binder_proc *proc = filp->private_data; void __user *ubuf = (void __user *)arg; struct binder_write_read bwr; if (copy_from_user(&bwr, ubuf, sizeof(bwr))) return -EFAULT; binder_debug(BINDER_DEBUG_READ_WRITE, "%d:%d write %lld at %016llx, read %lld at %016llx\n", proc->pid, thread->pid, (u64)bwr.write_size, (u64)bwr.write_buffer, (u64)bwr.read_size, (u64)bwr.read_buffer); if (bwr.write_size > 0) { ret = binder_thread_write(proc, thread, bwr.write_buffer, bwr.write_size, &bwr.write_consumed); trace_binder_write_done(ret); if (ret < 0) { bwr.read_consumed = 0; goto out; } } if (bwr.read_size > 0) { ret = binder_thread_read(proc, thread, bwr.read_buffer, bwr.read_size, &bwr.read_consumed, filp->f_flags & O_NONBLOCK); trace_binder_read_done(ret); binder_inner_proc_lock(proc); if (!binder_worklist_empty_ilocked(&proc->todo)) binder_wakeup_proc_ilocked(proc); binder_inner_proc_unlock(proc); if (ret < 0) goto out; } binder_debug(BINDER_DEBUG_READ_WRITE, 
"%d:%d wrote %lld of %lld, read return %lld of %lld\n", proc->pid, thread->pid, (u64)bwr.write_consumed, (u64)bwr.write_size, (u64)bwr.read_consumed, (u64)bwr.read_size); out: if (copy_to_user(ubuf, &bwr, sizeof(bwr))) ret = -EFAULT; return ret; } static int binder_ioctl_set_ctx_mgr(struct file *filp, struct flat_binder_object *fbo) { int ret = 0; struct binder_proc *proc = filp->private_data; struct binder_context *context = proc->context; struct binder_node *new_node; kuid_t curr_euid = current_euid(); guard(mutex)(&context->context_mgr_node_lock); if (context->binder_context_mgr_node) { pr_err("BINDER_SET_CONTEXT_MGR already set\n"); return -EBUSY; } ret = security_binder_set_context_mgr(proc->cred); if (ret < 0) return ret; if (uid_valid(context->binder_context_mgr_uid)) { if (!uid_eq(context->binder_context_mgr_uid, curr_euid)) { pr_err("BINDER_SET_CONTEXT_MGR bad uid %d != %d\n", from_kuid(&init_user_ns, curr_euid), from_kuid(&init_user_ns, context->binder_context_mgr_uid)); return -EPERM; } } else { context->binder_context_mgr_uid = curr_euid; } new_node = binder_new_node(proc, fbo); if (!new_node) return -ENOMEM; binder_node_lock(new_node); new_node->local_weak_refs++; new_node->local_strong_refs++; new_node->has_strong_ref = 1; new_node->has_weak_ref = 1; context->binder_context_mgr_node = new_node; binder_node_unlock(new_node); binder_put_node(new_node); return ret; } static int binder_ioctl_get_node_info_for_ref(struct binder_proc *proc, struct binder_node_info_for_ref *info) { struct binder_node *node; struct binder_context *context = proc->context; __u32 handle = info->handle; if (info->strong_count || info->weak_count || info->reserved1 || info->reserved2 || info->reserved3) { binder_user_error("%d BINDER_GET_NODE_INFO_FOR_REF: only handle may be non-zero.", proc->pid); return -EINVAL; } /* This ioctl may only be used by the context manager */ mutex_lock(&context->context_mgr_node_lock); if (!context->binder_context_mgr_node || 
context->binder_context_mgr_node->proc != proc) { mutex_unlock(&context->context_mgr_node_lock); return -EPERM; } mutex_unlock(&context->context_mgr_node_lock); node = binder_get_node_from_ref(proc, handle, true, NULL); if (!node) return -EINVAL; info->strong_count = node->local_strong_refs + node->internal_strong_refs; info->weak_count = node->local_weak_refs; binder_put_node(node); return 0; } static int binder_ioctl_get_node_debug_info(struct binder_proc *proc, struct binder_node_debug_info *info) { struct rb_node *n; binder_uintptr_t ptr = info->ptr; memset(info, 0, sizeof(*info)); binder_inner_proc_lock(proc); for (n = rb_first(&proc->nodes); n != NULL; n = rb_next(n)) { struct binder_node *node = rb_entry(n, struct binder_node, rb_node); if (node->ptr > ptr) { info->ptr = node->ptr; info->cookie = node->cookie; info->has_strong_ref = node->has_strong_ref; info->has_weak_ref = node->has_weak_ref; break; } } binder_inner_proc_unlock(proc); return 0; } static bool binder_txns_pending_ilocked(struct binder_proc *proc) { struct rb_node *n; struct binder_thread *thread; if (proc->outstanding_txns > 0) return true; for (n = rb_first(&proc->threads); n; n = rb_next(n)) { thread = rb_entry(n, struct binder_thread, rb_node); if (thread->transaction_stack) return true; } return false; } static void binder_add_freeze_work(struct binder_proc *proc, bool is_frozen) { struct binder_node *prev = NULL; struct rb_node *n; struct binder_ref *ref; binder_inner_proc_lock(proc); for (n = rb_first(&proc->nodes); n; n = rb_next(n)) { struct binder_node *node; node = rb_entry(n, struct binder_node, rb_node); binder_inc_node_tmpref_ilocked(node); binder_inner_proc_unlock(proc); if (prev) binder_put_node(prev); binder_node_lock(node); hlist_for_each_entry(ref, &node->refs, node_entry) { /* * Need the node lock to synchronize * with new notification requests and the * inner lock to synchronize with queued * freeze notifications. 
*/ binder_inner_proc_lock(ref->proc); if (!ref->freeze) { binder_inner_proc_unlock(ref->proc); continue; } ref->freeze->work.type = BINDER_WORK_FROZEN_BINDER; if (list_empty(&ref->freeze->work.entry)) { ref->freeze->is_frozen = is_frozen; binder_enqueue_work_ilocked(&ref->freeze->work, &ref->proc->todo); binder_wakeup_proc_ilocked(ref->proc); } else { if (ref->freeze->sent && ref->freeze->is_frozen != is_frozen) ref->freeze->resend = true; ref->freeze->is_frozen = is_frozen; } binder_inner_proc_unlock(ref->proc); } prev = node; binder_node_unlock(node); binder_inner_proc_lock(proc); if (proc->is_dead) break; } binder_inner_proc_unlock(proc); if (prev) binder_put_node(prev); } static int binder_ioctl_freeze(struct binder_freeze_info *info, struct binder_proc *target_proc) { int ret = 0; if (!info->enable) { binder_inner_proc_lock(target_proc); target_proc->sync_recv = false; target_proc->async_recv = false; target_proc->is_frozen = false; binder_inner_proc_unlock(target_proc); binder_add_freeze_work(target_proc, false); return 0; } /* * Freezing the target. Prevent new transactions by * setting frozen state. If timeout specified, wait * for transactions to drain. 
*/ binder_inner_proc_lock(target_proc); target_proc->sync_recv = false; target_proc->async_recv = false; target_proc->is_frozen = true; binder_inner_proc_unlock(target_proc); if (info->timeout_ms > 0) ret = wait_event_interruptible_timeout( target_proc->freeze_wait, (!target_proc->outstanding_txns), msecs_to_jiffies(info->timeout_ms)); /* Check pending transactions that wait for reply */ if (ret >= 0) { binder_inner_proc_lock(target_proc); if (binder_txns_pending_ilocked(target_proc)) ret = -EAGAIN; binder_inner_proc_unlock(target_proc); } if (ret < 0) { binder_inner_proc_lock(target_proc); target_proc->is_frozen = false; binder_inner_proc_unlock(target_proc); } else { binder_add_freeze_work(target_proc, true); } return ret; } static int binder_ioctl_get_freezer_info( struct binder_frozen_status_info *info) { struct binder_proc *target_proc; bool found = false; __u32 txns_pending; info->sync_recv = 0; info->async_recv = 0; mutex_lock(&binder_procs_lock); hlist_for_each_entry(target_proc, &binder_procs, proc_node) { if (target_proc->pid == info->pid) { found = true; binder_inner_proc_lock(target_proc); txns_pending = binder_txns_pending_ilocked(target_proc); info->sync_recv |= target_proc->sync_recv | (txns_pending << 1); info->async_recv |= target_proc->async_recv; binder_inner_proc_unlock(target_proc); } } mutex_unlock(&binder_procs_lock); if (!found) return -EINVAL; return 0; } static int binder_ioctl_get_extended_error(struct binder_thread *thread, void __user *ubuf) { struct binder_extended_error ee; binder_inner_proc_lock(thread->proc); ee = thread->ee; binder_set_extended_error(&thread->ee, 0, BR_OK, 0); binder_inner_proc_unlock(thread->proc); if (copy_to_user(ubuf, &ee, sizeof(ee))) return -EFAULT; return 0; } static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { int ret; struct binder_proc *proc = filp->private_data; struct binder_thread *thread; void __user *ubuf = (void __user *)arg; trace_binder_ioctl(cmd, arg); ret = 
wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret) goto err_unlocked; thread = binder_get_thread(proc); if (thread == NULL) { ret = -ENOMEM; goto err; } switch (cmd) { case BINDER_WRITE_READ: ret = binder_ioctl_write_read(filp, arg, thread); if (ret) goto err; break; case BINDER_SET_MAX_THREADS: { u32 max_threads; if (copy_from_user(&max_threads, ubuf, sizeof(max_threads))) { ret = -EINVAL; goto err; } binder_inner_proc_lock(proc); proc->max_threads = max_threads; binder_inner_proc_unlock(proc); break; } case BINDER_SET_CONTEXT_MGR_EXT: { struct flat_binder_object fbo; if (copy_from_user(&fbo, ubuf, sizeof(fbo))) { ret = -EINVAL; goto err; } ret = binder_ioctl_set_ctx_mgr(filp, &fbo); if (ret) goto err; break; } case BINDER_SET_CONTEXT_MGR: ret = binder_ioctl_set_ctx_mgr(filp, NULL); if (ret) goto err; break; case BINDER_THREAD_EXIT: binder_debug(BINDER_DEBUG_THREADS, "%d:%d exit\n", proc->pid, thread->pid); binder_thread_release(proc, thread); thread = NULL; break; case BINDER_VERSION: { struct binder_version __user *ver = ubuf; if (put_user(BINDER_CURRENT_PROTOCOL_VERSION, &ver->protocol_version)) { ret = -EINVAL; goto err; } break; } case BINDER_GET_NODE_INFO_FOR_REF: { struct binder_node_info_for_ref info; if (copy_from_user(&info, ubuf, sizeof(info))) { ret = -EFAULT; goto err; } ret = binder_ioctl_get_node_info_for_ref(proc, &info); if (ret < 0) goto err; if (copy_to_user(ubuf, &info, sizeof(info))) { ret = -EFAULT; goto err; } break; } case BINDER_GET_NODE_DEBUG_INFO: { struct binder_node_debug_info info; if (copy_from_user(&info, ubuf, sizeof(info))) { ret = -EFAULT; goto err; } ret = binder_ioctl_get_node_debug_info(proc, &info); if (ret < 0) goto err; if (copy_to_user(ubuf, &info, sizeof(info))) { ret = -EFAULT; goto err; } break; } case BINDER_FREEZE: { struct binder_freeze_info info; struct binder_proc **target_procs = NULL, *target_proc; int target_procs_count = 0, i = 0; ret = 0; if (copy_from_user(&info, ubuf, 
sizeof(info))) { ret = -EFAULT; goto err; } mutex_lock(&binder_procs_lock); hlist_for_each_entry(target_proc, &binder_procs, proc_node) { if (target_proc->pid == info.pid) target_procs_count++; } if (target_procs_count == 0) { mutex_unlock(&binder_procs_lock); ret = -EINVAL; goto err; } target_procs = kcalloc(target_procs_count, sizeof(struct binder_proc *), GFP_KERNEL); if (!target_procs) { mutex_unlock(&binder_procs_lock); ret = -ENOMEM; goto err; } hlist_for_each_entry(target_proc, &binder_procs, proc_node) { if (target_proc->pid != info.pid) continue; binder_inner_proc_lock(target_proc); target_proc->tmp_ref++; binder_inner_proc_unlock(target_proc); target_procs[i++] = target_proc; } mutex_unlock(&binder_procs_lock); for (i = 0; i < target_procs_count; i++) { if (ret >= 0) ret = binder_ioctl_freeze(&info, target_procs[i]); binder_proc_dec_tmpref(target_procs[i]); } kfree(target_procs); if (ret < 0) goto err; break; } case BINDER_GET_FROZEN_INFO: { struct binder_frozen_status_info info; if (copy_from_user(&info, ubuf, sizeof(info))) { ret = -EFAULT; goto err; } ret = binder_ioctl_get_freezer_info(&info); if (ret < 0) goto err; if (copy_to_user(ubuf, &info, sizeof(info))) { ret = -EFAULT; goto err; } break; } case BINDER_ENABLE_ONEWAY_SPAM_DETECTION: { uint32_t enable; if (copy_from_user(&enable, ubuf, sizeof(enable))) { ret = -EFAULT; goto err; } binder_inner_proc_lock(proc); proc->oneway_spam_detection_enabled = (bool)enable; binder_inner_proc_unlock(proc); break; } case BINDER_GET_EXTENDED_ERROR: ret = binder_ioctl_get_extended_error(thread, ubuf); if (ret < 0) goto err; break; default: ret = -EINVAL; goto err; } ret = 0; err: if (thread) thread->looper_need_return = false; wait_event_interruptible(binder_user_error_wait, binder_stop_on_user_error < 2); if (ret && ret != -EINTR) pr_info("%d:%d ioctl %x %lx returned %d\n", proc->pid, current->pid, cmd, arg, ret); err_unlocked: trace_binder_ioctl_done(ret); return ret; } static void binder_vma_open(struct 
vm_area_struct *vma)
{
	struct binder_proc *proc = vma->vm_private_data;

	/* Trace only: range, size in K, flags and page protection of the area. */
	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
		     "%d open vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
		     proc->pid, vma->vm_start, vma->vm_end,
		     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
		     (unsigned long)pgprot_val(vma->vm_page_prot));
}

/*
 * binder_vma_close() - .close hook of binder_vm_ops.
 *
 * Logs the area that is going away and then passes the owning proc's
 * allocator to binder_alloc_vma_close().
 */
static void binder_vma_close(struct vm_area_struct *vma)
{
	struct binder_proc *proc = vma->vm_private_data;

	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
		     "%d close vm area %lx-%lx (%ld K) vma %lx pagep %lx\n",
		     proc->pid, vma->vm_start, vma->vm_end,
		     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
		     (unsigned long)pgprot_val(vma->vm_page_prot));
	binder_alloc_vma_close(&proc->alloc);
}

/*
 * binder_vm_fault() - .fault hook of binder_vm_ops.
 *
 * Unconditionally reports VM_FAULT_SIGBUS; @vmf is not inspected.
 */
VISIBLE_IF_KUNIT vm_fault_t binder_vm_fault(struct vm_fault *vmf)
{
	return VM_FAULT_SIGBUS;
}
EXPORT_SYMBOL_IF_KUNIT(binder_vm_fault);

/* VMA callbacks installed on every binder mapping by binder_mmap(). */
static const struct vm_operations_struct binder_vm_ops = {
	.open = binder_vma_open,
	.close = binder_vma_close,
	.fault = binder_vm_fault,
};

/*
 * binder_mmap() - mmap file operation for a binder fd.
 * @filp: binder file; ->private_data is the struct binder_proc
 * @vma:  area being mapped
 *
 * Return: -EINVAL if the caller's thread group leader is not the task that
 * opened the fd, -EPERM if @vma carries any FORBIDDEN_MMAP_FLAGS, otherwise
 * whatever binder_alloc_mmap_handler() returns.
 */
static int binder_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct binder_proc *proc = filp->private_data;

	if (proc->tsk != current->group_leader)
		return -EINVAL;

	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
		     "%s: %d %lx-%lx (%ld K) vma %lx pagep %lx\n",
		     __func__, proc->pid, vma->vm_start, vma->vm_end,
		     (vma->vm_end - vma->vm_start) / SZ_1K, vma->vm_flags,
		     (unsigned long)pgprot_val(vma->vm_page_prot));

	if (vma->vm_flags & FORBIDDEN_MMAP_FLAGS) {
		pr_err("%s: %d %lx-%lx %s failed %d\n", __func__,
		       proc->pid, vma->vm_start, vma->vm_end, "bad vm_flags", -EPERM);
		return -EPERM;
	}
	/*
	 * NOTE(review): the two masks are presumably "flags to set" and
	 * "flags to clear" - confirm against vm_flags_mod().
	 */
	vm_flags_mod(vma, VM_DONTCOPY | VM_MIXEDMAP, VM_MAYWRITE);

	vma->vm_ops = &binder_vm_ops;
	vma->vm_private_data = proc;

	return binder_alloc_mmap_handler(&proc->alloc, vma);
}

/*
 * binder_open() - open file operation for a binder device node.
 * @nodp: inode being opened (a binderfs device or a misc device)
 * @filp: file being opened; ->private_data is replaced by the new proc
 *
 * Allocates and initialises a struct binder_proc for the caller's thread
 * group, takes references on the group leader task, the opener's
 * credentials and the binder device, and links the proc into binder_procs.
 * Per-PID debugfs/binderfs log files are created only when no other proc
 * with the same PID is already on the list.
 *
 * Return: 0 on success, -ENOMEM if the proc cannot be allocated. A failure
 * to create the binderfs log file is only warned about.
 */
static int binder_open(struct inode *nodp, struct file *filp)
{
	struct binder_proc *proc, *itr;
	struct binder_device *binder_dev;
	struct binderfs_info *info;
	struct dentry *binder_binderfs_dir_entry_proc = NULL;
	bool existing_pid = false;

	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
		     "%s: %d:%d\n", __func__,
		     current->group_leader->pid, current->pid);

	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
	if (proc == NULL)
		return -ENOMEM;

	dbitmap_init(&proc->dmap);
	spin_lock_init(&proc->inner_lock);
	spin_lock_init(&proc->outer_lock);
	/* The proc is keyed on the thread group leader, not the opening thread. */
	get_task_struct(current->group_leader);
	proc->tsk = current->group_leader;
	proc->cred = get_cred(filp->f_cred);
	INIT_LIST_HEAD(&proc->todo);
	init_waitqueue_head(&proc->freeze_wait);
	proc->default_priority = task_nice(current);
	/* binderfs stashes devices in i_private */
	if (is_binderfs_device(nodp)) {
		binder_dev = nodp->i_private;
		info = nodp->i_sb->s_fs_info;
		binder_binderfs_dir_entry_proc = info->proc_log_dir;
	} else {
		/* Otherwise private_data holds the miscdevice embedded in the binder_device. */
		binder_dev = container_of(filp->private_data,
					  struct binder_device, miscdev);
	}
	refcount_inc(&binder_dev->ref);
	proc->context = &binder_dev->context;
	binder_alloc_init(&proc->alloc);

	binder_stats_created(BINDER_STAT_PROC);
	proc->pid = current->group_leader->pid;
	INIT_LIST_HEAD(&proc->delivered_death);
	INIT_LIST_HEAD(&proc->delivered_freeze);
	INIT_LIST_HEAD(&proc->waiting_threads);
	filp->private_data = proc;

	/* Look for an earlier proc with this PID before publishing the new one. */
	mutex_lock(&binder_procs_lock);
	hlist_for_each_entry(itr, &binder_procs, proc_node) {
		if (itr->pid == proc->pid) {
			existing_pid = true;
			break;
		}
	}
	hlist_add_head(&proc->proc_node, &binder_procs);
	mutex_unlock(&binder_procs_lock);

	if (binder_debugfs_dir_entry_proc && !existing_pid) {
		/* Room for a 10-digit decimal value plus the terminating NUL. */
		char strbuf[11];

		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
		/*
		 * proc debug entries are shared between contexts.
		 * Only create for the first PID to avoid debugfs log spamming.
		 * The printing code will anyway print all contexts for a given
		 * PID so this is not a problem.
		 */
		proc->debugfs_entry = debugfs_create_file(strbuf, 0444,
			binder_debugfs_dir_entry_proc,
			(void *)(unsigned long)proc->pid,
			&proc_fops);
	}

	if (binder_binderfs_dir_entry_proc && !existing_pid) {
		char strbuf[11];
		struct dentry *binderfs_entry;

		snprintf(strbuf, sizeof(strbuf), "%u", proc->pid);
		/*
		 * Similar to debugfs, the process specific log file is shared
		 * between contexts. Only create for the first PID.
		 * This is ok since same as debugfs, the log file will contain
		 * information on all contexts of a given PID.
		 */
		binderfs_entry = binderfs_create_file(binder_binderfs_dir_entry_proc,
			strbuf, &proc_fops, (void *)(unsigned long)proc->pid);
		if (!IS_ERR(binderfs_entry)) {
			proc->binderfs_entry = binderfs_entry;
		} else {
			int error;

			/* Not fatal: the open still succeeds without the log file. */
			error = PTR_ERR(binderfs_entry);
			pr_warn("Unable to create file %s in binderfs (error %d)\n",
				strbuf, error);
		}
	}

	return 0;
}

/*
 * binder_flush() - flush file operation; only queues BINDER_DEFERRED_FLUSH
 * for the proc and always returns 0. @id is unused.
 */
static int binder_flush(struct file *filp, fl_owner_t id)
{
	struct binder_proc *proc = filp->private_data;

	binder_defer_work(proc, BINDER_DEFERRED_FLUSH);

	return 0;
}

/*
 * binder_deferred_flush() - deferred half of binder_flush().
 *
 * Under the inner proc lock, marks every thread of @proc as needing to
 * return and wakes those whose looper state has BINDER_LOOPER_STATE_WAITING
 * set. The number of threads woken is only used for the debug message.
 */
static void binder_deferred_flush(struct binder_proc *proc)
{
	struct rb_node *n;
	int wake_count = 0;

	binder_inner_proc_lock(proc);
	for (n = rb_first(&proc->threads); n != NULL; n = rb_next(n)) {
		struct binder_thread *thread = rb_entry(n, struct binder_thread, rb_node);

		thread->looper_need_return = true;
		if (thread->looper & BINDER_LOOPER_STATE_WAITING) {
			wake_up_interruptible(&thread->wait);
			wake_count++;
		}
	}
	binder_inner_proc_unlock(proc);

	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
		     "binder_flush: %d woke %d threads\n", proc->pid,
		     wake_count);
}

/*
 * binder_release() - release file operation for a binder fd.
 *
 * Removes the per-PID debugfs and binderfs entries recorded on the proc and
 * queues BINDER_DEFERRED_RELEASE; the actual teardown happens in
 * binder_deferred_release(). Always returns 0.
 */
static int binder_release(struct inode *nodp, struct file *filp)
{
	struct binder_proc *proc = filp->private_data;

	debugfs_remove(proc->debugfs_entry);

	if (proc->binderfs_entry) {
		simple_recursive_removal(proc->binderfs_entry, NULL);
		proc->binderfs_entry = NULL;
	}

	binder_defer_work(proc, BINDER_DEFERRED_RELEASE);

	return 0;
}

/*
 * binder_node_release() - detach @node from its dying owner process.
 * @node: node to release; the caller holds a temporary reference on it
 * @refs: running total of incoming references counted so far
 *
 * Return: @refs unchanged if the node had no references and was freed,
 * otherwise @refs plus the number of entries on @node->refs.
 */
static int binder_node_release(struct binder_node *node, int refs)
{
	struct binder_ref *ref;
	int
death = 0;
	struct binder_proc *proc = node->proc;

	binder_release_work(proc, &node->async_todo);

	binder_node_lock(node);
	binder_inner_proc_lock(proc);
	binder_dequeue_work_ilocked(&node->work);
	/*
	 * The caller must have taken a temporary ref on the node.
	 */
	BUG_ON(!node->tmp_refs);
	if (hlist_empty(&node->refs) && node->tmp_refs == 1) {
		/* Nobody else references the node: free it right away. */
		binder_inner_proc_unlock(proc);
		binder_node_unlock(node);
		binder_free_node(node);

		return refs;
	}

	/* Still referenced: turn it into a dead node (no owning proc). */
	node->proc = NULL;
	node->local_strong_refs = 0;
	node->local_weak_refs = 0;
	binder_inner_proc_unlock(proc);

	spin_lock(&binder_dead_nodes_lock);
	hlist_add_head(&node->dead_node, &binder_dead_nodes);
	spin_unlock(&binder_dead_nodes_lock);

	hlist_for_each_entry(ref, &node->refs, node_entry) {
		refs++;
		/*
		 * Need the node lock to synchronize
		 * with new notification requests and the
		 * inner lock to synchronize with queued
		 * death notifications.
		 */
		binder_inner_proc_lock(ref->proc);
		if (!ref->death) {
			binder_inner_proc_unlock(ref->proc);
			continue;
		}

		death++;

		/* Queue the death notification on the referencing proc and wake it. */
		BUG_ON(!list_empty(&ref->death->work.entry));
		ref->death->work.type = BINDER_WORK_DEAD_BINDER;
		binder_enqueue_work_ilocked(&ref->death->work,
					    &ref->proc->todo);
		binder_wakeup_proc_ilocked(ref->proc);
		binder_inner_proc_unlock(ref->proc);
	}

	binder_debug(BINDER_DEBUG_DEAD_BINDER,
		     "node %d now dead, refs %d, death %d\n",
		     node->debug_id, refs, death);
	binder_node_unlock(node);
	/* Drops the temporary reference the caller took. */
	binder_put_node(node);

	return refs;
}

/*
 * binder_deferred_release() - tear down @proc after its fd was released.
 *
 * Unlinks the proc from binder_procs, clears the context manager node if it
 * belongs to this proc, marks the proc dead and then releases its threads,
 * nodes, references and queued work. A temporary proc reference is held for
 * the duration and dropped at the end.
 */
static void binder_deferred_release(struct binder_proc *proc)
{
	struct binder_context *context = proc->context;
	struct rb_node *n;
	int threads, nodes, incoming_refs, outgoing_refs, active_transactions;

	mutex_lock(&binder_procs_lock);
	hlist_del(&proc->proc_node);
	mutex_unlock(&binder_procs_lock);

	mutex_lock(&context->context_mgr_node_lock);
	if (context->binder_context_mgr_node &&
	    context->binder_context_mgr_node->proc == proc) {
		binder_debug(BINDER_DEBUG_DEAD_BINDER,
			     "%s: %d context_mgr_node gone\n",
			     __func__, proc->pid);
		context->binder_context_mgr_node = NULL;
	}
mutex_unlock(&context->context_mgr_node_lock);
	binder_inner_proc_lock(proc);
	/*
	 * Make sure proc stays alive after we
	 * remove all the threads
	 */
	proc->tmp_ref++;

	proc->is_dead = true;
	proc->is_frozen = false;
	proc->sync_recv = false;
	proc->async_recv = false;
	threads = 0;
	active_transactions = 0;
	/*
	 * Re-read rb_first() every iteration: the inner lock is dropped around
	 * binder_thread_release(), so a cached iterator could go stale.
	 */
	while ((n = rb_first(&proc->threads))) {
		struct binder_thread *thread;

		thread = rb_entry(n, struct binder_thread, rb_node);
		binder_inner_proc_unlock(proc);
		threads++;
		active_transactions += binder_thread_release(proc, thread);
		binder_inner_proc_lock(proc);
	}

	nodes = 0;
	incoming_refs = 0;
	while ((n = rb_first(&proc->nodes))) {
		struct binder_node *node;

		node = rb_entry(n, struct binder_node, rb_node);
		nodes++;
		/*
		 * take a temporary ref on the node before
		 * calling binder_node_release() which will either
		 * kfree() the node or call binder_put_node()
		 */
		binder_inc_node_tmpref_ilocked(node);
		rb_erase(&node->rb_node, &proc->nodes);
		binder_inner_proc_unlock(proc);
		incoming_refs = binder_node_release(node, incoming_refs);
		binder_inner_proc_lock(proc);
	}
	binder_inner_proc_unlock(proc);

	outgoing_refs = 0;
	binder_proc_lock(proc);
	while ((n = rb_first(&proc->refs_by_desc))) {
		struct binder_ref *ref;

		ref = rb_entry(n, struct binder_ref, rb_node_desc);
		outgoing_refs++;
		/* Unlink under the proc lock, free with the lock dropped. */
		binder_cleanup_ref_olocked(ref);
		binder_proc_unlock(proc);
		binder_free_ref(ref);
		binder_proc_lock(proc);
	}
	binder_proc_unlock(proc);

	binder_release_work(proc, &proc->todo);
	binder_release_work(proc, &proc->delivered_death);
	binder_release_work(proc, &proc->delivered_freeze);

	binder_debug(BINDER_DEBUG_OPEN_CLOSE,
		     "%s: %d threads %d, nodes %d (ref %d), refs %d, active transactions %d\n",
		     __func__, proc->pid, threads, nodes, incoming_refs,
		     outgoing_refs, active_transactions);

	/* Pairs with the tmp_ref++ taken at the top of the teardown. */
	binder_proc_dec_tmpref(proc);
}

/*
 * binder_deferred_func() - work handler behind binder_deferred_work.
 *
 * Repeatedly takes the first proc off binder_deferred_list under
 * binder_deferred_lock, snapshots and clears its pending deferred_work bits,
 * and runs the flush and/or release handlers with the lock dropped. The loop
 * ends once the list is found empty.
 */
static void binder_deferred_func(struct work_struct *work)
{
	struct binder_proc *proc;

	int defer;

	do {
		mutex_lock(&binder_deferred_lock);
		if (!hlist_empty(&binder_deferred_list)) {
			proc =
hlist_entry(binder_deferred_list.first,
					struct binder_proc, deferred_work_node);
			hlist_del_init(&proc->deferred_work_node);
			defer = proc->deferred_work;
			proc->deferred_work = 0;
		} else {
			proc = NULL;
			defer = 0;
		}
		mutex_unlock(&binder_deferred_lock);

		if (defer & BINDER_DEFERRED_FLUSH)
			binder_deferred_flush(proc);

		if (defer & BINDER_DEFERRED_RELEASE)
			binder_deferred_release(proc); /* frees proc */
		/* proc is only compared against NULL below, never dereferenced. */
	} while (proc);
}
static DECLARE_WORK(binder_deferred_work, binder_deferred_func);

/*
 * binder_defer_work() - request deferred flush/release processing.
 * @proc:  proc the work is for
 * @defer: bit(s) to OR into @proc->deferred_work
 *
 * Runs under binder_deferred_lock (scope guard). The proc is added to
 * binder_deferred_list and the work item scheduled only if the proc is not
 * already queued.
 */
static void
binder_defer_work(struct binder_proc *proc, enum binder_deferred_state defer)
{
	guard(mutex)(&binder_deferred_lock);
	proc->deferred_work |= defer;
	if (hlist_unhashed(&proc->deferred_work_node)) {
		hlist_add_head(&proc->deferred_work_node,
				&binder_deferred_list);
		schedule_work(&binder_deferred_work);
	}
}

/*
 * print_binder_transaction_ilocked() - dump one transaction to a seq_file.
 * @m:      output
 * @proc:   proc whose lock the caller holds (per the _ilocked suffix)
 * @prefix: text printed before the transaction id
 * @t:      transaction to print
 *
 * The header fields are read under @t->lock. Buffer details are appended
 * only when @proc is the transaction's target proc; otherwise the line is
 * terminated right after the header.
 */
static void print_binder_transaction_ilocked(struct seq_file *m,
					     struct binder_proc *proc,
					     const char *prefix,
					     struct binder_transaction *t)
{
	struct binder_proc *to_proc;
	struct binder_buffer *buffer = t->buffer;
	ktime_t current_time = ktime_get();

	spin_lock(&t->lock);
	to_proc = t->to_proc;
	seq_printf(m,
		   "%s %d: %pK from %d:%d to %d:%d code %x flags %x pri %ld r%d elapsed %lldms",
		   prefix, t->debug_id, t,
		   t->from_pid,
		   t->from_tid,
		   to_proc ? to_proc->pid : 0,
		   t->to_thread ? t->to_thread->pid : 0,
		   t->code, t->flags, t->priority, t->need_reply,
		   ktime_ms_delta(current_time, t->start_time));
	spin_unlock(&t->lock);

	if (proc != to_proc) {
		/*
		 * Can only safely deref buffer if we are holding the
		 * correct proc inner lock for this node
		 */
		seq_puts(m, "\n");
		return;
	}

	if (buffer == NULL) {
		seq_puts(m, " buffer free\n");
		return;
	}
	if (buffer->target_node)
		seq_printf(m, " node %d", buffer->target_node->debug_id);
	/* The offset is printed relative to the start of the proc's mapping. */
	seq_printf(m, " size %zd:%zd offset %lx\n",
		   buffer->data_size, buffer->offsets_size,
		   buffer->user_data - proc->alloc.vm_start);
}

/*
 * print_binder_work_ilocked() - dump one queued work item to a seq_file.
 * @m:                  output
 * @proc:               proc passed through to the transaction printer
 * @prefix:             prefix for every non-transaction line
 * @transaction_prefix: prefix used when @w is a transaction
 * @w:                  work item; its ->type selects the output
 * @hash_ptrs:          print node ptr/cookie with %p instead of raw hex
 */
static void print_binder_work_ilocked(struct seq_file *m,
				      struct binder_proc *proc,
				      const char *prefix,
				      const char *transaction_prefix,
				      struct binder_work *w, bool hash_ptrs)
{
	struct binder_node *node;
	struct binder_transaction *t;

	switch (w->type) {
	case BINDER_WORK_TRANSACTION:
		t = container_of(w, struct binder_transaction, work);
		print_binder_transaction_ilocked(
				m, proc, transaction_prefix, t);
		break;
	case BINDER_WORK_RETURN_ERROR: {
		struct binder_error *e = container_of(
				w, struct binder_error, work);

		seq_printf(m, "%stransaction error: %u\n",
			   prefix, e->cmd);
	} break;
	case BINDER_WORK_TRANSACTION_COMPLETE:
		seq_printf(m, "%stransaction complete\n", prefix);
		break;
	case BINDER_WORK_NODE:
		node = container_of(w, struct binder_node, work);
		if (hash_ptrs)
			seq_printf(m, "%snode work %d: u%p c%p\n",
				   prefix, node->debug_id,
				   (void *)(long)node->ptr,
				   (void *)(long)node->cookie);
		else
			seq_printf(m, "%snode work %d: u%016llx c%016llx\n",
				   prefix, node->debug_id,
				   (u64)node->ptr, (u64)node->cookie);
		break;
	case BINDER_WORK_DEAD_BINDER:
		seq_printf(m, "%shas dead binder\n", prefix);
		break;
	case BINDER_WORK_DEAD_BINDER_AND_CLEAR:
		seq_printf(m, "%shas cleared dead binder\n", prefix);
		break;
	case BINDER_WORK_CLEAR_DEATH_NOTIFICATION:
		seq_printf(m, "%shas cleared death notification\n", prefix);
		break;
	case BINDER_WORK_FROZEN_BINDER:
		seq_printf(m, "%shas frozen binder\n", prefix);
		break;
	case
BINDER_WORK_CLEAR_FREEZE_NOTIFICATION:
		seq_printf(m, "%shas cleared freeze notification\n", prefix);
		break;
	default:
		seq_printf(m, "%sunknown work: type %d\n", prefix, w->type);
		break;
	}
}

/*
 * print_binder_thread_ilocked() - dump one binder thread to a seq_file.
 * @m:            output
 * @thread:       thread to print
 * @print_always: keep the thread header even if nothing follows it
 * @hash_ptrs:    forwarded to print_binder_work_ilocked()
 *
 * Prints a header line, then walks the thread's transaction stack and its
 * todo list. When @print_always is false and nothing was emitted after the
 * header, the seq_file position is rewound so the header disappears too.
 */
static void print_binder_thread_ilocked(struct seq_file *m,
					struct binder_thread *thread,
					bool print_always, bool hash_ptrs)
{
	struct binder_transaction *t;
	struct binder_work *w;
	size_t start_pos = m->count;
	size_t header_pos;

	seq_printf(m, " thread %d: l %02x need_return %d tr %d\n",
			thread->pid, thread->looper,
			thread->looper_need_return,
			atomic_read(&thread->tmp_ref));
	header_pos = m->count;
	t = thread->transaction_stack;
	while (t) {
		/* Follow the parent link that matches this thread's role in t. */
		if (t->from == thread) {
			print_binder_transaction_ilocked(m, thread->proc,
					" outgoing transaction", t);
			t = t->from_parent;
		} else if (t->to_thread == thread) {
			print_binder_transaction_ilocked(m, thread->proc,
						 " incoming transaction", t);
			t = t->to_parent;
		} else {
			/* Thread is neither sender nor target: print and stop. */
			print_binder_transaction_ilocked(m, thread->proc,
					" bad transaction", t);
			t = NULL;
		}
	}
	list_for_each_entry(w, &thread->todo, entry) {
		print_binder_work_ilocked(m, thread->proc, " ",
					  " pending transaction",
					  w, hash_ptrs);
	}
	if (!print_always && m->count == header_pos)
		m->count = start_pos;
}

/*
 * print_binder_node_nilocked() - dump one binder node to a seq_file.
 * @m:         output
 * @node:      node to print
 * @hash_ptrs: print ptr/cookie with %p instead of raw hex
 *
 * Prints the node's reference counters, the PIDs of all procs holding a
 * reference and, if the node still has an owning proc, its pending async
 * work.
 */
static void print_binder_node_nilocked(struct seq_file *m,
				       struct binder_node *node,
				       bool hash_ptrs)
{
	struct binder_ref *ref;
	struct binder_work *w;
	int count;

	count = hlist_count_nodes(&node->refs);

	if (hash_ptrs)
		seq_printf(m, " node %d: u%p c%p", node->debug_id,
			   (void *)(long)node->ptr, (void *)(long)node->cookie);
	else
		seq_printf(m, " node %d: u%016llx c%016llx", node->debug_id,
			   (u64)node->ptr, (u64)node->cookie);
	/* "iw" is the number of entries on node->refs counted above. */
	seq_printf(m, " hs %d hw %d ls %d lw %d is %d iw %d tr %d",
		   node->has_strong_ref, node->has_weak_ref,
		   node->local_strong_refs, node->local_weak_refs,
		   node->internal_strong_refs, count, node->tmp_refs);
	if (count) {
		seq_puts(m, " proc");
		hlist_for_each_entry(ref, &node->refs, node_entry)
			seq_printf(m, " %d", ref->proc->pid);
	}
	seq_puts(m, "\n");
	if (node->proc) {
list_for_each_entry(w, &node->async_todo, entry)
			print_binder_work_ilocked(m, node->proc, " ",
					  " pending async transaction",
					  w, hash_ptrs);
	}
}

/*
 * print_binder_ref_olocked() - dump one binder reference to a seq_file.
 *
 * Takes the node lock of the referenced node while reading it; the node is
 * reported as "dead" when it no longer has an owning proc.
 */
static void print_binder_ref_olocked(struct seq_file *m,
				     struct binder_ref *ref)
{
	binder_node_lock(ref->node);
	seq_printf(m, " ref %d: desc %d %snode %d s %d w %d d %pK\n",
		   ref->data.debug_id, ref->data.desc,
		   ref->node->proc ? "" : "dead ",
		   ref->node->debug_id, ref->data.strong,
		   ref->data.weak, ref->death);
	binder_node_unlock(ref->node);
}

/**
 * print_next_binder_node_ilocked() - Print binder_node from a locked list
 * @m:          struct seq_file for output via seq_printf()
 * @proc:       struct binder_proc we hold the inner_proc_lock to (if any)
 * @node:       struct binder_node to print fields of
 * @prev_node:	struct binder_node we hold a temporary reference to (if any)
 * @hash_ptrs:  whether to hash @node's binder_uintptr_t fields
 *
 * Helper function to handle synchronization around printing a struct
 * binder_node while iterating through @proc->nodes or the dead nodes list.
 * Caller must hold either @proc->inner_lock (for live nodes) or
 * binder_dead_nodes_lock. This lock will be released during the body of this
 * function, but it will be reacquired before returning to the caller.
 *
 * Return:	pointer to the struct binder_node we hold a tmpref on
 */
static struct binder_node *
print_next_binder_node_ilocked(struct seq_file *m, struct binder_proc *proc,
			       struct binder_node *node,
			       struct binder_node *prev_node, bool hash_ptrs)
{
	/*
	 * Take a temporary reference on the node so that it isn't freed while
	 * we print it.
	 */
	binder_inc_node_tmpref_ilocked(node);
	/*
	 * Live nodes need to drop the inner proc lock and dead nodes need to
	 * drop the binder_dead_nodes_lock before trying to take the node lock.
	 */
	if (proc)
		binder_inner_proc_unlock(proc);
	else
		spin_unlock(&binder_dead_nodes_lock);
	/* The previous node's tmpref is dropped only once the list lock is released. */
	if (prev_node)
		binder_put_node(prev_node);
	binder_node_inner_lock(node);
	print_binder_node_nilocked(m, node, hash_ptrs);
	binder_node_inner_unlock(node);
	if (proc)
		binder_inner_proc_lock(proc);
	else
		spin_lock(&binder_dead_nodes_lock);
	return node;
}

/*
 * print_binder_proc() - dump the state of one binder process.
 * @m:         output
 * @proc:      process to print
 * @print_all: print every node and all references; when false only nodes
 *             with an async transaction are printed and references are
 *             skipped
 * @hash_ptrs: forwarded to the node/work printers
 *
 * When @print_all is false and nothing was printed after the two header
 * lines, the seq_file position is rewound so the proc leaves no output.
 */
static void print_binder_proc(struct seq_file *m, struct binder_proc *proc,
			      bool print_all, bool hash_ptrs)
{
	struct binder_work *w;
	struct rb_node *n;
	size_t start_pos = m->count;
	size_t header_pos;
	struct binder_node *last_node = NULL;

	seq_printf(m, "proc %d\n", proc->pid);
	seq_printf(m, "context %s\n", proc->context->name);
	header_pos = m->count;

	binder_inner_proc_lock(proc);
	for (n = rb_first(&proc->threads); n; n = rb_next(n))
		print_binder_thread_ilocked(m, rb_entry(n, struct binder_thread,
						rb_node), print_all, hash_ptrs);

	for (n = rb_first(&proc->nodes); n; n = rb_next(n)) {
		struct binder_node *node = rb_entry(n, struct binder_node,
						    rb_node);
		if (!print_all && !node->has_async_transaction)
			continue;

		/* Carries a tmpref from node to node; see the helper's kernel-doc. */
		last_node = print_next_binder_node_ilocked(m, proc, node,
							   last_node,
							   hash_ptrs);
	}
	binder_inner_proc_unlock(proc);
	/* Drop the tmpref still held on the last node printed. */
	if (last_node)
		binder_put_node(last_node);

	if (print_all) {
		binder_proc_lock(proc);
		for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n))
			print_binder_ref_olocked(m, rb_entry(n,
							     struct binder_ref,
							     rb_node_desc));
		binder_proc_unlock(proc);
	}
	binder_alloc_print_allocated(m, &proc->alloc);
	binder_inner_proc_lock(proc);
	list_for_each_entry(w, &proc->todo, entry)
		print_binder_work_ilocked(m, proc, " ",
					  " pending transaction", w,
					  hash_ptrs);
	/* The two loops below break at once: they only test for non-empty lists. */
	list_for_each_entry(w, &proc->delivered_death, entry) {
		seq_puts(m, " has delivered dead binder\n");
		break;
	}
	list_for_each_entry(w, &proc->delivered_freeze, entry) {
		seq_puts(m, " has delivered freeze binder\n");
		break;
	}
	binder_inner_proc_unlock(proc);
	if (!print_all && m->count == header_pos)
		m->count = start_pos;
}

/*
 * Names printed for the stats->br[] counters, in index order.
 * print_binder_stats() checks at build time that the table length matches.
 */
static const char * const binder_return_strings[] = {
	"BR_ERROR",
	"BR_OK",
	"BR_TRANSACTION",
	"BR_REPLY",
	"BR_ACQUIRE_RESULT",
	"BR_DEAD_REPLY",
	"BR_TRANSACTION_COMPLETE",
	"BR_INCREFS",
	"BR_ACQUIRE",
	"BR_RELEASE",
	"BR_DECREFS",
	"BR_ATTEMPT_ACQUIRE",
	"BR_NOOP",
	"BR_SPAWN_LOOPER",
	"BR_FINISHED",
	"BR_DEAD_BINDER",
	"BR_CLEAR_DEATH_NOTIFICATION_DONE",
	"BR_FAILED_REPLY",
	"BR_FROZEN_REPLY",
	"BR_ONEWAY_SPAM_SUSPECT",
	"BR_TRANSACTION_PENDING_FROZEN",
	"BR_FROZEN_BINDER",
	"BR_CLEAR_FREEZE_NOTIFICATION_DONE",
};

/* Names printed for the stats->bc[] counters, in index order. */
static const char * const binder_command_strings[] = {
	"BC_TRANSACTION",
	"BC_REPLY",
	"BC_ACQUIRE_RESULT",
	"BC_FREE_BUFFER",
	"BC_INCREFS",
	"BC_ACQUIRE",
	"BC_RELEASE",
	"BC_DECREFS",
	"BC_INCREFS_DONE",
	"BC_ACQUIRE_DONE",
	"BC_ATTEMPT_ACQUIRE",
	"BC_REGISTER_LOOPER",
	"BC_ENTER_LOOPER",
	"BC_EXIT_LOOPER",
	"BC_REQUEST_DEATH_NOTIFICATION",
	"BC_CLEAR_DEATH_NOTIFICATION",
	"BC_DEAD_BINDER_DONE",
	"BC_TRANSACTION_SG",
	"BC_REPLY_SG",
	"BC_REQUEST_FREEZE_NOTIFICATION",
	"BC_CLEAR_FREEZE_NOTIFICATION",
	"BC_FREEZE_NOTIFICATION_DONE",
};

/* Names printed for the stats->obj_created[]/obj_deleted[] counters. */
static const char * const binder_objstat_strings[] = {
	"proc",
	"thread",
	"node",
	"ref",
	"death",
	"transaction",
	"transaction_complete",
	"freeze",
};

/*
 * print_binder_stats() - print the non-zero counters of a stats block.
 * @m:      output
 * @prefix: text printed at the start of every line
 * @stats:  counters to print
 *
 * The BUILD_BUG_ON()s tie each counter array to its name table so that a new
 * counter without a name fails the build.
 */
static void print_binder_stats(struct seq_file *m, const char *prefix,
			       struct binder_stats *stats)
{
	int i;

	BUILD_BUG_ON(ARRAY_SIZE(stats->bc) !=
		     ARRAY_SIZE(binder_command_strings));
	for (i = 0; i < ARRAY_SIZE(stats->bc); i++) {
		int temp = atomic_read(&stats->bc[i]);

		if (temp)
			seq_printf(m, "%s%s: %d\n", prefix,
				   binder_command_strings[i], temp);
	}

	BUILD_BUG_ON(ARRAY_SIZE(stats->br) !=
		     ARRAY_SIZE(binder_return_strings));
	for (i = 0; i < ARRAY_SIZE(stats->br); i++) {
		int temp = atomic_read(&stats->br[i]);

		if (temp)
			seq_printf(m, "%s%s: %d\n", prefix,
				   binder_return_strings[i], temp);
	}

	BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) !=
		     ARRAY_SIZE(binder_objstat_strings));
	BUILD_BUG_ON(ARRAY_SIZE(stats->obj_created) !=
		     ARRAY_SIZE(stats->obj_deleted));
	for (i = 0; i < ARRAY_SIZE(stats->obj_created); i++) {
		int created = atomic_read(&stats->obj_created[i]);
		int deleted =
atomic_read(&stats->obj_deleted[i]);

		/* "active" is the number created minus the number deleted. */
		if (created || deleted)
			seq_printf(m, "%s%s: active %d total %d\n",
				prefix,
				binder_objstat_strings[i],
				created - deleted,
				created);
	}
}

/*
 * print_binder_proc_stats() - print summary statistics for one process.
 * @m:    output
 * @proc: process to summarise
 *
 * Counts threads, waiting threads and nodes under the inner proc lock,
 * references under the proc lock, then prints buffer/page information from
 * the allocator, the number of pending transactions on the proc's todo list
 * and finally the proc's own counters via print_binder_stats().
 */
static void print_binder_proc_stats(struct seq_file *m,
				    struct binder_proc *proc)
{
	struct binder_work *w;
	struct binder_thread *thread;
	struct rb_node *n;
	int count, strong, weak, ready_threads;
	size_t free_async_space =
		binder_alloc_get_free_async_space(&proc->alloc);

	seq_printf(m, "proc %d\n", proc->pid);
	seq_printf(m, "context %s\n", proc->context->name);
	count = 0;
	ready_threads = 0;
	binder_inner_proc_lock(proc);
	for (n = rb_first(&proc->threads); n; n = rb_next(n))
		count++;

	list_for_each_entry(thread, &proc->waiting_threads, waiting_thread_node)
		ready_threads++;

	seq_printf(m, " threads: %d\n", count);
	seq_printf(m, " requested threads: %d+%d/%d\n"
			" ready threads %d\n"
			" free async space %zd\n", proc->requested_threads,
			proc->requested_threads_started, proc->max_threads,
			ready_threads,
			free_async_space);
	count = 0;
	for (n = rb_first(&proc->nodes); n; n = rb_next(n))
		count++;
	binder_inner_proc_unlock(proc);
	seq_printf(m, " nodes: %d\n", count);
	count = 0;
	strong = 0;
	weak = 0;
	binder_proc_lock(proc);
	for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) {
		struct binder_ref *ref = rb_entry(n, struct binder_ref,
						  rb_node_desc);
		count++;
		strong += ref->data.strong;
		weak += ref->data.weak;
	}
	binder_proc_unlock(proc);
	seq_printf(m, " refs: %d s %d w %d\n", count, strong, weak);

	count = binder_alloc_get_allocated_count(&proc->alloc);
	seq_printf(m, " buffers: %d\n", count);

	binder_alloc_print_pages(m, &proc->alloc);

	count = 0;
	binder_inner_proc_lock(proc);
	/* Only transaction work items count as pending transactions. */
	list_for_each_entry(w, &proc->todo, entry) {
		if (w->type == BINDER_WORK_TRANSACTION)
			count++;
	}
	binder_inner_proc_unlock(proc);
	seq_printf(m, " pending transactions: %d\n", count);

	print_binder_stats(m, " ", &proc->stats);
}

/*
 * print_binder_state() - dump all dead nodes followed by every process.
 * @m:         output
 * @hash_ptrs: forwarded to the node and proc printers
 */
static void print_binder_state(struct seq_file *m, bool hash_ptrs)
{
	struct binder_proc *proc;
	struct binder_node
*node;
	struct binder_node *last_node = NULL;

	seq_puts(m, "binder state:\n");

	spin_lock(&binder_dead_nodes_lock);
	if (!hlist_empty(&binder_dead_nodes))
		seq_puts(m, "dead nodes:\n");
	/* proc == NULL tells the helper to cycle binder_dead_nodes_lock instead. */
	hlist_for_each_entry(node, &binder_dead_nodes, dead_node)
		last_node = print_next_binder_node_ilocked(m, NULL, node,
							   last_node,
							   hash_ptrs);
	spin_unlock(&binder_dead_nodes_lock);
	/* Drop the tmpref still held on the last node printed. */
	if (last_node)
		binder_put_node(last_node);

	mutex_lock(&binder_procs_lock);
	hlist_for_each_entry(proc, &binder_procs, proc_node)
		print_binder_proc(m, proc, true, hash_ptrs);
	mutex_unlock(&binder_procs_lock);
}

/*
 * print_binder_transactions() - dump every process with print_all == false,
 * so procs that print nothing beyond their header leave no output at all.
 */
static void print_binder_transactions(struct seq_file *m, bool hash_ptrs)
{
	struct binder_proc *proc;

	seq_puts(m, "binder transactions:\n");
	mutex_lock(&binder_procs_lock);
	hlist_for_each_entry(proc, &binder_procs, proc_node)
		print_binder_proc(m, proc, false, hash_ptrs);
	mutex_unlock(&binder_procs_lock);
}

/* seq_file show callback: full state with raw pointer values. */
static int state_show(struct seq_file *m, void *unused)
{
	print_binder_state(m, false);
	return 0;
}

/* seq_file show callback: full state with hash_ptrs enabled. */
static int state_hashed_show(struct seq_file *m, void *unused)
{
	print_binder_state(m, true);
	return 0;
}

/* seq_file show callback: global counters followed by per-process stats. */
static int stats_show(struct seq_file *m, void *unused)
{
	struct binder_proc *proc;

	seq_puts(m, "binder stats:\n");

	print_binder_stats(m, "", &binder_stats);

	mutex_lock(&binder_procs_lock);
	hlist_for_each_entry(proc, &binder_procs, proc_node)
		print_binder_proc_stats(m, proc);
	mutex_unlock(&binder_procs_lock);

	return 0;
}

/* seq_file show callback: transactions with raw pointer values. */
static int transactions_show(struct seq_file *m, void *unused)
{
	print_binder_transactions(m, false);
	return 0;
}

/* seq_file show callback: transactions with hash_ptrs enabled. */
static int transactions_hashed_show(struct seq_file *m, void *unused)
{
	print_binder_transactions(m, true);
	return 0;
}

/*
 * proc_show() - seq_file show callback for a per-PID log file.
 *
 * The PID is carried in m->private as an integer (not a pointer). Every
 * proc on binder_procs with that PID is printed, so one file can cover
 * several procs. binder_procs_lock is held for the whole function via the
 * scope guard.
 */
static int proc_show(struct seq_file *m, void *unused)
{
	struct binder_proc *itr;
	int pid = (unsigned long)m->private;

	guard(mutex)(&binder_procs_lock);
	hlist_for_each_entry(itr, &binder_procs, proc_node) {
		if (itr->pid == pid) {
			seq_puts(m, "binder proc state:\n");
			print_binder_proc(m, itr, true, false);
		}
	}

	return 0;
}

static void
print_binder_transaction_log_entry(struct seq_file *m, struct binder_transaction_log_entry *e) { int debug_id = READ_ONCE(e->debug_id_done); /* * read barrier to guarantee debug_id_done read before * we print the log values */ smp_rmb(); seq_printf(m, "%d: %s from %d:%d to %d:%d context %s node %d handle %d size %d:%d ret %d/%d l=%d", e->debug_id, (e->call_type == 2) ? "reply" : ((e->call_type == 1) ? "async" : "call "), e->from_proc, e->from_thread, e->to_proc, e->to_thread, e->context_name, e->to_node, e->target_handle, e->data_size, e->offsets_size, e->return_error, e->return_error_param, e->return_error_line); /* * read-barrier to guarantee read of debug_id_done after * done printing the fields of the entry */ smp_rmb(); seq_printf(m, debug_id && debug_id == READ_ONCE(e->debug_id_done) ? "\n" : " (incomplete)\n"); } static int transaction_log_show(struct seq_file *m, void *unused) { struct binder_transaction_log *log = m->private; unsigned int log_cur = atomic_read(&log->cur); unsigned int count; unsigned int cur; int i; count = log_cur + 1; cur = count < ARRAY_SIZE(log->entry) && !log->full ? 
0 : count % ARRAY_SIZE(log->entry);
	/* Never print more slots than the ring holds. */
	if (count > ARRAY_SIZE(log->entry) || log->full)
		count = ARRAY_SIZE(log->entry);
	for (i = 0; i < count; i++) {
		/* cur keeps counting up; the modulo wraps it back into the ring. */
		unsigned int index = cur++ % ARRAY_SIZE(log->entry);

		print_binder_transaction_log_entry(m, &log->entry[index]);
	}
	return 0;
}

/* File operations for binder device nodes. */
const struct file_operations binder_fops = {
	.owner = THIS_MODULE,
	.poll = binder_poll,
	.unlocked_ioctl = binder_ioctl,
	.compat_ioctl = compat_ptr_ioctl,
	.mmap = binder_mmap,
	.open = binder_open,
	.flush = binder_flush,
	.release = binder_release,
};

/*
 * Each invocation pairs a <name>_show() callback with the <name>_fops
 * object referenced from binder_debugfs_entries[] below.
 */
DEFINE_SHOW_ATTRIBUTE(state);
DEFINE_SHOW_ATTRIBUTE(state_hashed);
DEFINE_SHOW_ATTRIBUTE(stats);
DEFINE_SHOW_ATTRIBUTE(transactions);
DEFINE_SHOW_ATTRIBUTE(transactions_hashed);
DEFINE_SHOW_ATTRIBUTE(transaction_log);

/*
 * Read-only (0444) log files. The two transaction-log entries share one
 * fops and differ only in the log passed through .data. The table ends with
 * an empty entry.
 */
const struct binder_debugfs_entry binder_debugfs_entries[] = {
	{
		.name = "state",
		.mode = 0444,
		.fops = &state_fops,
		.data = NULL,
	},
	{
		.name = "state_hashed",
		.mode = 0444,
		.fops = &state_hashed_fops,
		.data = NULL,
	},
	{
		.name = "stats",
		.mode = 0444,
		.fops = &stats_fops,
		.data = NULL,
	},
	{
		.name = "transactions",
		.mode = 0444,
		.fops = &transactions_fops,
		.data = NULL,
	},
	{
		.name = "transactions_hashed",
		.mode = 0444,
		.fops = &transactions_hashed_fops,
		.data = NULL,
	},
	{
		.name = "transaction_log",
		.mode = 0444,
		.fops = &transaction_log_fops,
		.data = &binder_transaction_log,
	},
	{
		.name = "failed_transaction_log",
		.mode = 0444,
		.fops = &transaction_log_fops,
		.data = &binder_transaction_log_failed,
	},
	{} /* terminator */
};

/* Link @device into binder_devices under binder_devices_lock. */
void binder_add_device(struct binder_device *device)
{
	guard(spinlock)(&binder_devices_lock);
	hlist_add_head(&device->hlist, &binder_devices);
}

/* Unlink @device from binder_devices under binder_devices_lock. */
void binder_remove_device(struct binder_device *device)
{
	guard(spinlock)(&binder_devices_lock);
	hlist_del_init(&device->hlist);
}

/*
 * init_binder_device() - allocate and register one binder misc device.
 * @name: device name; the pointer itself is stored in the device, so the
 *        string must stay valid for as long as the device exists
 *
 * Return: 0 on success, -ENOMEM if the device cannot be allocated, or the
 * negative error from misc_register().
 */
static int __init init_binder_device(const char *name)
{
	int ret;
	struct binder_device *binder_device;

	binder_device = kzalloc(sizeof(*binder_device), GFP_KERNEL);
	if (!binder_device)
		return -ENOMEM;

	binder_device->miscdev.fops = &binder_fops;
binder_device->miscdev.minor = MISC_DYNAMIC_MINOR;
	binder_device->miscdev.name = name;

	/* Initial reference; binder_open() adds one per open file. */
	refcount_set(&binder_device->ref, 1);
	binder_device->context.binder_context_mgr_uid = INVALID_UID;
	binder_device->context.name = name;
	mutex_init(&binder_device->context.context_mgr_node_lock);

	ret = misc_register(&binder_device->miscdev);
	if (ret < 0) {
		kfree(binder_device);
		return ret;
	}

	binder_add_device(binder_device);

	return ret;
}

/*
 * binder_init() - driver initialisation, run as a device initcall.
 *
 * Sets up the allocator shrinker, resets both transaction logs, creates the
 * debugfs hierarchy, registers one misc device per name in
 * binder_devices_param (only when CONFIG_ANDROID_BINDERFS is disabled) and
 * finally initialises binderfs.
 *
 * Return: 0 on success or a negative errno. On failure every device
 * registered so far is torn down again.
 */
static int __init binder_init(void)
{
	int ret;
	char *device_name, *device_tmp;
	struct binder_device *device;
	struct hlist_node *tmp;
	char *device_names = NULL;
	const struct binder_debugfs_entry *db_entry;

	ret = binder_alloc_shrinker_init();
	if (ret)
		return ret;

	/*
	 * Start at ~0U so that "cur + 1" in transaction_log_show() is 0 for a
	 * log that has never been written.
	 */
	atomic_set(&binder_transaction_log.cur, ~0U);
	atomic_set(&binder_transaction_log_failed.cur, ~0U);

	binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL);

	binder_for_each_debugfs_entry(db_entry)
		debugfs_create_file(db_entry->name,
					db_entry->mode,
					binder_debugfs_dir_entry_root,
					db_entry->data,
					db_entry->fops);

	binder_debugfs_dir_entry_proc = debugfs_create_dir("proc",
						binder_debugfs_dir_entry_root);

	if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) &&
	    strcmp(binder_devices_param, "") != 0) {
		/*
		 * Copy the module_parameter string, because we don't want to
		 * tokenize it in-place.
		 */
		device_names = kstrdup(binder_devices_param, GFP_KERNEL);
		if (!device_names) {
			ret = -ENOMEM;
			goto err_alloc_device_names_failed;
		}

		/*
		 * Each token is stored by pointer in its device (see
		 * init_binder_device()), so device_names is deliberately not
		 * freed on the success path.
		 */
		device_tmp = device_names;
		while ((device_name = strsep(&device_tmp, ","))) {
			ret = init_binder_device(device_name);
			if (ret)
				goto err_init_binder_device_failed;
		}
	}

	ret = init_binderfs();
	if (ret)
		goto err_init_binder_device_failed;

	return ret;

err_init_binder_device_failed:
	/* _safe iteration: each device is unlinked and freed inside the loop. */
	hlist_for_each_entry_safe(device, tmp, &binder_devices, hlist) {
		misc_deregister(&device->miscdev);
		binder_remove_device(device);
		kfree(device);
	}

	kfree(device_names);

err_alloc_device_names_failed:
	debugfs_remove_recursive(binder_debugfs_dir_entry_root);
	binder_alloc_shrinker_exit();

	return ret;
}

device_initcall(binder_init);

#define CREATE_TRACE_POINTS
#include "binder_trace.h"
208 189 173 174 2 1 1 2 2 2 2 1 1 2 6 6 1 3 3 2 2 2 2 1 1 9 4 4 4 1 1 1 1 4 22 5 5 4 1 1 5 4 1 3 4 29 29 1 30 1 21 21 19 6 24 30 3 145 140 141 7 19 166 166 75 73 4 74 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 /* * Created: Tue Feb 2 08:37:54 1999 by faith@valinux.com * * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas. 
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California. * All Rights Reserved. * * Author Rickard E. (Rik) Faith <faith@valinux.com> * Author Gareth Hughes <gareth@valinux.com> * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <linux/export.h> #include <linux/slab.h> #include <drm/drm_auth.h> #include <drm/drm_drv.h> #include <drm/drm_file.h> #include <drm/drm_lease.h> #include <drm/drm_print.h> #include "drm_internal.h" /** * DOC: master and authentication * * &struct drm_master is used to track groups of clients with open * primary device nodes. For every &struct drm_file which has at * least once successfully become the device master (either through the * SET_MASTER IOCTL, or implicitly through opening the primary device node when * no one else is the current master at that time) there exists one &drm_master. * This is noted in &drm_file.is_master. 
All other clients have just a pointer
 * to the &drm_master they are associated with.
 *
 * In addition only one &drm_master can be the current master for a &drm_device.
 * It can be switched through the DROP_MASTER and SET_MASTER IOCTL, or
 * implicitly through closing/opening the primary device node. See also
 * drm_is_current_master().
 *
 * Clients can authenticate against the current master (if it matches their own)
 * using the GETMAGIC and AUTHMAGIC IOCTLs. Together with exchanging masters,
 * this allows controlled access to the device for an entire group of mutually
 * trusted clients.
 */

/*
 * Lock-held variant of drm_is_current_master(): the caller must hold either
 * @fpriv->master_lookup_lock or the device's master_mutex (asserted through
 * lockdep). True only if @fpriv is a master and the lease owner of its
 * master is the device's current master.
 */
static bool drm_is_current_master_locked(struct drm_file *fpriv)
{
	lockdep_assert_once(lockdep_is_held(&fpriv->master_lookup_lock) ||
			    lockdep_is_held(&fpriv->minor->dev->master_mutex));

	return fpriv->is_master && drm_lease_owner(fpriv->master) == fpriv->minor->dev->master;
}

/**
 * drm_is_current_master - checks whether @fpriv is the current master
 * @fpriv: DRM file private
 *
 * Checks whether @fpriv is current master on its device. This decides whether a
 * client is allowed to run DRM_MASTER IOCTLs.
 *
 * Most of the modern IOCTL which require DRM_MASTER are for kernel modesetting
 * - the current master is assumed to own the non-shareable display hardware.
 *
 * Takes and releases @fpriv->master_lookup_lock around the check.
 */
bool drm_is_current_master(struct drm_file *fpriv)
{
	bool ret;

	spin_lock(&fpriv->master_lookup_lock);
	ret = drm_is_current_master_locked(fpriv);
	spin_unlock(&fpriv->master_lookup_lock);

	return ret;
}
EXPORT_SYMBOL(drm_is_current_master);

/*
 * drm_getmagic() - GETMAGIC ioctl handler.
 * @dev:       DRM device
 * @data:      ioctl payload, a struct drm_auth that receives the magic
 * @file_priv: calling file
 *
 * A magic is allocated from the master's magic_map the first time a file
 * asks for one and is reused afterwards. Runs under dev->master_mutex.
 *
 * Return: 0 on success or the negative error from idr_alloc(). On error
 * auth->magic is still written with the file's (unset) magic.
 */
int drm_getmagic(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	struct drm_auth *auth = data;
	int ret = 0;

	guard(mutex)(&dev->master_mutex);
	if (!file_priv->magic) {
		/*
		 * NOTE(review): start 1 / end 0 presumably means "any id >= 1"
		 * - confirm against idr_alloc().
		 */
		ret = idr_alloc(&file_priv->master->magic_map, file_priv,
				1, 0, GFP_KERNEL);
		if (ret >= 0)
			file_priv->magic = ret;
	}
	auth->magic = file_priv->magic;

	drm_dbg_core(dev, "%u\n", auth->magic);

	return ret < 0 ?
ret : 0; } int drm_authmagic(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_auth *auth = data; struct drm_file *file; drm_dbg_core(dev, "%u\n", auth->magic); guard(mutex)(&dev->master_mutex); file = idr_find(&file_priv->master->magic_map, auth->magic); if (file) { file->authenticated = 1; idr_replace(&file_priv->master->magic_map, NULL, auth->magic); } return file ? 0 : -EINVAL; } struct drm_master *drm_master_create(struct drm_device *dev) { struct drm_master *master; master = kzalloc(sizeof(*master), GFP_KERNEL); if (!master) return NULL; kref_init(&master->refcount); idr_init_base(&master->magic_map, 1); master->dev = dev; /* initialize the tree of output resource lessees */ INIT_LIST_HEAD(&master->lessees); INIT_LIST_HEAD(&master->lessee_list); idr_init(&master->leases); idr_init_base(&master->lessee_idr, 1); return master; } static void drm_set_master(struct drm_device *dev, struct drm_file *fpriv, bool new_master) { dev->master = drm_master_get(fpriv->master); if (dev->driver->master_set) dev->driver->master_set(dev, fpriv, new_master); fpriv->was_master = true; } static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv) { struct drm_master *old_master; struct drm_master *new_master; lockdep_assert_held_once(&dev->master_mutex); WARN_ON(fpriv->is_master); old_master = fpriv->master; new_master = drm_master_create(dev); if (!new_master) return -ENOMEM; spin_lock(&fpriv->master_lookup_lock); fpriv->master = new_master; spin_unlock(&fpriv->master_lookup_lock); fpriv->is_master = 1; fpriv->authenticated = 1; drm_set_master(dev, fpriv, true); if (old_master) drm_master_put(&old_master); return 0; } /* * In the olden days the SET/DROP_MASTER ioctls used to return EACCES when * CAP_SYS_ADMIN was not set. This was used to prevent rogue applications * from becoming master and/or failing to release it. * * At the same time, the first client (for a given VT) is _always_ master. 
* Thus in order for the ioctls to succeed, one had to _explicitly_ run the * application as root or flip the setuid bit. * * If the CAP_SYS_ADMIN was missing, no other client could become master... * EVER :-( Leading to a) the graphics session dying badly or b) a completely * locked session. * * * At some point systemd-logind was introduced to orchestrate and delegate * master as applicable. It does so by opening the fd and passing it to users * while in itself logind a) does the set/drop master per users' request and * b) * implicitly drops master on VT switch. * * Even though logind looks like the future, there are a few issues: * - some platforms don't have an equivalent (Android, CrOS, some BSDs) so * root is required _solely_ for SET/DROP MASTER. * - applications may not be updated to use it, * - any client which fails to drop master* can DoS the application using * logind, to a varying degree. * * * Either due to missing CAP_SYS_ADMIN or simply not calling DROP_MASTER. * * * Here we implement the next best thing: * - ensure the logind style of fd passing works unchanged, and * - allow a client to drop/set master, iff it is/was master at a given point * in time. 
* * Note: DROP_MASTER cannot be free for all, as an arbitrator user could: * - DoS/crash the arbitrator - details would be implementation specific * - open the node, become master implicitly and cause issues * * As a result this fixes the following when using root-less build w/o logind * - startx * - weston * - various compositors based on wlroots */ static int drm_master_check_perm(struct drm_device *dev, struct drm_file *file_priv) { if (file_priv->was_master && rcu_access_pointer(file_priv->pid) == task_tgid(current)) return 0; if (!capable(CAP_SYS_ADMIN)) return -EACCES; return 0; } int drm_setmaster_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { int ret; guard(mutex)(&dev->master_mutex); ret = drm_master_check_perm(dev, file_priv); if (ret) return ret; if (drm_is_current_master_locked(file_priv)) return ret; if (dev->master) return -EBUSY; if (!file_priv->master) return -EINVAL; if (!file_priv->is_master) return drm_new_set_master(dev, file_priv); if (file_priv->master->lessor != NULL) { drm_dbg_lease(dev, "Attempt to set lessee %d as master\n", file_priv->master->lessee_id); return -EINVAL; } drm_set_master(dev, file_priv, false); return ret; } static void drm_drop_master(struct drm_device *dev, struct drm_file *fpriv) { if (dev->driver->master_drop) dev->driver->master_drop(dev, fpriv); drm_master_put(&dev->master); } int drm_dropmaster_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { int ret; guard(mutex)(&dev->master_mutex); ret = drm_master_check_perm(dev, file_priv); if (ret) return ret; if (!drm_is_current_master_locked(file_priv)) return -EINVAL; if (!dev->master) return -EINVAL; if (file_priv->master->lessor != NULL) { drm_dbg_lease(dev, "Attempt to drop lessee %d as master\n", file_priv->master->lessee_id); return -EINVAL; } drm_drop_master(dev, file_priv); return ret; } int drm_master_open(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; int ret = 0; /* if there is no 
current master make this fd it, but do not create * any master object for render clients */ guard(mutex)(&dev->master_mutex); if (!dev->master) { ret = drm_new_set_master(dev, file_priv); } else { spin_lock(&file_priv->master_lookup_lock); file_priv->master = drm_master_get(dev->master); spin_unlock(&file_priv->master_lookup_lock); } return ret; } void drm_master_release(struct drm_file *file_priv) { struct drm_device *dev = file_priv->minor->dev; struct drm_master *master; guard(mutex)(&dev->master_mutex); master = file_priv->master; if (file_priv->magic) idr_remove(&file_priv->master->magic_map, file_priv->magic); if (!drm_is_current_master_locked(file_priv)) goto out; if (dev->master == file_priv->master) drm_drop_master(dev, file_priv); out: if (drm_core_check_feature(dev, DRIVER_MODESET) && file_priv->is_master) { /* Revoke any leases held by this or lessees, but only if * this is the "real" master */ drm_lease_revoke(master); } /* drop the master reference held by the file priv */ if (file_priv->master) drm_master_put(&file_priv->master); } /** * drm_master_get - reference a master pointer * @master: &struct drm_master * * Increments the reference count of @master and returns a pointer to @master. */ struct drm_master *drm_master_get(struct drm_master *master) { kref_get(&master->refcount); return master; } EXPORT_SYMBOL(drm_master_get); /** * drm_file_get_master - reference &drm_file.master of @file_priv * @file_priv: DRM file private * * Increments the reference count of @file_priv's &drm_file.master and returns * the &drm_file.master. If @file_priv has no &drm_file.master, returns NULL. * * Master pointers returned from this function should be unreferenced using * drm_master_put(). 
*/ struct drm_master *drm_file_get_master(struct drm_file *file_priv) { struct drm_master *master = NULL; spin_lock(&file_priv->master_lookup_lock); if (!file_priv->master) goto unlock; master = drm_master_get(file_priv->master); unlock: spin_unlock(&file_priv->master_lookup_lock); return master; } EXPORT_SYMBOL(drm_file_get_master); static void drm_master_destroy(struct kref *kref) { struct drm_master *master = container_of(kref, struct drm_master, refcount); struct drm_device *dev = master->dev; if (drm_core_check_feature(dev, DRIVER_MODESET)) drm_lease_destroy(master); idr_destroy(&master->magic_map); idr_destroy(&master->leases); idr_destroy(&master->lessee_idr); kfree(master->unique); kfree(master); } /** * drm_master_put - unreference and clear a master pointer * @master: pointer to a pointer of &struct drm_master * * This decrements the &drm_master behind @master and sets it to NULL. */ void drm_master_put(struct drm_master **master) { kref_put(&(*master)->refcount, drm_master_destroy); *master = NULL; } EXPORT_SYMBOL(drm_master_put); /* Used by drm_client and drm_fb_helper */ bool drm_master_internal_acquire(struct drm_device *dev) { mutex_lock(&dev->master_mutex); if (dev->master) { mutex_unlock(&dev->master_mutex); return false; } return true; } EXPORT_SYMBOL(drm_master_internal_acquire); /* Used by drm_client and drm_fb_helper */ void drm_master_internal_release(struct drm_device *dev) { mutex_unlock(&dev->master_mutex); } EXPORT_SYMBOL(drm_master_internal_release);
39 12 29 27 17 17 17 17 30 30 38 2 39 17 30 29 32 32 14 14 14 32 32 31 1 32 32 20 31 1 31 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
// SPDX-License-Identifier: GPL-2.0
/*
 * linux/mm/page_io.c
 *
 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Swap reorganised 29.12.95,
 * Asynchronous swapping added 30.12.95. Stephen Tweedie
 * Removed race in async swapping. 14.4.1996. Bruno Haible
 * Add swap of shared pages through the page cache. 20.2.1998. Stephen Tweedie
 * Always use brw_page, life becomes simpler. 12 May 1998 Eric Biederman
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/gfp.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/bio.h>
#include <linux/swapops.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/psi.h>
#include <linux/uio.h>
#include <linux/sched/task.h>
#include <linux/delayacct.h>
#include <linux/zswap.h>
#include "swap.h"

/*
 * Common completion work for a swap write bio. On error the folio is
 * re-dirtied, a rate-limited alert is logged and the reclaim flag is cleared;
 * writeback is ended on the folio in every case. Does not drop the bio.
 */
static void __end_swap_bio_write(struct bio *bio)
{
	struct folio *folio = bio_first_folio_all(bio);

	if (bio->bi_status) {
		/*
		 * We failed to write the page out to swap-space.
		 * Re-dirty the page in order to avoid it being reclaimed.
		 * Also print a dire warning that things will go BAD (tm)
		 * very quickly.
		 *
		 * Also clear PG_reclaim to avoid folio_rotate_reclaimable()
		 */
		folio_mark_dirty(folio);
		pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
		folio_clear_reclaim(folio);
	}
	folio_end_writeback(folio);
}

/*
 * bi_end_io handler installed by swap_writepage_bdev_async(): completes the
 * write and drops the bio reference.
 */
static void end_swap_bio_write(struct bio *bio)
{
	__end_swap_bio_write(bio);
	bio_put(bio);
}

/*
 * Common completion work for a swap read bio. On error only a rate-limited
 * alert is logged (the folio is left not uptodate); on success the folio is
 * marked uptodate. The folio is unlocked in both cases.
 */
static void __end_swap_bio_read(struct bio *bio)
{
	struct folio *folio = bio_first_folio_all(bio);

	if (bio->bi_status) {
		pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
				     MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
				     (unsigned long long)bio->bi_iter.bi_sector);
	} else {
		folio_mark_uptodate(folio);
	}
	folio_unlock(folio);
}

/*
 * bi_end_io handler installed by swap_read_folio_bdev_async(): completes the
 * read and drops the bio reference.
 */
static void end_swap_bio_read(struct bio *bio)
{
	__end_swap_bio_read(bio);
	bio_put(bio);
}

/*
 * Build the swap extent list for @swap_file by probing it with bmap().
 *
 * Every page-sized, page-aligned, contiguous run of blocks becomes one
 * single-page extent via add_swap_extent(); misaligned or discontiguous runs
 * are skipped by advancing the probe one block at a time.
 *
 * On success returns the accumulated add_swap_extent() return values, stores
 * the block span (excluding page 0) in *@span and updates sis->max and
 * sis->pages. Returns -EINVAL if bmap() fails or reports an unmapped block
 * (a hole), or the negative value returned by add_swap_extent().
 */
int generic_swapfile_activate(struct swap_info_struct *sis,
				struct file *swap_file,
				sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent tree. This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = probe_block;
		ret = bmap(inode, &first_block);
		if (ret || !first_block)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		/* The remaining blocks of the page must follow contiguously. */
		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = probe_block + block_in_page;
			ret = bmap(inode, &block);
			if (ret || !block)
				goto bad_bmap;

			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		/* Convert from filesystem blocks to page-sized units. */
		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) { /* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1; /* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	ret = -EINVAL;
	goto out;
}

/*
 * Return true if every word of every page in @folio is zero. Each page is
 * mapped with kmap_local_folio() for the scan and unmapped before returning.
 */
static bool is_folio_zero_filled(struct folio *folio)
{
	unsigned int pos, last_pos;
	unsigned long *data;
	unsigned int i;

	last_pos = PAGE_SIZE / sizeof(*data) - 1;
	for (i = 0; i < folio_nr_pages(folio); i++) {
		data = kmap_local_folio(folio, i * PAGE_SIZE);
		/*
		 * Check last word first, in case the page is zero-filled at
		 * the start and has non-zero data at the end, which is common
		 * in real-world workloads.
		 */
		if (data[last_pos]) {
			kunmap_local(data);
			return false;
		}
		for (pos = 0; pos < last_pos; pos++) {
			if (data[pos]) {
				kunmap_local(data);
				return false;
			}
		}
		kunmap_local(data);
	}

	return true;
}

/*
 * Set the zeromap bit for every swap entry backing @folio and account the
 * pages as SWPOUT_ZERO, both globally and against the folio's obj_cgroup when
 * there is one.
 */
static void swap_zeromap_folio_set(struct folio *folio)
{
	struct obj_cgroup *objcg = get_obj_cgroup_from_folio(folio);
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	int nr_pages = folio_nr_pages(folio);
	swp_entry_t entry;
	unsigned int i;

	for (i = 0; i < folio_nr_pages(folio); i++) {
		entry = page_swap_entry(folio_page(folio, i));
		set_bit(swp_offset(entry), sis->zeromap);
	}

	count_vm_events(SWPOUT_ZERO, nr_pages);
	if (objcg) {
		count_objcg_events(objcg, SWPOUT_ZERO, nr_pages);
		/* Drop the reference taken by get_obj_cgroup_from_folio(). */
		obj_cgroup_put(objcg);
	}
}

/* Clear the zeromap bit for every swap entry backing @folio. */
static void swap_zeromap_folio_clear(struct folio *folio)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	swp_entry_t entry;
	unsigned int i;

	for (i = 0; i < folio_nr_pages(folio); i++) {
		entry = page_swap_entry(folio_page(folio, i));
		clear_bit(swp_offset(entry), sis->zeromap);
	}
}

/*
 * We may have stale swap cache pages in memory: notice
 * them here and get rid of the unnecessary final write.
 *
 * Paths that finish without submitting IO unlock @folio here. The
 * AOP_WRITEPAGE_ACTIVATE return leaves the folio dirty and does not unlock
 * it. Otherwise the folio is handed to __swap_writepage() and 0 is returned.
 */
int swap_writeout(struct folio *folio, struct swap_iocb **swap_plug)
{
	int ret = 0;

	if (folio_free_swap(folio))
		goto out_unlock;

	/*
	 * Arch code may have to preserve more data than just the page
	 * contents, e.g. memory tags.
	 */
	ret = arch_prepare_to_swap(folio);
	if (ret) {
		folio_mark_dirty(folio);
		goto out_unlock;
	}

	/*
	 * Use a bitmap (zeromap) to avoid doing IO for zero-filled pages.
	 * The bits in zeromap are protected by the locked swapcache folio
	 * and atomic updates are used to protect against read-modify-write
	 * corruption due to other zero swap entries seeing concurrent updates.
	 */
	if (is_folio_zero_filled(folio)) {
		swap_zeromap_folio_set(folio);
		goto out_unlock;
	}

	/*
	 * Clear bits this folio occupies in the zeromap to prevent zero data
	 * being read in from any previous zero writes that occupied the same
	 * swap entries.
	 */
	swap_zeromap_folio_clear(folio);

	if (zswap_store(folio)) {
		count_mthp_stat(folio_order(folio), MTHP_STAT_ZSWPOUT);
		goto out_unlock;
	}
	if (!mem_cgroup_zswap_writeback_enabled(folio_memcg(folio))) {
		folio_mark_dirty(folio);
		return AOP_WRITEPAGE_ACTIVATE;
	}

	__swap_writepage(folio, swap_plug);
	return 0;
out_unlock:
	folio_unlock(folio);
	return ret;
}

/*
 * Account one swap-out of @folio: the THP counters when it is PMD-mappable,
 * the per-order mTHP counter, and PSWPOUT per page (memcg and global).
 */
static inline void count_swpout_vm_event(struct folio *folio)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (unlikely(folio_test_pmd_mappable(folio))) {
		count_memcg_folio_events(folio, THP_SWPOUT, 1);
		count_vm_event(THP_SWPOUT);
	}
#endif
	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPOUT);
	count_memcg_folio_events(folio, PSWPOUT, folio_nr_pages(folio));
	count_vm_events(PSWPOUT, folio_nr_pages(folio));
}

#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
/*
 * Associate @bio with the io-controller css that corresponds to @folio's
 * memory cgroup. Does nothing when the folio has no memcg.
 */
static void bio_associate_blkg_from_page(struct bio *bio, struct folio *folio)
{
	struct cgroup_subsys_state *css;
	struct mem_cgroup *memcg;

	memcg = folio_memcg(folio);
	if (!memcg)
		return;

	rcu_read_lock();
	css = cgroup_e_css(memcg->css.cgroup, &io_cgrp_subsys);
	bio_associate_blkg_from_css(bio, css);
	rcu_read_unlock();
}
#else
#define bio_associate_blkg_from_page(bio, folio) do { } while (0)
#endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */

/*
 * A batch of folios submitted through the swap file's ->swap_rw().
 * Allocated from sio_pool; freed by the read/write completion handlers.
 */
struct swap_iocb {
	struct kiocb iocb;	/* ki_filp, ki_pos and ki_complete for the batch */
	struct bio_vec bvec[SWAP_CLUSTER_MAX];	/* one entry per queued folio */
	int pages;	/* number of bvec entries in use */
	int len;	/* total bytes queued */
};
static mempool_t *sio_pool;

/*
 * Create sio_pool on first use. The cmpxchg() keeps whichever pool was
 * installed first and destroys the loser. Returns 0, or -ENOMEM when no pool
 * is installed afterwards.
 */
int sio_pool_init(void)
{
	if (!sio_pool) {
		mempool_t *pool = mempool_create_kmalloc_pool(
			SWAP_CLUSTER_MAX, sizeof(struct swap_iocb));
		if (cmpxchg(&sio_pool, NULL, pool))
			mempool_destroy(pool);
	}
	if (!sio_pool)
		return -ENOMEM;
	return 0;
}

/*
 * Completion handler for a batched swap-file write. @ret is the byte count
 * (or error) reported for the batch; anything other than sio->len is treated
 * as failure for every page in the batch. Ends writeback on all pages and
 * returns @sio to the pool.
 */
static void sio_write_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	struct page *page = sio->bvec[0].bv_page;
	int p;

	if (ret != sio->len) {
		/*
		 * In the case of swap-over-nfs, this can be a
		 * temporary failure if the system has limited
		 * memory for allocating transmit buffers.
		 * Mark the page dirty and avoid
		 * folio_rotate_reclaimable but rate-limit the
		 * messages.
		 */
		pr_err_ratelimited("Write error %ld on dio swapfile (%llu)\n",
				   ret, swap_dev_pos(page_swap_entry(page)));
		for (p = 0; p < sio->pages; p++) {
			page = sio->bvec[p].bv_page;
			set_page_dirty(page);
			ClearPageReclaim(page);
		}
	}

	for (p = 0; p < sio->pages; p++)
		end_page_writeback(sio->bvec[p].bv_page);

	mempool_free(sio, sio_pool);
}

/*
 * Queue @folio for writing through the swap file's ->swap_rw().
 *
 * With a plug (@swap_plug non-NULL) the folio is appended to the pending
 * batch when it targets the same file at the next contiguous position;
 * otherwise the pending batch is submitted and a new one started. The batch
 * is submitted immediately when it is full or when there is no plug.
 */
static void swap_writepage_fs(struct folio *folio, struct swap_iocb **swap_plug)
{
	struct swap_iocb *sio = swap_plug ? *swap_plug : NULL;
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	struct file *swap_file = sis->swap_file;
	loff_t pos = swap_dev_pos(folio->swap);

	count_swpout_vm_event(folio);
	folio_start_writeback(folio);
	folio_unlock(folio);
	if (sio) {
		/* Flush the pending batch if this folio cannot extend it. */
		if (sio->iocb.ki_filp != swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_write_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_NOIO);
		init_sync_kiocb(&sio->iocb, swap_file);
		sio->iocb.ki_complete = sio_write_complete;
		sio->iocb.ki_pos = pos;
		sio->pages = 0;
		sio->len = 0;
	}
	bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0);
	sio->len += folio_size(folio);
	sio->pages += 1;
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !swap_plug) {
		swap_write_unplug(sio);
		sio = NULL;
	}
	if (swap_plug)
		*swap_plug = sio;
}

/*
 * Write @folio to the swap block device with an on-stack bio and wait for it
 * to finish; completion handling runs in the caller's context.
 */
static void swap_writepage_bdev_sync(struct folio *folio,
		struct swap_info_struct *sis)
{
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_WRITE | REQ_SWAP);
	bio.bi_iter.bi_sector = swap_folio_sector(folio);
	bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);

	bio_associate_blkg_from_page(&bio, folio);
	count_swpout_vm_event(folio);

	folio_start_writeback(folio);
	folio_unlock(folio);

	submit_bio_wait(&bio);
	__end_swap_bio_write(&bio);
}

/*
 * Write @folio to the swap block device with an allocated bio; completion is
 * handled by end_swap_bio_write().
 */
static void swap_writepage_bdev_async(struct folio *folio,
		struct swap_info_struct *sis)
{
	struct bio *bio;

	bio = bio_alloc(sis->bdev, 1,
			REQ_OP_WRITE | REQ_SWAP,
			GFP_NOIO);
	bio->bi_iter.bi_sector = swap_folio_sector(folio);
	bio->bi_end_io = end_swap_bio_write;
	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);

	bio_associate_blkg_from_page(bio, folio);
	count_swpout_vm_event(folio);
	folio_start_writeback(folio);
	folio_unlock(folio);
	submit_bio(bio);
}

/*
 * Dispatch a swap write of @folio to the filesystem, synchronous block-device
 * or asynchronous block-device path, according to the swap device's flags.
 * The folio must be in the swap cache.
 */
void __swap_writepage(struct folio *folio, struct swap_iocb **swap_plug)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);

	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio), folio);
	/*
	 * ->flags can be updated non-atomically (scan_swap_map_slots),
	 * but that will never affect SWP_FS_OPS, so the data_race
	 * is safe.
	 */
	if (data_race(sis->flags & SWP_FS_OPS))
		swap_writepage_fs(folio, swap_plug);
	/*
	 * ->flags can be updated non-atomically (scan_swap_map_slots),
	 * but that will never affect SWP_SYNCHRONOUS_IO, so the data_race
	 * is safe.
	 */
	else if (data_race(sis->flags & SWP_SYNCHRONOUS_IO))
		swap_writepage_bdev_sync(folio, sis);
	else
		swap_writepage_bdev_async(folio, sis);
}

/*
 * Submit a batched write through the mapping's ->swap_rw(). If the call does
 * not return -EIOCBQUEUED, the completion handler is run here with its result.
 */
void swap_write_unplug(struct swap_iocb *sio)
{
	struct iov_iter from;
	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
	int ret;

	iov_iter_bvec(&from, ITER_SOURCE, sio->bvec, sio->pages, sio->len);
	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
	if (ret != -EIOCBQUEUED)
		sio_write_complete(&sio->iocb, ret);
}

/*
 * Completion handler for a batched swap-file read. When @ret equals the
 * queued length every folio is accounted, marked uptodate and unlocked;
 * otherwise the folios are only unlocked and an alert is logged. @sio is
 * returned to the pool in both cases.
 */
static void sio_read_complete(struct kiocb *iocb, long ret)
{
	struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb);
	int p;

	if (ret == sio->len) {
		for (p = 0; p < sio->pages; p++) {
			struct folio *folio = page_folio(sio->bvec[p].bv_page);

			count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
			count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
			folio_mark_uptodate(folio);
			folio_unlock(folio);
		}
		count_vm_events(PSWPIN, sio->pages);
	} else {
		for (p = 0; p < sio->pages; p++) {
			struct folio *folio = page_folio(sio->bvec[p].bv_page);

			folio_unlock(folio);
		}
		pr_alert_ratelimited("Read-error on swap-device\n");
	}
	mempool_free(sio, sio_pool);
}

/*
 * Try to satisfy a swap-in of @folio from the zeromap.
 *
 * Returns false when none of the folio's entries are in the zeromap, so the
 * caller must read the data elsewhere. Returns true when the folio was
 * zero-filled and marked uptodate, and also for the unsupported
 * partially-zeromapped case (where the folio is left not uptodate). The
 * folio is not unlocked here.
 */
static bool swap_read_folio_zeromap(struct folio *folio)
{
	int nr_pages = folio_nr_pages(folio);
	struct obj_cgroup *objcg;
	bool is_zeromap;

	/*
	 * Swapping in a large folio that is partially in the zeromap is not
	 * currently handled. Return true without marking the folio uptodate so
	 * that an IO error is emitted (e.g. do_swap_page() will sigbus).
	 */
	if (WARN_ON_ONCE(swap_zeromap_batch(folio->swap, nr_pages,
			&is_zeromap) != nr_pages))
		return true;

	if (!is_zeromap)
		return false;

	objcg = get_obj_cgroup_from_folio(folio);
	count_vm_events(SWPIN_ZERO, nr_pages);
	if (objcg) {
		count_objcg_events(objcg, SWPIN_ZERO, nr_pages);
		obj_cgroup_put(objcg);
	}

	folio_zero_range(folio, 0, folio_size(folio));
	folio_mark_uptodate(folio);
	return true;
}

/*
 * Queue @folio for reading through the swap file's ->swap_rw(). Batching
 * with @plug follows the same rules as swap_writepage_fs(): extend a
 * contiguous pending batch, otherwise submit it and start a new one, and
 * submit at once when the batch is full or no plug is in use.
 */
static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	struct swap_iocb *sio = NULL;
	loff_t pos = swap_dev_pos(folio->swap);

	if (plug)
		sio = *plug;
	if (sio) {
		if (sio->iocb.ki_filp != sis->swap_file ||
		    sio->iocb.ki_pos + sio->len != pos) {
			swap_read_unplug(sio);
			sio = NULL;
		}
	}
	if (!sio) {
		sio = mempool_alloc(sio_pool, GFP_KERNEL);
		init_sync_kiocb(&sio->iocb, sis->swap_file);
		sio->iocb.ki_pos = pos;
		sio->iocb.ki_complete = sio_read_complete;
		sio->pages = 0;
		sio->len = 0;
	}
	bvec_set_folio(&sio->bvec[sio->pages], folio, folio_size(folio), 0);
	sio->len += folio_size(folio);
	sio->pages += 1;
	if (sio->pages == ARRAY_SIZE(sio->bvec) || !plug) {
		swap_read_unplug(sio);
		sio = NULL;
	}
	if (plug)
		*plug = sio;
}

/*
 * Read @folio from the swap block device with an on-stack bio and wait for
 * it; completion handling runs in the caller's context.
 */
static void swap_read_folio_bdev_sync(struct folio *folio,
		struct swap_info_struct *sis)
{
	struct bio_vec bv;
	struct bio bio;

	bio_init(&bio, sis->bdev, &bv, 1, REQ_OP_READ);
	bio.bi_iter.bi_sector = swap_folio_sector(folio);
	bio_add_folio_nofail(&bio, folio, folio_size(folio), 0);
	/*
	 * Keep this task valid during swap readpage because the oom killer may
	 * attempt to access it in the page fault retry time check.
	 */
	get_task_struct(current);
	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
	count_vm_events(PSWPIN, folio_nr_pages(folio));
	submit_bio_wait(&bio);
	__end_swap_bio_read(&bio);
	put_task_struct(current);
}

/*
 * Read @folio from the swap block device with an allocated bio; completion
 * is handled by end_swap_bio_read().
 */
static void swap_read_folio_bdev_async(struct folio *folio,
		struct swap_info_struct *sis)
{
	struct bio *bio;

	bio = bio_alloc(sis->bdev, 1, REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = swap_folio_sector(folio);
	bio->bi_end_io = end_swap_bio_read;
	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
	count_vm_events(PSWPIN, folio_nr_pages(folio));
	submit_bio(bio);
}

/*
 * Bring the contents of @folio in from swap. The folio must be locked and
 * not yet uptodate. Sources are tried in order: the zeromap, zswap, then the
 * backing device (filesystem, synchronous bdev or asynchronous bdev path).
 * Submission time is accounted as swap-in delay, and additionally as
 * thrashing/memstall when the folio is a workingset folio.
 */
void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
{
	struct swap_info_struct *sis = swp_swap_info(folio->swap);
	bool synchronous = sis->flags & SWP_SYNCHRONOUS_IO;
	bool workingset = folio_test_workingset(folio);
	unsigned long pflags;
	bool in_thrashing;

	VM_BUG_ON_FOLIO(!folio_test_swapcache(folio) && !synchronous, folio);
	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
	VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);

	/*
	 * Count submission time as memory stall and delay. When the device
	 * is congested, or the submitting cgroup IO-throttled, submission
	 * can be a significant part of overall IO time.
	 */
	if (workingset) {
		delayacct_thrashing_start(&in_thrashing);
		psi_memstall_enter(&pflags);
	}
	delayacct_swapin_start();

	if (swap_read_folio_zeromap(folio)) {
		/* The zeromap helper leaves the folio locked. */
		folio_unlock(folio);
		goto finish;
	}

	if (zswap_load(folio) != -ENOENT)
		goto finish;

	/* We have to read from slower devices. Increase zswap protection. */
	zswap_folio_swapin(folio);

	if (data_race(sis->flags & SWP_FS_OPS)) {
		swap_read_folio_fs(folio, plug);
	} else if (synchronous) {
		swap_read_folio_bdev_sync(folio, sis);
	} else {
		swap_read_folio_bdev_async(folio, sis);
	}

finish:
	if (workingset) {
		delayacct_thrashing_end(&in_thrashing);
		psi_memstall_leave(&pflags);
	}
	delayacct_swapin_end();
}

/*
 * Submit a batched read through the mapping's ->swap_rw(). If the call does
 * not return -EIOCBQUEUED, the completion handler is run here with its result.
 */
void __swap_read_unplug(struct swap_iocb *sio)
{
	struct iov_iter from;
	struct address_space *mapping = sio->iocb.ki_filp->f_mapping;
	int ret;

	iov_iter_bvec(&from, ITER_DEST, sio->bvec, sio->pages, sio->len);
	ret = mapping->a_ops->swap_rw(&sio->iocb, &from);
	if (ret != -EIOCBQUEUED)
		sio_read_complete(&sio->iocb, ret);
}
8 8 8 8 1 1 2 5 2 5 3 4 7 7 1 7 7 7 1 7 1 7 7 1 1 1 2 2 2 1 1 2 2 2 2 4 4 3 4 1 3 4 5 5 4 4 3 4 5 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
// SPDX-License-Identifier: GPL-2.0
/*
 * chaoskey - driver for ChaosKey device from Altus Metrum.
 *
 * This device provides true random numbers using a noise source based
 * on a reverse-biased p-n junction in avalanche breakdown. More
 * details can be found at http://chaoskey.org
 *
 * The driver connects to the kernel hardware RNG interface to provide
 * entropy for /dev/random and other kernel activities. It also offers
 * a separate /dev/ entry to allow for direct access to the random
 * bit stream.
 *
 * Copyright © 2015 Keith Packard <keithp@keithp.com>
 */

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>
#include <linux/wait.h>
#include <linux/hw_random.h>
#include <linux/mutex.h>
#include <linux/uaccess.h>

static struct usb_driver chaoskey_driver;
static struct usb_class_driver chaoskey_class;
static int chaoskey_rng_read(struct hwrng *rng, void *data,
			     size_t max, bool wait);

/* Taken outside dev->lock in open, release and disconnect. */
static DEFINE_MUTEX(chaoskey_list_lock);

/* Debug/error logging against the USB interface's struct device. */
#define usb_dbg(usb_if, format, arg...) \
	dev_dbg(&(usb_if)->dev, format, ## arg)

#define usb_err(usb_if, format, arg...) \
	dev_err(&(usb_if)->dev, format, ## arg)

/* Version Information */
#define DRIVER_AUTHOR "Keith Packard, keithp@keithp.com"
#define DRIVER_DESC "Altus Metrum ChaosKey driver"
#define DRIVER_SHORT "chaoskey"

MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");

#define CHAOSKEY_VENDOR_ID 0x1d50 /* OpenMoko */
#define CHAOSKEY_PRODUCT_ID 0x60c6 /* ChaosKey */

#define ALEA_VENDOR_ID 0x12d8 /* Araneus */
#define ALEA_PRODUCT_ID 0x0001 /* Alea I */

#define CHAOSKEY_BUF_LEN 64 /* max size of USB full speed packet */

#define NAK_TIMEOUT (HZ) /* normal stall/wait timeout */
#define ALEA_FIRST_TIMEOUT (HZ*3) /* first stall/wait timeout for Alea */

#ifdef CONFIG_USB_DYNAMIC_MINORS
#define USB_CHAOSKEY_MINOR_BASE 0
#else

/* IOWARRIOR_MINOR_BASE + 16, not official yet */
#define USB_CHAOSKEY_MINOR_BASE 224
#endif

/* USB IDs this driver binds to. */
static const struct usb_device_id chaoskey_table[] = {
	{ USB_DEVICE(CHAOSKEY_VENDOR_ID, CHAOSKEY_PRODUCT_ID) },
	{ USB_DEVICE(ALEA_VENDOR_ID, ALEA_PRODUCT_ID) },
	{ },
};
MODULE_DEVICE_TABLE(usb, chaoskey_table);

static void chaos_read_callback(struct urb *urb);

/* Driver-local specific stuff */
struct chaoskey {
	struct usb_interface *interface;
	char in_ep;
	struct mutex lock;
	struct mutex rng_lock;
	int open; /* open count */
	bool present; /* device not disconnected */
	bool reading; /* ongoing IO */
	bool reads_started; /* track first read for Alea */
	int size; /* size of buf */
	int valid; /* bytes of buf read */
	int used; /* bytes of buf consumed */
	char *name; /* product + serial */
	struct hwrng hwrng; /* Embedded struct for hwrng */
	int hwrng_registered; /* registered with hwrng API */
	wait_queue_head_t wait_q; /* for timeouts */
	struct urb *urb; /* for performing IO */
	char *buf;
};

/*
 * Release everything owned by @dev: the URB, the name and buffer
 * allocations, the interface reference taken in probe, and the structure
 * itself. A NULL @dev is ignored.
 */
static void chaoskey_free(struct chaoskey *dev)
{
	if (dev) {
		usb_dbg(dev->interface, "free");
		usb_free_urb(dev->urb);
		kfree(dev->name);
		kfree(dev->buf);
		usb_put_intf(dev->interface);
		kfree(dev);
	}
}

/*
 * Bind to a matching interface: locate the bulk IN endpoint, allocate the
 * per-device state, buffer and URB, register the character device and try to
 * register with the hwrng core.
 *
 * Returns 0 on success. A failed hwrng registration is logged but does not
 * fail the probe. On any other failure the partially built state is freed
 * and a negative errno is returned.
 */
static int chaoskey_probe(struct usb_interface *interface,
			  const struct usb_device_id *id)
{
	struct usb_device *udev = interface_to_usbdev(interface);
	struct usb_host_interface *altsetting = interface->cur_altsetting;
	struct usb_endpoint_descriptor *epd;
	int in_ep;
	struct chaoskey *dev;
	int result = -ENOMEM;
	int size;
	int res;

	usb_dbg(interface, "probe %s-%s", udev->product, udev->serial);

	/* Find the first bulk IN endpoint and its packet size */
	res = usb_find_bulk_in_endpoint(altsetting, &epd);
	if (res) {
		usb_dbg(interface, "no IN endpoint found");
		return res;
	}

	in_ep = usb_endpoint_num(epd);
	size = usb_endpoint_maxp(epd);

	/* Validate endpoint and size */
	if (size <= 0) {
		usb_dbg(interface, "invalid size (%d)", size);
		return -ENODEV;
	}

	if (size > CHAOSKEY_BUF_LEN) {
		usb_dbg(interface, "size reduced from %d to %d\n",
			size, CHAOSKEY_BUF_LEN);
		size = CHAOSKEY_BUF_LEN;
	}

	/* Looks good, allocate and initialize */

	dev = kzalloc(sizeof(struct chaoskey), GFP_KERNEL);

	/* From here on, allocation failures return the preset -ENOMEM. */
	if (dev == NULL)
		goto out;

	dev->interface = usb_get_intf(interface);

	dev->buf = kmalloc(size, GFP_KERNEL);

	if (dev->buf == NULL)
		goto out;

	dev->urb = usb_alloc_urb(0, GFP_KERNEL);

	if (!dev->urb)
		goto out;

	usb_fill_bulk_urb(dev->urb,
		udev,
		usb_rcvbulkpipe(udev, in_ep),
		dev->buf,
		size,
		chaos_read_callback,
		dev);

	/* Construct a name using the product and serial values. Each
	 * device needs a unique name for the hwrng code
	 */

	if (udev->product && udev->serial) {
		dev->name = kasprintf(GFP_KERNEL, "%s-%s", udev->product,
				      udev->serial);
		if (dev->name == NULL)
			goto out;
	}

	dev->in_ep = in_ep;

	/* Only Alea devices start with reads_started clear. */
	if (le16_to_cpu(udev->descriptor.idVendor) != ALEA_VENDOR_ID)
		dev->reads_started = true;

	dev->size = size;
	dev->present = true;

	init_waitqueue_head(&dev->wait_q);

	mutex_init(&dev->lock);
	mutex_init(&dev->rng_lock);

	usb_set_intfdata(interface, dev);

	result = usb_register_dev(interface, &chaoskey_class);
	if (result) {
		usb_err(interface, "Unable to allocate minor number.");
		goto out;
	}

	/* Fall back to the driver name when no product/serial name was built. */
	dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name;
	dev->hwrng.read = chaoskey_rng_read;

	dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0);
	if (!dev->hwrng_registered)
		usb_err(interface, "Unable to register with hwrng");

	usb_enable_autosuspend(udev);

	usb_dbg(interface, "chaoskey probe success, size %d", dev->size);
	return 0;

out:
	usb_set_intfdata(interface, NULL);
	chaoskey_free(dev);
	return result;
}

/*
 * Unbind from the interface: unregister from hwrng (if registered) and from
 * the USB character device layer, clear the interface data, then mark the
 * device absent and poison its URB under both locks. The state is freed here
 * only when no file holds the device open.
 * NOTE(review): with opens outstanding, the free presumably happens on the
 * last release — confirm in chaoskey_release().
 */
static void chaoskey_disconnect(struct usb_interface *interface)
{
	struct chaoskey *dev;

	usb_dbg(interface, "disconnect");
	dev = usb_get_intfdata(interface);
	if (!dev) {
		usb_dbg(interface, "disconnect failed - no dev");
		return;
	}

	if (dev->hwrng_registered)
		hwrng_unregister(&dev->hwrng);

	usb_deregister_dev(interface, &chaoskey_class);

	usb_set_intfdata(interface, NULL);
	mutex_lock(&chaoskey_list_lock);
	mutex_lock(&dev->lock);

	dev->present = false;
	usb_poison_urb(dev->urb);

	if (!dev->open) {
		/* dev->lock must be dropped before the structure is freed. */
		mutex_unlock(&dev->lock);
		chaoskey_free(dev);
	} else
		mutex_unlock(&dev->lock);

	mutex_unlock(&chaoskey_list_lock);
	usb_dbg(interface, "disconnect done");
}

/*
 * open() handler: look up the interface for the inode's minor, stash its
 * device state in file->private_data and, if the device is still present,
 * bump the open count. Returns 0, or -ENODEV when the interface or device
 * state is missing or the device has been disconnected.
 */
static int chaoskey_open(struct inode *inode, struct file *file)
{
	struct chaoskey *dev;
	struct usb_interface *interface;
	int rv = 0;

	/* get the interface from minor number and driver information */
	interface = usb_find_interface(&chaoskey_driver, iminor(inode));
	if (!interface)
		return -ENODEV;

	usb_dbg(interface, "open");

	dev = usb_get_intfdata(interface);
	if (!dev) {
		usb_dbg(interface, "open (dev)");
		return -ENODEV;
	}

	file->private_data = dev;
	mutex_lock(&chaoskey_list_lock);
	mutex_lock(&dev->lock);
	if (dev->present)
		++dev->open;
	else
		rv = -ENODEV;
	mutex_unlock(&dev->lock);
	mutex_unlock(&chaoskey_list_lock);

	return rv;
}

static int chaoskey_release(struct inode *inode, struct file *file)
{
	struct chaoskey *dev = file->private_data;
	struct usb_interface *interface;
	int rv = 0;

	if (dev == NULL)
		return -ENODEV;

	interface = dev->interface;

	usb_dbg(interface, "release");

	mutex_lock(&chaoskey_list_lock);
	mutex_lock(&dev->lock);

	usb_dbg(interface,
"open count at release is %d", dev->open); if (dev->open <= 0) { usb_dbg(interface, "invalid open count (%d)", dev->open); rv = -ENODEV; goto bail; } --dev->open; if (!dev->present) { if (dev->open == 0) { mutex_unlock(&dev->lock); chaoskey_free(dev); goto destruction; } } bail: mutex_unlock(&dev->lock); destruction: mutex_unlock(&chaoskey_list_lock); usb_dbg(interface, "release success"); return rv; } static void chaos_read_callback(struct urb *urb) { struct chaoskey *dev = urb->context; int status = urb->status; usb_dbg(dev->interface, "callback status (%d)", status); if (status == 0) dev->valid = urb->actual_length; else dev->valid = 0; dev->used = 0; /* must be seen first before validity is announced */ smp_wmb(); dev->reading = false; wake_up(&dev->wait_q); } /* Fill the buffer. Called with dev->lock held */ static int _chaoskey_fill(struct chaoskey *dev) { DEFINE_WAIT(wait); int result; bool started; usb_dbg(dev->interface, "fill"); /* Return immediately if someone called before the buffer was * empty */ if (dev->valid != dev->used) { usb_dbg(dev->interface, "not empty yet (valid %d used %d)", dev->valid, dev->used); return 0; } /* Bail if the device has been removed */ if (!dev->present) { usb_dbg(dev->interface, "device not present"); return -ENODEV; } /* Make sure the device is awake */ result = usb_autopm_get_interface(dev->interface); if (result) { usb_dbg(dev->interface, "wakeup failed (result %d)", result); return result; } dev->reading = true; result = usb_submit_urb(dev->urb, GFP_KERNEL); if (result < 0) { result = usb_translate_errors(result); dev->reading = false; goto out; } /* The first read on the Alea takes a little under 2 seconds. * Reads after the first read take only a few microseconds * though. Presumably the entropy-generating circuit needs * time to ramp up. So, we wait longer on the first read. */ started = dev->reads_started; dev->reads_started = true; result = wait_event_interruptible_timeout( dev->wait_q, !dev->reading, (started ? 
NAK_TIMEOUT : ALEA_FIRST_TIMEOUT) ); if (result < 0) { usb_kill_urb(dev->urb); goto out; } if (result == 0) { result = -ETIMEDOUT; usb_kill_urb(dev->urb); } else { result = dev->valid; } out: /* Let the device go back to sleep eventually */ usb_autopm_put_interface(dev->interface); usb_dbg(dev->interface, "read %d bytes", dev->valid); return result; } static ssize_t chaoskey_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct chaoskey *dev; ssize_t read_count = 0; int this_time; int result = 0; unsigned long remain; dev = file->private_data; if (dev == NULL || !dev->present) return -ENODEV; usb_dbg(dev->interface, "read %zu", count); while (count > 0) { /* Grab the rng_lock briefly to ensure that the hwrng interface * gets priority over other user access */ result = mutex_lock_interruptible(&dev->rng_lock); if (result) goto bail; mutex_unlock(&dev->rng_lock); result = mutex_lock_interruptible(&dev->lock); if (result) goto bail; if (dev->valid == dev->used) { result = _chaoskey_fill(dev); if (result < 0) { mutex_unlock(&dev->lock); goto bail; } } this_time = dev->valid - dev->used; if (this_time > count) this_time = count; remain = copy_to_user(buffer, dev->buf + dev->used, this_time); if (remain) { result = -EFAULT; /* Consume the bytes that were copied so we don't leak * data to user space */ dev->used += this_time - remain; mutex_unlock(&dev->lock); goto bail; } count -= this_time; read_count += this_time; buffer += this_time; dev->used += this_time; mutex_unlock(&dev->lock); } bail: if (read_count) { usb_dbg(dev->interface, "read %zu bytes", read_count); return read_count; } usb_dbg(dev->interface, "empty read, result %d", result); if (result == -ETIMEDOUT) result = -EAGAIN; return result; } static int chaoskey_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) { struct chaoskey *dev = container_of(rng, struct chaoskey, hwrng); int this_time; usb_dbg(dev->interface, "rng_read max %zu wait %d", max, wait); if 
(!dev->present) { usb_dbg(dev->interface, "device not present"); return 0; } /* Hold the rng_lock until we acquire the device lock so that * this operation gets priority over other user access to the * device */ mutex_lock(&dev->rng_lock); mutex_lock(&dev->lock); mutex_unlock(&dev->rng_lock); /* Try to fill the buffer if empty. It doesn't actually matter * if _chaoskey_fill works; we'll just return zero bytes as * the buffer will still be empty */ if (dev->valid == dev->used) (void) _chaoskey_fill(dev); this_time = dev->valid - dev->used; if (this_time > max) this_time = max; memcpy(data, dev->buf + dev->used, this_time); dev->used += this_time; mutex_unlock(&dev->lock); usb_dbg(dev->interface, "rng_read this_time %d\n", this_time); return this_time; } #ifdef CONFIG_PM static int chaoskey_suspend(struct usb_interface *interface, pm_message_t message) { usb_dbg(interface, "suspend"); return 0; } static int chaoskey_resume(struct usb_interface *interface) { struct chaoskey *dev; struct usb_device *udev = interface_to_usbdev(interface); usb_dbg(interface, "resume"); dev = usb_get_intfdata(interface); /* * We may have lost power. 
* In that case the device that needs a long time * for the first requests needs an extended timeout * again */ if (le16_to_cpu(udev->descriptor.idVendor) == ALEA_VENDOR_ID) dev->reads_started = false; return 0; } #else #define chaoskey_suspend NULL #define chaoskey_resume NULL #endif /* file operation pointers */ static const struct file_operations chaoskey_fops = { .owner = THIS_MODULE, .read = chaoskey_read, .open = chaoskey_open, .release = chaoskey_release, .llseek = default_llseek, }; /* class driver information */ static struct usb_class_driver chaoskey_class = { .name = "chaoskey%d", .fops = &chaoskey_fops, .minor_base = USB_CHAOSKEY_MINOR_BASE, }; /* usb specific object needed to register this driver with the usb subsystem */ static struct usb_driver chaoskey_driver = { .name = DRIVER_SHORT, .probe = chaoskey_probe, .disconnect = chaoskey_disconnect, .suspend = chaoskey_suspend, .resume = chaoskey_resume, .reset_resume = chaoskey_resume, .id_table = chaoskey_table, .supports_autosuspend = 1, }; module_usb_driver(chaoskey_driver);
8 1 1 2 4 7 7 47 1 46 6 2 4 3 2 1 43 43 42 9 1 2 6 5 4 3 5 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 /* * linux/drivers/video/fbcmap.c -- Colormap handling for frame buffer devices * * Created 15 Jun 1997 by Geert Uytterhoeven * * 2001 - Documented with DocBook * - Brad Douglas <brad@neruo.com> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive for * more details. 
*/ #include <linux/export.h> #include <linux/string.h> #include <linux/module.h> #include <linux/fb.h> #include <linux/slab.h> #include <linux/uaccess.h> static u16 red2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 green2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 blue2[] __read_mostly = { 0x0000, 0xaaaa }; static u16 red4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 green4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 blue4[] __read_mostly = { 0x0000, 0xaaaa, 0x5555, 0xffff }; static u16 red8[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa }; static u16 green8[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa }; static u16 blue8[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa }; static u16 red16[] __read_mostly = { 0x0000, 0x0000, 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0xaaaa, 0xaaaa, 0x5555, 0x5555, 0x5555, 0x5555, 0xffff, 0xffff, 0xffff, 0xffff }; static u16 green16[] __read_mostly = { 0x0000, 0x0000, 0xaaaa, 0xaaaa, 0x0000, 0x0000, 0x5555, 0xaaaa, 0x5555, 0x5555, 0xffff, 0xffff, 0x5555, 0x5555, 0xffff, 0xffff }; static u16 blue16[] __read_mostly = { 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x0000, 0xaaaa, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff, 0x5555, 0xffff }; static const struct fb_cmap default_2_colors = { .len=2, .red=red2, .green=green2, .blue=blue2 }; static const struct fb_cmap default_8_colors = { .len=8, .red=red8, .green=green8, .blue=blue8 }; static const struct fb_cmap default_4_colors = { .len=4, .red=red4, .green=green4, .blue=blue4 }; static const struct fb_cmap default_16_colors = { .len=16, .red=red16, .green=green16, .blue=blue16 }; /** * fb_alloc_cmap_gfp - allocate a colormap * @cmap: frame buffer colormap structure * @len: length of @cmap * @transp: boolean, 1 if there is transparency, 0 otherwise * @flags: flags for kmalloc memory allocation * * Allocates memory for a colormap 
@cmap. @len is the * number of entries in the palette. * * Returns negative errno on error, or zero on success. * */ int fb_alloc_cmap_gfp(struct fb_cmap *cmap, int len, int transp, gfp_t flags) { int size = len * sizeof(u16); int ret = -ENOMEM; flags |= __GFP_NOWARN; if (cmap->len != len) { fb_dealloc_cmap(cmap); if (!len) return 0; cmap->red = kzalloc(size, flags); if (!cmap->red) goto fail; cmap->green = kzalloc(size, flags); if (!cmap->green) goto fail; cmap->blue = kzalloc(size, flags); if (!cmap->blue) goto fail; if (transp) { cmap->transp = kzalloc(size, flags); if (!cmap->transp) goto fail; } else { cmap->transp = NULL; } } cmap->start = 0; cmap->len = len; ret = fb_copy_cmap(fb_default_cmap(len), cmap); if (ret) goto fail; return 0; fail: fb_dealloc_cmap(cmap); return ret; } int fb_alloc_cmap(struct fb_cmap *cmap, int len, int transp) { return fb_alloc_cmap_gfp(cmap, len, transp, GFP_ATOMIC); } /** * fb_dealloc_cmap - deallocate a colormap * @cmap: frame buffer colormap structure * * Deallocates a colormap that was previously allocated with * fb_alloc_cmap(). * */ void fb_dealloc_cmap(struct fb_cmap *cmap) { kfree(cmap->red); kfree(cmap->green); kfree(cmap->blue); kfree(cmap->transp); cmap->red = cmap->green = cmap->blue = cmap->transp = NULL; cmap->len = 0; } /** * fb_copy_cmap - copy a colormap * @from: frame buffer colormap structure * @to: frame buffer colormap structure * * Copy contents of colormap from @from to @to. 
*/ int fb_copy_cmap(const struct fb_cmap *from, struct fb_cmap *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); memcpy(to->red+tooff, from->red+fromoff, size); memcpy(to->green+tooff, from->green+fromoff, size); memcpy(to->blue+tooff, from->blue+fromoff, size); if (from->transp && to->transp) memcpy(to->transp+tooff, from->transp+fromoff, size); return 0; } int fb_cmap_to_user(const struct fb_cmap *from, struct fb_cmap_user *to) { unsigned int tooff = 0, fromoff = 0; size_t size; if (to->start > from->start) fromoff = to->start - from->start; else tooff = from->start - to->start; if (fromoff >= from->len || tooff >= to->len) return -EINVAL; size = min_t(size_t, to->len - tooff, from->len - fromoff); if (size == 0) return -EINVAL; size *= sizeof(u16); if (copy_to_user(to->red+tooff, from->red+fromoff, size)) return -EFAULT; if (copy_to_user(to->green+tooff, from->green+fromoff, size)) return -EFAULT; if (copy_to_user(to->blue+tooff, from->blue+fromoff, size)) return -EFAULT; if (from->transp && to->transp) if (copy_to_user(to->transp+tooff, from->transp+fromoff, size)) return -EFAULT; return 0; } /** * fb_set_cmap - set the colormap * @cmap: frame buffer colormap structure * @info: frame buffer info structure * * Sets the colormap @cmap for a screen of device @info. * * Returns negative errno on error, or zero on success. 
* */ int fb_set_cmap(struct fb_cmap *cmap, struct fb_info *info) { int i, start, rc = 0; u16 *red, *green, *blue, *transp; u_int hred, hgreen, hblue, htransp = 0xffff; red = cmap->red; green = cmap->green; blue = cmap->blue; transp = cmap->transp; start = cmap->start; if (start < 0 || (!info->fbops->fb_setcolreg && !info->fbops->fb_setcmap)) return -EINVAL; if (info->fbops->fb_setcmap) { rc = info->fbops->fb_setcmap(cmap, info); } else { for (i = 0; i < cmap->len; i++) { hred = *red++; hgreen = *green++; hblue = *blue++; if (transp) htransp = *transp++; if (info->fbops->fb_setcolreg(start++, hred, hgreen, hblue, htransp, info)) break; } } if (rc == 0) fb_copy_cmap(cmap, &info->cmap); return rc; } int fb_set_user_cmap(struct fb_cmap_user *cmap, struct fb_info *info) { int rc, size = cmap->len * sizeof(u16); struct fb_cmap umap; if (size < 0 || size < cmap->len) return -E2BIG; memset(&umap, 0, sizeof(struct fb_cmap)); rc = fb_alloc_cmap_gfp(&umap, cmap->len, cmap->transp != NULL, GFP_KERNEL); if (rc) return rc; if (copy_from_user(umap.red, cmap->red, size) || copy_from_user(umap.green, cmap->green, size) || copy_from_user(umap.blue, cmap->blue, size) || (cmap->transp && copy_from_user(umap.transp, cmap->transp, size))) { rc = -EFAULT; goto out; } umap.start = cmap->start; lock_fb_info(info); rc = fb_set_cmap(&umap, info); unlock_fb_info(info); out: fb_dealloc_cmap(&umap); return rc; } /** * fb_default_cmap - get default colormap * @len: size of palette for a depth * * Gets the default colormap for a specific screen depth. @len * is the size of the palette for a particular screen depth. * * Returns pointer to a frame buffer colormap structure. * */ const struct fb_cmap *fb_default_cmap(int len) { if (len <= 2) return &default_2_colors; if (len <= 4) return &default_4_colors; if (len <= 8) return &default_8_colors; return &default_16_colors; } /** * fb_invert_cmaps - invert all defaults colormaps * * Invert all default colormaps. 
* */ void fb_invert_cmaps(void) { u_int i; for (i = 0; i < ARRAY_SIZE(red2); i++) { red2[i] = ~red2[i]; green2[i] = ~green2[i]; blue2[i] = ~blue2[i]; } for (i = 0; i < ARRAY_SIZE(red4); i++) { red4[i] = ~red4[i]; green4[i] = ~green4[i]; blue4[i] = ~blue4[i]; } for (i = 0; i < ARRAY_SIZE(red8); i++) { red8[i] = ~red8[i]; green8[i] = ~green8[i]; blue8[i] = ~blue8[i]; } for (i = 0; i < ARRAY_SIZE(red16); i++) { red16[i] = ~red16[i]; green16[i] = ~green16[i]; blue16[i] = ~blue16[i]; } } /* * Visible symbols for modules */ EXPORT_SYMBOL(fb_alloc_cmap); EXPORT_SYMBOL(fb_dealloc_cmap); EXPORT_SYMBOL(fb_copy_cmap); EXPORT_SYMBOL(fb_set_cmap); EXPORT_SYMBOL(fb_default_cmap); EXPORT_SYMBOL(fb_invert_cmaps);
8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 7 1 8 8 8 1 3 5 8 10 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 // SPDX-License-Identifier: GPL-2.0 /* * power_supply_hwmon.c - power supply hwmon support. 
*/ #include <linux/err.h> #include <linux/hwmon.h> #include <linux/power_supply.h> #include <linux/slab.h> #include "power_supply.h" struct power_supply_hwmon { struct power_supply *psy; unsigned long *props; }; static const char *const ps_temp_label[] = { "temp", "ambient temp", }; static int power_supply_hwmon_in_to_property(u32 attr) { switch (attr) { case hwmon_in_average: return POWER_SUPPLY_PROP_VOLTAGE_AVG; case hwmon_in_min: return POWER_SUPPLY_PROP_VOLTAGE_MIN; case hwmon_in_max: return POWER_SUPPLY_PROP_VOLTAGE_MAX; case hwmon_in_input: return POWER_SUPPLY_PROP_VOLTAGE_NOW; default: return -EINVAL; } } static int power_supply_hwmon_curr_to_property(u32 attr) { switch (attr) { case hwmon_curr_average: return POWER_SUPPLY_PROP_CURRENT_AVG; case hwmon_curr_max: return POWER_SUPPLY_PROP_CURRENT_MAX; case hwmon_curr_input: return POWER_SUPPLY_PROP_CURRENT_NOW; default: return -EINVAL; } } static int power_supply_hwmon_power_to_property(u32 attr) { switch (attr) { case hwmon_power_input: return POWER_SUPPLY_PROP_POWER_NOW; case hwmon_power_average: return POWER_SUPPLY_PROP_POWER_AVG; default: return -EINVAL; } } static int power_supply_hwmon_temp_to_property(u32 attr, int channel) { if (channel) { switch (attr) { case hwmon_temp_input: return POWER_SUPPLY_PROP_TEMP_AMBIENT; case hwmon_temp_min_alarm: return POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN; case hwmon_temp_max_alarm: return POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX; default: break; } } else { switch (attr) { case hwmon_temp_input: return POWER_SUPPLY_PROP_TEMP; case hwmon_temp_max: return POWER_SUPPLY_PROP_TEMP_MAX; case hwmon_temp_min: return POWER_SUPPLY_PROP_TEMP_MIN; case hwmon_temp_min_alarm: return POWER_SUPPLY_PROP_TEMP_ALERT_MIN; case hwmon_temp_max_alarm: return POWER_SUPPLY_PROP_TEMP_ALERT_MAX; default: break; } } return -EINVAL; } static int power_supply_hwmon_to_property(enum hwmon_sensor_types type, u32 attr, int channel) { switch (type) { case hwmon_in: return 
power_supply_hwmon_in_to_property(attr); case hwmon_curr: return power_supply_hwmon_curr_to_property(attr); case hwmon_power: return power_supply_hwmon_power_to_property(attr); case hwmon_temp: return power_supply_hwmon_temp_to_property(attr, channel); default: return -EINVAL; } } static bool power_supply_hwmon_is_a_label(enum hwmon_sensor_types type, u32 attr) { return type == hwmon_temp && attr == hwmon_temp_label; } struct hwmon_type_attr_list { const u32 *attrs; size_t n_attrs; }; static const u32 ps_temp_attrs[] = { hwmon_temp_input, hwmon_temp_min, hwmon_temp_max, hwmon_temp_min_alarm, hwmon_temp_max_alarm, }; static const struct hwmon_type_attr_list ps_type_attrs[hwmon_max] = { [hwmon_temp] = { ps_temp_attrs, ARRAY_SIZE(ps_temp_attrs) }, }; static bool power_supply_hwmon_has_input( const struct power_supply_hwmon *psyhw, enum hwmon_sensor_types type, int channel) { const struct hwmon_type_attr_list *attr_list = &ps_type_attrs[type]; size_t i; for (i = 0; i < attr_list->n_attrs; ++i) { int prop = power_supply_hwmon_to_property(type, attr_list->attrs[i], channel); if (prop >= 0 && test_bit(prop, psyhw->props)) return true; } return false; } static bool power_supply_hwmon_is_writable(enum hwmon_sensor_types type, u32 attr) { switch (type) { case hwmon_in: return attr == hwmon_in_min || attr == hwmon_in_max; case hwmon_curr: return attr == hwmon_curr_max; case hwmon_temp: return attr == hwmon_temp_max || attr == hwmon_temp_min || attr == hwmon_temp_min_alarm || attr == hwmon_temp_max_alarm; default: return false; } } static umode_t power_supply_hwmon_is_visible(const void *data, enum hwmon_sensor_types type, u32 attr, int channel) { const struct power_supply_hwmon *psyhw = data; int prop; if (power_supply_hwmon_is_a_label(type, attr)) { if (power_supply_hwmon_has_input(psyhw, type, channel)) return 0444; else return 0; } prop = power_supply_hwmon_to_property(type, attr, channel); if (prop < 0 || !test_bit(prop, psyhw->props)) return 0; if 
(power_supply_property_is_writeable(psyhw->psy, prop) > 0 && power_supply_hwmon_is_writable(type, attr)) return 0644; return 0444; } static int power_supply_hwmon_read_string(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, const char **str) { switch (type) { case hwmon_temp: *str = ps_temp_label[channel]; break; default: /* unreachable, but see: * gcc bug #51513 [1] and clang bug #978 [2] * * [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51513 * [2] https://github.com/ClangBuiltLinux/linux/issues/978 */ break; } return 0; } static int power_supply_hwmon_read(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val) { struct power_supply_hwmon *psyhw = dev_get_drvdata(dev); struct power_supply *psy = psyhw->psy; union power_supply_propval pspval; int ret, prop; prop = power_supply_hwmon_to_property(type, attr, channel); if (prop < 0) return prop; ret = power_supply_get_property(psy, prop, &pspval); if (ret) return ret; switch (type) { /* * Both voltage and current is reported in units of * microvolts/microamps, so we need to adjust it to * milliamps(volts) */ case hwmon_curr: case hwmon_in: pspval.intval = DIV_ROUND_CLOSEST(pspval.intval, 1000); break; case hwmon_power: /* * Power properties are already in microwatts. 
*/ break; /* * Temp needs to be converted from 1/10 C to milli-C */ case hwmon_temp: if (check_mul_overflow(pspval.intval, 100, &pspval.intval)) return -EOVERFLOW; break; default: return -EINVAL; } *val = pspval.intval; return 0; } static int power_supply_hwmon_write(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val) { struct power_supply_hwmon *psyhw = dev_get_drvdata(dev); struct power_supply *psy = psyhw->psy; union power_supply_propval pspval; int prop; prop = power_supply_hwmon_to_property(type, attr, channel); if (prop < 0) return prop; pspval.intval = val; switch (type) { /* * Both voltage and current is reported in units of * microvolts/microamps, so we need to adjust it to * milliamps(volts) */ case hwmon_curr: case hwmon_in: if (check_mul_overflow(pspval.intval, 1000, &pspval.intval)) return -EOVERFLOW; break; /* * Temp needs to be converted from 1/10 C to milli-C */ case hwmon_temp: pspval.intval = DIV_ROUND_CLOSEST(pspval.intval, 100); break; default: return -EINVAL; } return power_supply_set_property(psy, prop, &pspval); } static const struct hwmon_ops power_supply_hwmon_ops = { .is_visible = power_supply_hwmon_is_visible, .read = power_supply_hwmon_read, .write = power_supply_hwmon_write, .read_string = power_supply_hwmon_read_string, }; static const struct hwmon_channel_info * const power_supply_hwmon_info[] = { HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_MIN | HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM, HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MIN_ALARM | HWMON_T_MAX_ALARM), HWMON_CHANNEL_INFO(curr, HWMON_C_AVERAGE | HWMON_C_MAX | HWMON_C_INPUT), HWMON_CHANNEL_INFO(power, HWMON_P_INPUT | HWMON_P_AVERAGE), HWMON_CHANNEL_INFO(in, HWMON_I_AVERAGE | HWMON_I_MIN | HWMON_I_MAX | HWMON_I_INPUT), NULL }; static const struct hwmon_chip_info power_supply_hwmon_chip_info = { .ops = &power_supply_hwmon_ops, .info = power_supply_hwmon_info, }; static const enum power_supply_property 
power_supply_hwmon_props[] = { POWER_SUPPLY_PROP_CURRENT_AVG, POWER_SUPPLY_PROP_CURRENT_MAX, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_POWER_AVG, POWER_SUPPLY_PROP_POWER_NOW, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_MAX, POWER_SUPPLY_PROP_TEMP_MIN, POWER_SUPPLY_PROP_TEMP_ALERT_MIN, POWER_SUPPLY_PROP_TEMP_ALERT_MAX, POWER_SUPPLY_PROP_TEMP_AMBIENT, POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MIN, POWER_SUPPLY_PROP_TEMP_AMBIENT_ALERT_MAX, POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_VOLTAGE_MIN, POWER_SUPPLY_PROP_VOLTAGE_MAX, POWER_SUPPLY_PROP_VOLTAGE_NOW, }; int power_supply_add_hwmon_sysfs(struct power_supply *psy) { struct power_supply_hwmon *psyhw; struct device *dev = &psy->dev; struct device *hwmon; int ret, i; const char *name; if (!devres_open_group(dev, power_supply_add_hwmon_sysfs, GFP_KERNEL)) return -ENOMEM; psyhw = devm_kzalloc(dev, sizeof(*psyhw), GFP_KERNEL); if (!psyhw) { ret = -ENOMEM; goto error; } psyhw->psy = psy; psyhw->props = devm_bitmap_zalloc(dev, POWER_SUPPLY_PROP_TIME_TO_FULL_AVG + 1, GFP_KERNEL); if (!psyhw->props) { ret = -ENOMEM; goto error; } for (i = 0; i < ARRAY_SIZE(power_supply_hwmon_props); i++) { const enum power_supply_property prop = power_supply_hwmon_props[i]; if (power_supply_has_property(psy, prop)) set_bit(prop, psyhw->props); } name = psy->desc->name; if (strchr(name, '-')) { char *new_name; new_name = devm_kstrdup(dev, name, GFP_KERNEL); if (!new_name) { ret = -ENOMEM; goto error; } strreplace(new_name, '-', '_'); name = new_name; } hwmon = devm_hwmon_device_register_with_info(dev, name, psyhw, &power_supply_hwmon_chip_info, NULL); ret = PTR_ERR_OR_ZERO(hwmon); if (ret) goto error; devres_close_group(dev, power_supply_add_hwmon_sysfs); return 0; error: devres_release_group(dev, NULL); return ret; } void power_supply_remove_hwmon_sysfs(struct power_supply *psy) { devres_release_group(&psy->dev, power_supply_add_hwmon_sysfs); }
26 18 565 228 229 17 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _linux_POSIX_TIMERS_H #define _linux_POSIX_TIMERS_H #include <linux/alarmtimer.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/pid.h> #include <linux/posix-timers_types.h> #include <linux/rcuref.h> #include <linux/spinlock.h> #include <linux/timerqueue.h> struct kernel_siginfo; struct task_struct; struct sigqueue; struct k_itimer; static inline clockid_t make_process_cpuclock(const unsigned int pid, const clockid_t clock) { return ((~pid) << 3) | clock; } static inline clockid_t make_thread_cpuclock(const unsigned int tid, const clockid_t clock) { return make_process_cpuclock(tid, clock | CPUCLOCK_PERTHREAD_MASK); } static inline clockid_t fd_to_clockid(const int fd) { return make_process_cpuclock((unsigned int) fd, CLOCKFD); } static inline int clockid_to_fd(const clockid_t clk) { return ~(clk >> 3); } static inline bool clockid_aux_valid(clockid_t id) { return IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS) && id >= CLOCK_AUX && id <= CLOCK_AUX_LAST; } 
#ifdef CONFIG_POSIX_TIMERS
#include <linux/signal_types.h>

/**
 * struct cpu_timer - Posix CPU timer representation for k_itimer
 * @node:	timerqueue node to queue in the task/sig
 * @head:	timerqueue head on which this timer is queued
 * @pid:	Pointer to target task PID
 * @elist:	List head for the expiry list
 * @firing:	Timer is currently firing
 * @nanosleep:	Timer is used for nanosleep and is not a regular posix-timer
 * @handling:	Pointer to the task which handles expiry
 */
struct cpu_timer {
	struct timerqueue_node		node;
	struct timerqueue_head		*head;
	struct pid			*pid;
	struct list_head		elist;
	bool				firing;
	bool				nanosleep;
	struct task_struct __rcu	*handling;
};

/*
 * Record @head as the queue @ctmr lives on and add the timer's node to it.
 * The return value is whatever timerqueue_add() reports.
 */
static inline bool cpu_timer_enqueue(struct timerqueue_head *head,
				     struct cpu_timer *ctmr)
{
	ctmr->head = head;
	return timerqueue_add(head, &ctmr->node);
}

/* A timer counts as queued exactly when its @head pointer is non-NULL. */
static inline bool cpu_timer_queued(struct cpu_timer *ctmr)
{
	return !!ctmr->head;
}

/*
 * Remove @ctmr from the queue it is on, if any, and clear @head so that
 * cpu_timer_queued() turns false.
 *
 * Returns true if the timer was queued and has been removed, false if it
 * was not queued.
 */
static inline bool cpu_timer_dequeue(struct cpu_timer *ctmr)
{
	if (cpu_timer_queued(ctmr)) {
		timerqueue_del(ctmr->head, &ctmr->node);
		ctmr->head = NULL;
		return true;
	}
	return false;
}

/* Read the expiry value stored in the timer's timerqueue node. */
static inline u64 cpu_timer_getexpires(struct cpu_timer *ctmr)
{
	return ctmr->node.expires;
}

/* Store @exp as the expiry value in the timer's timerqueue node. */
static inline void cpu_timer_setexpires(struct cpu_timer *ctmr, u64 exp)
{
	ctmr->node.expires = exp;
}

/*
 * Zero the whole of @pct, then set the next-event value of bases 0, 1
 * and 2 to U64_MAX.
 */
static inline void posix_cputimers_init(struct posix_cputimers *pct)
{
	memset(pct, 0, sizeof(*pct));
	pct->bases[0].nextevt = U64_MAX;
	pct->bases[1].nextevt = U64_MAX;
	pct->bases[2].nextevt = U64_MAX;
}

void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit);

/* Set the next-event value of the CPUCLOCK_SCHED base to @runtime. */
static inline void posix_cputimers_rt_watchdog(struct posix_cputimers *pct,
					       u64 runtime)
{
	pct->bases[CPUCLOCK_SCHED].nextevt = runtime;
}

void posixtimer_rearm_itimer(struct task_struct *p);
bool posixtimer_init_sigqueue(struct sigqueue *q);
void posixtimer_send_sigqueue(struct k_itimer *tmr);
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq);
void posixtimer_free_timer(struct k_itimer *timer);
long posixtimer_create_prctl(unsigned long ctrl);

/*
 * Init task static initializer
 *
 * INIT_CPU_TIMERBASE() does not use its argument; every base is
 * initialized with nextevt set to U64_MAX. INIT_CPU_TIMERBASES() emits
 * three such initializers and INIT_CPU_TIMERS() wraps them in a
 * designated initializer for the .posix_cputimers member.
 */
#define INIT_CPU_TIMERBASE(b) {						\
	.nextevt	= U64_MAX,					\
}

#define INIT_CPU_TIMERBASES(b) {					\
	INIT_CPU_TIMERBASE(b[0]),					\
	INIT_CPU_TIMERBASE(b[1]),					\
	INIT_CPU_TIMERBASE(b[2]),					\
}

#define INIT_CPU_TIMERS(s)						\
	.posix_cputimers = {						\
		.bases = INIT_CPU_TIMERBASES(s.posix_cputimers.bases),	\
	},
#else
/*
 * CONFIG_POSIX_TIMERS is not set: struct cpu_timer is empty,
 * INIT_CPU_TIMERS() expands to nothing and the functions below are no-op
 * stubs. posixtimer_deliver_signal() always returns false and
 * posixtimer_create_prctl() always returns -EINVAL.
 */
struct cpu_timer { };
#define INIT_CPU_TIMERS(s)
static inline void posix_cputimers_init(struct posix_cputimers *pct) { }
static inline void posix_cputimers_group_init(struct posix_cputimers *pct,
					      u64 cpu_limit) { }
static inline void posixtimer_rearm_itimer(struct task_struct *p) { }
static inline bool posixtimer_deliver_signal(struct kernel_siginfo *info,
					     struct sigqueue *timer_sigq) { return false; }
static inline void posixtimer_free_timer(struct k_itimer *timer) { }
static inline long posixtimer_create_prctl(unsigned long ctrl) { return -EINVAL; }
#endif

/* Declared out of line with CONFIG_POSIX_CPU_TIMERS_TASK_WORK, empty stubs without it. */
#ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK
void clear_posix_cputimers_work(struct task_struct *p);
void posix_cputimers_init_work(void);
#else
static inline void clear_posix_cputimers_work(struct task_struct *p) { }
static inline void posix_cputimers_init_work(void) { }
#endif

/**
 * struct k_itimer - POSIX.1b interval timer structure.
 * @list:		List node for binding the timer to tsk::signal::posix_timers
 * @ignored_list:	List node for tracking ignored timers in tsk::signal::ignored_posix_timers
 * @t_hash:		Entry in the posix timer hash table
 * @it_lock:		Lock protecting the timer
 * @kclock:		Pointer to the k_clock struct handling this timer
 * @it_clock:		The posix timer clock id
 * @it_id:		The posix timer id for identifying the timer
 * @it_status:		The status of the timer
 * @it_sig_periodic:	The periodic status at signal delivery
 * @it_overrun:		The overrun counter for pending signals
 * @it_overrun_last:	The overrun at the time of the last delivered signal
 * @it_signal_seq:	Sequence count to control signal delivery
 * @it_sigqueue_seq:	The sequence count at the point where the signal was queued
 * @it_sigev_notify:	The notify word of sigevent struct for signal delivery
 * @it_pid_type:	The pid type stored for this timer (presumably the type
 *			used when signalling @it_pid; confirm against the users)
 * @it_interval:	The interval for periodic timers
 * @it_signal:		Pointer to the creators signal struct
 * @it_pid:		The pid of the process/task targeted by the signal
 * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
 * @rcuref:		Reference count for life time management
 * @sigq:		Embedded sigqueue
 * @it:			Union representing the various posix timer type
 *			internals.
 * @rcu:		RCU head for freeing the timer.
 */
struct k_itimer {
	/* 1st cacheline contains read-mostly fields */
	struct hlist_node	t_hash;
	struct hlist_node	list;
	timer_t			it_id;
	clockid_t		it_clock;
	int			it_sigev_notify;
	enum pid_type		it_pid_type;
	struct signal_struct	*it_signal;
	const struct k_clock	*kclock;

	/* 2nd cacheline and above contain fields which are modified regularly */
	spinlock_t		it_lock;
	int			it_status;
	bool			it_sig_periodic;
	s64			it_overrun;
	s64			it_overrun_last;
	unsigned int		it_signal_seq;
	unsigned int		it_sigqueue_seq;
	ktime_t			it_interval;
	struct hlist_node	ignored_list;
	union {
		struct pid		*it_pid;
		struct task_struct	*it_process;
	};
	struct sigqueue		sigq;
	rcuref_t		rcuref;
	union {
		struct {
			struct hrtimer	timer;
		} real;
		struct cpu_timer	cpu;
		struct {
			struct alarm	alarmtimer;
		} alarm;
	} it;
	struct rcu_head		rcu;
} ____cacheline_aligned_in_smp;

void run_posix_cpu_timers(void);
void posix_cpu_timers_exit(struct task_struct *task);
void posix_cpu_timers_exit_group(struct task_struct *task);
void set_process_cpu_timer(struct task_struct *task, unsigned int clock_idx,
			   u64 *newval, u64 *oldval);

int update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new);

#ifdef CONFIG_POSIX_TIMERS
/*
 * Drop one reference on @tmr. When rcuref_put() returns true the timer is
 * handed to posixtimer_free_timer().
 */
static inline void posixtimer_putref(struct k_itimer *tmr)
{
	if (rcuref_put(&tmr->rcuref))
		posixtimer_free_timer(tmr);
}

/*
 * Take a reference on the k_itimer that embeds @q as its sigq member.
 * Warns once if rcuref_get() fails.
 */
static inline void posixtimer_sigqueue_getref(struct sigqueue *q)
{
	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);

	WARN_ON_ONCE(!rcuref_get(&tmr->rcuref));
}

/* Drop a reference on the k_itimer that embeds @q as its sigq member. */
static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
{
	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);

	posixtimer_putref(tmr);
}

/*
 * Returns true when bit 0 of the @timer->it_signal pointer value is
 * clear, false when it is set.
 */
static inline bool posixtimer_valid(const struct k_itimer *timer)
{
	unsigned long val = (unsigned long)timer->it_signal;

	return !(val & 0x1UL);
}
#else /* CONFIG_POSIX_TIMERS */
/* Without CONFIG_POSIX_TIMERS the sigqueue reference helpers do nothing. */
static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
#endif /* !CONFIG_POSIX_TIMERS */

#endif
9 9 9 9 9 1 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 
1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 
1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 
1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 
2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 
2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 
3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 
3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 
3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 
4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 
4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 
5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 // SPDX-License-Identifier: GPL-2.0-or-later /* * iSCSI transport class definitions * * Copyright (C) IBM Corporation, 2004 * Copyright (C) Mike Christie, 2004 - 2005 * Copyright (C) Dmitry Yusupov, 2004 - 2005 * Copyright (C) Alex Aizman, 2004 - 2005 */ #include <linux/module.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/bsg-lib.h> #include <linux/idr.h> #include <net/tcp.h> #include <scsi/scsi.h> #include <scsi/scsi_host.h> #include <scsi/scsi_device.h> #include <scsi/scsi_transport.h> #include <scsi/scsi_transport_iscsi.h> #include <scsi/iscsi_if.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_bsg_iscsi.h> #define ISCSI_TRANSPORT_VERSION "2.0-870" #define ISCSI_SEND_MAX_ALLOWED 10 #define CREATE_TRACE_POINTS #include <trace/events/iscsi.h> /* * Export tracepoint symbols to be used by other modules. */ EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_conn); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_eh); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_session); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_tcp); EXPORT_TRACEPOINT_SYMBOL_GPL(iscsi_dbg_sw_tcp); static int dbg_session; module_param_named(debug_session, dbg_session, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_session, "Turn on debugging for sessions in scsi_transport_iscsi " "module. Set to 1 to turn on, and zero to turn off. Default " "is off."); static int dbg_conn; module_param_named(debug_conn, dbg_conn, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(debug_conn, "Turn on debugging for connections in scsi_transport_iscsi " "module. Set to 1 to turn on, and zero to turn off. Default " "is off."); #define ISCSI_DBG_TRANS_SESSION(_session, dbg_fmt, arg...) 
\ do { \ if (dbg_session) \ iscsi_cls_session_printk(KERN_INFO, _session, \ "%s: " dbg_fmt, \ __func__, ##arg); \ iscsi_dbg_trace(trace_iscsi_dbg_trans_session, \ &(_session)->dev, \ "%s " dbg_fmt, __func__, ##arg); \ } while (0); #define ISCSI_DBG_TRANS_CONN(_conn, dbg_fmt, arg...) \ do { \ if (dbg_conn) \ iscsi_cls_conn_printk(KERN_INFO, _conn, \ "%s: " dbg_fmt, \ __func__, ##arg); \ iscsi_dbg_trace(trace_iscsi_dbg_trans_conn, \ &(_conn)->dev, \ "%s " dbg_fmt, __func__, ##arg); \ } while (0); struct iscsi_internal { struct scsi_transport_template t; struct iscsi_transport *iscsi_transport; struct list_head list; struct device dev; struct transport_container conn_cont; struct transport_container session_cont; }; static DEFINE_IDR(iscsi_ep_idr); static DEFINE_MUTEX(iscsi_ep_idr_mutex); static atomic_t iscsi_session_nr; /* sysfs session id for next new session */ static struct workqueue_struct *iscsi_conn_cleanup_workq; static DEFINE_IDA(iscsi_sess_ida); /* * list of registered transports and lock that must * be held while accessing list. The iscsi_transport_lock must * be acquired after the rx_queue_mutex. */ static LIST_HEAD(iscsi_transports); static DEFINE_SPINLOCK(iscsi_transport_lock); #define to_iscsi_internal(tmpl) \ container_of(tmpl, struct iscsi_internal, t) #define dev_to_iscsi_internal(_dev) \ container_of(_dev, struct iscsi_internal, dev) static void iscsi_transport_release(struct device *dev) { struct iscsi_internal *priv = dev_to_iscsi_internal(dev); kfree(priv); } /* * iscsi_transport_class represents the iscsi_transports that are * registered. 
*/ static struct class iscsi_transport_class = { .name = "iscsi_transport", .dev_release = iscsi_transport_release, }; static ssize_t show_transport_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_internal *priv = dev_to_iscsi_internal(dev); if (!capable(CAP_SYS_ADMIN)) return -EACCES; return sysfs_emit(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport)); } static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL); #define show_transport_attr(name, format) \ static ssize_t \ show_transport_##name(struct device *dev, \ struct device_attribute *attr,char *buf) \ { \ struct iscsi_internal *priv = dev_to_iscsi_internal(dev); \ return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\ } \ static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL); show_transport_attr(caps, "0x%x"); static struct attribute *iscsi_transport_attrs[] = { &dev_attr_handle.attr, &dev_attr_caps.attr, NULL, }; static struct attribute_group iscsi_transport_group = { .attrs = iscsi_transport_attrs, }; /* * iSCSI endpoint attrs */ #define iscsi_dev_to_endpoint(_dev) \ container_of(_dev, struct iscsi_endpoint, dev) #define ISCSI_ATTR(_prefix,_name,_mode,_show,_store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name,_mode,_show,_store) static void iscsi_endpoint_release(struct device *dev) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, ep->id); mutex_unlock(&iscsi_ep_idr_mutex); kfree(ep); } static struct class iscsi_endpoint_class = { .name = "iscsi_endpoint", .dev_release = iscsi_endpoint_release, }; static ssize_t show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); return sysfs_emit(buf, "%d\n", ep->id); } static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL); static struct attribute *iscsi_endpoint_attrs[] = { &dev_attr_ep_handle.attr, NULL, 
}; static struct attribute_group iscsi_endpoint_group = { .attrs = iscsi_endpoint_attrs, }; struct iscsi_endpoint * iscsi_create_endpoint(int dd_size) { struct iscsi_endpoint *ep; int err, id; ep = kzalloc(sizeof(*ep) + dd_size, GFP_KERNEL); if (!ep) return NULL; mutex_lock(&iscsi_ep_idr_mutex); /* * First endpoint id should be 1 to comply with user space * applications (iscsid). */ id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO); if (id < 0) { mutex_unlock(&iscsi_ep_idr_mutex); printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n", id); goto free_ep; } mutex_unlock(&iscsi_ep_idr_mutex); ep->id = id; ep->dev.class = &iscsi_endpoint_class; dev_set_name(&ep->dev, "ep-%d", id); err = device_register(&ep->dev); if (err) goto put_dev; err = sysfs_create_group(&ep->dev.kobj, &iscsi_endpoint_group); if (err) goto unregister_dev; if (dd_size) ep->dd_data = &ep[1]; return ep; unregister_dev: device_unregister(&ep->dev); return NULL; put_dev: mutex_lock(&iscsi_ep_idr_mutex); idr_remove(&iscsi_ep_idr, id); mutex_unlock(&iscsi_ep_idr_mutex); put_device(&ep->dev); return NULL; free_ep: kfree(ep); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_endpoint); void iscsi_destroy_endpoint(struct iscsi_endpoint *ep) { sysfs_remove_group(&ep->dev.kobj, &iscsi_endpoint_group); device_unregister(&ep->dev); } EXPORT_SYMBOL_GPL(iscsi_destroy_endpoint); void iscsi_put_endpoint(struct iscsi_endpoint *ep) { put_device(&ep->dev); } EXPORT_SYMBOL_GPL(iscsi_put_endpoint); /** * iscsi_lookup_endpoint - get ep from handle * @handle: endpoint handle * * Caller must do a iscsi_put_endpoint. 
*/ struct iscsi_endpoint *iscsi_lookup_endpoint(u64 handle) { struct iscsi_endpoint *ep; mutex_lock(&iscsi_ep_idr_mutex); ep = idr_find(&iscsi_ep_idr, handle); if (!ep) goto unlock; get_device(&ep->dev); unlock: mutex_unlock(&iscsi_ep_idr_mutex); return ep; } EXPORT_SYMBOL_GPL(iscsi_lookup_endpoint); /* * Interface to display network param to sysfs */ static void iscsi_iface_release(struct device *dev) { struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct device *parent = iface->dev.parent; kfree(iface); put_device(parent); } static struct class iscsi_iface_class = { .name = "iscsi_iface", .dev_release = iscsi_iface_release, }; #define ISCSI_IFACE_ATTR(_prefix, _name, _mode, _show, _store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name, _mode, _show, _store) /* iface attrs show */ #define iscsi_iface_attr_show(type, name, param_type, param) \ static ssize_t \ show_##type##_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct iscsi_iface *iface = iscsi_dev_to_iface(dev); \ struct iscsi_transport *t = iface->transport; \ return t->get_iface_param(iface, param_type, param, buf); \ } \ #define iscsi_iface_net_attr(type, name, param) \ iscsi_iface_attr_show(type, name, ISCSI_NET_PARAM, param) \ static ISCSI_IFACE_ATTR(type, name, S_IRUGO, show_##type##_##name, NULL); #define iscsi_iface_attr(type, name, param) \ iscsi_iface_attr_show(type, name, ISCSI_IFACE_PARAM, param) \ static ISCSI_IFACE_ATTR(type, name, S_IRUGO, show_##type##_##name, NULL); /* generic read only ipv4 attribute */ iscsi_iface_net_attr(ipv4_iface, ipaddress, ISCSI_NET_PARAM_IPV4_ADDR); iscsi_iface_net_attr(ipv4_iface, gateway, ISCSI_NET_PARAM_IPV4_GW); iscsi_iface_net_attr(ipv4_iface, subnet, ISCSI_NET_PARAM_IPV4_SUBNET); iscsi_iface_net_attr(ipv4_iface, bootproto, ISCSI_NET_PARAM_IPV4_BOOTPROTO); iscsi_iface_net_attr(ipv4_iface, dhcp_dns_address_en, ISCSI_NET_PARAM_IPV4_DHCP_DNS_ADDR_EN); iscsi_iface_net_attr(ipv4_iface, 
dhcp_slp_da_info_en, ISCSI_NET_PARAM_IPV4_DHCP_SLP_DA_EN); iscsi_iface_net_attr(ipv4_iface, tos_en, ISCSI_NET_PARAM_IPV4_TOS_EN); iscsi_iface_net_attr(ipv4_iface, tos, ISCSI_NET_PARAM_IPV4_TOS); iscsi_iface_net_attr(ipv4_iface, grat_arp_en, ISCSI_NET_PARAM_IPV4_GRAT_ARP_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_alt_client_id_en, ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_alt_client_id, ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID); iscsi_iface_net_attr(ipv4_iface, dhcp_req_vendor_id_en, ISCSI_NET_PARAM_IPV4_DHCP_REQ_VENDOR_ID_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_use_vendor_id_en, ISCSI_NET_PARAM_IPV4_DHCP_USE_VENDOR_ID_EN); iscsi_iface_net_attr(ipv4_iface, dhcp_vendor_id, ISCSI_NET_PARAM_IPV4_DHCP_VENDOR_ID); iscsi_iface_net_attr(ipv4_iface, dhcp_learn_iqn_en, ISCSI_NET_PARAM_IPV4_DHCP_LEARN_IQN_EN); iscsi_iface_net_attr(ipv4_iface, fragment_disable, ISCSI_NET_PARAM_IPV4_FRAGMENT_DISABLE); iscsi_iface_net_attr(ipv4_iface, incoming_forwarding_en, ISCSI_NET_PARAM_IPV4_IN_FORWARD_EN); iscsi_iface_net_attr(ipv4_iface, ttl, ISCSI_NET_PARAM_IPV4_TTL); /* generic read only ipv6 attribute */ iscsi_iface_net_attr(ipv6_iface, ipaddress, ISCSI_NET_PARAM_IPV6_ADDR); iscsi_iface_net_attr(ipv6_iface, link_local_addr, ISCSI_NET_PARAM_IPV6_LINKLOCAL); iscsi_iface_net_attr(ipv6_iface, router_addr, ISCSI_NET_PARAM_IPV6_ROUTER); iscsi_iface_net_attr(ipv6_iface, ipaddr_autocfg, ISCSI_NET_PARAM_IPV6_ADDR_AUTOCFG); iscsi_iface_net_attr(ipv6_iface, link_local_autocfg, ISCSI_NET_PARAM_IPV6_LINKLOCAL_AUTOCFG); iscsi_iface_net_attr(ipv6_iface, link_local_state, ISCSI_NET_PARAM_IPV6_LINKLOCAL_STATE); iscsi_iface_net_attr(ipv6_iface, router_state, ISCSI_NET_PARAM_IPV6_ROUTER_STATE); iscsi_iface_net_attr(ipv6_iface, grat_neighbor_adv_en, ISCSI_NET_PARAM_IPV6_GRAT_NEIGHBOR_ADV_EN); iscsi_iface_net_attr(ipv6_iface, mld_en, ISCSI_NET_PARAM_IPV6_MLD_EN); iscsi_iface_net_attr(ipv6_iface, flow_label, ISCSI_NET_PARAM_IPV6_FLOW_LABEL); 
/*
 * Remaining read-only IPv6 iface attributes. Each iscsi_iface_net_attr()
 * invocation defines show_<type>_<name>(), which asks the transport's
 * get_iface_param() for the given ISCSI_NET_PARAM value, together with the
 * S_IRUGO device attribute dev_attr_<type>_<name>.
 */
iscsi_iface_net_attr(ipv6_iface, traffic_class,
		     ISCSI_NET_PARAM_IPV6_TRAFFIC_CLASS);
iscsi_iface_net_attr(ipv6_iface, hop_limit, ISCSI_NET_PARAM_IPV6_HOP_LIMIT);
iscsi_iface_net_attr(ipv6_iface, nd_reachable_tmo,
		     ISCSI_NET_PARAM_IPV6_ND_REACHABLE_TMO);
iscsi_iface_net_attr(ipv6_iface, nd_rexmit_time,
		     ISCSI_NET_PARAM_IPV6_ND_REXMIT_TIME);
iscsi_iface_net_attr(ipv6_iface, nd_stale_tmo,
		     ISCSI_NET_PARAM_IPV6_ND_STALE_TMO);
iscsi_iface_net_attr(ipv6_iface, dup_addr_detect_cnt,
		     ISCSI_NET_PARAM_IPV6_DUP_ADDR_DETECT_CNT);
iscsi_iface_net_attr(ipv6_iface, router_adv_link_mtu,
		     ISCSI_NET_PARAM_IPV6_RTR_ADV_LINK_MTU);

/* common read only iface attribute */
iscsi_iface_net_attr(iface, enabled, ISCSI_NET_PARAM_IFACE_ENABLE);
iscsi_iface_net_attr(iface, vlan_id, ISCSI_NET_PARAM_VLAN_ID);
iscsi_iface_net_attr(iface, vlan_priority, ISCSI_NET_PARAM_VLAN_PRIORITY);
iscsi_iface_net_attr(iface, vlan_enabled, ISCSI_NET_PARAM_VLAN_ENABLED);
iscsi_iface_net_attr(iface, mtu, ISCSI_NET_PARAM_MTU);
iscsi_iface_net_attr(iface, port, ISCSI_NET_PARAM_PORT);
iscsi_iface_net_attr(iface, ipaddress_state, ISCSI_NET_PARAM_IPADDR_STATE);
iscsi_iface_net_attr(iface, delayed_ack_en, ISCSI_NET_PARAM_DELAYED_ACK_EN);
iscsi_iface_net_attr(iface, tcp_nagle_disable,
		     ISCSI_NET_PARAM_TCP_NAGLE_DISABLE);
iscsi_iface_net_attr(iface, tcp_wsf_disable, ISCSI_NET_PARAM_TCP_WSF_DISABLE);
iscsi_iface_net_attr(iface, tcp_wsf, ISCSI_NET_PARAM_TCP_WSF);
iscsi_iface_net_attr(iface, tcp_timer_scale, ISCSI_NET_PARAM_TCP_TIMER_SCALE);
iscsi_iface_net_attr(iface, tcp_timestamp_en, ISCSI_NET_PARAM_TCP_TIMESTAMP_EN);
iscsi_iface_net_attr(iface, cache_id, ISCSI_NET_PARAM_CACHE_ID);
iscsi_iface_net_attr(iface, redirect_en, ISCSI_NET_PARAM_REDIRECT_EN);

/*
 * common iscsi specific settings attributes
 *
 * iscsi_iface_attr() generates the same show function and attribute pair
 * as iscsi_iface_net_attr(), but queries ISCSI_IFACE_PARAM values.
 */
iscsi_iface_attr(iface, def_taskmgmt_tmo, ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO);
iscsi_iface_attr(iface, header_digest, ISCSI_IFACE_PARAM_HDRDGST_EN);
iscsi_iface_attr(iface, data_digest, ISCSI_IFACE_PARAM_DATADGST_EN);
iscsi_iface_attr(iface, immediate_data, ISCSI_IFACE_PARAM_IMM_DATA_EN); iscsi_iface_attr(iface, initial_r2t, ISCSI_IFACE_PARAM_INITIAL_R2T_EN); iscsi_iface_attr(iface, data_seq_in_order, ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN); iscsi_iface_attr(iface, data_pdu_in_order, ISCSI_IFACE_PARAM_PDU_INORDER_EN); iscsi_iface_attr(iface, erl, ISCSI_IFACE_PARAM_ERL); iscsi_iface_attr(iface, max_recv_dlength, ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH); iscsi_iface_attr(iface, first_burst_len, ISCSI_IFACE_PARAM_FIRST_BURST); iscsi_iface_attr(iface, max_outstanding_r2t, ISCSI_IFACE_PARAM_MAX_R2T); iscsi_iface_attr(iface, max_burst_len, ISCSI_IFACE_PARAM_MAX_BURST); iscsi_iface_attr(iface, chap_auth, ISCSI_IFACE_PARAM_CHAP_AUTH_EN); iscsi_iface_attr(iface, bidi_chap, ISCSI_IFACE_PARAM_BIDI_CHAP_EN); iscsi_iface_attr(iface, discovery_auth_optional, ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL); iscsi_iface_attr(iface, discovery_logout, ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN); iscsi_iface_attr(iface, strict_login_comp_en, ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN); iscsi_iface_attr(iface, initiator_name, ISCSI_IFACE_PARAM_INITIATOR_NAME); static umode_t iscsi_iface_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *dev = container_of(kobj, struct device, kobj); struct iscsi_iface *iface = iscsi_dev_to_iface(dev); struct iscsi_transport *t = iface->transport; int param = -1; if (attr == &dev_attr_iface_def_taskmgmt_tmo.attr) param = ISCSI_IFACE_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_iface_header_digest.attr) param = ISCSI_IFACE_PARAM_HDRDGST_EN; else if (attr == &dev_attr_iface_data_digest.attr) param = ISCSI_IFACE_PARAM_DATADGST_EN; else if (attr == &dev_attr_iface_immediate_data.attr) param = ISCSI_IFACE_PARAM_IMM_DATA_EN; else if (attr == &dev_attr_iface_initial_r2t.attr) param = ISCSI_IFACE_PARAM_INITIAL_R2T_EN; else if (attr == &dev_attr_iface_data_seq_in_order.attr) param = ISCSI_IFACE_PARAM_DATASEQ_INORDER_EN; else if (attr == 
&dev_attr_iface_data_pdu_in_order.attr) param = ISCSI_IFACE_PARAM_PDU_INORDER_EN; else if (attr == &dev_attr_iface_erl.attr) param = ISCSI_IFACE_PARAM_ERL; else if (attr == &dev_attr_iface_max_recv_dlength.attr) param = ISCSI_IFACE_PARAM_MAX_RECV_DLENGTH; else if (attr == &dev_attr_iface_first_burst_len.attr) param = ISCSI_IFACE_PARAM_FIRST_BURST; else if (attr == &dev_attr_iface_max_outstanding_r2t.attr) param = ISCSI_IFACE_PARAM_MAX_R2T; else if (attr == &dev_attr_iface_max_burst_len.attr) param = ISCSI_IFACE_PARAM_MAX_BURST; else if (attr == &dev_attr_iface_chap_auth.attr) param = ISCSI_IFACE_PARAM_CHAP_AUTH_EN; else if (attr == &dev_attr_iface_bidi_chap.attr) param = ISCSI_IFACE_PARAM_BIDI_CHAP_EN; else if (attr == &dev_attr_iface_discovery_auth_optional.attr) param = ISCSI_IFACE_PARAM_DISCOVERY_AUTH_OPTIONAL; else if (attr == &dev_attr_iface_discovery_logout.attr) param = ISCSI_IFACE_PARAM_DISCOVERY_LOGOUT_EN; else if (attr == &dev_attr_iface_strict_login_comp_en.attr) param = ISCSI_IFACE_PARAM_STRICT_LOGIN_COMP_EN; else if (attr == &dev_attr_iface_initiator_name.attr) param = ISCSI_IFACE_PARAM_INITIATOR_NAME; if (param != -1) return t->attr_is_visible(ISCSI_IFACE_PARAM, param); if (attr == &dev_attr_iface_enabled.attr) param = ISCSI_NET_PARAM_IFACE_ENABLE; else if (attr == &dev_attr_iface_vlan_id.attr) param = ISCSI_NET_PARAM_VLAN_ID; else if (attr == &dev_attr_iface_vlan_priority.attr) param = ISCSI_NET_PARAM_VLAN_PRIORITY; else if (attr == &dev_attr_iface_vlan_enabled.attr) param = ISCSI_NET_PARAM_VLAN_ENABLED; else if (attr == &dev_attr_iface_mtu.attr) param = ISCSI_NET_PARAM_MTU; else if (attr == &dev_attr_iface_port.attr) param = ISCSI_NET_PARAM_PORT; else if (attr == &dev_attr_iface_ipaddress_state.attr) param = ISCSI_NET_PARAM_IPADDR_STATE; else if (attr == &dev_attr_iface_delayed_ack_en.attr) param = ISCSI_NET_PARAM_DELAYED_ACK_EN; else if (attr == &dev_attr_iface_tcp_nagle_disable.attr) param = ISCSI_NET_PARAM_TCP_NAGLE_DISABLE; else if (attr == 
&dev_attr_iface_tcp_wsf_disable.attr) param = ISCSI_NET_PARAM_TCP_WSF_DISABLE; else if (attr == &dev_attr_iface_tcp_wsf.attr) param = ISCSI_NET_PARAM_TCP_WSF; else if (attr == &dev_attr_iface_tcp_timer_scale.attr) param = ISCSI_NET_PARAM_TCP_TIMER_SCALE; else if (attr == &dev_attr_iface_tcp_timestamp_en.attr) param = ISCSI_NET_PARAM_TCP_TIMESTAMP_EN; else if (attr == &dev_attr_iface_cache_id.attr) param = ISCSI_NET_PARAM_CACHE_ID; else if (attr == &dev_attr_iface_redirect_en.attr) param = ISCSI_NET_PARAM_REDIRECT_EN; else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV4) { if (attr == &dev_attr_ipv4_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV4_ADDR; else if (attr == &dev_attr_ipv4_iface_gateway.attr) param = ISCSI_NET_PARAM_IPV4_GW; else if (attr == &dev_attr_ipv4_iface_subnet.attr) param = ISCSI_NET_PARAM_IPV4_SUBNET; else if (attr == &dev_attr_ipv4_iface_bootproto.attr) param = ISCSI_NET_PARAM_IPV4_BOOTPROTO; else if (attr == &dev_attr_ipv4_iface_dhcp_dns_address_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_DNS_ADDR_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_slp_da_info_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_SLP_DA_EN; else if (attr == &dev_attr_ipv4_iface_tos_en.attr) param = ISCSI_NET_PARAM_IPV4_TOS_EN; else if (attr == &dev_attr_ipv4_iface_tos.attr) param = ISCSI_NET_PARAM_IPV4_TOS; else if (attr == &dev_attr_ipv4_iface_grat_arp_en.attr) param = ISCSI_NET_PARAM_IPV4_GRAT_ARP_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_alt_client_id_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_alt_client_id.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_ALT_CLIENT_ID; else if (attr == &dev_attr_ipv4_iface_dhcp_req_vendor_id_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_REQ_VENDOR_ID_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_use_vendor_id_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_USE_VENDOR_ID_EN; else if (attr == &dev_attr_ipv4_iface_dhcp_vendor_id.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_VENDOR_ID; else if 
(attr == &dev_attr_ipv4_iface_dhcp_learn_iqn_en.attr) param = ISCSI_NET_PARAM_IPV4_DHCP_LEARN_IQN_EN; else if (attr == &dev_attr_ipv4_iface_fragment_disable.attr) param = ISCSI_NET_PARAM_IPV4_FRAGMENT_DISABLE; else if (attr == &dev_attr_ipv4_iface_incoming_forwarding_en.attr) param = ISCSI_NET_PARAM_IPV4_IN_FORWARD_EN; else if (attr == &dev_attr_ipv4_iface_ttl.attr) param = ISCSI_NET_PARAM_IPV4_TTL; else return 0; } else if (iface->iface_type == ISCSI_IFACE_TYPE_IPV6) { if (attr == &dev_attr_ipv6_iface_ipaddress.attr) param = ISCSI_NET_PARAM_IPV6_ADDR; else if (attr == &dev_attr_ipv6_iface_link_local_addr.attr) param = ISCSI_NET_PARAM_IPV6_LINKLOCAL; else if (attr == &dev_attr_ipv6_iface_router_addr.attr) param = ISCSI_NET_PARAM_IPV6_ROUTER; else if (attr == &dev_attr_ipv6_iface_ipaddr_autocfg.attr) param = ISCSI_NET_PARAM_IPV6_ADDR_AUTOCFG; else if (attr == &dev_attr_ipv6_iface_link_local_autocfg.attr) param = ISCSI_NET_PARAM_IPV6_LINKLOCAL_AUTOCFG; else if (attr == &dev_attr_ipv6_iface_link_local_state.attr) param = ISCSI_NET_PARAM_IPV6_LINKLOCAL_STATE; else if (attr == &dev_attr_ipv6_iface_router_state.attr) param = ISCSI_NET_PARAM_IPV6_ROUTER_STATE; else if (attr == &dev_attr_ipv6_iface_grat_neighbor_adv_en.attr) param = ISCSI_NET_PARAM_IPV6_GRAT_NEIGHBOR_ADV_EN; else if (attr == &dev_attr_ipv6_iface_mld_en.attr) param = ISCSI_NET_PARAM_IPV6_MLD_EN; else if (attr == &dev_attr_ipv6_iface_flow_label.attr) param = ISCSI_NET_PARAM_IPV6_FLOW_LABEL; else if (attr == &dev_attr_ipv6_iface_traffic_class.attr) param = ISCSI_NET_PARAM_IPV6_TRAFFIC_CLASS; else if (attr == &dev_attr_ipv6_iface_hop_limit.attr) param = ISCSI_NET_PARAM_IPV6_HOP_LIMIT; else if (attr == &dev_attr_ipv6_iface_nd_reachable_tmo.attr) param = ISCSI_NET_PARAM_IPV6_ND_REACHABLE_TMO; else if (attr == &dev_attr_ipv6_iface_nd_rexmit_time.attr) param = ISCSI_NET_PARAM_IPV6_ND_REXMIT_TIME; else if (attr == &dev_attr_ipv6_iface_nd_stale_tmo.attr) param = ISCSI_NET_PARAM_IPV6_ND_STALE_TMO; else if (attr == 
&dev_attr_ipv6_iface_dup_addr_detect_cnt.attr) param = ISCSI_NET_PARAM_IPV6_DUP_ADDR_DETECT_CNT; else if (attr == &dev_attr_ipv6_iface_router_adv_link_mtu.attr) param = ISCSI_NET_PARAM_IPV6_RTR_ADV_LINK_MTU; else return 0; } else { WARN_ONCE(1, "Invalid iface attr"); return 0; } return t->attr_is_visible(ISCSI_NET_PARAM, param); } static struct attribute *iscsi_iface_attrs[] = { &dev_attr_iface_enabled.attr, &dev_attr_iface_vlan_id.attr, &dev_attr_iface_vlan_priority.attr, &dev_attr_iface_vlan_enabled.attr, &dev_attr_ipv4_iface_ipaddress.attr, &dev_attr_ipv4_iface_gateway.attr, &dev_attr_ipv4_iface_subnet.attr, &dev_attr_ipv4_iface_bootproto.attr, &dev_attr_ipv6_iface_ipaddress.attr, &dev_attr_ipv6_iface_link_local_addr.attr, &dev_attr_ipv6_iface_router_addr.attr, &dev_attr_ipv6_iface_ipaddr_autocfg.attr, &dev_attr_ipv6_iface_link_local_autocfg.attr, &dev_attr_iface_mtu.attr, &dev_attr_iface_port.attr, &dev_attr_iface_ipaddress_state.attr, &dev_attr_iface_delayed_ack_en.attr, &dev_attr_iface_tcp_nagle_disable.attr, &dev_attr_iface_tcp_wsf_disable.attr, &dev_attr_iface_tcp_wsf.attr, &dev_attr_iface_tcp_timer_scale.attr, &dev_attr_iface_tcp_timestamp_en.attr, &dev_attr_iface_cache_id.attr, &dev_attr_iface_redirect_en.attr, &dev_attr_iface_def_taskmgmt_tmo.attr, &dev_attr_iface_header_digest.attr, &dev_attr_iface_data_digest.attr, &dev_attr_iface_immediate_data.attr, &dev_attr_iface_initial_r2t.attr, &dev_attr_iface_data_seq_in_order.attr, &dev_attr_iface_data_pdu_in_order.attr, &dev_attr_iface_erl.attr, &dev_attr_iface_max_recv_dlength.attr, &dev_attr_iface_first_burst_len.attr, &dev_attr_iface_max_outstanding_r2t.attr, &dev_attr_iface_max_burst_len.attr, &dev_attr_iface_chap_auth.attr, &dev_attr_iface_bidi_chap.attr, &dev_attr_iface_discovery_auth_optional.attr, &dev_attr_iface_discovery_logout.attr, &dev_attr_iface_strict_login_comp_en.attr, &dev_attr_iface_initiator_name.attr, &dev_attr_ipv4_iface_dhcp_dns_address_en.attr, 
&dev_attr_ipv4_iface_dhcp_slp_da_info_en.attr, &dev_attr_ipv4_iface_tos_en.attr, &dev_attr_ipv4_iface_tos.attr, &dev_attr_ipv4_iface_grat_arp_en.attr, &dev_attr_ipv4_iface_dhcp_alt_client_id_en.attr, &dev_attr_ipv4_iface_dhcp_alt_client_id.attr, &dev_attr_ipv4_iface_dhcp_req_vendor_id_en.attr, &dev_attr_ipv4_iface_dhcp_use_vendor_id_en.attr, &dev_attr_ipv4_iface_dhcp_vendor_id.attr, &dev_attr_ipv4_iface_dhcp_learn_iqn_en.attr, &dev_attr_ipv4_iface_fragment_disable.attr, &dev_attr_ipv4_iface_incoming_forwarding_en.attr, &dev_attr_ipv4_iface_ttl.attr, &dev_attr_ipv6_iface_link_local_state.attr, &dev_attr_ipv6_iface_router_state.attr, &dev_attr_ipv6_iface_grat_neighbor_adv_en.attr, &dev_attr_ipv6_iface_mld_en.attr, &dev_attr_ipv6_iface_flow_label.attr, &dev_attr_ipv6_iface_traffic_class.attr, &dev_attr_ipv6_iface_hop_limit.attr, &dev_attr_ipv6_iface_nd_reachable_tmo.attr, &dev_attr_ipv6_iface_nd_rexmit_time.attr, &dev_attr_ipv6_iface_nd_stale_tmo.attr, &dev_attr_ipv6_iface_dup_addr_detect_cnt.attr, &dev_attr_ipv6_iface_router_adv_link_mtu.attr, NULL, }; static struct attribute_group iscsi_iface_group = { .attrs = iscsi_iface_attrs, .is_visible = iscsi_iface_attr_is_visible, }; /* convert iscsi_ipaddress_state values to ascii string name */ static const struct { enum iscsi_ipaddress_state value; char *name; } iscsi_ipaddress_state_names[] = { {ISCSI_IPDDRESS_STATE_UNCONFIGURED, "Unconfigured" }, {ISCSI_IPDDRESS_STATE_ACQUIRING, "Acquiring" }, {ISCSI_IPDDRESS_STATE_TENTATIVE, "Tentative" }, {ISCSI_IPDDRESS_STATE_VALID, "Valid" }, {ISCSI_IPDDRESS_STATE_DISABLING, "Disabling" }, {ISCSI_IPDDRESS_STATE_INVALID, "Invalid" }, {ISCSI_IPDDRESS_STATE_DEPRECATED, "Deprecated" }, }; char *iscsi_get_ipaddress_state_name(enum iscsi_ipaddress_state port_state) { int i; char *state = NULL; for (i = 0; i < ARRAY_SIZE(iscsi_ipaddress_state_names); i++) { if (iscsi_ipaddress_state_names[i].value == port_state) { state = iscsi_ipaddress_state_names[i].name; break; } } return state; } 
EXPORT_SYMBOL_GPL(iscsi_get_ipaddress_state_name); /* convert iscsi_router_state values to ascii string name */ static const struct { enum iscsi_router_state value; char *name; } iscsi_router_state_names[] = { {ISCSI_ROUTER_STATE_UNKNOWN, "Unknown" }, {ISCSI_ROUTER_STATE_ADVERTISED, "Advertised" }, {ISCSI_ROUTER_STATE_MANUAL, "Manual" }, {ISCSI_ROUTER_STATE_STALE, "Stale" }, }; char *iscsi_get_router_state_name(enum iscsi_router_state router_state) { int i; char *state = NULL; for (i = 0; i < ARRAY_SIZE(iscsi_router_state_names); i++) { if (iscsi_router_state_names[i].value == router_state) { state = iscsi_router_state_names[i].name; break; } } return state; } EXPORT_SYMBOL_GPL(iscsi_get_router_state_name); struct iscsi_iface * iscsi_create_iface(struct Scsi_Host *shost, struct iscsi_transport *transport, uint32_t iface_type, uint32_t iface_num, int dd_size) { struct iscsi_iface *iface; int err; iface = kzalloc(sizeof(*iface) + dd_size, GFP_KERNEL); if (!iface) return NULL; iface->transport = transport; iface->iface_type = iface_type; iface->iface_num = iface_num; iface->dev.release = iscsi_iface_release; iface->dev.class = &iscsi_iface_class; /* parent reference released in iscsi_iface_release */ iface->dev.parent = get_device(&shost->shost_gendev); if (iface_type == ISCSI_IFACE_TYPE_IPV4) dev_set_name(&iface->dev, "ipv4-iface-%u-%u", shost->host_no, iface_num); else dev_set_name(&iface->dev, "ipv6-iface-%u-%u", shost->host_no, iface_num); err = device_register(&iface->dev); if (err) goto put_dev; err = sysfs_create_group(&iface->dev.kobj, &iscsi_iface_group); if (err) goto unreg_iface; if (dd_size) iface->dd_data = &iface[1]; return iface; unreg_iface: device_unregister(&iface->dev); return NULL; put_dev: put_device(&iface->dev); return NULL; } EXPORT_SYMBOL_GPL(iscsi_create_iface); void iscsi_destroy_iface(struct iscsi_iface *iface) { sysfs_remove_group(&iface->dev.kobj, &iscsi_iface_group); device_unregister(&iface->dev); } 
EXPORT_SYMBOL_GPL(iscsi_destroy_iface); /* * Interface to display flash node params to sysfs */ #define ISCSI_FLASHNODE_ATTR(_prefix, _name, _mode, _show, _store) \ struct device_attribute dev_attr_##_prefix##_##_name = \ __ATTR(_name, _mode, _show, _store) /* flash node session attrs show */ #define iscsi_flashnode_sess_attr_show(type, name, param) \ static ssize_t \ show_##type##_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ struct iscsi_bus_flash_session *fnode_sess = \ iscsi_dev_to_flash_session(dev);\ struct iscsi_transport *t = fnode_sess->transport; \ return t->get_flashnode_param(fnode_sess, param, buf); \ } \ #define iscsi_flashnode_sess_attr(type, name, param) \ iscsi_flashnode_sess_attr_show(type, name, param) \ static ISCSI_FLASHNODE_ATTR(type, name, S_IRUGO, \ show_##type##_##name, NULL); /* Flash node session attributes */ iscsi_flashnode_sess_attr(fnode, auto_snd_tgt_disable, ISCSI_FLASHNODE_AUTO_SND_TGT_DISABLE); iscsi_flashnode_sess_attr(fnode, discovery_session, ISCSI_FLASHNODE_DISCOVERY_SESS); iscsi_flashnode_sess_attr(fnode, portal_type, ISCSI_FLASHNODE_PORTAL_TYPE); iscsi_flashnode_sess_attr(fnode, entry_enable, ISCSI_FLASHNODE_ENTRY_EN); iscsi_flashnode_sess_attr(fnode, immediate_data, ISCSI_FLASHNODE_IMM_DATA_EN); iscsi_flashnode_sess_attr(fnode, initial_r2t, ISCSI_FLASHNODE_INITIAL_R2T_EN); iscsi_flashnode_sess_attr(fnode, data_seq_in_order, ISCSI_FLASHNODE_DATASEQ_INORDER); iscsi_flashnode_sess_attr(fnode, data_pdu_in_order, ISCSI_FLASHNODE_PDU_INORDER); iscsi_flashnode_sess_attr(fnode, chap_auth, ISCSI_FLASHNODE_CHAP_AUTH_EN); iscsi_flashnode_sess_attr(fnode, discovery_logout, ISCSI_FLASHNODE_DISCOVERY_LOGOUT_EN); iscsi_flashnode_sess_attr(fnode, bidi_chap, ISCSI_FLASHNODE_BIDI_CHAP_EN); iscsi_flashnode_sess_attr(fnode, discovery_auth_optional, ISCSI_FLASHNODE_DISCOVERY_AUTH_OPTIONAL); iscsi_flashnode_sess_attr(fnode, erl, ISCSI_FLASHNODE_ERL); iscsi_flashnode_sess_attr(fnode, first_burst_len, 
ISCSI_FLASHNODE_FIRST_BURST); iscsi_flashnode_sess_attr(fnode, def_time2wait, ISCSI_FLASHNODE_DEF_TIME2WAIT); iscsi_flashnode_sess_attr(fnode, def_time2retain, ISCSI_FLASHNODE_DEF_TIME2RETAIN); iscsi_flashnode_sess_attr(fnode, max_outstanding_r2t, ISCSI_FLASHNODE_MAX_R2T); iscsi_flashnode_sess_attr(fnode, isid, ISCSI_FLASHNODE_ISID); iscsi_flashnode_sess_attr(fnode, tsid, ISCSI_FLASHNODE_TSID); iscsi_flashnode_sess_attr(fnode, max_burst_len, ISCSI_FLASHNODE_MAX_BURST); iscsi_flashnode_sess_attr(fnode, def_taskmgmt_tmo, ISCSI_FLASHNODE_DEF_TASKMGMT_TMO); iscsi_flashnode_sess_attr(fnode, targetalias, ISCSI_FLASHNODE_ALIAS); iscsi_flashnode_sess_attr(fnode, targetname, ISCSI_FLASHNODE_NAME); iscsi_flashnode_sess_attr(fnode, tpgt, ISCSI_FLASHNODE_TPGT); iscsi_flashnode_sess_attr(fnode, discovery_parent_idx, ISCSI_FLASHNODE_DISCOVERY_PARENT_IDX); iscsi_flashnode_sess_attr(fnode, discovery_parent_type, ISCSI_FLASHNODE_DISCOVERY_PARENT_TYPE); iscsi_flashnode_sess_attr(fnode, chap_in_idx, ISCSI_FLASHNODE_CHAP_IN_IDX); iscsi_flashnode_sess_attr(fnode, chap_out_idx, ISCSI_FLASHNODE_CHAP_OUT_IDX); iscsi_flashnode_sess_attr(fnode, username, ISCSI_FLASHNODE_USERNAME); iscsi_flashnode_sess_attr(fnode, username_in, ISCSI_FLASHNODE_USERNAME_IN); iscsi_flashnode_sess_attr(fnode, password, ISCSI_FLASHNODE_PASSWORD); iscsi_flashnode_sess_attr(fnode, password_in, ISCSI_FLASHNODE_PASSWORD_IN); iscsi_flashnode_sess_attr(fnode, is_boot_target, ISCSI_FLASHNODE_IS_BOOT_TGT); static struct attribute *iscsi_flashnode_sess_attrs[] = { &dev_attr_fnode_auto_snd_tgt_disable.attr, &dev_attr_fnode_discovery_session.attr, &dev_attr_fnode_portal_type.attr, &dev_attr_fnode_entry_enable.attr, &dev_attr_fnode_immediate_data.attr, &dev_attr_fnode_initial_r2t.attr, &dev_attr_fnode_data_seq_in_order.attr, &dev_attr_fnode_data_pdu_in_order.attr, &dev_attr_fnode_chap_auth.attr, &dev_attr_fnode_discovery_logout.attr, &dev_attr_fnode_bidi_chap.attr, &dev_attr_fnode_discovery_auth_optional.attr, 
&dev_attr_fnode_erl.attr, &dev_attr_fnode_first_burst_len.attr, &dev_attr_fnode_def_time2wait.attr, &dev_attr_fnode_def_time2retain.attr, &dev_attr_fnode_max_outstanding_r2t.attr, &dev_attr_fnode_isid.attr, &dev_attr_fnode_tsid.attr, &dev_attr_fnode_max_burst_len.attr, &dev_attr_fnode_def_taskmgmt_tmo.attr, &dev_attr_fnode_targetalias.attr, &dev_attr_fnode_targetname.attr, &dev_attr_fnode_tpgt.attr, &dev_attr_fnode_discovery_parent_idx.attr, &dev_attr_fnode_discovery_parent_type.attr, &dev_attr_fnode_chap_in_idx.attr, &dev_attr_fnode_chap_out_idx.attr, &dev_attr_fnode_username.attr, &dev_attr_fnode_username_in.attr, &dev_attr_fnode_password.attr, &dev_attr_fnode_password_in.attr, &dev_attr_fnode_is_boot_target.attr, NULL, };

/*
 * Visibility hook for the flashnode session attribute group.
 *
 * Maps @attr to the ISCSI_FLASHNODE_* parameter id it exposes and returns
 * whatever the owning transport's attr_is_visible() callback reports for
 * (ISCSI_FLASHNODE_PARAM, param).  An attribute that is not one of the
 * flashnode session attributes listed below raises a one-time warning and
 * is hidden (0 is returned).  The index argument @i is not used.
 */
static umode_t iscsi_flashnode_sess_attr_is_visible(struct kobject *kobj,
						    struct attribute *attr,
						    int i)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct iscsi_bus_flash_session *fnode_sess =
						iscsi_dev_to_flash_session(dev);
	struct iscsi_transport *t = fnode_sess->transport;
	int param;

	/* Identify the attribute by address; there is no table to index. */
	if (attr == &dev_attr_fnode_auto_snd_tgt_disable.attr) {
		param = ISCSI_FLASHNODE_AUTO_SND_TGT_DISABLE;
	} else if (attr == &dev_attr_fnode_discovery_session.attr) {
		param = ISCSI_FLASHNODE_DISCOVERY_SESS;
	} else if (attr == &dev_attr_fnode_portal_type.attr) {
		param = ISCSI_FLASHNODE_PORTAL_TYPE;
	} else if (attr == &dev_attr_fnode_entry_enable.attr) {
		param = ISCSI_FLASHNODE_ENTRY_EN;
	} else if (attr == &dev_attr_fnode_immediate_data.attr) {
		param = ISCSI_FLASHNODE_IMM_DATA_EN;
	} else if (attr == &dev_attr_fnode_initial_r2t.attr) {
		param = ISCSI_FLASHNODE_INITIAL_R2T_EN;
	} else if (attr == &dev_attr_fnode_data_seq_in_order.attr) {
		param = ISCSI_FLASHNODE_DATASEQ_INORDER;
	} else if (attr == &dev_attr_fnode_data_pdu_in_order.attr) {
		param = ISCSI_FLASHNODE_PDU_INORDER;
	} else if (attr == &dev_attr_fnode_chap_auth.attr) {
		param = ISCSI_FLASHNODE_CHAP_AUTH_EN;
	} else if (attr == &dev_attr_fnode_discovery_logout.attr) {
		param = ISCSI_FLASHNODE_DISCOVERY_LOGOUT_EN;
	} else if (attr == &dev_attr_fnode_bidi_chap.attr) {
		param = ISCSI_FLASHNODE_BIDI_CHAP_EN;
	} else if (attr == &dev_attr_fnode_discovery_auth_optional.attr) {
		param = ISCSI_FLASHNODE_DISCOVERY_AUTH_OPTIONAL;
	} else if (attr == &dev_attr_fnode_erl.attr) {
		param = ISCSI_FLASHNODE_ERL;
	} else if (attr == &dev_attr_fnode_first_burst_len.attr) {
		param = ISCSI_FLASHNODE_FIRST_BURST;
	} else if (attr == &dev_attr_fnode_def_time2wait.attr) {
		param = ISCSI_FLASHNODE_DEF_TIME2WAIT;
	} else if (attr == &dev_attr_fnode_def_time2retain.attr) {
		param = ISCSI_FLASHNODE_DEF_TIME2RETAIN;
	} else if (attr == &dev_attr_fnode_max_outstanding_r2t.attr) {
		param = ISCSI_FLASHNODE_MAX_R2T;
	} else if (attr == &dev_attr_fnode_isid.attr) {
		param = ISCSI_FLASHNODE_ISID;
	} else if (attr == &dev_attr_fnode_tsid.attr) {
		param = ISCSI_FLASHNODE_TSID;
	} else if (attr == &dev_attr_fnode_max_burst_len.attr) {
		param = ISCSI_FLASHNODE_MAX_BURST;
	} else if (attr == &dev_attr_fnode_def_taskmgmt_tmo.attr) {
		param = ISCSI_FLASHNODE_DEF_TASKMGMT_TMO;
	} else if (attr == &dev_attr_fnode_targetalias.attr) {
		param = ISCSI_FLASHNODE_ALIAS;
	} else if (attr == &dev_attr_fnode_targetname.attr) {
		param = ISCSI_FLASHNODE_NAME;
	} else if (attr == &dev_attr_fnode_tpgt.attr) {
		param = ISCSI_FLASHNODE_TPGT;
	} else if (attr == &dev_attr_fnode_discovery_parent_idx.attr) {
		param = ISCSI_FLASHNODE_DISCOVERY_PARENT_IDX;
	} else if (attr == &dev_attr_fnode_discovery_parent_type.attr) {
		param = ISCSI_FLASHNODE_DISCOVERY_PARENT_TYPE;
	} else if (attr == &dev_attr_fnode_chap_in_idx.attr) {
		param = ISCSI_FLASHNODE_CHAP_IN_IDX;
	} else if (attr == &dev_attr_fnode_chap_out_idx.attr) {
		param = ISCSI_FLASHNODE_CHAP_OUT_IDX;
	} else if (attr == &dev_attr_fnode_username.attr) {
		param = ISCSI_FLASHNODE_USERNAME;
	} else if (attr == &dev_attr_fnode_username_in.attr) {
		param = ISCSI_FLASHNODE_USERNAME_IN;
	} else if (attr == &dev_attr_fnode_password.attr) {
		param = ISCSI_FLASHNODE_PASSWORD;
	} else if (attr == &dev_attr_fnode_password_in.attr) {
		param = ISCSI_FLASHNODE_PASSWORD_IN;
	} else if (attr == &dev_attr_fnode_is_boot_target.attr) {
		param = ISCSI_FLASHNODE_IS_BOOT_TGT;
	} else {
		/* Unknown attribute: warn once and keep it hidden. */
		WARN_ONCE(1, "Invalid flashnode session attr");
		return 0;
	}

	return t->attr_is_visible(ISCSI_FLASHNODE_PARAM, param);
}

/* Flashnode session attributes, filtered through the visibility hook above. */
static struct attribute_group iscsi_flashnode_sess_attr_group = {
	.attrs = iscsi_flashnode_sess_attrs,
	.is_visible = iscsi_flashnode_sess_attr_is_visible,
};

/* NULL-terminated group list referenced by the session device type. */
static const struct attribute_group *iscsi_flashnode_sess_attr_groups[] = {
	&iscsi_flashnode_sess_attr_group,
	NULL,
};

/*
 * Device release callback for a flashnode session: frees the targetname,
 * targetalias and portal_type buffers and then the session object itself.
 */
static void iscsi_flashnode_sess_release(struct device *dev)
{
	struct iscsi_bus_flash_session *fnode_sess =
						iscsi_dev_to_flash_session(dev);

	kfree(fnode_sess->targetname);
	kfree(fnode_sess->targetalias);
	kfree(fnode_sess->portal_type);
	kfree(fnode_sess);
}

/* Device type assigned to every flashnode session device. */
static const struct device_type iscsi_flashnode_sess_dev_type = {
	.name = "iscsi_flashnode_sess_dev_type",
	.groups = iscsi_flashnode_sess_attr_groups,
	.release = iscsi_flashnode_sess_release,
};

/* flash node connection attrs show */
/*
 * Generates show_<type>_<name>(): looks up the flashnode connection behind
 * @dev and its parent flashnode session, then asks the connection's
 * transport to format @param into @buf through get_flashnode_param().
 * Note that the session, not the connection, is what gets passed to the
 * transport callback.
 */
#define iscsi_flashnode_conn_attr_show(type, name, param)		\
static ssize_t								\
show_##type##_##name(struct device *dev, struct device_attribute *attr,	\
		     char *buf)						\
{									\
	struct iscsi_bus_flash_conn *fnode_conn = iscsi_dev_to_flash_conn(dev);\
	struct iscsi_bus_flash_session *fnode_sess =			\
				iscsi_flash_conn_to_flash_session(fnode_conn);\
	struct iscsi_transport *t = fnode_conn->transport;		\
	return t->get_flashnode_param(fnode_sess, param, buf);		\
}									\


/*
 * Emits the show routine above plus the attribute object itself via
 * ISCSI_FLASHNODE_ATTR() with mode S_IRUGO.  The trailing NULL is
 * presumably the store callback (i.e. the attribute is read-only) --
 * confirm against the ISCSI_FLASHNODE_ATTR definition.
 */
#define iscsi_flashnode_conn_attr(type, name, param)			\
	iscsi_flashnode_conn_attr_show(type, name, param)		\
static ISCSI_FLASHNODE_ATTR(type, name, S_IRUGO,			\
			    show_##type##_##name, NULL);

/* Flash node connection attributes */
iscsi_flashnode_conn_attr(fnode, is_fw_assigned_ipv6,
			  ISCSI_FLASHNODE_IS_FW_ASSIGNED_IPV6);
iscsi_flashnode_conn_attr(fnode, header_digest, ISCSI_FLASHNODE_HDR_DGST_EN);
iscsi_flashnode_conn_attr(fnode, data_digest, ISCSI_FLASHNODE_DATA_DGST_EN);
iscsi_flashnode_conn_attr(fnode, snack_req, ISCSI_FLASHNODE_SNACK_REQ_EN);
iscsi_flashnode_conn_attr(fnode, tcp_timestamp_stat,
			  ISCSI_FLASHNODE_TCP_TIMESTAMP_STAT);
iscsi_flashnode_conn_attr(fnode, tcp_nagle_disable,
			  ISCSI_FLASHNODE_TCP_NAGLE_DISABLE);
iscsi_flashnode_conn_attr(fnode, tcp_wsf_disable,
			  ISCSI_FLASHNODE_TCP_WSF_DISABLE);
iscsi_flashnode_conn_attr(fnode, tcp_timer_scale,
			  ISCSI_FLASHNODE_TCP_TIMER_SCALE);
iscsi_flashnode_conn_attr(fnode, tcp_timestamp_enable,
			  ISCSI_FLASHNODE_TCP_TIMESTAMP_EN);
iscsi_flashnode_conn_attr(fnode, fragment_disable,
			  ISCSI_FLASHNODE_IP_FRAG_DISABLE);
iscsi_flashnode_conn_attr(fnode, keepalive_tmo, ISCSI_FLASHNODE_KEEPALIVE_TMO);
iscsi_flashnode_conn_attr(fnode, port, ISCSI_FLASHNODE_PORT);
iscsi_flashnode_conn_attr(fnode, ipaddress, ISCSI_FLASHNODE_IPADDR);
iscsi_flashnode_conn_attr(fnode, max_recv_dlength,
			  ISCSI_FLASHNODE_MAX_RECV_DLENGTH);
iscsi_flashnode_conn_attr(fnode, max_xmit_dlength,
			  ISCSI_FLASHNODE_MAX_XMIT_DLENGTH);
iscsi_flashnode_conn_attr(fnode, local_port, ISCSI_FLASHNODE_LOCAL_PORT);
iscsi_flashnode_conn_attr(fnode, ipv4_tos, ISCSI_FLASHNODE_IPV4_TOS);
iscsi_flashnode_conn_attr(fnode, ipv6_traffic_class, ISCSI_FLASHNODE_IPV6_TC);
iscsi_flashnode_conn_attr(fnode, ipv6_flow_label,
			  ISCSI_FLASHNODE_IPV6_FLOW_LABEL);
iscsi_flashnode_conn_attr(fnode, redirect_ipaddr,
			  ISCSI_FLASHNODE_REDIRECT_IPADDR);
iscsi_flashnode_conn_attr(fnode, max_segment_size,
			  ISCSI_FLASHNODE_MAX_SEGMENT_SIZE);
iscsi_flashnode_conn_attr(fnode, link_local_ipv6,
			  ISCSI_FLASHNODE_LINK_LOCAL_IPV6);
iscsi_flashnode_conn_attr(fnode, tcp_xmit_wsf, ISCSI_FLASHNODE_TCP_XMIT_WSF);
iscsi_flashnode_conn_attr(fnode, tcp_recv_wsf, ISCSI_FLASHNODE_TCP_RECV_WSF);
iscsi_flashnode_conn_attr(fnode, statsn, ISCSI_FLASHNODE_STATSN);
iscsi_flashnode_conn_attr(fnode, exp_statsn, ISCSI_FLASHNODE_EXP_STATSN);

/*
 * NULL-terminated list of every flashnode connection attribute.  Each entry
 * must also be handled in iscsi_flashnode_conn_attr_is_visible(), which
 * otherwise warns and hides the attribute.
 */
static struct attribute *iscsi_flashnode_conn_attrs[] = {
	&dev_attr_fnode_is_fw_assigned_ipv6.attr,
	&dev_attr_fnode_header_digest.attr,
	&dev_attr_fnode_data_digest.attr,
	&dev_attr_fnode_snack_req.attr,
	&dev_attr_fnode_tcp_timestamp_stat.attr,
	&dev_attr_fnode_tcp_nagle_disable.attr,
	&dev_attr_fnode_tcp_wsf_disable.attr,
	&dev_attr_fnode_tcp_timer_scale.attr,
	&dev_attr_fnode_tcp_timestamp_enable.attr,
	&dev_attr_fnode_fragment_disable.attr,
	&dev_attr_fnode_max_recv_dlength.attr,
	&dev_attr_fnode_max_xmit_dlength.attr,
	&dev_attr_fnode_keepalive_tmo.attr,
	&dev_attr_fnode_port.attr,
	&dev_attr_fnode_ipaddress.attr,
	&dev_attr_fnode_redirect_ipaddr.attr,
	&dev_attr_fnode_max_segment_size.attr,
	&dev_attr_fnode_local_port.attr,
	&dev_attr_fnode_ipv4_tos.attr,
	&dev_attr_fnode_ipv6_traffic_class.attr,
	&dev_attr_fnode_ipv6_flow_label.attr,
	&dev_attr_fnode_link_local_ipv6.attr,
	&dev_attr_fnode_tcp_xmit_wsf.attr,
	&dev_attr_fnode_tcp_recv_wsf.attr,
	&dev_attr_fnode_statsn.attr,
	&dev_attr_fnode_exp_statsn.attr,
	NULL,
};

/*
 * Visibility hook for the flashnode connection attribute group.
 *
 * Maps @attr to its ISCSI_FLASHNODE_* parameter id and returns the mode
 * reported by the connection transport's attr_is_visible() callback for
 * (ISCSI_FLASHNODE_PARAM, param).  An unrecognised attribute raises a
 * one-time warning and is hidden (0 is returned).  @i is not used.
 */
static umode_t iscsi_flashnode_conn_attr_is_visible(struct kobject *kobj,
						    struct attribute *attr,
						    int i)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct iscsi_bus_flash_conn *fnode_conn = iscsi_dev_to_flash_conn(dev);
	struct iscsi_transport *t = fnode_conn->transport;
	int param;

	/* Identify the attribute by address. */
	if (attr == &dev_attr_fnode_is_fw_assigned_ipv6.attr) {
		param = ISCSI_FLASHNODE_IS_FW_ASSIGNED_IPV6;
	} else if (attr == &dev_attr_fnode_header_digest.attr) {
		param = ISCSI_FLASHNODE_HDR_DGST_EN;
	} else if (attr == &dev_attr_fnode_data_digest.attr) {
		param = ISCSI_FLASHNODE_DATA_DGST_EN;
	} else if (attr == &dev_attr_fnode_snack_req.attr) {
		param = ISCSI_FLASHNODE_SNACK_REQ_EN;
	} else if (attr == &dev_attr_fnode_tcp_timestamp_stat.attr) {
		param = ISCSI_FLASHNODE_TCP_TIMESTAMP_STAT;
	} else if (attr == &dev_attr_fnode_tcp_nagle_disable.attr) {
		param = ISCSI_FLASHNODE_TCP_NAGLE_DISABLE;
	} else if (attr == &dev_attr_fnode_tcp_wsf_disable.attr) {
		param = ISCSI_FLASHNODE_TCP_WSF_DISABLE;
	} else if (attr == &dev_attr_fnode_tcp_timer_scale.attr) {
		param = ISCSI_FLASHNODE_TCP_TIMER_SCALE;
	} else if (attr == &dev_attr_fnode_tcp_timestamp_enable.attr) {
		param = ISCSI_FLASHNODE_TCP_TIMESTAMP_EN;
	} else if (attr == &dev_attr_fnode_fragment_disable.attr) {
		param = ISCSI_FLASHNODE_IP_FRAG_DISABLE;
	} else if (attr == &dev_attr_fnode_max_recv_dlength.attr) {
		param = ISCSI_FLASHNODE_MAX_RECV_DLENGTH;
	} else if (attr == &dev_attr_fnode_max_xmit_dlength.attr) {
		param = ISCSI_FLASHNODE_MAX_XMIT_DLENGTH;
	} else if (attr == &dev_attr_fnode_keepalive_tmo.attr) {
		param = ISCSI_FLASHNODE_KEEPALIVE_TMO;
	} else if (attr == &dev_attr_fnode_port.attr) {
		param = ISCSI_FLASHNODE_PORT;
	} else if (attr == &dev_attr_fnode_ipaddress.attr) {
		param = ISCSI_FLASHNODE_IPADDR;
	} else if (attr == &dev_attr_fnode_redirect_ipaddr.attr) {
		param = ISCSI_FLASHNODE_REDIRECT_IPADDR;
	} else if (attr == &dev_attr_fnode_max_segment_size.attr) {
		param = ISCSI_FLASHNODE_MAX_SEGMENT_SIZE;
	} else if (attr == &dev_attr_fnode_local_port.attr) {
		param = ISCSI_FLASHNODE_LOCAL_PORT;
	} else if (attr == &dev_attr_fnode_ipv4_tos.attr) {
		param = ISCSI_FLASHNODE_IPV4_TOS;
	} else if (attr == &dev_attr_fnode_ipv6_traffic_class.attr) {
		param = ISCSI_FLASHNODE_IPV6_TC;
	} else if (attr == &dev_attr_fnode_ipv6_flow_label.attr) {
		param = ISCSI_FLASHNODE_IPV6_FLOW_LABEL;
	} else if (attr == &dev_attr_fnode_link_local_ipv6.attr) {
		param = ISCSI_FLASHNODE_LINK_LOCAL_IPV6;
	} else if (attr == &dev_attr_fnode_tcp_xmit_wsf.attr) {
		param = ISCSI_FLASHNODE_TCP_XMIT_WSF;
	} else if (attr == &dev_attr_fnode_tcp_recv_wsf.attr) {
		param = ISCSI_FLASHNODE_TCP_RECV_WSF;
	} else if (attr == &dev_attr_fnode_statsn.attr) {
		param = ISCSI_FLASHNODE_STATSN;
	} else if (attr == &dev_attr_fnode_exp_statsn.attr) {
		param = ISCSI_FLASHNODE_EXP_STATSN;
	} else {
		/* Unknown attribute: warn once and keep it hidden. */
		WARN_ONCE(1, "Invalid flashnode connection attr");
		return 0;
	}

	return t->attr_is_visible(ISCSI_FLASHNODE_PARAM, param);
}

/* Flashnode connection attributes, filtered through the hook above. */
static struct attribute_group iscsi_flashnode_conn_attr_group = {
	.attrs = iscsi_flashnode_conn_attrs,
	.is_visible = iscsi_flashnode_conn_attr_is_visible,
};

/* NULL-terminated group list referenced by the connection device type. */
static const struct attribute_group *iscsi_flashnode_conn_attr_groups[] = {
	&iscsi_flashnode_conn_attr_group,
	NULL,
};

/*
 * Device release callback for a flashnode connection: frees the ipaddress,
 * redirect_ipaddr and link_local_ipv6_addr buffers and then the connection
 * object itself.
 */
static void iscsi_flashnode_conn_release(struct device *dev)
{
	struct iscsi_bus_flash_conn *fnode_conn = iscsi_dev_to_flash_conn(dev);

	kfree(fnode_conn->ipaddress);
	kfree(fnode_conn->redirect_ipaddr);
	kfree(fnode_conn->link_local_ipv6_addr);
	kfree(fnode_conn);
}

/* Device type assigned to every flashnode connection device. */
static const struct device_type iscsi_flashnode_conn_dev_type = {
	.name = "iscsi_flashnode_conn_dev_type",
	.groups = iscsi_flashnode_conn_attr_groups,
	.release = iscsi_flashnode_conn_release,
};

/* Forward declaration: the match routine below needs the bus address. */
static const struct bus_type iscsi_flashnode_bus;

/*
 * Bus match callback, also exported for use as a device filter.
 * Returns 1 when @dev sits on the iscsi flashnode bus, 0 otherwise.
 * @drv is ignored (callers in this file pass NULL).
 */
int iscsi_flashnode_bus_match(struct device *dev,
			      const struct device_driver *drv)
{
	if (dev->bus == &iscsi_flashnode_bus)
		return 1;
	return 0;
}
EXPORT_SYMBOL_GPL(iscsi_flashnode_bus_match);

/* Bus shared by flashnode session and flashnode connection devices. */
static const struct bus_type iscsi_flashnode_bus = {
	.name = "iscsi_flashnode",
	.match = &iscsi_flashnode_bus_match,
};

/**
 * iscsi_create_flashnode_sess - Add flashnode session entry in sysfs
 * @shost: pointer to host data
 * @index: index of flashnode to add in sysfs
 * @transport: pointer to transport data
 * @dd_size: number of extra driver-private bytes to allocate after the
 *	     session structure
 *
 * Allocates a zeroed flashnode session followed by @dd_size bytes of
 * driver data and registers it as a child device of @shost named
 * "flashnode_sess-<host_no>:<index>".  @index is also stored as the
 * session's target_id.  When @dd_size is non-zero, dd_data points at the
 * trailing driver area.
 *
 * Returns:
 *  pointer to allocated flashnode sess on success
 *  %NULL on failure
 */
struct iscsi_bus_flash_session *
iscsi_create_flashnode_sess(struct Scsi_Host *shost, int index,
			    struct iscsi_transport *transport,
			    int dd_size)
{
	struct iscsi_bus_flash_session *fnode_sess;
	int err;

	fnode_sess = kzalloc(sizeof(*fnode_sess) + dd_size, GFP_KERNEL);
	if (!fnode_sess)
		return NULL;

	fnode_sess->transport = transport;
	fnode_sess->target_id = index;
	fnode_sess->dev.type = &iscsi_flashnode_sess_dev_type;
	fnode_sess->dev.bus = &iscsi_flashnode_bus;
	fnode_sess->dev.parent = &shost->shost_gendev;
	dev_set_name(&fnode_sess->dev, "flashnode_sess-%u:%u",
		     shost->host_no, index);

	err = device_register(&fnode_sess->dev);
	if (err)
		goto put_dev;

	/* Driver-private area lives directly behind the structure. */
	if (dd_size)
		fnode_sess->dd_data = &fnode_sess[1];

	return fnode_sess;

put_dev:
	/*
	 * The object is dropped with put_device() rather than kfree(); the
	 * memory is presumably freed through the device type's release
	 * callback once the last reference goes away.
	 */
	put_device(&fnode_sess->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(iscsi_create_flashnode_sess);

/**
 * iscsi_create_flashnode_conn - Add flashnode conn entry in sysfs
 * @shost: pointer to host data
 * @fnode_sess: pointer to the parent flashnode session entry
 * @transport: pointer to transport data
 * @dd_size: number of extra driver-private bytes to allocate after the
 *	     connection structure
 *
 * Allocates a zeroed flashnode connection followed by @dd_size bytes of
 * driver data and registers it as a child device of @fnode_sess named
 * "flashnode_conn-<host_no>:<target_id>:0".  When @dd_size is non-zero,
 * dd_data points at the trailing driver area.
 *
 * Returns:
 *  pointer to allocated flashnode conn on success
 *  %NULL on failure
 */
struct iscsi_bus_flash_conn *
iscsi_create_flashnode_conn(struct Scsi_Host *shost,
			    struct iscsi_bus_flash_session *fnode_sess,
			    struct iscsi_transport *transport,
			    int dd_size)
{
	struct iscsi_bus_flash_conn *fnode_conn;
	int err;

	fnode_conn = kzalloc(sizeof(*fnode_conn) + dd_size, GFP_KERNEL);
	if (!fnode_conn)
		return NULL;

	fnode_conn->transport = transport;
	fnode_conn->dev.type = &iscsi_flashnode_conn_dev_type;
	fnode_conn->dev.bus = &iscsi_flashnode_bus;
	fnode_conn->dev.parent = &fnode_sess->dev;
	dev_set_name(&fnode_conn->dev, "flashnode_conn-%u:%u:0",
		     shost->host_no, fnode_sess->target_id);

	err = device_register(&fnode_conn->dev);
	if (err)
		goto put_dev;

	/* Driver-private area lives directly behind the structure. */
	if (dd_size)
		fnode_conn->dd_data = &fnode_conn[1];

	return fnode_conn;

put_dev:
	/* Dropped with put_device(), not kfree(); see the release callback. */
	put_device(&fnode_conn->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(iscsi_create_flashnode_conn);

/**
 * iscsi_is_flashnode_conn_dev - verify passed device is to be flashnode conn
 * @dev: device to verify
 * @data: unused
 *
 * Checks only that @dev is on the iscsi flashnode bus.  Flashnode session
 * devices use the same bus, so this identifies a connection only when it
 * is applied to the children of a flashnode session, as done in this file.
 *
 * Returns:
 *  1 on success
 *  0 on failure
 */
static int iscsi_is_flashnode_conn_dev(struct device *dev, const void *data)
{
	return dev->bus == &iscsi_flashnode_bus;
}

/* Unregisters the flashnode connection device; always returns 0. */
static int iscsi_destroy_flashnode_conn(struct iscsi_bus_flash_conn *fnode_conn)
{
	device_unregister(&fnode_conn->dev);
	return 0;
}

/*
 * device_find_child() match callback: returns 1 when @dev is on the
 * flashnode bus and its session target_id equals the integer @data points
 * to, 0 otherwise.
 */
static int flashnode_match_index(struct device *dev, const void *data)
{
	struct iscsi_bus_flash_session *fnode_sess = NULL;
	int ret = 0;

	if (!iscsi_flashnode_bus_match(dev, NULL))
		goto exit_match_index;

	fnode_sess = iscsi_dev_to_flash_session(dev);
	ret = (fnode_sess->target_id == *((const int *)data)) ? 1 : 0;

exit_match_index:
	return ret;
}

/**
 * iscsi_get_flashnode_by_index - finds flashnode session entry by index
 * @shost: pointer to host data
 * @idx: index to match
 *
 * Finds the flashnode session object for the passed index among the
 * children of @shost.
 *
 * NOTE(review): device_find_child() normally hands back the device with a
 * reference held, which the caller would have to drop with put_device() --
 * confirm against the callers.
 *
 * Returns:
 *  pointer to found flashnode session object on success
 *  %NULL on failure
 */
static struct iscsi_bus_flash_session *
iscsi_get_flashnode_by_index(struct Scsi_Host *shost, uint32_t idx)
{
	struct iscsi_bus_flash_session *fnode_sess = NULL;
	struct device *dev;

	dev = device_find_child(&shost->shost_gendev, &idx,
				flashnode_match_index);
	if (dev)
		fnode_sess = iscsi_dev_to_flash_session(dev);

	return fnode_sess;
}

/**
 * iscsi_find_flashnode_sess - finds flashnode session entry
 * @shost: pointer to host data
 * @data: pointer to data containing value to use for comparison
 * @fn: function pointer that does actual comparison
 *
 * Finds the flashnode session object comparing the data passed using logic
 * defined in passed function pointer.  The search covers the child devices
 * of @shost.
 *
 * Returns:
 *  pointer to found flashnode session device object on success
 *  %NULL on failure
 */
struct device *
iscsi_find_flashnode_sess(struct Scsi_Host *shost, const void *data,
			  device_match_t fn)
{
	return device_find_child(&shost->shost_gendev, data, fn);
}
EXPORT_SYMBOL_GPL(iscsi_find_flashnode_sess);

/**
 * iscsi_find_flashnode_conn - finds flashnode connection entry
 * @fnode_sess: pointer to parent flashnode session entry
 *
 * Searches the child devices of @fnode_sess for one that is on the iscsi
 * flashnode bus and returns it.  No caller-supplied data or comparison
 * function is involved.
 *
 * Returns:
 *  pointer to found flashnode connection device object on success
 *  %NULL on failure
 */
struct device *
iscsi_find_flashnode_conn(struct iscsi_bus_flash_session *fnode_sess)
{
	return device_find_child(&fnode_sess->dev, NULL,
				 iscsi_is_flashnode_conn_dev);
}
EXPORT_SYMBOL_GPL(iscsi_find_flashnode_conn); static int iscsi_iter_destroy_flashnode_conn_fn(struct device *dev, void *data) { if (!iscsi_is_flashnode_conn_dev(dev, NULL)) return 0; return iscsi_destroy_flashnode_conn(iscsi_dev_to_flash_conn(dev)); } /** * iscsi_destroy_flashnode_sess - destroy flashnode session entry * @fnode_sess: pointer to flashnode session entry to be destroyed * * Deletes the flashnode session entry and all children flashnode connection * entries from sysfs */ void iscsi_destroy_flashnode_sess(struct iscsi_bus_flash_session *fnode_sess) { int err; err = device_for_each_child(&fnode_sess->dev, NULL, iscsi_iter_destroy_flashnode_conn_fn); if (err) pr_err("Could not delete all connections for %s. Error %d.\n", fnode_sess->dev.kobj.name, err); device_unregister(&fnode_sess->dev); } EXPORT_SYMBOL_GPL(iscsi_destroy_flashnode_sess); static int iscsi_iter_destroy_flashnode_fn(struct device *dev, void *data) { if (!iscsi_flashnode_bus_match(dev, NULL)) return 0; iscsi_destroy_flashnode_sess(iscsi_dev_to_flash_session(dev)); return 0; } /** * iscsi_destroy_all_flashnode - destroy all flashnode session entries * @shost: pointer to host data * * Destroys all the flashnode session entries and all corresponding children * flashnode connection entries from sysfs */ void iscsi_destroy_all_flashnode(struct Scsi_Host *shost) { device_for_each_child(&shost->shost_gendev, NULL, iscsi_iter_destroy_flashnode_fn); } EXPORT_SYMBOL_GPL(iscsi_destroy_all_flashnode); /* * BSG support */ /** * iscsi_bsg_host_dispatch - Dispatch command to LLD. 
* @job: bsg job to be processed */ static int iscsi_bsg_host_dispatch(struct bsg_job *job) { struct Scsi_Host *shost = iscsi_job_to_shost(job); struct iscsi_bsg_request *req = job->request; struct iscsi_bsg_reply *reply = job->reply; struct iscsi_internal *i = to_iscsi_internal(shost->transportt); int cmdlen = sizeof(uint32_t); /* start with length of msgcode */ int ret; /* check if we have the msgcode value at least */ if (job->request_len < sizeof(uint32_t)) { ret = -ENOMSG; goto fail_host_msg; } /* Validate the host command */ switch (req->msgcode) { case ISCSI_BSG_HST_VENDOR: cmdlen += sizeof(struct iscsi_bsg_host_vendor); if ((shost->hostt->vendor_id == 0L) || (req->rqst_data.h_vendor.vendor_id != shost->hostt->vendor_id)) { ret = -ESRCH; goto fail_host_msg; } break; default: ret = -EBADR; goto fail_host_msg; } /* check if we really have all the request data needed */ if (job->request_len < cmdlen) { ret = -ENOMSG; goto fail_host_msg; } ret = i->iscsi_transport->bsg_request(job); if (!ret) return 0; fail_host_msg: /* return the errno failure code as the only status */ BUG_ON(job->reply_len < sizeof(uint32_t)); reply->reply_payload_rcv_len = 0; reply->result = ret; job->reply_len = sizeof(uint32_t); bsg_job_done(job, ret, 0); return 0; } /** * iscsi_bsg_host_add - Create and add the bsg hooks to receive requests * @shost: shost for iscsi_host * @ihost: iscsi_cls_host adding the structures to */ static int iscsi_bsg_host_add(struct Scsi_Host *shost, struct iscsi_cls_host *ihost) { struct device *dev = &shost->shost_gendev; struct iscsi_internal *i = to_iscsi_internal(shost->transportt); struct queue_limits lim; struct request_queue *q; char bsg_name[20]; if (!i->iscsi_transport->bsg_request) return -ENOTSUPP; snprintf(bsg_name, sizeof(bsg_name), "iscsi_host%d", shost->host_no); scsi_init_limits(shost, &lim); q = bsg_setup_queue(dev, bsg_name, &lim, iscsi_bsg_host_dispatch, NULL, 0); if (IS_ERR(q)) { shost_printk(KERN_ERR, shost, "bsg interface failed to " 
"initialize - no request queue\n"); return PTR_ERR(q); } ihost->bsg_q = q; return 0; } static int iscsi_setup_host(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; memset(ihost, 0, sizeof(*ihost)); mutex_init(&ihost->mutex); iscsi_bsg_host_add(shost, ihost); /* ignore any bsg add error - we just can't do sgio */ return 0; } static int iscsi_remove_host(struct transport_container *tc, struct device *dev, struct device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); struct iscsi_cls_host *ihost = shost->shost_data; bsg_remove_queue(ihost->bsg_q); return 0; } static DECLARE_TRANSPORT_CLASS(iscsi_host_class, "iscsi_host", iscsi_setup_host, iscsi_remove_host, NULL); static DECLARE_TRANSPORT_CLASS(iscsi_session_class, "iscsi_session", NULL, NULL, NULL); static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, "iscsi_connection", NULL, NULL, NULL); static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex); static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); static DEFINE_SPINLOCK(connlock); static uint32_t iscsi_conn_get_sid(struct iscsi_cls_conn *conn) { struct iscsi_cls_session *sess = iscsi_dev_to_session(conn->dev.parent); return sess->sid; } /* * Returns the matching session to a given sid */ static struct iscsi_cls_session *iscsi_session_lookup(uint32_t sid) { unsigned long flags; struct iscsi_cls_session *sess; spin_lock_irqsave(&sesslock, flags); list_for_each_entry(sess, &sesslist, sess_list) { if (sess->sid == sid) { spin_unlock_irqrestore(&sesslock, flags); return sess; } } spin_unlock_irqrestore(&sesslock, flags); return NULL; } /* * Returns the matching connection to a given sid / cid tuple */ static struct iscsi_cls_conn *iscsi_conn_lookup(uint32_t sid, uint32_t cid) { unsigned long flags; struct iscsi_cls_conn *conn; spin_lock_irqsave(&connlock, flags); list_for_each_entry(conn, &connlist, 
conn_list) { if ((conn->cid == cid) && (iscsi_conn_get_sid(conn) == sid)) { spin_unlock_irqrestore(&connlock, flags); return conn; } } spin_unlock_irqrestore(&connlock, flags); return NULL; } /* * The following functions can be used by LLDs that allocate * their own scsi_hosts or by software iscsi LLDs */ static struct { int value; char *name; } iscsi_session_state_names[] = { { ISCSI_SESSION_LOGGED_IN, "LOGGED_IN" }, { ISCSI_SESSION_FAILED, "FAILED" }, { ISCSI_SESSION_FREE, "FREE" }, }; static const char *iscsi_session_state_name(int state) { int i; char *name = NULL; for (i = 0; i < ARRAY_SIZE(iscsi_session_state_names); i++) { if (iscsi_session_state_names[i].value == state) { name = iscsi_session_state_names[i].name; break; } } return name; } static char *iscsi_session_target_state_name[] = { [ISCSI_SESSION_TARGET_UNBOUND] = "UNBOUND", [ISCSI_SESSION_TARGET_ALLOCATED] = "ALLOCATED", [ISCSI_SESSION_TARGET_SCANNED] = "SCANNED", [ISCSI_SESSION_TARGET_UNBINDING] = "UNBINDING", }; int iscsi_session_chkready(struct iscsi_cls_session *session) { int err; switch (session->state) { case ISCSI_SESSION_LOGGED_IN: err = 0; break; case ISCSI_SESSION_FAILED: err = DID_IMM_RETRY << 16; break; case ISCSI_SESSION_FREE: err = DID_TRANSPORT_FAILFAST << 16; break; default: err = DID_NO_CONNECT << 16; break; } return err; } EXPORT_SYMBOL_GPL(iscsi_session_chkready); int iscsi_is_session_online(struct iscsi_cls_session *session) { unsigned long flags; int ret = 0; spin_lock_irqsave(&session->lock, flags); if (session->state == ISCSI_SESSION_LOGGED_IN) ret = 1; spin_unlock_irqrestore(&session->lock, flags); return ret; } EXPORT_SYMBOL_GPL(iscsi_is_session_online); static void iscsi_session_release(struct device *dev) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev); struct Scsi_Host *shost; shost = iscsi_session_to_shost(session); scsi_host_put(shost); ISCSI_DBG_TRANS_SESSION(session, "Completing session release\n"); kfree(session); } int iscsi_is_session_dev(const 
struct device *dev) { return dev->release == iscsi_session_release; } EXPORT_SYMBOL_GPL(iscsi_is_session_dev); static int iscsi_iter_session_fn(struct device *dev, void *data) { void (* fn) (struct iscsi_cls_session *) = data; if (!iscsi_is_session_dev(dev)) return 0; fn(iscsi_dev_to_session(dev)); return 0; } void iscsi_host_for_each_session(struct Scsi_Host *shost, void (*fn)(struct iscsi_cls_session *)) { device_for_each_child(&shost->shost_gendev, fn, iscsi_iter_session_fn); } EXPORT_SYMBOL_GPL(iscsi_host_for_each_session); struct iscsi_scan_data { unsigned int channel; unsigned int id; u64 lun; enum scsi_scan_mode rescan; }; static int iscsi_user_scan_session(struct device *dev, void *data) { struct iscsi_scan_data *scan_data = data; struct iscsi_cls_session *session; struct Scsi_Host *shost; struct iscsi_cls_host *ihost; unsigned long flags; unsigned int id; if (!iscsi_is_session_dev(dev)) return 0; session = iscsi_dev_to_session(dev); ISCSI_DBG_TRANS_SESSION(session, "Scanning session\n"); shost = iscsi_session_to_shost(session); ihost = shost->shost_data; mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); if (session->state != ISCSI_SESSION_LOGGED_IN) { spin_unlock_irqrestore(&session->lock, flags); goto user_scan_exit; } id = session->target_id; spin_unlock_irqrestore(&session->lock, flags); if (id != ISCSI_MAX_TARGET) { if ((scan_data->channel == SCAN_WILD_CARD || scan_data->channel == 0) && (scan_data->id == SCAN_WILD_CARD || scan_data->id == id)) { scsi_scan_target(&session->dev, 0, id, scan_data->lun, scan_data->rescan); spin_lock_irqsave(&session->lock, flags); session->target_state = ISCSI_SESSION_TARGET_SCANNED; spin_unlock_irqrestore(&session->lock, flags); } } user_scan_exit: mutex_unlock(&ihost->mutex); ISCSI_DBG_TRANS_SESSION(session, "Completed session scan\n"); return 0; } static int iscsi_user_scan(struct Scsi_Host *shost, uint channel, uint id, u64 lun) { struct iscsi_scan_data scan_data; scan_data.channel = channel; 
scan_data.id = id; scan_data.lun = lun; scan_data.rescan = SCSI_SCAN_MANUAL; return device_for_each_child(&shost->shost_gendev, &scan_data, iscsi_user_scan_session); } static void iscsi_scan_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, scan_work); struct iscsi_scan_data scan_data; scan_data.channel = 0; scan_data.id = SCAN_WILD_CARD; scan_data.lun = SCAN_WILD_CARD; scan_data.rescan = SCSI_SCAN_RESCAN; iscsi_user_scan_session(&session->dev, &scan_data); } /** * iscsi_block_scsi_eh - block scsi eh until session state has transistioned * @cmd: scsi cmd passed to scsi eh handler * * If the session is down this function will wait for the recovery * timer to fire or for the session to be logged back in. If the * recovery timer fires then FAST_IO_FAIL is returned. The caller * should pass this error value to the scsi eh. */ int iscsi_block_scsi_eh(struct scsi_cmnd *cmd) { struct iscsi_cls_session *session = starget_to_session(scsi_target(cmd->device)); unsigned long flags; int ret = 0; spin_lock_irqsave(&session->lock, flags); while (session->state != ISCSI_SESSION_LOGGED_IN) { if (session->state == ISCSI_SESSION_FREE) { ret = FAST_IO_FAIL; break; } spin_unlock_irqrestore(&session->lock, flags); msleep(1000); spin_lock_irqsave(&session->lock, flags); } spin_unlock_irqrestore(&session->lock, flags); return ret; } EXPORT_SYMBOL_GPL(iscsi_block_scsi_eh); static void session_recovery_timedout(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, recovery_work.work); unsigned long flags; iscsi_cls_session_printk(KERN_INFO, session, "session recovery timed out after %d secs\n", session->recovery_tmo); spin_lock_irqsave(&session->lock, flags); switch (session->state) { case ISCSI_SESSION_FAILED: session->state = ISCSI_SESSION_FREE; break; case ISCSI_SESSION_LOGGED_IN: case ISCSI_SESSION_FREE: /* we raced with the unblock's flush */ 
spin_unlock_irqrestore(&session->lock, flags); return; } spin_unlock_irqrestore(&session->lock, flags); ISCSI_DBG_TRANS_SESSION(session, "Unblocking SCSI target\n"); scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking SCSI target\n"); if (session->transport->session_recovery_timedout) session->transport->session_recovery_timedout(session); } static void __iscsi_unblock_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, unblock_work); unsigned long flags; ISCSI_DBG_TRANS_SESSION(session, "Unblocking session\n"); cancel_delayed_work_sync(&session->recovery_work); spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_LOGGED_IN; spin_unlock_irqrestore(&session->lock, flags); /* start IO */ scsi_target_unblock(&session->dev, SDEV_RUNNING); ISCSI_DBG_TRANS_SESSION(session, "Completed unblocking session\n"); } /** * iscsi_unblock_session - set a session as logged in and start IO. * @session: iscsi session * * Mark a session as ready to accept IO. */ void iscsi_unblock_session(struct iscsi_cls_session *session) { if (!cancel_work_sync(&session->block_work)) cancel_delayed_work_sync(&session->recovery_work); queue_work(session->workq, &session->unblock_work); /* * Blocking the session can be done from any context so we only * queue the block work. Make sure the unblock work has completed * because it flushes/cancels the other works and updates the state. 
*/ flush_work(&session->unblock_work); } EXPORT_SYMBOL_GPL(iscsi_unblock_session); static void __iscsi_block_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, block_work); struct Scsi_Host *shost = iscsi_session_to_shost(session); unsigned long flags; ISCSI_DBG_TRANS_SESSION(session, "Blocking session\n"); spin_lock_irqsave(&session->lock, flags); session->state = ISCSI_SESSION_FAILED; spin_unlock_irqrestore(&session->lock, flags); scsi_block_targets(shost, &session->dev); ISCSI_DBG_TRANS_SESSION(session, "Completed SCSI target blocking\n"); if (session->recovery_tmo >= 0) queue_delayed_work(session->workq, &session->recovery_work, session->recovery_tmo * HZ); } void iscsi_block_session(struct iscsi_cls_session *session) { queue_work(session->workq, &session->block_work); } EXPORT_SYMBOL_GPL(iscsi_block_session); static void __iscsi_unbind_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, unbind_work); struct Scsi_Host *shost = iscsi_session_to_shost(session); struct iscsi_cls_host *ihost = shost->shost_data; unsigned long flags; unsigned int target_id; bool remove_target = true; ISCSI_DBG_TRANS_SESSION(session, "Unbinding session\n"); /* Prevent new scans and make sure scanning is not in progress */ mutex_lock(&ihost->mutex); spin_lock_irqsave(&session->lock, flags); if (session->target_state == ISCSI_SESSION_TARGET_ALLOCATED) { remove_target = false; } else if (session->target_state != ISCSI_SESSION_TARGET_SCANNED) { spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); ISCSI_DBG_TRANS_SESSION(session, "Skipping target unbinding: Session is unbound/unbinding.\n"); return; } session->target_state = ISCSI_SESSION_TARGET_UNBINDING; target_id = session->target_id; session->target_id = ISCSI_MAX_TARGET; spin_unlock_irqrestore(&session->lock, flags); mutex_unlock(&ihost->mutex); if (remove_target) 
scsi_remove_target(&session->dev); if (session->ida_used) ida_free(&iscsi_sess_ida, target_id); iscsi_session_event(session, ISCSI_KEVENT_UNBIND_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed target removal\n"); spin_lock_irqsave(&session->lock, flags); session->target_state = ISCSI_SESSION_TARGET_UNBOUND; spin_unlock_irqrestore(&session->lock, flags); } static void __iscsi_destroy_session(struct work_struct *work) { struct iscsi_cls_session *session = container_of(work, struct iscsi_cls_session, destroy_work); session->transport->destroy_session(session); } struct iscsi_cls_session * iscsi_alloc_session(struct Scsi_Host *shost, struct iscsi_transport *transport, int dd_size) { struct iscsi_cls_session *session; session = kzalloc(sizeof(*session) + dd_size, GFP_KERNEL); if (!session) return NULL; session->transport = transport; session->creator = -1; session->recovery_tmo = 120; session->recovery_tmo_sysfs_override = false; session->state = ISCSI_SESSION_FREE; INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout); INIT_LIST_HEAD(&session->sess_list); INIT_WORK(&session->unblock_work, __iscsi_unblock_session); INIT_WORK(&session->block_work, __iscsi_block_session); INIT_WORK(&session->unbind_work, __iscsi_unbind_session); INIT_WORK(&session->scan_work, iscsi_scan_session); INIT_WORK(&session->destroy_work, __iscsi_destroy_session); spin_lock_init(&session->lock); /* this is released in the dev's release function */ scsi_host_get(shost); session->dev.parent = &shost->shost_gendev; session->dev.release = iscsi_session_release; device_initialize(&session->dev); if (dd_size) session->dd_data = &session[1]; ISCSI_DBG_TRANS_SESSION(session, "Completed session allocation\n"); return session; } EXPORT_SYMBOL_GPL(iscsi_alloc_session); int iscsi_add_session(struct iscsi_cls_session *session, unsigned int target_id) { struct Scsi_Host *shost = iscsi_session_to_shost(session); unsigned long flags; int id = 0; int err; session->sid = atomic_add_return(1, 
&iscsi_session_nr); session->workq = alloc_workqueue("iscsi_ctrl_%d:%d", WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0, shost->host_no, session->sid); if (!session->workq) return -ENOMEM; if (target_id == ISCSI_MAX_TARGET) { id = ida_alloc(&iscsi_sess_ida, GFP_KERNEL); if (id < 0) { iscsi_cls_session_printk(KERN_ERR, session, "Failure in Target ID Allocation\n"); err = id; goto destroy_wq; } session->target_id = (unsigned int)id; session->ida_used = true; } else session->target_id = target_id; spin_lock_irqsave(&session->lock, flags); session->target_state = ISCSI_SESSION_TARGET_ALLOCATED; spin_unlock_irqrestore(&session->lock, flags); dev_set_name(&session->dev, "session%u", session->sid); err = device_add(&session->dev); if (err) { iscsi_cls_session_printk(KERN_ERR, session, "could not register session's dev\n"); goto release_ida; } err = transport_register_device(&session->dev); if (err) { iscsi_cls_session_printk(KERN_ERR, session, "could not register transport's dev\n"); goto release_dev; } spin_lock_irqsave(&sesslock, flags); list_add(&session->sess_list, &sesslist); spin_unlock_irqrestore(&sesslock, flags); iscsi_session_event(session, ISCSI_KEVENT_CREATE_SESSION); ISCSI_DBG_TRANS_SESSION(session, "Completed session adding\n"); return 0; release_dev: device_del(&session->dev); release_ida: if (session->ida_used) ida_free(&iscsi_sess_ida, session->target_id); destroy_wq: destroy_workqueue(session->workq); return err; } EXPORT_SYMBOL_GPL(iscsi_add_session); static void iscsi_conn_release(struct device *dev) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev); struct device *parent = conn->dev.parent; ISCSI_DBG_TRANS_CONN(conn, "Releasing conn\n"); kfree(conn); put_device(parent); } static int iscsi_is_conn_dev(const struct device *dev) { return dev->release == iscsi_conn_release; } static int iscsi_iter_destroy_conn_fn(struct device *dev, void *data) { if (!iscsi_is_conn_dev(dev)) return 0; iscsi_remove_conn(iscsi_dev_to_conn(dev)); 
iscsi_put_conn(iscsi_dev_to_conn(dev));

	return 0;
}

/*
 * iscsi_remove_session - unregister a session
 * @session: session to remove
 *
 * Takes the session off the session list, cancels or flushes its pending
 * work, unbinds the target, removes any remaining child connections and
 * deletes the session device.
 */
void iscsi_remove_session(struct iscsi_cls_session *session)
{
	unsigned long flags;
	int err;

	ISCSI_DBG_TRANS_SESSION(session, "Removing session\n");

	spin_lock_irqsave(&sesslock, flags);
	if (!list_empty(&session->sess_list))
		list_del(&session->sess_list);
	spin_unlock_irqrestore(&sesslock, flags);

	/* Recovery work is only cancelled when no block work was pending. */
	if (!cancel_work_sync(&session->block_work))
		cancel_delayed_work_sync(&session->recovery_work);
	cancel_work_sync(&session->unblock_work);
	/*
	 * If we are blocked let commands flow again. The lld or iscsi
	 * layer should set up the queuecommand to fail commands.
	 * We assume that LLD will not be calling block/unblock while
	 * removing the session.
	 */
	spin_lock_irqsave(&session->lock, flags);
	session->state = ISCSI_SESSION_FREE;
	spin_unlock_irqrestore(&session->lock, flags);

	scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE);
	/*
	 * qla4xxx can perform its own scans when it runs in kernel only
	 * mode. Make sure to flush those scans.
	 */
	flush_work(&session->scan_work);
	/* flush running unbind operations */
	flush_work(&session->unbind_work);
	__iscsi_unbind_session(&session->unbind_work);

	/* hw iscsi may not have removed all connections from session */
	err = device_for_each_child(&session->dev, NULL,
				    iscsi_iter_destroy_conn_fn);
	if (err)
		iscsi_cls_session_printk(KERN_ERR, session,
					 "Could not delete all connections "
					 "for session. Error %d.\n", err);

	transport_unregister_device(&session->dev);

	destroy_workqueue(session->workq);

	ISCSI_DBG_TRANS_SESSION(session, "Completing session removal\n");
	device_del(&session->dev);
}
EXPORT_SYMBOL_GPL(iscsi_remove_session);

/*
 * Set the connection state that matches @flag (FAILED for recovery, DOWN
 * for termination) and call the transport's stop_conn(). An unknown flag
 * is logged and nothing else is done.
 */
static void iscsi_stop_conn(struct iscsi_cls_conn *conn, int flag)
{
	ISCSI_DBG_TRANS_CONN(conn, "Stopping conn.\n");

	switch (flag) {
	case STOP_CONN_RECOVER:
		WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);
		break;
	case STOP_CONN_TERM:
		WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);
		break;
	default:
		iscsi_cls_conn_printk(KERN_ERR, conn, "invalid stop flag %d\n",
				      flag);
		return;
	}

	conn->transport->stop_conn(conn, flag);
	ISCSI_DBG_TRANS_CONN(conn, "Stopping conn done.\n");
}

/*
 * Mark the connection failed and, if it has an endpoint and the transport
 * implements ep_disconnect, detach the endpoint, unbind and disconnect it.
 */
static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
{
	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
	struct iscsi_endpoint *ep;

	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
	WRITE_ONCE(conn->state, ISCSI_CONN_FAILED);

	if (!conn->ep || !session->transport->ep_disconnect)
		return;

	/* Clear conn->ep before handing the endpoint to the transport. */
	ep = conn->ep;
	conn->ep = NULL;

	session->transport->unbind_conn(conn, is_active);
	session->transport->ep_disconnect(ep);
	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
}

/*
 * Disconnect an endpoint bound to @conn. The ep_mutex is dropped around
 * flush_work() in the cleanup case and re-taken before returning, so it is
 * presumably held by the caller (both visible callers take it).
 */
static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn,
					 struct iscsi_endpoint *ep,
					 bool is_active)
{
	/* Check if this was a conn error and the kernel took ownership */
	spin_lock_irq(&conn->lock);
	if (!test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
		spin_unlock_irq(&conn->lock);
		iscsi_ep_disconnect(conn, is_active);
	} else {
		spin_unlock_irq(&conn->lock);
		ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
		mutex_unlock(&conn->ep_mutex);

		flush_work(&conn->cleanup_work);
		/*
		 * Userspace is now done with the EP so we can release the ref
		 * iscsi_cleanup_conn_work_fn took.
*/
		iscsi_put_endpoint(ep);
		mutex_lock(&conn->ep_mutex);
	}
}

/*
 * Stop a connection: disconnect any endpoint still bound to it, then stop
 * it with @flag. Always returns 0.
 */
static int iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag)
{
	ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n");
	/*
	 * For offload, iscsid may not know about the ep like when iscsid is
	 * restarted or for kernel based session shutdown iscsid is not even
	 * up. For these cases, we do the disconnect now.
	 */
	mutex_lock(&conn->ep_mutex);
	if (conn->ep)
		iscsi_if_disconnect_bound_ep(conn, conn->ep, true);
	mutex_unlock(&conn->ep_mutex);

	/*
	 * If this is a termination we have to call stop_conn with that flag
	 * so the correct states get set. If we haven't run the work yet try to
	 * avoid the extra run.
	 */
	if (flag == STOP_CONN_TERM) {
		cancel_work_sync(&conn->cleanup_work);
		iscsi_stop_conn(conn, flag);
	} else {
		/*
		 * Figure out if it was the kernel or userspace initiating this.
		 */
		spin_lock_irq(&conn->lock);
		if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
			spin_unlock_irq(&conn->lock);
			iscsi_stop_conn(conn, flag);
		} else {
			/* Cleanup bit was already set: wait for that work. */
			spin_unlock_irq(&conn->lock);
			ISCSI_DBG_TRANS_CONN(conn,
					     "flush kernel conn cleanup.\n");
			flush_work(&conn->cleanup_work);
		}
		/*
		 * Only clear for recovery to avoid extra cleanup runs during
		 * termination.
		 */
		spin_lock_irq(&conn->lock);
		clear_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags);
		spin_unlock_irq(&conn->lock);
	}
	ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop done.\n");
	return 0;
}

/*
 * cleanup_work handler: under ep_mutex, disconnect the endpoint and stop
 * the connection for recovery.
 */
static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
{
	struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn,
						   cleanup_work);
	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);

	mutex_lock(&conn->ep_mutex);
	/*
	 * Get a ref to the ep, so we don't release its ID until after
	 * userspace is done referencing it in iscsi_if_disconnect_bound_ep.
	 */
	if (conn->ep)
		get_device(&conn->ep->dev);
	iscsi_ep_disconnect(conn, false);

	if (system_state != SYSTEM_RUNNING) {
		/*
		 * If the user has set up for the session to never timeout
		 * then hang like they wanted. For all other cases fail right
		 * away since userspace is not going to relogin.
		 */
		if (session->recovery_tmo > 0)
			session->recovery_tmo = 0;
	}

	iscsi_stop_conn(conn, STOP_CONN_RECOVER);
	mutex_unlock(&conn->ep_mutex);
	ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n");
}

/*
 * device_for_each_child() callback: stop a child connection that is not
 * already down, then call the transport's destroy_conn().
 */
static int iscsi_iter_force_destroy_conn_fn(struct device *dev, void *data)
{
	struct iscsi_transport *transport;
	struct iscsi_cls_conn *conn;

	if (!iscsi_is_conn_dev(dev))
		return 0;

	conn = iscsi_dev_to_conn(dev);
	transport = conn->transport;

	if (READ_ONCE(conn->state) != ISCSI_CONN_DOWN)
		iscsi_if_stop_conn(conn, STOP_CONN_TERM);

	transport->destroy_conn(conn);
	return 0;
}

/**
 * iscsi_force_destroy_session - destroy a session from the kernel
 * @session: session to destroy
 *
 * Force the destruction of a session from the kernel. This should only be
 * used when userspace is no longer running during system shutdown.
 */
void iscsi_force_destroy_session(struct iscsi_cls_session *session)
{
	struct iscsi_transport *transport = session->transport;
	unsigned long flags;

	WARN_ON_ONCE(system_state == SYSTEM_RUNNING);

	spin_lock_irqsave(&sesslock, flags);
	if (list_empty(&session->sess_list)) {
		spin_unlock_irqrestore(&sesslock, flags);
		/*
		 * Conn/ep is already freed. Session is being torn down via
		 * async path. For shutdown we don't care about it so return.
*/
		return;
	}
	spin_unlock_irqrestore(&sesslock, flags);

	/* Tear down every child connection, then the session itself. */
	device_for_each_child(&session->dev, NULL,
			      iscsi_iter_force_destroy_conn_fn);
	transport->destroy_session(session);
}
EXPORT_SYMBOL_GPL(iscsi_force_destroy_session);

/*
 * iscsi_free_session - send the destroy event and drop a device reference
 * @session: session to release
 */
void iscsi_free_session(struct iscsi_cls_session *session)
{
	ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n");
	iscsi_session_event(session, ISCSI_KEVENT_DESTROY_SESSION);
	put_device(&session->dev);
}
EXPORT_SYMBOL_GPL(iscsi_free_session);

/**
 * iscsi_alloc_conn - alloc iscsi class connection
 * @session: iscsi cls session
 * @dd_size: private driver data size
 * @cid: connection id
 *
 * Returns the new connection in state ISCSI_CONN_DOWN (device initialised
 * but not added), or NULL if the allocation or the session reference fails.
 */
struct iscsi_cls_conn *
iscsi_alloc_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
{
	struct iscsi_transport *transport = session->transport;
	struct iscsi_cls_conn *conn;

	conn = kzalloc(sizeof(*conn) + dd_size, GFP_KERNEL);
	if (!conn)
		return NULL;
	/* Driver-private data lives directly behind the connection struct. */
	if (dd_size)
		conn->dd_data = &conn[1];

	mutex_init(&conn->ep_mutex);
	spin_lock_init(&conn->lock);
	INIT_LIST_HEAD(&conn->conn_list);
	INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
	conn->transport = transport;
	conn->cid = cid;
	WRITE_ONCE(conn->state, ISCSI_CONN_DOWN);

	/* this is released in the dev's release function */
	if (!get_device(&session->dev))
		goto free_conn;

	dev_set_name(&conn->dev, "connection%d:%u", session->sid, cid);
	device_initialize(&conn->dev);
	conn->dev.parent = &session->dev;
	conn->dev.release = iscsi_conn_release;

	return conn;

free_conn:
	kfree(conn);
	return NULL;
}
EXPORT_SYMBOL_GPL(iscsi_alloc_conn);

/**
 * iscsi_add_conn - add iscsi class connection
 * @conn: iscsi cls connection
 *
 * This will expose iscsi_cls_conn to sysfs so make sure the related
 * resources for sysfs attributes are initialized before calling this.
*/
int iscsi_add_conn(struct iscsi_cls_conn *conn)
{
	int err;
	unsigned long flags;
	struct iscsi_cls_session *session = iscsi_dev_to_session(conn->dev.parent);

	err = device_add(&conn->dev);
	if (err) {
		iscsi_cls_session_printk(KERN_ERR, session,
					 "could not register connection's dev\n");
		return err;
	}
	err = transport_register_device(&conn->dev);
	if (err) {
		iscsi_cls_session_printk(KERN_ERR, session,
					 "could not register transport's dev\n");
		/* Undo device_add() before reporting the failure. */
		device_del(&conn->dev);
		return err;
	}

	spin_lock_irqsave(&connlock, flags);
	list_add(&conn->conn_list, &connlist);
	spin_unlock_irqrestore(&connlock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(iscsi_add_conn);

/**
 * iscsi_remove_conn - remove iscsi class connection from sysfs
 * @conn: iscsi cls connection
 *
 * Remove iscsi_cls_conn from sysfs, and wait for previous
 * read/write of iscsi_cls_conn's attributes in sysfs to finish.
 */
void iscsi_remove_conn(struct iscsi_cls_conn *conn)
{
	unsigned long flags;

	spin_lock_irqsave(&connlock, flags);
	list_del(&conn->conn_list);
	spin_unlock_irqrestore(&connlock, flags);

	transport_unregister_device(&conn->dev);
	device_del(&conn->dev);
}
EXPORT_SYMBOL_GPL(iscsi_remove_conn);

/* Drop a reference on the connection's device. */
void iscsi_put_conn(struct iscsi_cls_conn *conn)
{
	put_device(&conn->dev);
}
EXPORT_SYMBOL_GPL(iscsi_put_conn);

/* Take a reference on the connection's device. */
void iscsi_get_conn(struct iscsi_cls_conn *conn)
{
	get_device(&conn->dev);
}
EXPORT_SYMBOL_GPL(iscsi_get_conn);

/*
 * iscsi interface functions
 */

/*
 * Find the iscsi_internal entry registered for @tt on the iscsi_transports
 * list. Returns NULL when the transport is not on the list.
 */
static struct iscsi_internal *
iscsi_if_transport_lookup(struct iscsi_transport *tt)
{
	struct iscsi_internal *priv;
	unsigned long flags;

	spin_lock_irqsave(&iscsi_transport_lock, flags);
	list_for_each_entry(priv, &iscsi_transports, list) {
		if (tt == priv->iscsi_transport) {
			spin_unlock_irqrestore(&iscsi_transport_lock, flags);
			return priv;
		}
	}
	spin_unlock_irqrestore(&iscsi_transport_lock, flags);
	return NULL;
}

/* Multicast @skb to netlink group @group on the nls socket. */
static int
iscsi_multicast_skb(struct sk_buff *skb, uint32_t group, gfp_t gfp)
{
	return nlmsg_multicast(nls, skb, 0, group, gfp);
}

/* Unicast @skb to the netlink port @portid on the nls socket. */
static int
iscsi_unicast_skb(struct
sk_buff *skb, u32 portid)
{
	return nlmsg_unicast(nls, skb, portid);
}

/*
 * iscsi_recv_pdu - multicast a received PDU to the ISCSID netlink group
 * @conn: connection the PDU belongs to
 * @hdr: PDU header, copied right after the event struct
 * @data: PDU data, copied right after the header
 * @data_size: number of bytes in @data
 *
 * Returns the multicast result, -EINVAL if the transport is not registered
 * or -ENOMEM if the skb cannot be allocated (a connection error event is
 * raised in that case).
 */
int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr,
		   char *data, uint32_t data_size)
{
	struct nlmsghdr	*nlh;
	struct sk_buff *skb;
	struct iscsi_uevent *ev;
	char *pdu;
	struct iscsi_internal *priv;
	int len = nlmsg_total_size(sizeof(*ev) + sizeof(struct iscsi_hdr) +
				   data_size);

	priv = iscsi_if_transport_lookup(conn->transport);
	if (!priv)
		return -EINVAL;

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb) {
		iscsi_conn_error_event(conn, ISCSI_ERR_CONN_FAILED);
		iscsi_cls_conn_printk(KERN_ERR, conn, "can not deliver "
				      "control PDU: OOM\n");
		return -ENOMEM;
	}

	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
	ev = nlmsg_data(nlh);
	/* Zero the event so unset fields do not carry stale memory. */
	memset(ev, 0, sizeof(*ev));
	ev->transport_handle = iscsi_handle(conn->transport);
	ev->type = ISCSI_KEVENT_RECV_PDU;
	ev->r.recv_req.cid = conn->cid;
	ev->r.recv_req.sid = iscsi_conn_get_sid(conn);
	pdu = (char*)ev + sizeof(*ev);
	memcpy(pdu, hdr, sizeof(struct iscsi_hdr));
	memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size);

	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC);
}
EXPORT_SYMBOL_GPL(iscsi_recv_pdu);

/*
 * iscsi_offload_mesg - multicast an offload message to the UIP netlink group
 * @shost: host whose host_no is reported for path-request and if-down events
 * @transport: transport the message is sent for
 * @type: event type stored in the message
 * @data: payload copied right after the event struct
 * @data_size: number of bytes in @data
 *
 * Returns the multicast result or -ENOMEM if the skb cannot be allocated.
 */
int iscsi_offload_mesg(struct Scsi_Host *shost,
		       struct iscsi_transport *transport, uint32_t type,
		       char *data, uint16_t data_size)
{
	struct nlmsghdr	*nlh;
	struct sk_buff *skb;
	struct iscsi_uevent *ev;
	int len = nlmsg_total_size(sizeof(*ev) + data_size);

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb) {
		printk(KERN_ERR "can not deliver iscsi offload message:OOM\n");
		return -ENOMEM;
	}

	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
	ev = nlmsg_data(nlh);
	memset(ev, 0, sizeof(*ev));
	ev->type = type;
	ev->transport_handle = iscsi_handle(transport);
	/* Only these two event types carry a host number. */
	switch (type) {
	case ISCSI_KEVENT_PATH_REQ:
		ev->r.req_path.host_no = shost->host_no;
		break;
	case ISCSI_KEVENT_IF_DOWN:
		ev->r.notify_if_down.host_no = shost->host_no;
		break;
	}

	memcpy((char *)ev + sizeof(*ev), data, data_size);

	return iscsi_multicast_skb(skb, ISCSI_NL_GRP_UIP, GFP_ATOMIC);
}
EXPORT_SYMBOL_GPL(iscsi_offload_mesg); void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); unsigned long flags; int state; spin_lock_irqsave(&conn->lock, flags); /* * Userspace will only do a stop call if we are at least bound. And, we * only need to do the in kernel cleanup if in the UP state so cmds can * be released to upper layers. If in other states just wait for * userspace to avoid races that can leave the cleanup_work queued. */ state = READ_ONCE(conn->state); switch (state) { case ISCSI_CONN_BOUND: case ISCSI_CONN_UP: if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); } break; default: ISCSI_DBG_TRANS_CONN(conn, "Got conn error in state %d\n", state); break; } spin_unlock_irqrestore(&conn->lock, flags); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) return; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { iscsi_cls_conn_printk(KERN_ERR, conn, "gracefully ignored " "conn error (%d)\n", error); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_ERROR; ev->r.connerror.error = error; ev->r.connerror.cid = conn->cid; ev->r.connerror.sid = iscsi_conn_get_sid(conn); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn error (%d)\n", error); } EXPORT_SYMBOL_GPL(iscsi_conn_error_event); void iscsi_conn_login_event(struct iscsi_cls_conn *conn, enum iscsi_conn_state state) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev)); priv = iscsi_if_transport_lookup(conn->transport); if (!priv) return; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { 
iscsi_cls_conn_printk(KERN_ERR, conn, "gracefully ignored " "conn login (%d)\n", state); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_LOGIN_STATE; ev->r.conn_login.state = state; ev->r.conn_login.cid = conn->cid; ev->r.conn_login.sid = iscsi_conn_get_sid(conn); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_ATOMIC); iscsi_cls_conn_printk(KERN_INFO, conn, "detected conn login (%d)\n", state); } EXPORT_SYMBOL_GPL(iscsi_conn_login_event); void iscsi_post_host_event(uint32_t host_no, struct iscsi_transport *transport, enum iscsi_host_event_code code, uint32_t data_size, uint8_t *data) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_NOIO); if (!skb) { printk(KERN_ERR "gracefully ignored host event (%d):%d OOM\n", host_no, code); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(transport); ev->type = ISCSI_KEVENT_HOST_EVENT; ev->r.host_event.host_no = host_no; ev->r.host_event.code = code; ev->r.host_event.data_size = data_size; if (data_size) memcpy((char *)ev + sizeof(*ev), data, data_size); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_NOIO); } EXPORT_SYMBOL_GPL(iscsi_post_host_event); void iscsi_ping_comp_event(uint32_t host_no, struct iscsi_transport *transport, uint32_t status, uint32_t pid, uint32_t data_size, uint8_t *data) { struct nlmsghdr *nlh; struct sk_buff *skb; struct iscsi_uevent *ev; int len = nlmsg_total_size(sizeof(*ev) + data_size); skb = alloc_skb(len, GFP_NOIO); if (!skb) { printk(KERN_ERR "gracefully ignored ping comp: OOM\n"); return; } nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0); ev = nlmsg_data(nlh); ev->transport_handle = iscsi_handle(transport); ev->type = ISCSI_KEVENT_PING_COMP; ev->r.ping_comp.host_no = host_no; 
ev->r.ping_comp.status = status; ev->r.ping_comp.pid = pid; ev->r.ping_comp.data_size = data_size; memcpy((char *)ev + sizeof(*ev), data, data_size); iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_NOIO); } EXPORT_SYMBOL_GPL(iscsi_ping_comp_event); static int iscsi_if_send_reply(u32 portid, int type, void *payload, int size) { struct sk_buff *skb; struct nlmsghdr *nlh; int len = nlmsg_total_size(size); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { printk(KERN_ERR "Could not allocate skb to send reply.\n"); return -ENOMEM; } nlh = __nlmsg_put(skb, 0, 0, type, (len - sizeof(*nlh)), 0); memcpy(nlmsg_data(nlh), payload, size); return iscsi_unicast_skb(skb, portid); } static int iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) { struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_stats *stats; struct sk_buff *skbstat; struct iscsi_cls_conn *conn; struct nlmsghdr *nlhstat; struct iscsi_uevent *evstat; struct iscsi_internal *priv; int len = nlmsg_total_size(sizeof(*ev) + sizeof(struct iscsi_stats) + sizeof(struct iscsi_stats_custom) * ISCSI_STATS_CUSTOM_MAX); int err = 0; priv = iscsi_if_transport_lookup(transport); if (!priv) return -EINVAL; conn = iscsi_conn_lookup(ev->u.get_stats.sid, ev->u.get_stats.cid); if (!conn) return -EEXIST; do { int actual_size; skbstat = alloc_skb(len, GFP_ATOMIC); if (!skbstat) { iscsi_cls_conn_printk(KERN_ERR, conn, "can not " "deliver stats: OOM\n"); return -ENOMEM; } nlhstat = __nlmsg_put(skbstat, 0, 0, 0, (len - sizeof(*nlhstat)), 0); evstat = nlmsg_data(nlhstat); memset(evstat, 0, sizeof(*evstat)); evstat->transport_handle = iscsi_handle(conn->transport); evstat->type = nlh->nlmsg_type; evstat->u.get_stats.cid = ev->u.get_stats.cid; evstat->u.get_stats.sid = ev->u.get_stats.sid; stats = (struct iscsi_stats *) ((char*)evstat + sizeof(*evstat)); memset(stats, 0, sizeof(*stats)); transport->get_stats(conn, stats); actual_size = nlmsg_total_size(sizeof(struct iscsi_uevent) + sizeof(struct iscsi_stats) + 
sizeof(struct iscsi_stats_custom) *
					       stats->custom_length);
		actual_size -= sizeof(*nlhstat);
		actual_size = nlmsg_msg_size(actual_size);
		/* Shrink the skb to the size the transport actually filled. */
		skb_trim(skbstat, NLMSG_ALIGN(actual_size));
		nlhstat->nlmsg_len = actual_size;

		err = iscsi_multicast_skb(skbstat, ISCSI_NL_GRP_ISCSID,
					  GFP_ATOMIC);
	} while (err < 0 && err != -ECONNREFUSED);

	return err;
}

/**
 * iscsi_session_event - send session destr. completion event
 * @session: iscsi class session
 * @event: type of event
 *
 * Returns the multicast result, -EINVAL for an unregistered transport or
 * an unsupported @event, or -ENOMEM if the skb cannot be allocated.
 */
int iscsi_session_event(struct iscsi_cls_session *session,
			enum iscsi_uevent_e event)
{
	struct iscsi_internal *priv;
	struct Scsi_Host *shost;
	struct iscsi_uevent *ev;
	struct sk_buff  *skb;
	struct nlmsghdr *nlh;
	int rc, len = nlmsg_total_size(sizeof(*ev));

	priv = iscsi_if_transport_lookup(session->transport);
	if (!priv)
		return -EINVAL;
	shost = iscsi_session_to_shost(session);

	skb = alloc_skb(len, GFP_KERNEL);
	if (!skb) {
		iscsi_cls_session_printk(KERN_ERR, session,
					 "Cannot notify userspace of session "
					 "event %u\n", event);
		return -ENOMEM;
	}

	nlh = __nlmsg_put(skb, 0, 0, 0, (len - sizeof(*nlh)), 0);
	ev = nlmsg_data(nlh);
	/*
	 * Zero the whole event, as iscsi_if_get_stats() does, so that fields
	 * not set below do not carry uninitialised skb memory to userspace.
	 */
	memset(ev, 0, sizeof(*ev));
	ev->transport_handle = iscsi_handle(session->transport);

	ev->type = event;
	switch (event) {
	case ISCSI_KEVENT_DESTROY_SESSION:
		ev->r.d_session.host_no = shost->host_no;
		ev->r.d_session.sid = session->sid;
		break;
	case ISCSI_KEVENT_CREATE_SESSION:
		ev->r.c_session_ret.host_no = shost->host_no;
		ev->r.c_session_ret.sid = session->sid;
		break;
	case ISCSI_KEVENT_UNBIND_SESSION:
		ev->r.unbind_session.host_no = shost->host_no;
		ev->r.unbind_session.sid = session->sid;
		break;
	default:
		iscsi_cls_session_printk(KERN_ERR, session, "Invalid event "
					 "%u.\n", event);
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 * this will occur if the daemon is not up, so we just warn
	 * the user and when the daemon is restarted it will handle it
	 */
	rc = iscsi_multicast_skb(skb, ISCSI_NL_GRP_ISCSID, GFP_KERNEL);
	if (rc == -ESRCH)
		iscsi_cls_session_printk(KERN_ERR, session,
					 "Cannot notify userspace of session "
					 "event %u. Check iscsi daemon\n",
					 event);

	ISCSI_DBG_TRANS_SESSION(session, "Completed handling event %d rc %d\n",
				event, rc);
	return rc;
}
EXPORT_SYMBOL_GPL(iscsi_session_event);

/*
 * Create a session through the transport and report its host number and
 * session id back in @ev. Returns 0 or -ENOMEM if the transport fails.
 */
static int
iscsi_if_create_session(struct iscsi_internal *priv, struct iscsi_endpoint *ep,
			struct iscsi_uevent *ev, pid_t pid,
			uint32_t initial_cmdsn,	uint16_t cmds_max,
			uint16_t queue_depth)
{
	struct iscsi_transport *transport = priv->iscsi_transport;
	struct iscsi_cls_session *session;
	struct Scsi_Host *shost;

	session = transport->create_session(ep, cmds_max, queue_depth,
					    initial_cmdsn);
	if (!session)
		return -ENOMEM;

	session->creator = pid;
	shost = iscsi_session_to_shost(session);
	ev->r.c_session_ret.host_no = shost->host_no;
	ev->r.c_session_ret.sid = session->sid;

	ISCSI_DBG_TRANS_SESSION(session,
				"Completed creating transport session\n");
	return 0;
}

/*
 * Create a connection on the session named in @ev and report the session
 * and connection ids back in @ev. Returns 0, -EINVAL for an unknown session
 * or -ENOMEM if the transport cannot create the connection.
 */
static int
iscsi_if_create_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
{
	struct iscsi_cls_conn *conn;
	struct iscsi_cls_session *session;

	session = iscsi_session_lookup(ev->u.c_conn.sid);
	if (!session) {
		printk(KERN_ERR "iscsi: invalid session %d.\n",
		       ev->u.c_conn.sid);
		return -EINVAL;
	}

	conn = transport->create_conn(session, ev->u.c_conn.cid);
	if (!conn) {
		/* Terminate the message with a newline like the other logs. */
		iscsi_cls_session_printk(KERN_ERR, session,
					 "couldn't create a new connection.\n");
		return -ENOMEM;
	}

	ev->r.c_conn_ret.sid = session->sid;
	ev->r.c_conn_ret.cid = conn->cid;

	ISCSI_DBG_TRANS_CONN(conn, "Completed creating transport conn\n");
	return 0;
}

/*
 * Destroy the connection named in @ev after flushing its cleanup work.
 * Returns 0, or -EINVAL if the connection is not found.
 */
static int
iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
{
	struct iscsi_cls_conn *conn;

	conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid);
	if (!conn)
		return -EINVAL;

	ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n");
	flush_work(&conn->cleanup_work);
	ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n");

	if (transport->destroy_conn)
		transport->destroy_conn(conn);
	return 0;
}

/*
 * Apply a parameter from userspace. The recovery timeout is handled here;
 * all other parameters go to the transport and need a bound or up conn.
 */
static int
iscsi_if_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32
rlen)
{
	/* The parameter value follows the event struct in the message. */
	char *data = (char*)ev + sizeof(*ev);
	struct iscsi_cls_conn *conn;
	struct iscsi_cls_session *session;
	int err = 0, value = 0, state;

	if (ev->u.set_param.len > rlen ||
	    ev->u.set_param.len > PAGE_SIZE)
		return -EINVAL;

	session = iscsi_session_lookup(ev->u.set_param.sid);
	conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid);
	if (!conn || !session)
		return -EINVAL;

	/* data will be regarded as NULL-ended string, do length check */
	if (strlen(data) > ev->u.set_param.len)
		return -EINVAL;

	switch (ev->u.set_param.param) {
	case ISCSI_PARAM_SESS_RECOVERY_TMO:
		/*
		 * NOTE(review): the sscanf() result is not checked; on a
		 * parse failure value keeps its initial 0.
		 */
		sscanf(data, "%d", &value);
		if (!session->recovery_tmo_sysfs_override)
			session->recovery_tmo = value;
		break;
	default:
		state = READ_ONCE(conn->state);
		if (state == ISCSI_CONN_BOUND || state == ISCSI_CONN_UP) {
			err = transport->set_param(conn, ev->u.set_param.param,
					data, ev->u.set_param.len);
		} else {
			return -ENOTCONN;
		}
	}

	return err;
}

/*
 * Connect an endpoint through the transport and return its handle in @ev.
 * For the THROUGH_HOST message type the host is looked up first and the
 * reference is dropped again before returning.
 */
static int iscsi_if_ep_connect(struct iscsi_transport *transport,
			       struct iscsi_uevent *ev, int msg_type)
{
	struct iscsi_endpoint *ep;
	struct sockaddr *dst_addr;
	struct Scsi_Host *shost = NULL;
	int non_blocking, err = 0;

	if (!transport->ep_connect)
		return -EINVAL;

	if (msg_type == ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST) {
		shost = scsi_host_lookup(ev->u.ep_connect_through_host.host_no);
		if (!shost) {
			printk(KERN_ERR "ep connect failed. Could not find "
			       "host no %u\n",
			       ev->u.ep_connect_through_host.host_no);
			return -ENODEV;
		}
		non_blocking = ev->u.ep_connect_through_host.non_blocking;
	} else
		non_blocking = ev->u.ep_connect.non_blocking;

	/* The destination address follows the event struct. */
	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
	ep = transport->ep_connect(shost, dst_addr, non_blocking);
	if (IS_ERR(ep)) {
		err = PTR_ERR(ep);
		goto release_host;
	}

	ev->r.ep_connect_ret.handle = ep->id;
release_host:
	if (shost)
		scsi_host_put(shost);
	return err;
}

/*
 * Disconnect the endpoint identified by @ep_handle. Returns 0, or -EINVAL
 * if the transport has no ep_disconnect or the endpoint is not found.
 */
static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
				  u64 ep_handle)
{
	struct iscsi_cls_conn *conn;
	struct iscsi_endpoint *ep;

	if (!transport->ep_disconnect)
		return -EINVAL;

	ep = iscsi_lookup_endpoint(ep_handle);
	if (!ep)
		return -EINVAL;

	conn = ep->conn;
	if (!conn) {
		/*
		 * conn was not even bound yet, so we can't get iscsi conn
		 * failures yet.
		 */
		transport->ep_disconnect(ep);
		goto put_ep;
	}

	mutex_lock(&conn->ep_mutex);
	iscsi_if_disconnect_bound_ep(conn, ep, false);
	mutex_unlock(&conn->ep_mutex);
put_ep:
	/* Drop the reference taken by iscsi_lookup_endpoint(). */
	iscsi_put_endpoint(ep);
	return 0;
}

/* Dispatch the endpoint connect, poll and disconnect messages. */
static int
iscsi_if_transport_ep(struct iscsi_transport *transport,
		      struct iscsi_uevent *ev, int msg_type, u32 rlen)
{
	struct iscsi_endpoint *ep;
	int rc = 0;

	switch (msg_type) {
	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
		/* The payload must be large enough to hold a sockaddr. */
		if (rlen < sizeof(struct sockaddr))
			rc = -EINVAL;
		else
			rc = iscsi_if_ep_connect(transport, ev, msg_type);
		break;
	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
		if (!transport->ep_poll)
			return -EINVAL;

		ep = iscsi_lookup_endpoint(ev->u.ep_poll.ep_handle);
		if (!ep)
			return -EINVAL;

		ev->r.retcode = transport->ep_poll(ep,
						   ev->u.ep_poll.timeout_ms);
		iscsi_put_endpoint(ep);
		break;
	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
		rc = iscsi_if_ep_disconnect(transport,
					    ev->u.ep_disconnect.ep_handle);
		break;
	}
	return rc;
}

/*
 * Pass a target discovery request for the host named in @ev to the
 * transport.
 */
static int
iscsi_tgt_dscvr(struct iscsi_transport *transport,
		struct iscsi_uevent *ev, u32 rlen)
{
	struct Scsi_Host *shost;
	struct sockaddr *dst_addr;
	int err;

	if (rlen < sizeof(*dst_addr))
return -EINVAL;

	if (!transport->tgt_dscvr)
		return -EINVAL;

	shost = scsi_host_lookup(ev->u.tgt_dscvr.host_no);
	if (!shost) {
		printk(KERN_ERR "target discovery could not find host no %u\n",
		       ev->u.tgt_dscvr.host_no);
		return -ENODEV;
	}


	/* The destination address follows the event struct. */
	dst_addr = (struct sockaddr *)((char*)ev + sizeof(*ev));
	err = transport->tgt_dscvr(shost, ev->u.tgt_dscvr.type,
				   ev->u.tgt_dscvr.enable, dst_addr);
	scsi_host_put(shost);
	return err;
}

/*
 * Pass a host parameter, given as a string after the event struct, to the
 * transport for the host named in @ev.
 */
static int
iscsi_set_host_param(struct iscsi_transport *transport,
		     struct iscsi_uevent *ev, u32 rlen)
{
	char *data = (char*)ev + sizeof(*ev);
	struct Scsi_Host *shost;
	int err;

	if (!transport->set_host_param)
		return -ENOSYS;

	if (ev->u.set_host_param.len > rlen ||
	    ev->u.set_host_param.len > PAGE_SIZE)
		return -EINVAL;

	shost = scsi_host_lookup(ev->u.set_host_param.host_no);
	if (!shost) {
		printk(KERN_ERR "set_host_param could not find host no %u\n",
		       ev->u.set_host_param.host_no);
		return -ENODEV;
	}

	/* see similar check in iscsi_if_set_param() */
	if (strlen(data) > ev->u.set_host_param.len) {
		err = -EINVAL;
		goto out;
	}

	err = transport->set_host_param(shost, ev->u.set_host_param.param,
					data, ev->u.set_host_param.len);
out:
	scsi_host_put(shost);
	return err;
}

/*
 * Pass the iscsi_path that follows the event struct to the transport for
 * the host named in @ev.
 */
static int
iscsi_set_path(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen)
{
	struct Scsi_Host *shost;
	struct iscsi_path *params;
	int err;

	if (rlen < sizeof(*params))
		return -EINVAL;

	if (!transport->set_path)
		return -ENOSYS;

	shost = scsi_host_lookup(ev->u.set_path.host_no);
	if (!shost) {
		printk(KERN_ERR "set path could not find host no %u\n",
		       ev->u.set_path.host_no);
		return -ENODEV;
	}

	params = (struct iscsi_path *)((char *)ev + sizeof(*ev));
	err = transport->set_path(shost, params);

	scsi_host_put(shost);
	return err;
}

/* Return 1 if any connection on connlist belongs to session @sid, else 0. */
static int iscsi_session_has_conns(int sid)
{
	struct iscsi_cls_conn *conn;
	unsigned long flags;
	int found = 0;

	spin_lock_irqsave(&connlock, flags);
	list_for_each_entry(conn, &connlist, conn_list) {
		if (iscsi_conn_get_sid(conn) == sid) {
			found = 1;
			break;
		}
	}
spin_unlock_irqrestore(&connlock, flags);

	return found;
}

/*
 * Pass the interface parameter payload that follows the event struct to
 * the transport for the host named in @ev.
 */
static int
iscsi_set_iface_params(struct iscsi_transport *transport,
		       struct iscsi_uevent *ev, uint32_t len)
{
	char *data = (char *)ev + sizeof(*ev);
	struct Scsi_Host *shost;
	int err;

	if (!transport->set_iface_param)
		return -ENOSYS;

	shost = scsi_host_lookup(ev->u.set_iface_params.host_no);
	if (!shost) {
		printk(KERN_ERR "set_iface_params could not find host no %u\n",
		       ev->u.set_iface_params.host_no);
		return -ENODEV;
	}

	err = transport->set_iface_param(shost, data, len);
	scsi_host_put(shost);
	return err;
}

/*
 * Ask the transport to send a ping from the host named in @ev to the
 * address that follows the event struct.
 */
static int
iscsi_send_ping(struct iscsi_transport *transport, struct iscsi_uevent *ev, u32 rlen)
{
	struct Scsi_Host *shost;
	struct sockaddr *dst_addr;
	int err;

	if (rlen < sizeof(*dst_addr))
		return -EINVAL;

	if (!transport->send_ping)
		return -ENOSYS;

	shost = scsi_host_lookup(ev->u.iscsi_ping.host_no);
	if (!shost) {
		printk(KERN_ERR "iscsi_ping could not find host no %u\n",
		       ev->u.iscsi_ping.host_no);
		return -ENODEV;
	}

	dst_addr = (struct sockaddr *)((char *)ev + sizeof(*ev));
	err = transport->send_ping(shost, ev->u.iscsi_ping.iface_num,
				   ev->u.iscsi_ping.iface_type,
				   ev->u.iscsi_ping.payload_size,
				   ev->u.iscsi_ping.pid,
				   dst_addr);
	scsi_host_put(shost);
	return err;
}

/*
 * Fetch CHAP records from the transport for the host named in the request
 * and multicast them to the ISCSID group.
 */
static int
iscsi_get_chap(struct iscsi_transport *transport, struct nlmsghdr *nlh)
{
	struct iscsi_uevent *ev = nlmsg_data(nlh);
	struct Scsi_Host *shost = NULL;
	struct iscsi_chap_rec *chap_rec;
	struct iscsi_internal *priv;
	struct sk_buff *skbchap;
	struct nlmsghdr *nlhchap;
	struct iscsi_uevent *evchap;
	uint32_t chap_buf_size;
	int len, err = 0;
	char *buf;

	if (!transport->get_chap)
		return -EINVAL;

	priv = iscsi_if_transport_lookup(transport);
	if (!priv)
		return -EINVAL;

	/*
	 * NOTE(review): num_entries comes from the request and this 32-bit
	 * multiplication is not checked for overflow — confirm callers or
	 * transports bound it.
	 */
	chap_buf_size = (ev->u.get_chap.num_entries * sizeof(*chap_rec));
	len = nlmsg_total_size(sizeof(*ev) + chap_buf_size);

	shost = scsi_host_lookup(ev->u.get_chap.host_no);
	if (!shost) {
		printk(KERN_ERR "%s: failed. Could not find host no %u\n",
		       __func__, ev->u.get_chap.host_no);
		return -ENODEV;
	}

	do {
		int actual_size;

		skbchap = alloc_skb(len, GFP_KERNEL);
		if (!skbchap) {
			printk(KERN_ERR "can not deliver chap: OOM\n");
			err = -ENOMEM;
			goto exit_get_chap;
		}

		nlhchap = __nlmsg_put(skbchap, 0, 0, 0,
				      (len - sizeof(*nlhchap)), 0);
		evchap = nlmsg_data(nlhchap);
		memset(evchap, 0, sizeof(*evchap));
		evchap->transport_handle = iscsi_handle(transport);
		evchap->type = nlh->nlmsg_type;
		evchap->u.get_chap.host_no = ev->u.get_chap.host_no;
		evchap->u.get_chap.chap_tbl_idx = ev->u.get_chap.chap_tbl_idx;
		evchap->u.get_chap.num_entries = ev->u.get_chap.num_entries;
		buf = (char *)evchap + sizeof(*evchap);
		memset(buf, 0, chap_buf_size);

		/*
		 * NOTE(review): this status is overwritten by the multicast
		 * result below, so a get_chap() failure is not reported.
		 */
		err = transport->get_chap(shost, ev->u.get_chap.chap_tbl_idx,
				    &evchap->u.get_chap.num_entries, buf);

		actual_size = nlmsg_total_size(sizeof(*ev) + chap_buf_size);
		skb_trim(skbchap, NLMSG_ALIGN(actual_size));
		nlhchap->nlmsg_len = actual_size;

		err = iscsi_multicast_skb(skbchap, ISCSI_NL_GRP_ISCSID,
					  GFP_KERNEL);
	} while (err < 0 && err != -ECONNREFUSED);

exit_get_chap:
	scsi_host_put(shost);
	return err;
}

/*
 * Pass the CHAP payload that follows the event struct to the transport.
 * The host number is read through the set_path member of the request union.
 */
static int iscsi_set_chap(struct iscsi_transport *transport,
			  struct iscsi_uevent *ev, uint32_t len)
{
	char *data = (char *)ev + sizeof(*ev);
	struct Scsi_Host *shost;
	int err = 0;

	if (!transport->set_chap)
		return -ENOSYS;

	shost = scsi_host_lookup(ev->u.set_path.host_no);
	if (!shost) {
		pr_err("%s could not find host no %u\n",
		       __func__, ev->u.set_path.host_no);
		return -ENODEV;
	}

	err = transport->set_chap(shost, data, len);
	scsi_host_put(shost);
	return err;
}

/* Ask the transport to delete one CHAP table entry on the host in @ev. */
static int iscsi_delete_chap(struct iscsi_transport *transport,
			     struct iscsi_uevent *ev)
{
	struct Scsi_Host *shost;
	int err = 0;

	if (!transport->delete_chap)
		return -ENOSYS;

	shost = scsi_host_lookup(ev->u.delete_chap.host_no);
	if (!shost) {
		printk(KERN_ERR "%s could not find host no %u\n",
		       __func__, ev->u.delete_chap.host_no);
		return -ENODEV;
	}

	err = transport->delete_chap(shost, ev->u.delete_chap.chap_tbl_idx);
scsi_host_put(shost); return err; } static const struct { enum iscsi_discovery_parent_type value; char *name; } iscsi_discovery_parent_names[] = { {ISCSI_DISC_PARENT_UNKNOWN, "Unknown" }, {ISCSI_DISC_PARENT_SENDTGT, "Sendtarget" }, {ISCSI_DISC_PARENT_ISNS, "isns" }, }; char *iscsi_get_discovery_parent_name(int parent_type) { int i; char *state = "Unknown!"; for (i = 0; i < ARRAY_SIZE(iscsi_discovery_parent_names); i++) { if (iscsi_discovery_parent_names[i].value & parent_type) { state = iscsi_discovery_parent_names[i].name; break; } } return state; } EXPORT_SYMBOL_GPL(iscsi_get_discovery_parent_name); static int iscsi_set_flashnode_param(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) { char *data = (char *)ev + sizeof(*ev); struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; struct iscsi_bus_flash_conn *fnode_conn; struct device *dev; uint32_t idx; int err = 0; if (!transport->set_flashnode_param) { err = -ENOSYS; goto exit_set_fnode; } shost = scsi_host_lookup(ev->u.set_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.set_flashnode.host_no); err = -ENODEV; goto exit_set_fnode; } idx = ev->u.set_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.set_flashnode.host_no); err = -ENODEV; goto put_host; } dev = iscsi_find_flashnode_conn(fnode_sess); if (!dev) { err = -ENODEV; goto put_sess; } fnode_conn = iscsi_dev_to_flash_conn(dev); err = transport->set_flashnode_param(fnode_sess, fnode_conn, data, len); put_device(dev); put_sess: put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_set_fnode: return err; } static int iscsi_new_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev, uint32_t len) { char *data = (char *)ev + sizeof(*ev); struct Scsi_Host *shost; int index; int err = 0; if (!transport->new_flashnode) { err = -ENOSYS; 
goto exit_new_fnode; } shost = scsi_host_lookup(ev->u.new_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.new_flashnode.host_no); err = -ENODEV; goto exit_new_fnode; } index = transport->new_flashnode(shost, data, len); if (index >= 0) ev->r.new_flashnode_ret.flashnode_idx = index; else err = -EIO; scsi_host_put(shost); exit_new_fnode: return err; } static int iscsi_del_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; uint32_t idx; int err = 0; if (!transport->del_flashnode) { err = -ENOSYS; goto exit_del_fnode; } shost = scsi_host_lookup(ev->u.del_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.del_flashnode.host_no); err = -ENODEV; goto exit_del_fnode; } idx = ev->u.del_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.del_flashnode.host_no); err = -ENODEV; goto put_host; } err = transport->del_flashnode(fnode_sess); put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_del_fnode: return err; } static int iscsi_login_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; struct iscsi_bus_flash_conn *fnode_conn; struct device *dev; uint32_t idx; int err = 0; if (!transport->login_flashnode) { err = -ENOSYS; goto exit_login_fnode; } shost = scsi_host_lookup(ev->u.login_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.login_flashnode.host_no); err = -ENODEV; goto exit_login_fnode; } idx = ev->u.login_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.login_flashnode.host_no); err = -ENODEV; goto 
put_host; } dev = iscsi_find_flashnode_conn(fnode_sess); if (!dev) { err = -ENODEV; goto put_sess; } fnode_conn = iscsi_dev_to_flash_conn(dev); err = transport->login_flashnode(fnode_sess, fnode_conn); put_device(dev); put_sess: put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_login_fnode: return err; } static int iscsi_logout_flashnode(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_bus_flash_session *fnode_sess; struct iscsi_bus_flash_conn *fnode_conn; struct device *dev; uint32_t idx; int err = 0; if (!transport->logout_flashnode) { err = -ENOSYS; goto exit_logout_fnode; } shost = scsi_host_lookup(ev->u.logout_flashnode.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; goto exit_logout_fnode; } idx = ev->u.logout_flashnode.flashnode_idx; fnode_sess = iscsi_get_flashnode_by_index(shost, idx); if (!fnode_sess) { pr_err("%s could not find flashnode %u for host no %u\n", __func__, idx, ev->u.logout_flashnode.host_no); err = -ENODEV; goto put_host; } dev = iscsi_find_flashnode_conn(fnode_sess); if (!dev) { err = -ENODEV; goto put_sess; } fnode_conn = iscsi_dev_to_flash_conn(dev); err = transport->logout_flashnode(fnode_sess, fnode_conn); put_device(dev); put_sess: put_device(&fnode_sess->dev); put_host: scsi_host_put(shost); exit_logout_fnode: return err; } static int iscsi_logout_flashnode_sid(struct iscsi_transport *transport, struct iscsi_uevent *ev) { struct Scsi_Host *shost; struct iscsi_cls_session *session; int err = 0; if (!transport->logout_flashnode_sid) { err = -ENOSYS; goto exit_logout_sid; } shost = scsi_host_lookup(ev->u.logout_flashnode_sid.host_no); if (!shost) { pr_err("%s could not find host no %u\n", __func__, ev->u.logout_flashnode.host_no); err = -ENODEV; goto exit_logout_sid; } session = iscsi_session_lookup(ev->u.logout_flashnode_sid.sid); if (!session) { pr_err("%s could not find session id %u\n", 
__func__, ev->u.logout_flashnode_sid.sid); err = -EINVAL; goto put_host; } err = transport->logout_flashnode_sid(session); put_host: scsi_host_put(shost); exit_logout_sid: return err; } static int iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) { struct iscsi_uevent *ev = nlmsg_data(nlh); struct Scsi_Host *shost = NULL; struct iscsi_internal *priv; struct sk_buff *skbhost_stats; struct nlmsghdr *nlhhost_stats; struct iscsi_uevent *evhost_stats; int host_stats_size = 0; int len, err = 0; char *buf; if (!transport->get_host_stats) return -ENOSYS; priv = iscsi_if_transport_lookup(transport); if (!priv) return -EINVAL; host_stats_size = sizeof(struct iscsi_offload_host_stats); len = nlmsg_total_size(sizeof(*ev) + host_stats_size); shost = scsi_host_lookup(ev->u.get_host_stats.host_no); if (!shost) { pr_err("%s: failed. Could not find host no %u\n", __func__, ev->u.get_host_stats.host_no); return -ENODEV; } do { int actual_size; skbhost_stats = alloc_skb(len, GFP_KERNEL); if (!skbhost_stats) { pr_err("cannot deliver host stats: OOM\n"); err = -ENOMEM; goto exit_host_stats; } nlhhost_stats = __nlmsg_put(skbhost_stats, 0, 0, 0, (len - sizeof(*nlhhost_stats)), 0); evhost_stats = nlmsg_data(nlhhost_stats); memset(evhost_stats, 0, sizeof(*evhost_stats)); evhost_stats->transport_handle = iscsi_handle(transport); evhost_stats->type = nlh->nlmsg_type; evhost_stats->u.get_host_stats.host_no = ev->u.get_host_stats.host_no; buf = (char *)evhost_stats + sizeof(*evhost_stats); memset(buf, 0, host_stats_size); err = transport->get_host_stats(shost, buf, host_stats_size); if (err) { kfree_skb(skbhost_stats); goto exit_host_stats; } actual_size = nlmsg_total_size(sizeof(*ev) + host_stats_size); skb_trim(skbhost_stats, NLMSG_ALIGN(actual_size)); nlhhost_stats->nlmsg_len = actual_size; err = iscsi_multicast_skb(skbhost_stats, ISCSI_NL_GRP_ISCSID, GFP_KERNEL); } while (err < 0 && err != -ECONNREFUSED); exit_host_stats: scsi_host_put(shost); return err; } 
/*
 * Handle the connection-level uevents.
 *
 * CREATE_CONN, DESTROY_CONN and STOP_CONN are handed straight to their
 * helpers.  START_CONN, BIND_CONN and SEND_PDU look the connection up and
 * then run with conn->ep_mutex held; if ISCSI_CLS_CONN_BIT_CLEANUP is set
 * they are refused by storing -ENOTCONN in ev->r.retcode and returning 0.
 *
 * @pdu_len is the number of payload bytes following the event header; it is
 * used to bound the header and data sizes of a SEND_PDU request.
 *
 * Returns -EINVAL when the connection or session cannot be found or the
 * SEND_PDU sizes exceed @pdu_len, -ENOTCONN when the endpoint for a bind
 * cannot be found, otherwise 0 with the transport's result in ev->r.retcode.
 */
static int iscsi_if_transport_conn(struct iscsi_transport *transport,
				   struct nlmsghdr *nlh, u32 pdu_len)
{
	struct iscsi_uevent *ev = nlmsg_data(nlh);
	struct iscsi_cls_session *session;
	struct iscsi_cls_conn *conn = NULL;
	struct iscsi_endpoint *ep;
	int err = 0;

	/* Commands that do not take the ep_mutex in this function. */
	switch (nlh->nlmsg_type) {
	case ISCSI_UEVENT_CREATE_CONN:
		return iscsi_if_create_conn(transport, ev);
	case ISCSI_UEVENT_DESTROY_CONN:
		return iscsi_if_destroy_conn(transport, ev);
	case ISCSI_UEVENT_STOP_CONN:
		conn = iscsi_conn_lookup(ev->u.stop_conn.sid,
					 ev->u.stop_conn.cid);
		if (!conn)
			return -EINVAL;

		return iscsi_if_stop_conn(conn, ev->u.stop_conn.flag);
	}

	/*
	 * The following cmds need to be run under the ep_mutex so in kernel
	 * conn cleanup (ep_disconnect + unbind and conn) is not done while
	 * these are running. They also must not run if we have just run a conn
	 * cleanup because they would set the state in a way that might allow
	 * IO or send IO themselves.
	 */
	switch (nlh->nlmsg_type) {
	case ISCSI_UEVENT_START_CONN:
		conn = iscsi_conn_lookup(ev->u.start_conn.sid,
					 ev->u.start_conn.cid);
		break;
	case ISCSI_UEVENT_BIND_CONN:
		conn = iscsi_conn_lookup(ev->u.b_conn.sid, ev->u.b_conn.cid);
		break;
	case ISCSI_UEVENT_SEND_PDU:
		conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
		break;
	}

	/* Also covers message types not handled by either switch above. */
	if (!conn)
		return -EINVAL;

	mutex_lock(&conn->ep_mutex);
	/* The cleanup flag is tested under conn->lock. */
	spin_lock_irq(&conn->lock);
	if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
		spin_unlock_irq(&conn->lock);
		mutex_unlock(&conn->ep_mutex);
		ev->r.retcode = -ENOTCONN;
		return 0;
	}
	spin_unlock_irq(&conn->lock);

	switch (nlh->nlmsg_type) {
	case ISCSI_UEVENT_BIND_CONN:
		session = iscsi_session_lookup(ev->u.b_conn.sid);
		if (!session) {
			err = -EINVAL;
			break;
		}

		ev->r.retcode =	transport->bind_conn(session, conn,
						ev->u.b_conn.transport_eph,
						ev->u.b_conn.is_leading);
		if (!ev->r.retcode)
			WRITE_ONCE(conn->state, ISCSI_CONN_BOUND);

		/* Only transports with ep_connect get an ep <-> conn link. */
		if (ev->r.retcode || !transport->ep_connect)
			break;

		ep = iscsi_lookup_endpoint(ev->u.b_conn.transport_eph);
		if (ep) {
			ep->conn = conn;
			conn->ep = ep;
			iscsi_put_endpoint(ep);
		} else {
			err = -ENOTCONN;
			iscsi_cls_conn_printk(KERN_ERR, conn,
					      "Could not set ep conn binding\n");
		}
		break;
	case ISCSI_UEVENT_START_CONN:
		ev->r.retcode = transport->start_conn(conn);
		if (!ev->r.retcode)
			WRITE_ONCE(conn->state, ISCSI_CONN_UP);

		break;
	case ISCSI_UEVENT_SEND_PDU:
		/*
		 * Reject a header or data size that does not fit in the
		 * payload.  hdr_size is checked first so the subtraction in
		 * the second test is only evaluated when it cannot underflow.
		 */
		if ((ev->u.send_pdu.hdr_size > pdu_len) ||
		    (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
			err = -EINVAL;
			break;
		}

		/* PDU header follows the event; the data follows the header. */
		ev->r.retcode =	transport->send_pdu(conn,
				(struct iscsi_hdr *)((char *)ev + sizeof(*ev)),
				(char *)ev + sizeof(*ev) + ev->u.send_pdu.hdr_size,
				ev->u.send_pdu.data_size);
		break;
	default:
		err = -ENOSYS;
	}

	mutex_unlock(&conn->ep_mutex);
	return err;
}

/*
 * Process one netlink message and dispatch it by nlh->nlmsg_type.
 *
 * The sender must have CAP_SYS_ADMIN.  *group is set to ISCSI_NL_GRP_UIP for
 * ISCSI_UEVENT_PATH_UPDATE and to ISCSI_NL_GRP_ISCSID for everything else.
 * A reference on the transport's owner module is held for the duration of
 * the dispatch.
 *
 * Returns -EPERM without the capability, -EINVAL if the transport is not
 * registered or its module reference cannot be taken, -ENOSYS for an unknown
 * message type, otherwise the result of the selected handler.
 */
static int
iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
{
	int err = 0;
	u32 portid;
	struct iscsi_uevent *ev = nlmsg_data(nlh);
	struct iscsi_transport *transport = NULL;
	struct iscsi_internal *priv;
	struct iscsi_cls_session *session;
	struct iscsi_endpoint *ep = NULL;
	u32 rlen;

	if (!netlink_capable(skb, CAP_SYS_ADMIN))
		return -EPERM;

	if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
		*group = ISCSI_NL_GRP_UIP;
	else
		*group = ISCSI_NL_GRP_ISCSID;

	priv = iscsi_if_transport_lookup(iscsi_ptr(ev->transport_handle));
	if (!priv)
		return -EINVAL;
	transport = priv->iscsi_transport;

	if (!try_module_get(transport->owner))
		return -EINVAL;

	portid = NETLINK_CB(skb).portid;

	/*
	 * Even though the remaining payload may not be regarded as nlattr,
	 * (like address or something else), calculate the remaining length
	 * here to ease following length checks.
	 */
	rlen = nlmsg_attrlen(nlh, sizeof(*ev));

	switch (nlh->nlmsg_type) {
	case ISCSI_UEVENT_CREATE_SESSION:
		/* ep is still NULL here: the session is created unbound. */
		err = iscsi_if_create_session(priv, ep, ev,
					      portid,
					      ev->u.c_session.initial_cmdsn,
					      ev->u.c_session.cmds_max,
					      ev->u.c_session.queue_depth);
		break;
	case ISCSI_UEVENT_CREATE_BOUND_SESSION:
		ep = iscsi_lookup_endpoint(ev->u.c_bound_session.ep_handle);
		if (!ep) {
			err = -EINVAL;
			break;
		}

		err = iscsi_if_create_session(priv, ep, ev,
					portid,
					ev->u.c_bound_session.initial_cmdsn,
					ev->u.c_bound_session.cmds_max,
					ev->u.c_bound_session.queue_depth);
		iscsi_put_endpoint(ep);
		break;
	case ISCSI_UEVENT_DESTROY_SESSION:
		session = iscsi_session_lookup(ev->u.d_session.sid);
		if (!session)
			err = -EINVAL;
		else if (iscsi_session_has_conns(ev->u.d_session.sid))
			err = -EBUSY;
		else
			transport->destroy_session(session);
		break;
	case ISCSI_UEVENT_DESTROY_SESSION_ASYNC:
		session = iscsi_session_lookup(ev->u.d_session.sid);
		if (!session)
			err = -EINVAL;
		else if (iscsi_session_has_conns(ev->u.d_session.sid))
			err = -EBUSY;
		else {
			unsigned long flags;

			/* Prevent this session from being found again */
			spin_lock_irqsave(&sesslock, flags);
			list_del_init(&session->sess_list);
			spin_unlock_irqrestore(&sesslock, flags);

			/* The destroy itself is deferred to session->destroy_work. */
			queue_work(system_unbound_wq, &session->destroy_work);
		}
		break;
	case ISCSI_UEVENT_UNBIND_SESSION:
		/* The sid is read through the d_session member of the union. */
		session = iscsi_session_lookup(ev->u.d_session.sid);
		if (session)
			queue_work(session->workq, &session->unbind_work);
		else
			err = -EINVAL;
		break;
	case ISCSI_UEVENT_SET_PARAM:
		err = iscsi_if_set_param(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_CREATE_CONN:
	case ISCSI_UEVENT_DESTROY_CONN:
	case ISCSI_UEVENT_STOP_CONN:
	case ISCSI_UEVENT_START_CONN:
	case ISCSI_UEVENT_BIND_CONN:
	case ISCSI_UEVENT_SEND_PDU:
		err = iscsi_if_transport_conn(transport, nlh, rlen);
		break;
	case ISCSI_UEVENT_GET_STATS:
		err = iscsi_if_get_stats(transport, nlh);
		break;
	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT:
	case ISCSI_UEVENT_TRANSPORT_EP_POLL:
	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
	case ISCSI_UEVENT_TRANSPORT_EP_CONNECT_THROUGH_HOST:
		err = iscsi_if_transport_ep(transport, ev, nlh->nlmsg_type, rlen);
		break;
	case ISCSI_UEVENT_TGT_DSCVR:
		err = iscsi_tgt_dscvr(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_SET_HOST_PARAM:
		err = iscsi_set_host_param(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_PATH_UPDATE:
		err = iscsi_set_path(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_SET_IFACE_PARAMS:
		err = iscsi_set_iface_params(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_PING:
		err = iscsi_send_ping(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_GET_CHAP:
		err = iscsi_get_chap(transport, nlh);
		break;
	case ISCSI_UEVENT_DELETE_CHAP:
		err = iscsi_delete_chap(transport, ev);
		break;
	case ISCSI_UEVENT_SET_FLASHNODE_PARAMS:
		err = iscsi_set_flashnode_param(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_NEW_FLASHNODE:
		err = iscsi_new_flashnode(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_DEL_FLASHNODE:
		err = iscsi_del_flashnode(transport, ev);
		break;
	case ISCSI_UEVENT_LOGIN_FLASHNODE:
		err = iscsi_login_flashnode(transport, ev);
		break;
	case ISCSI_UEVENT_LOGOUT_FLASHNODE:
		err = iscsi_logout_flashnode(transport, ev);
		break;
	case ISCSI_UEVENT_LOGOUT_FLASHNODE_SID:
		err = iscsi_logout_flashnode_sid(transport, ev);
		break;
	case ISCSI_UEVENT_SET_CHAP:
		err = iscsi_set_chap(transport, ev, rlen);
		break;
	case ISCSI_UEVENT_GET_HOST_STATS:
		err = iscsi_get_host_stats(transport, nlh);
		break;
	default:
		err = -ENOSYS;
		break;
	}

	module_put(transport->owner);
	return err;
}

/*
 * Get message from skb.  Each message is processed by iscsi_if_recv_msg.
 * Malformed skbs with wrong lengths or invalid creds are not processed.
 *
 * The whole skb is handled with rx_queue_mutex held.  After each message a
 * reply carrying the (possibly updated) event header is sent back to the
 * sender's portid, except for the GET_* requests noted in the loop below.
 */
static void
iscsi_if_rx(struct sk_buff *skb)
{
	u32 portid = NETLINK_CB(skb).portid;

	mutex_lock(&rx_queue_mutex);
	while (skb->len >= NLMSG_HDRLEN) {
		int err;
		uint32_t rlen;
		struct nlmsghdr	*nlh;
		struct iscsi_uevent *ev;
		uint32_t group;
		int retries = ISCSI_SEND_MAX_ALLOWED;

		nlh = nlmsg_hdr(skb);
		/* Stop at a message too short for an event or longer than the skb. */
		if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) ||
		    skb->len < nlh->nlmsg_len) {
			break;
		}

		ev = nlmsg_data(nlh);
		/* Bytes to consume for this message, capped at what is left. */
		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
		if (rlen > skb->len)
			rlen = skb->len;

		err = iscsi_if_recv_msg(skb, nlh, &group);
		if (err) {
			/* Turn the event into an error report for the reply. */
			ev->type = ISCSI_KEVENT_IF_ERROR;
			ev->iferror = err;
		}
		do {
			/*
			 * special case for GET_STATS, GET_CHAP and GET_HOST_STATS:
			 * on success - sending reply and stats from
			 * inside of if_recv_msg(),
			 * on error - fall through.
			 */
			if (ev->type == ISCSI_UEVENT_GET_STATS && !err)
				break;
			if (ev->type == ISCSI_UEVENT_GET_CHAP && !err)
				break;
			if (ev->type == ISCSI_UEVENT_GET_HOST_STATS && !err)
				break;
			err = iscsi_if_send_reply(portid, nlh->nlmsg_type,
						  ev, sizeof(*ev));
			/* -EAGAIN is retried at most ISCSI_SEND_MAX_ALLOWED times. */
			if (err == -EAGAIN && --retries < 0) {
				printk(KERN_WARNING "Send reply failed, "
				       "error %d\n", err);
				break;
			}
		} while (err < 0 && err != -ECONNREFUSED && err != -ESRCH);
		skb_pull(skb, rlen);
	}
	mutex_unlock(&rx_queue_mutex);
}

/*
 * Define a struct device_attribute named dev_attr_<_prefix>_<_name>; the
 * sysfs file name is _name alone.
 */
#define ISCSI_CLASS_ATTR(_prefix,_name,_mode,_show,_store)		\
struct device_attribute dev_attr_##_prefix##_##_name =	\
	__ATTR(_name,_mode,_show,_store)

/*
 * iSCSI connection attrs
 */
/*
 * Generate show_conn_param_<param>(), which returns whatever the transport's
 * get_conn_param() callback reports for that param.
 */
#define iscsi_conn_attr_show(param)					\
static ssize_t								\
show_conn_param_##param(struct device *dev, 				\
			struct device_attribute *attr, char *buf)	\
{									\
	struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent);	\
	struct iscsi_transport *t = conn->transport;			\
	return t->get_conn_param(conn, param, buf);			\
}

/* Generate the show function and a read-only dev_attr_conn_<field>. */
#define iscsi_conn_attr(field, param)					\
	iscsi_conn_attr_show(param)					\
static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, show_conn_param_##param,	\
			NULL);

iscsi_conn_attr(max_recv_dlength, ISCSI_PARAM_MAX_RECV_DLENGTH);
iscsi_conn_attr(max_xmit_dlength, ISCSI_PARAM_MAX_XMIT_DLENGTH);
iscsi_conn_attr(header_digest, ISCSI_PARAM_HDRDGST_EN); iscsi_conn_attr(data_digest, ISCSI_PARAM_DATADGST_EN); iscsi_conn_attr(ifmarker, ISCSI_PARAM_IFMARKER_EN); iscsi_conn_attr(ofmarker, ISCSI_PARAM_OFMARKER_EN); iscsi_conn_attr(persistent_port, ISCSI_PARAM_PERSISTENT_PORT); iscsi_conn_attr(exp_statsn, ISCSI_PARAM_EXP_STATSN); iscsi_conn_attr(persistent_address, ISCSI_PARAM_PERSISTENT_ADDRESS); iscsi_conn_attr(ping_tmo, ISCSI_PARAM_PING_TMO); iscsi_conn_attr(recv_tmo, ISCSI_PARAM_RECV_TMO); iscsi_conn_attr(local_port, ISCSI_PARAM_LOCAL_PORT); iscsi_conn_attr(statsn, ISCSI_PARAM_STATSN); iscsi_conn_attr(keepalive_tmo, ISCSI_PARAM_KEEPALIVE_TMO); iscsi_conn_attr(max_segment_size, ISCSI_PARAM_MAX_SEGMENT_SIZE); iscsi_conn_attr(tcp_timestamp_stat, ISCSI_PARAM_TCP_TIMESTAMP_STAT); iscsi_conn_attr(tcp_wsf_disable, ISCSI_PARAM_TCP_WSF_DISABLE); iscsi_conn_attr(tcp_nagle_disable, ISCSI_PARAM_TCP_NAGLE_DISABLE); iscsi_conn_attr(tcp_timer_scale, ISCSI_PARAM_TCP_TIMER_SCALE); iscsi_conn_attr(tcp_timestamp_enable, ISCSI_PARAM_TCP_TIMESTAMP_EN); iscsi_conn_attr(fragment_disable, ISCSI_PARAM_IP_FRAGMENT_DISABLE); iscsi_conn_attr(ipv4_tos, ISCSI_PARAM_IPV4_TOS); iscsi_conn_attr(ipv6_traffic_class, ISCSI_PARAM_IPV6_TC); iscsi_conn_attr(ipv6_flow_label, ISCSI_PARAM_IPV6_FLOW_LABEL); iscsi_conn_attr(is_fw_assigned_ipv6, ISCSI_PARAM_IS_FW_ASSIGNED_IPV6); iscsi_conn_attr(tcp_xmit_wsf, ISCSI_PARAM_TCP_XMIT_WSF); iscsi_conn_attr(tcp_recv_wsf, ISCSI_PARAM_TCP_RECV_WSF); iscsi_conn_attr(local_ipaddr, ISCSI_PARAM_LOCAL_IPADDR); static const char *const connection_state_names[] = { [ISCSI_CONN_UP] = "up", [ISCSI_CONN_DOWN] = "down", [ISCSI_CONN_FAILED] = "failed", [ISCSI_CONN_BOUND] = "bound" }; static ssize_t show_conn_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); const char *state = "unknown"; int conn_state = READ_ONCE(conn->state); if (conn_state >= 0 && conn_state < 
ARRAY_SIZE(connection_state_names)) state = connection_state_names[conn_state]; return sysfs_emit(buf, "%s\n", state); } static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state, NULL); #define iscsi_conn_ep_attr_show(param) \ static ssize_t show_conn_ep_param_##param(struct device *dev, \ struct device_attribute *attr,\ char *buf) \ { \ struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev->parent); \ struct iscsi_transport *t = conn->transport; \ struct iscsi_endpoint *ep; \ ssize_t rc; \ \ /* \ * Need to make sure ep_disconnect does not free the LLD's \ * interconnect resources while we are trying to read them. \ */ \ mutex_lock(&conn->ep_mutex); \ ep = conn->ep; \ if (!ep && t->ep_connect) { \ mutex_unlock(&conn->ep_mutex); \ return -ENOTCONN; \ } \ \ if (ep) \ rc = t->get_ep_param(ep, param, buf); \ else \ rc = t->get_conn_param(conn, param, buf); \ mutex_unlock(&conn->ep_mutex); \ return rc; \ } #define iscsi_conn_ep_attr(field, param) \ iscsi_conn_ep_attr_show(param) \ static ISCSI_CLASS_ATTR(conn, field, S_IRUGO, \ show_conn_ep_param_##param, NULL); iscsi_conn_ep_attr(address, ISCSI_PARAM_CONN_ADDRESS); iscsi_conn_ep_attr(port, ISCSI_PARAM_CONN_PORT); static struct attribute *iscsi_conn_attrs[] = { &dev_attr_conn_max_recv_dlength.attr, &dev_attr_conn_max_xmit_dlength.attr, &dev_attr_conn_header_digest.attr, &dev_attr_conn_data_digest.attr, &dev_attr_conn_ifmarker.attr, &dev_attr_conn_ofmarker.attr, &dev_attr_conn_address.attr, &dev_attr_conn_port.attr, &dev_attr_conn_exp_statsn.attr, &dev_attr_conn_persistent_address.attr, &dev_attr_conn_persistent_port.attr, &dev_attr_conn_ping_tmo.attr, &dev_attr_conn_recv_tmo.attr, &dev_attr_conn_local_port.attr, &dev_attr_conn_statsn.attr, &dev_attr_conn_keepalive_tmo.attr, &dev_attr_conn_max_segment_size.attr, &dev_attr_conn_tcp_timestamp_stat.attr, &dev_attr_conn_tcp_wsf_disable.attr, &dev_attr_conn_tcp_nagle_disable.attr, &dev_attr_conn_tcp_timer_scale.attr, &dev_attr_conn_tcp_timestamp_enable.attr, 
&dev_attr_conn_fragment_disable.attr, &dev_attr_conn_ipv4_tos.attr, &dev_attr_conn_ipv6_traffic_class.attr, &dev_attr_conn_ipv6_flow_label.attr, &dev_attr_conn_is_fw_assigned_ipv6.attr, &dev_attr_conn_tcp_xmit_wsf.attr, &dev_attr_conn_tcp_recv_wsf.attr, &dev_attr_conn_local_ipaddr.attr, &dev_attr_conn_state.attr, NULL, }; static umode_t iscsi_conn_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct iscsi_cls_conn *conn = transport_class_to_conn(cdev); struct iscsi_transport *t = conn->transport; int param; if (attr == &dev_attr_conn_max_recv_dlength.attr) param = ISCSI_PARAM_MAX_RECV_DLENGTH; else if (attr == &dev_attr_conn_max_xmit_dlength.attr) param = ISCSI_PARAM_MAX_XMIT_DLENGTH; else if (attr == &dev_attr_conn_header_digest.attr) param = ISCSI_PARAM_HDRDGST_EN; else if (attr == &dev_attr_conn_data_digest.attr) param = ISCSI_PARAM_DATADGST_EN; else if (attr == &dev_attr_conn_ifmarker.attr) param = ISCSI_PARAM_IFMARKER_EN; else if (attr == &dev_attr_conn_ofmarker.attr) param = ISCSI_PARAM_OFMARKER_EN; else if (attr == &dev_attr_conn_address.attr) param = ISCSI_PARAM_CONN_ADDRESS; else if (attr == &dev_attr_conn_port.attr) param = ISCSI_PARAM_CONN_PORT; else if (attr == &dev_attr_conn_exp_statsn.attr) param = ISCSI_PARAM_EXP_STATSN; else if (attr == &dev_attr_conn_persistent_address.attr) param = ISCSI_PARAM_PERSISTENT_ADDRESS; else if (attr == &dev_attr_conn_persistent_port.attr) param = ISCSI_PARAM_PERSISTENT_PORT; else if (attr == &dev_attr_conn_ping_tmo.attr) param = ISCSI_PARAM_PING_TMO; else if (attr == &dev_attr_conn_recv_tmo.attr) param = ISCSI_PARAM_RECV_TMO; else if (attr == &dev_attr_conn_local_port.attr) param = ISCSI_PARAM_LOCAL_PORT; else if (attr == &dev_attr_conn_statsn.attr) param = ISCSI_PARAM_STATSN; else if (attr == &dev_attr_conn_keepalive_tmo.attr) param = ISCSI_PARAM_KEEPALIVE_TMO; else if (attr == &dev_attr_conn_max_segment_size.attr) param = 
ISCSI_PARAM_MAX_SEGMENT_SIZE; else if (attr == &dev_attr_conn_tcp_timestamp_stat.attr) param = ISCSI_PARAM_TCP_TIMESTAMP_STAT; else if (attr == &dev_attr_conn_tcp_wsf_disable.attr) param = ISCSI_PARAM_TCP_WSF_DISABLE; else if (attr == &dev_attr_conn_tcp_nagle_disable.attr) param = ISCSI_PARAM_TCP_NAGLE_DISABLE; else if (attr == &dev_attr_conn_tcp_timer_scale.attr) param = ISCSI_PARAM_TCP_TIMER_SCALE; else if (attr == &dev_attr_conn_tcp_timestamp_enable.attr) param = ISCSI_PARAM_TCP_TIMESTAMP_EN; else if (attr == &dev_attr_conn_fragment_disable.attr) param = ISCSI_PARAM_IP_FRAGMENT_DISABLE; else if (attr == &dev_attr_conn_ipv4_tos.attr) param = ISCSI_PARAM_IPV4_TOS; else if (attr == &dev_attr_conn_ipv6_traffic_class.attr) param = ISCSI_PARAM_IPV6_TC; else if (attr == &dev_attr_conn_ipv6_flow_label.attr) param = ISCSI_PARAM_IPV6_FLOW_LABEL; else if (attr == &dev_attr_conn_is_fw_assigned_ipv6.attr) param = ISCSI_PARAM_IS_FW_ASSIGNED_IPV6; else if (attr == &dev_attr_conn_tcp_xmit_wsf.attr) param = ISCSI_PARAM_TCP_XMIT_WSF; else if (attr == &dev_attr_conn_tcp_recv_wsf.attr) param = ISCSI_PARAM_TCP_RECV_WSF; else if (attr == &dev_attr_conn_local_ipaddr.attr) param = ISCSI_PARAM_LOCAL_IPADDR; else if (attr == &dev_attr_conn_state.attr) return S_IRUGO; else { WARN_ONCE(1, "Invalid conn attr"); return 0; } return t->attr_is_visible(ISCSI_PARAM, param); } static struct attribute_group iscsi_conn_group = { .attrs = iscsi_conn_attrs, .is_visible = iscsi_conn_attr_is_visible, }; /* * iSCSI session attrs */ #define iscsi_session_attr_show(param, perm) \ static ssize_t \ show_session_param_##param(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ struct iscsi_transport *t = session->transport; \ \ if (perm && !capable(CAP_SYS_ADMIN)) \ return -EACCES; \ return t->get_session_param(session, param, buf); \ } #define iscsi_session_attr(field, param, perm) \ iscsi_session_attr_show(param, 
perm) \ static ISCSI_CLASS_ATTR(sess, field, S_IRUGO, show_session_param_##param, \ NULL); iscsi_session_attr(targetname, ISCSI_PARAM_TARGET_NAME, 0); iscsi_session_attr(initial_r2t, ISCSI_PARAM_INITIAL_R2T_EN, 0); iscsi_session_attr(max_outstanding_r2t, ISCSI_PARAM_MAX_R2T, 0); iscsi_session_attr(immediate_data, ISCSI_PARAM_IMM_DATA_EN, 0); iscsi_session_attr(first_burst_len, ISCSI_PARAM_FIRST_BURST, 0); iscsi_session_attr(max_burst_len, ISCSI_PARAM_MAX_BURST, 0); iscsi_session_attr(data_pdu_in_order, ISCSI_PARAM_PDU_INORDER_EN, 0); iscsi_session_attr(data_seq_in_order, ISCSI_PARAM_DATASEQ_INORDER_EN, 0); iscsi_session_attr(erl, ISCSI_PARAM_ERL, 0); iscsi_session_attr(tpgt, ISCSI_PARAM_TPGT, 0); iscsi_session_attr(username, ISCSI_PARAM_USERNAME, 1); iscsi_session_attr(username_in, ISCSI_PARAM_USERNAME_IN, 1); iscsi_session_attr(password, ISCSI_PARAM_PASSWORD, 1); iscsi_session_attr(password_in, ISCSI_PARAM_PASSWORD_IN, 1); iscsi_session_attr(chap_out_idx, ISCSI_PARAM_CHAP_OUT_IDX, 1); iscsi_session_attr(chap_in_idx, ISCSI_PARAM_CHAP_IN_IDX, 1); iscsi_session_attr(fast_abort, ISCSI_PARAM_FAST_ABORT, 0); iscsi_session_attr(abort_tmo, ISCSI_PARAM_ABORT_TMO, 0); iscsi_session_attr(lu_reset_tmo, ISCSI_PARAM_LU_RESET_TMO, 0); iscsi_session_attr(tgt_reset_tmo, ISCSI_PARAM_TGT_RESET_TMO, 0); iscsi_session_attr(ifacename, ISCSI_PARAM_IFACE_NAME, 0); iscsi_session_attr(initiatorname, ISCSI_PARAM_INITIATOR_NAME, 0); iscsi_session_attr(targetalias, ISCSI_PARAM_TARGET_ALIAS, 0); iscsi_session_attr(boot_root, ISCSI_PARAM_BOOT_ROOT, 0); iscsi_session_attr(boot_nic, ISCSI_PARAM_BOOT_NIC, 0); iscsi_session_attr(boot_target, ISCSI_PARAM_BOOT_TARGET, 0); iscsi_session_attr(auto_snd_tgt_disable, ISCSI_PARAM_AUTO_SND_TGT_DISABLE, 0); iscsi_session_attr(discovery_session, ISCSI_PARAM_DISCOVERY_SESS, 0); iscsi_session_attr(portal_type, ISCSI_PARAM_PORTAL_TYPE, 0); iscsi_session_attr(chap_auth, ISCSI_PARAM_CHAP_AUTH_EN, 0); iscsi_session_attr(discovery_logout, 
ISCSI_PARAM_DISCOVERY_LOGOUT_EN, 0); iscsi_session_attr(bidi_chap, ISCSI_PARAM_BIDI_CHAP_EN, 0); iscsi_session_attr(discovery_auth_optional, ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL, 0); iscsi_session_attr(def_time2wait, ISCSI_PARAM_DEF_TIME2WAIT, 0); iscsi_session_attr(def_time2retain, ISCSI_PARAM_DEF_TIME2RETAIN, 0); iscsi_session_attr(isid, ISCSI_PARAM_ISID, 0); iscsi_session_attr(tsid, ISCSI_PARAM_TSID, 0); iscsi_session_attr(def_taskmgmt_tmo, ISCSI_PARAM_DEF_TASKMGMT_TMO, 0); iscsi_session_attr(discovery_parent_idx, ISCSI_PARAM_DISCOVERY_PARENT_IDX, 0); iscsi_session_attr(discovery_parent_type, ISCSI_PARAM_DISCOVERY_PARENT_TYPE, 0); static ssize_t show_priv_session_target_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%s\n", iscsi_session_target_state_name[session->target_state]); } static ISCSI_CLASS_ATTR(priv_sess, target_state, S_IRUGO, show_priv_session_target_state, NULL); static ssize_t show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state)); } static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state, NULL); static ssize_t show_priv_session_creator(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%d\n", session->creator); } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); static ssize_t show_priv_session_target_id(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); return sysfs_emit(buf, "%d\n", session->target_id); } static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, show_priv_session_target_id, NULL); #define 
iscsi_priv_session_attr_show(field, format) \ static ssize_t \ show_priv_session_##field(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ if (session->field == -1) \ return sysfs_emit(buf, "off\n"); \ return sysfs_emit(buf, format"\n", session->field); \ } #define iscsi_priv_session_attr_store(field) \ static ssize_t \ store_priv_session_##field(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ int val; \ char *cp; \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ if ((session->state == ISCSI_SESSION_FREE) || \ (session->state == ISCSI_SESSION_FAILED)) \ return -EBUSY; \ if (strncmp(buf, "off", 3) == 0) { \ session->field = -1; \ session->field##_sysfs_override = true; \ } else { \ val = simple_strtoul(buf, &cp, 0); \ if (*cp != '\0' && *cp != '\n') \ return -EINVAL; \ session->field = val; \ session->field##_sysfs_override = true; \ } \ return count; \ } #define iscsi_priv_session_rw_attr(field, format) \ iscsi_priv_session_attr_show(field, format) \ iscsi_priv_session_attr_store(field) \ static ISCSI_CLASS_ATTR(priv_sess, field, S_IRUGO | S_IWUSR, \ show_priv_session_##field, \ store_priv_session_##field) iscsi_priv_session_rw_attr(recovery_tmo, "%d"); static struct attribute *iscsi_session_attrs[] = { &dev_attr_sess_initial_r2t.attr, &dev_attr_sess_max_outstanding_r2t.attr, &dev_attr_sess_immediate_data.attr, &dev_attr_sess_first_burst_len.attr, &dev_attr_sess_max_burst_len.attr, &dev_attr_sess_data_pdu_in_order.attr, &dev_attr_sess_data_seq_in_order.attr, &dev_attr_sess_erl.attr, &dev_attr_sess_targetname.attr, &dev_attr_sess_tpgt.attr, &dev_attr_sess_password.attr, &dev_attr_sess_password_in.attr, &dev_attr_sess_username.attr, &dev_attr_sess_username_in.attr, &dev_attr_sess_fast_abort.attr, &dev_attr_sess_abort_tmo.attr, &dev_attr_sess_lu_reset_tmo.attr, &dev_attr_sess_tgt_reset_tmo.attr, 
&dev_attr_sess_ifacename.attr, &dev_attr_sess_initiatorname.attr, &dev_attr_sess_targetalias.attr, &dev_attr_sess_boot_root.attr, &dev_attr_sess_boot_nic.attr, &dev_attr_sess_boot_target.attr, &dev_attr_priv_sess_recovery_tmo.attr, &dev_attr_priv_sess_state.attr, &dev_attr_priv_sess_target_state.attr, &dev_attr_priv_sess_creator.attr, &dev_attr_sess_chap_out_idx.attr, &dev_attr_sess_chap_in_idx.attr, &dev_attr_priv_sess_target_id.attr, &dev_attr_sess_auto_snd_tgt_disable.attr, &dev_attr_sess_discovery_session.attr, &dev_attr_sess_portal_type.attr, &dev_attr_sess_chap_auth.attr, &dev_attr_sess_discovery_logout.attr, &dev_attr_sess_bidi_chap.attr, &dev_attr_sess_discovery_auth_optional.attr, &dev_attr_sess_def_time2wait.attr, &dev_attr_sess_def_time2retain.attr, &dev_attr_sess_isid.attr, &dev_attr_sess_tsid.attr, &dev_attr_sess_def_taskmgmt_tmo.attr, &dev_attr_sess_discovery_parent_idx.attr, &dev_attr_sess_discovery_parent_type.attr, NULL, }; static umode_t iscsi_session_attr_is_visible(struct kobject *kobj, struct attribute *attr, int i) { struct device *cdev = container_of(kobj, struct device, kobj); struct iscsi_cls_session *session = transport_class_to_session(cdev); struct iscsi_transport *t = session->transport; int param; if (attr == &dev_attr_sess_initial_r2t.attr) param = ISCSI_PARAM_INITIAL_R2T_EN; else if (attr == &dev_attr_sess_max_outstanding_r2t.attr) param = ISCSI_PARAM_MAX_R2T; else if (attr == &dev_attr_sess_immediate_data.attr) param = ISCSI_PARAM_IMM_DATA_EN; else if (attr == &dev_attr_sess_first_burst_len.attr) param = ISCSI_PARAM_FIRST_BURST; else if (attr == &dev_attr_sess_max_burst_len.attr) param = ISCSI_PARAM_MAX_BURST; else if (attr == &dev_attr_sess_data_pdu_in_order.attr) param = ISCSI_PARAM_PDU_INORDER_EN; else if (attr == &dev_attr_sess_data_seq_in_order.attr) param = ISCSI_PARAM_DATASEQ_INORDER_EN; else if (attr == &dev_attr_sess_erl.attr) param = ISCSI_PARAM_ERL; else if (attr == &dev_attr_sess_targetname.attr) param = 
ISCSI_PARAM_TARGET_NAME; else if (attr == &dev_attr_sess_tpgt.attr) param = ISCSI_PARAM_TPGT; else if (attr == &dev_attr_sess_chap_in_idx.attr) param = ISCSI_PARAM_CHAP_IN_IDX; else if (attr == &dev_attr_sess_chap_out_idx.attr) param = ISCSI_PARAM_CHAP_OUT_IDX; else if (attr == &dev_attr_sess_password.attr) param = ISCSI_PARAM_USERNAME; else if (attr == &dev_attr_sess_password_in.attr) param = ISCSI_PARAM_USERNAME_IN; else if (attr == &dev_attr_sess_username.attr) param = ISCSI_PARAM_PASSWORD; else if (attr == &dev_attr_sess_username_in.attr) param = ISCSI_PARAM_PASSWORD_IN; else if (attr == &dev_attr_sess_fast_abort.attr) param = ISCSI_PARAM_FAST_ABORT; else if (attr == &dev_attr_sess_abort_tmo.attr) param = ISCSI_PARAM_ABORT_TMO; else if (attr == &dev_attr_sess_lu_reset_tmo.attr) param = ISCSI_PARAM_LU_RESET_TMO; else if (attr == &dev_attr_sess_tgt_reset_tmo.attr) param = ISCSI_PARAM_TGT_RESET_TMO; else if (attr == &dev_attr_sess_ifacename.attr) param = ISCSI_PARAM_IFACE_NAME; else if (attr == &dev_attr_sess_initiatorname.attr) param = ISCSI_PARAM_INITIATOR_NAME; else if (attr == &dev_attr_sess_targetalias.attr) param = ISCSI_PARAM_TARGET_ALIAS; else if (attr == &dev_attr_sess_boot_root.attr) param = ISCSI_PARAM_BOOT_ROOT; else if (attr == &dev_attr_sess_boot_nic.attr) param = ISCSI_PARAM_BOOT_NIC; else if (attr == &dev_attr_sess_boot_target.attr) param = ISCSI_PARAM_BOOT_TARGET; else if (attr == &dev_attr_sess_auto_snd_tgt_disable.attr) param = ISCSI_PARAM_AUTO_SND_TGT_DISABLE; else if (attr == &dev_attr_sess_discovery_session.attr) param = ISCSI_PARAM_DISCOVERY_SESS; else if (attr == &dev_attr_sess_portal_type.attr) param = ISCSI_PARAM_PORTAL_TYPE; else if (attr == &dev_attr_sess_chap_auth.attr) param = ISCSI_PARAM_CHAP_AUTH_EN; else if (attr == &dev_attr_sess_discovery_logout.attr) param = ISCSI_PARAM_DISCOVERY_LOGOUT_EN; else if (attr == &dev_attr_sess_bidi_chap.attr) param = ISCSI_PARAM_BIDI_CHAP_EN; else if (attr == 
&dev_attr_sess_discovery_auth_optional.attr) param = ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL; else if (attr == &dev_attr_sess_def_time2wait.attr) param = ISCSI_PARAM_DEF_TIME2WAIT; else if (attr == &dev_attr_sess_def_time2retain.attr) param = ISCSI_PARAM_DEF_TIME2RETAIN; else if (attr == &dev_attr_sess_isid.attr) param = ISCSI_PARAM_ISID; else if (attr == &dev_attr_sess_tsid.attr) param = ISCSI_PARAM_TSID; else if (attr == &dev_attr_sess_def_taskmgmt_tmo.attr) param = ISCSI_PARAM_DEF_TASKMGMT_TMO; else if (attr == &dev_attr_sess_discovery_parent_idx.attr) param = ISCSI_PARAM_DISCOVERY_PARENT_IDX; else if (attr == &dev_attr_sess_discovery_parent_type.attr) param = ISCSI_PARAM_DISCOVERY_PARENT_TYPE; else if (attr == &dev_attr_priv_sess_recovery_tmo.attr) return S_IRUGO | S_IWUSR; else if (attr == &dev_attr_priv_sess_state.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_state.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_creator.attr) return S_IRUGO; else if (attr == &dev_attr_priv_sess_target_id.attr) return S_IRUGO; else { WARN_ONCE(1, "Invalid session attr"); return 0; } return t->attr_is_visible(ISCSI_PARAM, param); } static struct attribute_group iscsi_session_group = { .attrs = iscsi_session_attrs, .is_visible = iscsi_session_attr_is_visible, }; /* * iSCSI host attrs */ #define iscsi_host_attr_show(param) \ static ssize_t \ show_host_param_##param(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct Scsi_Host *shost = transport_class_to_shost(dev); \ struct iscsi_internal *priv = to_iscsi_internal(shost->transportt); \ return priv->iscsi_transport->get_host_param(shost, param, buf); \ } #define iscsi_host_attr(field, param) \ iscsi_host_attr_show(param) \ static ISCSI_CLASS_ATTR(host, field, S_IRUGO, show_host_param_##param, \ NULL); iscsi_host_attr(netdev, ISCSI_HOST_PARAM_NETDEV_NAME); iscsi_host_attr(hwaddress, ISCSI_HOST_PARAM_HWADDRESS); iscsi_host_attr(ipaddress, ISCSI_HOST_PARAM_IPADDRESS); 
iscsi_host_attr(initiatorname, ISCSI_HOST_PARAM_INITIATOR_NAME);
iscsi_host_attr(port_state, ISCSI_HOST_PARAM_PORT_STATE);
iscsi_host_attr(port_speed, ISCSI_HOST_PARAM_PORT_SPEED);

/* NULL-terminated list of the host attributes in iscsi_host_group. */
static struct attribute *iscsi_host_attrs[] = {
	&dev_attr_host_netdev.attr,
	&dev_attr_host_hwaddress.attr,
	&dev_attr_host_ipaddress.attr,
	&dev_attr_host_initiatorname.attr,
	&dev_attr_host_port_state.attr,
	&dev_attr_host_port_speed.attr,
	NULL,
};

/*
 * iscsi_host_attr_is_visible - sysfs visibility callback for host attrs
 * @kobj: kobject embedded in the host's transport class device
 * @attr: attribute being queried
 * @i: index of @attr in the attribute array (unused)
 *
 * Maps @attr to its ISCSI_HOST_PARAM_* id and returns the mode reported by
 * the transport's ->attr_is_visible().  An unknown attribute triggers a
 * one-time warning and is hidden (mode 0).
 */
static umode_t iscsi_host_attr_is_visible(struct kobject *kobj,
					 struct attribute *attr, int i)
{
	struct device *cdev = container_of(kobj, struct device, kobj);
	struct Scsi_Host *shost = transport_class_to_shost(cdev);
	struct iscsi_internal *priv = to_iscsi_internal(shost->transportt);
	int param;

	if (attr == &dev_attr_host_netdev.attr)
		param = ISCSI_HOST_PARAM_NETDEV_NAME;
	else if (attr == &dev_attr_host_hwaddress.attr)
		param = ISCSI_HOST_PARAM_HWADDRESS;
	else if (attr == &dev_attr_host_ipaddress.attr)
		param = ISCSI_HOST_PARAM_IPADDRESS;
	else if (attr == &dev_attr_host_initiatorname.attr)
		param = ISCSI_HOST_PARAM_INITIATOR_NAME;
	else if (attr == &dev_attr_host_port_state.attr)
		param = ISCSI_HOST_PARAM_PORT_STATE;
	else if (attr == &dev_attr_host_port_speed.attr)
		param = ISCSI_HOST_PARAM_PORT_SPEED;
	else {
		WARN_ONCE(1, "Invalid host attr");
		return 0;
	}

	return priv->iscsi_transport->attr_is_visible(ISCSI_HOST_PARAM, param);
}

/* Host attribute group; visibility is decided per attribute above. */
static struct attribute_group iscsi_host_group = {
	.attrs = iscsi_host_attrs,
	.is_visible = iscsi_host_attr_is_visible,
};

/* convert iscsi_port_speed values to ascii string name */
static const struct {
	enum iscsi_port_speed value;
	char *name;
} iscsi_port_speed_names[] = {
	{ISCSI_PORT_SPEED_UNKNOWN, "Unknown" },
	{ISCSI_PORT_SPEED_10MBPS, "10 Mbps" },
	{ISCSI_PORT_SPEED_100MBPS, "100 Mbps" },
	{ISCSI_PORT_SPEED_1GBPS, "1 Gbps" },
	{ISCSI_PORT_SPEED_10GBPS, "10 Gbps" },
	{ISCSI_PORT_SPEED_25GBPS, "25 Gbps" },
	{ISCSI_PORT_SPEED_40GBPS, "40 Gbps" },
};

/*
 * iscsi_get_port_speed_name - name of the host's current port speed
 * @shost: host whose shost_data holds the iscsi_cls_host
 *
 * Returns the name of the first table entry whose value has a bit in common
 * with ihost->port_speed (the test is a bitwise AND, not an equality check),
 * or "Unknown!" when no entry matches.
 */
char *iscsi_get_port_speed_name(struct Scsi_Host *shost)
{
	int i;
	char *speed = "Unknown!";
	struct iscsi_cls_host *ihost = shost->shost_data;
	uint32_t port_speed = ihost->port_speed;

	for (i = 0; i < ARRAY_SIZE(iscsi_port_speed_names); i++) {
		if (iscsi_port_speed_names[i].value & port_speed) {
			speed = iscsi_port_speed_names[i].name;
			break;
		}
	}
	return speed;
}
EXPORT_SYMBOL_GPL(iscsi_get_port_speed_name);

/* convert iscsi_port_state values to ascii string name */
static const struct {
	enum iscsi_port_state value;
	char *name;
} iscsi_port_state_names[] = {
	{ISCSI_PORT_STATE_DOWN, "LINK DOWN" },
	{ISCSI_PORT_STATE_UP, "LINK UP" },
};

/*
 * iscsi_get_port_state_name - name of the host's current port state
 * @shost: host whose shost_data holds the iscsi_cls_host
 *
 * Returns the name of the first table entry whose value has a bit in common
 * with ihost->port_state, or "Unknown!" when no entry matches.
 */
char *iscsi_get_port_state_name(struct Scsi_Host *shost)
{
	int i;
	char *state = "Unknown!";
	struct iscsi_cls_host *ihost = shost->shost_data;
	uint32_t port_state = ihost->port_state;

	for (i = 0; i < ARRAY_SIZE(iscsi_port_state_names); i++) {
		if (iscsi_port_state_names[i].value & port_state) {
			state = iscsi_port_state_names[i].name;
			break;
		}
	}
	return state;
}
EXPORT_SYMBOL_GPL(iscsi_get_port_state_name);

/*
 * Attribute container match callback for session devices.
 *
 * Returns non-zero only when @dev is an iSCSI session device, its host has a
 * transport template, that template's session container belongs to
 * iscsi_session_class, and the container is @cont itself.
 */
static int iscsi_session_match(struct attribute_container *cont,
			       struct device *dev)
{
	struct iscsi_cls_session *session;
	struct Scsi_Host *shost;
	struct iscsi_internal *priv;

	if (!iscsi_is_session_dev(dev))
		return 0;

	session = iscsi_dev_to_session(dev);
	shost = iscsi_session_to_shost(session);
	if (!shost->transportt)
		return 0;

	priv = to_iscsi_internal(shost->transportt);
	if (priv->session_cont.ac.class != &iscsi_session_class.class)
		return 0;

	return &priv->session_cont.ac == cont;
}

/*
 * Attribute container match callback for connection devices.
 *
 * Same checks as the session variant, reached through the connection's
 * parent device (treated here as its session) and compared against the
 * transport's connection container.
 */
static int iscsi_conn_match(struct attribute_container *cont,
			    struct device *dev)
{
	struct iscsi_cls_session *session;
	struct iscsi_cls_conn *conn;
	struct Scsi_Host *shost;
	struct iscsi_internal *priv;

	if (!iscsi_is_conn_dev(dev))
		return 0;

	conn = iscsi_dev_to_conn(dev);
	session = iscsi_dev_to_session(conn->dev.parent);
	shost = iscsi_session_to_shost(session);

	if (!shost->transportt)
		return 0;

	priv = to_iscsi_internal(shost->transportt);
	if (priv->conn_cont.ac.class != &iscsi_connection_class.class)
		return 0;

	return &priv->conn_cont.ac == cont;
}

/*
 * Attribute container match callback for host devices.
 *
 * Returns non-zero only when @dev is a SCSI host device whose transport
 * template's host_attrs container belongs to iscsi_host_class and is @cont.
 */
static int iscsi_host_match(struct attribute_container *cont,
			    struct device *dev)
{
	struct Scsi_Host *shost;
	struct iscsi_internal *priv;

	if (!scsi_is_host_device(dev))
		return 0;

	shost = dev_to_shost(dev);
	if (!shost->transportt ||
	    shost->transportt->host_attrs.ac.class != &iscsi_host_class.class)
		return 0;

	priv = to_iscsi_internal(shost->transportt);
	return &priv->t.host_attrs.ac == cont;
}

/*
 * iscsi_register_transport - register an iSCSI transport with the class
 * @tt: transport to register; must not be NULL (BUG otherwise)
 *
 * Allocates the internal bookkeeping structure, registers its device and
 * sysfs group, registers the host, connection and session attribute
 * containers, and finally links the transport into iscsi_transports under
 * iscsi_transport_lock.
 *
 * Returns the embedded scsi_transport_template on success.  Returns NULL when
 * @tt is already registered, when the allocation fails, or when device or
 * sysfs group registration fails.  The return values of the
 * transport_container_register() calls are not checked.
 */
struct scsi_transport_template *
iscsi_register_transport(struct iscsi_transport *tt)
{
	struct iscsi_internal *priv;
	unsigned long flags;
	int err;

	BUG_ON(!tt);
	/* Warn about a transport that sets ep_disconnect but not unbind_conn. */
	WARN_ON(tt->ep_disconnect && !tt->unbind_conn);

	/* Refuse a second registration of the same transport. */
	priv = iscsi_if_transport_lookup(tt);
	if (priv)
		return NULL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return NULL;
	INIT_LIST_HEAD(&priv->list);
	priv->iscsi_transport = tt;
	priv->t.user_scan = iscsi_user_scan;

	priv->dev.class = &iscsi_transport_class;
	dev_set_name(&priv->dev, "%s", tt->name);
	err = device_register(&priv->dev);
	if (err)
		goto put_dev;

	err = sysfs_create_group(&priv->dev.kobj, &iscsi_transport_group);
	if (err)
		goto unregister_dev;

	/* host parameters */
	priv->t.host_attrs.ac.class = &iscsi_host_class.class;
	priv->t.host_attrs.ac.match = iscsi_host_match;
	priv->t.host_attrs.ac.grp = &iscsi_host_group;
	priv->t.host_size = sizeof(struct iscsi_cls_host);
	transport_container_register(&priv->t.host_attrs);

	/* connection parameters */
	priv->conn_cont.ac.class = &iscsi_connection_class.class;
	priv->conn_cont.ac.match = iscsi_conn_match;
	priv->conn_cont.ac.grp = &iscsi_conn_group;
	transport_container_register(&priv->conn_cont);

	/* session parameters */
	priv->session_cont.ac.class = &iscsi_session_class.class;
	priv->session_cont.ac.match = iscsi_session_match;
	priv->session_cont.ac.grp = &iscsi_session_group;
	transport_container_register(&priv->session_cont);

	/* Make the transport visible to lookups only once fully set up. */
	spin_lock_irqsave(&iscsi_transport_lock, flags);
	list_add(&priv->list, &iscsi_transports);
	spin_unlock_irqrestore(&iscsi_transport_lock, flags);

	printk(KERN_NOTICE "iscsi: registered transport (%s)\n", tt->name);
	return &priv->t;

unregister_dev:
	device_unregister(&priv->dev);
	return NULL;
put_dev:
	/* device_register() failed: drop the reference with put_device(). */
	put_device(&priv->dev);
	return NULL;
}
EXPORT_SYMBOL_GPL(iscsi_register_transport);

/*
 * iscsi_unregister_transport - undo iscsi_register_transport()
 * @tt: transport to remove; must be non-NULL and currently registered
 *      (BUG otherwise)
 *
 * Runs entirely under rx_queue_mutex: unlinks the transport from
 * iscsi_transports, unregisters the three attribute containers, removes the
 * sysfs group and unregisters the device.
 */
void iscsi_unregister_transport(struct iscsi_transport *tt)
{
	struct iscsi_internal *priv;
	unsigned long flags;

	BUG_ON(!tt);

	mutex_lock(&rx_queue_mutex);

	priv = iscsi_if_transport_lookup(tt);
	BUG_ON (!priv);

	spin_lock_irqsave(&iscsi_transport_lock, flags);
	list_del(&priv->list);
	spin_unlock_irqrestore(&iscsi_transport_lock, flags);

	transport_container_unregister(&priv->conn_cont);
	transport_container_unregister(&priv->session_cont);
	transport_container_unregister(&priv->t.host_attrs);

	sysfs_remove_group(&priv->dev.kobj, &iscsi_transport_group);
	device_unregister(&priv->dev);
	mutex_unlock(&rx_queue_mutex);
}
EXPORT_SYMBOL_GPL(iscsi_unregister_transport);

/*
 * iscsi_dbg_trace - hand a printf-style message to a trace callback
 * @trace: callback receiving @dev and the packaged format/arguments
 * @dev: device the message relates to
 * @fmt: printf-style format string, followed by its arguments
 *
 * Packs @fmt and the variadic arguments into a struct va_format and calls
 * @trace with it.
 */
void iscsi_dbg_trace(void (*trace)(struct device *dev, struct va_format *),
		     struct device *dev, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	trace(dev, &vaf);
	va_end(args);
}
EXPORT_SYMBOL_GPL(iscsi_dbg_trace);

/*
 * Module init: register the transport, endpoint and iface classes, the host,
 * connection and session transport classes and the flashnode bus, then
 * create the netlink socket and the connection cleanup workqueue.
 *
 * Each failure jumps to the label that unwinds everything set up before it,
 * in reverse order.  Returns 0 on success or a negative errno.
 */
static __init int iscsi_transport_init(void)
{
	int err;
	struct netlink_kernel_cfg cfg = {
		.groups = 1,
		.input = iscsi_if_rx,
	};
	printk(KERN_INFO "Loading iSCSI transport class v%s.\n",
	       ISCSI_TRANSPORT_VERSION);

	atomic_set(&iscsi_session_nr, 0);

	err = class_register(&iscsi_transport_class);
	if (err)
		return err;

	err = class_register(&iscsi_endpoint_class);
	if (err)
		goto unregister_transport_class;

	err = class_register(&iscsi_iface_class);
	if (err)
		goto unregister_endpoint_class;

	err = transport_class_register(&iscsi_host_class);
	if (err)
		goto unregister_iface_class;

	err = transport_class_register(&iscsi_connection_class);
	if (err)
		goto unregister_host_class;

	err = transport_class_register(&iscsi_session_class);
	if (err)
		goto unregister_conn_class;

	err = bus_register(&iscsi_flashnode_bus);
	if (err)
		goto unregister_session_class;

	nls = netlink_kernel_create(&init_net, NETLINK_ISCSI, &cfg);
	if (!nls) {
		err = -ENOBUFS;
		goto unregister_flashnode_bus;
	}

	iscsi_conn_cleanup_workq = alloc_workqueue("%s",
			WQ_SYSFS | WQ_MEM_RECLAIM | WQ_UNBOUND, 0,
			"iscsi_conn_cleanup");
	if (!iscsi_conn_cleanup_workq) {
		err = -ENOMEM;
		goto release_nls;
	}

	return 0;

release_nls:
	netlink_kernel_release(nls);
unregister_flashnode_bus:
	bus_unregister(&iscsi_flashnode_bus);
unregister_session_class:
	transport_class_unregister(&iscsi_session_class);
unregister_conn_class:
	transport_class_unregister(&iscsi_connection_class);
unregister_host_class:
	transport_class_unregister(&iscsi_host_class);
unregister_iface_class:
	class_unregister(&iscsi_iface_class);
unregister_endpoint_class:
	class_unregister(&iscsi_endpoint_class);
unregister_transport_class:
	class_unregister(&iscsi_transport_class);

	return err;
}

/*
 * Module exit: destroy the cleanup workqueue, release the netlink socket and
 * unregister the bus and every class registered by iscsi_transport_init().
 */
static void __exit iscsi_transport_exit(void)
{
	destroy_workqueue(iscsi_conn_cleanup_workq);
	netlink_kernel_release(nls);
	bus_unregister(&iscsi_flashnode_bus);
	transport_class_unregister(&iscsi_connection_class);
	transport_class_unregister(&iscsi_session_class);
	transport_class_unregister(&iscsi_host_class);
	class_unregister(&iscsi_endpoint_class);
	class_unregister(&iscsi_iface_class);
	class_unregister(&iscsi_transport_class);
}

module_init(iscsi_transport_init);
module_exit(iscsi_transport_exit);

MODULE_AUTHOR("Mike Christie <michaelc@cs.wisc.edu>, "
	      "Dmitry Yusupov <dmitry_yus@yahoo.com>, "
	      "Alex Aizman <itn780@yahoo.com>");
MODULE_DESCRIPTION("iSCSI Transport Interface");
MODULE_LICENSE("GPL");
MODULE_VERSION(ISCSI_TRANSPORT_VERSION);
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_ISCSI);
8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2012 Siemens AG * * Written by: * Pavel Smolenskiy <pavel.smolenskiy@gmail.com> * Maxim Gorbachyov <maxim.gorbachev@siemens.com> * Dmitry Eremin-Solenikov <dbaryshkov@gmail.com> * Alexander Smirnov <alex.bluesman.smirnov@gmail.com> */ 
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/crc-ccitt.h>
#include <linux/unaligned.h>

#include <net/mac802154.h>
#include <net/ieee802154_netdev.h>
#include <net/nl802154.h>

#include "ieee802154_i.h"

/*
 * Mark @skb as an IEEE 802.15.4 frame that needs no checksum verification
 * and hand it to the network stack.  Returns netif_receive_skb()'s result.
 */
static int ieee802154_deliver_skb(struct sk_buff *skb)
{
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->protocol = htons(ETH_P_IEEE802154);

	return netif_receive_skb(skb);
}

/*
 * Work handler for queued beacons: takes the first packet on
 * local->rx_beacon_list (if any), passes it to mac802154_process_beacon()
 * together with the page/channel recorded at reception, then unlinks and
 * frees it.  One packet is handled per invocation.
 */
void mac802154_rx_beacon_worker(struct work_struct *work)
{
	struct ieee802154_local *local =
		container_of(work, struct ieee802154_local, rx_beacon_work);
	struct cfg802154_mac_pkt *mac_pkt;

	mac_pkt = list_first_entry_or_null(&local->rx_beacon_list,
					   struct cfg802154_mac_pkt, node);
	if (!mac_pkt)
		return;

	mac802154_process_beacon(local, mac_pkt->skb, mac_pkt->page, mac_pkt->channel);

	list_del(&mac_pkt->node);
	kfree_skb(mac_pkt->skb);
	kfree(mac_pkt);
}

/*
 * Decide whether a received beacon request should be answered.
 *
 * Returns true only when a beacon request configuration is installed, the
 * device is beaconing, and the configured interval equals
 * IEEE802154_ACTIVE_SCAN_DURATION.  The interval is copied out under
 * rcu_read_lock() so it can be tested after the lock is dropped.
 */
static bool mac802154_should_answer_beacon_req(struct ieee802154_local *local)
{
	struct cfg802154_beacon_request *beacon_req;
	unsigned int interval;

	rcu_read_lock();
	beacon_req = rcu_dereference(local->beacon_req);
	if (!beacon_req) {
		rcu_read_unlock();
		return false;
	}

	interval = beacon_req->interval;
	rcu_read_unlock();

	if (!mac802154_is_beaconing(local))
		return false;

	return interval == IEEE802154_ACTIVE_SCAN_DURATION;
}

/*
 * Work handler for queued MAC commands: takes the first packet on
 * local->rx_mac_cmd_list (if any), decodes the command id and dispatches it.
 * The packet is unlinked and freed whether or not it was handled, including
 * when the command id cannot be read.
 */
void mac802154_rx_mac_cmd_worker(struct work_struct *work)
{
	struct ieee802154_local *local =
		container_of(work, struct ieee802154_local, rx_mac_cmd_work);
	struct cfg802154_mac_pkt *mac_pkt;
	u8 mac_cmd;
	int rc;

	mac_pkt = list_first_entry_or_null(&local->rx_mac_cmd_list,
					   struct cfg802154_mac_pkt, node);
	if (!mac_pkt)
		return;

	rc = ieee802154_get_mac_cmd(mac_pkt->skb, &mac_cmd);
	if (rc)
		goto out;

	switch (mac_cmd) {
	case IEEE802154_CMD_BEACON_REQ:
		dev_dbg(&mac_pkt->sdata->dev->dev, "processing BEACON REQ\n");
		if (!mac802154_should_answer_beacon_req(local))
			break;

		/* Answer by scheduling the beacon work with no delay. */
		queue_delayed_work(local->mac_wq, &local->beacon_work, 0);
		break;

	case IEEE802154_CMD_ASSOCIATION_RESP:
		dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC RESP\n");
		/* Ignored unless an association is in progress. */
		if (!mac802154_is_associating(local))
			break;

		mac802154_process_association_resp(mac_pkt->sdata, mac_pkt->skb);
		break;

	case IEEE802154_CMD_ASSOCIATION_REQ:
		dev_dbg(&mac_pkt->sdata->dev->dev, "processing ASSOC REQ\n");
		/* Only coordinator interfaces handle association requests. */
		if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD)
			break;

		mac802154_process_association_req(mac_pkt->sdata, mac_pkt->skb);
		break;

	case IEEE802154_CMD_DISASSOCIATION_NOTIFY:
		dev_dbg(&mac_pkt->sdata->dev->dev, "processing DISASSOC NOTIF\n");
		/* Only coordinator interfaces handle disassociation notices. */
		if (mac_pkt->sdata->wpan_dev.iftype != NL802154_IFTYPE_COORD)
			break;

		mac802154_process_disassociation_notif(mac_pkt->sdata, mac_pkt->skb);
		break;

	default:
		break;
	}

out:
	list_del(&mac_pkt->node);
	kfree_skb(mac_pkt->skb);
	kfree(mac_pkt);
}

/*
 * Process one received frame for a single (non-monitor) interface.
 *
 * @sdata: interface the frame is being delivered to
 * @skb:   frame owned by this call; freed on every "fail" path
 * @hdr:   header already parsed by the caller
 *
 * Steps: drop non-beacons while scan-level filtering has to be done in
 * software, classify skb->pkt_type from the destination address, decrypt,
 * update the rx counters, then dispatch on the frame type.  Beacons and MAC
 * commands are queued for the workers above, data frames go to the network
 * stack, everything else is dropped.
 *
 * Returns NET_RX_SUCCESS for queued frames, ieee802154_deliver_skb()'s result
 * for data frames, and NET_RX_DROP otherwise.
 */
static int
ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
		       struct sk_buff *skb, const struct ieee802154_hdr *hdr)
{
	struct wpan_phy *wpan_phy = sdata->local->hw.phy;
	struct wpan_dev *wpan_dev = &sdata->wpan_dev;
	struct cfg802154_mac_pkt *mac_pkt;
	__le16 span, sshort;
	int rc;

	pr_debug("getting packet via slave interface %s\n", sdata->dev->name);

	span = wpan_dev->pan_id;
	sshort = wpan_dev->short_addr;

	/* Level 3 filtering: Only beacons are accepted during scans */
	if (sdata->required_filtering == IEEE802154_FILTERING_3_SCAN &&
	    sdata->required_filtering > wpan_phy->filtering) {
		if (mac_cb(skb)->type != IEEE802154_FC_TYPE_BEACON) {
			dev_dbg(&sdata->dev->dev,
				"drop non-beacon frame (0x%x) during scan\n",
				mac_cb(skb)->type);
			goto fail;
		}
	}

	switch (mac_cb(skb)->dest.mode) {
	case IEEE802154_ADDR_NONE:
		if (hdr->source.mode == IEEE802154_ADDR_NONE)
			/* ACK comes with both addresses empty */
			skb->pkt_type = PACKET_HOST;
		else if (!wpan_dev->parent)
			/* No dest means PAN coordinator is the recipient */
			skb->pkt_type = PACKET_HOST;
		else
			/* We are not the PAN coordinator, just relaying */
			skb->pkt_type = PACKET_OTHERHOST;
		break;
	case IEEE802154_ADDR_LONG:
		/* Foreign PAN (and not the broadcast PAN): not for us. */
		if (mac_cb(skb)->dest.pan_id != span &&
		    mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST))
			skb->pkt_type = PACKET_OTHERHOST;
		else if (mac_cb(skb)->dest.extended_addr == wpan_dev->extended_addr)
			skb->pkt_type = PACKET_HOST;
		else
			skb->pkt_type = PACKET_OTHERHOST;
		break;
	case IEEE802154_ADDR_SHORT:
		/* Foreign PAN (and not the broadcast PAN): not for us. */
		if (mac_cb(skb)->dest.pan_id != span &&
		    mac_cb(skb)->dest.pan_id != cpu_to_le16(IEEE802154_PANID_BROADCAST))
			skb->pkt_type = PACKET_OTHERHOST;
		else if (mac_cb(skb)->dest.short_addr == sshort)
			skb->pkt_type = PACKET_HOST;
		else if (mac_cb(skb)->dest.short_addr ==
			  cpu_to_le16(IEEE802154_ADDR_BROADCAST))
			skb->pkt_type = PACKET_BROADCAST;
		else
			skb->pkt_type = PACKET_OTHERHOST;
		break;
	default:
		pr_debug("invalid dest mode\n");
		goto fail;
	}

	skb->dev = sdata->dev;

	/* TODO this should be moved after netif_receive_skb call, otherwise
	 * wireshark will show a mac header with security fields and the
	 * payload is already decrypted.
	 */
	rc = mac802154_llsec_decrypt(&sdata->sec, skb);
	if (rc) {
		pr_debug("decryption failed: %i\n", rc);
		goto fail;
	}

	sdata->dev->stats.rx_packets++;
	sdata->dev->stats.rx_bytes += skb->len;

	switch (mac_cb(skb)->type) {
	case IEEE802154_FC_TYPE_BEACON:
		dev_dbg(&sdata->dev->dev, "BEACON received\n");
		/* Beacons are only of interest while a scan is running. */
		if (!mac802154_is_scanning(sdata->local))
			goto fail;

		mac_pkt = kzalloc(sizeof(*mac_pkt), GFP_ATOMIC);
		if (!mac_pkt)
			goto fail;

		/*
		 * NOTE(review): skb_get() takes an extra reference for the
		 * queued packet and this path returns without dropping the
		 * reference this function was handed -- confirm who releases
		 * it.
		 */
		mac_pkt->skb = skb_get(skb);
		mac_pkt->sdata = sdata;
		/* Record where the scan was tuned when the beacon arrived. */
		mac_pkt->page = sdata->local->scan_page;
		mac_pkt->channel = sdata->local->scan_channel;
		list_add_tail(&mac_pkt->node, &sdata->local->rx_beacon_list);
		queue_work(sdata->local->mac_wq, &sdata->local->rx_beacon_work);
		return NET_RX_SUCCESS;

	case IEEE802154_FC_TYPE_MAC_CMD:
		dev_dbg(&sdata->dev->dev, "MAC COMMAND received\n");
		mac_pkt = kzalloc(sizeof(*mac_pkt), GFP_ATOMIC);
		if (!mac_pkt)
			goto fail;

		/* NOTE(review): same reference question as the beacon path. */
		mac_pkt->skb = skb_get(skb);
		mac_pkt->sdata = sdata;
		list_add_tail(&mac_pkt->node, &sdata->local->rx_mac_cmd_list);
		queue_work(sdata->local->mac_wq, &sdata->local->rx_mac_cmd_work);
		return NET_RX_SUCCESS;

	case IEEE802154_FC_TYPE_ACK:
		goto fail;

	case IEEE802154_FC_TYPE_DATA:
		return ieee802154_deliver_skb(skb);
	default:
		pr_warn_ratelimited("ieee802154: bad frame received "
				    "(type = %d)\n", mac_cb(skb)->type);
		goto fail;
	}

fail:
	kfree_skb(skb);
	return NET_RX_DROP;
}

/*
 * Debug helper: print one address of a frame header.  @name labels the
 * output ("source"/"destination" at the call sites in this file).
 */
static void
ieee802154_print_addr(const char *name, const struct ieee802154_addr *addr)
{
	if (addr->mode == IEEE802154_ADDR_NONE) {
		pr_debug("%s not present\n", name);
		return;
	}

	pr_debug("%s PAN ID: %04x\n", name, le16_to_cpu(addr->pan_id));
	if (addr->mode == IEEE802154_ADDR_SHORT) {
		pr_debug("%s is short: %04x\n", name,
			 le16_to_cpu(addr->short_addr));
	} else {
		/* Byte-swap the extended address before printing its 8 bytes. */
		u64 hw = swab64((__force u64)addr->extended_addr);

		pr_debug("%s is hardware: %8phC\n", name, &hw);
	}
}

/*
 * Pull the MAC header off @skb into @hdr and mirror the fields later stages
 * need (type, ack request, security flag, source and destination) into the
 * skb control block.  Also sets the mac header offset and skb->mac_len.
 *
 * Returns 0 on success, -EINVAL when the header cannot be pulled.
 */
static int
ieee802154_parse_frame_start(struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
	int hlen;
	struct ieee802154_mac_cb *cb = mac_cb(skb);

	skb_reset_mac_header(skb);

	hlen = ieee802154_hdr_pull(skb, hdr);
	if (hlen < 0)
		return -EINVAL;

	skb->mac_len = hlen;

	pr_debug("fc: %04x dsn: %02x\n", le16_to_cpup((__le16 *)&hdr->fc),
		 hdr->seq);

	cb->type = hdr->fc.type;
	cb->ackreq = hdr->fc.ack_request;
	cb->secen = hdr->fc.security_enabled;

	ieee802154_print_addr("destination", &hdr->dest);
	ieee802154_print_addr("source", &hdr->source);

	cb->source = hdr->source;
	cb->dest = hdr->dest;

	/* The remainder only emits debug output for the security header. */
	if (hdr->fc.security_enabled) {
		u64 key;

		pr_debug("seclevel %i\n", hdr->sec.level);

		switch (hdr->sec.key_id_mode) {
		case IEEE802154_SCF_KEY_IMPLICIT:
			pr_debug("implicit key\n");
			break;

		case IEEE802154_SCF_KEY_INDEX:
			pr_debug("key %02x\n", hdr->sec.key_id);
			break;

		case IEEE802154_SCF_KEY_SHORT_INDEX:
			pr_debug("key %04x:%04x %02x\n",
				 le32_to_cpu(hdr->sec.short_src) >> 16,
				 le32_to_cpu(hdr->sec.short_src) & 0xffff,
				 hdr->sec.key_id);
			break;

		case IEEE802154_SCF_KEY_HW_INDEX:
			key = swab64((__force u64)hdr->sec.extended_src);
			pr_debug("key source %8phC %02x\n", &key,
				 hdr->sec.key_id);
			break;
		}
	}

	return 0;
}

/*
 * Parse the frame header once, then give a clone of @skb to every running
 * non-monitor interface.  @skb itself is not consumed here.  A frame whose
 * header cannot be parsed is ignored.
 */
static void
__ieee802154_rx_handle_packet(struct ieee802154_local *local,
			      struct sk_buff *skb)
{
	int ret;
	struct ieee802154_sub_if_data *sdata;
	struct ieee802154_hdr hdr;
	struct sk_buff *skb2;

	ret = ieee802154_parse_frame_start(skb, &hdr);
	if (ret) {
		pr_debug("got invalid frame\n");
		return;
	}

	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
		if (sdata->wpan_dev.iftype == NL802154_IFTYPE_MONITOR)
			continue;

		if (!ieee802154_sdata_running(sdata))
			continue;

		/* Do not deliver packets received on interfaces expecting
		 * AACK=1 if the address filters were disabled.
		 */
		if (local->hw.phy->filtering < IEEE802154_FILTERING_4_FRAME_FIELDS &&
		    sdata->required_filtering == IEEE802154_FILTERING_4_FRAME_FIELDS)
			continue;

		/* A failed clone silently skips this interface. */
		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2) {
			skb2->dev = sdata->dev;
			ieee802154_subif_frame(sdata, skb2, &hdr);
		}
	}
}

/*
 * Deliver a clone of @skb to every running monitor interface and account it
 * in that interface's rx counters.  @skb's own mac header offset, checksum
 * state, packet type and protocol are set before cloning; @skb itself is not
 * consumed here.
 */
static void
ieee802154_monitors_rx(struct ieee802154_local *local, struct sk_buff *skb)
{
	struct sk_buff *skb2;
	struct ieee802154_sub_if_data *sdata;

	skb_reset_mac_header(skb);
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->pkt_type = PACKET_OTHERHOST;
	skb->protocol = htons(ETH_P_IEEE802154);

	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
		if (sdata->wpan_dev.iftype != NL802154_IFTYPE_MONITOR)
			continue;

		if (!ieee802154_sdata_running(sdata))
			continue;

		skb2 = skb_clone(skb, GFP_ATOMIC);
		if (skb2) {
			skb2->dev = sdata->dev;
			ieee802154_deliver_skb(skb2);

			/* Length is taken from @skb, not from the delivered clone. */
			sdata->dev->stats.rx_packets++;
			sdata->dev->stats.rx_bytes += skb->len;
		}
	}
}

/*
 * Main receive entry point for one frame.
 *
 * Warns once if softirq_count() is zero on entry.  Frames received while the
 * device is suspended are dropped.  Otherwise the frame is first shown to
 * the monitor interfaces, then (when the PHY does no filtering) its FCS is
 * verified in software, the two FCS bytes are trimmed, and the frame is
 * distributed to the remaining interfaces.  @skb is always freed before
 * returning; interfaces receive clones.
 */
void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb)
{
	u16 crc;

	WARN_ON_ONCE(softirq_count() == 0);

	if (local->suspended)
		goto free_skb;

	/* TODO: When a transceiver omits the checksum here, we
	 * add an own calculated one. This is currently an ugly
	 * solution because the monitor needs a crc here.
	 */
	if (local->hw.flags & IEEE802154_HW_RX_OMIT_CKSUM) {
		crc = crc_ccitt(0, skb->data, skb->len);
		put_unaligned_le16(crc, skb_put(skb, 2));
	}

	rcu_read_lock();

	ieee802154_monitors_rx(local, skb);

	/* Level 1 filtering: Check the FCS by software when relevant */
	if (local->hw.phy->filtering == IEEE802154_FILTERING_NONE) {
		/* CRC over the frame including its FCS; non-zero means drop. */
		crc = crc_ccitt(0, skb->data, skb->len);
		if (crc)
			goto drop;
	}
	/* remove crc */
	skb_trim(skb, skb->len - 2);

	__ieee802154_rx_handle_packet(local, skb);

drop:
	rcu_read_unlock();
free_skb:
	kfree_skb(skb);
}

/*
 * Receive entry point for drivers: initialise the control block, store the
 * link quality indicator, tag the skb as an RX message, append it to
 * local->skb_queue and schedule the tasklet that will process it.
 */
void
ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
{
	struct ieee802154_local *local = hw_to_local(hw);
	struct ieee802154_mac_cb *cb = mac_cb_init(skb);

	cb->lqi = lqi;
	skb->pkt_type = IEEE802154_RX_MSG;
	skb_queue_tail(&local->skb_queue, skb);
	tasklet_schedule(&local->tasklet);
}
EXPORT_SYMBOL(ieee802154_rx_irqsafe);
398 396 399 1 1 323 110 69 1 55 55 1 107 100 7 393 392 394 392 283 184 185 1 1 392 393 393 26 406 26 46 1 380 404 407 399 405 403 3 390 3 224 226 225 214 11 217 10 1 8 213 5 6 6 5 221 8 224 226 225 224 225 223 225 226 224 3 10 10 10 10 35 34 35 35 35 8 7 7 7 8 8 7 7 8 6 4 27 26 26 27 24 27 46 46 46 4 42 37 3 1 2 37 4 35 21 14 1 13 9 2 12 9 8 7 8 7 7 7 1 5 24 3 24 24 16 32 1 82 49 1 32 26 6 432 259 319 320 322 322 315 3 270 77 269 291 83 384 381 379 380 382 2 3 384 1 15 74 48 357 82 349 21 82 82 82 61 55 5 33 26 30 182 185 186 156 33 33 33 33 33 32 186 179 6 1 205 36 186 210 205 2 1 205 3 208 4 174 41 36 25 4 8 3 6 4 171 4 200 1 2 204 197 29 13 185 184 29 186 24 164 1 1 89 111 185 182 1 183 97 2 184 161 25 114 89 37 8 5 4 1 1 3 30 30 29 30 29 4 109 109 1 10 9 1 172 174 6 36 142 144 168 209 175 41 194 22 181 2 178 2 180 29 181 181 178 2 4 33 180 181 181 179 181 179 144 57 10 49 10 58 177 177 174 170 7 20 175 128 131 1 129 40 37 13 31 31 49 48 2 9 44 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 
787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 
1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 
1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 
2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 output functions * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on linux/net/ipv4/ip_output.c * * Changes: * A.N.Kuznetsov : airthmetics in fragmentation. * extension headers are implemented. * route changes now work. * ip6_forward does not confuse sniffers. * etc. * * H. von Brand : Added missing #include <linux/string.h> * Imran Patel : frag id should be in NBO * Kazunori MIYAZAWA @USAGI * : add ip6_append_data and related functions * for datagram xmit */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/in6.h> #include <linux/tcp.h> #include <linux/route.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/bpf-cgroup.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> #include <net/sock.h> #include <net/snmp.h> #include <net/gso.h> #include <net/ipv6.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/rawv6.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/checksum.h> #include <linux/mroute6.h> #include <net/l3mdev.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst_dev(dst); struct inet6_dev *idev = ip6_dst_idev(dst); unsigned int hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *daddr, *nexthop; struct ipv6hdr *hdr; struct neighbour *neigh; int ret; /* Be paranoid, rather than too clever.
	 */
	if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
		/* Make sure idev stays alive */
		rcu_read_lock();
		skb = skb_expand_head(skb, hh_len);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			rcu_read_unlock();
			return -ENOMEM;
		}
		rcu_read_unlock();
	}

	hdr = ipv6_hdr(skb);
	daddr = &hdr->daddr;
	if (ipv6_addr_is_multicast(daddr)) {
		/* Loop a clone back locally when the checks below pass. */
		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			/* A zero hop limit packet is only looped back, never sent. */
			if (hdr->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
		/* Node-local (or narrower) scope is dropped unless dev is loopback. */
		if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res != LWTUNNEL_XMIT_CONTINUE)
			return res;
	}

	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);

	/* The neighbour is looked up without a reference, hence the RCU section. */
	rcu_read_lock();
	nexthop = rt6_nexthop(dst_rt6_info(dst), daddr);
	neigh = __ipv6_neigh_lookup_noref(dev, nexthop);

	if (IS_ERR_OR_NULL(neigh)) {
		if (unlikely(!neigh))
			neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
		if (IS_ERR(neigh)) {
			rcu_read_unlock();
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
			kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
			return -EINVAL;
		}
	}
	sock_confirm_neigh(skb, neigh);
	ret = neigh_output(neigh, skb, false);
	rcu_read_unlock();
	return ret;
}

/*
 * Segment @skb with the GSO feature bits masked off and transmit each
 * resulting segment: segments longer than @mtu go through ip6_fragment(),
 * the others straight to ip6_finish_output2().
 *
 * Returns 0, or the first non-zero result reported for a segment.
 * Returns -ENOMEM (after freeing @skb) if segmentation yields an error
 * or no segments.
 */
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
				    struct sk_buff *skb, unsigned int mtu)
{
	struct sk_buff *segs, *nskb;
	netdev_features_t features;
	int ret = 0;

	/* Please see corresponding comment in ip_finish_output_gso
	 * describing the cases where GSO segment length exceeds the
	 * egress MTU.
	 */
	features = netif_skb_features(skb);
	segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
	if (IS_ERR_OR_NULL(segs)) {
		kfree_skb(skb);
		return -ENOMEM;
	}

	consume_skb(skb);

	skb_list_walk_safe(segs, segs, nskb) {
		int err;

		skb_mark_not_on_list(segs);
		/* Last GSO segment can be smaller than gso_size (and MTU).
		 * Adding a fragment header would produce an "atomic fragment",
		 * which is considered harmful (RFC-8021). Avoid that.
		 */
		err = segs->len > mtu ?
			ip6_fragment(net, sk, segs, ip6_finish_output2) :
			ip6_finish_output2(net, sk, segs);
		/* Keep transmitting, but remember the first failure. */
		if (err && ret == 0)
			ret = err;
	}

	return ret;
}

/*
 * Output path for a GSO skb: unless the skb is flagged IP6SKB_FAKEJUMBO,
 * a packet that fails skb_gso_validate_network_len() against @mtu takes the
 * software segmentation slow path; everything else goes to
 * ip6_finish_output2() unchanged.
 */
static int ip6_finish_output_gso(struct net *net, struct sock *sk,
				 struct sk_buff *skb, unsigned int mtu)
{
	if (!(IP6CB(skb)->flags & IP6SKB_FAKEJUMBO) &&
	    !skb_gso_validate_network_len(skb, mtu))
		return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

	return ip6_finish_output2(net, sk, skb);
}

/*
 * Choose how to emit @skb: re-run dst_output() if an xfrm policy is attached
 * to the dst, use the GSO path for GSO skbs, fragment when the packet is
 * longer than the dst MTU or than a recorded frag_max_size, and otherwise
 * transmit it directly.
 */
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IP6CB(skb)->flags |= IP6SKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	mtu = ip6_skb_dst_mtu(skb);
	if (skb_is_gso(skb))
		return ip6_finish_output_gso(net, sk, skb, mtu);

	if (skb->len > mtu ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);

	return ip6_finish_output2(net, sk, skb);
}

/*
 * Run the cgroup BPF egress program on @skb, then continue with
 * __ip6_finish_output() if the verdict is NET_XMIT_SUCCESS or NET_XMIT_CN.
 * Any other verdict frees the skb and is returned as is.
 */
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	switch (ret) {
	case NET_XMIT_SUCCESS:
	case NET_XMIT_CN:
		/* A non-zero result from the output path wins over the BPF verdict. */
		return __ip6_finish_output(net, sk, skb) ? : ret;
	default:
		kfree_skb_reason(skb, SKB_DROP_REASON_BPF_CGROUP_EGRESS);
		return ret;
	}
}

/*
 * Set skb->protocol and skb->dev for IPv6 output on the dst device and pass
 * the packet through the POST_ROUTING netfilter hook to ip6_finish_output().
 * The hook is skipped for skbs flagged IP6SKB_REROUTED.
 *
 * If the dst has no inet6_dev or IPv6 is disabled on it, the packet is
 * counted as an output discard, freed, and 0 is returned.
 */
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst), *indev = skb->dev;
	struct inet6_dev *idev = ip6_dst_idev(dst);

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(!idev || READ_ONCE(idev->cnf.disable_ipv6))) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb_reason(skb, SKB_DROP_REASON_IPV6DISABLED);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, indev, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

/*
 * Report whether automatic flow labels apply to @sk: the socket's
 * AUTOFLOWLABEL bit when AUTOFLOWLABEL_SET is set on it, otherwise the
 * namespace default from ip6_default_np_autolabel().
 */
bool ip6_autoflowlabel(struct net *net, const struct sock *sk)
{
	if (!inet6_test_bit(AUTOFLOWLABEL_SET, sk))
		return ip6_default_np_autolabel(net);
	return inet6_test_bit(AUTOFLOWLABEL, sk);
}

/*
 * xmit an sk_buff (used by TCP and SCTP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
*/
/*
 * Returns the NF_HOOK() result when the packet is handed to the LOCAL_OUT
 * hook, 0 if l3mdev_ip6_out() returned no skb, -ENOBUFS if the headroom
 * could not be expanded, and -EMSGSIZE (after reporting a local error and
 * freeing the skb) when the packet exceeds the dst MTU and is neither GSO
 * nor marked ignore_df.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst_dev(dst);
	struct inet6_dev *idev = ip6_dst_idev(dst);
	struct hop_jumbo_hdr *hop_jumbo;
	int hoplen = sizeof(*hop_jumbo);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	/* Room for the IPv6 header, a possible jumbo option, link-layer
	 * space and any extension headers from @opt.
	 */
	head_room = sizeof(struct ipv6hdr) + hoplen + LL_RESERVED_SPACE(dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(head_room > skb_headroom(skb))) {
		/* Make sure idev stays alive */
		rcu_read_lock();
		skb = skb_expand_head(skb, head_room);
		if (!skb) {
			IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
			rcu_read_unlock();
			return -ENOBUFS;
		}
		rcu_read_unlock();
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	/* Payload too long for the 16-bit length field: push a hop-by-hop
	 * jumbo option carrying the length and zero the header's payload_len.
	 */
	if (unlikely(seg_len > IPV6_MAXPLEN)) {
		hop_jumbo = skb_push(skb, hoplen);

		hop_jumbo->nexthdr = proto;
		hop_jumbo->hdrlen = 0;
		hop_jumbo->tlv_type = IPV6_TLV_JUMBO;
		hop_jumbo->tlv_len = 4;
		hop_jumbo->jumbo_payload_len = htonl(seg_len + hoplen);

		proto = IPPROTO_HOPOPTS;
		seg_len = 0;
		IP6CB(skb)->flags |= IP6SKB_FAKEJUMBO;
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = READ_ONCE(np->hop_limit);
	/* A negative hop limit means "not set"; use the dst's value. */
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, sk), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dev,
			       dst_output);
	}

	skb->dev = dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

/*
 * Hand @skb to the sockets on ip6_ra_chain whose selector equals @sel and
 * that are either unbound or bound to the receiving device. Sockets with
 * RTALERT_ISOLATE set are skipped when they live in another namespace than
 * skb->dev. Every matching socket except the last one gets a clone; the
 * last one receives @skb itself.
 *
 * Returns 1 if @skb was delivered to a socket, 0 otherwise.
 */
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {

			if (inet6_test_bit(RTALERT_ISOLATE, sk) &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			/* Deliver to the previous match; a failed clone is skipped. */
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

/*
 * Classify a packet addressed to a proxied neighbour.
 *
 * Returns 1 if the packet is one of the NDISC ICMPv6 types listed below
 * (the caller passes it to the input path), -1 if the destination is
 * link-local (dst_link_failure() has been called and the caller drops it),
 * and 0 in every other case, including when the extension header chain or
 * the ICMPv6 header cannot be parsed.
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Make sure the ICMPv6 type byte is in the linear area. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

/*
 * Last step of forwarding, run after the FORWARD netfilter hook: with
 * CONFIG_NET_SWITCHDEV, an skb carrying offload_l3_fwd_mark is consumed and
 * 0 returned; otherwise the timestamp is cleared and the packet is passed
 * to dst_output().
 */
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	skb_clear_tstamp(skb);
	return dst_output(net, sk, skb);
}

/*
 * Decide whether a forwarded @skb must be rejected as too big for @mtu.
 * Returns false when it fits, when ignore_df is set (unless a recorded
 * frag_max_size exceeds @mtu), or when it is a GSO skb whose segments
 * validate against @mtu; returns true otherwise.
 */
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}

/*
 * Forward a received IPv6 packet along its dst.
 *
 * Returns the NF_HOOK() result on the forwarding path, the ip6_input()
 * result for proxied neighbour discovery packets, 0 when a router alert
 * socket took the packet, -ETIMEDOUT when the hop limit is exhausted,
 * -EMSGSIZE when the packet is too big for the path, and -EINVAL for every
 * other drop. The skb is freed on all error returns.
 */
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst_dev(dst));
	struct net_device *dev;
	struct inet6_dev *idev;
	SKB_DR(reason);
	u32 mtu;

	/* idev is the inet6_dev of the interface the packet arrived on. */
	idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
	if (!READ_ONCE(net->ipv6.devconf_all->forwarding) &&
	    (!idev || !READ_ONCE(idev->cnf.force_forwarding)))
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!READ_ONCE(net->ipv6.devconf_all->disable_policy) &&
	    (!idev || !READ_ONCE(idev->cnf.disable_policy)) &&
	    !xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb_reason(skb, SKB_DROP_REASON_IP_INHDR);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0) {
			/* It's tempting to decrease the hop limit
			 * here by 1, as we do at the end of the
			 * function too.
			 *
			 * But that would be incorrect, as proxying is
			 * not forwarding. The ip6_input function
			 * will handle this packet locally, and it
			 * depends on the hop limit being unchanged.
			 *
			 * One example is the NDP hop limit, that
			 * always has to stay 255, but other would be
			 * similar checks around RA packets, where the
			 * user can even change the desired limit.
			 */
			return ip6_input(skb);
		} else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		SKB_DR_SET(reason, XFRM_POLICY);
		goto drop;
	}
	/* Re-read the dst from the skb after xfrm6_route_forward(). */
	dst = skb_dst(skb);
	dev = dst_dev(dst);
	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = dst_rt6_info(dst);
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		rcu_read_lock();
		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		rcu_read_unlock();
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	__IP6_INC_STATS(net, ip6_dst_idev(dst),
			IPSTATS_MIB_OUTFORWDATAGRAMS);

	mtu = ip6_dst_mtu_maybe_forward(dst, true);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb_reason(skb, SKB_DROP_REASON_PKT_TOO_BIG);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	/* skb_cow() may have reallocated the header; reload the pointer. */
	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
	SKB_DR_SET(reason, IP_INADDRERRORS);
drop:
	kfree_skb_reason(skb, reason);
	return -EINVAL;
}

/*
 * Copy per-packet metadata from @from to @to: packet type, priority,
 * protocol, a new reference on the dst (replacing any dst @to held),
 * device, mark, hash, tc_index (CONFIG_NET_SCHED), netfilter state,
 * skb extensions and the security mark.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}

/*
 * Start fast-path fragmentation of an skb that carries a frag_list.
 *
 * Saves a copy of the first @hlen bytes of the network header in
 * iter->tmp_hdr, detaches the frag_list into iter->frag, inserts a fragment
 * header (offset 0, IP6_MF set) after the unfragmentable part of @skb and
 * fixes up @skb's lengths so that it becomes the first fragment.
 *
 * Returns 0 on success or -ENOMEM if the header copy cannot be allocated.
 * On success the caller owns iter->tmp_hdr.
 */
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
		      u8 nexthdr, __be32 frag_id,
		      struct ip6_fraglist_iter *iter)
{
	unsigned int first_len;
	struct frag_hdr *fh;

	/* BUILD HEADER */
	*prevhdr = NEXTHDR_FRAGMENT;
	iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
	if (!iter->tmp_hdr)
		return -ENOMEM;

	iter->frag = skb_shinfo(skb)->frag_list;
	skb_frag_list_init(skb);

	iter->offset = 0;
	iter->hlen = hlen;
	iter->frag_id = frag_id;
	iter->nexthdr = nexthdr;

	/* Make room for the fragment header between the saved header and
	 * the payload, then restore the saved header in front of it.
	 */
	__skb_pull(skb, hlen);
	fh = __skb_push(skb, sizeof(struct frag_hdr));
	__skb_push(skb, hlen);
	skb_reset_network_header(skb);
	memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

	fh->nexthdr = nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(IP6_MF);
	fh->identification = frag_id;

	first_len = skb_pagelen(skb);
	skb->data_len = first_len - skb_headlen(skb);
	skb->len = first_len;
	ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

	return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

/*
 * Turn iter->frag into the next fragment: prepend the saved header and a
 * fragment header, advance iter->offset by the payload length of @skb (the
 * fragment before it), set IP6_MF if more fragments follow, and copy the
 * metadata of @skb to it.
 */
void ip6_fraglist_prepare(struct sk_buff *skb,
			  struct ip6_fraglist_iter *iter)
{
	struct sk_buff *frag = iter->frag;
	unsigned int hlen = iter->hlen;
	struct frag_hdr *fh;

	frag->ip_summed = CHECKSUM_NONE;
	skb_reset_transport_header(frag);
	fh = __skb_push(frag, sizeof(struct frag_hdr));
	__skb_push(frag, hlen);
	skb_reset_network_header(frag);
	memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
	iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
	fh->nexthdr = iter->nexthdr;
	fh->reserved = 0;
	fh->frag_off = htons(iter->offset);
	if (frag->next)
		fh->frag_off |= htons(IP6_MF);
	fh->identification = iter->frag_id;
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
	ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

/*
 * Initialise @state for slow-path fragmentation of @skb with
 * ip6_frag_next(). @mtu is stored as the per-fragment payload limit; the
 * first @hlen bytes of the packet are treated as the header copied into
 * every fragment.
 */
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
		   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
		   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
	state->prevhdr = prevhdr;
	state->nexthdr = nexthdr;
	state->frag_id = frag_id;

	state->hlen = hlen;
	state->mtu = mtu;

	state->left = skb->len - hlen;	/* Space per frame */
	state->ptr = hlen;		/* Where to start from */

	state->hroom = hdr_room;
	state->troom = needed_tailroom;

	state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

/*
 * Build the next fragment of @skb in a newly allocated skb and advance
 * @state past the data it carries.
 *
 * Returns the new fragment, or ERR_PTR(-ENOMEM) if allocation fails.
 * @skb itself is not freed here.
 */
struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
	u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
	struct sk_buff *frag;
	struct frag_hdr *fh;
	unsigned int len;

	len = state->left;
	/* IF: it doesn't fit, use 'mtu' - the data space left */
	if (len > state->mtu)
		len = state->mtu;
	/* IF: we are not sending up to and including the packet end
	   then align the next start on an eight byte boundary */
	if (len < state->left)
		len &= ~7;

	/* Allocate buffer */
	frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
			 state->hroom + state->troom, GFP_ATOMIC);
	if (!frag)
		return ERR_PTR(-ENOMEM);

	/*
	 *	Set up data on packet
	 */

	ip6_copy_metadata(frag, skb);
	skb_reserve(frag, state->hroom);
	skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
	skb_reset_network_header(frag);
	fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
	frag->transport_header = (frag->network_header + state->hlen +
				  sizeof(struct frag_hdr));

	/*
	 *	Charge the memory for the fragment to any owner
	 *	it might possess
	 */
	if (skb->sk)
		skb_set_owner_w(frag, skb->sk);

	/*
	 *	Copy the packet header into the new buffer.
	 */
	skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

	/* Patch the copied header's next-header byte at the same offset
	 * that prevhdr has in the original packet.
	 */
	fragnexthdr_offset = skb_network_header(frag);
	fragnexthdr_offset += prevhdr - skb_network_header(skb);
	*fragnexthdr_offset = NEXTHDR_FRAGMENT;

	/*
	 *	Build fragment header.
	 */
	fh->nexthdr = state->nexthdr;
	fh->reserved = 0;
	fh->identification = state->frag_id;

	/*
	 *	Copy a block of the IP datagram.
	 */
	BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
			     len));
	state->left -= len;

	fh->frag_off = htons(state->offset);
	if (state->left > 0)
		fh->frag_off |= htons(IP6_MF);
	ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

	state->ptr += len;
	state->offset += len;

	return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

/*
 * Fragment @skb and pass every fragment to @output.
 *
 * If @skb has a frag_list with suitable geometry, the list members are
 * reused as fragments (fast path); otherwise each fragment is copied into a
 * new skb (slow path).
 *
 * Returns 0 on success. On failure returns the error from header parsing,
 * checksumming, allocation or @output, or -EMSGSIZE (after sending
 * ICMPV6_PKT_TOOBIG) when the packet may not or cannot be fragmented.
 * @skb is consumed in all cases.
 */
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	u8 tstamp_type = skb->tstamp_type;
	struct ip6_frag_state state;
	unsigned int mtu, hlen, nexthdr_offset;
	ktime_t tstamp = skb->tstamp;
	int hroom, err = 0;
	__be32 frag_id;
	u8 *prevhdr, nexthdr = 0;

	/* On success err is the length of the unfragmentable part. */
	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	/* Remember prevhdr as an offset; it is recomputed further down. */
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np) {
		u32 frag_size = READ_ONCE(np->frag_size);

		if (frag_size && frag_size < mtu)
			mtu = frag_size;
	}
	/* Each fragment must carry at least 8 bytes of payload. */
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	/* From here on mtu is the payload space available per fragment. */
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct ip6_fraglist_iter iter;
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			/* Move the fragment's memory accounting from skb to frag. */
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
					&iter);
		if (err < 0)
			goto fail;

		/* We prevent @rt from being freed. */
		rcu_read_lock();

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (iter.frag)
				ip6_fraglist_prepare(skb, &iter);

			skb_set_delivery_time(skb, tstamp, tstamp_type);
			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !iter.frag)
				break;

			skb = ip6_fraglist_next(&iter);
		}

		kfree(iter.tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			rcu_read_unlock();
			return 0;
		}

		/* Free the fragments that were not handed to output(). */
		kfree_skb_list(iter.frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		rcu_read_unlock();
		return err;

slow_path_clean:
		/* Undo the ownership changes made to the frags walked so far. */
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	/*
	 *	Fragment the datagram.
	 */

	ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
		      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
		      &state);

	/*
	 *	Keep copying data until we run out.
	 */

	while (state.left > 0) {
		frag = ip6_frag_next(skb, &state);
		if (IS_ERR(frag)) {
			err = PTR_ERR(frag);
			goto fail;
		}

		/*
		 *	Put this fragment into the sending queue.
		 */
		skb_set_delivery_time(frag, tstamp, tstamp_type);
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

/*
 * Returns non-zero when @fl_addr is matched neither by @rt_key as a host
 * (/128) key nor by @addr_cache. A NULL @addr_cache never matches.
 */
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

/*
 * Validate a dst cached on @sk against flow @fl6.
 *
 * Returns @dst if it is still usable for the flow. Otherwise the reference
 * on @dst is released and NULL is returned. A NULL @dst is passed through.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = dst_rt6_info(dst);
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	   (fl6->flowi6_oif && fl6->flowi6_oif != dst_dev(dst)->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

/*
 * Common route lookup for flow @fl6. If fl6->saddr is unspecified, a source
 * address is selected and stored in it. A *dst that is already non-NULL on
 * entry (and not replaced by the source-address step) is used as is.
 *
 * Returns 0 with *dst set on success. On error the dst reference is
 * released, *dst is set to NULL and a negative errno is returned;
 * -ENETUNREACH is also counted in IPSTATS_MIB_OUTNOROUTES.
 */
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr)) {
		struct fib6_info *from;
		struct rt6_info *rt;

		*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : dst_rt6_info(*dst);

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? READ_ONCE(inet6_sk(sk)->srcprefs) : 0,
					  fl6->flowi6_l3mdev,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if ((*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = dst_rt6_info(*dst);
	rcu_read_lock();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	/* err is used as a flag here: set when a neighbour exists but is not valid. */
	err = n && !(READ_ONCE(n->nud_state) & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	/* A v4-mapped source requires a v4-mapped or unspecified destination. */
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: Network namespace to perform lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
*/ int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; return ip6_dst_lookup_tail(net, sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** * ip6_dst_lookup_flow - perform route lookup on flow with ipsec * @net: Network namespace to perform lookup in * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * * This function performs a route lookup on the given flow. * * It returns a valid dst pointer on success, or a pointer encoded * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; err = ip6_dst_lookup_tail(net, sk, &dst, fl6); if (err) return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0); } EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); /** * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup * @connected: whether @sk is connected or not * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * * In addition, for a connected socket, cache the dst in the socket * if the current cache is not valid. * * It returns a valid dst pointer on success, or a pointer encoded * error code. 
*/
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	/* Use the socket's cached dst if ip6_sk_dst_check() hands one back. */
	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
	/* Only connected sockets store the fresh result (a clone of it). */
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/*
 * Duplicate an option header with kmemdup(). The copied length is
 * (hdrlen + 1) * 8 bytes. Returns NULL when @src is NULL; otherwise
 * returns whatever kmemdup() returns.
 */
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/* Same as ip6_opt_dup(), for a routing header. */
static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

/*
 * Recompute *@mtu and *@maxfraglen from @orig_mtu. Nothing is changed when
 * the route has DST_XFRM_TUNNEL set. @skb is NULL when the first fragment
 * is being set up.
 */
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		/* Fragment payload is kept a multiple of 8 bytes (& ~7). */
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

/*
 * Initialise @cork and @v6_cork for a new corked transmission from the
 * per-call parameters in @ipc6 and the route @rt.
 *
 * Returns 0 on success, -EINVAL if @v6_cork already carries options, or
 * -ENOBUFS if duplicating the options fails.
 */
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu, frag_size;
	struct ipv6_txoptions *nopt, *opt = ipc6->opt;

	/* callers pass dst together with a reference, set it first so
	 * ip6_cork_release() can put it down even in case of an error.
*/
	cork->base.dst = &rt->dst;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		/*
		 * The cork gets its own copy of the tx options. The new
		 * structure is attached to v6_cork->opt right away, so the
		 * sub-options duplicated before a failing one stay attached
		 * to it on the -ENOBUFS returns below.
		 */
		nopt = v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!nopt))
			return -ENOBUFS;

		nopt->tot_len = sizeof(*opt);
		nopt->opt_flen = opt->opt_flen;
		nopt->opt_nflen = opt->opt_nflen;

		nopt->dst0opt = ip6_opt_dup(opt->dst0opt, sk->sk_allocation);
		if (opt->dst0opt && !nopt->dst0opt)
			return -ENOBUFS;

		nopt->dst1opt = ip6_opt_dup(opt->dst1opt, sk->sk_allocation);
		if (opt->dst1opt && !nopt->dst1opt)
			return -ENOBUFS;

		nopt->hopopt = ip6_opt_dup(opt->hopopt, sk->sk_allocation);
		if (opt->hopopt && !nopt->hopopt)
			return -ENOBUFS;

		nopt->srcrt = ip6_rthdr_dup(opt->srcrt, sk->sk_allocation);
		if (opt->srcrt && !nopt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	v6_cork->dontfrag = ipc6->dontfrag;
	/*
	 * With pmtudisc >= IPV6_PMTUDISC_PROBE the device MTU is used.
	 * Otherwise the MTU comes from the dst itself (DST_XFRM_TUNNEL)
	 * or from its xfrm path.
	 */
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = READ_ONCE(np->pmtudisc) >= IPV6_PMTUDISC_PROBE ?
			READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));

	/* A non-zero socket frag_size smaller than the MTU replaces it. */
	frag_size = READ_ONCE(np->frag_size);
	if (frag_size && frag_size < mtu)
		mtu = frag_size;

	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	cork->base.mark = ipc6->sockc.mark;
	cork->base.priority = ipc6->sockc.priority;
	sock_tx_timestamp(sk, &ipc6->sockc, &cork->base.tx_flags);
	if (ipc6->sockc.tsflags & SOCKCM_FLAG_TS_OPT_ID) {
		cork->base.flags |= IPCORK_TS_OPT_ID;
		cork->base.ts_opt_id = ipc6->sockc.ts_opt_id;
	}
	cork->base.length = 0;
	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}

/*
 * Append @length bytes, fetched through @getfrag from @from, to the skbs
 * on @queue, allocating further skbs when the tail skb has no room left.
 * @transhdrlen is non-zero only for the skb that carries the transport
 * header.
 *
 * Returns 0 on success or a negative errno. -EMSGSIZE is also reported
 * to the socket through ipv6_local_error().
 */
static int __ip6_append_data(struct sock *sk,
			     struct sk_buff_head *queue,
			     struct inet_cork_full *cork_full,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     unsigned int flags)
{
	struct sk_buff *skb, *skb_prev = NULL;
	struct inet_cork *cork = &cork_full->base;
	struct flowi6 *fl6 = &cork_full->fl.u.ip6;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	bool zc = false;
	u32 tskey = 0;
	struct rt6_info *rt = dst_rt6_info(cork->dst);
	bool paged, hold_tskey = false, extra_uref = false;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;

	/* Extension header lengths are only set when the queue is empty. */
	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	/*
	 * fragheaderlen counts the IPv6 header, rt6i_nfheader_len and
	 * opt_nflen. headersize additionally counts opt_flen.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     rt->rt6i_nfheader_len;

	/* Bail out when the MTU leaves no room beyond the headers. */
	if (mtu <= fragheaderlen ||
	    ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
		goto emsgsize;

	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	/*
	 * dontfrag with UDP, ICMPv6 or raw sockets: data that does not fit
	 * is reported through ipv6_local_rxpmtu() and rejected.
	 */
	if (cork->length + length > mtu - headersize && v6_cork->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_ICMPV6 ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
		/* Shared -EMSGSIZE exit, also reached by the gotos above. */
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if ((flags & MSG_ZEROCOPY) && length) {
		struct msghdr *msg = from;

		if (getfrag == ip_generic_getfrag && msg->msg_ubuf) {
			/* Refuse a ubuf that differs from the one on the tail skb. */
			if (skb_zcopy(skb) && msg->msg_ubuf != skb_zcopy(skb))
				return -EINVAL;

			/* Leave uarg NULL if can't zerocopy, callers should
			 * be able to handle it.
*/
			if ((rt->dst.dev->features & NETIF_F_SG) &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
				uarg = msg->msg_ubuf;
			}
		} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
			uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb),
						    false);
			if (!uarg)
				return -ENOBUFS;
			extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
			if (rt->dst.dev->features & NETIF_F_SG &&
			    csummode == CHECKSUM_PARTIAL) {
				paged = true;
				zc = true;
			} else {
				/* Zerocopy is not possible here: mark the uarg accordingly. */
				uarg_to_msgzc(uarg)->zerocopy = 0;
				skb_zcopy_set(skb, uarg, &extra_uref);
			}
		}
	} else if ((flags & MSG_SPLICE_PAGES) && length) {
		if (inet_test_bit(HDRINCL, sk))
			return -EPERM;
		/* Without SG or with a custom getfrag, drop MSG_SPLICE_PAGES. */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    getfrag == ip_generic_getfrag)
			/* We need an empty buffer to attach stuff to */
			paged = true;
		else
			flags &= ~MSG_SPLICE_PAGES;
	}

	/*
	 * Pick the timestamp key: either the one stored in the cork, or the
	 * next value of sk->sk_tskey. hold_tskey records that the socket
	 * counter was incremented, so the error path can decrement it.
	 */
	if (cork->tx_flags & SKBTX_ANY_TSTAMP &&
	    READ_ONCE(sk->sk_tsflags) & SOF_TIMESTAMPING_OPT_ID) {
		if (cork->flags & IPCORK_TS_OPT_ID) {
			tskey = cork->ts_opt_id;
		} else {
			tskey = atomic_inc_return(&sk->sk_tskey) - 1;
			hold_tskey = true;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen, alloc_extra;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			alloc_extra = hh_len;
			alloc_extra += dst_exthdrlen;
			alloc_extra += rt->dst.trailer_len;

			/* We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloc_extra += sizeof(struct frag_hdr);

			/*
			 * Size of the linear area: a full MTU for MSG_MORE
			 * without SG, the whole fragment when not paged, and
			 * otherwise only the headers, with the remainder
			 * counted in pagedlen.
			 */
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged &&
				 (fraglen + alloc_extra < SKB_MAX_ALLOC ||
				  !(rt->dst.dev->features & NETIF_F_SG)))
				alloclen = fraglen;
			else {
				alloclen = fragheaderlen + transhdrlen;
				pagedlen = datalen - transhdrlen;
			}
			alloclen += alloc_extra;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			fraglen = datalen + fragheaderlen;

			copy = datalen - transhdrlen - fraggap - pagedlen;
			/* [!] NOTE: copy may be negative if pagedlen>0
			 * because then the equation may reduce to -fraggap.
*/
			if (copy < 0 && !(flags & MSG_SPLICE_PAGES)) {
				err = -EINVAL;
				goto error;
			}
			/*
			 * The skb carrying the transport header is allocated
			 * with sock_alloc_send_skb(). Later skbs use a plain
			 * alloc_skb(), attempted only while the write memory
			 * charged so far is at most twice sk_sndbuf.
			 */
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk, alloclen,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			/*
			 * Copy the fraggap bytes that sit past maxfraglen in
			 * the previous skb into this one, adjust both
			 * checksums and trim the previous skb to maxfraglen.
			 */
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					   from, data + transhdrlen, offset,
					   copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			} else if (flags & MSG_SPLICE_PAGES) {
				copy = 0;
			}

			offset += copy;
			length -= copy + transhdrlen;
			/* Header lengths apply to the first skb only. */
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		/*
		 * Four ways to add data to the current skb: into its linear
		 * tailroom (no SG), by splicing pages, by copying into the
		 * page frag @pfrag, or through the zerocopy iterator.
		 */
		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
					    from, skb_put(skb, copy),
					    offset, copy, off, skb) < 0) {
				/* Drop the bytes that skb_put() just added. */
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (flags & MSG_SPLICE_PAGES) {
			struct msghdr *msg = from;

			err = -EIO;
			if (WARN_ON_ONCE(copy > msg->msg_iter.count))
				goto error;

			err = skb_splice_from_iter(skb, &msg->msg_iter, copy);
			if (err < 0)
				goto error;
			/* A non-negative result is used as the byte count. */
			copy = err;
			wmem_alloc_delta += copy;
		} else if (!zc) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			skb_zcopy_downgrade_managed(skb);
			/* Start a new frag unless the data can join the last one. */
			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (INDIRECT_CALL_1(getfrag, ip_generic_getfrag,
				    from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	/* Charge the accumulated write memory to the socket in one go. */
	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	net_zcopy_put_abort(uarg, extra_uref);
	/* Take the bytes that were not appended back out of cork->length. */
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	if (hold_tskey)
		atomic_dec(&sk->sk_tskey);
	return err;
}

/*
 * Append data to the socket's write queue.
 *
 * Returns 0 straight away for MSG_PROBE. When the write queue is empty a
 * reference on @rt is taken and the cork is set up from @ipc6 and @fl6.
 * The opt_flen of @ipc6->opt, if any, is then added to both @length and
 * @transhdrlen. When the queue is not empty, @transhdrlen is forced to 0.
 * The return value is that of ip6_setup_cork() on failure, otherwise that
 * of __ip6_append_data().
 */
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, size_t length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		dst_hold(&rt->dst);
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt);
		if (err)
			return err;

		inet->cork.fl.u.ip6 = *fl6;
		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, &sk->sk_write_queue, &inet->cork,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

/* Move the cork's dst pointer to @skb; the cork no longer holds it. */
static void ip6_cork_steal_dst(struct sk_buff *skb, struct inet_cork_full *cork)
{
	struct dst_entry *dst = cork->base.dst;

	cork->base.dst = NULL;
	skb_dst_set(skb, dst);
}

/*
 * Free the tx options held by @v6_cork and release the dst held by @cork.
 * Both pointers are reset to NULL afterwards.
 */
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		struct ipv6_txoptions *opt = v6_cork->opt;

		kfree(opt->dst0opt);
		kfree(opt->dst1opt);
		kfree(opt->hopopt);
		kfree(opt->srcrt);
		kfree(opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
	}
}

/*
 * Turn the skbs on @queue into a single skb: the first one becomes the
 * head and the others are chained on its frag_list. Returns NULL when
 * @queue is empty.
 */
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr *final_dst;
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = dst_rt6_info(cork->base.dst);
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/*
	 * Chain every remaining skb onto the head and fold its length and
	 * truesize into the head's. The chained skbs lose their destructor
	 * and socket pointer.
	 */
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local
fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);
	__skb_pull(skb, skb_network_header_len(skb));

	/*
	 * final_dst starts as the flow's destination. It is passed by
	 * reference to ipv6_push_nfrag_opts(), and whatever it points to
	 * afterwards becomes the header's daddr.
	 */
	final_dst = &fl6->daddr;
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, sk), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = cork->base.priority;
	skb->mark = cork->base.mark;
	if (sk_is_tcp(sk))
		skb_set_delivery_time(skb, cork->base.transmit_time, SKB_CLOCK_MONOTONIC);
	else
		skb_set_delivery_type_by_clockid(skb, cork->base.transmit_time, sk->sk_clockid);

	/* The skb takes over the cork's dst; rt was read from it earlier. */
	ip6_cork_steal_dst(skb, cork);
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
		u8 icmp6_type;

		/*
		 * Raw sockets without FLOWI_FLAG_KNOWN_NH take the ICMPv6
		 * type from the flow; everything else reads it from the
		 * packet.
		 */
		if (sk->sk_socket->type == SOCK_RAW &&
		   !(fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH))
			icmp6_type = fl6->fl6_icmp_type;
		else
			icmp6_type = icmp6_hdr(skb)->icmp6_type;
		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

/*
 * Send @skb with ip6_local_out(). A positive return value is converted
 * with net_xmit_errno(). If an error remains, IPSTATS_MIB_OUTDISCARDS is
 * incremented. Returns the resulting error code, 0 on success.
 */
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = dst_rt6_info(skb_dst(skb));
	int err;

	rcu_read_lock();
	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	rcu_read_unlock();
	return err;
}

/*
 * Build one skb from the socket's pending frames and send it.
 * Returns 0 when ip6_finish_skb() yields no skb.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);

/*
 * Free every skb on @queue, taken from the tail, and release the cork.
 * IPSTATS_MIB_OUTDISCARDS is incremented for each skb that has a dst.
 */
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

/* Flush the socket's own write queue and its cork state. */
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

/*
 * Build a complete skb in one call, using a private queue and the
 * caller-provided @cork instead of the socket's write queue.
 *
 * Returns NULL for MSG_PROBE, after releasing the dst of @rt. Returns an
 * ERR_PTR() when cork setup or appending data fails; the cork is released
 * on both error paths. Otherwise returns the result of __ip6_make_skb().
 */
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, size_t length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct rt6_info *rt,
			     unsigned int flags, struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE) {
		dst_release(&rt->dst);
		return NULL;
	}

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}

	/* exthdrlen is added to both the length and the transport header length. */
	err = __ip6_append_data(sk, &queue, cork, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}
9 10 2 13 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) B.A.T.M.A.N. contributors:
 *
 * Simon Wunderlich, Marek Lindner
 */

#ifndef _NET_BATMAN_ADV_HASH_H_
#define _NET_BATMAN_ADV_HASH_H_

#include "main.h"

#include <linux/atomic.h>
#include <linux/compiler.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/stddef.h>
#include <linux/types.h>

/* callback to a compare function. It should compare the keys of the two
 * elements it is given.
 *
 * Return: true if same and false if not same
 */
typedef bool (*batadv_hashdata_compare_cb)(const struct hlist_node *,
					   const void *);

/* the hash function
 *
 * Return: an index based on the key in the data of the first argument and the
 * size given as the second argument
 */
typedef u32 (*batadv_hashdata_choose_cb)(const void *, u32);
typedef void (*batadv_hashdata_free_cb)(struct hlist_node *, void *);

/**
 * struct batadv_hashtable - Wrapper of simple hlist based hashtable
 */
struct batadv_hashtable {
	/** @table: the hashtable itself with the buckets */
	struct hlist_head *table;

	/** @list_locks: spinlock for each hash list entry */
	spinlock_t *list_locks;

	/** @size: size of hashtable */
	u32 size;

	/** @generation: current (generation) sequence number */
	atomic_t generation;
};

/* allocates and clears the hash */
struct batadv_hashtable *batadv_hash_new(u32 size);

/* set class key for all locks */
void batadv_hash_set_lock_class(struct batadv_hashtable *hash,
				struct lock_class_key *key);

/* free only the hashtable and the hash itself. */
void batadv_hash_destroy(struct batadv_hashtable *hash);

/**
 *	batadv_hash_add() - adds data to the hashtable
 *	@hash: storage hash table
 *	@compare: callback to determine if 2 hash elements are identical
 *	@choose: callback calculating the hash index
 *	@data: data passed to the aforementioned callbacks as argument
 *	@data_node: to be added element
 *
 *	The bucket is searched and modified with its list lock held. The
 *	generation counter is incremented when the element is added.
 *
 *	Return: 0 on success, 1 if the element already is in the hash
 *	and -1 on error (@hash is NULL).
 */
static inline int batadv_hash_add(struct batadv_hashtable *hash,
				  batadv_hashdata_compare_cb compare,
				  batadv_hashdata_choose_cb choose,
				  const void *data,
				  struct hlist_node *data_node)
{
	u32 index;
	int ret = -1;
	struct hlist_head *head;
	struct hlist_node *node;
	spinlock_t *list_lock; /* spinlock to protect write access */

	if (!hash)
		goto out;

	index = choose(data, hash->size);
	head = &hash->table[index];
	list_lock = &hash->list_locks[index];

	spin_lock_bh(list_lock);

	/* Refuse to add an element that compares equal to an existing one. */
	hlist_for_each(node, head) {
		if (!compare(node, data))
			continue;

		ret = 1;
		goto unlock;
	}

	/* no duplicate found in list, add new element */
	hlist_add_head_rcu(data_node, head);
	atomic_inc(&hash->generation);

	ret = 0;

unlock:
	spin_unlock_bh(list_lock);
out:
	return ret;
}

/**
 * batadv_hash_remove() - Removes data from hash, if found
 * @hash: hash table
 * @compare: callback to determine if 2 hash elements are identical
 * @choose: callback calculating the hash index
 * @data: data passed to the aforementioned callbacks as argument
 *
 * data could be the structure you use with just the key filled, we just need
 * the key for comparing.
 *
 * Unlike batadv_hash_add(), @hash is dereferenced without a NULL check, so
 * it must be a valid hash table. Only the first matching element is
 * removed; the generation counter is incremented when that happens.
 *
 * Return: returns pointer to data on success, so you can remove the used
 * structure yourself, or NULL on error
 */
static inline void *batadv_hash_remove(struct batadv_hashtable *hash,
				       batadv_hashdata_compare_cb compare,
				       batadv_hashdata_choose_cb choose,
				       void *data)
{
	u32 index;
	struct hlist_node *node;
	struct hlist_head *head;
	void *data_save = NULL;

	index = choose(data, hash->size);
	head = &hash->table[index];

	spin_lock_bh(&hash->list_locks[index]);
	hlist_for_each(node, head) {
		if (!compare(node, data))
			continue;

		/* The returned pointer is the list node itself. */
		data_save = node;
		hlist_del_rcu(node);
		atomic_inc(&hash->generation);
		break;
	}
	spin_unlock_bh(&hash->list_locks[index]);

	return data_save;
}

#endif /* _NET_BATMAN_ADV_HASH_H_ */
26 26 26 8 31 11 21 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998-2006 Linus Torvalds, Ingo Molnar * Copyright (C) 2005-2006, Thomas Gleixner * * This file contains the IRQ-resend code * * If the interrupt is waiting to be processed, we try to re-run it. * We can't directly run it from here since the caller might be in an * interrupt-protected region. Not all irq controller chips can * retrigger interrupts at the hardware level, so in those cases * we allow the resending of IRQs via a tasklet. 
*/

#include <linux/irq.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>

#include "internals.h"

#ifdef CONFIG_HARDIRQS_SW_RESEND

/* hlist_head to handle software resend of interrupts: */
static HLIST_HEAD(irq_resend_list);
static DEFINE_RAW_SPINLOCK(irq_resend_lock);

/*
 * Run software resends of IRQ's
 *
 * Each descriptor is taken off irq_resend_list under irq_resend_lock.
 * The lock is dropped while desc->handle_irq() runs and is taken again
 * before the list is examined for the next entry.
 */
static void resend_irqs(struct tasklet_struct *unused)
{
	guard(raw_spinlock_irq)(&irq_resend_lock);
	while (!hlist_empty(&irq_resend_list)) {
		struct irq_desc *desc;

		desc = hlist_entry(irq_resend_list.first, struct irq_desc,  resend_node);
		hlist_del_init(&desc->resend_node);

		raw_spin_unlock(&irq_resend_lock);
		desc->handle_irq(desc);
		raw_spin_lock(&irq_resend_lock);
	}
}

/* Tasklet to handle resend: */
static DECLARE_TASKLET(resend_tasklet, resend_irqs);

/*
 * Queue @desc (or the descriptor of its parent interrupt, for nested
 * threaded interrupts) for a software resend and schedule the tasklet.
 *
 * Returns 0 when queued, -EINVAL when the interrupt cannot be resent
 * this way.
 */
static int irq_sw_resend(struct irq_desc *desc)
{
	/*
	 * Validate whether this interrupt can be safely injected from
	 * non interrupt context
	 */
	if (irqd_is_handle_enforce_irqctx(&desc->irq_data))
		return -EINVAL;

	/*
	 * If the interrupt is running in the thread context of the parent
	 * irq we need to be careful, because we cannot trigger it
	 * directly.
	 */
	if (irq_settings_is_nested_thread(desc)) {
		/*
		 * If the parent_irq is valid, we retrigger the parent,
		 * otherwise we do nothing.
		 */
		if (!desc->parent_irq)
			return -EINVAL;

		desc = irq_to_desc(desc->parent_irq);
		if (!desc)
			return -EINVAL;
	}

	/* Add to resend_list and activate the softirq: */
	scoped_guard(raw_spinlock, &irq_resend_lock) {
		/* A descriptor that is already queued is not added twice. */
		if (hlist_unhashed(&desc->resend_node))
			hlist_add_head(&desc->resend_node, &irq_resend_list);
	}
	tasklet_schedule(&resend_tasklet);
	return 0;
}

/* Take @desc off the software resend list, under irq_resend_lock. */
void clear_irq_resend(struct irq_desc *desc)
{
	guard(raw_spinlock)(&irq_resend_lock);
	hlist_del_init(&desc->resend_node);
}

/* Initialise the resend list node of @desc. */
void irq_resend_init(struct irq_desc *desc)
{
	INIT_HLIST_NODE(&desc->resend_node);
}
#else
/* Without CONFIG_HARDIRQS_SW_RESEND there is no software resend. */
void clear_irq_resend(struct irq_desc *desc) {}
void irq_resend_init(struct irq_desc *desc) {}

static int irq_sw_resend(struct irq_desc *desc)
{
	return -EINVAL;
}
#endif

/*
 * Retrigger through the chip's irq_retrigger() callback when there is
 * one. Otherwise try the parent chips when CONFIG_IRQ_DOMAIN_HIERARCHY
 * is set, or return 0 when it is not.
 */
static int try_retrigger(struct irq_desc *desc)
{
	if (desc->irq_data.chip->irq_retrigger)
		return desc->irq_data.chip->irq_retrigger(&desc->irq_data);

#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
	return irq_chip_retrigger_hierarchy(&desc->irq_data);
#else
	return 0;
#endif
}

/*
 * IRQ resend
 *
 * Is called with interrupts disabled and desc->lock held.
 *
 * Returns 0 when the resend was started or when there was nothing to
 * resend, -EINVAL for level type interrupts, -EBUSY while a replay is
 * already in progress, or the error from irq_sw_resend().
 */
int check_irq_resend(struct irq_desc *desc, bool inject)
{
	int err = 0;

	/*
	 * We do not resend level type interrupts. Level type interrupts
	 * are resent by hardware when they are still active. Clear the
	 * pending bit so suspend/resume does not get confused.
	 */
	if (irq_settings_is_level(desc)) {
		desc->istate &= ~IRQS_PENDING;
		return -EINVAL;
	}

	if (desc->istate & IRQS_REPLAY)
		return -EBUSY;

	/* Nothing pending and no forced injection: nothing to do. */
	if (!(desc->istate & IRQS_PENDING) && !inject)
		return 0;

	desc->istate &= ~IRQS_PENDING;

	/* A zero return from try_retrigger() falls back to software resend. */
	if (!try_retrigger(desc))
		err = irq_sw_resend(desc);

	/* If the retrigger was successful, mark it with the REPLAY bit */
	if (!err)
		desc->istate |= IRQS_REPLAY;
	return err;
}

#ifdef CONFIG_GENERIC_IRQ_INJECTION
/**
 * irq_inject_interrupt - Inject an interrupt for testing/error injection
 * @irq:	The interrupt number
 *
 * This function must only be used for debug and testing purposes!
 *
 * Especially on x86 this can cause a premature completion of an interrupt
 * affinity change causing the interrupt line to become stale. Very
 * unlikely, but possible.
 *
 * The injection can fail for various reasons:
 * - Interrupt is not activated
 * - Interrupt is NMI type or currently replaying
 * - Interrupt is level type
 * - Interrupt does not support hardware retrigger and software resend is
 *   either not enabled or not possible for the interrupt.
 *
 * Return: 0 on success, -EINVAL or the error from check_irq_resend()
 * otherwise.
 */
int irq_inject_interrupt(unsigned int irq)
{
	int err = -EINVAL;

	/* Try the state injection hardware interface first */
	if (!irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, true))
		return 0;

	/* That failed, try via the resend mechanism */
	scoped_irqdesc_get_and_buslock(irq, 0) {
		struct irq_desc *desc = scoped_irqdesc;

		/*
		 * Only try to inject when the interrupt is:
		 *  - not NMI type
		 *  - activated
		 */
		if (!irq_is_nmi(desc) && irqd_is_activated(&desc->irq_data))
			err = check_irq_resend(desc, true);
	}
	return err;
}
EXPORT_SYMBOL_GPL(irq_inject_interrupt);
#endif
3 3 3 3 3 3 3 3 1 3 1 3 3 1 3 3 2 3 3 3 3 4 2 1 1 2 1 1 1 4 4 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 // SPDX-License-Identifier: GPL-2.0-or-later /* * PRNG: Pseudo Random Number Generator * Based on NIST Recommended PRNG From ANSI X9.31 
Appendix A.2.4 using
 *  AES 128 cipher
 *
 *  (C) Neil Horman <nhorman@tuxdriver.com>
 */

#include <crypto/internal/cipher.h>
#include <crypto/internal/rng.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/string.h>

#define DEFAULT_PRNG_KEY "0123456789abcdef"
#define DEFAULT_PRNG_KSZ 16
#define DEFAULT_BLK_SZ 16
#define DEFAULT_V_SEED "zaybxcwdveuftgsh"

/*
 * Flags for the prng_context flags field
 */

#define PRNG_FIXED_SIZE 0x1
#define PRNG_NEED_RESET 0x2

/*
 * Note: DT is our counter value
 *	 I is our intermediate value
 *	 V is our seed vector
 * See http://csrc.nist.gov/groups/STM/cavp/documents/rng/931rngext.pdf
 * for implementation details
 */


struct prng_context {
	spinlock_t prng_lock;
	unsigned char rand_data[DEFAULT_BLK_SZ];
	unsigned char last_rand_data[DEFAULT_BLK_SZ];
	unsigned char DT[DEFAULT_BLK_SZ];
	unsigned char I[DEFAULT_BLK_SZ];
	unsigned char V[DEFAULT_BLK_SZ];
	u32 rand_data_valid;
	struct crypto_cipher *tfm;
	u32 flags;
};

/* Debug output from hexdump() and dbgprint() is emitted only when non-zero. */
static int dbg;

/* Print @note followed by a hex dump of @len bytes of @buf, if dbg is set. */
static void hexdump(char *note, unsigned char *buf, unsigned int len)
{
	if (dbg) {
		printk(KERN_CRIT "%s", note);
		print_hex_dump(KERN_CONT, "", DUMP_PREFIX_OFFSET,
				16, 1,
				buf, len, false);
	}
}

/* printk() wrapper that only prints when dbg is set. */
#define dbgprint(format, args...) do {\
if (dbg)\
	printk(format, ##args);\
} while (0)

/* Store the bytewise XOR of the first @size bytes of @in1 and @in2 in @out. */
static void xor_vectors(unsigned char *in1, unsigned char *in2,
			unsigned char *out, unsigned int size)
{
	int i;

	for (i = 0; i < size; i++)
		out[i] = in1[i] ^ in2[i];

}
/*
 * Returns DEFAULT_BLK_SZ bytes of random data per call
 * returns 0 if generation succeeded, <0 if something went wrong
 *
 * The new block is left in ctx->rand_data and ctx->rand_data_valid is
 * reset to 0. If the new block equals the previous one, the function
 * panics when @cont_test is set. Otherwise it sets PRNG_NEED_RESET in
 * ctx->flags and returns -EINVAL.
 */
static int _get_more_prng_bytes(struct prng_context *ctx, int cont_test)
{
	int i;
	unsigned char tmp[DEFAULT_BLK_SZ];
	unsigned char *output = NULL;


	dbgprint(KERN_CRIT "Calling _get_more_prng_bytes for context %p\n",
		ctx);

	hexdump("Input DT: ", ctx->DT, DEFAULT_BLK_SZ);
	hexdump("Input I: ", ctx->I, DEFAULT_BLK_SZ);
	hexdump("Input V: ", ctx->V, DEFAULT_BLK_SZ);

	/*
	 * This algorithm is a 3 stage state machine
	 *
	 * Each stage prepares an input block in tmp and selects where the
	 * encrypted result goes (output). The encryption itself is shared
	 * and happens after the switch.
	 */
	for (i = 0; i < 3; i++) {

		switch (i) {
		case 0:
			/*
			 * Start by encrypting the counter value
			 * This gives us an intermediate value I
			 */
			memcpy(tmp, ctx->DT, DEFAULT_BLK_SZ);
			output = ctx->I;
			hexdump("tmp stage 0: ", tmp, DEFAULT_BLK_SZ);
			break;
		case 1:

			/*
			 * Next xor I with our secret vector V
			 * encrypt that result to obtain our
			 * pseudo random data which we output
			 */
			xor_vectors(ctx->I, ctx->V, tmp, DEFAULT_BLK_SZ);
			hexdump("tmp stage 1: ", tmp, DEFAULT_BLK_SZ);
			output = ctx->rand_data;
			break;
		case 2:
			/*
			 * First check that we didn't produce the same
			 * random data that we did last time around through this
			 */
			if (!memcmp(ctx->rand_data, ctx->last_rand_data,
					DEFAULT_BLK_SZ)) {
				if (cont_test) {
					panic("cprng %p Failed repetition check!\n",
						ctx);
				}

				printk(KERN_ERR
					"ctx %p Failed repetition check!\n",
					ctx);

				ctx->flags |= PRNG_NEED_RESET;
				return -EINVAL;
			}
			memcpy(ctx->last_rand_data, ctx->rand_data,
				DEFAULT_BLK_SZ);

			/*
			 * Lastly xor the random data with I
			 * and encrypt that to obtain a new secret vector V
			 */
			xor_vectors(ctx->rand_data, ctx->I, tmp,
				DEFAULT_BLK_SZ);
			output = ctx->V;
			hexdump("tmp stage 2: ", tmp, DEFAULT_BLK_SZ);
			break;
		}


		/* do the encryption */
		crypto_cipher_encrypt_one(ctx->tfm, output, tmp);

	}

	/*
	 * Now update our DT value
	 */
	/*
	 * DT is incremented by one, starting at its last byte; a byte that
	 * wraps to zero carries into the byte before it.
	 */
	for (i = DEFAULT_BLK_SZ - 1; i >= 0; i--) {
		ctx->DT[i] += 1;
		if (ctx->DT[i] != 0)
			break;
	}

	dbgprint("Returning new block for context %p\n", ctx);
	ctx->rand_data_valid = 0;

	hexdump("Output DT: ", ctx->DT, DEFAULT_BLK_SZ);
	hexdump("Output I: ", ctx->I, DEFAULT_BLK_SZ);
	hexdump("Output V: ", ctx->V, DEFAULT_BLK_SZ);
	hexdump("New Random Data: ", ctx->rand_data, DEFAULT_BLK_SZ);

	return 0;
}

/* Our exported functions */
static int get_prng_bytes(char *buf, size_t nbytes,
				struct prng_context *ctx, int do_cont_test)
{
	unsigned char *ptr = buf;
	unsigned int byte_count = (unsigned int)nbytes;
	int err;


	spin_lock_bh(&ctx->prng_lock);

	err = -EINVAL;
	if (ctx->flags & PRNG_NEED_RESET)
		goto done;

	/*
	 * If the FIXED_SIZE flag is on, only return whole blocks of
	 * pseudo random data
	 */
	err = -EINVAL;
	if (ctx->flags & PRNG_FIXED_SIZE) {
		if (nbytes < DEFAULT_BLK_SZ)
			goto done;
		byte_count = DEFAULT_BLK_SZ;
	}

	/*
	 * Return 0 in case of success as mandated by the kernel
	 * crypto API interface definition.
*/ err = 0; dbgprint(KERN_CRIT "getting %d random bytes for context %p\n", byte_count, ctx); remainder: if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; } } /* * Copy any data less than an entire block */ if (byte_count < DEFAULT_BLK_SZ) { empty_rbuf: while (ctx->rand_data_valid < DEFAULT_BLK_SZ) { *ptr = ctx->rand_data[ctx->rand_data_valid]; ptr++; byte_count--; ctx->rand_data_valid++; if (byte_count == 0) goto done; } } /* * Now copy whole blocks */ for (; byte_count >= DEFAULT_BLK_SZ; byte_count -= DEFAULT_BLK_SZ) { if (ctx->rand_data_valid == DEFAULT_BLK_SZ) { if (_get_more_prng_bytes(ctx, do_cont_test) < 0) { memset(buf, 0, nbytes); err = -EINVAL; goto done; } } if (ctx->rand_data_valid > 0) goto empty_rbuf; memcpy(ptr, ctx->rand_data, DEFAULT_BLK_SZ); ctx->rand_data_valid += DEFAULT_BLK_SZ; ptr += DEFAULT_BLK_SZ; } /* * Now go back and get any remaining partial block */ if (byte_count) goto remainder; done: spin_unlock_bh(&ctx->prng_lock); dbgprint(KERN_CRIT "returning %d from get_prng_bytes in context %p\n", err, ctx); return err; } static void free_prng_context(struct prng_context *ctx) { crypto_free_cipher(ctx->tfm); } static int reset_prng_context(struct prng_context *ctx, const unsigned char *key, size_t klen, const unsigned char *V, const unsigned char *DT) { int ret; const unsigned char *prng_key; spin_lock_bh(&ctx->prng_lock); ctx->flags |= PRNG_NEED_RESET; prng_key = (key != NULL) ? 
key : (unsigned char *)DEFAULT_PRNG_KEY; if (!key) klen = DEFAULT_PRNG_KSZ; if (V) memcpy(ctx->V, V, DEFAULT_BLK_SZ); else memcpy(ctx->V, DEFAULT_V_SEED, DEFAULT_BLK_SZ); if (DT) memcpy(ctx->DT, DT, DEFAULT_BLK_SZ); else memset(ctx->DT, 0, DEFAULT_BLK_SZ); memset(ctx->rand_data, 0, DEFAULT_BLK_SZ); memset(ctx->last_rand_data, 0, DEFAULT_BLK_SZ); ctx->rand_data_valid = DEFAULT_BLK_SZ; ret = crypto_cipher_setkey(ctx->tfm, prng_key, klen); if (ret) { dbgprint(KERN_CRIT "PRNG: setkey() failed flags=%x\n", crypto_cipher_get_flags(ctx->tfm)); goto out; } ret = 0; ctx->flags &= ~PRNG_NEED_RESET; out: spin_unlock_bh(&ctx->prng_lock); return ret; } static int cprng_init(struct crypto_tfm *tfm) { struct prng_context *ctx = crypto_tfm_ctx(tfm); spin_lock_init(&ctx->prng_lock); ctx->tfm = crypto_alloc_cipher("aes", 0, 0); if (IS_ERR(ctx->tfm)) { dbgprint(KERN_CRIT "Failed to alloc tfm for context %p\n", ctx); return PTR_ERR(ctx->tfm); } if (reset_prng_context(ctx, NULL, DEFAULT_PRNG_KSZ, NULL, NULL) < 0) return -EINVAL; /* * after allocation, we should always force the user to reset * so they don't inadvertently use the insecure default values * without specifying them intentially */ ctx->flags |= PRNG_NEED_RESET; return 0; } static void cprng_exit(struct crypto_tfm *tfm) { free_prng_context(crypto_tfm_ctx(tfm)); } static int cprng_get_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *rdata, unsigned int dlen) { struct prng_context *prng = crypto_rng_ctx(tfm); return get_prng_bytes(rdata, dlen, prng, 0); } /* * This is the cprng_registered reset method the seed value is * interpreted as the tuple { V KEY DT} * V and KEY are required during reset, and DT is optional, detected * as being present by testing the length of the seed */ static int cprng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { struct prng_context *prng = crypto_rng_ctx(tfm); const u8 *key = seed + DEFAULT_BLK_SZ; const u8 *dt = NULL; if (slen < DEFAULT_PRNG_KSZ + 
DEFAULT_BLK_SZ) return -EINVAL; if (slen >= (2 * DEFAULT_BLK_SZ + DEFAULT_PRNG_KSZ)) dt = key + DEFAULT_PRNG_KSZ; reset_prng_context(prng, key, DEFAULT_PRNG_KSZ, seed, dt); if (prng->flags & PRNG_NEED_RESET) return -EINVAL; return 0; } #ifdef CONFIG_CRYPTO_FIPS static int fips_cprng_get_random(struct crypto_rng *tfm, const u8 *src, unsigned int slen, u8 *rdata, unsigned int dlen) { struct prng_context *prng = crypto_rng_ctx(tfm); return get_prng_bytes(rdata, dlen, prng, 1); } static int fips_cprng_reset(struct crypto_rng *tfm, const u8 *seed, unsigned int slen) { u8 rdata[DEFAULT_BLK_SZ]; const u8 *key = seed + DEFAULT_BLK_SZ; int rc; struct prng_context *prng = crypto_rng_ctx(tfm); if (slen < DEFAULT_PRNG_KSZ + DEFAULT_BLK_SZ) return -EINVAL; /* fips strictly requires seed != key */ if (!memcmp(seed, key, DEFAULT_PRNG_KSZ)) return -EINVAL; rc = cprng_reset(tfm, seed, slen); if (!rc) goto out; /* this primes our continuity test */ rc = get_prng_bytes(rdata, DEFAULT_BLK_SZ, prng, 0); prng->rand_data_valid = DEFAULT_BLK_SZ; out: return rc; } #endif static struct rng_alg rng_algs[] = { { .generate = cprng_get_random, .seed = cprng_reset, .seedsize = DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ, .base = { .cra_name = "stdrng", .cra_driver_name = "ansi_cprng", .cra_priority = 100, .cra_ctxsize = sizeof(struct prng_context), .cra_module = THIS_MODULE, .cra_init = cprng_init, .cra_exit = cprng_exit, } #ifdef CONFIG_CRYPTO_FIPS }, { .generate = fips_cprng_get_random, .seed = fips_cprng_reset, .seedsize = DEFAULT_PRNG_KSZ + 2 * DEFAULT_BLK_SZ, .base = { .cra_name = "fips(ansi_cprng)", .cra_driver_name = "fips_ansi_cprng", .cra_priority = 300, .cra_ctxsize = sizeof(struct prng_context), .cra_module = THIS_MODULE, .cra_init = cprng_init, .cra_exit = cprng_exit, } #endif } }; /* Module initalization */ static int __init prng_mod_init(void) { return crypto_register_rngs(rng_algs, ARRAY_SIZE(rng_algs)); } static void __exit prng_mod_fini(void) { crypto_unregister_rngs(rng_algs, 
ARRAY_SIZE(rng_algs)); } MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Software Pseudo Random Number Generator"); MODULE_AUTHOR("Neil Horman <nhorman@tuxdriver.com>"); module_param(dbg, int, 0); MODULE_PARM_DESC(dbg, "Boolean to enable debugging (0/1 == off/on)"); module_init(prng_mod_init); module_exit(prng_mod_fini); MODULE_ALIAS_CRYPTO("stdrng"); MODULE_ALIAS_CRYPTO("ansi_cprng"); MODULE_IMPORT_NS("CRYPTO_INTERNAL");
150 150 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_PGTABLE_INVERT_H #define _ASM_PGTABLE_INVERT_H 1 #ifndef __ASSEMBLER__ /* * A clear pte value is special, and doesn't get inverted. * * Note that even users that only pass a pgprot_t (rather * than a full pte) won't trigger the special zero case, * because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED * set. So the all zero case really is limited to just the * cleared page table entry case. */ static inline bool __pte_needs_invert(u64 val) { return val && !(val & _PAGE_PRESENT); } /* Get a mask to xor with the page table entry to get the correct pfn. */ static inline u64 protnone_mask(u64 val) { return __pte_needs_invert(val) ? ~0ull : 0; } static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask) { /* * When a PTE transitions from NONE to !NONE or vice-versa * invert the PFN part to stop speculation. * pte_pfn undoes this when needed. */ if (__pte_needs_invert(oldval) != __pte_needs_invert(val)) val = (val & ~mask) | (~val & mask); return val; } #endif /* __ASSEMBLER__ */ #endif
523 522 523 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 /* SPDX-License-Identifier: GPL-2.0 */ /* * linux/cgroup-defs.h - basic definitions for cgroup * * This file provides basic type and interface. Include this file directly * only if necessary to avoid cyclic dependencies. 
*/ #ifndef _LINUX_CGROUP_DEFS_H #define _LINUX_CGROUP_DEFS_H #include <linux/limits.h> #include <linux/list.h> #include <linux/idr.h> #include <linux/wait.h> #include <linux/mutex.h> #include <linux/rcupdate.h> #include <linux/refcount.h> #include <linux/percpu-refcount.h> #include <linux/percpu-rwsem.h> #include <linux/u64_stats_sync.h> #include <linux/workqueue.h> #include <linux/bpf-cgroup-defs.h> #include <linux/psi_types.h> #ifdef CONFIG_CGROUPS struct cgroup; struct cgroup_root; struct cgroup_subsys; struct cgroup_taskset; struct kernfs_node; struct kernfs_ops; struct kernfs_open_file; struct seq_file; struct poll_table_struct; #define MAX_CGROUP_TYPE_NAMELEN 32 #define MAX_CGROUP_ROOT_NAMELEN 64 #define MAX_CFTYPE_NAME 64 /* define the enumeration of all cgroup subsystems */ #define SUBSYS(_x) _x ## _cgrp_id, enum cgroup_subsys_id { #include <linux/cgroup_subsys.h> CGROUP_SUBSYS_COUNT, }; #undef SUBSYS /* bits in struct cgroup_subsys_state flags field */ enum { CSS_NO_REF = (1 << 0), /* no reference counting for this css */ CSS_ONLINE = (1 << 1), /* between ->css_online() and ->css_offline() */ CSS_RELEASED = (1 << 2), /* refcnt reached zero, released */ CSS_VISIBLE = (1 << 3), /* css is visible to userland */ CSS_DYING = (1 << 4), /* css is dying */ }; /* bits in struct cgroup flags field */ enum { /* Control Group requires release notifications to userspace */ CGRP_NOTIFY_ON_RELEASE, /* * Clone the parent's configuration when creating a new child * cpuset cgroup. For historical reasons, this option can be * specified at mount time and thus is implemented here. */ CGRP_CPUSET_CLONE_CHILDREN, /* Control group has to be frozen. */ CGRP_FREEZE, /* Cgroup is frozen. */ CGRP_FROZEN, }; /* cgroup_root->flags */ enum { CGRP_ROOT_NOPREFIX = (1 << 1), /* mounted subsystems have no named prefix */ CGRP_ROOT_XATTR = (1 << 2), /* supports extended attributes */ /* * Consider namespaces as delegation boundaries. 
If this flag is * set, controller specific interface files in a namespace root * aren't writeable from inside the namespace. */ CGRP_ROOT_NS_DELEGATE = (1 << 3), /* * Reduce latencies on dynamic cgroup modifications such as task * migrations and controller on/offs by disabling percpu operation on * cgroup_threadgroup_rwsem. This makes hot path operations such as * forks and exits into the slow path and more expensive. * * The static usage pattern of creating a cgroup, enabling controllers, * and then seeding it with CLONE_INTO_CGROUP doesn't require write * locking cgroup_threadgroup_rwsem and thus doesn't benefit from * favordynmod. */ CGRP_ROOT_FAVOR_DYNMODS = (1 << 4), /* * Enable cpuset controller in v1 cgroup to use v2 behavior. */ CGRP_ROOT_CPUSET_V2_MODE = (1 << 16), /* * Enable legacy local memory.events. */ CGRP_ROOT_MEMORY_LOCAL_EVENTS = (1 << 17), /* * Enable recursive subtree protection */ CGRP_ROOT_MEMORY_RECURSIVE_PROT = (1 << 18), /* * Enable hugetlb accounting for the memory controller. */ CGRP_ROOT_MEMORY_HUGETLB_ACCOUNTING = (1 << 19), /* * Enable legacy local pids.events. */ CGRP_ROOT_PIDS_LOCAL_EVENTS = (1 << 20), }; /* cftype->flags */ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NS_DELEGATABLE = (1 << 2), /* writeable beyond delegation boundaries */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ CFTYPE_DEBUG = (1 << 5), /* create when cgroup_debug */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ __CFTYPE_ADDED = (1 << 18), }; /* * cgroup_file is the handle for a file instance created in a cgroup which * is used, for example, to generate file changed notifications. 
This can * be obtained by setting cftype->file_offset. */ struct cgroup_file { /* do not access any fields from outside cgroup core */ struct kernfs_node *kn; unsigned long notified_at; struct timer_list notify_timer; }; /* * Per-subsystem/per-cgroup state maintained by the system. This is the * fundamental structural building block that controllers deal with. * * Fields marked with "PI:" are public and immutable and may be accessed * directly without synchronization. */ struct cgroup_subsys_state { /* PI: the cgroup that this css is attached to */ struct cgroup *cgroup; /* PI: the cgroup subsystem that this css is attached to */ struct cgroup_subsys *ss; /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; /* * Depending on the context, this field is initialized * via css_rstat_init() at different places: * * when css is associated with cgroup::self * when css->cgroup is the root cgroup * performed in cgroup_init() * when css->cgroup is not the root cgroup * performed in cgroup_create() * when css is associated with a subsystem * when css->cgroup is the root cgroup * performed in cgroup_init_subsys() in the non-early path * when css->cgroup is not the root cgroup * performed in css_create() */ struct css_rstat_cpu __percpu *rstat_cpu; /* * siblings list anchored at the parent's ->children * * linkage is protected by cgroup_mutex or RCU */ struct list_head sibling; struct list_head children; /* * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). */ int id; unsigned int flags; /* * Monotonically increasing unique serial number which defines a * uniform order among all csses. It's guaranteed that all * ->children lists are in the ascending order of ->serial_nr and * used to allow interrupting and resuming iterations. */ u64 serial_nr; /* * Incremented by online self and children. Used to guarantee that * parents are not offlined before their children. 
*/ atomic_t online_cnt; /* percpu_ref killing and RCU release */ struct work_struct destroy_work; struct rcu_work destroy_rwork; /* * PI: the parent css. Placed here for cache proximity to following * fields of the containing structure. */ struct cgroup_subsys_state *parent; /* * Keep track of total numbers of visible descendant CSSes. * The total number of dying CSSes is tracked in * css->cgroup->nr_dying_subsys[ssid]. * Protected by cgroup_mutex. */ int nr_descendants; /* * A singly-linked list of css structures to be rstat flushed. * This is a scratch field to be used exclusively by * css_rstat_flush(). * * Protected by rstat_base_lock when css is cgroup::self. * Protected by css->ss->rstat_ss_lock otherwise. */ struct cgroup_subsys_state *rstat_flush_next; }; /* * A css_set is a structure holding pointers to a set of * cgroup_subsys_state objects. This saves space in the task struct * object and speeds up fork()/exit(), since a single inc/dec and a * list_add()/del() can bump the reference count on the entire cgroup * set for a task. */ struct css_set { /* * Set of subsystem states, one for each subsystem. This array is * immutable after creation apart from the init_css_set during * subsystem registration (at boot time). */ struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; /* reference count */ refcount_t refcount; /* * For a domain cgroup, the following points to self. If threaded, * to the matching cset of the nearest domain ancestor. The * dom_cset provides access to the domain cgroup and its csses to * which domain level resource consumptions should be charged. */ struct css_set *dom_cset; /* the default cgroup associated with this css_set */ struct cgroup *dfl_cgrp; /* internal task count, protected by css_set_lock */ int nr_tasks; /* * Lists running through all tasks using this cgroup group. * mg_tasks lists tasks which belong to this cset but are in the * process of being migrated out or in. 
Protected by * css_set_lock, but, during migration, once tasks are moved to * mg_tasks, it can be read safely while holding cgroup_mutex. */ struct list_head tasks; struct list_head mg_tasks; struct list_head dying_tasks; /* all css_task_iters currently walking this cset */ struct list_head task_iters; /* * On the default hierarchy, ->subsys[ssid] may point to a css * attached to an ancestor instead of the cgroup this css_set is * associated with. The following node is anchored at * ->subsys[ssid]->cgroup->e_csets[ssid] and provides a way to * iterate through all css's attached to a given cgroup. */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; /* all threaded csets whose ->dom_cset points to this cset */ struct list_head threaded_csets; struct list_head threaded_csets_node; /* * List running through all cgroup groups in the same hash * slot. Protected by css_set_lock */ struct hlist_node hlist; /* * List of cgrp_cset_links pointing at cgroups referenced from this * css_set. Protected by css_set_lock. */ struct list_head cgrp_links; /* * List of csets participating in the on-going migration either as * source or destination. Protected by cgroup_mutex. */ struct list_head mg_src_preload_node; struct list_head mg_dst_preload_node; struct list_head mg_node; /* * If this cset is acting as the source of migration the following * two fields are set. mg_src_cgrp and mg_dst_cgrp are * respectively the source and destination cgroups of the on-going * migration. mg_dst_cset is the destination cset the target tasks * on this cset should be migrated to. Protected by cgroup_mutex. */ struct cgroup *mg_src_cgrp; struct cgroup *mg_dst_cgrp; struct css_set *mg_dst_cset; /* dead and being drained, ignore for migration */ bool dead; /* For RCU-protected deletion */ struct rcu_head rcu_head; }; struct cgroup_base_stat { struct task_cputime cputime; #ifdef CONFIG_SCHED_CORE u64 forceidle_sum; #endif u64 ntime; }; /* * rstat - cgroup scalable recursive statistics. 
Accounting is done * per-cpu in css_rstat_cpu which is then lazily propagated up the * hierarchy on reads. * * When a stat gets updated, the css_rstat_cpu and its ancestors are * linked into the updated tree. On the following read, propagation only * considers and consumes the updated tree. This makes reading O(the * number of descendants which have been active since last read) instead of * O(the total number of descendants). * * This is important because there can be a lot of (draining) cgroups which * aren't active and stat may be read frequently. The combination can * become very expensive. By propagating selectively, increasing reading * frequency decreases the cost of each read. * * This struct hosts both the fields which implement the above - * updated_children and updated_next. */ struct css_rstat_cpu { /* * Child cgroups with stat updates on this cpu since the last read * are linked on the parent's ->updated_children through * ->updated_next. updated_children is terminated by its container css. */ struct cgroup_subsys_state *updated_children; struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ struct llist_node lnode; /* lockless list for update */ struct cgroup_subsys_state *owner; /* back pointer */ }; /* * This struct hosts the fields which track basic resource statistics on * top of it - bsync, bstat and last_bstat. */ struct cgroup_rstat_base_cpu { /* * ->bsync protects ->bstat. These are the only fields which get * updated in the hot path. */ struct u64_stats_sync bsync; struct cgroup_base_stat bstat; /* * Snapshots at the last reading. These are used to calculate the * deltas to propagate to the global counters. */ struct cgroup_base_stat last_bstat; /* * This field is used to record the cumulative per-cpu time of * the cgroup and its descendants. Currently it can be read via * eBPF/drgn etc, and we are still trying to determine how to * expose it in the cgroupfs interface. 
*/ struct cgroup_base_stat subtree_bstat; /* * Snapshots at the last reading. These are used to calculate the * deltas to propagate to the per-cpu subtree_bstat. */ struct cgroup_base_stat last_subtree_bstat; }; struct cgroup_freezer_state { /* Should the cgroup and its descendants be frozen. */ bool freeze; /* Should the cgroup actually be frozen? */ bool e_freeze; /* Fields below are protected by css_set_lock */ /* Number of frozen descendant cgroups */ int nr_frozen_descendants; /* * Number of tasks, which are counted as frozen: * frozen, SIGSTOPped, and PTRACEd. */ int nr_frozen_tasks; }; struct cgroup { /* self css with NULL ->ss, points back to this cgroup */ struct cgroup_subsys_state self; unsigned long flags; /* "unsigned long" so bitops work */ /* * The depth this cgroup is at. The root is at depth zero and each * step down the hierarchy increments the level. This along with * ancestors[] can determine whether a given cgroup is a * descendant of another without traversing the hierarchy. */ int level; /* Maximum allowed descent tree depth */ int max_depth; /* * Keep track of total numbers of visible and dying descent cgroups. * Dying cgroups are cgroups which were deleted by a user, * but are still existing because someone else is holding a reference. * max_descendants is a maximum allowed number of descent cgroups. * * nr_descendants and nr_dying_descendants are protected * by cgroup_mutex and css_set_lock. It's fine to read them holding * any of cgroup_mutex and css_set_lock; for writing both locks * should be held. */ int nr_descendants; int nr_dying_descendants; int max_descendants; /* * Each non-empty css_set associated with this cgroup contributes * one to nr_populated_csets. The counter is zero iff this cgroup * doesn't have any tasks. * * All children which have non-zero nr_populated_csets and/or * nr_populated_children of their own contribute one to either * nr_populated_domain_children or nr_populated_threaded_children * depending on their type. 
Each counter is zero iff all cgroups * of the type in the subtree proper don't have any tasks. */ int nr_populated_csets; int nr_populated_domain_children; int nr_populated_threaded_children; int nr_threaded_children; /* # of live threaded child cgroups */ /* sequence number for cgroup.kill, serialized by css_set_lock. */ unsigned int kill_seq; struct kernfs_node *kn; /* cgroup kernfs entry */ struct cgroup_file procs_file; /* handle for "cgroup.procs" */ struct cgroup_file events_file; /* handle for "cgroup.events" */ /* handles for "{cpu,memory,io,irq}.pressure" */ struct cgroup_file psi_files[NR_PSI_RESOURCES]; /* * The bitmask of subsystems enabled on the child cgroups. * ->subtree_control is the one configured through * "cgroup.subtree_control" while ->subtree_ss_mask is the effective * one which may have more subsystems enabled. Controller knobs * are made available iff it's enabled in ->subtree_control. */ u16 subtree_control; u16 subtree_ss_mask; u16 old_subtree_control; u16 old_subtree_ss_mask; /* Private pointers for each registered subsystem */ struct cgroup_subsys_state __rcu *subsys[CGROUP_SUBSYS_COUNT]; /* * Keep track of total number of dying CSSes at and below this cgroup. * Protected by cgroup_mutex. */ int nr_dying_subsys[CGROUP_SUBSYS_COUNT]; struct cgroup_root *root; /* * List of cgrp_cset_links pointing at css_sets with tasks in this * cgroup. Protected by css_set_lock. */ struct list_head cset_links; /* * On the default hierarchy, a css_set for a cgroup with some * susbsys disabled will point to css's which are associated with * the closest ancestor which has the subsys enabled. The * following lists all css_sets which point to this cgroup's css * for the given subsystem. */ struct list_head e_csets[CGROUP_SUBSYS_COUNT]; /* * If !threaded, self. If threaded, it points to the nearest * domain ancestor. Inside a threaded subtree, cgroups are exempt * from process granularity and no-internal-task constraint. 
* Domain level resource consumptions which aren't tied to a
	 * specific task are charged to the dom_cgrp.
	 */
	struct cgroup *dom_cgrp;
	struct cgroup *old_dom_cgrp;		/* used while enabling threaded */

	/*
	 * Depending on the context, this field is initialized via
	 * css_rstat_init() at different places:
	 *
	 * when cgroup is the root cgroup
	 *	performed in cgroup_setup_root()
	 * otherwise
	 *	performed in cgroup_create()
	 */
	struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu;

	/*
	 * Add padding to keep the read mostly rstat per-cpu pointer on a
	 * different cacheline than the following *bstat fields which can have
	 * frequent updates.
	 */
	CACHELINE_PADDING(_pad_);

	/* cgroup basic resource statistics */
	struct cgroup_base_stat last_bstat;
	struct cgroup_base_stat bstat;
	struct prev_cputime prev_cputime;	/* for printing out cputime */

	/*
	 * list of pidlists, up to two for each namespace (one for procs, one
	 * for tasks); created on demand.
	 */
	struct list_head pidlists;
	struct mutex pidlist_mutex;

	/* used to wait for offlining of csses */
	wait_queue_head_t offline_waitq;

	/* used to schedule release agent */
	struct work_struct release_agent_work;

	/* used to track pressure stalls */
	struct psi_group *psi;

	/* used to store eBPF programs */
	struct cgroup_bpf bpf;

	/* Used to store internal freezer state */
	struct cgroup_freezer_state freezer;

#ifdef CONFIG_BPF_SYSCALL
	struct bpf_local_storage __rcu *bpf_cgrp_storage;
#endif

	/* All ancestors including self */
	struct cgroup *ancestors[];
};

/*
 * A cgroup_root represents the root of a cgroup hierarchy, and may be
 * associated with a kernfs_root to form an active hierarchy.  This is
 * internal to cgroup core.  Don't access directly from controllers.
 */
struct cgroup_root {
	struct kernfs_root *kf_root;

	/* The bitmask of subsystems attached to this hierarchy */
	unsigned int subsys_mask;

	/* Unique id for this hierarchy. */
	int hierarchy_id;

	/* A list running through the active hierarchies */
	struct list_head root_list;
	struct rcu_head rcu;	/* Must be near the top */

	/*
	 * The root cgroup. The containing cgroup_root will be destroyed on its
	 * release. cgrp->ancestors[0] will be used overflowing into the
	 * following field. cgrp_ancestor_storage must immediately follow.
	 */
	struct cgroup cgrp;

	/* must follow cgrp for cgrp->ancestors[0], see above */
	struct cgroup *cgrp_ancestor_storage;

	/* Number of cgroups in the hierarchy, used only for /proc/cgroups */
	atomic_t nr_cgrps;

	/* Hierarchy-specific flags */
	unsigned int flags;

	/* The path to use for release notifications. */
	char release_agent_path[PATH_MAX];

	/* The name for this hierarchy - may be empty */
	char name[MAX_CGROUP_ROOT_NAMELEN];
};

/*
 * struct cftype: handler definitions for cgroup control files
 *
 * When reading/writing to a file:
 *	- the cgroup to use is file->f_path.dentry->d_parent->d_fsdata
 *	- the 'cftype' of the file is file->f_path.dentry->d_fsdata
 */
struct cftype {
	/*
	 * Name of the subsystem is prepended in cgroup_file_name().
	 * Zero length string indicates end of cftype array.
	 */
	char name[MAX_CFTYPE_NAME];
	unsigned long private;

	/*
	 * The maximum length of string, excluding trailing nul, that can
	 * be passed to write.  If < PAGE_SIZE-1, PAGE_SIZE-1 is assumed.
	 */
	size_t max_write_len;

	/* CFTYPE_* flags */
	unsigned int flags;

	/*
	 * If non-zero, should contain the offset from the start of css to
	 * a struct cgroup_file field.  cgroup will record the handle of
	 * the created file into it.  The recorded handle can be used as
	 * long as the containing css remains accessible.
	 */
	unsigned int file_offset;

	/*
	 * Fields used for internal bookkeeping.  Initialized automatically
	 * during registration.
	 */
	struct cgroup_subsys *ss;	/* NULL for cgroup core files */
	struct list_head node;		/* anchored at ss->cfts */
	struct kernfs_ops *kf_ops;

	int (*open)(struct kernfs_open_file *of);
	void (*release)(struct kernfs_open_file *of);

	/*
	 * read_u64() is a shortcut for the common case of returning a
	 * single integer. Use it in place of read()
	 */
	u64 (*read_u64)(struct cgroup_subsys_state *css, struct cftype *cft);
	/*
	 * read_s64() is a signed version of read_u64()
	 */
	s64 (*read_s64)(struct cgroup_subsys_state *css, struct cftype *cft);

	/* generic seq_file read interface */
	int (*seq_show)(struct seq_file *sf, void *v);

	/* optional ops, implement all or none */
	void *(*seq_start)(struct seq_file *sf, loff_t *ppos);
	void *(*seq_next)(struct seq_file *sf, void *v, loff_t *ppos);
	void (*seq_stop)(struct seq_file *sf, void *v);

	/*
	 * write_u64() is a shortcut for the common case of accepting
	 * a single integer (as parsed by simple_strtoull) from
	 * userspace.  Use in place of write(); return 0 or error.
	 */
	int (*write_u64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 u64 val);
	/*
	 * write_s64() is a signed version of write_u64()
	 */
	int (*write_s64)(struct cgroup_subsys_state *css, struct cftype *cft,
			 s64 val);

	/*
	 * write() is the generic write callback which maps directly to
	 * kernfs write operation and overrides all other operations.
	 * Maximum write size is determined by ->max_write_len.  Use
	 * of_css/cft() to access the associated css and cft.
	 */
	ssize_t (*write)(struct kernfs_open_file *of,
			 char *buf, size_t nbytes, loff_t off);

	__poll_t (*poll)(struct kernfs_open_file *of,
			 struct poll_table_struct *pt);

	struct lock_class_key lockdep_key;
};

/*
 * Control Group subsystem type.
 * See Documentation/admin-guide/cgroup-v1/cgroups.rst for details
 */
struct cgroup_subsys {
	/* css lifecycle callbacks */
	struct cgroup_subsys_state *(*css_alloc)(struct cgroup_subsys_state *parent_css);
	int (*css_online)(struct cgroup_subsys_state *css);
	void (*css_offline)(struct cgroup_subsys_state *css);
	void (*css_released)(struct cgroup_subsys_state *css);
	void (*css_free)(struct cgroup_subsys_state *css);
	void (*css_reset)(struct cgroup_subsys_state *css);
	void (*css_killed)(struct cgroup_subsys_state *css);
	void (*css_rstat_flush)(struct cgroup_subsys_state *css, int cpu);
	int (*css_extra_stat_show)(struct seq_file *seq,
				   struct cgroup_subsys_state *css);
	int (*css_local_stat_show)(struct seq_file *seq,
				   struct cgroup_subsys_state *css);

	/* task migration callbacks */
	int (*can_attach)(struct cgroup_taskset *tset);
	void (*cancel_attach)(struct cgroup_taskset *tset);
	void (*attach)(struct cgroup_taskset *tset);
	void (*post_attach)(void);

	/* task lifetime callbacks */
	int (*can_fork)(struct task_struct *task,
			struct css_set *cset);
	void (*cancel_fork)(struct task_struct *task, struct css_set *cset);
	void (*fork)(struct task_struct *task);
	void (*exit)(struct task_struct *task);
	void (*release)(struct task_struct *task);
	void (*bind)(struct cgroup_subsys_state *root_css);

	bool early_init:1;

	/*
	 * If %true, the controller, on the default hierarchy, doesn't show
	 * up in "cgroup.controllers" or "cgroup.subtree_control", is
	 * implicitly enabled on all cgroups on the default hierarchy, and
	 * bypasses the "no internal process" constraint.  This is for
	 * utility type controllers which are transparent to userland.
	 *
	 * An implicit controller can be stolen from the default hierarchy
	 * anytime and thus must be okay with offline csses from previous
	 * hierarchies coexisting with csses for the current one.
	 */
	bool implicit_on_dfl:1;

	/*
	 * If %true, the controller supports threaded mode on the default
	 * hierarchy.  In a threaded subtree, both process granularity and
	 * no-internal-process constraint are ignored and a threaded
	 * controller should be able to handle that.
	 *
	 * Note that as an implicit controller is automatically enabled on
	 * all cgroups on the default hierarchy, it should also be
	 * threaded.  implicit && !threaded is not supported.
	 */
	bool threaded:1;

	/* the following two fields are initialized automatically during boot */
	int id;
	const char *name;

	/* optional, initialized automatically during boot if not set */
	const char *legacy_name;

	/* link to parent, protected by cgroup_lock() */
	struct cgroup_root *root;

	/* idr for css->id */
	struct idr css_idr;

	/*
	 * List of cftypes.  Each entry is the first entry of an array
	 * terminated by zero length name.
	 */
	struct list_head cfts;

	/*
	 * Base cftypes which are automatically registered.  The two can
	 * point to the same array.
	 */
	struct cftype *dfl_cftypes;	/* for the default hierarchy */
	struct cftype *legacy_cftypes;	/* for the legacy hierarchies */

	/*
	 * A subsystem may depend on other subsystems.  When such subsystem
	 * is enabled on a cgroup, the depended-upon subsystems are enabled
	 * together if available.  Subsystems enabled due to dependency are
	 * not visible to userland until explicitly enabled.  The following
	 * specifies the mask of subsystems that this one depends on.
	 */
	unsigned int depends_on;

	spinlock_t rstat_ss_lock;
	struct llist_head __percpu *lhead; /* lockless update list head */
};

extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem;

/*
 * A value paired with a list linkage.
 * NOTE(review): presumably a per-open-file peak watermark entry; the users
 * are not visible in this header - confirm before relying on that.
 */
struct cgroup_of_peak {
	unsigned long		value;
	struct list_head	list;
};

/**
 * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups
 * @tsk: target task
 *
 * Allows cgroup operations to synchronize against threadgroup changes
 * using a percpu_rw_semaphore.
 *
 * Takes the read side of cgroup_threadgroup_rwsem; @tsk itself is not
 * referenced by this implementation.
 */
static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
	percpu_down_read(&cgroup_threadgroup_rwsem);
}

/**
 * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups
 * @tsk: target task
 *
 * Counterpart of cgroup_threadgroup_change_begin().
*/
static inline void cgroup_threadgroup_change_end(struct task_struct *tsk)
{
	/* Drops the read side of cgroup_threadgroup_rwsem; @tsk is unused. */
	percpu_up_read(&cgroup_threadgroup_rwsem);
}

#else	/* CONFIG_CGROUPS */

#define CGROUP_SUBSYS_COUNT 0

/*
 * Without CONFIG_CGROUPS there is no semaphore to take; only the
 * might_sleep() annotation of the begin side is kept.
 */
static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk)
{
	might_sleep();
}

static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) {}

#endif	/* CONFIG_CGROUPS */

#ifdef CONFIG_SOCK_CGROUP_DATA

/*
 * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
 * per-socket cgroup information except for memcg association.
 *
 * On legacy hierarchies, net_prio and net_cls controllers directly
 * set attributes on each sock which can then be tested by the network
 * layer. On the default hierarchy, each sock is associated with the
 * cgroup it was created in and the networking layer can match the
 * cgroup directly.
 */
struct sock_cgroup_data {
	struct cgroup	*cgroup; /* v2 */
#ifdef CONFIG_CGROUP_NET_CLASSID
	u32		classid; /* v1 */
#endif
#ifdef CONFIG_CGROUP_NET_PRIO
	u16		prioidx; /* v1 */
#endif
};

/*
 * Read the socket's priority index.  When CONFIG_CGROUP_NET_PRIO is
 * disabled there is no stored field and the constant 1 is returned.
 */
static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
{
#ifdef CONFIG_CGROUP_NET_PRIO
	return READ_ONCE(skcd->prioidx);
#else
	return 1;
#endif
}

#ifdef CONFIG_CGROUP_NET_CLASSID
/* Read the socket's classid; only defined with CONFIG_CGROUP_NET_CLASSID. */
static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
{
	return READ_ONCE(skcd->classid);
}
#endif

/*
 * Store @prioidx in @skcd.  Compiles to a no-op when
 * CONFIG_CGROUP_NET_PRIO is disabled.
 */
static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
					   u16 prioidx)
{
#ifdef CONFIG_CGROUP_NET_PRIO
	WRITE_ONCE(skcd->prioidx, prioidx);
#endif
}

#ifdef CONFIG_CGROUP_NET_CLASSID
/* Store @classid in @skcd; only defined with CONFIG_CGROUP_NET_CLASSID. */
static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
					   u32 classid)
{
	WRITE_ONCE(skcd->classid, classid);
}
#endif

#else	/* CONFIG_SOCK_CGROUP_DATA */

struct sock_cgroup_data {
};

#endif	/* CONFIG_SOCK_CGROUP_DATA */

#endif	/* _LINUX_CGROUP_DEFS_H */
565 558 19 344 556 23 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
// SPDX-License-Identifier: GPL-2.0-or-later
/* Copyright Amazon.com Inc. or its affiliates. */

#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netdev_lock.h>
#include <net/netns/generic.h>

/*
 * Netdevice notifier callback that only checks locking expectations.
 *
 * For each event the matching lock assertion(s) are run, cascading through
 * the fallthrough chain:
 *   - NETDEV_XDP_FEAT_CHANGE: netdev_assert_locked(), then the two below;
 *   - NETDEV_CHANGE/REGISTER/UP: netdev_ops_assert_locked(), then ASSERT_RTNL();
 *   - the remaining listed events: ASSERT_RTNL();
 *   - NETDEV_CHANGENAME: ASSERT_RTNL_NET() on the device's netns.
 *
 * Always returns NOTIFY_DONE.
 */
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
		       void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);
	enum netdev_cmd cmd = event;

	/* Keep enum and don't add default to trigger -Werror=switch */
	switch (cmd) {
	case NETDEV_XDP_FEAT_CHANGE:
		netdev_assert_locked(dev);
		fallthrough;
	case NETDEV_CHANGE:
	case NETDEV_REGISTER:
	case NETDEV_UP:
		netdev_ops_assert_locked(dev);
		fallthrough;
	case NETDEV_DOWN:
	case NETDEV_REBOOT:
	case NETDEV_UNREGISTER:
	case NETDEV_CHANGEMTU:
	case NETDEV_CHANGEADDR:
	case NETDEV_PRE_CHANGEADDR:
	case NETDEV_GOING_DOWN:
	case NETDEV_FEAT_CHANGE:
	case NETDEV_BONDING_FAILOVER:
	case NETDEV_PRE_UP:
	case NETDEV_PRE_TYPE_CHANGE:
	case NETDEV_POST_TYPE_CHANGE:
	case NETDEV_POST_INIT:
	case NETDEV_PRE_UNINIT:
	case NETDEV_RELEASE:
	case NETDEV_NOTIFY_PEERS:
	case NETDEV_JOIN:
	case NETDEV_CHANGEUPPER:
	case NETDEV_RESEND_IGMP:
	case NETDEV_PRECHANGEMTU:
	case NETDEV_CHANGEINFODATA:
	case NETDEV_BONDING_INFO:
	case NETDEV_PRECHANGEUPPER:
	case NETDEV_CHANGELOWERSTATE:
	case NETDEV_UDP_TUNNEL_PUSH_INFO:
	case NETDEV_UDP_TUNNEL_DROP_INFO:
	case NETDEV_CHANGE_TX_QUEUE_LEN:
	case NETDEV_CVLAN_FILTER_PUSH_INFO:
	case NETDEV_CVLAN_FILTER_DROP_INFO:
	case NETDEV_SVLAN_FILTER_PUSH_INFO:
	case NETDEV_SVLAN_FILTER_DROP_INFO:
	case NETDEV_OFFLOAD_XSTATS_ENABLE:
	case NETDEV_OFFLOAD_XSTATS_DISABLE:
	case NETDEV_OFFLOAD_XSTATS_REPORT_USED:
	case NETDEV_OFFLOAD_XSTATS_REPORT_DELTA:
		ASSERT_RTNL();
		break;

	case NETDEV_CHANGENAME:
		ASSERT_RTNL_NET(net);
		break;
	}

	return NOTIFY_DONE;
}
EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL");

/* pernet-generic id under which each netns keeps its notifier_block */
static int rtnl_net_debug_net_id;

/*
 * Per-netns init: fetch this netns's notifier_block from the pernet-generic
 * area, point it at netdev_debug_event() and register it for the netns.
 * Returns the result of register_netdevice_notifier_net().
 */
static int __net_init rtnl_net_debug_net_init(struct net *net)
{
	struct notifier_block *nb;

	nb = net_generic(net, rtnl_net_debug_net_id);
	nb->notifier_call = netdev_debug_event;

	return register_netdevice_notifier_net(net, nb);
}

/* Per-netns exit: unregister the notifier installed by the init hook. */
static void __net_exit rtnl_net_debug_net_exit(struct net *net)
{
	struct notifier_block *nb;

	nb = net_generic(net, rtnl_net_debug_net_id);
	unregister_netdevice_notifier_net(net, nb);
}

/* .size reserves one struct notifier_block of pernet-generic storage. */
static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
	.init = rtnl_net_debug_net_init,
	.exit = rtnl_net_debug_net_exit,
	.id = &rtnl_net_debug_net_id,
	.size = sizeof(struct notifier_block),
};

/* Notifier registered globally via register_netdevice_notifier(). */
static struct notifier_block rtnl_net_debug_block = {
	.notifier_call = netdev_debug_event,
};

/*
 * Register the pernet operations first, then the global notifier.  If the
 * second step fails the pernet registration is rolled back.  Returns 0 or
 * the error from whichever registration failed.
 */
static int __init rtnl_net_debug_init(void)
{
	int ret;

	ret = register_pernet_subsys(&rtnl_net_debug_net_ops);
	if (ret)
		return ret;

	ret = register_netdevice_notifier(&rtnl_net_debug_block);
	if (ret)
		unregister_pernet_subsys(&rtnl_net_debug_net_ops);

	return ret;
}

subsys_initcall(rtnl_net_debug_init);
5 5 5 1 1 2 1 1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
// SPDX-License-Identifier: GPL-2.0-only
/*
 * vivid-radio-common.c - common radio rx/tx support functions.
 *
 * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/videodev2.h>

#include "vivid-core.h"
#include "vivid-ctrls.h"
#include "vivid-radio-common.h"
#include "vivid-rds-gen.h"

/*
 * These functions are shared between the vivid receiver and transmitter
 * since both use the same frequency bands.
 */

/*
 * Band table with TOT_BANDS entries; each entry's .index matches its
 * position (0 = FM, 1 = AM, 2 = SW).  Only the FM entry advertises
 * V4L2_TUNER_CAP_STEREO; the SW entry uses AM modulation.
 */
const struct v4l2_frequency_band vivid_radio_bands[TOT_BANDS] = {
	/* Band FM */
	{
		.type = V4L2_TUNER_RADIO,
		.index = 0,
		.capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO |
			      V4L2_TUNER_CAP_FREQ_BANDS,
		.rangelow   = FM_FREQ_RANGE_LOW,
		.rangehigh  = FM_FREQ_RANGE_HIGH,
		.modulation = V4L2_BAND_MODULATION_FM,
	},
	/* Band AM */
	{
		.type = V4L2_TUNER_RADIO,
		.index = 1,
		.capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS,
		.rangelow   = AM_FREQ_RANGE_LOW,
		.rangehigh  = AM_FREQ_RANGE_HIGH,
		.modulation = V4L2_BAND_MODULATION_AM,
	},
	/* Band SW */
	{
		.type = V4L2_TUNER_RADIO,
		.index = 2,
		.capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS,
		.rangelow   = SW_FREQ_RANGE_LOW,
		.rangehigh  = SW_FREQ_RANGE_HIGH,
		.modulation = V4L2_BAND_MODULATION_AM,
	},
};

/*
 * Initialize the RDS generator.
If we can loop, then the RDS generator
 * is set up with the values from the RDS TX controls, otherwise it
 * will fill in standard values using one of two alternates.
 */
void vivid_radio_rds_init(struct vivid_dev *dev)
{
	bool use_alt = dev->radio_rx_rds_use_alternates;
	struct vivid_rds_gen *gen = &dev->rds_gen;

	if (!dev->radio_rds_loop) {
		/* Not looping: generate one of the two canned data sets. */
		vivid_rds_gen_fill(gen, dev->radio_rx_freq, use_alt);
	} else if (!dev->radio_tx_rds_controls) {
		/* Do nothing, blocks will be filled by the transmitter */
		return;
	} else {
		/*
		 * Looping with TX controls present: snapshot their current
		 * values while holding the control lock of the PI control.
		 */
		v4l2_ctrl_lock(dev->radio_tx_rds_pi);
		gen->picode = dev->radio_tx_rds_pi->cur.val;
		gen->pty = dev->radio_tx_rds_pty->cur.val;
		gen->mono_stereo = dev->radio_tx_rds_mono_stereo->cur.val;
		gen->art_head = dev->radio_tx_rds_art_head->cur.val;
		gen->compressed = dev->radio_tx_rds_compressed->cur.val;
		gen->dyn_pty = dev->radio_tx_rds_dyn_pty->cur.val;
		gen->ta = dev->radio_tx_rds_ta->cur.val;
		gen->tp = dev->radio_tx_rds_tp->cur.val;
		gen->ms = dev->radio_tx_rds_ms->cur.val;
		strscpy(gen->psname,
			dev->radio_tx_rds_psname->p_cur.p_char,
			sizeof(gen->psname));
		/* The alternate radiotext starts 64 bytes into the control. */
		strscpy(gen->radiotext,
			dev->radio_tx_rds_radiotext->p_cur.p_char +
				(use_alt ? 64 : 0),
			sizeof(gen->radiotext));
		v4l2_ctrl_unlock(dev->radio_tx_rds_pi);
	}

	/* Mirror the generator state into the RX controls, if there are any. */
	if (dev->radio_rx_rds_controls) {
		v4l2_ctrl_s_ctrl(dev->radio_rx_rds_pty, gen->pty);
		v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ta, gen->ta);
		v4l2_ctrl_s_ctrl(dev->radio_rx_rds_tp, gen->tp);
		v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ms, gen->ms);
		v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_psname, gen->psname);
		v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_radiotext,
					gen->radiotext);
		/* Flip to the other canned data set for the next call. */
		if (!dev->radio_rds_loop)
			dev->radio_rx_rds_use_alternates =
				!dev->radio_rx_rds_use_alternates;
	}

	vivid_rds_generate(gen);
}

/*
 * Calculate the emulated signal quality taking into account the frequency
 * the transmitter is using.
*/ static void vivid_radio_calc_sig_qual(struct vivid_dev *dev) { int mod = 16000; int delta = 800; int sig_qual, sig_qual_tx = mod; /* * For SW and FM there is a channel every 1000 kHz, for AM there is one * every 100 kHz. */ if (dev->radio_rx_freq <= AM_FREQ_RANGE_HIGH) { mod /= 10; delta /= 10; } sig_qual = (dev->radio_rx_freq + delta) % mod - delta; if (dev->has_radio_tx) sig_qual_tx = dev->radio_rx_freq - dev->radio_tx_freq; if (abs(sig_qual_tx) <= abs(sig_qual)) { sig_qual = sig_qual_tx; /* * Zero the internal rds buffer if we are going to loop * rds blocks. */ if (!dev->radio_rds_loop && !dev->radio_tx_rds_controls) memset(dev->rds_gen.data, 0, sizeof(dev->rds_gen.data)); dev->radio_rds_loop = dev->radio_rx_freq >= FM_FREQ_RANGE_LOW; } else { dev->radio_rds_loop = false; } if (dev->radio_rx_freq <= AM_FREQ_RANGE_HIGH) sig_qual *= 10; dev->radio_rx_sig_qual = sig_qual; } int vivid_radio_g_frequency(struct file *file, const unsigned *pfreq, struct v4l2_frequency *vf) { if (vf->tuner != 0) return -EINVAL; vf->frequency = *pfreq; return 0; } int vivid_radio_s_frequency(struct file *file, unsigned *pfreq, const struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); unsigned freq; unsigned band; if (vf->tuner != 0) return -EINVAL; if (vf->frequency >= (FM_FREQ_RANGE_LOW + SW_FREQ_RANGE_HIGH) / 2) band = BAND_FM; else if (vf->frequency <= (AM_FREQ_RANGE_HIGH + SW_FREQ_RANGE_LOW) / 2) band = BAND_AM; else band = BAND_SW; freq = clamp_t(u32, vf->frequency, vivid_radio_bands[band].rangelow, vivid_radio_bands[band].rangehigh); *pfreq = freq; /* * For both receiver and transmitter recalculate the signal quality * (since that depends on both frequencies) and re-init the rds * generator. */ vivid_radio_calc_sig_qual(dev); vivid_radio_rds_init(dev); return 0; }
1 1 1 1 1 1 11 11 11 11 11 11 11 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 
519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 /* * Copyright (c) 2002 Red Hat, Inc. All rights reserved. * * This software may be freely redistributed under the terms of the * GNU General Public License. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*
 * Authors: David Woodhouse <dwmw2@infradead.org>
 *          David Howells <dhowells@redhat.com>
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/iversion.h>
#include "internal.h"
#include "afs_fs.h"

/*
 * Preload the content buffer of a symlink that has just been created.
 *
 * Allocates a folio queue buffer large enough for the link text plus its
 * NUL terminator, copies the text from op->create.symlink into the first
 * folio, marks the content as read and the inode as dirty.  If the buffer
 * allocation fails the function returns silently without setting anything.
 */
void afs_init_new_symlink(struct afs_vnode *vnode, struct afs_operation *op)
{
	size_t size = strlen(op->create.symlink) + 1;
	size_t dsize = 0;
	char *p;

	if (netfs_alloc_folioq_buffer(NULL, &vnode->directory, &dsize, size,
				      mapping_gfp_mask(vnode->netfs.inode.i_mapping)) < 0)
		return;

	vnode->directory_size = dsize;
	/* NOTE(review): assumes the whole string fits in the first folio - confirm */
	p = kmap_local_folio(folioq_folio(vnode->directory, 0), 0);
	memcpy(p, op->create.symlink, size);
	kunmap_local(p);
	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);
	netfs_single_mark_inode_dirty(&vnode->netfs.inode);
}

/*
 * Delayed-call destructor for afs_get_link(): unmap the content pointer
 * and drop the folio reference taken when it was handed out.
 */
static void afs_put_link(void *arg)
{
	struct folio *folio = virt_to_folio(arg);

	kunmap_local(arg);
	folio_put(folio);
}

/*
 * Return a mapped pointer to the symlink's content.
 *
 * With a NULL @dentry (RCU pathwalk) the content is only returned if it
 * has already been read and afs_check_validity() passes; otherwise
 * -ECHILD is returned.  In the non-RCU case the content is (re)read with
 * afs_read_single() unless the validated shortcut to "good" is taken.
 *
 * On success a folio reference is held and @callback is armed with
 * afs_put_link() to release it.  Errors are returned as ERR_PTR().
 */
const char *afs_get_link(struct dentry *dentry, struct inode *inode,
			 struct delayed_call *callback)
{
	struct afs_vnode *vnode = AFS_FS_I(inode);
	struct folio *folio;
	char *content;
	ssize_t ret;

	if (!dentry) {
		/* RCU pathwalk. */
		if (!test_bit(AFS_VNODE_DIR_READ, &vnode->flags) || !afs_check_validity(vnode))
			return ERR_PTR(-ECHILD);
		goto good;
	}

	/*
	 * NOTE(review): this jumps straight to the fetch when the content
	 * has already been read, so the validate-and-reuse path below is
	 * only entered when the flag was clear here.  Confirm that the
	 * polarity of this test is intended.
	 */
	if (test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
		goto fetch;

	ret = afs_validate(vnode, NULL);
	if (ret < 0)
		return ERR_PTR(ret);

	/* Reuse the buffered content if it was not zapped and is present. */
	if (!test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) &&
	    test_bit(AFS_VNODE_DIR_READ, &vnode->flags))
		goto good;

fetch:
	ret = afs_read_single(vnode, NULL);
	if (ret < 0)
		return ERR_PTR(ret);
	set_bit(AFS_VNODE_DIR_READ, &vnode->flags);

good:
	folio = folioq_folio(vnode->directory, 0);
	folio_get(folio);
	content = kmap_local_folio(folio, 0);
	set_delayed_call(callback, afs_put_link, content);
	return content;
}

/*
 * readlink() implementation: copy up to @buflen bytes of the link text
 * (without a NUL terminator) to @buffer.
 *
 * Returns the number of bytes copied, -EFAULT if the copy to userspace
 * faults, or the error from afs_get_link().
 */
int afs_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	DEFINE_DELAYED_CALL(done);
	const char *content;
	int len;

	content = afs_get_link(dentry, d_inode(dentry), &done);
	if (IS_ERR(content)) {
		do_delayed_call(&done);
		return PTR_ERR(content);
	}

	len = umin(strlen(content), buflen);
	if (copy_to_user(buffer, content, len))
		len = -EFAULT;
	do_delayed_call(&done);
	return len;
}

static const struct inode_operations afs_symlink_inode_operations = {
	.get_link	= afs_get_link,
	.readlink	= afs_readlink,
};

/*
 * Log the status and FID of a vnode whose type was not recognised, plus
 * the FID of @parent_vnode if one is given.  A stack dump is emitted only
 * the first time this is called.
 */
static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
{
	static unsigned long once_only;

	pr_warn("kAFS: AFS vnode with undefined type %u\n", vnode->status.type);
	pr_warn("kAFS: A=%d m=%o s=%llx v=%llx\n",
		vnode->status.abort_code,
		vnode->status.mode,
		vnode->status.size,
		vnode->status.data_version);
	pr_warn("kAFS: vnode %llx:%llx:%x\n",
		vnode->fid.vid,
		vnode->fid.vnode,
		vnode->fid.unique);
	if (parent_vnode)
		pr_warn("kAFS: dir %llx:%llx:%x\n",
			parent_vnode->fid.vid,
			parent_vnode->fid.vnode,
			parent_vnode->fid.unique);

	if (!test_and_set_bit(0, &once_only))
		dump_stack();
}

/*
 * Set parameters for the netfs library
 */
static void afs_set_netfs_context(struct afs_vnode *vnode)
{
	netfs_inode_init(&vnode->netfs, &afs_req_ops, true);
}

/*
 * Initialise an inode from the vnode status.
*/ static int afs_inode_init_from_status(struct afs_operation *op, struct afs_vnode_param *vp, struct afs_vnode *vnode) { struct afs_file_status *status = &vp->scb.status; struct inode *inode = AFS_VNODE_TO_I(vnode); struct timespec64 t; _enter("{%llx:%llu.%u} %s", vp->fid.vid, vp->fid.vnode, vp->fid.unique, op->type ? op->type->name : "???"); _debug("FS: ft=%d lk=%d sz=%llu ver=%Lu mod=%hu", status->type, status->nlink, (unsigned long long) status->size, status->data_version, status->mode); write_seqlock(&vnode->cb_lock); vnode->cb_v_check = op->cb_v_break; vnode->status = *status; t = status->mtime_client; inode_set_ctime_to_ts(inode, t); inode_set_mtime_to_ts(inode, t); inode_set_atime_to_ts(inode, t); inode->i_flags |= S_NOATIME; inode->i_uid = make_kuid(&init_user_ns, status->owner); inode->i_gid = make_kgid(&init_user_ns, status->group); set_nlink(&vnode->netfs.inode, status->nlink); switch (status->type) { case AFS_FTYPE_FILE: inode->i_mode = S_IFREG | (status->mode & S_IALLUGO); inode->i_op = &afs_file_inode_operations; inode->i_fop = &afs_file_operations; inode->i_mapping->a_ops = &afs_file_aops; mapping_set_large_folios(inode->i_mapping); break; case AFS_FTYPE_DIR: inode->i_mode = S_IFDIR | (status->mode & S_IALLUGO); inode->i_op = &afs_dir_inode_operations; inode->i_fop = &afs_dir_file_operations; inode->i_mapping->a_ops = &afs_dir_aops; __set_bit(NETFS_ICTX_SINGLE_NO_UPLOAD, &vnode->netfs.flags); /* Assume locally cached directory data will be valid. */ __set_bit(AFS_VNODE_DIR_VALID, &vnode->flags); break; case AFS_FTYPE_SYMLINK: /* Symlinks with a mode of 0644 are actually mountpoints. 
*/ if ((status->mode & 0777) == 0644) { inode->i_flags |= S_AUTOMOUNT; set_bit(AFS_VNODE_MOUNTPOINT, &vnode->flags); inode->i_mode = S_IFDIR | 0555; inode->i_op = &afs_mntpt_inode_operations; inode->i_fop = &afs_mntpt_file_operations; } else { inode->i_mode = S_IFLNK | status->mode; inode->i_op = &afs_symlink_inode_operations; } inode->i_mapping->a_ops = &afs_dir_aops; inode_nohighmem(inode); mapping_set_release_always(inode->i_mapping); break; default: dump_vnode(vnode, op->file[0].vnode != vnode ? op->file[0].vnode : NULL); write_sequnlock(&vnode->cb_lock); return afs_protocol_error(NULL, afs_eproto_file_type); } afs_set_i_size(vnode, status->size); afs_set_netfs_context(vnode); vnode->invalid_before = status->data_version; trace_afs_set_dv(vnode, status->data_version); inode_set_iversion_raw(&vnode->netfs.inode, status->data_version); if (!vp->scb.have_cb) { /* it's a symlink we just created (the fileserver * didn't give us a callback) */ afs_clear_cb_promise(vnode, afs_cb_promise_set_new_symlink); } else { vnode->cb_server = op->server; afs_set_cb_promise(vnode, vp->scb.callback.expires_at, afs_cb_promise_set_new_inode); } write_sequnlock(&vnode->cb_lock); return 0; } /* * Update the core inode struct from a returned status record. */ static void afs_apply_status(struct afs_operation *op, struct afs_vnode_param *vp) { struct afs_file_status *status = &vp->scb.status; struct afs_vnode *vnode = vp->vnode; struct inode *inode = &vnode->netfs.inode; struct timespec64 t; umode_t mode; bool unexpected_jump = false; bool data_changed = false; bool change_size = vp->set_size; _enter("{%llx:%llu.%u} %s", vp->fid.vid, vp->fid.vnode, vp->fid.unique, op->type ? 
op->type->name : "???"); BUG_ON(test_bit(AFS_VNODE_UNSET, &vnode->flags)); if (status->type != vnode->status.type) { pr_warn("Vnode %llx:%llx:%x changed type %u to %u\n", vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique, status->type, vnode->status.type); afs_protocol_error(NULL, afs_eproto_bad_status); return; } if (status->nlink != vnode->status.nlink) set_nlink(inode, status->nlink); if (status->owner != vnode->status.owner) inode->i_uid = make_kuid(&init_user_ns, status->owner); if (status->group != vnode->status.group) inode->i_gid = make_kgid(&init_user_ns, status->group); if (status->mode != vnode->status.mode) { mode = inode->i_mode; mode &= ~S_IALLUGO; mode |= status->mode & S_IALLUGO; WRITE_ONCE(inode->i_mode, mode); } t = status->mtime_client; inode_set_mtime_to_ts(inode, t); if (vp->update_ctime) inode_set_ctime_to_ts(inode, op->ctime); if (vnode->status.data_version != status->data_version) { trace_afs_set_dv(vnode, status->data_version); data_changed = true; } vnode->status = *status; if (vp->dv_before + vp->dv_delta != status->data_version) { trace_afs_dv_mismatch(vnode, vp->dv_before, vp->dv_delta, status->data_version); if (vnode->cb_ro_snapshot == atomic_read(&vnode->volume->cb_ro_snapshot) && atomic64_read(&vnode->cb_expires_at) != AFS_NO_CB_PROMISE) pr_warn("kAFS: vnode modified {%llx:%llu} %llx->%llx %s (op=%x)\n", vnode->fid.vid, vnode->fid.vnode, (unsigned long long)vp->dv_before + vp->dv_delta, (unsigned long long)status->data_version, op->type ? op->type->name : "???", op->debug_id); vnode->invalid_before = status->data_version; if (vnode->status.type == AFS_FTYPE_DIR) afs_invalidate_dir(vnode, afs_dir_invalid_dv_mismatch); else set_bit(AFS_VNODE_ZAP_DATA, &vnode->flags); change_size = true; data_changed = true; unexpected_jump = true; } else if (vnode->status.type == AFS_FTYPE_DIR) { /* Expected directory change is handled elsewhere so * that we can locally edit the directory and save on a * download. 
*/ if (test_bit(AFS_VNODE_DIR_VALID, &vnode->flags)) data_changed = false; change_size = true; } if (data_changed) { inode_set_iversion_raw(inode, status->data_version); /* Only update the size if the data version jumped. If the * file is being modified locally, then we might have our own * idea of what the size should be that's not the same as * what's on the server. */ vnode->netfs.remote_i_size = status->size; if (change_size || status->size > i_size_read(inode)) { afs_set_i_size(vnode, status->size); if (unexpected_jump) vnode->netfs.zero_point = status->size; inode_set_ctime_to_ts(inode, t); inode_set_atime_to_ts(inode, t); } if (op->ops == &afs_fetch_data_operation) op->fetch.subreq->rreq->i_size = status->size; } } /* * Apply a callback to a vnode. */ static void afs_apply_callback(struct afs_operation *op, struct afs_vnode_param *vp) { struct afs_callback *cb = &vp->scb.callback; struct afs_vnode *vnode = vp->vnode; if (!afs_cb_is_broken(vp->cb_break_before, vnode)) { if (op->volume->type == AFSVL_RWVOL) vnode->cb_server = op->server; afs_set_cb_promise(vnode, cb->expires_at, afs_cb_promise_set_apply_cb); } } /* * Apply the received status and callback to an inode all in the same critical * section to avoid races with afs_validate(). */ void afs_vnode_commit_status(struct afs_operation *op, struct afs_vnode_param *vp) { struct afs_vnode *vnode = vp->vnode; _enter(""); write_seqlock(&vnode->cb_lock); if (vp->scb.have_error) { /* A YFS server will return this from RemoveFile2 and AFS and * YFS will return this from InlineBulkStatus. 
*/ if (vp->scb.status.abort_code == VNOVNODE) { set_bit(AFS_VNODE_DELETED, &vnode->flags); clear_nlink(&vnode->netfs.inode); __afs_break_callback(vnode, afs_cb_break_for_deleted); op->flags &= ~AFS_OPERATION_DIR_CONFLICT; } } else if (vp->scb.have_status) { if (vp->speculative && (test_bit(AFS_VNODE_MODIFYING, &vnode->flags) || vp->dv_before != vnode->status.data_version)) /* Ignore the result of a speculative bulk status fetch * if it splits around a modification op, thereby * appearing to regress the data version. */ goto out; afs_apply_status(op, vp); if (vp->scb.have_cb) afs_apply_callback(op, vp); } else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) { drop_nlink(&vnode->netfs.inode); if (vnode->netfs.inode.i_nlink == 0) { set_bit(AFS_VNODE_DELETED, &vnode->flags); __afs_break_callback(vnode, afs_cb_break_for_deleted); } } out: write_sequnlock(&vnode->cb_lock); if (vp->scb.have_status) afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); } static void afs_fetch_status_success(struct afs_operation *op) { struct afs_vnode_param *vp = &op->file[op->fetch_status.which]; struct afs_vnode *vnode = vp->vnode; int ret; if (vnode->netfs.inode.i_state & I_NEW) { ret = afs_inode_init_from_status(op, vp, vnode); afs_op_set_error(op, ret); if (ret == 0) afs_cache_permit(vnode, op->key, vp->cb_break_before, &vp->scb); } else { afs_vnode_commit_status(op, vp); } } const struct afs_operation_ops afs_fetch_status_operation = { .issue_afs_rpc = afs_fs_fetch_status, .issue_yfs_rpc = yfs_fs_fetch_status, .success = afs_fetch_status_success, .aborted = afs_check_for_remote_deletion, }; /* * Fetch file status from the volume. 
*/
/*
 * @vnode:          vnode whose status is fetched; becomes file[0] of the op.
 * @key:            key handed to afs_alloc_operation().
 * @is_new:         not referenced anywhere in this function body.
 * @_caller_access: optional; when non-NULL it receives
 *                  op->file[0].scb.status.caller_access after the wait.
 *
 * Returns PTR_ERR() of the allocation on failure, otherwise whatever
 * afs_put_operation() returns.
 */
int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool is_new,
		     afs_access_t *_caller_access)
{
	struct afs_operation *op;

	_enter("%s,{%llx:%llu.%u,S=%lx}",
	       vnode->volume->name,
	       vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique,
	       vnode->flags);

	op = afs_alloc_operation(key, vnode->volume);
	if (IS_ERR(op))
		return PTR_ERR(op);

	afs_op_set_vnode(op, 0, vnode);

	op->nr_files	= 1;
	op->ops		= &afs_fetch_status_operation;
	afs_begin_vnode_operation(op);
	afs_wait_for_operation(op);

	/* Copied out unconditionally once the wait returns; the op's error
	 * state is only reported through afs_put_operation() below.
	 */
	if (_caller_access)
		*_caller_access = op->file[0].scb.status.caller_access;
	return afs_put_operation(op);
}

/*
 * ilookup() comparator
 *
 * @opaque is a struct afs_fid.  Matches on vnode, vnode_hi and unique; the
 * volume ID (fid->vid) is not compared here.
 */
int afs_ilookup5_test_by_fid(struct inode *inode, void *opaque)
{
	struct afs_vnode *vnode = AFS_FS_I(inode);
	struct afs_fid *fid = opaque;

	return (fid->vnode == vnode->fid.vnode &&
		fid->vnode_hi == vnode->fid.vnode_hi &&
		fid->unique == vnode->fid.unique);
}

/*
 * iget5() comparator
 *
 * @opaque is a struct afs_vnode_param; the comparison itself is delegated to
 * afs_ilookup5_test_by_fid() using the parameter's fid.
 */
static int afs_iget5_test(struct inode *inode, void *opaque)
{
	struct afs_vnode_param *vp = opaque;
	//struct afs_vnode *vnode = AFS_FS_I(inode);

	return afs_ilookup5_test_by_fid(inode, &vp->fid);
}

/*
 * iget5() inode initialiser
 *
 * Copies the volume from the superblock info and the fid from the vnode
 * parameter into the new vnode, then derives i_ino and i_generation from the
 * fid.  Always returns 0.
 */
static int afs_iget5_set(struct inode *inode, void *opaque)
{
	struct afs_vnode_param *vp = opaque;
	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
	struct afs_vnode *vnode = AFS_FS_I(inode);

	vnode->volume		= as->volume;
	vnode->fid		= vp->fid;

	/* YFS supports 96-bit vnode IDs, but Linux only supports
	 * 64-bit inode numbers.
	 */
	inode->i_ino		= vnode->fid.vnode;
	inode->i_generation	= vnode->fid.unique;
	return 0;
}

/*
 * Get a cache cookie for an inode.
 *
 * Compiles to an empty function unless CONFIG_AFS_FSCACHE is set.  Only
 * regular files, directories and symlinks get a cookie; any other type has
 * its netfs cache pointer cleared instead.
 */
static void afs_get_inode_cache(struct afs_vnode *vnode)
{
#ifdef CONFIG_AFS_FSCACHE
	struct {
		__be32 vnode_id;
		__be32 unique;
		__be32 vnode_id_ext[2];	/* Allow for a 96-bit key */
	} __packed key;
	struct afs_vnode_cache_aux aux;

	if (vnode->status.type != AFS_FTYPE_FILE &&
	    vnode->status.type != AFS_FTYPE_DIR &&
	    vnode->status.type != AFS_FTYPE_SYMLINK) {
		vnode->netfs.cache = NULL;
		return;
	}

	/* Big-endian key: low 32 bits of the vnode ID, the uniquifier, then
	 * the upper 32 bits of fid.vnode and fid.vnode_hi.
	 */
	key.vnode_id		= htonl(vnode->fid.vnode);
	key.unique		= htonl(vnode->fid.unique);
	key.vnode_id_ext[0]	= htonl(vnode->fid.vnode >> 32);
	key.vnode_id_ext[1]	= htonl(vnode->fid.vnode_hi);
	afs_set_cache_aux(vnode, &aux);

	/* Everything except a regular file is advised as a single chunk. */
	afs_vnode_set_cache(vnode,
			    fscache_acquire_cookie(
				    vnode->volume->cache,
				    vnode->status.type == AFS_FTYPE_FILE ?
				    0 : FSCACHE_ADV_SINGLE_CHUNK,
				    &key, sizeof(key),
				    &aux, sizeof(aux),
				    i_size_read(&vnode->netfs.inode)));
#endif
}

/*
 * inode retrieval
 *
 * Looks up or creates the inode for vp->fid on the superblock of the
 * operation's first file (op->file[0]).  An inode that already existed is
 * returned as-is; a new one is initialised from the status carried in @vp,
 * given a cache cookie and unlocked.
 *
 * Returns the inode, ERR_PTR(-ENOMEM) if iget5_locked() yields nothing, or
 * ERR_PTR() of the initialisation error after iget_failed().
 */
struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp)
{
	struct afs_vnode_param *dvp = &op->file[0];
	struct super_block *sb = dvp->vnode->netfs.inode.i_sb;
	struct afs_vnode *vnode;
	struct inode *inode;
	int ret;

	_enter(",{%llx:%llu.%u},,", vp->fid.vid, vp->fid.vnode, vp->fid.unique);

	inode = iget5_locked(sb, vp->fid.vnode, afs_iget5_test, afs_iget5_set, vp);
	if (!inode) {
		_leave(" = -ENOMEM");
		return ERR_PTR(-ENOMEM);
	}

	vnode = AFS_FS_I(inode);

	_debug("GOT INODE %p { vl=%llx vn=%llx, u=%x }",
	       inode, vnode->fid.vid, vnode->fid.vnode, vnode->fid.unique);

	/* deal with an existing inode */
	if (!(inode->i_state & I_NEW)) {
		_leave(" = %p", inode);
		return inode;
	}

	ret = afs_inode_init_from_status(op, vp, vnode);
	if (ret < 0)
		goto bad_inode;

	afs_get_inode_cache(vnode);

	/* success */
	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
	unlock_new_inode(inode);
	_leave(" = %p", inode);
	return inode;

	/* failure */
bad_inode:
	iget_failed(inode);
	_leave(" = %d [bad]", ret);
	return ERR_PTR(ret);
}

/*
 * iget5() initialiser for a volume's root inode: fixes the fid at
 * vnode 1 / unique 1 within the superblock's volume.  @opaque is unused.
 */
static int afs_iget5_set_root(struct inode *inode, void *opaque)
{
	struct afs_super_info *as = AFS_FS_S(inode->i_sb);
	struct afs_vnode *vnode = AFS_FS_I(inode);

	vnode->volume		= as->volume;
	vnode->fid.vid		= as->volume->vid;
	vnode->fid.vnode	= 1;
	vnode->fid.unique	= 1;
	inode->i_ino		= 1;
	inode->i_generation	= 1;
	return 0;
}

/*
 * Set up the root inode for a volume.  This is always vnode 1, unique 1 within
 * the volume.
 *
 * The inode is created with a NULL test callback and is required to be new
 * (BUG_ON otherwise).  Its status is then fetched synchronously with
 * afs_fetch_status_operation before the inode is unlocked.
 *
 * Returns the inode or an ERR_PTR(); on error the inode has been passed to
 * iget_failed().
 */
struct inode *afs_root_iget(struct super_block *sb, struct key *key)
{
	struct afs_super_info *as = AFS_FS_S(sb);
	struct afs_operation *op;
	struct afs_vnode *vnode;
	struct inode *inode;
	int ret;

	_enter(",{%llx},,", as->volume->vid);

	inode = iget5_locked(sb, 1, NULL, afs_iget5_set_root, NULL);
	if (!inode) {
		_leave(" = -ENOMEM");
		return ERR_PTR(-ENOMEM);
	}

	_debug("GOT ROOT INODE %p { vl=%llx }", inode, as->volume->vid);

	BUG_ON(!(inode->i_state & I_NEW));

	vnode = AFS_FS_I(inode);
	/* Snapshot the volume's callback-break counter for this vnode. */
	vnode->cb_v_check = atomic_read(&as->volume->cb_v_break);
	afs_set_netfs_context(vnode);

	op = afs_alloc_operation(key, as->volume);
	if (IS_ERR(op)) {
		ret = PTR_ERR(op);
		goto error;
	}

	afs_op_set_vnode(op, 0, vnode);

	op->nr_files	= 1;
	op->ops		= &afs_fetch_status_operation;
	ret = afs_do_sync_operation(op);
	if (ret < 0)
		goto error;

	afs_get_inode_cache(vnode);

	clear_bit(AFS_VNODE_UNSET, &vnode->flags);
	unlock_new_inode(inode);
	_leave(" = %p", inode);
	return inode;

error:
	iget_failed(inode);
	_leave(" = %d [bad]", ret);
	return ERR_PTR(ret);
}

/*
 * read the attributes of an inode
 *
 * If the vnode has a volume, the caller did not pass AT_STATX_DONT_SYNC and
 * there is no callback promise, the vnode is revalidated first using a key
 * requested for the volume's cell.  The attributes are then filled in under
 * the cb_lock seqlock, retrying if a writer intervened.
 *
 * Note that generic_fillattr() is given &nop_mnt_idmap; the @idmap argument
 * is not used in this function.
 */
int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
		struct kstat *stat, u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);
	struct afs_vnode *vnode = AFS_FS_I(inode);
	struct key *key;
	int ret, seq;

	_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);

	if (vnode->volume &&
	    !(query_flags & AT_STATX_DONT_SYNC) &&
	    atomic64_read(&vnode->cb_expires_at) == AFS_NO_CB_PROMISE) {
		key = afs_request_key(vnode->volume->cell);
		if (IS_ERR(key))
			return PTR_ERR(key);
		ret = afs_validate(vnode, key);
		key_put(key);
		if (ret < 0)
			return ret;
	}

	do {
		seq = read_seqbegin(&vnode->cb_lock);
		generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
		/* A silly-deleted vnode is reported with one link fewer. */
		if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
		    stat->nlink > 0)
			stat->nlink -= 1;

		/* Lie about the size of directories.  We maintain a locally
		 * edited copy and may make different allocation decisions on
		 * it, but we need to give userspace the server's size.
		 */
		if (S_ISDIR(inode->i_mode))
			stat->size = vnode->netfs.remote_i_size;
	} while (read_seqretry(&vnode->cb_lock, seq));

	return 0;
}

/*
 * discard an AFS inode
 *
 * Vnodes flagged AFS_VNODE_PSEUDODIR go through generic_delete_inode();
 * everything else uses generic_drop_inode().
 */
int afs_drop_inode(struct inode *inode)
{
	_enter("");

	if (test_bit(AFS_VNODE_PSEUDODIR, &AFS_FS_I(inode)->flags))
		return generic_delete_inode(inode);
	else
		return generic_drop_inode(inode);
}

/*
 * clear an AFS inode
 *
 * Writes back a dirty directory or symlink (unless this is a dynamic root),
 * waits for outstanding netfs I/O, drops the page cache and directory buffer,
 * then releases the writeback keys, cache cookie, permits and keys held by
 * the vnode.
 */
void afs_evict_inode(struct inode *inode)
{
	struct afs_vnode_cache_aux aux;
	struct afs_super_info *sbi = AFS_FS_S(inode->i_sb);
	struct afs_vnode *vnode = AFS_FS_I(inode);

	_enter("{%llx:%llu.%d}",
	       vnode->fid.vid,
	       vnode->fid.vnode,
	       vnode->fid.unique);

	_debug("CLEAR INODE %p", inode);

	ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode);

	if ((S_ISDIR(inode->i_mode) ||
	     S_ISLNK(inode->i_mode)) &&
	    (inode->i_state & I_DIRTY) &&
	    !sbi->dyn_root) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.for_sync = true,
			.range_end = LLONG_MAX,
		};

		afs_single_writepages(inode->i_mapping, &wbc);
	}

	netfs_wait_for_outstanding_io(inode);
	truncate_inode_pages_final(&inode->i_data);
	netfs_free_folioq_buffer(vnode->directory);

	afs_set_cache_aux(vnode, &aux);
	netfs_clear_inode_writeback(inode, &aux);
	clear_inode(inode);

	/* Unlink and put every writeback key still attached to the vnode. */
	while (!list_empty(&vnode->wb_keys)) {
		struct afs_wb_key *wbk = list_entry(vnode->wb_keys.next,
						    struct afs_wb_key, vnode_link);
		list_del(&wbk->vnode_link);
		afs_put_wb_key(wbk);
	}

	/* The second argument asks for the cookie to be retired when the
	 * vnode has been marked deleted.
	 */
	fscache_relinquish_cookie(afs_vnode_cache(vnode),
				  test_bit(AFS_VNODE_DELETED, &vnode->flags));

	/* NOTE(review): wb_keys was already emptied by the loop above, so
	 * this call presumably finds nothing left to prune - confirm.
	 */
	afs_prune_wb_keys(vnode);
	afs_put_permits(rcu_access_pointer(vnode->permit_cache));
	key_put(vnode->silly_key);
	vnode->silly_key = NULL;
	key_put(vnode->lock_key);
	vnode->lock_key = NULL;
	_leave("");
}

/*
 * Success handler for the setattr operation: records the pre-commit i_size in
 * the op, commits the returned status, and tells the page cache when a size
 * change extended the file.
 */
static void afs_setattr_success(struct afs_operation *op)
{
	struct afs_vnode_param *vp = &op->file[0];
	struct inode *inode = &vp->vnode->netfs.inode;
	loff_t old_i_size = i_size_read(inode);

	/* Saved for afs_setattr_edit_file(), which compares against it. */
	op->setattr.old_i_size = old_i_size;
	afs_vnode_commit_status(op, vp);
	/* inode->i_size has now been changed. */

	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
		loff_t size = op->setattr.attr->ia_size;
		if (size > old_i_size)
			pagecache_isize_extended(inode, old_i_size, size);
	}
}

/*
 * Post-success edit step for setattr (installed as the .edit_dir handler):
 * when the size actually changed, truncate the page cache and resize the
 * netfs and fscache views of the file to the new size.
 */
static void afs_setattr_edit_file(struct afs_operation *op)
{
	struct afs_vnode_param *vp = &op->file[0];
	struct afs_vnode *vnode = vp->vnode;
	struct inode *inode = &vnode->netfs.inode;

	if (op->setattr.attr->ia_valid & ATTR_SIZE) {
		loff_t size = op->setattr.attr->ia_size;
		loff_t old = op->setattr.old_i_size;

		/* Note: inode->i_size was updated by afs_apply_status() inside
		 * the I/O and callback locks.
		 */

		if (size != old) {
			truncate_pagecache(inode, size);
			netfs_resize_file(&vnode->netfs, size, true);
			fscache_resize_cookie(afs_vnode_cache(vnode), size);
		}
	}
}

/* Operation table used by afs_setattr(). */
static const struct afs_operation_ops afs_setattr_operation = {
	.issue_afs_rpc	= afs_fs_setattr,
	.issue_yfs_rpc	= yfs_fs_setattr,
	.success	= afs_setattr_success,
	.edit_dir	= afs_setattr_edit_file,
};

/*
 * set the attributes of an inode
 *
 * Returns 0 without doing anything when none of the requested attributes is
 * in the supported set.  A size change is only accepted on regular files
 * (-EISDIR otherwise) and is dropped from the request when it equals the
 * current i_size.  The remainder runs with the cache cookie in use and
 * validate_lock held for writing.
 */
int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
		struct iattr *attr)
{
	const unsigned int supported =
		ATTR_SIZE | ATTR_MODE | ATTR_UID | ATTR_GID |
		ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | ATTR_TOUCH;
	struct afs_operation *op;
	struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
	struct inode *inode = &vnode->netfs.inode;
	loff_t i_size;
	int ret;

	_enter("{%llx:%llu},{n=%pd},%x",
	       vnode->fid.vid, vnode->fid.vnode, dentry,
	       attr->ia_valid);

	if (!(attr->ia_valid & supported)) {
		_leave(" = 0 [unsupported]");
		return 0;
	}

	i_size = i_size_read(inode);
	if (attr->ia_valid & ATTR_SIZE) {
		if (!S_ISREG(inode->i_mode))
			return -EISDIR;

		ret = inode_newsize_ok(inode, attr->ia_size);
		if (ret)
			return ret;

		if (attr->ia_size == i_size)
			attr->ia_valid &= ~ATTR_SIZE;
	}

	fscache_use_cookie(afs_vnode_cache(vnode), true);

	/* Prevent any new writebacks from starting whilst we do this. */
	down_write(&vnode->validate_lock);

	if ((attr->ia_valid & ATTR_SIZE) && S_ISREG(inode->i_mode)) {
		loff_t size = attr->ia_size;

		/* Wait for any outstanding writes to the server to complete */
		loff_t from = min(size, i_size);
		loff_t to = max(size, i_size);
		ret = filemap_fdatawait_range(inode->i_mapping, from, to);
		if (ret < 0)
			goto out_unlock;

		/* Don't talk to the server if we're just shortening in-memory
		 * writes that haven't gone to the server yet.
		 */
		if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) &&
		    attr->ia_size < i_size &&
		    attr->ia_size > vnode->netfs.remote_i_size) {
			truncate_setsize(inode, attr->ia_size);
			netfs_resize_file(&vnode->netfs, size, false);
			fscache_resize_cookie(afs_vnode_cache(vnode),
					      attr->ia_size);
			ret = 0;
			goto out_unlock;
		}
	}

	/* Use the key of the open file when the caller supplied one. */
	op = afs_alloc_operation(((attr->ia_valid & ATTR_FILE) ?
				  afs_file_key(attr->ia_file) : NULL),
				 vnode->volume);
	if (IS_ERR(op)) {
		ret = PTR_ERR(op);
		goto out_unlock;
	}

	afs_op_set_vnode(op, 0, vnode);
	op->setattr.attr = attr;

	if (attr->ia_valid & ATTR_SIZE) {
		op->file[0].dv_delta = 1;
		op->file[0].set_size = true;
	}
	op->ctime = attr->ia_ctime;
	op->file[0].update_ctime = 1;
	op->file[0].modification = true;

	op->ops = &afs_setattr_operation;
	ret = afs_do_sync_operation(op);

out_unlock:
	up_write(&vnode->validate_lock);
	fscache_unuse_cookie(afs_vnode_cache(vnode), NULL, NULL);
	_leave(" = %d", ret);
	return ret;
}
295 4 117 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the IP protocol. * * Version: @(#)ip.h 1.0.2 04/28/93 * * Authors: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _LINUX_IP_H #define _LINUX_IP_H #include <linux/skbuff.h> #include <uapi/linux/ip.h> static inline struct iphdr *ip_hdr(const struct sk_buff *skb) { return (struct iphdr *)skb_network_header(skb); } static inline struct iphdr *inner_ip_hdr(const struct sk_buff *skb) { return (struct iphdr *)skb_inner_network_header(skb); } static inline struct iphdr *ipip_hdr(const struct sk_buff *skb) { return (struct iphdr *)skb_transport_header(skb); } static inline unsigned int ip_transport_len(const struct sk_buff *skb) { return ntohs(ip_hdr(skb)->tot_len) - skb_network_header_len(skb); } static inline unsigned int iph_totlen(const struct sk_buff *skb, const struct iphdr *iph) { u32 len = ntohs(iph->tot_len); return (len || !skb_is_gso(skb) || !skb_is_gso_tcp(skb)) ? len : skb->len - skb_network_offset(skb); } static inline unsigned int skb_ip_totlen(const struct sk_buff *skb) { return iph_totlen(skb, ip_hdr(skb)); } /* IPv4 datagram length is stored into 16bit field (tot_len) */ #define IP_MAX_MTU 0xFFFFU static inline void iph_set_totlen(struct iphdr *iph, unsigned int len) { iph->tot_len = len <= IP_MAX_MTU ? htons(len) : 0; } #endif /* _LINUX_IP_H */
17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the ICMP module. * * Version: @(#)icmp.h 1.0.4 05/13/93 * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _ICMP_H #define _ICMP_H #include <linux/icmp.h> #include <net/inet_sock.h> #include <net/snmp.h> #include <net/ip.h> struct icmp_err { int errno; unsigned int fatal:1; }; extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define __ICMP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field+256) #define ICMPMSGIN_INC_STATS(net, field) SNMP_INC_STATS_ATOMIC_LONG((net)->mib.icmpmsg_statistics, field) struct dst_entry; struct net_proto_family; struct sk_buff; struct net; void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, const struct ip_options *opt); static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { __icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt); } #if IS_ENABLED(CONFIG_NF_NAT) void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); #else static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct ip_options opts = { 0 }; __icmp_send(skb_in, type, code, info, &opts); } #endif int icmp_rcv(struct sk_buff *skb); int icmp_err(struct sk_buff *skb, u32 info); int icmp_init(void); void icmp_out_count(struct net *net, unsigned char type); bool icmp_build_probe(struct 
sk_buff *skb, struct icmphdr *icmphdr); #endif /* _ICMP_H */
21 21 11 11 11 11 10 10 9 10 19 21 20 1 21 2 19 18 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for some microsoft "special" devices * * Copyright (c) 1999 Andreas Gal * 
Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc * Copyright (c) 2006-2007 Jiri Kosina * Copyright (c) 2008 Jiri Slaby */ /* */ #include <linux/device.h> #include <linux/input.h> #include <linux/hid.h> #include <linux/module.h> #include "hid-ids.h" #define MS_HIDINPUT BIT(0) #define MS_ERGONOMY BIT(1) #define MS_PRESENTER BIT(2) #define MS_RDESC BIT(3) #define MS_NOGET BIT(4) #define MS_DUPLICATE_USAGES BIT(5) #define MS_SURFACE_DIAL BIT(6) #define MS_QUIRK_FF BIT(7) struct ms_data { unsigned long quirks; struct hid_device *hdev; struct work_struct ff_worker; __u8 strong; __u8 weak; void *output_report_dmabuf; }; #define XB1S_FF_REPORT 3 #define ENABLE_WEAK BIT(0) #define ENABLE_STRONG BIT(1) enum { MAGNITUDE_STRONG = 2, MAGNITUDE_WEAK, MAGNITUDE_NUM }; struct xb1s_ff_report { __u8 report_id; __u8 enable; __u8 magnitude[MAGNITUDE_NUM]; __u8 duration_10ms; __u8 start_delay_10ms; __u8 loop_count; } __packed; static const __u8 *ms_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; /* * Microsoft Wireless Desktop Receiver (Model 1028) has * 'Usage Min/Max' where it ought to have 'Physical Min/Max' */ if ((quirks & MS_RDESC) && *rsize == 571 && rdesc[557] == 0x19 && rdesc[559] == 0x29) { hid_info(hdev, "fixing up Microsoft Wireless Receiver Model 1028 report descriptor\n"); rdesc[557] = 0x35; rdesc[559] = 0x45; } return rdesc; } #define ms_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, \ EV_KEY, (c)) static int ms_ergonomy_kb_quirk(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { struct input_dev *input = hi->input; if ((usage->hid & HID_USAGE_PAGE) == HID_UP_CONSUMER) { switch (usage->hid & HID_USAGE) { /* * Microsoft uses these 2 reserved usage ids for 2 keys on * the MS office kb labelled "Office Home" and "Task Pane". 
*/ case 0x29d: ms_map_key_clear(KEY_PROG1); return 1; case 0x29e: ms_map_key_clear(KEY_PROG2); return 1; } return 0; } if ((usage->hid & HID_USAGE_PAGE) != HID_UP_MSVENDOR) return 0; switch (usage->hid & HID_USAGE) { case 0xfd06: ms_map_key_clear(KEY_CHAT); break; case 0xfd07: ms_map_key_clear(KEY_PHONE); break; case 0xff00: /* Special keypad keys */ ms_map_key_clear(KEY_KPEQUAL); set_bit(KEY_KPLEFTPAREN, input->keybit); set_bit(KEY_KPRIGHTPAREN, input->keybit); break; case 0xff01: /* Scroll wheel */ hid_map_usage_clear(hi, usage, bit, max, EV_REL, REL_WHEEL); break; case 0xff02: /* * This byte contains a copy of the modifier keys byte of a * standard hid keyboard report, as send by interface 0 * (this usage is found on interface 1). * * This byte only gets send when another key in the same report * changes state, and as such is useless, ignore it. */ return -1; case 0xff05: set_bit(EV_REP, input->evbit); ms_map_key_clear(KEY_F13); set_bit(KEY_F14, input->keybit); set_bit(KEY_F15, input->keybit); set_bit(KEY_F16, input->keybit); set_bit(KEY_F17, input->keybit); set_bit(KEY_F18, input->keybit); break; default: return 0; } return 1; } static int ms_presenter_8k_quirk(struct hid_input *hi, struct hid_usage *usage, unsigned long **bit, int *max) { if ((usage->hid & HID_USAGE_PAGE) != HID_UP_MSVENDOR) return 0; set_bit(EV_REP, hi->input->evbit); switch (usage->hid & HID_USAGE) { case 0xfd08: ms_map_key_clear(KEY_FORWARD); break; case 0xfd09: ms_map_key_clear(KEY_BACK); break; case 0xfd0b: ms_map_key_clear(KEY_PLAYPAUSE); break; case 0xfd0e: ms_map_key_clear(KEY_CLOSE); break; case 0xfd0f: ms_map_key_clear(KEY_PLAY); break; default: return 0; } return 1; } static int ms_surface_dial_quirk(struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { switch (usage->hid & HID_USAGE_PAGE) { case 0xff070000: case HID_UP_DIGITIZER: /* ignore those axis */ return -1; case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: case 
HID_GD_Y: case HID_GD_RFKILL_BTN: /* ignore those axis */ return -1; } } return 0; } static int ms_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; if (quirks & MS_ERGONOMY) { int ret = ms_ergonomy_kb_quirk(hi, usage, bit, max); if (ret) return ret; } if ((quirks & MS_PRESENTER) && ms_presenter_8k_quirk(hi, usage, bit, max)) return 1; if (quirks & MS_SURFACE_DIAL) { int ret = ms_surface_dial_quirk(hi, field, usage, bit, max); if (ret) return ret; } return 0; } static int ms_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; if (quirks & MS_DUPLICATE_USAGES) clear_bit(usage->code, *bit); return 0; } static int ms_event(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct ms_data *ms = hid_get_drvdata(hdev); unsigned long quirks = ms->quirks; struct input_dev *input; if (!(hdev->claimed & HID_CLAIMED_INPUT) || !field->hidinput || !usage->type) return 0; input = field->hidinput->input; /* Handling MS keyboards special buttons */ if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff00)) { /* Special keypad keys */ input_report_key(input, KEY_KPEQUAL, value & 0x01); input_report_key(input, KEY_KPLEFTPAREN, value & 0x02); input_report_key(input, KEY_KPRIGHTPAREN, value & 0x04); return 1; } if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff01)) { /* Scroll wheel */ int step = ((value & 0x60) >> 5) + 1; switch (value & 0x1f) { case 0x01: input_report_rel(input, REL_WHEEL, step); break; case 0x1f: input_report_rel(input, REL_WHEEL, -step); break; } return 1; } if (quirks & MS_ERGONOMY && usage->hid == (HID_UP_MSVENDOR | 0xff05)) { static unsigned int last_key = 0; 
unsigned int key = 0; switch (value) { case 0x01: key = KEY_F14; break; case 0x02: key = KEY_F15; break; case 0x04: key = KEY_F16; break; case 0x08: key = KEY_F17; break; case 0x10: key = KEY_F18; break; } if (key) { input_event(input, usage->type, key, 1); last_key = key; } else input_event(input, usage->type, last_key, 0); return 1; } return 0; } static void ms_ff_worker(struct work_struct *work) { struct ms_data *ms = container_of(work, struct ms_data, ff_worker); struct hid_device *hdev = ms->hdev; struct xb1s_ff_report *r = ms->output_report_dmabuf; int ret; memset(r, 0, sizeof(*r)); r->report_id = XB1S_FF_REPORT; r->enable = ENABLE_WEAK | ENABLE_STRONG; /* * Specifying maximum duration and maximum loop count should * cover maximum duration of a single effect, which is 65536 * ms */ r->duration_10ms = U8_MAX; r->loop_count = U8_MAX; r->magnitude[MAGNITUDE_STRONG] = ms->strong; /* left actuator */ r->magnitude[MAGNITUDE_WEAK] = ms->weak; /* right actuator */ ret = hid_hw_output_report(hdev, (__u8 *)r, sizeof(*r)); if (ret < 0) hid_warn(hdev, "failed to send FF report\n"); } static int ms_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect) { struct hid_device *hid = input_get_drvdata(dev); struct ms_data *ms = hid_get_drvdata(hid); if (effect->type != FF_RUMBLE) return 0; /* * Magnitude is 0..100 so scale the 16-bit input here */ ms->strong = ((u32) effect->u.rumble.strong_magnitude * 100) / U16_MAX; ms->weak = ((u32) effect->u.rumble.weak_magnitude * 100) / U16_MAX; schedule_work(&ms->ff_worker); return 0; } static int ms_init_ff(struct hid_device *hdev) { struct hid_input *hidinput; struct input_dev *input_dev; struct ms_data *ms = hid_get_drvdata(hdev); if (list_empty(&hdev->inputs)) { hid_err(hdev, "no inputs found\n"); return -ENODEV; } hidinput = list_entry(hdev->inputs.next, struct hid_input, list); input_dev = hidinput->input; if (!(ms->quirks & MS_QUIRK_FF)) return 0; ms->hdev = hdev; INIT_WORK(&ms->ff_worker, ms_ff_worker); 
ms->output_report_dmabuf = devm_kzalloc(&hdev->dev, sizeof(struct xb1s_ff_report), GFP_KERNEL); if (ms->output_report_dmabuf == NULL) return -ENOMEM; input_set_capability(input_dev, EV_FF, FF_RUMBLE); return input_ff_create_memless(input_dev, NULL, ms_play_effect); } static void ms_remove_ff(struct hid_device *hdev) { struct ms_data *ms = hid_get_drvdata(hdev); if (!(ms->quirks & MS_QUIRK_FF)) return; cancel_work_sync(&ms->ff_worker); } static int ms_probe(struct hid_device *hdev, const struct hid_device_id *id) { unsigned long quirks = id->driver_data; struct ms_data *ms; int ret; ms = devm_kzalloc(&hdev->dev, sizeof(*ms), GFP_KERNEL); if (ms == NULL) return -ENOMEM; ms->quirks = quirks; hid_set_drvdata(hdev, ms); if (quirks & MS_NOGET) hdev->quirks |= HID_QUIRK_NOGET; if (quirks & MS_SURFACE_DIAL) hdev->quirks |= HID_QUIRK_INPUT_PER_APP; ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT | ((quirks & MS_HIDINPUT) ? 
HID_CONNECT_HIDINPUT_FORCE : 0)); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } ret = ms_init_ff(hdev); if (ret) hid_err(hdev, "could not initialize ff, continuing anyway"); return 0; err_free: return ret; } static void ms_remove(struct hid_device *hdev) { hid_hw_stop(hdev); ms_remove_ff(hdev); } static const struct hid_device_id ms_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_SIDEWINDER_GV), .driver_data = MS_HIDINPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_OFFICE_KB), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE4K_JP), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_NE7K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_LK6K), .driver_data = MS_ERGONOMY | MS_RDESC }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_USB), .driver_data = MS_PRESENTER }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_7K), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_600), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_DIGITAL_MEDIA_3KV1), .driver_data = MS_ERGONOMY }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_WIRELESS_OPTICAL_DESKTOP_3_0), .driver_data = MS_NOGET }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_MOUSE_4500), .driver_data = MS_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_POWER_COVER), .driver_data = MS_HIDINPUT }, { HID_USB_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_COMFORT_KEYBOARD), .driver_data = MS_ERGONOMY}, { 
HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_PRESENTER_8K_BT), .driver_data = MS_PRESENTER }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, 0x091B), .driver_data = MS_SURFACE_DIAL }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1708), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1708_BLE), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1914), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1797), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_MS_XBOX_CONTROLLER_MODEL_1797_BLE), .driver_data = MS_QUIRK_FF }, { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_MICROSOFT, USB_DEVICE_ID_8BITDO_SN30_PRO_PLUS), .driver_data = MS_QUIRK_FF }, { } }; MODULE_DEVICE_TABLE(hid, ms_devices); static struct hid_driver ms_driver = { .name = "microsoft", .id_table = ms_devices, .report_fixup = ms_report_fixup, .input_mapping = ms_input_mapping, .input_mapped = ms_input_mapped, .event = ms_event, .probe = ms_probe, .remove = ms_remove, }; module_hid_driver(ms_driver); MODULE_DESCRIPTION("HID driver for some microsoft \"special\" devices"); MODULE_LICENSE("GPL");
1 1 11 11 11 11 8 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 // SPDX-License-Identifier: GPL-2.0-only #include <linux/linkage.h> #include <linux/mmap_lock.h> #include <linux/mm.h> #include <linux/time_namespace.h> #include <linux/types.h> #include <linux/vdso_datastore.h> #include <vdso/datapage.h> /* * The vDSO data page. */ #ifdef CONFIG_HAVE_GENERIC_VDSO static union { struct vdso_time_data data; u8 page[PAGE_SIZE]; } vdso_time_data_store __page_aligned_data; struct vdso_time_data *vdso_k_time_data = &vdso_time_data_store.data; static_assert(sizeof(vdso_time_data_store) == PAGE_SIZE); #endif /* CONFIG_HAVE_GENERIC_VDSO */ #ifdef CONFIG_VDSO_GETRANDOM static union { struct vdso_rng_data data; u8 page[PAGE_SIZE]; } vdso_rng_data_store __page_aligned_data; struct vdso_rng_data *vdso_k_rng_data = &vdso_rng_data_store.data; static_assert(sizeof(vdso_rng_data_store) == PAGE_SIZE); #endif /* CONFIG_VDSO_GETRANDOM */ #ifdef CONFIG_ARCH_HAS_VDSO_ARCH_DATA static union { struct vdso_arch_data data; u8 page[VDSO_ARCH_DATA_SIZE]; } vdso_arch_data_store __page_aligned_data; struct vdso_arch_data *vdso_k_arch_data = &vdso_arch_data_store.data; #endif /* CONFIG_ARCH_HAS_VDSO_ARCH_DATA */ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, struct vm_area_struct *vma, struct vm_fault *vmf) { struct page *timens_page = find_timens_vvar_page(vma); unsigned long addr, pfn; vm_fault_t err; switch (vmf->pgoff) { case VDSO_TIME_PAGE_OFFSET: if (!IS_ENABLED(CONFIG_HAVE_GENERIC_VDSO)) return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); if (timens_page) { /* * Fault in VVAR page too, 
since it will be accessed * to get clock data anyway. */ addr = vmf->address + VDSO_TIMENS_PAGE_OFFSET * PAGE_SIZE; err = vmf_insert_pfn(vma, addr, pfn); if (unlikely(err & VM_FAULT_ERROR)) return err; pfn = page_to_pfn(timens_page); } break; case VDSO_TIMENS_PAGE_OFFSET: /* * If a task belongs to a time namespace then a namespace * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET * offset. * See also the comment near timens_setup_vdso_data(). */ if (!IS_ENABLED(CONFIG_TIME_NS) || !timens_page) return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_time_data)); break; case VDSO_RNG_PAGE_OFFSET: if (!IS_ENABLED(CONFIG_VDSO_GETRANDOM)) return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_rng_data)); break; case VDSO_ARCH_PAGES_START ... VDSO_ARCH_PAGES_END: if (!IS_ENABLED(CONFIG_ARCH_HAS_VDSO_ARCH_DATA)) return VM_FAULT_SIGBUS; pfn = __phys_to_pfn(__pa_symbol(vdso_k_arch_data)) + vmf->pgoff - VDSO_ARCH_PAGES_START; break; default: return VM_FAULT_SIGBUS; } return vmf_insert_pfn(vma, vmf->address, pfn); } const struct vm_special_mapping vdso_vvar_mapping = { .name = "[vvar]", .fault = vvar_fault, }; struct vm_area_struct *vdso_install_vvar_mapping(struct mm_struct *mm, unsigned long addr) { return _install_special_mapping(mm, addr, VDSO_NR_PAGES * PAGE_SIZE, VM_READ | VM_MAYREAD | VM_IO | VM_DONTDUMP | VM_PFNMAP | VM_SEALED_SYSMAP, &vdso_vvar_mapping); } #ifdef CONFIG_TIME_NS /* * The vvar page layout depends on whether a task belongs to the root or * non-root time namespace. Whenever a task changes its namespace, the VVAR * page tables are cleared and then they will be re-faulted with a * corresponding layout. * See also the comment near timens_setup_vdso_clock_data() for details. 
*/ int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) { struct mm_struct *mm = task->mm; struct vm_area_struct *vma; VMA_ITERATOR(vmi, mm, 0); mmap_read_lock(mm); for_each_vma(vmi, vma) { if (vma_is_special_mapping(vma, &vdso_vvar_mapping)) zap_vma_pages(vma); } mmap_read_unlock(mm); return 0; } #endif
568 566 155 15 14 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 
1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 
1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 // SPDX-License-Identifier: GPL-2.0-or-later /* * vrf.c: device driver to encapsulate a VRF space * * Copyright (c) 2015 Cumulus Networks. All rights reserved. 
* Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com> * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> * * Based on dummy, team and ipvlan drivers */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ip.h> #include <linux/init.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> #include <linux/rtnetlink.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> #include <linux/hashtable.h> #include <linux/spinlock_types.h> #include <linux/inetdevice.h> #include <net/arp.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/route.h> #include <net/addrconf.h> #include <net/l3mdev.h> #include <net/fib_rules.h> #include <net/netdev_lock.h> #include <net/sch_generic.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack.h> #include <net/inet_dscp.h> #define DRV_NAME "vrf" #define DRV_VERSION "1.1" #define FIB_RULE_PREF 1000 /* default preference for FIB rules */ #define HT_MAP_BITS 4 #define HASH_INITVAL ((u32)0xcafef00d) struct vrf_map { DECLARE_HASHTABLE(ht, HT_MAP_BITS); spinlock_t vmap_lock; /* shared_tables: * count how many distinct tables do not comply with the strict mode * requirement. * shared_tables value must be 0 in order to enable the strict mode. * * example of the evolution of shared_tables: * | time * add vrf0 --> table 100 shared_tables = 0 | t0 * add vrf1 --> table 101 shared_tables = 0 | t1 * add vrf2 --> table 100 shared_tables = 1 | t2 * add vrf3 --> table 100 shared_tables = 1 | t3 * add vrf4 --> table 101 shared_tables = 2 v t4 * * shared_tables is a "step function" (or "staircase function") * and it is increased by one when the second vrf is associated to a * table. * * at t2, vrf0 and vrf2 are bound to table 100: shared_tables = 1. 
* * at t3, another dev (vrf3) is bound to the same table 100 but the * value of shared_tables is still 1. * This means that no matter how many new vrfs will register on the * table 100, the shared_tables will not increase (considering only * table 100). * * at t4, vrf4 is bound to table 101, and shared_tables = 2. * * Looking at the value of shared_tables we can immediately know if * the strict_mode can or cannot be enforced. Indeed, strict_mode * can be enforced iff shared_tables = 0. * * Conversely, shared_tables is decreased when a vrf is de-associated * from a table with exactly two associated vrfs. */ u32 shared_tables; bool strict_mode; }; struct vrf_map_elem { struct hlist_node hnode; struct list_head vrf_list; /* VRFs registered to this table */ u32 table_id; int users; int ifindex; }; static unsigned int vrf_net_id; /* per netns vrf data */ struct netns_vrf { /* protected by rtnl lock */ bool add_fib_rules; struct vrf_map vmap; struct ctl_table_header *ctl_hdr; }; struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; #if IS_ENABLED(CONFIG_IPV6) struct fib6_table *fib6_table; #endif u32 tb_id; struct list_head me_list; /* entry in vrf_map_elem */ int ifindex; }; static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } static struct vrf_map *netns_vrf_map(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); return &nn_vrf->vmap; } static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev) { return netns_vrf_map(dev_net(dev)); } static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me) { struct list_head *me_head = &me->vrf_list; struct net_vrf *vrf; if (list_empty(me_head)) return -ENODEV; vrf = list_first_entry(me_head, struct net_vrf, me_list); return vrf->ifindex; } static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags) { struct vrf_map_elem *me; me = kmalloc(sizeof(*me), flags); if (!me) return NULL; return me; } static void 
vrf_map_elem_free(struct vrf_map_elem *me) { kfree(me); } static void vrf_map_elem_init(struct vrf_map_elem *me, int table_id, int ifindex, int users) { me->table_id = table_id; me->ifindex = ifindex; me->users = users; INIT_LIST_HEAD(&me->vrf_list); } static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap, u32 table_id) { struct vrf_map_elem *me; u32 key; key = jhash_1word(table_id, HASH_INITVAL); hash_for_each_possible(vmap->ht, me, hnode, key) { if (me->table_id == table_id) return me; } return NULL; } static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me) { u32 table_id = me->table_id; u32 key; key = jhash_1word(table_id, HASH_INITVAL); hash_add(vmap->ht, &me->hnode, key); } static void vrf_map_del_elem(struct vrf_map_elem *me) { hash_del(&me->hnode); } static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock) { spin_lock(&vmap->vmap_lock); } static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock) { spin_unlock(&vmap->vmap_lock); } /* called with rtnl lock held */ static int vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vrf_map *vmap = netns_vrf_map_by_dev(dev); struct net_vrf *vrf = netdev_priv(dev); struct vrf_map_elem *new_me, *me; u32 table_id = vrf->tb_id; bool free_new_me = false; int users; int res; /* we pre-allocate elements used in the spin-locked section (so that we * keep the spinlock as short as possible). */ new_me = vrf_map_elem_alloc(GFP_KERNEL); if (!new_me) return -ENOMEM; vrf_map_elem_init(new_me, table_id, dev->ifindex, 0); vrf_map_lock(vmap); me = vrf_map_lookup_elem(vmap, table_id); if (!me) { me = new_me; vrf_map_add_elem(vmap, me); goto link_vrf; } /* we already have an entry in the vrf_map, so it means there is (at * least) a vrf registered on the specific table. 
*/ free_new_me = true; if (vmap->strict_mode) { /* vrfs cannot share the same table */ NL_SET_ERR_MSG(extack, "Table is used by another VRF"); res = -EBUSY; goto unlock; } link_vrf: users = ++me->users; if (users == 2) ++vmap->shared_tables; list_add(&vrf->me_list, &me->vrf_list); res = 0; unlock: vrf_map_unlock(vmap); /* clean-up, if needed */ if (free_new_me) vrf_map_elem_free(new_me); return res; } /* called with rtnl lock held */ static void vrf_map_unregister_dev(struct net_device *dev) { struct vrf_map *vmap = netns_vrf_map_by_dev(dev); struct net_vrf *vrf = netdev_priv(dev); u32 table_id = vrf->tb_id; struct vrf_map_elem *me; int users; vrf_map_lock(vmap); me = vrf_map_lookup_elem(vmap, table_id); if (!me) goto unlock; list_del(&vrf->me_list); users = --me->users; if (users == 1) { --vmap->shared_tables; } else if (users == 0) { vrf_map_del_elem(me); /* no one will refer to this element anymore */ vrf_map_elem_free(me); } unlock: vrf_map_unlock(vmap); } /* return the vrf device index associated with the table_id */ static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id) { struct vrf_map *vmap = netns_vrf_map(net); struct vrf_map_elem *me; int ifindex; vrf_map_lock(vmap); if (!vmap->strict_mode) { ifindex = -EPERM; goto unlock; } me = vrf_map_lookup_elem(vmap, table_id); if (!me) { ifindex = -ENODEV; goto unlock; } ifindex = vrf_map_elem_get_vrf_ifindex(me); unlock: vrf_map_unlock(vmap); return ifindex; } /* by default VRF devices do not have a qdisc and are expected * to be created with only a single queue. */ static bool qdisc_tx_is_default(const struct net_device *dev) { struct netdev_queue *txq; if (dev->num_tx_queues > 1) return false; txq = netdev_get_tx_queue(dev, 0); return qdisc_txq_has_no_queue(txq); } /* Local traffic destined to local address. Reinsert the packet to rx * path, similar to loopback handling. 
*/ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst) { unsigned int len = skb->len; skb_orphan(skb); skb_dst_set(skb, dst); /* set pkt_type to avoid skb hitting packet taps twice - * once on Tx and again in Rx processing */ skb->pkt_type = PACKET_LOOPBACK; skb->protocol = eth_type_trans(skb, dev); if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) dev_dstats_rx_add(dev, len); else dev_dstats_rx_dropped(dev); return NETDEV_TX_OK; } static void vrf_nf_set_untracked(struct sk_buff *skb) { if (skb_get_nfct(skb) == 0) nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } static void vrf_nf_reset_ct(struct sk_buff *skb) { if (skb_get_nfct(skb) == IP_CT_UNTRACKED) nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) static int vrf_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; vrf_nf_reset_ct(skb); err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { const struct ipv6hdr *iph; struct net *net = dev_net(skb->dev); struct flowi6 fl6; int ret = NET_XMIT_DROP; struct dst_entry *dst; struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) goto err; iph = ipv6_hdr(skb); memset(&fl6, 0, sizeof(fl6)); /* needed to match OIF rule */ fl6.flowi6_l3mdev = dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = iph->nexthdr; dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst) || dst == dst_null) goto err; skb_dst_drop(skb); /* if dst.dev is the VRF device again this is locally originated traffic * destined to a local address. Short circuit to Rx path. 
*/ if (dst->dev == dev) return vrf_local_xmit(skb, dev, dst); skb_dst_set(skb, dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; return ret; err: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } #else static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { vrf_tx_error(dev, skb); return NET_XMIT_DROP; } #endif /* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; vrf_nf_reset_ct(skb); err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { struct iphdr *ip4h; int ret = NET_XMIT_DROP; struct flowi4 fl4; struct net *net = dev_net(vrf_dev); struct rtable *rt; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) goto err; ip4h = ip_hdr(skb); memset(&fl4, 0, sizeof(fl4)); /* needed to match OIF rule */ fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip4h)); fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; skb_dst_drop(skb); /* if dst.dev is the VRF device again this is locally originated traffic * destined to a local address. Short circuit to Rx path. 
*/ if (rt->dst.dev == vrf_dev) return vrf_local_xmit(skb, vrf_dev, &rt->dst); skb_dst_set(skb, &rt->dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); if (!ip4h->saddr) { ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0, RT_SCOPE_LINK); } memset(IPCB(skb), 0, sizeof(*IPCB(skb))); ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; out: return ret; err: vrf_tx_error(vrf_dev, skb); goto out; } static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) { switch (skb->protocol) { case htons(ETH_P_IP): return vrf_process_v4_outbound(skb, dev); case htons(ETH_P_IPV6): return vrf_process_v6_outbound(skb, dev); default: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } } static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { unsigned int len = skb->len; netdev_tx_t ret; ret = is_ip_tx_frame(skb, dev); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) dev_dstats_tx_add(dev, len); else dev_dstats_tx_dropped(dev); return ret; } static void vrf_finish_direct(struct sk_buff *skb) { struct net_device *vrf_dev = skb->dev; if (!list_empty(&vrf_dev->ptype_all) && likely(skb_headroom(skb) >= ETH_HLEN)) { struct ethhdr *eth = skb_push(skb, ETH_HLEN); ether_addr_copy(eth->h_source, vrf_dev->dev_addr); eth_zero_addr(eth->h_dest); eth->h_proto = skb->protocol; rcu_read_lock_bh(); dev_queue_xmit_nit(skb, vrf_dev); rcu_read_unlock_bh(); skb_pull(skb, ETH_HLEN); } vrf_nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) /* modelled after ip6_finish_output2 */ static int vrf_finish_output6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; const struct in6_addr *nexthop; struct neighbour *neigh; int ret; vrf_nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; rcu_read_lock(); nexthop = 
rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); rcu_read_unlock(); return ret; } rcu_read_unlock(); IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } /* modelled after ip6_output */ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, vrf_finish_output6, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* set dst on skb to send packet to us via dev_xmit path. Allows * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rt6_info *rt6; rcu_read_lock(); rt6 = rcu_dereference(vrf->rt6); if (likely(rt6)) { dst = &rt6->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) { vrf_tx_error(vrf_dev, skb); return NULL; } skb_dst_drop(skb); skb_dst_set(skb, dst); return skb; } static int vrf_output6_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { vrf_finish_direct(skb); return vrf_ip6_local_out(net, sk, skb); } static int vrf_output6_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { int err = 1; skb->protocol = htons(ETH_P_IPV6); if (!(IPCB(skb)->flags & IPSKB_REROUTED)) err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, vrf_output6_direct_finish); if (likely(err == 1)) vrf_finish_direct(skb); return err; } static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = vrf_output6_direct(net, sk, skb); if 
(likely(err == 1)) err = vrf_ip6_local_out(net, sk, skb); return err; } static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(vrf_dev); int err; skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); if (likely(err == 1)) err = vrf_output6_direct(net, sk, skb); if (likely(err == 1)) return skb; return NULL; } static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { /* don't divert link scope packets */ if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; vrf_nf_set_untracked(skb); if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_redirect(vrf_dev, skb); } /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); struct net *net = dev_net(dev); struct dst_entry *dst; RCU_INIT_POINTER(vrf->rt6, NULL); synchronize_rcu(); /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ if (rt6) { dst = &rt6->dst; netdev_ref_replace(dst->dev, net->loopback_dev, &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } } static int vrf_rt6_create(struct net_device *dev) { int flags = DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); struct rt6_info *rt6; int rc = -ENOMEM; /* IPv6 can be CONFIG enabled and then disabled runtime */ if (!ipv6_mod_enabled()) return 0; vrf->fib6_table = fib6_new_table(net, vrf->tb_id); if (!vrf->fib6_table) goto out; /* create a dst for routing packets out a VRF device */ rt6 = ip6_dst_alloc(net, dev, flags); if (!rt6) goto out; rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); rc = 0; out: return rc; } #else static struct sk_buff *vrf_ip6_out(struct 
net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { return skb; } static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } static int vrf_rt6_create(struct net_device *dev) { return 0; } #endif /* modelled after ip_finish_output2 */ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; vrf_nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) { dev->stats.tx_errors++; return -ENOMEM; } } rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int ret; sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock(); return ret; } rcu_read_unlock(); vrf_tx_error(skb->dev, skb); return -EINVAL; } static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, dev, vrf_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } /* set dst on skb to send packet to us via dev_xmit path. Allows * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. 
*/ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rtable *rth; rcu_read_lock(); rth = rcu_dereference(vrf->rth); if (likely(rth)) { dst = &rth->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) { vrf_tx_error(vrf_dev, skb); return NULL; } skb_dst_drop(skb); skb_dst_set(skb, dst); return skb; } static int vrf_output_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { vrf_finish_direct(skb); return vrf_ip_local_out(net, sk, skb); } static int vrf_output_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { int err = 1; skb->protocol = htons(ETH_P_IP); if (!(IPCB(skb)->flags & IPSKB_REROUTED)) err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, vrf_output_direct_finish); if (likely(err == 1)) vrf_finish_direct(skb); return err; } static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = vrf_output_direct(net, sk, skb); if (likely(err == 1)) err = vrf_ip_local_out(net, sk, skb); return err; } static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(vrf_dev); int err; skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip_out_direct_finish); if (likely(err == 1)) err = vrf_output_direct(net, sk, skb); if (likely(err == 1)) return skb; return NULL; } static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { /* don't divert multicast or local broadcast */ if (ipv4_is_multicast(ip_hdr(skb)->daddr) || ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; vrf_nf_set_untracked(skb); if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_redirect(vrf_dev, skb); } /* called with rcu lock 
held */ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb, u16 proto) { switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); case AF_INET6: return vrf_ip6_out(vrf_dev, sk, skb); } return skb; } /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { struct rtable *rth = rtnl_dereference(vrf->rth); struct net *net = dev_net(dev); struct dst_entry *dst; RCU_INIT_POINTER(vrf->rth, NULL); synchronize_rcu(); /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ if (rth) { dst = &rth->dst; netdev_ref_replace(dst->dev, net->loopback_dev, &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } } static int vrf_rtable_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); struct rtable *rth; if (!fib_new_table(dev_net(dev), vrf->tb_id)) return -ENOMEM; /* create a dst for routing packets out through a VRF device */ rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1); if (!rth) return -ENOMEM; rth->dst.output = vrf_output; rcu_assign_pointer(vrf->rth, rth); return 0; } /**************************** device handling ********************/ /* cycle interface to flush neighbor cache and move routes across tables */ static void cycle_netdev(struct net_device *dev, struct netlink_ext_ack *extack) { unsigned int flags = dev->flags; int ret; if (!netif_running(dev)) return; ret = dev_change_flags(dev, flags & ~IFF_UP, extack); if (ret >= 0) ret = dev_change_flags(dev, flags, extack); if (ret < 0) { netdev_err(dev, "Failed to cycle device %s; route tables might be wrong!\n", dev->name); } } static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { int ret; /* do not allow loopback device to be enslaved to a VRF. * The vrf device acts as the loopback for the vrf. 
*/
	if (port_dev == dev_net(dev)->loopback_dev) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave loopback device to a VRF");
		return -EOPNOTSUPP;
	}

	/* Mark the port as an L3 slave before linking; undone at err: below. */
	port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
	ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack);
	if (ret < 0)
		goto err;

	cycle_netdev(port_dev, extack);

	return 0;

err:
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
	return ret;
}

/* ndo_add_slave handler: reject L3 masters and ports that are already L3
 * slaves, then enslave via do_vrf_add_slave().
 */
static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev,
			 struct netlink_ext_ack *extack)
{
	if (netif_is_l3_master(port_dev)) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave an L3 master device to a VRF");
		return -EINVAL;
	}

	/* Already an L3 slave: refused, with no extack message set here. */
	if (netif_is_l3_slave(port_dev))
		return -EINVAL;

	return do_vrf_add_slave(dev, port_dev, extack);
}

/* inverse of do_vrf_add_slave */
static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
	netdev_upper_dev_unlink(port_dev, dev);
	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;

	cycle_netdev(port_dev, NULL);

	return 0;
}

/* ndo_del_slave handler; thin wrapper around do_vrf_del_slave(). */
static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
{
	return do_vrf_del_slave(dev, port_dev);
}

/* ndo_uninit handler: release the IPv4 and IPv6 dsts of this VRF. */
static void vrf_dev_uninit(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	vrf_rtable_release(dev, vrf);
	vrf_rt6_release(dev, vrf);
}

/* ndo_init handler: create the IPv4 and IPv6 dsts and set device flags.
 * Returns 0 on success; any creation failure is reported as -ENOMEM.
 */
static int vrf_dev_init(struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	/* create the default dst which points back to us */
	if (vrf_rtable_create(dev) != 0)
		goto out_nomem;

	if (vrf_rt6_create(dev) != 0)
		goto out_rth;

	dev->flags = IFF_MASTER | IFF_NOARP;

	/* similarly, oper state is irrelevant; set to up to avoid confusion */
	dev->operstate = IF_OPER_UP;
	netdev_lockdep_set_classes(dev);
	return 0;

out_rth:
	/* IPv6 setup failed: undo the IPv4 dst created above. */
	vrf_rtable_release(dev, vrf);
out_nomem:
	return -ENOMEM;
}

/* net_device callbacks of a VRF device. */
static const struct net_device_ops vrf_netdev_ops = {
	.ndo_init		= vrf_dev_init,
	.ndo_uninit		= vrf_dev_uninit,
	.ndo_start_xmit		= vrf_xmit,
	.ndo_set_mac_address	= eth_mac_addr,
	.ndo_add_slave		= vrf_add_slave,
	.ndo_del_slave		= vrf_del_slave,
};

static u32 vrf_fib_table(const struct
net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	/* The FIB table id stored in the device's private data. */
	return vrf->tb_id;
}

/* okfn handed to nf_hook() by vrf_rcv_nfhook(): frees the skb, returns 0. */
static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	return 0;
}

/* Run netfilter hook @hook of family @pf on @skb with @dev as input device.
 * Returns the skb if nf_hook() answered 1, NULL otherwise.
 */
static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook,
				      struct sk_buff *skb,
				      struct net_device *dev)
{
	struct net *net = dev_net(dev);

	if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1)
		skb = NULL;    /* kfree_skb(skb) handled by nf code */

	return skb;
}

/* Write an Ethernet header in front of the current data, with both
 * addresses set to the VRF device's address and ethertype @proto, then pull
 * it again so skb->data is back where it started.
 * Returns 0 on success, -ENOBUFS if headroom cannot be obtained.
 */
static int vrf_prepare_mac_header(struct sk_buff *skb,
				  struct net_device *vrf_dev, u16 proto)
{
	struct ethhdr *eth;
	int err;

	/* in general, we do not know if there is enough space in the head of
	 * the packet for hosting the mac header.
	 */
	err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev));
	if (unlikely(err))
		/* no space in the skb head */
		return -ENOBUFS;

	__skb_push(skb, ETH_HLEN);
	eth = (struct ethhdr *)skb->data;

	skb_reset_mac_header(skb);
	skb_reset_mac_len(skb);

	/* we set the ethernet destination and the source addresses to the
	 * address of the VRF device.
	 */
	ether_addr_copy(eth->h_dest, vrf_dev->dev_addr);
	ether_addr_copy(eth->h_source, vrf_dev->dev_addr);
	eth->h_proto = htons(proto);

	/* the destination address of the Ethernet frame corresponds to the
	 * address set on the VRF interface; therefore, the packet is intended
	 * to be processed locally.
	 */
	skb->protocol = eth->h_proto;
	skb->pkt_type = PACKET_HOST;

	/* Account for the pushed header bytes in the receive checksum. */
	skb_postpush_rcsum(skb, skb->data, ETH_HLEN);

	skb_pull_inline(skb, ETH_HLEN);

	return 0;
}

/* prepare and add the mac header to the packet if it was not set previously.
 * In this way, packet sniffers such as tcpdump can parse the packet correctly.
 * If the mac header was already set, the original mac header is left
 * untouched and the function returns immediately.
*/
static int vrf_add_mac_header_if_unset(struct sk_buff *skb,
				       struct net_device *vrf_dev,
				       u16 proto, struct net_device *orig_dev)
{
	/* Keep the existing header only if one was set AND the original
	 * device has a link-layer header.
	 */
	if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev))
		return 0;

	return vrf_prepare_mac_header(skb, vrf_dev, proto);
}

#if IS_ENABLED(CONFIG_IPV6)
/* neighbor handling is done with actual device; do not want
 * to flip skb->dev for those ndisc packets. This really fails
 * for multiple next protocols (e.g., NEXTHDR_HOP). But it is
 * a start.
 */
static bool ipv6_ndisc_frame(const struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	bool rc = false;

	/* Only an ICMPv6 header directly after the IPv6 header is checked. */
	if (iph->nexthdr == NEXTHDR_ICMP) {
		const struct icmp6hdr *icmph;
		struct icmp6hdr _icmph;

		icmph = skb_header_pointer(skb, sizeof(*iph),
					   sizeof(_icmph), &_icmph);
		if (!icmph)
			goto out;

		switch (icmph->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			rc = true;
			break;
		}
	}

out:
	return rc;
}

/* Route lookup in the VRF's own IPv6 table (vrf->fib6_table). */
static struct rt6_info *vrf_ip6_route_lookup(struct net *net,
					     const struct net_device *dev,
					     struct flowi6 *fl6,
					     int ifindex,
					     const struct sk_buff *skb,
					     int flags)
{
	struct net_vrf *vrf = netdev_priv(dev);

	return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags);
}

/* Replace the skb's dst with the result of a VRF-table lookup keyed on the
 * packet's IPv6 header and @ifindex. The skb is left without a dst when the
 * lookup yields nothing or the null entry.
 */
static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
			      int ifindex)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct flowi6 fl6 = {
		.flowi6_iif     = ifindex,
		.flowi6_mark    = skb->mark,
		.flowi6_proto   = iph->nexthdr,
		.daddr          = iph->daddr,
		.saddr          = iph->saddr,
		.flowlabel      = ip6_flowinfo(iph),
	};
	struct net *net = dev_net(vrf_dev);
	struct rt6_info *rt6;

	/* The old dst is dropped before the lookup, whatever its outcome. */
	skb_dst_drop(skb);

	rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
				   RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
	if (unlikely(!rt6))
		return;

	if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst))
		return;

	skb_dst_set(skb, &rt6->dst);
}

static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
int orig_iif = skb->skb_iif;
	bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr);
	bool is_ndisc = ipv6_ndisc_frame(skb);

	/* loopback, multicast & non-ND link-local traffic; do not push through
	 * packet taps again. Reset pkt_type for upper layers to process skb.
	 * For non-loopback strict packets, determine the dst using the original
	 * ifindex.
	 */
	if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) {
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;
		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;

		if (skb->pkt_type == PACKET_LOOPBACK)
			skb->pkt_type = PACKET_HOST;
		else
			vrf_ip6_input_dst(skb, vrf_dev, orig_iif);

		/* This path skips the taps and the PRE_ROUTING hook below. */
		goto out;
	}

	/* if packet is NDISC then keep the ingress interface */
	if (!is_ndisc) {
		struct net_device *orig_dev = skb->dev;

		dev_dstats_rx_add(vrf_dev, skb->len);
		skb->dev = vrf_dev;
		skb->skb_iif = vrf_dev->ifindex;

		/* Only when packet taps are attached to the VRF device:
		 * expose the mac header, deliver to the taps, hide it again.
		 */
		if (!list_empty(&vrf_dev->ptype_all)) {
			int err;

			err = vrf_add_mac_header_if_unset(skb, vrf_dev,
							  ETH_P_IPV6,
							  orig_dev);
			if (likely(!err)) {
				skb_push(skb, skb->mac_len);
				dev_queue_xmit_nit(skb, vrf_dev);
				skb_pull(skb, skb->mac_len);
			}
		}

		IP6CB(skb)->flags |= IP6SKB_L3SLAVE;
	}

	if (need_strict)
		vrf_ip6_input_dst(skb, vrf_dev, orig_iif);

	skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
	return skb;
}

#else
/* Stub used when IPv6 is not enabled: the skb is returned untouched. */
static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev,
				   struct sk_buff *skb)
{
	return skb;
}
#endif

/* IPv4 receive path: re-home the skb onto the VRF device. */
static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb)
{
	struct net_device *orig_dev = skb->dev;

	skb->dev = vrf_dev;
	skb->skb_iif = vrf_dev->ifindex;
	IPCB(skb)->flags |= IPSKB_L3SLAVE;

	/* Multicast skips stats, taps and the netfilter hook. */
	if (ipv4_is_multicast(ip_hdr(skb)->daddr))
		goto out;

	/* loopback traffic; do not push through packet taps again.
* Reset pkt_type for upper layers to process skb
	 */
	if (skb->pkt_type == PACKET_LOOPBACK) {
		skb->pkt_type = PACKET_HOST;
		goto out;
	}

	dev_dstats_rx_add(vrf_dev, skb->len);

	/* Only when packet taps are attached to the VRF device: expose the
	 * mac header, deliver to the taps, hide it again.
	 */
	if (!list_empty(&vrf_dev->ptype_all)) {
		int err;

		err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP,
						  orig_dev);
		if (likely(!err)) {
			skb_push(skb, skb->mac_len);
			dev_queue_xmit_nit(skb, vrf_dev);
			skb_pull(skb, skb->mac_len);
		}
	}

	/* May hand back NULL if the hook did not return 1. */
	skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev);
out:
	return skb;
}

/* called with rcu lock held */
static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev,
				  struct sk_buff *skb,
				  u16 proto)
{
	/* Dispatch on address family; any other family leaves the skb as is. */
	switch (proto) {
	case AF_INET:
		return vrf_ip_rcv(vrf_dev, skb);
	case AF_INET6:
		return vrf_ip6_rcv(vrf_dev, skb);
	}

	return skb;
}

#if IS_ENABLED(CONFIG_IPV6)
/* send to link-local or multicast address via interface enslaved to
 * VRF device. Force lookup to VRF table without changing flow struct
 * Note: Caller to this function must hold rcu_read_lock() and no refcnt
 * is taken on the dst by this function.
*/
static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev,
					      struct flowi6 *fl6)
{
	struct net *net = dev_net(dev);
	int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF;
	struct dst_entry *dst = NULL;
	struct rt6_info *rt;

	/* VRF device does not have a link-local address and
	 * sending packets to link-local or mcast addresses over
	 * a VRF device does not make sense
	 */
	if (fl6->flowi6_oif == dev->ifindex) {
		dst = &net->ipv6.ip6_null_entry->dst;
		return dst;
	}

	/* Let the lookup honour the source address when one is given. */
	if (!ipv6_addr_any(&fl6->saddr))
		flags |= RT6_LOOKUP_F_HAS_SADDR;

	rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags);
	if (rt)
		dst = &rt->dst;

	return dst;
}
#endif

/* L3 master device callbacks of a VRF device. */
static const struct l3mdev_ops vrf_l3mdev_ops = {
	.l3mdev_fib_table	= vrf_fib_table,
	.l3mdev_l3_rcv		= vrf_l3_rcv,
	.l3mdev_l3_out		= vrf_l3_out,
#if IS_ENABLED(CONFIG_IPV6)
	.l3mdev_link_scope_lookup = vrf_link_scope_lookup,
#endif
};

/* ethtool get_drvinfo: report driver name and version. */
static void vrf_get_drvinfo(struct net_device *dev,
			    struct ethtool_drvinfo *info)
{
	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
	strscpy(info->version, DRV_VERSION, sizeof(info->version));
}

static const struct ethtool_ops vrf_ethtool_ops = {
	.get_drvinfo	= vrf_get_drvinfo,
};

/* Size of the netlink message built by vrf_fib_rule(): the rule header plus
 * the three attributes that function adds.
 */
static inline size_t vrf_fib_rule_nl_size(void)
{
	size_t sz;

	sz  = NLMSG_ALIGN(sizeof(struct fib_rule_hdr));
	sz += nla_total_size(sizeof(u8));	/* FRA_L3MDEV */
	sz += nla_total_size(sizeof(u32));	/* FRA_PRIORITY */
	sz += nla_total_size(sizeof(u8));       /* FRA_PROTOCOL */

	return sz;
}

/* Add (@add_it true) or delete (@add_it false) the l3mdev FIB rule for
 * @family by building a netlink rule message and feeding it to
 * fib_newrule()/fib_delrule().
 */
static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it)
{
	struct fib_rule_hdr *frh;
	struct nlmsghdr *nlh;
	struct sk_buff *skb;
	int err;

	/* IPv6 families are silently skipped when IPv6 is not enabled. */
	if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) &&
	    !ipv6_mod_enabled())
		return 0;

	skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL);
	if (!skb)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0);
	if (!nlh)
		goto nla_put_failure;

	/* rule only needs to appear once */
	nlh->nlmsg_flags |= NLM_F_EXCL;

	frh = nlmsg_data(nlh);
	memset(frh, 0, sizeof(*frh));
frh->family = family; frh->action = FR_ACT_TO_TBL; if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL)) goto nla_put_failure; if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF)) goto nla_put_failure; nlmsg_end(skb, nlh); if (add_it) { err = fib_newrule(dev_net(dev), skb, nlh, NULL, true); if (err == -EEXIST) err = 0; } else { err = fib_delrule(dev_net(dev), skb, nlh, NULL, true); if (err == -ENOENT) err = 0; } nlmsg_free(skb); return err; nla_put_failure: nlmsg_free(skb); return -EMSGSIZE; } static int vrf_add_fib_rules(const struct net_device *dev) { int err; err = vrf_fib_rule(dev, AF_INET, true); if (err < 0) goto out_err; err = vrf_fib_rule(dev, AF_INET6, true); if (err < 0) goto ipv6_err; #if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true); if (err < 0) goto ipmr_err; #endif #if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) err = vrf_fib_rule(dev, RTNL_FAMILY_IP6MR, true); if (err < 0) goto ip6mr_err; #endif return 0; #if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) ip6mr_err: vrf_fib_rule(dev, RTNL_FAMILY_IPMR, false); #endif #if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) ipmr_err: vrf_fib_rule(dev, AF_INET6, false); #endif ipv6_err: vrf_fib_rule(dev, AF_INET, false); out_err: netdev_err(dev, "Failed to add FIB rules.\n"); return err; } static void vrf_setup(struct net_device *dev) { ether_setup(dev); /* Initialize the device structure. */ dev->netdev_ops = &vrf_netdev_ops; dev->l3mdev_ops = &vrf_l3mdev_ops; dev->ethtool_ops = &vrf_ethtool_ops; dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. */ eth_hw_addr_random(dev); /* don't acquire vrf device's netif_tx_lock when transmitting */ dev->lltx = true; /* don't allow vrf devices to change network namespaces. 
*/
	dev->netns_immutable = true;

	/* does not make sense for a VLAN to be added to a vrf device */
	dev->features   |= NETIF_F_VLAN_CHALLENGED;

	/* enable offload features */
	dev->features   |= NETIF_F_GSO_SOFTWARE;
	dev->features   |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC;
	dev->features   |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA;

	/* hw and encapsulation feature sets mirror the feature set above. */
	dev->hw_features = dev->features;
	dev->hw_enc_features = dev->features;

	/* default to no qdisc; user can add if desired */
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->priv_flags |= IFF_NO_RX_HANDLER;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;

	/* VRF devices do not care about MTU, but if the MTU is set
	 * too low then the ipv4 and ipv6 protocols are disabled
	 * which breaks networking.
	 */
	dev->min_mtu = IPV6_MIN_MTU;
	dev->max_mtu = IP6_MAX_MTU;
	dev->mtu = dev->max_mtu;

	dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
}

/* rtnl_link_ops validate callback: if a hardware address is supplied it must
 * be ETH_ALEN bytes long (-EINVAL) and a valid Ethernet address
 * (-EADDRNOTAVAIL).
 */
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
			NL_SET_ERR_MSG(extack, "Invalid hardware address");
			return -EINVAL;
		}
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
			NL_SET_ERR_MSG(extack, "Invalid hardware address");
			return -EADDRNOTAVAIL;
		}
	}
	return 0;
}

/* rtnl_link_ops dellink callback: release every lower device, drop the
 * table-id mapping, then queue the VRF device for unregistration.
 */
static void vrf_dellink(struct net_device *dev, struct list_head *head)
{
	struct net_device *port_dev;
	struct list_head *iter;

	netdev_for_each_lower_dev(dev, port_dev, iter)
		vrf_del_slave(dev, port_dev);

	vrf_map_unregister_dev(dev);

	unregister_netdevice_queue(dev, head);
}

/* rtnl_link_ops newlink callback: create a VRF device bound to the table id
 * given in IFLA_VRF_TABLE.
 */
static int vrf_newlink(struct net_device *dev,
		       struct rtnl_newlink_params *params,
		       struct netlink_ext_ack *extack)
{
	struct net_vrf *vrf = netdev_priv(dev);
	struct nlattr **data = params->data;
	struct netns_vrf *nn_vrf;
	bool *add_fib_rules;
	struct net *net;
	int err;

	/* The table id attribute is mandatory. */
	if (!data || !data[IFLA_VRF_TABLE]) {
		NL_SET_ERR_MSG(extack, "VRF table id is missing");
		return -EINVAL;
	}

	vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
	if (vrf->tb_id == RT_TABLE_UNSPEC) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VRF_TABLE],
				    "Invalid VRF table id");
		return -EINVAL;
	}

	dev->priv_flags |= IFF_L3MDEV_MASTER;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* mapping between table_id and vrf;
	 * note: such binding could not be done in the dev init function
	 * because dev->ifindex id is not available yet.
	 */
	vrf->ifindex = dev->ifindex;

	err = vrf_map_register_dev(dev, extack);
	if (err) {
		unregister_netdevice(dev);
		goto out;
	}

	net = dev_net(dev);
	nn_vrf = net_generic(net, vrf_net_id);

	/* The per-netns flag is cleared after the first successful
	 * installation, so the FIB rules are added only once per netns.
	 */
	add_fib_rules = &nn_vrf->add_fib_rules;
	if (*add_fib_rules) {
		err = vrf_add_fib_rules(dev);
		if (err) {
			vrf_map_unregister_dev(dev);
			unregister_netdevice(dev);
			goto out;
		}

		*add_fib_rules = false;
	}

out:
	return err;
}

/* Space needed by vrf_fillinfo(): one u32 attribute. */
static size_t vrf_nl_getsize(const struct net_device *dev)
{
	return nla_total_size(sizeof(u32));  /* IFLA_VRF_TABLE */
}

/* Report the VRF's table id as IFLA_VRF_TABLE. */
static int vrf_fillinfo(struct sk_buff *skb,
			const struct net_device *dev)
{
	struct net_vrf *vrf = netdev_priv(dev);

	return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id);
}

/* Space needed by vrf_fill_slave_info(): one u32 attribute. */
static size_t vrf_get_slave_size(const struct net_device *bond_dev,
				 const struct net_device *slave_dev)
{
	return nla_total_size(sizeof(u32));  /* IFLA_VRF_PORT_TABLE */
}

/* Report the master VRF's table id as IFLA_VRF_PORT_TABLE for a slave.
 * Returns 0, or -EMSGSIZE if the attribute does not fit.
 */
static int vrf_fill_slave_info(struct sk_buff *skb,
			       const struct net_device *vrf_dev,
			       const struct net_device *slave_dev)
{
	struct net_vrf *vrf = netdev_priv(vrf_dev);

	if (nla_put_u32(skb, IFLA_VRF_PORT_TABLE, vrf->tb_id))
		return -EMSGSIZE;

	return 0;
}

/* Netlink policy: IFLA_VRF_TABLE is a u32. */
static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = {
	[IFLA_VRF_TABLE] = { .type = NLA_U32 },
};

/* rtnetlink link type description of the VRF driver. */
static struct rtnl_link_ops vrf_link_ops __read_mostly = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct net_vrf),

	.get_size	= vrf_nl_getsize,
	.policy		= vrf_nl_policy,
	.validate	= vrf_validate,
	.fill_info	= vrf_fillinfo,

	.get_slave_size  = vrf_get_slave_size,
	.fill_slave_info = vrf_fill_slave_info,

	.newlink	= vrf_newlink,
	.dellink	= vrf_dellink,
	.setup		= vrf_setup,
	.maxtype	= IFLA_VRF_MAX,
};

static int vrf_device_event(struct notifier_block
*unused,
			    unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);

	/* only care about unregister events to drop slave references */
	if (event == NETDEV_UNREGISTER) {
		struct net_device *vrf_dev;

		if (!netif_is_l3_slave(dev))
			goto out;

		/* NOTE(review): the master lookup result is used without a
		 * NULL check - confirm an L3 slave always has a master here.
		 */
		vrf_dev = netdev_master_upper_dev_get(dev);
		vrf_del_slave(vrf_dev, dev);
	}
out:
	return NOTIFY_DONE;
}

static struct notifier_block vrf_notifier_block __read_mostly = {
	.notifier_call = vrf_device_event,
};

/* Initialise a VRF map: lock, hash table, strict mode off. Always returns 0. */
static int vrf_map_init(struct vrf_map *vmap)
{
	spin_lock_init(&vmap->vmap_lock);
	hash_init(vmap->ht);

	vmap->strict_mode = false;

	return 0;
}

#ifdef CONFIG_SYSCTL
/* Read the strict-mode flag under the map lock. */
static bool vrf_strict_mode(struct vrf_map *vmap)
{
	bool strict_mode;

	vrf_map_lock(vmap);
	strict_mode = vmap->strict_mode;
	vrf_map_unlock(vmap);

	return strict_mode;
}

/* Switch strict mode to @new_mode under the map lock.
 * Returns 0 on success or if nothing changes, -EBUSY when strict mode is
 * requested while vmap->shared_tables is non-zero.
 */
static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode)
{
	bool *cur_mode;
	int res = 0;

	vrf_map_lock(vmap);

	cur_mode = &vmap->strict_mode;
	if (*cur_mode == new_mode)
		goto unlock;

	if (*cur_mode) {
		/* disable strict mode */
		*cur_mode = false;
	} else {
		if (vmap->shared_tables) {
			/* we cannot allow strict_mode because there are some
			 * vrfs that share one or more tables.
			 */
			res = -EBUSY;
			goto unlock;
		}

		/* no tables are shared among vrfs, so we can go back
		 * to 1:1 association between a vrf with its table.
*/
		*cur_mode = true;
	}

unlock:
	vrf_map_unlock(vmap);

	return res;
}

/* proc handler of the "strict_mode" sysctl. Reads report the current mode;
 * writes accept 0 or 1 and are applied through vrf_strict_mode_change().
 */
static int vrf_shared_table_handler(const struct ctl_table *table, int write,
				    void *buffer, size_t *lenp, loff_t *ppos)
{
	struct net *net = (struct net *)table->extra1;
	struct vrf_map *vmap = netns_vrf_map(net);
	int proc_strict_mode = 0;
	/* Temporary table: the real one uses extra1 for the netns pointer, so
	 * the 0..1 bounds and the data pointer are supplied here instead.
	 */
	struct ctl_table tmp = {
		.procname	= table->procname,
		.data		= &proc_strict_mode,
		.maxlen		= sizeof(int),
		.mode		= table->mode,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	};
	int ret;

	if (!write)
		proc_strict_mode = vrf_strict_mode(vmap);

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && ret == 0)
		ret = vrf_strict_mode_change(vmap, (bool)proc_strict_mode);

	return ret;
}

/* Template sysctl table; duplicated per netns by vrf_netns_init_sysctl(). */
static const struct ctl_table vrf_table[] = {
	{
		.procname	= "strict_mode",
		.data		= NULL,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= vrf_shared_table_handler,
		/* set by the vrf_netns_init */
		.extra1		= NULL,
	},
};

/* Register the per-netns "net/vrf" sysctl table.
 * Returns 0 on success, -ENOMEM if duplication or registration fails.
 */
static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
{
	struct ctl_table *table;

	table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	/* init the extra1 parameter with the reference to current netns */
	table[0].extra1 = net;

	nn_vrf->ctl_hdr = register_net_sysctl_sz(net, "net/vrf", table,
						 ARRAY_SIZE(vrf_table));
	if (!nn_vrf->ctl_hdr) {
		kfree(table);
		return -ENOMEM;
	}

	return 0;
}

/* Unregister the per-netns sysctl table and free its duplicated copy. */
static void vrf_netns_exit_sysctl(struct net *net)
{
	struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
	const struct ctl_table *table;

	/* Fetch the table pointer before the header is unregistered. */
	table = nn_vrf->ctl_hdr->ctl_table_arg;
	unregister_net_sysctl_table(nn_vrf->ctl_hdr);
	kfree(table);
}
#else
/* Stubs used when CONFIG_SYSCTL is not set. */
static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf)
{
	return 0;
}

static void vrf_netns_exit_sysctl(struct net *net)
{
}
#endif

/* Initialize per network namespace state */
static int __net_init vrf_netns_init(struct net *net)
{
	struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);

	nn_vrf->add_fib_rules = true;
	vrf_map_init(&nn_vrf->vmap);

	return
vrf_netns_init_sysctl(net, nn_vrf); } static void __net_exit vrf_netns_exit(struct net *net) { vrf_netns_exit_sysctl(net); } static struct pernet_operations vrf_net_ops __net_initdata = { .init = vrf_netns_init, .exit = vrf_netns_exit, .id = &vrf_net_id, .size = sizeof(struct netns_vrf), }; static int __init vrf_init_module(void) { int rc; register_netdevice_notifier(&vrf_notifier_block); rc = register_pernet_subsys(&vrf_net_ops); if (rc < 0) goto error; rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF, vrf_ifindex_lookup_by_table_id); if (rc < 0) goto unreg_pernet; rc = rtnl_link_register(&vrf_link_ops); if (rc < 0) goto table_lookup_unreg; return 0; table_lookup_unreg: l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF, vrf_ifindex_lookup_by_table_id); unreg_pernet: unregister_pernet_subsys(&vrf_net_ops); error: unregister_netdevice_notifier(&vrf_notifier_block); return rc; } module_init(vrf_init_module); MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern"); MODULE_DESCRIPTION("Device driver to instantiate VRF domains"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); MODULE_VERSION(DRV_VERSION);
90 23 1 8 81 89 3 1 2 3 4 95 6 1 5 1 1 4 3 5 96 1 27 95 91 93 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
// SPDX-License-Identifier: GPL-2.0
/* NOTE(review): the numeric row above looks like hit-count/line-number
 * residue from a coverage report rather than C source - confirm before
 * building this file.
 */
#include <linux/proc_fs.h>
#include <linux/nsproxy.h>
#include <linux/ptrace.h>
#include <linux/namei.h>
#include <linux/file.h>
#include <linux/utsname.h>
#include <net/net_namespace.h>
#include <linux/ipc_namespace.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
#include "internal.h"


/* Namespace operation tables iterated by the readdir and lookup handlers of
 * this file. Entries are compiled in per namespace config option; only the
 * mount namespace entry is unconditional.
 */
static const struct proc_ns_operations *ns_entries[] = {
#ifdef CONFIG_NET_NS
	&netns_operations,
#endif
#ifdef CONFIG_UTS_NS
	&utsns_operations,
#endif
#ifdef CONFIG_IPC_NS
	&ipcns_operations,
#endif
#ifdef CONFIG_PID_NS
	&pidns_operations,
	&pidns_for_children_operations,
#endif
#ifdef CONFIG_USER_NS
	&userns_operations,
#endif
	&mntns_operations,
#ifdef CONFIG_CGROUPS
	&cgroupns_operations,
#endif
#ifdef CONFIG_TIME_NS
	&timens_operations,
	&timens_for_children_operations,
#endif
};

/* get_link handler for a namespace symlink: jump to the namespace's path.
 * Returns an ERR_PTR: -ECHILD without a dentry, -EACCES if the task is gone
 * or ptrace access is denied, otherwise the result of ns_get_path() or
 * nd_jump_link().
 */
static const char *proc_ns_get_link(struct dentry *dentry,
				    struct inode *inode,
				    struct delayed_call *done)
{
	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
	struct task_struct *task;
	struct path ns_path;
	int error = -EACCES;

	if (!dentry)
		return ERR_PTR(-ECHILD);

	task = get_proc_task(inode);
	if (!task)
		return ERR_PTR(-EACCES);

	if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
		goto out;

	error = ns_get_path(&ns_path, task, ns_ops);
	if (error)
		goto out;

	error =
nd_jump_link(&ns_path);
out:
	/* Drop the task reference taken by get_proc_task(). */
	put_task_struct(task);
	return ERR_PTR(error);
}

/* readlink handler for a namespace symlink: copy the namespace name to user
 * space. Returns -EACCES if the task is gone or ptrace access is denied,
 * otherwise the result of ns_get_name() or readlink_copy().
 */
static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen)
{
	struct inode *inode = d_inode(dentry);
	const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops;
	struct task_struct *task;
	char name[50];
	int res = -EACCES;

	task = get_proc_task(inode);
	if (!task)
		return res;

	if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) {
		res = ns_get_name(name, sizeof(name), task, ns_ops);
		if (res >= 0)
			res = readlink_copy(buffer, buflen, name, strlen(name));
	}
	put_task_struct(task);
	return res;
}

static const struct inode_operations proc_ns_link_inode_operations = {
	.readlink	= proc_ns_readlink,
	.get_link	= proc_ns_get_link,
	.setattr	= proc_setattr,
};

/* Create the symlink inode for one namespace entry and attach it to
 * @dentry. @ptr is the entry's proc_ns_operations.
 * Returns ERR_PTR(-ENOENT) if no inode can be made.
 */
static struct dentry *proc_ns_instantiate(struct dentry *dentry,
	struct task_struct *task, const void *ptr)
{
	const struct proc_ns_operations *ns_ops = ptr;
	struct inode *inode;
	struct proc_inode *ei;

	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
	if (!inode)
		return ERR_PTR(-ENOENT);

	ei = PROC_I(inode);
	inode->i_op = &proc_ns_link_inode_operations;
	ei->ns_ops = ns_ops;
	pid_update_inode(task, inode);

	return d_splice_alias_ops(inode, dentry, &pid_dentry_operations);
}

/* readdir handler for the namespace directory: emit "." and "..", then one
 * entry per element of ns_entries. Returns -ENOENT if the task is gone,
 * 0 otherwise.
 */
static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
{
	struct task_struct *task = get_proc_task(file_inode(file));
	const struct proc_ns_operations **entry, **last;

	if (!task)
		return -ENOENT;

	if (!dir_emit_dots(file, ctx))
		goto out;
	/* Positions 0 and 1 are the dot entries; ns_entries starts at 2. */
	if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries))
		goto out;
	entry = ns_entries + (ctx->pos - 2);
	last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
	while (entry <= last) {
		const struct proc_ns_operations *ops = *entry;
		if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name),
				     proc_ns_instantiate, task, ops))
			break;
		ctx->pos++;
		entry++;
	}
out:
	put_task_struct(task);
	return 0;
}

const struct file_operations proc_ns_dir_operations = {
	.read		= generic_read_dir,
	.iterate_shared	=
proc_ns_dir_readdir,
	.llseek		= generic_file_llseek,
};

/* lookup handler for the namespace directory: find the ns_entries element
 * whose name equals the dentry name and instantiate it.
 * Returns ERR_PTR(-ENOENT) if the task is gone or no entry matches.
 */
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
				struct dentry *dentry, unsigned int flags)
{
	struct task_struct *task = get_proc_task(dir);
	const struct proc_ns_operations **entry, **last;
	unsigned int len = dentry->d_name.len;
	struct dentry *res = ERR_PTR(-ENOENT);

	if (!task)
		goto out_no_task;

	/* 'last' is one past the final element, unlike in the readdir loop. */
	last = &ns_entries[ARRAY_SIZE(ns_entries)];
	for (entry = ns_entries; entry < last; entry++) {
		/* Compare lengths first so memcmp() sees equal-sized names. */
		if (strlen((*entry)->name) != len)
			continue;
		if (!memcmp(dentry->d_name.name, (*entry)->name, len))
			break;
	}
	if (entry == last)
		goto out;

	res = proc_ns_instantiate(dentry, task, *entry);
out:
	put_task_struct(task);
out_no_task:
	return res;
}

const struct inode_operations proc_ns_dir_inode_operations = {
	.lookup		= proc_ns_dir_lookup,
	.getattr	= pid_getattr,
	.setattr	= proc_setattr,
};
3 1 4 3 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 // SPDX-License-Identifier: GPL-2.0-or-later /* AFS cell alias detection * * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/slab.h> #include <linux/sched.h> #include <linux/namei.h> #include <keys/rxrpc-type.h> #include "internal.h" /* * Sample a volume. 
*/ static struct afs_volume *afs_sample_volume(struct afs_cell *cell, struct key *key, const char *name, unsigned int namelen) { struct afs_volume *volume; struct afs_fs_context fc = { .type = 0, /* Explicitly leave it to the VLDB */ .volnamesz = namelen, .volname = name, .net = cell->net, .cell = cell, .key = key, /* This might need to be something */ }; volume = afs_create_volume(&fc); _leave(" = %p", volume); return volume; } /* * Compare the address lists of a pair of fileservers. */ static int afs_compare_fs_alists(const struct afs_server *server_a, const struct afs_server *server_b) { const struct afs_addr_list *la, *lb; int a = 0, b = 0, addr_matches = 0; la = rcu_dereference(server_a->endpoint_state)->addresses; lb = rcu_dereference(server_b->endpoint_state)->addresses; while (a < la->nr_addrs && b < lb->nr_addrs) { unsigned long pa = (unsigned long)la->addrs[a].peer; unsigned long pb = (unsigned long)lb->addrs[b].peer; long diff = pa - pb; if (diff < 0) { a++; } else if (diff > 0) { b++; } else { addr_matches++; a++; b++; } } return addr_matches; } /* * Compare the fileserver lists of two volumes. The server lists are sorted in * order of ascending UUID. 
*/
static int afs_compare_volume_slists(const struct afs_volume *vol_a,
				     const struct afs_volume *vol_b)
{
	const struct afs_server_list *la, *lb;
	int i, a = 0, b = 0, uuid_matches = 0, addr_matches = 0;

	la = rcu_dereference(vol_a->servers);
	lb = rcu_dereference(vol_b->servers);

	/* Any differing volume ID means "no match" straight away. */
	for (i = 0; i < AFS_MAXTYPES; i++)
		if (vol_a->vids[i] != vol_b->vids[i])
			return 0;

	/* Lock-step walk over both server lists; address lists are compared
	 * only for servers whose UUIDs are equal.
	 */
	while (a < la->nr_servers && b < lb->nr_servers) {
		const struct afs_server *server_a = la->servers[a].server;
		const struct afs_server *server_b = lb->servers[b].server;
		int diff = memcmp(&server_a->uuid, &server_b->uuid, sizeof(uuid_t));

		if (diff < 0) {
			a++;
		} else if (diff > 0) {
			b++;
		} else {
			uuid_matches++;
			addr_matches += afs_compare_fs_alists(server_a, server_b);
			a++;
			b++;
		}
	}

	/* Only the address match count is returned; the UUID match count is
	 * used for tracing alone.
	 */
	_leave(" = %d [um %d]", addr_matches, uuid_matches);
	return addr_matches;
}

/*
 * Compare root.cell volumes.
 *
 * Returns 1 and sets cell->alias_of when another cell's root volume matches
 * this cell's root volume, 0 otherwise.
 */
static int afs_compare_cell_roots(struct afs_cell *cell)
{
	struct afs_cell *p;

	_enter("");

	rcu_read_lock();

	hlist_for_each_entry_rcu(p, &cell->net->proc_cells, proc_link) {
		/* Skip ourselves and cells that are themselves aliases. */
		if (p == cell || p->alias_of)
			continue;
		if (!p->root_volume)
			continue; /* Ignore cells that don't have a root.cell volume. */

		if (afs_compare_volume_slists(cell->root_volume, p->root_volume) != 0)
			goto is_alias;
	}

	rcu_read_unlock();
	_leave(" = 0");
	return 0;

is_alias:
	rcu_read_unlock();
	cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias);
	return 1;
}

/*
 * Query the new cell for a volume from a cell we're already using.
 *
 * Returns 1 if the sampled volume matches, 0 if it does not (or @p has no
 * volumes), or a negative error from the volume lookup.
 */
static int afs_query_for_alias_one(struct afs_cell *cell, struct key *key,
				   struct afs_cell *p)
{
	struct afs_volume *volume, *pvol = NULL;
	int ret;

	/* Arbitrarily pick a volume from the list. */
	read_seqlock_excl(&p->volume_lock);
	if (!RB_EMPTY_ROOT(&p->volumes))
		pvol = afs_get_volume(rb_entry(p->volumes.rb_node,
					       struct afs_volume, cell_node),
				      afs_volume_trace_get_query_alias);
	read_sequnlock_excl(&p->volume_lock);
	if (!pvol)
		return 0;

	_enter("%s:%s", cell->name, pvol->name);

	/* And see if it's in the new cell.
*/ volume = afs_sample_volume(cell, key, pvol->name, pvol->name_len); if (IS_ERR(volume)) { afs_put_volume(pvol, afs_volume_trace_put_query_alias); if (PTR_ERR(volume) != -ENOMEDIUM) return PTR_ERR(volume); /* That volume is not in the new cell, so not an alias */ return 0; } /* The new cell has a like-named volume also - compare volume ID, * server and address lists. */ ret = 0; if (pvol->vid == volume->vid) { rcu_read_lock(); if (afs_compare_volume_slists(volume, pvol)) ret = 1; rcu_read_unlock(); } afs_put_volume(volume, afs_volume_trace_put_query_alias); afs_put_volume(pvol, afs_volume_trace_put_query_alias); return ret; } /* * Query the new cell for volumes we know exist in cells we're already using. */ static int afs_query_for_alias(struct afs_cell *cell, struct key *key) { struct afs_cell *p; _enter("%s", cell->name); if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) return -ERESTARTSYS; hlist_for_each_entry(p, &cell->net->proc_cells, proc_link) { if (p == cell || p->alias_of) continue; if (RB_EMPTY_ROOT(&p->volumes)) continue; if (p->root_volume) continue; /* Ignore cells that have a root.cell volume. */ afs_use_cell(p, afs_cell_trace_use_check_alias); mutex_unlock(&cell->net->proc_cells_lock); if (afs_query_for_alias_one(cell, key, p) != 0) goto is_alias; if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) { afs_unuse_cell(p, afs_cell_trace_unuse_check_alias); return -ERESTARTSYS; } afs_unuse_cell(p, afs_cell_trace_unuse_check_alias); } mutex_unlock(&cell->net->proc_cells_lock); _leave(" = 0"); return 0; is_alias: cell->alias_of = p; /* Transfer our ref */ return 1; } /* * Look up a VLDB record for a volume. 
*/
static char *afs_vl_get_cell_name(struct afs_cell *cell, struct key *key)
{
	struct afs_vl_cursor vc;
	char *cell_name = ERR_PTR(-EDESTADDRREQ);
	bool skipped = false, not_skipped = false;
	int ret;

	if (!afs_begin_vlserver_operation(&vc, cell, key))
		return ERR_PTR(-ERESTARTSYS);

	/* Only servers flagged as YFS are asked; the rest are skipped. */
	while (afs_select_vlserver(&vc)) {
		if (!test_bit(AFS_VLSERVER_FL_IS_YFS, &vc.server->flags)) {
			vc.call_error = -EOPNOTSUPP;
			skipped = true;
			continue;
		}
		not_skipped = true;
		cell_name = afs_yfsvl_get_cell_name(&vc);
	}

	ret = afs_end_vlserver_operation(&vc);
	/* Every selected server was skipped: report "not supported". */
	if (skipped && !not_skipped)
		ret = -EOPNOTSUPP;
	return ret < 0 ? ERR_PTR(ret) : cell_name;
}

/*
 * Ask the VL servers for the cell's name and, if it differs from
 * cell->name, look up the named cell and record it as the master.
 *
 * Returns 0 if the names are equal, 1 if cell->alias_of was set, or a
 * negative error (-EOPNOTSUPP for an empty or over-long name).
 */
static int yfs_check_canonical_cell_name(struct afs_cell *cell, struct key *key)
{
	struct afs_cell *master;
	size_t name_len;
	char *cell_name;

	cell_name = afs_vl_get_cell_name(cell, key);
	if (IS_ERR(cell_name))
		return PTR_ERR(cell_name);

	if (strcmp(cell_name, cell->name) == 0) {
		kfree(cell_name);
		return 0;
	}

	name_len = strlen(cell_name);
	if (!name_len || name_len > AFS_MAXCELLNAME)
		master = ERR_PTR(-EOPNOTSUPP);
	else
		master = afs_lookup_cell(cell->net, cell_name, name_len, NULL, false,
					 afs_cell_trace_use_lookup_canonical);
	/* The name string is freed on both branches before checking master. */
	kfree(cell_name);
	if (IS_ERR(master))
		return PTR_ERR(master);

	cell->alias_of = master; /* Transfer our ref */
	return 1;
}

/*
 * Run the alias checks in order: canonical cell name, root.cell comparison,
 * then sampling volumes of other cells.  Returns 0, 1 or a negative error.
 */
static int afs_do_cell_detect_alias(struct afs_cell *cell, struct key *key)
{
	struct afs_volume *root_volume;
	int ret;

	_enter("%s", cell->name);

	/* Anything other than -EOPNOTSUPP is a final answer. */
	ret = yfs_check_canonical_cell_name(cell, key);
	if (ret != -EOPNOTSUPP)
		return ret;

	/* Try and get the root.cell volume for comparison with other cells */
	/* 9 is the length of "root.cell". */
	root_volume = afs_sample_volume(cell, key, "root.cell", 9);
	if (!IS_ERR(root_volume)) {
		cell->root_volume = root_volume;
		return afs_compare_cell_roots(cell);
	}

	if (PTR_ERR(root_volume) != -ENOMEDIUM)
		return PTR_ERR(root_volume);

	/* Okay, this cell doesn't have a root.cell volume.  We need to
	 * locate some other random volume and use that to check.
*/ return afs_query_for_alias(cell, key); } /* * Check to see if a new cell is an alias of a cell we already have. At this * point we have the cell's volume server list. * * Returns 0 if we didn't detect an alias, 1 if we found an alias and an error * if we had problems gathering the data required. In the case the we did * detect an alias, cell->alias_of is set to point to the assumed master. */ int afs_cell_detect_alias(struct afs_cell *cell, struct key *key) { struct afs_net *net = cell->net; int ret; if (mutex_lock_interruptible(&net->cells_alias_lock) < 0) return -ERESTARTSYS; if (test_bit(AFS_CELL_FL_CHECK_ALIAS, &cell->flags)) { ret = afs_do_cell_detect_alias(cell, key); if (ret >= 0) clear_bit_unlock(AFS_CELL_FL_CHECK_ALIAS, &cell->flags); } else { ret = cell->alias_of ? 1 : 0; } mutex_unlock(&net->cells_alias_lock); if (ret == 1) pr_notice("kAFS: Cell %s is an alias of %s\n", cell->name, cell->alias_of->name); return ret; }
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 2 3 1 2 2 2 2 2 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 
512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 // SPDX-License-Identifier: GPL-2.0 /* * comedi/drivers/dt2801.c * Device Driver for DataTranslation DT2801 * */ /* * Driver: dt2801 * Description: Data Translation DT2801 series and DT01-EZ * Author: ds * Status: works * Devices: [Data Translation] DT2801 (dt2801), DT2801-A, DT2801/5716A, * DT2805, DT2805/5716A, DT2808, DT2818, DT2809, DT01-EZ * * This driver can autoprobe the type of board. * * Configuration options: * [0] - I/O port base address * [1] - unused * [2] - A/D reference 0=differential, 1=single-ended * [3] - A/D range * 0 = [-10, 10] * 1 = [0,10] * [4] - D/A 0 range * 0 = [-10, 10] * 1 = [-5,5] * 2 = [-2.5,2.5] * 3 = [0,10] * 4 = [0,5] * [5] - D/A 1 range (same choices) */ #include <linux/module.h> #include <linux/comedi/comedidev.h> #include <linux/delay.h> #define DT2801_TIMEOUT 1000 /* Hardware Configuration */ /* ====================== */ #define DT2801_MAX_DMA_SIZE (64 * 1024) /* define's */ /* ====================== */ /* Commands */ #define DT_C_RESET 0x0 #define DT_C_CLEAR_ERR 0x1 #define DT_C_READ_ERRREG 0x2 #define DT_C_SET_CLOCK 0x3 #define DT_C_TEST 0xb #define DT_C_STOP 0xf #define DT_C_SET_DIGIN 0x4 #define DT_C_SET_DIGOUT 0x5 #define DT_C_READ_DIG 0x6 #define DT_C_WRITE_DIG 0x7 #define DT_C_WRITE_DAIM 0x8 #define DT_C_SET_DA 0x9 #define DT_C_WRITE_DA 0xa #define DT_C_READ_ADIM 0xc #define DT_C_SET_AD 0xd #define DT_C_READ_AD 0xe /* * Command modifiers (only used with read/write), EXTTRIG can be * used with 
some other commands. */ #define DT_MOD_DMA BIT(4) #define DT_MOD_CONT BIT(5) #define DT_MOD_EXTCLK BIT(6) #define DT_MOD_EXTTRIG BIT(7) /* Bits in status register */ #define DT_S_DATA_OUT_READY BIT(0) #define DT_S_DATA_IN_FULL BIT(1) #define DT_S_READY BIT(2) #define DT_S_COMMAND BIT(3) #define DT_S_COMPOSITE_ERROR BIT(7) /* registers */ #define DT2801_DATA 0 #define DT2801_STATUS 1 #define DT2801_CMD 1 #if 0 /* ignore 'defined but not used' warning */ static const struct comedi_lrange range_dt2801_ai_pgh_bipolar = { 4, { BIP_RANGE(10), BIP_RANGE(5), BIP_RANGE(2.5), BIP_RANGE(1.25) } }; #endif static const struct comedi_lrange range_dt2801_ai_pgl_bipolar = { 4, { BIP_RANGE(10), BIP_RANGE(1), BIP_RANGE(0.1), BIP_RANGE(0.02) } }; #if 0 /* ignore 'defined but not used' warning */ static const struct comedi_lrange range_dt2801_ai_pgh_unipolar = { 4, { UNI_RANGE(10), UNI_RANGE(5), UNI_RANGE(2.5), UNI_RANGE(1.25) } }; #endif static const struct comedi_lrange range_dt2801_ai_pgl_unipolar = { 4, { UNI_RANGE(10), UNI_RANGE(1), UNI_RANGE(0.1), UNI_RANGE(0.02) } }; struct dt2801_board { const char *name; int boardcode; int ad_diff; int ad_chan; int adbits; int adrangetype; int dabits; }; /* * Typeid's for the different boards of the DT2801-series * (taken from the test-software, that comes with the board) */ static const struct dt2801_board boardtypes[] = { { .name = "dt2801", .boardcode = 0x09, .ad_diff = 2, .ad_chan = 16, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2801-a", .boardcode = 0x52, .ad_diff = 2, .ad_chan = 16, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2801/5716a", .boardcode = 0x82, .ad_diff = 1, .ad_chan = 16, .adbits = 16, .adrangetype = 1, .dabits = 12}, { .name = "dt2805", .boardcode = 0x12, .ad_diff = 1, .ad_chan = 16, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2805/5716a", .boardcode = 0x92, .ad_diff = 1, .ad_chan = 16, .adbits = 16, .adrangetype = 1, .dabits = 12}, { .name = "dt2808", .boardcode = 0x20, 
.ad_diff = 0, .ad_chan = 16, .adbits = 12, .adrangetype = 2, .dabits = 8}, { .name = "dt2818", .boardcode = 0xa2, .ad_diff = 0, .ad_chan = 4, .adbits = 12, .adrangetype = 0, .dabits = 12}, { .name = "dt2809", .boardcode = 0xb0, .ad_diff = 0, .ad_chan = 8, .adbits = 12, .adrangetype = 1, .dabits = 12}, }; struct dt2801_private { const struct comedi_lrange *dac_range_types[2]; }; /* * These are the low-level routines: * writecommand: write a command to the board * writedata: write data byte * readdata: read data byte */ /* * Only checks DataOutReady-flag, not the Ready-flag as it is done * in the examples of the manual. I don't see why this should be * necessary. */ static int dt2801_readdata(struct comedi_device *dev, int *data) { int stat = 0; int timeout = DT2801_TIMEOUT; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & (DT_S_COMPOSITE_ERROR | DT_S_READY)) return stat; if (stat & DT_S_DATA_OUT_READY) { *data = inb_p(dev->iobase + DT2801_DATA); return 0; } } while (--timeout > 0); return -ETIME; } static int dt2801_readdata2(struct comedi_device *dev, int *data) { int lb = 0; int hb = 0; int ret; ret = dt2801_readdata(dev, &lb); if (ret) return ret; ret = dt2801_readdata(dev, &hb); if (ret) return ret; *data = (hb << 8) + lb; return 0; } static int dt2801_writedata(struct comedi_device *dev, unsigned int data) { int stat = 0; int timeout = DT2801_TIMEOUT; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_COMPOSITE_ERROR) return stat; if (!(stat & DT_S_DATA_IN_FULL)) { outb_p(data & 0xff, dev->iobase + DT2801_DATA); return 0; } } while (--timeout > 0); return -ETIME; } static int dt2801_writedata2(struct comedi_device *dev, unsigned int data) { int ret; ret = dt2801_writedata(dev, data & 0xff); if (ret < 0) return ret; ret = dt2801_writedata(dev, data >> 8); if (ret < 0) return ret; return 0; } static int dt2801_wait_for_ready(struct comedi_device *dev) { int timeout = DT2801_TIMEOUT; int stat; stat = inb_p(dev->iobase + DT2801_STATUS); if 
(stat & DT_S_READY) return 0; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_COMPOSITE_ERROR) return stat; if (stat & DT_S_READY) return 0; } while (--timeout > 0); return -ETIME; } static void dt2801_writecmd(struct comedi_device *dev, int command) { int stat; dt2801_wait_for_ready(dev); stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_COMPOSITE_ERROR) { dev_dbg(dev->class_dev, "composite-error in %s, ignoring\n", __func__); } if (!(stat & DT_S_READY)) dev_dbg(dev->class_dev, "!ready in %s, ignoring\n", __func__); outb_p(command, dev->iobase + DT2801_CMD); } static int dt2801_reset(struct comedi_device *dev) { int board_code = 0; unsigned int stat; int timeout; /* pull random data from data port */ inb_p(dev->iobase + DT2801_DATA); inb_p(dev->iobase + DT2801_DATA); inb_p(dev->iobase + DT2801_DATA); inb_p(dev->iobase + DT2801_DATA); /* dt2801_writecmd(dev,DT_C_STOP); */ outb_p(DT_C_STOP, dev->iobase + DT2801_CMD); /* dt2801_wait_for_ready(dev); */ usleep_range(100, 200); timeout = 10000; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_READY) break; } while (timeout--); if (!timeout) dev_dbg(dev->class_dev, "timeout 1 status=0x%02x\n", stat); /* dt2801_readdata(dev,&board_code); */ outb_p(DT_C_RESET, dev->iobase + DT2801_CMD); /* dt2801_writecmd(dev,DT_C_RESET); */ usleep_range(100, 200); timeout = 10000; do { stat = inb_p(dev->iobase + DT2801_STATUS); if (stat & DT_S_READY) break; } while (timeout--); if (!timeout) dev_dbg(dev->class_dev, "timeout 2 status=0x%02x\n", stat); dt2801_readdata(dev, &board_code); return board_code; } static int probe_number_of_ai_chans(struct comedi_device *dev) { int n_chans; int stat; int data; for (n_chans = 0; n_chans < 16; n_chans++) { dt2801_writecmd(dev, DT_C_READ_ADIM); dt2801_writedata(dev, 0); dt2801_writedata(dev, n_chans); stat = dt2801_readdata2(dev, &data); if (stat) break; } dt2801_reset(dev); dt2801_reset(dev); return n_chans; } static const struct comedi_lrange 
*dac_range_table[] = { &range_bipolar10, &range_bipolar5, &range_bipolar2_5, &range_unipolar10, &range_unipolar5 }; static const struct comedi_lrange *dac_range_lkup(int opt) { if (opt < 0 || opt >= 5) return &range_unknown; return dac_range_table[opt]; } static const struct comedi_lrange *ai_range_lkup(int type, int opt) { switch (type) { case 0: return (opt) ? &range_dt2801_ai_pgl_unipolar : &range_dt2801_ai_pgl_bipolar; case 1: return (opt) ? &range_unipolar10 : &range_bipolar10; case 2: return &range_unipolar5; } return &range_unknown; } static int dt2801_error(struct comedi_device *dev, int stat) { if (stat < 0) { if (stat == -ETIME) dev_dbg(dev->class_dev, "timeout\n"); else dev_dbg(dev->class_dev, "error %d\n", stat); return stat; } dev_dbg(dev->class_dev, "error status 0x%02x, resetting...\n", stat); dt2801_reset(dev); dt2801_reset(dev); return -EIO; } static int dt2801_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int d; int stat; int i; for (i = 0; i < insn->n; i++) { dt2801_writecmd(dev, DT_C_READ_ADIM); dt2801_writedata(dev, CR_RANGE(insn->chanspec)); dt2801_writedata(dev, CR_CHAN(insn->chanspec)); stat = dt2801_readdata2(dev, &d); if (stat != 0) return dt2801_error(dev, stat); data[i] = d; } return i; } static int dt2801_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); dt2801_writecmd(dev, DT_C_WRITE_DAIM); dt2801_writedata(dev, chan); dt2801_writedata2(dev, data[0]); s->readback[chan] = data[0]; return 1; } static int dt2801_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int which = (s == &dev->subdevices[3]) ? 
1 : 0; unsigned int val = 0; if (comedi_dio_update_state(s, data)) { dt2801_writecmd(dev, DT_C_WRITE_DIG); dt2801_writedata(dev, which); dt2801_writedata(dev, s->state); } dt2801_writecmd(dev, DT_C_READ_DIG); dt2801_writedata(dev, which); dt2801_readdata(dev, &val); data[1] = val; return insn->n; } static int dt2801_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0xff); if (ret) return ret; dt2801_writecmd(dev, s->io_bits ? DT_C_SET_DIGOUT : DT_C_SET_DIGIN); dt2801_writedata(dev, (s == &dev->subdevices[3]) ? 1 : 0); return insn->n; } /* * options: * [0] - i/o base * [1] - unused * [2] - a/d 0=differential, 1=single-ended * [3] - a/d range 0=[-10,10], 1=[0,10] * [4] - dac0 range 0=[-10,10], 1=[-5,5], 2=[-2.5,2.5] 3=[0,10], 4=[0,5] * [5] - dac1 range 0=[-10,10], 1=[-5,5], 2=[-2.5,2.5] 3=[0,10], 4=[0,5] */ static int dt2801_attach(struct comedi_device *dev, struct comedi_devconfig *it) { const struct dt2801_board *board; struct dt2801_private *devpriv; struct comedi_subdevice *s; int board_code, type; int ret = 0; int n_ai_chans; ret = comedi_request_region(dev, it->options[0], 0x2); if (ret) return ret; /* do some checking */ board_code = dt2801_reset(dev); /* heh. if it didn't work, try it again. 
*/
	if (!board_code)
		board_code = dt2801_reset(dev);

	/* Identify the board model from the code read back on reset. */
	for (type = 0; type < ARRAY_SIZE(boardtypes); type++) {
		if (boardtypes[type].boardcode == board_code)
			goto havetype;
	}
	dev_dbg(dev->class_dev,
		"unrecognized board code=0x%02x, contact author\n", board_code);
	/* Unknown code: fall back to the first table entry. */
	type = 0;

havetype:
	dev->board_ptr = boardtypes + type;
	board = dev->board_ptr;

	n_ai_chans = probe_number_of_ai_chans(dev);

	ret = comedi_alloc_subdevices(dev, 4);
	if (ret)
		goto out;

	devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv));
	if (!devpriv)
		return -ENOMEM;

	dev->board_name = board->name;

	s = &dev->subdevices[0];
	/* ai subdevice */
	s->type = COMEDI_SUBD_AI;
	s->subdev_flags = SDF_READABLE | SDF_GROUND;
	/* The probed channel count is used; the table-based count is compiled out. */
#if 1
	s->n_chan = n_ai_chans;
#else
	if (it->options[2])
		s->n_chan = board->ad_chan;
	else
		s->n_chan = board->ad_chan / 2;
#endif
	s->maxdata = (1 << board->adbits) - 1;
	s->range_table = ai_range_lkup(board->adrangetype, it->options[3]);
	s->insn_read = dt2801_ai_insn_read;

	s = &dev->subdevices[1];
	/* ao subdevice */
	s->type = COMEDI_SUBD_AO;
	s->subdev_flags = SDF_WRITABLE;
	s->n_chan = 2;
	s->maxdata = (1 << board->dabits) - 1;
	/* Each DAC gets its own range, chosen by options [4] and [5]. */
	s->range_table_list = devpriv->dac_range_types;
	devpriv->dac_range_types[0] = dac_range_lkup(it->options[4]);
	devpriv->dac_range_types[1] = dac_range_lkup(it->options[5]);
	s->insn_write = dt2801_ao_insn_write;

	ret = comedi_alloc_subdev_readback(s);
	if (ret)
		return ret;

	s = &dev->subdevices[2];
	/* 1st digital subdevice */
	s->type = COMEDI_SUBD_DIO;
	s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
	s->n_chan = 8;
	s->maxdata = 1;
	s->range_table = &range_digital;
	s->insn_bits = dt2801_dio_insn_bits;
	s->insn_config = dt2801_dio_insn_config;

	s = &dev->subdevices[3];
	/* 2nd digital subdevice */
	s->type = COMEDI_SUBD_DIO;
	s->subdev_flags = SDF_READABLE | SDF_WRITABLE;
	s->n_chan = 8;
	s->maxdata = 1;
	s->range_table = &range_digital;
	s->insn_bits = dt2801_dio_insn_bits;
	s->insn_config = dt2801_dio_insn_config;

	ret = 0;
out:
	return ret;
}

/* Comedi driver registration: legacy attach, generic legacy detach. */
static struct comedi_driver dt2801_driver = {
	.driver_name	= "dt2801",
	.module		= THIS_MODULE,
	.attach		= dt2801_attach,
	.detach		= comedi_legacy_detach,
};
module_comedi_driver(dt2801_driver);

MODULE_AUTHOR("Comedi https://www.comedi.org");
MODULE_DESCRIPTION("Comedi low-level driver");
MODULE_LICENSE("GPL");
10 7 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 // SPDX-License-Identifier: GPL-2.0-or-later /* * Force feedback support for memoryless devices * * Copyright (c) 2006 Anssi Hannula <anssi.hannula@gmail.com> * Copyright (c) 2006 Dmitry Torokhov <dtor@mail.ru> */ /* #define DEBUG */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/export.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/spinlock.h> #include <linux/jiffies.h> #include <linux/fixp-arith.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Anssi Hannula <anssi.hannula@gmail.com>"); MODULE_DESCRIPTION("Force feedback support for memoryless devices"); /* Number of effects handled with memoryless devices */ #define FF_MEMLESS_EFFECTS 16 /* Envelope update interval in ms */ #define FF_ENVELOPE_INTERVAL 50 #define FF_EFFECT_STARTED 0 #define FF_EFFECT_PLAYING 1 #define FF_EFFECT_ABORTING 2 struct ml_effect_state { struct ff_effect *effect; unsigned long flags; /* effect state (STARTED, PLAYING, etc) */ int count; /* loop count of the effect */ unsigned long play_at; /* start time */ unsigned long stop_at; /* stop time */ unsigned long adj_at; /* last time the effect was sent */ }; struct ml_device { void *private; struct ml_effect_state states[FF_MEMLESS_EFFECTS]; int gain; struct timer_list timer; struct input_dev *dev; int (*play_effect)(struct input_dev *dev, void *data, struct ff_effect *effect); }; static const struct ff_envelope *get_envelope(const struct ff_effect *effect) { static const struct ff_envelope empty_envelope; switch (effect->type) { case FF_PERIODIC: return &effect->u.periodic.envelope; case FF_CONSTANT: return &effect->u.constant.envelope; default: return &empty_envelope; } } /* * Check for the next time envelope requires an update on memoryless devices */ static unsigned long calculate_next_time(struct ml_effect_state *state) { const struct ff_envelope *envelope = 
get_envelope(state->effect);
	unsigned long attack_stop, fade_start, next_fade;

	/* Still in the attack phase: step by one envelope interval. */
	if (envelope->attack_length) {
		attack_stop = state->play_at +
			msecs_to_jiffies(envelope->attack_length);
		if (time_before(state->adj_at, attack_stop))
			return state->adj_at +
					msecs_to_jiffies(FF_ENVELOPE_INTERVAL);
	}

	if (state->effect->replay.length) {
		if (envelope->fade_length) {
			/* check when fading should start */
			fade_start = state->stop_at -
					msecs_to_jiffies(envelope->fade_length);

			if (time_before(state->adj_at, fade_start))
				return fade_start;

			/* already fading, advance to next checkpoint */
			next_fade = state->adj_at +
					msecs_to_jiffies(FF_ENVELOPE_INTERVAL);
			if (time_before(next_fade, state->stop_at))
				return next_fade;
		}

		return state->stop_at;
	}

	/* No replay length set: nothing later than the start time to wait for. */
	return state->play_at;
}

/*
 * (Re)arm the device timer for the earliest pending event among the started
 * effects, or delete the timer if no effect has an event at or after the
 * current jiffies value.
 */
static void ml_schedule_timer(struct ml_device *ml)
{
	struct ml_effect_state *state;
	unsigned long now = jiffies;
	unsigned long earliest = 0;
	unsigned long next_at;
	int events = 0;
	int i;

	pr_debug("calculating next timer\n");

	for (i = 0; i < FF_MEMLESS_EFFECTS; i++) {

		state = &ml->states[i];

		if (!test_bit(FF_EFFECT_STARTED, &state->flags))
			continue;

		/* Playing effects wake for envelope steps or their stop time;
		 * started-but-not-playing ones wake at their start time.
		 */
		if (test_bit(FF_EFFECT_PLAYING, &state->flags))
			next_at = calculate_next_time(state);
		else
			next_at = state->play_at;

		/* The first future event always wins; later ones only if
		 * they are earlier.
		 */
		if (time_before_eq(now, next_at) &&
		    (++events == 1 || time_before(next_at, earliest)))
			earliest = next_at;
	}

	if (!events) {
		pr_debug("no actions\n");
		timer_delete(&ml->timer);
	} else {
		pr_debug("timer set\n");
		mod_timer(&ml->timer, earliest);
	}
}

/*
 * Apply an envelope to a value
 *
 * During the attack window the magnitude is interpolated from attack_level
 * towards |value|; during the fade window it is interpolated from |value|
 * towards fade_level.  Outside both windows @value is returned unchanged.
 * The sign of @value is preserved.
 */
static int apply_envelope(struct ml_effect_state *state, int value,
			  struct ff_envelope *envelope)
{
	struct ff_effect *effect = state->effect;
	unsigned long now = jiffies;
	int time_from_level;
	int time_of_envelope;
	int envelope_level;
	int difference;

	if (envelope->attack_length &&
	    time_before(now,
			state->play_at + msecs_to_jiffies(envelope->attack_length))) {
		pr_debug("value = 0x%x, attack_level = 0x%x\n",
			 value, envelope->attack_level);
		time_from_level = jiffies_to_msecs(now - state->play_at);
		time_of_envelope = envelope->attack_length;
		/* Levels are capped at 0x7fff. */
		envelope_level = min_t(u16, envelope->attack_level, 0x7fff);

	} else if (envelope->fade_length && effect->replay.length &&
		   time_after(now,
			      state->stop_at - msecs_to_jiffies(envelope->fade_length)) &&
		   time_before(now, state->stop_at)) {
		time_from_level = jiffies_to_msecs(state->stop_at - now);
		time_of_envelope = envelope->fade_length;
		envelope_level = min_t(u16, envelope->fade_level, 0x7fff);
	} else
		return value;

	difference = abs(value) - envelope_level;

	pr_debug("difference = %d\n", difference);
	pr_debug("time_from_level = 0x%x\n", time_from_level);
	pr_debug("time_of_envelope = 0x%x\n", time_of_envelope);

	/* Linear interpolation between the envelope level and |value|. */
	difference = difference * time_from_level / time_of_envelope;

	pr_debug("difference = %d\n", difference);

	return value < 0 ?
		-(difference + envelope_level) : (difference + envelope_level);
}

/*
 * Return the type the effect has to be converted into (memless devices)
 *
 * The type itself if the device supports it, FF_RUMBLE for a periodic effect
 * on a rumble-capable device, otherwise 0 (after logging an error).
 */
static int get_compatible_type(struct ff_device *ff, int effect_type)
{

	if (test_bit(effect_type, ff->ffbit))
		return effect_type;

	if (effect_type == FF_PERIODIC && test_bit(FF_RUMBLE, ff->ffbit))
		return FF_RUMBLE;

	pr_err("invalid type in get_compatible_type()\n");

	return 0;
}

/*
 * Only left/right direction should be used (under/over 0x8000) for
 * forward/reverse motor direction (to keep calculation fast & simple).
 *
 * Returns the force-weighted average of the two directions; if one force is
 * zero the other direction is returned as is.
 */
static u16 ml_calculate_direction(u16 direction, u16 force,
				  u16 new_direction, u16 new_force)
{
	if (!force)
		return new_direction;
	if (!new_force)
		return direction;
	/* Directions are halved before weighting and doubled afterwards. */
	return (((u32)(direction >> 1) * force +
		 (new_direction >> 1) * new_force) /
		(force + new_force)) << 1;
}

/* Number of bits dropped/kept by the two fixed-point helpers below. */
#define FRAC_N 8
/* Shift a 16-bit value right by (16 - FRAC_N) bits. */
static inline s16 fixp_new16(s16 a)
{
	return ((s32)a) >> (16 - FRAC_N);
}

/* Scale @a by 0x100 / 0x7fff, multiply by @b and shift right by FRAC_N. */
static inline s16 fixp_mult(s16 a, s16 b)
{
	a = ((s32)a * 0x100) / 0x7fff;
	return ((s32)(a * b)) >> FRAC_N;
}

/*
 * Combine two effects and apply gain.
*/
/*
 * Folds the effect held in @state into the accumulated @effect, scaled by
 * @gain (out of 0xffff).  Constant forces accumulate as x/y components in
 * u.ramp; rumble and periodic effects accumulate into the u.rumble
 * magnitudes, saturating at 0xffff, and update the combined direction.
 */
static void ml_combine_effects(struct ff_effect *effect,
			       struct ml_effect_state *state,
			       int gain)
{
	struct ff_effect *new = state->effect;
	unsigned int strong, weak, i;
	int x, y;
	s16 level;

	switch (new->type) {
	case FF_CONSTANT:
		/* Convert the 16-bit direction to degrees. */
		i = new->direction * 360 / 0xffff;
		level = fixp_new16(apply_envelope(state,
					new->u.constant.level,
					&new->u.constant.envelope));
		x = fixp_mult(fixp_sin16(i), level) * gain / 0xffff;
		y = fixp_mult(-fixp_cos16(i), level) * gain / 0xffff;
		/*
		 * here we abuse ff_ramp to hold x and y of constant force
		 * If in future any driver wants something else than x and y
		 * in s8, this should be changed to something more generic
		 */
		effect->u.ramp.start_level =
			clamp_val(effect->u.ramp.start_level + x, -0x80, 0x7f);
		effect->u.ramp.end_level =
			clamp_val(effect->u.ramp.end_level + y, -0x80, 0x7f);
		break;

	case FF_RUMBLE:
		strong = (u32)new->u.rumble.strong_magnitude * gain / 0xffff;
		weak = (u32)new->u.rumble.weak_magnitude * gain / 0xffff;

		/* Weight the direction by the strong motor if it is in use,
		 * else by the weak motor.
		 */
		if (effect->u.rumble.strong_magnitude + strong)
			effect->direction = ml_calculate_direction(
				effect->direction,
				effect->u.rumble.strong_magnitude,
				new->direction, strong);
		else if (effect->u.rumble.weak_magnitude + weak)
			effect->direction = ml_calculate_direction(
				effect->direction,
				effect->u.rumble.weak_magnitude,
				new->direction, weak);
		else
			effect->direction = 0;
		effect->u.rumble.strong_magnitude =
			min(strong + effect->u.rumble.strong_magnitude,
			    0xffffU);
		effect->u.rumble.weak_magnitude =
			min(weak + effect->u.rumble.weak_magnitude, 0xffffU);
		break;

	case FF_PERIODIC:
		i = apply_envelope(state, abs(new->u.periodic.magnitude),
				   &new->u.periodic.envelope);

		/* here we also scale it 0x7fff => 0xffff */
		i = i * gain / 0x7fff;

		if (effect->u.rumble.strong_magnitude + i)
			effect->direction = ml_calculate_direction(
				effect->direction,
				effect->u.rumble.strong_magnitude,
				new->direction, i);
		else
			effect->direction = 0;
		/* A periodic effect drives both motors equally. */
		effect->u.rumble.strong_magnitude =
			min(i + effect->u.rumble.strong_magnitude, 0xffffU);
		effect->u.rumble.weak_magnitude =
			min(i + effect->u.rumble.weak_magnitude, 0xffffU);
		break;

	default:
		pr_err("invalid type in ml_combine_effects()\n");
		break;
	}

}


/*
 * Because memoryless devices have only one effect per effect type active
 * at one time we have to combine multiple effects into one
 *
 * Each call builds one combined effect from the not-yet-handled effects that
 * share a (compatible) type and marks them in @effect_handled.  Returns
 * non-zero if @combo_effect holds something to play.
 */
static int ml_get_combo_effect(struct ml_device *ml,
			       unsigned long *effect_handled,
			       struct ff_effect *combo_effect)
{
	struct ff_effect *effect;
	struct ml_effect_state *state;
	int effect_type;
	int i;

	memset(combo_effect, 0, sizeof(struct ff_effect));

	for (i = 0; i < FF_MEMLESS_EFFECTS; i++) {
		/* Skip effects an earlier pass already dealt with. */
		if (__test_and_set_bit(i, effect_handled))
			continue;

		state = &ml->states[i];
		effect = state->effect;

		if (!test_bit(FF_EFFECT_STARTED, &state->flags))
			continue;

		if (time_before(jiffies, state->play_at))
			continue;

		/*
		 * here we have started effects that are either
		 * currently playing (and may need be aborted)
		 * or need to start playing.
		 */
		effect_type = get_compatible_type(ml->dev->ff, effect->type);
		if (combo_effect->type != effect_type) {
			if (combo_effect->type != 0) {
				/* Different type: leave it for a later pass. */
				__clear_bit(i, effect_handled);
				continue;
			}
			combo_effect->type = effect_type;
		}

		if (__test_and_clear_bit(FF_EFFECT_ABORTING, &state->flags)) {
			__clear_bit(FF_EFFECT_PLAYING, &state->flags);
			__clear_bit(FF_EFFECT_STARTED, &state->flags);
		} else if (effect->replay.length &&
			   time_after_eq(jiffies, state->stop_at)) {

			__clear_bit(FF_EFFECT_PLAYING, &state->flags);

			/* Finished one run: either stop for good or schedule
			 * the next repetition.
			 */
			if (--state->count <= 0) {
				__clear_bit(FF_EFFECT_STARTED, &state->flags);
			} else {
				state->play_at = jiffies +
					msecs_to_jiffies(effect->replay.delay);
				state->stop_at = state->play_at +
					msecs_to_jiffies(effect->replay.length);
			}
		} else {
			__set_bit(FF_EFFECT_PLAYING, &state->flags);
			state->adj_at = jiffies;
			ml_combine_effects(combo_effect, state, ml->gain);
		}
	}

	return combo_effect->type != 0;
}

/*
 * Play every combined effect that is currently due, then reprogram the
 * timer for the next event.
 */
static void ml_play_effects(struct ml_device *ml)
{
	struct ff_effect effect;
	DECLARE_BITMAP(handled_bm, FF_MEMLESS_EFFECTS);

	memset(handled_bm, 0, sizeof(handled_bm));

	while (ml_get_combo_effect(ml, handled_bm,
&effect))
		ml->play_effect(ml->dev, ml->private, &effect);

	ml_schedule_timer(ml);
}

/* Timer callback: replay the effects with dev->event_lock held. */
static void ml_effect_timer(struct timer_list *t)
{
	struct ml_device *ml = timer_container_of(ml, t, timer);
	struct input_dev *dev = ml->dev;

	pr_debug("timer: updating effects\n");

	guard(spinlock_irqsave)(&dev->event_lock);
	ml_play_effects(ml);
}

/*
 * Sets requested gain for FF effects. Called with dev->event_lock held.
 */
static void ml_ff_set_gain(struct input_dev *dev, u16 gain)
{
	struct ml_device *ml = dev->ff->private;
	int i;

	ml->gain = gain;

	/* Clear PLAYING on every slot before replaying with the new gain. */
	for (i = 0; i < FF_MEMLESS_EFFECTS; i++)
		__clear_bit(FF_EFFECT_PLAYING, &ml->states[i].flags);

	ml_play_effects(ml);
}

/*
 * Start/stop specified FF effect. Called with dev->event_lock held.
 *
 * A positive @value starts the effect with that repeat count; otherwise the
 * effect is stopped.  Always returns 0.
 */
static int ml_ff_playback(struct input_dev *dev, int effect_id, int value)
{
	struct ml_device *ml = dev->ff->private;
	struct ml_effect_state *state = &ml->states[effect_id];

	if (value > 0) {
		pr_debug("initiated play\n");

		__set_bit(FF_EFFECT_STARTED, &state->flags);
		state->count = value;
		state->play_at = jiffies +
				 msecs_to_jiffies(state->effect->replay.delay);
		state->stop_at = state->play_at +
				 msecs_to_jiffies(state->effect->replay.length);
		state->adj_at = state->play_at;

	} else {
		pr_debug("initiated stop\n");

		/* A playing effect is flagged for abort so the next replay
		 * pass clears it; an idle one is simply un-started.
		 */
		if (test_bit(FF_EFFECT_PLAYING, &state->flags))
			__set_bit(FF_EFFECT_ABORTING, &state->flags);
		else
			__clear_bit(FF_EFFECT_STARTED, &state->flags);
	}

	ml_play_effects(ml);

	return 0;
}

/*
 * Upload hook: if the slot being (re)uploaded is already started, restart
 * its timing from now and reschedule the timer.  Takes dev->event_lock
 * itself.  Always returns 0.
 */
static int ml_ff_upload(struct input_dev *dev,
			struct ff_effect *effect, struct ff_effect *old)
{
	struct ml_device *ml = dev->ff->private;
	struct ml_effect_state *state = &ml->states[effect->id];

	guard(spinlock_irq)(&dev->event_lock);

	if (test_bit(FF_EFFECT_STARTED, &state->flags)) {
		__clear_bit(FF_EFFECT_PLAYING, &state->flags);
		state->play_at = jiffies +
				 msecs_to_jiffies(state->effect->replay.delay);
		state->stop_at = state->play_at +
				 msecs_to_jiffies(state->effect->replay.length);
		state->adj_at = state->play_at;
		ml_schedule_timer(ml);
	}

	return 0;
}

/* Destroy hook: stop the timer and free the driver-specific data. */
static void ml_ff_destroy(struct ff_device *ff)
{
	struct ml_device *ml = ff->private;

	/*
	 * Even though we stop all playing effects when tearing down
	 * an input device (via input_device_flush() that calls into
	 * input_ff_flush() that stops and erases all effects), we
	 * do not actually stop the timer, and therefore we should
	 * do it here.
	 */
	timer_delete_sync(&ml->timer);

	kfree(ml->private);
}

/**
 * input_ff_create_memless() - create memoryless force-feedback device
 * @dev: input device supporting force-feedback
 * @data: driver-specific data to be passed into @play_effect
 * @play_effect: driver-specific method for playing FF effect
 *
 * Return: 0 on success, -ENOMEM if the state cannot be allocated, or the
 * error returned by input_ff_create().
 */
int input_ff_create_memless(struct input_dev *dev, void *data,
		int (*play_effect)(struct input_dev *, void *, struct ff_effect *))
{
	struct ff_device *ff;
	int error;
	int i;

	/* __free(kfree): freed automatically on the early-return paths. */
	struct ml_device *ml __free(kfree) = kzalloc(sizeof(*ml), GFP_KERNEL);
	if (!ml)
		return -ENOMEM;

	ml->dev = dev;
	ml->private = data;
	ml->play_effect = play_effect;
	ml->gain = 0xffff;
	timer_setup(&ml->timer, ml_effect_timer, 0);

	set_bit(FF_GAIN, dev->ffbit);

	error = input_ff_create(dev, FF_MEMLESS_EFFECTS);
	if (error)
		return error;

	ff = dev->ff;
	ff->upload = ml_ff_upload;
	ff->playback = ml_ff_playback;
	ff->set_gain = ml_ff_set_gain;
	ff->destroy = ml_ff_destroy;

	/* we can emulate periodic effects with RUMBLE */
	if (test_bit(FF_RUMBLE, ff->ffbit)) {
		set_bit(FF_PERIODIC, dev->ffbit);
		set_bit(FF_SINE, dev->ffbit);
		set_bit(FF_TRIANGLE, dev->ffbit);
		set_bit(FF_SQUARE, dev->ffbit);
	}

	/* Bind each state slot to the matching effect slot of the core. */
	for (i = 0; i < FF_MEMLESS_EFFECTS; i++)
		ml->states[i].effect = &ff->effects[i];

	/* Hand ownership to the ff device; cancels the automatic kfree. */
	ff->private = no_free_ptr(ml);

	return 0;
}
EXPORT_SYMBOL_GPL(input_ff_create_memless);
1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 // SPDX-License-Identifier: GPL-2.0-or-later /* RxRPC packet reception * * Copyright (C) 2007, 2016, 2022 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include "ar-internal.h" static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb); /* * handle data received on the local endpoint * - may be called in interrupt context * * [!] Note that as this is called from the encap_rcv hook, the socket is not * held locked by the caller and nothing prevents sk_user_data on the UDP from * being cleared in the middle of processing this function. * * Called with the RCU read lock held from the IP layer via UDP. */ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb) { struct sk_buff_head *rx_queue; struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); struct task_struct *io_thread; if (unlikely(!local)) { kfree_skb(skb); return 0; } io_thread = READ_ONCE(local->io_thread); if (!io_thread) { kfree_skb(skb); return 0; } if (skb->tstamp == 0) skb->tstamp = ktime_get_real(); skb->mark = RXRPC_SKB_MARK_PACKET; rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv); rx_queue = &local->rx_queue; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY if (rxrpc_inject_rx_delay || !skb_queue_empty(&local->rx_delay_queue)) { skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay); rx_queue = &local->rx_delay_queue; } #endif skb_queue_tail(rx_queue, skb); wake_up_process(io_thread); return 0; } /* * Handle an error received on the local endpoint. 
*/ void rxrpc_error_report(struct sock *sk) { struct rxrpc_local *local; struct sk_buff *skb; rcu_read_lock(); local = rcu_dereference_sk_user_data(sk); if (unlikely(!local)) { rcu_read_unlock(); return; } while ((skb = skb_dequeue(&sk->sk_error_queue))) { skb->mark = RXRPC_SKB_MARK_ERROR; rxrpc_new_skb(skb, rxrpc_skb_new_error_report); skb_queue_tail(&local->rx_queue, skb); } rxrpc_wake_up_io_thread(local); rcu_read_unlock(); } /* * Directly produce an abort from a packet. */ bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, s32 abort_code, int err) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, abort_code, err); skb->mark = RXRPC_SKB_MARK_REJECT_ABORT; skb->priority = abort_code; return false; } /* * Directly produce a connection abort from a packet. */ bool rxrpc_direct_conn_abort(struct sk_buff *skb, enum rxrpc_abort_reason why, s32 abort_code, int err) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); trace_rxrpc_abort(0, why, sp->hdr.cid, 0, sp->hdr.seq, abort_code, err); skb->mark = RXRPC_SKB_MARK_REJECT_CONN_ABORT; skb->priority = abort_code; return false; } static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why) { return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG); } #define just_discard true /* * Process event packets targeted at a local endpoint. */ static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); char v; _enter(""); rxrpc_see_skb(skb, rxrpc_skb_see_version); if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &v, 1) >= 0) { if (v == 0) rxrpc_send_version_request(local, &sp->hdr, skb); } return true; } /* * Extract the wire header from a packet and translate the byte order. 
*/ static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) { struct rxrpc_wire_header whdr; struct rxrpc_ackpacket ack; /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); sp->hdr.cid = ntohl(whdr.cid); sp->hdr.callNumber = ntohl(whdr.callNumber); sp->hdr.seq = ntohl(whdr.seq); sp->hdr.serial = ntohl(whdr.serial); sp->hdr.flags = whdr.flags; sp->hdr.type = whdr.type; sp->hdr.userStatus = whdr.userStatus; sp->hdr.securityIndex = whdr.securityIndex; sp->hdr._rsvd = ntohs(whdr._rsvd); sp->hdr.serviceId = ntohs(whdr.serviceId); if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { if (skb_copy_bits(skb, sizeof(whdr), &ack, sizeof(ack)) < 0) return rxrpc_bad_message(skb, rxrpc_badmsg_short_ack); sp->ack.first_ack = ntohl(ack.firstPacket); sp->ack.prev_ack = ntohl(ack.previousPacket); sp->ack.acked_serial = ntohl(ack.serial); sp->ack.reason = ack.reason; sp->ack.nr_acks = ack.nAcks; } return true; } /* * Extract the abort code from an ABORT packet and stash it in skb->priority. 
*/ static bool rxrpc_extract_abort(struct sk_buff *skb) { __be32 wtmp; if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), &wtmp, sizeof(wtmp)) < 0) return false; skb->priority = ntohl(wtmp); return true; } /* * Process packets received on the local endpoint */ static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb) { struct rxrpc_connection *conn; struct sockaddr_rxrpc peer_srx; struct rxrpc_skb_priv *sp; struct rxrpc_peer *peer = NULL; struct sk_buff *skb = *_skb; bool ret = false; skb_pull(skb, sizeof(struct udphdr)); sp = rxrpc_skb(skb); /* dig out the RxRPC connection details */ if (!rxrpc_extract_header(sp, skb)) return just_discard; if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { static int lose; if ((lose++ & 7) == 7) { trace_rxrpc_rx_lose(sp); return just_discard; } } trace_rxrpc_rx_packet(sp); switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: if (rxrpc_to_client(sp)) return just_discard; return rxrpc_input_version(local, skb); case RXRPC_PACKET_TYPE_BUSY: if (rxrpc_to_server(sp)) return just_discard; fallthrough; case RXRPC_PACKET_TYPE_ACK: case RXRPC_PACKET_TYPE_ACKALL: if (sp->hdr.callNumber == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); break; case RXRPC_PACKET_TYPE_ABORT: if (!rxrpc_extract_abort(skb)) return just_discard; /* Just discard if malformed */ break; case RXRPC_PACKET_TYPE_DATA: if (sp->hdr.callNumber == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call); if (sp->hdr.seq == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq); /* Unshare the packet so that it can be modified for in-place * decryption. 
*/ if (sp->hdr.securityIndex != 0) { skb = skb_unshare(skb, GFP_ATOMIC); if (!skb) { rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem); *_skb = NULL; return just_discard; } if (skb != *_skb) { rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare); *_skb = skb; rxrpc_new_skb(skb, rxrpc_skb_new_unshared); sp = rxrpc_skb(skb); } } break; case RXRPC_PACKET_TYPE_CHALLENGE: if (rxrpc_to_server(sp)) return just_discard; break; case RXRPC_PACKET_TYPE_RESPONSE: if (rxrpc_to_client(sp)) return just_discard; break; /* Packet types 9-11 should just be ignored. */ case RXRPC_PACKET_TYPE_PARAMS: case RXRPC_PACKET_TYPE_10: case RXRPC_PACKET_TYPE_11: return just_discard; default: return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet); } if (sp->hdr.serviceId == 0) return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service); if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0)) return just_discard; /* Unsupported address type. */ if (peer_srx.transport.family != local->srx.transport.family && (peer_srx.transport.family == AF_INET && local->srx.transport.family != AF_INET6)) { pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n", peer_srx.transport.family, local->srx.transport.family); return just_discard; /* Wrong address type. */ } if (rxrpc_to_client(sp)) { rcu_read_lock(); conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb); conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); rcu_read_unlock(); if (!conn) return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn); ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); rxrpc_put_connection(conn, rxrpc_conn_put_call_input); return ret; } /* We need to look up service connections by the full protocol * parameter set. We look up the peer first as an intermediate step * and then the connection from the peer's tree. 
*/ rcu_read_lock(); peer = rxrpc_lookup_peer_rcu(local, &peer_srx); if (!peer) { rcu_read_unlock(); return rxrpc_new_incoming_call(local, NULL, NULL, &peer_srx, skb); } conn = rxrpc_find_service_conn_rcu(peer, skb); conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input); if (conn) { rcu_read_unlock(); ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb); rxrpc_put_connection(conn, rxrpc_conn_put_call_input); return ret; } peer = rxrpc_get_peer_maybe(peer, rxrpc_peer_get_input); rcu_read_unlock(); ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb); rxrpc_put_peer(peer, rxrpc_peer_put_input); return ret; } /* * Deal with a packet that's associated with an extant connection. */ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, struct sockaddr_rxrpc *peer_srx, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_channel *chan; struct rxrpc_call *call = NULL; unsigned int channel; if (sp->hdr.securityIndex != conn->security_ix) return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security, RXKADINCONSISTENCY, -EBADMSG); if (sp->hdr.serviceId != conn->service_id) { int old_id; if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags)) return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade); old_id = cmpxchg(&conn->service_id, conn->orig_service_id, sp->hdr.serviceId); if (old_id != conn->orig_service_id && old_id != sp->hdr.serviceId) return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade); } if (after(sp->hdr.serial, conn->hi_serial)) conn->hi_serial = sp->hdr.serial; /* It's a connection-level packet if the call number is 0. */ if (sp->hdr.callNumber == 0) return rxrpc_input_conn_packet(conn, skb); /* Deal with path MTU discovery probing. */ if (sp->hdr.type == RXRPC_PACKET_TYPE_ACK && conn->pmtud_probe && after_eq(sp->ack.acked_serial, conn->pmtud_probe)) rxrpc_input_probe_for_pmtud(conn, sp->ack.acked_serial, false); /* Call-bound packets are routed by connection channel. 
*/ channel = sp->hdr.cid & RXRPC_CHANNELMASK; chan = &conn->channels[channel]; /* Ignore really old calls */ if (sp->hdr.callNumber < chan->last_call) return just_discard; if (sp->hdr.callNumber == chan->last_call) { if (chan->call || sp->hdr.type == RXRPC_PACKET_TYPE_ABORT) return just_discard; /* For the previous service call, if completed successfully, we * discard all further packets. */ if (rxrpc_conn_is_service(conn) && chan->last_type == RXRPC_PACKET_TYPE_ACK) return just_discard; /* But otherwise we need to retransmit the final packet from * data cached in the connection record. */ if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) trace_rxrpc_rx_data(chan->call_debug_id, sp->hdr.seq, sp->hdr.serial, sp->hdr.flags); rxrpc_conn_retransmit_call(conn, skb, channel); return just_discard; } call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input); if (sp->hdr.callNumber > chan->call_id) { if (rxrpc_to_client(sp)) { rxrpc_put_call(call, rxrpc_call_put_input); return rxrpc_protocol_error(skb, rxrpc_eproto_unexpected_implicit_end); } if (call) { rxrpc_implicit_end_call(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); call = NULL; } } if (!call) { if (rxrpc_to_client(sp)) return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call); return rxrpc_new_incoming_call(conn->local, conn->peer, conn, peer_srx, skb); } rxrpc_queue_rx_call_packet(call, skb); rxrpc_put_call(call, rxrpc_call_put_input); return true; } /* * I/O and event handling thread. */ int rxrpc_io_thread(void *data) { struct rxrpc_connection *conn; struct sk_buff_head rx_queue; struct rxrpc_local *local = data; struct rxrpc_call *call; struct sk_buff *skb; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY ktime_t now; #endif bool should_stop; LIST_HEAD(conn_attend_q); LIST_HEAD(call_attend_q); complete(&local->io_thread_ready); skb_queue_head_init(&rx_queue); set_user_nice(current, MIN_NICE); for (;;) { rxrpc_inc_stat(local->rxnet, stat_io_loop); /* Inject a delay into packets if requested. 
*/ #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY now = ktime_get_real(); while ((skb = skb_peek(&local->rx_delay_queue))) { if (ktime_before(now, skb->tstamp)) break; skb = skb_dequeue(&local->rx_delay_queue); skb_queue_tail(&local->rx_queue, skb); } #endif if (!skb_queue_empty(&local->rx_queue)) { spin_lock_irq(&local->rx_queue.lock); skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); spin_unlock_irq(&local->rx_queue.lock); trace_rxrpc_iothread_rx(local, skb_queue_len(&rx_queue)); } /* Distribute packets and errors. */ while ((skb = __skb_dequeue(&rx_queue))) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); switch (skb->mark) { case RXRPC_SKB_MARK_PACKET: skb->priority = 0; if (!rxrpc_input_packet(local, &skb)) rxrpc_reject_packet(local, skb); trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_free_skb(skb, rxrpc_skb_put_input); break; case RXRPC_SKB_MARK_ERROR: rxrpc_input_error(local, skb); rxrpc_free_skb(skb, rxrpc_skb_put_error_report); break; case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: rxrpc_input_conn_event(sp->poke_conn, skb); rxrpc_put_connection(sp->poke_conn, rxrpc_conn_put_poke); rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured); break; default: WARN_ON_ONCE(1); rxrpc_free_skb(skb, rxrpc_skb_put_unknown); break; } } /* Deal with connections that want immediate attention. */ if (!list_empty_careful(&local->conn_attend_q)) { spin_lock_irq(&local->lock); list_splice_tail_init(&local->conn_attend_q, &conn_attend_q); spin_unlock_irq(&local->lock); } while ((conn = list_first_entry_or_null(&conn_attend_q, struct rxrpc_connection, attend_link))) { spin_lock_irq(&local->lock); list_del_init(&conn->attend_link); spin_unlock_irq(&local->lock); rxrpc_input_conn_event(conn, NULL); rxrpc_put_connection(conn, rxrpc_conn_put_poke); } if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) rxrpc_discard_expired_client_conns(local); /* Deal with calls that want immediate attention. 
*/ spin_lock_irq(&local->lock); list_splice_tail_init(&local->call_attend_q, &call_attend_q); spin_unlock_irq(&local->lock); while ((call = list_first_entry_or_null(&call_attend_q, struct rxrpc_call, attend_link))) { spin_lock_irq(&local->lock); list_del_init(&call->attend_link); spin_unlock_irq(&local->lock); trace_rxrpc_call_poked(call); rxrpc_input_call_event(call); rxrpc_put_call(call, rxrpc_call_put_poke); } if (!list_empty(&local->new_client_calls)) rxrpc_connect_client_calls(local); set_current_state(TASK_INTERRUPTIBLE); should_stop = kthread_should_stop(); if (!skb_queue_empty(&local->rx_queue) || !list_empty(&local->call_attend_q) || !list_empty(&local->conn_attend_q) || !list_empty(&local->new_client_calls) || test_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags)) { __set_current_state(TASK_RUNNING); continue; } if (should_stop) break; #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY skb = skb_peek(&local->rx_delay_queue); if (skb) { unsigned long timeout; ktime_t tstamp = skb->tstamp; ktime_t now = ktime_get_real(); s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now)); if (delay_ns <= 0) { __set_current_state(TASK_RUNNING); continue; } timeout = nsecs_to_jiffies(delay_ns); timeout = umax(timeout, 1); schedule_timeout(timeout); __set_current_state(TASK_RUNNING); continue; } #endif schedule(); } __set_current_state(TASK_RUNNING); rxrpc_see_local(local, rxrpc_local_stop); rxrpc_destroy_local(local); WRITE_ONCE(local->io_thread, NULL); rxrpc_see_local(local, rxrpc_local_stopped); return 0; }
2 1 4 3 2 2 1 2 1 1 2 106 9 28 7 1 7 2 26 8 4 4 19 20 4 1 4 3 10 113 112 1 13 20 15 1 2 12 1 8 1 1 6 5 25 1 1 21 4 3 17 15 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 // SPDX-License-Identifier: GPL-2.0-or-later #include <linux/syscalls.h> #include <linux/time_namespace.h> #include "futex.h" /* * Support for robust futexes: the kernel cleans up held futexes at * thread exit time. * * Implementation: user-space maintains a per-thread list of locks it * is holding. Upon do_exit(), the kernel carefully walks this list, * and marks all locks that are owned by this thread with the * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is * always manipulated with the lock held, so the list is private and * per-thread. Userspace also maintains a per-thread 'list_op_pending' * field, to allow the kernel to clean up if the thread dies after * acquiring the lock, but just before it could have added itself to * the list. There can only be one such pending lock. */ /** * sys_set_robust_list() - Set the robust-futex list head of a task * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, size_t, len) { /* * The kernel knows only one size for now: */ if (unlikely(len != sizeof(*head))) return -EINVAL; current->robust_list = head; return 0; } /** * sys_get_robust_list() - Get the robust-futex list head of a task * @pid: pid of the process [zero for current task] * @head_ptr: pointer to a list-head pointer, the kernel fills it in * @len_ptr: pointer to a length field, the kernel fills in the header size */ SYSCALL_DEFINE3(get_robust_list, int, pid, struct robust_list_head __user * __user *, head_ptr, size_t __user *, len_ptr) { struct robust_list_head __user *head; unsigned long ret; struct task_struct *p; rcu_read_lock(); ret = -ESRCH; if (!pid) p = current; else { p = find_task_by_vpid(pid); if (!p) goto err_unlock; } ret = -EPERM; if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->robust_list; rcu_read_unlock(); if 
(put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(head, head_ptr); err_unlock: rcu_read_unlock(); return ret; } long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3) { unsigned int flags = futex_to_flags(op); int cmd = op & FUTEX_CMD_MASK; if (flags & FLAGS_CLOCKRT) { if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI && cmd != FUTEX_LOCK_PI2) return -ENOSYS; } switch (cmd) { case FUTEX_WAIT: val3 = FUTEX_BITSET_MATCH_ANY; fallthrough; case FUTEX_WAIT_BITSET: return futex_wait(uaddr, flags, val, timeout, val3); case FUTEX_WAKE: val3 = FUTEX_BITSET_MATCH_ANY; fallthrough; case FUTEX_WAKE_BITSET: return futex_wake(uaddr, flags, val, val3); case FUTEX_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0); case FUTEX_CMP_REQUEUE: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0); case FUTEX_WAKE_OP: return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3); case FUTEX_LOCK_PI: flags |= FLAGS_CLOCKRT; fallthrough; case FUTEX_LOCK_PI2: return futex_lock_pi(uaddr, flags, timeout, 0); case FUTEX_UNLOCK_PI: return futex_unlock_pi(uaddr, flags); case FUTEX_TRYLOCK_PI: return futex_lock_pi(uaddr, flags, NULL, 1); case FUTEX_WAIT_REQUEUE_PI: val3 = FUTEX_BITSET_MATCH_ANY; return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3, uaddr2); case FUTEX_CMP_REQUEUE_PI: return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1); } return -ENOSYS; } static __always_inline bool futex_cmd_has_timeout(u32 cmd) { switch (cmd) { case FUTEX_WAIT: case FUTEX_LOCK_PI: case FUTEX_LOCK_PI2: case FUTEX_WAIT_BITSET: case FUTEX_WAIT_REQUEUE_PI: return true; } return false; } static __always_inline int futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t) { if (!timespec64_valid(ts)) return -EINVAL; *t = timespec64_to_ktime(*ts); if (cmd == FUTEX_WAIT) *t = ktime_add_safe(ktime_get(), *t); else if (cmd != FUTEX_LOCK_PI && !(op & 
FUTEX_CLOCK_REALTIME)) *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t); return 0; } SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, const struct __kernel_timespec __user *, utime, u32 __user *, uaddr2, u32, val3) { int ret, cmd = op & FUTEX_CMD_MASK; ktime_t t, *tp = NULL; struct timespec64 ts; if (utime && futex_cmd_has_timeout(cmd)) { if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) return -EFAULT; if (get_timespec64(&ts, utime)) return -EFAULT; ret = futex_init_timeout(cmd, op, &ts, &t); if (ret) return ret; tp = &t; } return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } /** * futex_parse_waitv - Parse a waitv array from userspace * @futexv: Kernel side list of waiters to be filled * @uwaitv: Userspace list to be parsed * @nr_futexes: Length of futexv * @wake: Wake to call when futex is woken * @wake_data: Data for the wake handler * * Return: Error code on failure, 0 on success */ int futex_parse_waitv(struct futex_vector *futexv, struct futex_waitv __user *uwaitv, unsigned int nr_futexes, futex_wake_fn *wake, void *wake_data) { struct futex_waitv aux; unsigned int i; for (i = 0; i < nr_futexes; i++) { unsigned int flags; if (copy_from_user(&aux, &uwaitv[i], sizeof(aux))) return -EFAULT; if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved) return -EINVAL; flags = futex2_to_flags(aux.flags); if (!futex_flags_valid(flags)) return -EINVAL; if (!futex_validate_input(flags, aux.val)) return -EINVAL; futexv[i].w.flags = flags; futexv[i].w.val = aux.val; futexv[i].w.uaddr = aux.uaddr; futexv[i].q = futex_q_init; futexv[i].q.wake = wake; futexv[i].q.wake_data = wake_data; } return 0; } static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, clockid_t clockid, struct hrtimer_sleeper *to) { int flag_clkid = 0, flag_init = 0; struct timespec64 ts; ktime_t time; int ret; if (!timeout) return 0; if (clockid == CLOCK_REALTIME) { flag_clkid = FLAGS_CLOCKRT; flag_init = FUTEX_CLOCK_REALTIME; } if (clockid != 
CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) return -EINVAL; if (get_timespec64(&ts, timeout)) return -EFAULT; /* * Since there's no opcode for futex_waitv, use * FUTEX_WAIT_BITSET that uses absolute timeout as well */ ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); if (ret) return ret; futex_setup_timer(&time, to, flag_clkid, 0); return 0; } static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) { hrtimer_cancel(&to->timer); destroy_hrtimer_on_stack(&to->timer); } /** * sys_futex_waitv - Wait on a list of futexes * @waiters: List of futexes to wait on * @nr_futexes: Length of futexv * @flags: Flag for timeout (monotonic/realtime) * @timeout: Optional absolute timeout. * @clockid: Clock to be used for the timeout, realtime or monotonic. * * Given an array of `struct futex_waitv`, wait on each uaddr. The thread wakes * if a futex_wake() is performed at any uaddr. The syscall returns immediately * if any waiter has *uaddr != val. *timeout is an optional timeout value for * the operation. Each waiter has individual flags. The `flags` argument for * the syscall should be used solely for specifying the timeout as realtime, if * needed. Flags for private futexes, sizes, etc. should be used on the * individual flags of each waiter. * * Returns the array index of one of the woken futexes. No further information * is provided: any number of other futexes may also have been woken by the * same event, and if more than one futex was woken, the retrned index may * refer to any one of them. (It is not necessaryily the futex with the * smallest index, nor the one most recently woken, nor...) 
*/ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, unsigned int, nr_futexes, unsigned int, flags, struct __kernel_timespec __user *, timeout, clockid_t, clockid) { struct hrtimer_sleeper to; struct futex_vector *futexv; int ret; /* This syscall supports no flags for now */ if (flags) return -EINVAL; if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) return -EINVAL; if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) return ret; futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); if (!futexv) { ret = -ENOMEM; goto destroy_timer; } ret = futex_parse_waitv(futexv, waiters, nr_futexes, futex_wake_mark, NULL); if (!ret) ret = futex_wait_multiple(futexv, nr_futexes, timeout ? &to : NULL); kfree(futexv); destroy_timer: if (timeout) futex2_destroy_timeout(&to); return ret; } /* * sys_futex_wake - Wake a number of futexes * @uaddr: Address of the futex(es) to wake * @mask: bitmask * @nr: Number of the futexes to wake * @flags: FUTEX2 flags * * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the * futex2 family of calls. */ SYSCALL_DEFINE4(futex_wake, void __user *, uaddr, unsigned long, mask, int, nr, unsigned int, flags) { if (flags & ~FUTEX2_VALID_MASK) return -EINVAL; flags = futex2_to_flags(flags); if (!futex_flags_valid(flags)) return -EINVAL; if (!futex_validate_input(flags, mask)) return -EINVAL; return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); } /* * sys_futex_wait - Wait on a futex * @uaddr: Address of the futex to wait on * @val: Value of @uaddr * @mask: bitmask * @flags: FUTEX2 flags * @timeout: Optional absolute timeout * @clockid: Clock to be used for the timeout, realtime or monotonic * * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the * futex2 familiy of calls. 
*/ SYSCALL_DEFINE6(futex_wait, void __user *, uaddr, unsigned long, val, unsigned long, mask, unsigned int, flags, struct __kernel_timespec __user *, timeout, clockid_t, clockid) { struct hrtimer_sleeper to; int ret; if (flags & ~FUTEX2_VALID_MASK) return -EINVAL; flags = futex2_to_flags(flags); if (!futex_flags_valid(flags)) return -EINVAL; if (!futex_validate_input(flags, val) || !futex_validate_input(flags, mask)) return -EINVAL; if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) return ret; ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); if (timeout) futex2_destroy_timeout(&to); return ret; } /* * sys_futex_requeue - Requeue a waiter from one futex to another * @waiters: array describing the source and destination futex * @flags: unused * @nr_wake: number of futexes to wake * @nr_requeue: number of futexes to requeue * * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the * futex2 family of calls. */ SYSCALL_DEFINE4(futex_requeue, struct futex_waitv __user *, waiters, unsigned int, flags, int, nr_wake, int, nr_requeue) { struct futex_vector futexes[2]; u32 cmpval; int ret; if (flags) return -EINVAL; if (!waiters) return -EINVAL; ret = futex_parse_waitv(futexes, waiters, 2, futex_wake_mark, NULL); if (ret) return ret; cmpval = futexes[0].w.val; return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, nr_wake, nr_requeue, &cmpval, 0); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, compat_size_t, len) { if (unlikely(len != sizeof(*head))) return -EINVAL; current->compat_robust_list = head; return 0; } COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid, compat_uptr_t __user *, head_ptr, compat_size_t __user *, len_ptr) { struct compat_robust_list_head __user *head; unsigned long ret; struct task_struct *p; rcu_read_lock(); ret = -ESRCH; if (!pid) p = current; else { p = 
find_task_by_vpid(pid); if (!p) goto err_unlock; } ret = -EPERM; if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->compat_robust_list; rcu_read_unlock(); if (put_user(sizeof(*head), len_ptr)) return -EFAULT; return put_user(ptr_to_compat(head), head_ptr); err_unlock: rcu_read_unlock(); return ret; } #endif /* CONFIG_COMPAT */ #ifdef CONFIG_COMPAT_32BIT_TIME SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val, const struct old_timespec32 __user *, utime, u32 __user *, uaddr2, u32, val3) { int ret, cmd = op & FUTEX_CMD_MASK; ktime_t t, *tp = NULL; struct timespec64 ts; if (utime && futex_cmd_has_timeout(cmd)) { if (get_old_timespec32(&ts, utime)) return -EFAULT; ret = futex_init_timeout(cmd, op, &ts, &t); if (ret) return ret; tp = &t; } return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3); } #endif /* CONFIG_COMPAT_32BIT_TIME */
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 /* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2007 Oracle. All rights reserved. */ #ifndef BTRFS_DISK_IO_H #define BTRFS_DISK_IO_H #include <linux/sizes.h> #include <linux/compiler_types.h> #include "ctree.h" #include "fs.h" struct block_device; struct super_block; struct extent_buffer; struct btrfs_device; struct btrfs_fs_devices; struct btrfs_fs_info; struct btrfs_super_block; struct btrfs_trans_handle; struct btrfs_tree_parent_check; struct btrfs_transaction; #define BTRFS_SUPER_MIRROR_MAX 3 #define BTRFS_SUPER_MIRROR_SHIFT 12 /* * Fixed blocksize for all devices, applies to specific ways of reading * metadata like superblock. Must meet the set_blocksize requirements. * * Do not change. 
*/ #define BTRFS_BDEV_BLOCKSIZE (4096) static inline u64 btrfs_sb_offset(int mirror) { u64 start = SZ_16K; if (mirror) return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror); return BTRFS_SUPER_INFO_OFFSET; } void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info); void btrfs_init_fs_info(struct btrfs_fs_info *fs_info); struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, struct btrfs_tree_parent_check *check); struct extent_buffer *btrfs_find_create_tree_block( struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root, int level); int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info); int btrfs_check_super_csum(struct btrfs_fs_info *fs_info, const struct btrfs_super_block *disk_sb); int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices); void __cold close_ctree(struct btrfs_fs_info *fs_info); int btrfs_validate_super(const struct btrfs_fs_info *fs_info, const struct btrfs_super_block *sb, int mirror_num); int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount); int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors); int btrfs_commit_super(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, const struct btrfs_key *key); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, bool check_ref); struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info, u64 objectid, dev_t *anon_dev); struct btrfs_root *btrfs_get_fs_root_commit_root(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 objectid); int btrfs_global_root_insert(struct btrfs_root *root); void btrfs_global_root_delete(struct btrfs_root *root); struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info, struct btrfs_key *key); struct btrfs_root *btrfs_csum_root(struct 
btrfs_fs_info *fs_info, u64 bytenr); struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr); void btrfs_free_fs_info(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info); void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info); void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); int btrfs_validate_extent_buffer(struct extent_buffer *eb, const struct btrfs_tree_parent_check *check); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info); #endif /* * This function is used to grab the root, and avoid it is freed when we * access it. But it doesn't ensure that the tree is not dropped. */ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root) { if (!root) return NULL; if (refcount_inc_not_zero(&root->refs)) return root; return NULL; } void btrfs_put_root(struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans, struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, bool atomic); int btrfs_read_extent_buffer(struct extent_buffer *buf, const struct btrfs_tree_parent_check *check); int btree_csum_one_bio(struct btrfs_bio *bbio); int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_add_log_tree(struct btrfs_trans_handle *trans, struct btrfs_root *root); void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info); void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans); struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid); int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags); int btrfs_get_free_objectid(struct btrfs_root *root, u64 *objectid); int btrfs_init_root_free_objectid(struct 
btrfs_root *root); #endif
5 5 9 9 3 3 3 3 1 3 3 3 3 7 18 20 2 18 124 122 254 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 // SPDX-License-Identifier: GPL-2.0-only #include <linux/export.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> #include <linux/fs.h> #include <linux/path.h> #include <linux/slab.h> #include <linux/fs_struct.h> #include "internal.h" /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. */ void set_fs_root(struct fs_struct *fs, const struct path *path) { struct path old_root; path_get(path); write_seqlock(&fs->seq); old_root = fs->root; fs->root = *path; write_sequnlock(&fs->seq); if (old_root.dentry) path_put(&old_root); } /* * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. * It can block. 
*/ void set_fs_pwd(struct fs_struct *fs, const struct path *path) { struct path old_pwd; path_get(path); write_seqlock(&fs->seq); old_pwd = fs->pwd; fs->pwd = *path; write_sequnlock(&fs->seq); if (old_pwd.dentry) path_put(&old_pwd); } static inline int replace_path(struct path *p, const struct path *old, const struct path *new) { if (likely(p->dentry != old->dentry || p->mnt != old->mnt)) return 0; *p = *new; return 1; } void chroot_fs_refs(const struct path *old_root, const struct path *new_root) { struct task_struct *g, *p; struct fs_struct *fs; int count = 0; read_lock(&tasklist_lock); for_each_process_thread(g, p) { task_lock(p); fs = p->fs; if (fs) { int hits = 0; write_seqlock(&fs->seq); hits += replace_path(&fs->root, old_root, new_root); hits += replace_path(&fs->pwd, old_root, new_root); while (hits--) { count++; path_get(new_root); } write_sequnlock(&fs->seq); } task_unlock(p); } read_unlock(&tasklist_lock); while (count--) path_put(old_root); } void free_fs_struct(struct fs_struct *fs) { path_put(&fs->root); path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } void exit_fs(struct task_struct *tsk) { struct fs_struct *fs = tsk->fs; if (fs) { int kill; task_lock(tsk); read_seqlock_excl(&fs->seq); tsk->fs = NULL; kill = !--fs->users; read_sequnlock_excl(&fs->seq); task_unlock(tsk); if (kill) free_fs_struct(fs); } } struct fs_struct *copy_fs_struct(struct fs_struct *old) { struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); /* We don't need to lock fs - think why ;-) */ if (fs) { fs->users = 1; fs->in_exec = 0; seqlock_init(&fs->seq); fs->umask = old->umask; read_seqlock_excl(&old->seq); fs->root = old->root; path_get(&fs->root); fs->pwd = old->pwd; path_get(&fs->pwd); read_sequnlock_excl(&old->seq); } return fs; } int unshare_fs_struct(void) { struct fs_struct *fs = current->fs; struct fs_struct *new_fs = copy_fs_struct(fs); int kill; if (!new_fs) return -ENOMEM; task_lock(current); read_seqlock_excl(&fs->seq); kill = !--fs->users; current->fs 
= new_fs; read_sequnlock_excl(&fs->seq); task_unlock(current); if (kill) free_fs_struct(fs); return 0; } EXPORT_SYMBOL_GPL(unshare_fs_struct); int current_umask(void) { return current->fs->umask; } EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, .seq = __SEQLOCK_UNLOCKED(init_fs.seq), .umask = 0022, };
3 1 3 2 1 3 3 1 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 // SPDX-License-Identifier: GPL-2.0+ /* * comedi/drivers/ni_usb6501.c * Comedi driver for National Instruments USB-6501 * * COMEDI - Linux Control and Measurement Device Interface * Copyright (C) 2014 Luca Ellero <luca.ellero@brickedbrain.com> */ /* * Driver: ni_usb6501 * Description: National Instruments USB-6501 module * Devices: [National Instruments] USB-6501 (ni_usb6501) * Author: Luca Ellero <luca.ellero@brickedbrain.com> * Updated: 8 Sep 2014 * Status: works * * * Configuration Options: * none */ /* * NI-6501 - USB PROTOCOL DESCRIPTION * * Every command is composed by two USB packets: * - request (out) * - response (in) * * Every packet is at least 12 bytes long, here is the meaning of * every field (all values are hex): * * byte 0 is always 00 * byte 1 is always 01 * byte 2 is always 00 * byte 3 is the total packet length * * byte 4 is always 00 * byte 5 is the total packet length - 4 * byte 6 is always 01 * byte 7 is the command * * byte 8 is 02 (request) or 00 (response) * byte 9 is 00 (response) or 10 (port request) or 20 (counter request) * byte 10 is always 00 * byte 11 is 00 (request) or 02 (response) * * PORT PACKETS * * CMD: 0xE READ_PORT * REQ: 00 01 00 10 00 0C 01 0E 02 10 00 00 00 03 <PORT> 00 * RES: 00 01 00 10 00 0C 01 00 00 00 00 02 00 03 <BMAP> 00 * * CMD: 0xF WRITE_PORT * REQ: 00 01 00 14 00 10 01 0F 02 10 00 00 00 03 <PORT> 00 03 <BMAP> 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * CMD: 0x12 SET_PORT_DIR (0 = input, 1 = output) * REQ: 00 01 00 18 00 14 01 12 02 10 00 00 * 00 05 <PORT 0> <PORT 1> <PORT 2> 00 05 00 00 00 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * COUNTER 
PACKETS * * CMD 0x9: START_COUNTER * REQ: 00 01 00 0C 00 08 01 09 02 20 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * CMD 0xC: STOP_COUNTER * REQ: 00 01 00 0C 00 08 01 0C 02 20 00 00 * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * CMD 0xE: READ_COUNTER * REQ: 00 01 00 0C 00 08 01 0E 02 20 00 00 * RES: 00 01 00 10 00 0C 01 00 00 00 00 02 <u32 counter value, Big Endian> * * CMD 0xF: WRITE_COUNTER * REQ: 00 01 00 10 00 0C 01 0F 02 20 00 00 <u32 counter value, Big Endian> * RES: 00 01 00 0C 00 08 01 00 00 00 00 02 * * * Please visit https://www.brickedbrain.com if you need * additional information or have any questions. * */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/comedi/comedi_usb.h> #define NI6501_TIMEOUT 1000 /* Port request packets */ static const u8 READ_PORT_REQUEST[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x0E, 0x02, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00}; static const u8 WRITE_PORT_REQUEST[] = {0x00, 0x01, 0x00, 0x14, 0x00, 0x10, 0x01, 0x0F, 0x02, 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00}; static const u8 SET_PORT_DIR_REQUEST[] = {0x00, 0x01, 0x00, 0x18, 0x00, 0x14, 0x01, 0x12, 0x02, 0x10, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00}; /* Counter request packets */ static const u8 START_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x09, 0x02, 0x20, 0x00, 0x00}; static const u8 STOP_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x0C, 0x02, 0x20, 0x00, 0x00}; static const u8 READ_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x0E, 0x02, 0x20, 0x00, 0x00}; static const u8 WRITE_COUNTER_REQUEST[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x0F, 0x02, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; /* Response packets */ static const u8 GENERIC_RESPONSE[] = {0x00, 0x01, 0x00, 0x0C, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02}; static const u8 READ_PORT_RESPONSE[] = {0x00, 0x01, 0x00, 
0x10, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x03, 0x00, 0x00}; static const u8 READ_COUNTER_RESPONSE[] = {0x00, 0x01, 0x00, 0x10, 0x00, 0x0C, 0x01, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00}; /* Largest supported packets */ static const size_t TX_MAX_SIZE = sizeof(SET_PORT_DIR_REQUEST); static const size_t RX_MAX_SIZE = sizeof(READ_PORT_RESPONSE); enum commands { READ_PORT, WRITE_PORT, SET_PORT_DIR, START_COUNTER, STOP_COUNTER, READ_COUNTER, WRITE_COUNTER }; struct ni6501_private { struct usb_endpoint_descriptor *ep_rx; struct usb_endpoint_descriptor *ep_tx; struct mutex mut; u8 *usb_rx_buf; u8 *usb_tx_buf; }; static int ni6501_port_command(struct comedi_device *dev, int command, unsigned int val, u8 *bitmap) { struct usb_device *usb = comedi_to_usb_dev(dev); struct ni6501_private *devpriv = dev->private; int request_size, response_size; u8 *tx = devpriv->usb_tx_buf; int ret; if (command != SET_PORT_DIR && !bitmap) return -EINVAL; mutex_lock(&devpriv->mut); switch (command) { case READ_PORT: request_size = sizeof(READ_PORT_REQUEST); response_size = sizeof(READ_PORT_RESPONSE); memcpy(tx, READ_PORT_REQUEST, request_size); tx[14] = val & 0xff; break; case WRITE_PORT: request_size = sizeof(WRITE_PORT_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, WRITE_PORT_REQUEST, request_size); tx[14] = val & 0xff; tx[17] = *bitmap; break; case SET_PORT_DIR: request_size = sizeof(SET_PORT_DIR_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, SET_PORT_DIR_REQUEST, request_size); tx[14] = val & 0xff; tx[15] = (val >> 8) & 0xff; tx[16] = (val >> 16) & 0xff; break; default: ret = -EINVAL; goto end; } ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->ep_tx->bEndpointAddress), devpriv->usb_tx_buf, request_size, NULL, NI6501_TIMEOUT); if (ret) goto end; ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->ep_rx->bEndpointAddress), devpriv->usb_rx_buf, response_size, NULL, NI6501_TIMEOUT); if (ret) goto end; /* Check if results 
are valid */ if (command == READ_PORT) { *bitmap = devpriv->usb_rx_buf[14]; /* mask bitmap for comparing */ devpriv->usb_rx_buf[14] = 0x00; if (memcmp(devpriv->usb_rx_buf, READ_PORT_RESPONSE, sizeof(READ_PORT_RESPONSE))) { ret = -EINVAL; } } else if (memcmp(devpriv->usb_rx_buf, GENERIC_RESPONSE, sizeof(GENERIC_RESPONSE))) { ret = -EINVAL; } end: mutex_unlock(&devpriv->mut); return ret; } static int ni6501_counter_command(struct comedi_device *dev, int command, u32 *val) { struct usb_device *usb = comedi_to_usb_dev(dev); struct ni6501_private *devpriv = dev->private; int request_size, response_size; u8 *tx = devpriv->usb_tx_buf; int ret; if ((command == READ_COUNTER || command == WRITE_COUNTER) && !val) return -EINVAL; mutex_lock(&devpriv->mut); switch (command) { case START_COUNTER: request_size = sizeof(START_COUNTER_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, START_COUNTER_REQUEST, request_size); break; case STOP_COUNTER: request_size = sizeof(STOP_COUNTER_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, STOP_COUNTER_REQUEST, request_size); break; case READ_COUNTER: request_size = sizeof(READ_COUNTER_REQUEST); response_size = sizeof(READ_COUNTER_RESPONSE); memcpy(tx, READ_COUNTER_REQUEST, request_size); break; case WRITE_COUNTER: request_size = sizeof(WRITE_COUNTER_REQUEST); response_size = sizeof(GENERIC_RESPONSE); memcpy(tx, WRITE_COUNTER_REQUEST, request_size); /* Setup tx packet: bytes 12,13,14,15 hold the */ /* u32 counter value (Big Endian) */ *((__be32 *)&tx[12]) = cpu_to_be32(*val); break; default: ret = -EINVAL; goto end; } ret = usb_bulk_msg(usb, usb_sndbulkpipe(usb, devpriv->ep_tx->bEndpointAddress), devpriv->usb_tx_buf, request_size, NULL, NI6501_TIMEOUT); if (ret) goto end; ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, devpriv->ep_rx->bEndpointAddress), devpriv->usb_rx_buf, response_size, NULL, NI6501_TIMEOUT); if (ret) goto end; /* Check if results are valid */ if (command == READ_COUNTER) { int i; /* Read counter 
value: bytes 12,13,14,15 of rx packet */ /* hold the u32 counter value (Big Endian) */ *val = be32_to_cpu(*((__be32 *)&devpriv->usb_rx_buf[12])); /* mask counter value for comparing */ for (i = 12; i < sizeof(READ_COUNTER_RESPONSE); ++i) devpriv->usb_rx_buf[i] = 0x00; if (memcmp(devpriv->usb_rx_buf, READ_COUNTER_RESPONSE, sizeof(READ_COUNTER_RESPONSE))) { ret = -EINVAL; } } else if (memcmp(devpriv->usb_rx_buf, GENERIC_RESPONSE, sizeof(GENERIC_RESPONSE))) { ret = -EINVAL; } end: mutex_unlock(&devpriv->mut); return ret; } static int ni6501_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0); if (ret) return ret; ret = ni6501_port_command(dev, SET_PORT_DIR, s->io_bits, NULL); if (ret) return ret; return insn->n; } static int ni6501_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int mask; int ret; u8 port; u8 bitmap; mask = comedi_dio_update_state(s, data); for (port = 0; port < 3; port++) { if (mask & (0xFF << port * 8)) { bitmap = (s->state >> port * 8) & 0xFF; ret = ni6501_port_command(dev, WRITE_PORT, port, &bitmap); if (ret) return ret; } } data[1] = 0; for (port = 0; port < 3; port++) { ret = ni6501_port_command(dev, READ_PORT, port, &bitmap); if (ret) return ret; data[1] |= bitmap << port * 8; } return insn->n; } static int ni6501_cnt_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; u32 val = 0; switch (data[0]) { case INSN_CONFIG_ARM: ret = ni6501_counter_command(dev, START_COUNTER, NULL); break; case INSN_CONFIG_DISARM: ret = ni6501_counter_command(dev, STOP_COUNTER, NULL); break; case INSN_CONFIG_RESET: ret = ni6501_counter_command(dev, STOP_COUNTER, NULL); if (ret) break; ret = ni6501_counter_command(dev, WRITE_COUNTER, &val); break; default: return -EINVAL; } return ret ? 
ret : insn->n; } static int ni6501_cnt_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; u32 val; unsigned int i; for (i = 0; i < insn->n; i++) { ret = ni6501_counter_command(dev, READ_COUNTER, &val); if (ret) return ret; data[i] = val; } return insn->n; } static int ni6501_cnt_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; if (insn->n) { u32 val = data[insn->n - 1]; ret = ni6501_counter_command(dev, WRITE_COUNTER, &val); if (ret) return ret; } return insn->n; } static int ni6501_alloc_usb_buffers(struct comedi_device *dev) { struct ni6501_private *devpriv = dev->private; size_t size; size = usb_endpoint_maxp(devpriv->ep_rx); devpriv->usb_rx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_rx_buf) return -ENOMEM; size = usb_endpoint_maxp(devpriv->ep_tx); devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL); if (!devpriv->usb_tx_buf) return -ENOMEM; return 0; } static int ni6501_find_endpoints(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct ni6501_private *devpriv = dev->private; struct usb_host_interface *iface_desc = intf->cur_altsetting; struct usb_endpoint_descriptor *ep_desc; int i; if (iface_desc->desc.bNumEndpoints != 2) { dev_err(dev->class_dev, "Wrong number of endpoints\n"); return -ENODEV; } for (i = 0; i < iface_desc->desc.bNumEndpoints; i++) { ep_desc = &iface_desc->endpoint[i].desc; if (usb_endpoint_is_bulk_in(ep_desc)) { if (!devpriv->ep_rx) devpriv->ep_rx = ep_desc; continue; } if (usb_endpoint_is_bulk_out(ep_desc)) { if (!devpriv->ep_tx) devpriv->ep_tx = ep_desc; continue; } } if (!devpriv->ep_rx || !devpriv->ep_tx) return -ENODEV; if (usb_endpoint_maxp(devpriv->ep_rx) < RX_MAX_SIZE) return -ENODEV; if (usb_endpoint_maxp(devpriv->ep_tx) < TX_MAX_SIZE) return -ENODEV; return 0; } static int ni6501_auto_attach(struct comedi_device *dev, unsigned long context) { 
struct usb_interface *intf = comedi_to_usb_interface(dev); struct ni6501_private *devpriv; struct comedi_subdevice *s; int ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; mutex_init(&devpriv->mut); usb_set_intfdata(intf, devpriv); ret = ni6501_find_endpoints(dev); if (ret) return ret; ret = ni6501_alloc_usb_buffers(dev); if (ret) return ret; ret = comedi_alloc_subdevices(dev, 2); if (ret) return ret; /* Digital Input/Output subdevice */ s = &dev->subdevices[0]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 24; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = ni6501_dio_insn_bits; s->insn_config = ni6501_dio_insn_config; /* Counter subdevice */ s = &dev->subdevices[1]; s->type = COMEDI_SUBD_COUNTER; s->subdev_flags = SDF_READABLE | SDF_WRITABLE | SDF_LSAMPL; s->n_chan = 1; s->maxdata = 0xffffffff; s->insn_read = ni6501_cnt_insn_read; s->insn_write = ni6501_cnt_insn_write; s->insn_config = ni6501_cnt_insn_config; return 0; } static void ni6501_detach(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct ni6501_private *devpriv = dev->private; if (!devpriv) return; mutex_destroy(&devpriv->mut); usb_set_intfdata(intf, NULL); kfree(devpriv->usb_rx_buf); kfree(devpriv->usb_tx_buf); } static struct comedi_driver ni6501_driver = { .module = THIS_MODULE, .driver_name = "ni6501", .auto_attach = ni6501_auto_attach, .detach = ni6501_detach, }; static int ni6501_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return comedi_usb_auto_config(intf, &ni6501_driver, id->driver_info); } static const struct usb_device_id ni6501_usb_table[] = { { USB_DEVICE(0x3923, 0x718a) }, { } }; MODULE_DEVICE_TABLE(usb, ni6501_usb_table); static struct usb_driver ni6501_usb_driver = { .name = "ni6501", .id_table = ni6501_usb_table, .probe = ni6501_usb_probe, .disconnect = comedi_usb_auto_unconfig, }; module_comedi_usb_driver(ni6501_driver, 
ni6501_usb_driver); MODULE_AUTHOR("Luca Ellero"); MODULE_DESCRIPTION("Comedi driver for National Instruments USB-6501"); MODULE_LICENSE("GPL");
200 201 201 200 201 200 201 67 67 67 67 75 67 75 74 200 200 200 201 200 200 59 59 201 197 200 199 200 201 201 201 200 200 200 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 // SPDX-License-Identifier: GPL-2.0-or-later /* * LED state routines for driver control interface * Copyright (c) 2021 by Jaroslav Kysela <perex@perex.cz> */ #include <linux/slab.h> #include <linux/module.h> #include <linux/leds.h> #include <sound/core.h> #include <sound/control.h> MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("ALSA control interface to LED trigger code."); MODULE_LICENSE("GPL"); #define MAX_LED (((SNDRV_CTL_ELEM_ACCESS_MIC_LED - SNDRV_CTL_ELEM_ACCESS_SPK_LED) \ >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) + 1) #define to_led_card_dev(_dev) \ container_of(_dev, struct snd_ctl_led_card, dev) enum snd_ctl_led_mode { MODE_FOLLOW_MUTE = 0, MODE_FOLLOW_ROUTE, MODE_OFF, MODE_ON, }; struct snd_ctl_led_card { struct device dev; int number; struct snd_ctl_led *led; }; struct snd_ctl_led { 
struct device dev; struct list_head controls; const char *name; unsigned int group; enum led_audio trigger_type; enum snd_ctl_led_mode mode; struct snd_ctl_led_card *cards[SNDRV_CARDS]; }; struct snd_ctl_led_ctl { struct list_head list; struct snd_card *card; unsigned int access; struct snd_kcontrol *kctl; unsigned int index_offset; }; static DEFINE_MUTEX(snd_ctl_led_mutex); static bool snd_ctl_led_card_valid[SNDRV_CARDS]; static struct led_trigger *snd_ctl_ledtrig_audio[NUM_AUDIO_LEDS]; static struct snd_ctl_led snd_ctl_leds[MAX_LED] = { { .name = "speaker", .group = (SNDRV_CTL_ELEM_ACCESS_SPK_LED >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) - 1, .trigger_type = LED_AUDIO_MUTE, .mode = MODE_FOLLOW_MUTE, }, { .name = "mic", .group = (SNDRV_CTL_ELEM_ACCESS_MIC_LED >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) - 1, .trigger_type = LED_AUDIO_MICMUTE, .mode = MODE_FOLLOW_MUTE, }, }; static void snd_ctl_led_sysfs_add(struct snd_card *card); static void snd_ctl_led_sysfs_remove(struct snd_card *card); #define UPDATE_ROUTE(route, cb) \ do { \ int route2 = (cb); \ if (route2 >= 0) \ route = route < 0 ? route2 : (route | route2); \ } while (0) static inline unsigned int access_to_group(unsigned int access) { return ((access & SNDRV_CTL_ELEM_ACCESS_LED_MASK) >> SNDRV_CTL_ELEM_ACCESS_LED_SHIFT) - 1; } static inline unsigned int group_to_access(unsigned int group) { return (group + 1) << SNDRV_CTL_ELEM_ACCESS_LED_SHIFT; } static struct snd_ctl_led *snd_ctl_led_get_by_access(unsigned int access) { unsigned int group = access_to_group(access); if (group >= MAX_LED) return NULL; return &snd_ctl_leds[group]; } /* * A note for callers: * The two static variables info and value are protected using snd_ctl_led_mutex. 
*/ static int snd_ctl_led_get(struct snd_ctl_led_ctl *lctl) { static struct snd_ctl_elem_info info; static struct snd_ctl_elem_value value; struct snd_kcontrol *kctl = lctl->kctl; unsigned int i; int result; memset(&info, 0, sizeof(info)); info.id = kctl->id; info.id.index += lctl->index_offset; info.id.numid += lctl->index_offset; result = kctl->info(kctl, &info); if (result < 0) return -1; memset(&value, 0, sizeof(value)); value.id = info.id; result = kctl->get(kctl, &value); if (result < 0) return -1; if (info.type == SNDRV_CTL_ELEM_TYPE_BOOLEAN || info.type == SNDRV_CTL_ELEM_TYPE_INTEGER) { for (i = 0; i < info.count; i++) if (value.value.integer.value[i] != info.value.integer.min) return 1; } else if (info.type == SNDRV_CTL_ELEM_TYPE_INTEGER64) { for (i = 0; i < info.count; i++) if (value.value.integer64.value[i] != info.value.integer64.min) return 1; } return 0; } static void snd_ctl_led_set_state(struct snd_card *card, unsigned int access, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_ctl_led *led; struct snd_ctl_led_ctl *lctl; int route; bool found; led = snd_ctl_led_get_by_access(access); if (!led) return; route = -1; found = false; scoped_guard(mutex, &snd_ctl_led_mutex) { /* the card may not be registered (active) at this point */ if (card && !snd_ctl_led_card_valid[card->number]) return; list_for_each_entry(lctl, &led->controls, list) { if (lctl->kctl == kctl && lctl->index_offset == ioff) found = true; UPDATE_ROUTE(route, snd_ctl_led_get(lctl)); } if (!found && kctl && card) { lctl = kzalloc(sizeof(*lctl), GFP_KERNEL); if (lctl) { lctl->card = card; lctl->access = access; lctl->kctl = kctl; lctl->index_offset = ioff; list_add(&lctl->list, &led->controls); UPDATE_ROUTE(route, snd_ctl_led_get(lctl)); } } } switch (led->mode) { case MODE_OFF: route = 1; break; case MODE_ON: route = 0; break; case MODE_FOLLOW_ROUTE: if (route >= 0) route ^= 1; break; case MODE_FOLLOW_MUTE: /* noop */ break; } if (route >= 0) { struct led_trigger *trig = 
snd_ctl_ledtrig_audio[led->trigger_type]; led_trigger_event(trig, route ? LED_OFF : LED_ON); } } static struct snd_ctl_led_ctl *snd_ctl_led_find(struct snd_kcontrol *kctl, unsigned int ioff) { struct list_head *controls; struct snd_ctl_led_ctl *lctl; unsigned int group; for (group = 0; group < MAX_LED; group++) { controls = &snd_ctl_leds[group].controls; list_for_each_entry(lctl, controls, list) if (lctl->kctl == kctl && lctl->index_offset == ioff) return lctl; } return NULL; } static unsigned int snd_ctl_led_remove(struct snd_kcontrol *kctl, unsigned int ioff, unsigned int access) { struct snd_ctl_led_ctl *lctl; unsigned int ret = 0; guard(mutex)(&snd_ctl_led_mutex); lctl = snd_ctl_led_find(kctl, ioff); if (lctl && (access == 0 || access != lctl->access)) { ret = lctl->access; list_del(&lctl->list); kfree(lctl); } return ret; } static void snd_ctl_led_notify(struct snd_card *card, unsigned int mask, struct snd_kcontrol *kctl, unsigned int ioff) { struct snd_kcontrol_volatile *vd; unsigned int access, access2; if (mask == SNDRV_CTL_EVENT_MASK_REMOVE) { access = snd_ctl_led_remove(kctl, ioff, 0); if (access) snd_ctl_led_set_state(card, access, NULL, 0); } else if (mask & SNDRV_CTL_EVENT_MASK_INFO) { vd = &kctl->vd[ioff]; access = vd->access & SNDRV_CTL_ELEM_ACCESS_LED_MASK; access2 = snd_ctl_led_remove(kctl, ioff, access); if (access2) snd_ctl_led_set_state(card, access2, NULL, 0); if (access) snd_ctl_led_set_state(card, access, kctl, ioff); } else if ((mask & (SNDRV_CTL_EVENT_MASK_ADD | SNDRV_CTL_EVENT_MASK_VALUE)) != 0) { vd = &kctl->vd[ioff]; access = vd->access & SNDRV_CTL_ELEM_ACCESS_LED_MASK; if (access) snd_ctl_led_set_state(card, access, kctl, ioff); } } DEFINE_FREE(snd_card_unref, struct snd_card *, if (_T) snd_card_unref(_T)) static int snd_ctl_led_set_id(int card_number, struct snd_ctl_elem_id *id, unsigned int group, bool set) { struct snd_card *card __free(snd_card_unref) = NULL; struct snd_kcontrol *kctl; struct snd_kcontrol_volatile *vd; unsigned int 
ioff, access, new_access; card = snd_card_ref(card_number); if (!card) return -ENXIO; guard(rwsem_write)(&card->controls_rwsem); kctl = snd_ctl_find_id(card, id); if (!kctl) return -ENOENT; ioff = snd_ctl_get_ioff(kctl, id); vd = &kctl->vd[ioff]; access = vd->access & SNDRV_CTL_ELEM_ACCESS_LED_MASK; if (access != 0 && access != group_to_access(group)) return -EXDEV; new_access = vd->access & ~SNDRV_CTL_ELEM_ACCESS_LED_MASK; if (set) new_access |= group_to_access(group); if (new_access != vd->access) { vd->access = new_access; snd_ctl_led_notify(card, SNDRV_CTL_EVENT_MASK_INFO, kctl, ioff); } return 0; } static void snd_ctl_led_refresh(void) { unsigned int group; for (group = 0; group < MAX_LED; group++) snd_ctl_led_set_state(NULL, group_to_access(group), NULL, 0); } static void snd_ctl_led_ctl_destroy(struct snd_ctl_led_ctl *lctl) { list_del(&lctl->list); kfree(lctl); } static void snd_ctl_led_clean(struct snd_card *card) { unsigned int group; struct snd_ctl_led_ctl *lctl, *_lctl; struct snd_ctl_led *led; for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; list_for_each_entry_safe(lctl, _lctl, &led->controls, list) if (!card || lctl->card == card) snd_ctl_led_ctl_destroy(lctl); } } static int snd_ctl_led_reset(int card_number, unsigned int group) { struct snd_card *card __free(snd_card_unref) = NULL; struct snd_ctl_led_ctl *lctl, *_lctl; struct snd_ctl_led *led; struct snd_kcontrol_volatile *vd; bool change = false; card = snd_card_ref(card_number); if (!card) return -ENXIO; scoped_guard(mutex, &snd_ctl_led_mutex) { if (!snd_ctl_led_card_valid[card_number]) return -ENXIO; led = &snd_ctl_leds[group]; list_for_each_entry_safe(lctl, _lctl, &led->controls, list) if (lctl->card == card) { vd = &lctl->kctl->vd[lctl->index_offset]; vd->access &= ~group_to_access(group); snd_ctl_led_ctl_destroy(lctl); change = true; } } if (change) snd_ctl_led_set_state(NULL, group_to_access(group), NULL, 0); return 0; } static void snd_ctl_led_register(struct snd_card 
*card) { struct snd_kcontrol *kctl; unsigned int ioff; if (snd_BUG_ON(card->number < 0 || card->number >= ARRAY_SIZE(snd_ctl_led_card_valid))) return; scoped_guard(mutex, &snd_ctl_led_mutex) snd_ctl_led_card_valid[card->number] = true; /* the register callback is already called with held card->controls_rwsem */ list_for_each_entry(kctl, &card->controls, list) for (ioff = 0; ioff < kctl->count; ioff++) snd_ctl_led_notify(card, SNDRV_CTL_EVENT_MASK_VALUE, kctl, ioff); snd_ctl_led_refresh(); snd_ctl_led_sysfs_add(card); } static void snd_ctl_led_disconnect(struct snd_card *card) { snd_ctl_led_sysfs_remove(card); scoped_guard(mutex, &snd_ctl_led_mutex) { snd_ctl_led_card_valid[card->number] = false; snd_ctl_led_clean(card); } snd_ctl_led_refresh(); } static void snd_ctl_led_card_release(struct device *dev) { struct snd_ctl_led_card *led_card = to_led_card_dev(dev); kfree(led_card); } static void snd_ctl_led_release(struct device *dev) { } static void snd_ctl_led_dev_release(struct device *dev) { } /* * sysfs */ static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { struct snd_ctl_led *led = container_of(dev, struct snd_ctl_led, dev); const char *str = NULL; switch (led->mode) { case MODE_FOLLOW_MUTE: str = "follow-mute"; break; case MODE_FOLLOW_ROUTE: str = "follow-route"; break; case MODE_ON: str = "on"; break; case MODE_OFF: str = "off"; break; } return sysfs_emit(buf, "%s\n", str); } static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led *led = container_of(dev, struct snd_ctl_led, dev); char _buf[16]; size_t l = min(count, sizeof(_buf) - 1); enum snd_ctl_led_mode mode; memcpy(_buf, buf, l); _buf[l] = '\0'; if (strstr(_buf, "mute")) mode = MODE_FOLLOW_MUTE; else if (strstr(_buf, "route")) mode = MODE_FOLLOW_ROUTE; else if (strncmp(_buf, "off", 3) == 0 || strncmp(_buf, "0", 1) == 0) mode = MODE_OFF; else if (strncmp(_buf, "on", 2) == 0 || strncmp(_buf, "1", 1) 
/*
 * Copy one string token from @s into @val.
 *
 * @s:        input positioned at the start of the token
 * @val:      destination buffer, always NUL-terminated on return
 * @val_size: size of @val in bytes; at most @val_size - 1 characters are
 *            stored, the rest of the token is consumed but dropped
 *
 * A token that starts with a single or double quote runs up to the matching
 * quote character (commas inside are kept); any other token runs up to the
 * next comma.  The terminating character, if present, is skipped.
 *
 * Returns a pointer to the first character after the token and its
 * terminator, or to the end of the input string.
 */
static char *parse_string(char *s, char *val, size_t val_size)
{
	char stop = ',';

	/* a leading quote selects itself as the terminator and is consumed */
	if (*s == '"' || *s == '\'')
		stop = *s++;

	for (; *s && *s != stop; s++) {
		if (val_size <= 1)
			continue;	/* buffer full: keep scanning, drop the char */
		*val++ = *s;
		val_size--;
	}

	*val = '\0';
	return *s ? s + 1 : s;
}
attach) { char buf2[256], *s, *os; struct snd_ctl_elem_id id; int err; if (strscpy(buf2, buf, sizeof(buf2)) < 0) return -E2BIG; memset(&id, 0, sizeof(id)); id.iface = SNDRV_CTL_ELEM_IFACE_MIXER; s = buf2; while (*s) { os = s; if (!strncasecmp(s, "numid=", 6)) { s = parse_uint(s + 6, &id.numid); } else if (!strncasecmp(s, "iface=", 6)) { s = parse_iface(s + 6, &id.iface); } else if (!strncasecmp(s, "device=", 7)) { s = parse_uint(s + 7, &id.device); } else if (!strncasecmp(s, "subdevice=", 10)) { s = parse_uint(s + 10, &id.subdevice); } else if (!strncasecmp(s, "name=", 5)) { s = parse_string(s + 5, id.name, sizeof(id.name)); } else if (!strncasecmp(s, "index=", 6)) { s = parse_uint(s + 6, &id.index); } else if (s == buf2) { while (*s) { if (*s < '0' || *s > '9') break; s++; } if (*s == '\0') parse_uint(buf2, &id.numid); else { for (; *s >= ' '; s++); *s = '\0'; strscpy(id.name, buf2, sizeof(id.name)); } break; } if (*s == ',') s++; if (s == os) break; } err = snd_ctl_led_set_id(led_card->number, &id, led_card->led->group, attach); if (err < 0) return err; return count; } static ssize_t attach_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); return set_led_id(led_card, buf, count, true); } static ssize_t detach_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); return set_led_id(led_card, buf, count, false); } static ssize_t reset_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); int err; if (count > 0 && buf[0] == '1') { err = snd_ctl_led_reset(led_card->number, led_card->led->group); if (err < 0) return err; } return count; } static ssize_t list_show(struct device *dev, struct device_attribute 
*attr, char *buf) { struct snd_ctl_led_card *led_card = container_of(dev, struct snd_ctl_led_card, dev); struct snd_card *card __free(snd_card_unref) = NULL; struct snd_ctl_led_ctl *lctl; size_t l = 0; card = snd_card_ref(led_card->number); if (!card) return -ENXIO; guard(rwsem_read)(&card->controls_rwsem); guard(mutex)(&snd_ctl_led_mutex); if (snd_ctl_led_card_valid[led_card->number]) { list_for_each_entry(lctl, &led_card->led->controls, list) { if (lctl->card != card) continue; if (l) l += sysfs_emit_at(buf, l, " "); l += sysfs_emit_at(buf, l, "%u", lctl->kctl->id.numid + lctl->index_offset); } } return l; } static DEVICE_ATTR_WO(attach); static DEVICE_ATTR_WO(detach); static DEVICE_ATTR_WO(reset); static DEVICE_ATTR_RO(list); static struct attribute *snd_ctl_led_card_attrs[] = { &dev_attr_attach.attr, &dev_attr_detach.attr, &dev_attr_reset.attr, &dev_attr_list.attr, NULL, }; static const struct attribute_group snd_ctl_led_card_attr_group = { .attrs = snd_ctl_led_card_attrs, }; static const struct attribute_group *snd_ctl_led_card_attr_groups[] = { &snd_ctl_led_card_attr_group, NULL, }; static struct device snd_ctl_led_dev; static void snd_ctl_led_sysfs_add(struct snd_card *card) { unsigned int group; struct snd_ctl_led_card *led_card; struct snd_ctl_led *led; char link_name[32]; for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; led_card = kzalloc(sizeof(*led_card), GFP_KERNEL); if (!led_card) goto cerr2; led_card->number = card->number; led_card->led = led; device_initialize(&led_card->dev); led_card->dev.release = snd_ctl_led_card_release; if (dev_set_name(&led_card->dev, "card%d", card->number) < 0) goto cerr; led_card->dev.parent = &led->dev; led_card->dev.groups = snd_ctl_led_card_attr_groups; if (device_add(&led_card->dev)) goto cerr; led->cards[card->number] = led_card; snprintf(link_name, sizeof(link_name), "led-%s", led->name); if (sysfs_create_link(&card->ctl_dev->kobj, &led_card->dev.kobj, link_name)) dev_err(card->dev, "%s: can't 
create symlink to controlC%i device\n", __func__, card->number); if (sysfs_create_link(&led_card->dev.kobj, &card->card_dev.kobj, "card")) dev_err(card->dev, "%s: can't create symlink to card%i\n", __func__, card->number); continue; cerr: put_device(&led_card->dev); cerr2: dev_err(card->dev, "snd_ctl_led: unable to add card%d", card->number); } } static void snd_ctl_led_sysfs_remove(struct snd_card *card) { unsigned int group; struct snd_ctl_led_card *led_card; struct snd_ctl_led *led; char link_name[32]; for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; led_card = led->cards[card->number]; if (!led_card) continue; snprintf(link_name, sizeof(link_name), "led-%s", led->name); sysfs_remove_link(&card->ctl_dev->kobj, link_name); sysfs_remove_link(&led_card->dev.kobj, "card"); device_unregister(&led_card->dev); led->cards[card->number] = NULL; } } /* * Control layer registration */ static struct snd_ctl_layer_ops snd_ctl_led_lops = { .module_name = SND_CTL_LAYER_MODULE_LED, .lregister = snd_ctl_led_register, .ldisconnect = snd_ctl_led_disconnect, .lnotify = snd_ctl_led_notify, }; static int __init snd_ctl_led_init(void) { struct snd_ctl_led *led; unsigned int group; led_trigger_register_simple("audio-mute", &snd_ctl_ledtrig_audio[LED_AUDIO_MUTE]); led_trigger_register_simple("audio-micmute", &snd_ctl_ledtrig_audio[LED_AUDIO_MICMUTE]); device_initialize(&snd_ctl_led_dev); snd_ctl_led_dev.class = &sound_class; snd_ctl_led_dev.release = snd_ctl_led_dev_release; dev_set_name(&snd_ctl_led_dev, "ctl-led"); if (device_add(&snd_ctl_led_dev)) { put_device(&snd_ctl_led_dev); return -ENOMEM; } for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; INIT_LIST_HEAD(&led->controls); device_initialize(&led->dev); led->dev.parent = &snd_ctl_led_dev; led->dev.release = snd_ctl_led_release; led->dev.groups = snd_ctl_led_dev_attr_groups; dev_set_name(&led->dev, led->name); if (device_add(&led->dev)) { put_device(&led->dev); for (; group > 0; group--) 
{ led = &snd_ctl_leds[group - 1]; device_unregister(&led->dev); } device_unregister(&snd_ctl_led_dev); return -ENOMEM; } } snd_ctl_register_layer(&snd_ctl_led_lops); return 0; } static void __exit snd_ctl_led_exit(void) { struct snd_ctl_led *led; struct snd_card *card; unsigned int group, card_number; snd_ctl_disconnect_layer(&snd_ctl_led_lops); for (card_number = 0; card_number < SNDRV_CARDS; card_number++) { if (!snd_ctl_led_card_valid[card_number]) continue; card = snd_card_ref(card_number); if (card) { snd_ctl_led_sysfs_remove(card); snd_card_unref(card); } } for (group = 0; group < MAX_LED; group++) { led = &snd_ctl_leds[group]; device_unregister(&led->dev); } device_unregister(&snd_ctl_led_dev); snd_ctl_led_clean(NULL); led_trigger_unregister_simple(snd_ctl_ledtrig_audio[LED_AUDIO_MUTE]); led_trigger_unregister_simple(snd_ctl_ledtrig_audio[LED_AUDIO_MICMUTE]); } module_init(snd_ctl_led_init) module_exit(snd_ctl_led_exit) MODULE_ALIAS("ledtrig:audio-mute"); MODULE_ALIAS("ledtrig:audio-micmute");
97 96 3 47 98 79 1 18 99 76 1 22 1 94 167 13 10 92 75 1 1 1 1 1 11 132 130 3 129 49 2 57 2 1 23 26 1 1 1 24 4 1 4 9 38 1 26 1 49 1 49 1 1 1 1 2 5 13 1 1 1 1 1 1 1 2 8 14 1 1 1 1 1 1 1 2 5 83 1 1 1 1 1 82 82 1 63 3 6 66 2 8 71 58 1 12 62 1 1 3 12 59 3 19 35 31 4 27 41 41 35 35 33 1 34 1 34 1 33 1 33 3 68 82 72 9 66 15 81 1 81 79 6 71 76 72 69 2 71 70 1 71 70 1 71 34 37 2 27 8 30 51 133 133 82 30 23 8 7 3 23 30 6 6 4 1 1 6 4 4 56 55 16 16 16 16 10 10 5 5 13 5 5 55 54 1 56 56 51 6 55 47 10 41 16 53 5 45 13 50 5 56 55 2 2 54 55 55 55 56 56 55 56 56 18 18 5 3 3 8 1 3 4 4 1 16 133 134 131 3 1 1 7 4 3 2 2 3 2 1 1 1 1 1 1 1 1 1 1 2 2 23 3 20 19 1 19 1 11 8 1 19 3 16 16 15 1 19 4 6 5 109 2 2 28 2 73 2 3 2 94 2 49 22 22 20 56 2 18 42 43 37 33 7 6 6 43 6 48 6 47 6 33 26 17 128 11 83 80 79 78 76 49 1 50 48 2 47 4 82 6 75 31 1 43 79 37 43 26 6 4 4 4 1 4 4 4 1 3 3 2 3 3 3 2 1 2 1 2 1 3 3 1 1 25 25 1 22 1 23 5 17 18 2 2 2 4 1 1 2 2 2 4 3 1 1 2 2 5 5 3 3 10 10 1 7 1 8 1 6 4 1 1 2 2 2 2 10 10 8 2 2 7 1 4 3 22 2 12 27 1 26 25 22 1 23 2 24 1 7 2 8 1 9 9 9 327 3 1 319 9 354 1 352 1 352 33 22 11 320 1 7 332 1 321 15 354 5 29 9 22 15 35 4 35 2 38 38 38 37 38 38 42 37 38 5 25 26 25 1 25 26 26 26 25 26 35 16 18 8